diff --git a/.github/shared/install_sqlite/action.yml b/.github/shared/install_sqlite/action.yml index f74f620f1..533d4e17d 100644 --- a/.github/shared/install_sqlite/action.yml +++ b/.github/shared/install_sqlite/action.yml @@ -6,10 +6,10 @@ runs: steps: - name: Install SQLite env: - SQLITE_VERSION: "3470200" - YEAR: 2024 + SQLITE_VERSION: "3490100" + YEAR: 2025 run: | - curl -o /tmp/sqlite.zip https://www.sqlite.org/$YEAR/sqlite-tools-linux-x64-$SQLITE_VERSION.zip > /dev/null + curl -o /tmp/sqlite.zip https://sqlite.org/$YEAR/sqlite-tools-linux-x64-$SQLITE_VERSION.zip > /dev/null unzip -j /tmp/sqlite.zip sqlite3 -d /usr/local/bin/ sqlite3 --version shell: bash diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index daf032087..ca92fbd34 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -1,10 +1,12 @@ -# Copyright 2022-2024, axodotdev +# This file was autogenerated by dist: https://github.com/astral-sh/cargo-dist +# +# Copyright 2025 Astral Software Inc. # SPDX-License-Identifier: MIT or Apache-2.0 # # CI that: # # * checks for a Git Tag that looks like a release -# * builds artifacts with cargo-dist (archives, installers, hashes) +# * builds artifacts with dist (archives, installers, hashes) # * uploads those artifacts to temporary workflow zip # * on success, uploads the artifacts to a GitHub Release # @@ -24,10 +26,10 @@ permissions: # must be a Cargo-style SemVer Version (must have at least major.minor.patch). # # If PACKAGE_NAME is specified, then the announcement will be for that -# package (erroring out if it doesn't have the given version or isn't cargo-dist-able). +# package (erroring out if it doesn't have the given version or isn't dist-able). # # If PACKAGE_NAME isn't specified, then the announcement will be for all -# (cargo-dist-able) packages in the workspace with that version (this mode is +# (dist-able) packages in the workspace with that version (this mode is # intended for workspaces with only one dist-able package, or with all dist-able # packages versioned/released in lockstep). # @@ -45,9 +47,9 @@ on: - '**[0-9]+.[0-9]+.[0-9]+*' jobs: - # Run 'cargo dist plan' (or host) to determine what tasks we need to do + # Run 'dist plan' (or host) to determine what tasks we need to do plan: - runs-on: "ubuntu-20.04" + runs-on: "ubuntu-22.04" outputs: val: ${{ steps.plan.outputs.manifest }} tag: ${{ !github.event.pull_request && github.ref_name || '' }} @@ -59,16 +61,16 @@ jobs: - uses: actions/checkout@v4 with: submodules: recursive - - name: Install cargo-dist + - name: Install dist # we specify bash to get pipefail; it guards against the `curl` command # failing. otherwise `sh` won't catch that `curl` returned non-0 shell: bash - run: "curl --proto '=https' --tlsv1.2 -LsSf https://github.com/axodotdev/cargo-dist/releases/download/v0.21.0/cargo-dist-installer.sh | sh" - - name: Cache cargo-dist + run: "curl --proto '=https' --tlsv1.2 -LsSf https://github.com/astral-sh/cargo-dist/releases/download/v0.28.3/cargo-dist-installer.sh | sh" + - name: Cache dist uses: actions/upload-artifact@v4 with: name: cargo-dist-cache - path: ~/.cargo/bin/cargo-dist + path: ~/.cargo/bin/dist # sure would be cool if github gave us proper conditionals... # so here's a doubly-nested ternary-via-truthiness to try to provide the best possible # functionality based on whether this is a pull_request, and whether it's from a fork. @@ -76,8 +78,8 @@ jobs: # but also really annoying to build CI around when it needs secrets to work right.) 
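      # Illustration of the truthiness ternary used in the next step (a sketch with
      # a hypothetical tag v1.2.3, not part of the upstream dist template): on a tag
      # push there is no pull_request payload, so the expression expands to
      #   dist host --steps=create --tag=v1.2.3
      # while on a pull request the left-hand side is falsy and `||` falls back to
      # plain `dist plan`.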
- id: plan run: | - cargo dist ${{ (!github.event.pull_request && format('host --steps=create --tag={0}', github.ref_name)) || 'plan' }} --output-format=json > plan-dist-manifest.json - echo "cargo dist ran successfully" + dist ${{ (!github.event.pull_request && format('host --steps=create --tag={0}', github.ref_name)) || 'plan' }} --output-format=json > plan-dist-manifest.json + echo "dist ran successfully" cat plan-dist-manifest.json echo "manifest=$(jq -c "." plan-dist-manifest.json)" >> "$GITHUB_OUTPUT" - name: "Upload dist-manifest.json" @@ -95,18 +97,19 @@ jobs: if: ${{ fromJson(needs.plan.outputs.val).ci.github.artifacts_matrix.include != null && (needs.plan.outputs.publishing == 'true' || fromJson(needs.plan.outputs.val).ci.github.pr_run_mode == 'upload') }} strategy: fail-fast: false - # Target platforms/runners are computed by cargo-dist in create-release. + # Target platforms/runners are computed by dist in create-release. # Each member of the matrix has the following arguments: # # - runner: the github runner - # - dist-args: cli flags to pass to cargo dist - # - install-dist: expression to run to install cargo-dist on the runner + # - dist-args: cli flags to pass to dist + # - install-dist: expression to run to install dist on the runner # # Typically there will be: # - 1 "global" task that builds universal installers # - N "local" tasks that build each platform's binaries and platform-specific installers matrix: ${{ fromJson(needs.plan.outputs.val).ci.github.artifacts_matrix }} runs-on: ${{ matrix.runner }} + container: ${{ matrix.container && matrix.container.image || null }} env: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} BUILD_MANIFEST_NAME: target/distrib/${{ join(matrix.targets, '-') }}-dist-manifest.json @@ -117,8 +120,15 @@ jobs: - uses: actions/checkout@v4 with: submodules: recursive - - name: Install cargo-dist - run: ${{ matrix.install_dist }} + - name: Install Rust non-interactively if not already installed + if: ${{ matrix.container }} + run: | + if ! 
command -v cargo > /dev/null 2>&1; then + curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y + echo "$HOME/.cargo/bin" >> $GITHUB_PATH + fi + - name: Install dist + run: ${{ matrix.install_dist.run }} # Get the dist-manifest - name: Fetch local artifacts uses: actions/download-artifact@v4 @@ -132,10 +142,10 @@ jobs: - name: Build artifacts run: | # Actually do builds and make zips and whatnot - cargo dist build ${{ needs.plan.outputs.tag-flag }} --print=linkage --output-format=json ${{ matrix.dist_args }} > dist-manifest.json - echo "cargo dist ran successfully" + dist build ${{ needs.plan.outputs.tag-flag }} --print=linkage --output-format=json ${{ matrix.dist_args }} > dist-manifest.json + echo "dist ran successfully" - name: Attest - uses: actions/attest-build-provenance@v1 + uses: actions/attest-build-provenance@v2 with: subject-path: "target/distrib/*${{ join(matrix.targets, ', ') }}*" - id: cargo-dist @@ -147,7 +157,7 @@ jobs: run: | # Parse out what we just built and upload it to scratch storage echo "paths<> "$GITHUB_OUTPUT" - jq --raw-output ".upload_files[]" dist-manifest.json >> "$GITHUB_OUTPUT" + dist print-upload-files-from-manifest --manifest dist-manifest.json >> "$GITHUB_OUTPUT" echo "EOF" >> "$GITHUB_OUTPUT" cp dist-manifest.json "$BUILD_MANIFEST_NAME" @@ -164,7 +174,7 @@ jobs: needs: - plan - build-local-artifacts - runs-on: "ubuntu-20.04" + runs-on: "ubuntu-22.04" env: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} BUILD_MANIFEST_NAME: target/distrib/global-dist-manifest.json @@ -172,12 +182,12 @@ jobs: - uses: actions/checkout@v4 with: submodules: recursive - - name: Install cached cargo-dist + - name: Install cached dist uses: actions/download-artifact@v4 with: name: cargo-dist-cache path: ~/.cargo/bin/ - - run: chmod +x ~/.cargo/bin/cargo-dist + - run: chmod +x ~/.cargo/bin/dist # Get all the local artifacts for the global tasks to use (for e.g. 
checksums) - name: Fetch local artifacts uses: actions/download-artifact@v4 @@ -188,8 +198,8 @@ jobs: - id: cargo-dist shell: bash run: | - cargo dist build ${{ needs.plan.outputs.tag-flag }} --output-format=json "--artifacts=global" > dist-manifest.json - echo "cargo dist ran successfully" + dist build ${{ needs.plan.outputs.tag-flag }} --output-format=json "--artifacts=global" > dist-manifest.json + echo "dist ran successfully" # Parse out what we just built and upload it to scratch storage echo "paths<> "$GITHUB_OUTPUT" @@ -214,19 +224,19 @@ jobs: if: ${{ always() && needs.plan.outputs.publishing == 'true' && (needs.build-global-artifacts.result == 'skipped' || needs.build-global-artifacts.result == 'success') && (needs.build-local-artifacts.result == 'skipped' || needs.build-local-artifacts.result == 'success') }} env: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} - runs-on: "ubuntu-20.04" + runs-on: "ubuntu-22.04" outputs: val: ${{ steps.host.outputs.manifest }} steps: - uses: actions/checkout@v4 with: submodules: recursive - - name: Install cached cargo-dist + - name: Install cached dist uses: actions/download-artifact@v4 with: name: cargo-dist-cache path: ~/.cargo/bin/ - - run: chmod +x ~/.cargo/bin/cargo-dist + - run: chmod +x ~/.cargo/bin/dist # Fetch artifacts from scratch-storage - name: Fetch artifacts uses: actions/download-artifact@v4 @@ -237,7 +247,7 @@ jobs: - id: host shell: bash run: | - cargo dist host ${{ needs.plan.outputs.tag-flag }} --steps=upload --steps=release --output-format=json > dist-manifest.json + dist host ${{ needs.plan.outputs.tag-flag }} --steps=upload --steps=release --output-format=json > dist-manifest.json echo "artifacts uploaded and released successfully" cat dist-manifest.json echo "manifest=$(jq -c "." dist-manifest.json)" >> "$GITHUB_OUTPUT" @@ -278,7 +288,7 @@ jobs: # still allowing individual publish jobs to skip themselves (for prereleases). # "host" however must run to completion, no skipping allowed! 
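  # A note on the guard below (explanatory sketch, not from the upstream dist
  # template): `always()` stops GitHub from auto-skipping this job when an
  # upstream job failed or was skipped, and the explicit `needs.host.result ==
  # 'success'` check then reinstates the one dependency that actually matters,
  # so individual publish jobs can skip themselves without cancelling this one.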
if: ${{ always() && needs.host.result == 'success' }} - runs-on: "ubuntu-20.04" + runs-on: "ubuntu-22.04" env: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} steps: diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index 67d6d7e23..fcc054d81 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -75,6 +75,18 @@ jobs: curl -L $LINK/$CARGO_C_FILE | tar xz -C ~/.cargo/bin - uses: actions/checkout@v3 + + - name: Install uv + uses: astral-sh/setup-uv@v5 + with: + enable-cache: true + + - name: Set up Python + run: uv python install + + - name: Install the project + run: uv sync --all-extras --dev --all-packages + - uses: "./.github/shared/install_sqlite" - name: Test run: make test diff --git a/.gitignore b/.gitignore index 8a7437707..893d3fd3f 100644 --- a/.gitignore +++ b/.gitignore @@ -34,3 +34,5 @@ dist/ # testing testing/limbo_output.txt **/limbo_output.txt +testing/*.log +.bugbase diff --git a/.python-version b/.python-version new file mode 100644 index 000000000..24ee5b1be --- /dev/null +++ b/.python-version @@ -0,0 +1 @@ +3.13 diff --git a/CHANGELOG.md b/CHANGELOG.md index eaf9c0ed5..42a9c5bd1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,91 @@ # Changelog +## 0.0.19 - 2025-04-16 + +### Added + +* Add `BeginSubrtn`, `NotFound` and `Affinity` bytecodes (Diego Reis) +* Add Ansi Colors to tcl test runner (Pedro Muniz) +* support modifiers for julianday() (meteorgan) +* Implement Once and OpenAutoindex opcodes (Jussi Saurio) +* Add support for OpenEphemeral bytecode (Diego Reis) +* simulator: Add Bug Database(BugBase) (Alperen Keleş) +* feat: Add timediff data and time function (Sachin Kumar Singh) +* core/btree: Add PageContent::new() helper (Pekka Enberg) +* Add support to load log file with stress test (Pere Diaz Bou) +* Support UPDATE for virtual tables (Preston Thorpe) +* Add `.timer` command to print SQL execution statistics (Pere Diaz Bou) +* Strict table support (Ihor Andrianov) +* Support backwards index scan and seeks + utilize indexes in removing ORDER BY (Jussi Saurio) +* Add deterministic Clock (Avinash Sajjanshetty) +* Support offset clause in Update queries (Preston Thorpe) +* Support Create Index (Preston Thorpe) +* Support insert default values syntax (Preston Thorpe) +* Add support for default values in INSERT statements (Diego Reis) + +### Updated + +* Test: write tests for file backed db (Pedro Muniz) +* btree: move some blocks of code to more reasonable places (Jussi Saurio) +* Parse hex integers 2 (Anton Harniakou) +* More index utils (Jussi Saurio) +* Index utils (Jussi Saurio) +* Feature: VDestroy for Dropping Virtual Tables (Pedro Muniz) +* Feat balance shallower (Lâm Hoàng Phúc) +* Parse hexidecimal integers (Anton Harniakou) +* Code clean-ups (Diego Reis) +* Return null when parameter is unbound (Levy A.) 
+* Enhance robusteness of optimization for Binary expressions (Diego Reis) +* Check that index seek key members are not null (Jussi Saurio) +* Better diagnostics (Pedro Muniz) +* simulator: provide high level commands on top of a single runner (Alperen Keleş) +* build(deps-dev): bump vite from 6.0.7 to 6.2.6 in /bindings/wasm/test-limbo-pkg (dependabot[bot]) +* btree: remove IterationState (Jussi Saurio) +* build(deps): bump pyo3 from 0.24.0 to 0.24.1 (dependabot[bot]) +* Multi column indexes + index seek refactor (Jussi Saurio) +* Emit ANSI codes only when tracing is outputting to terminal (Preston Thorpe) +* B-Tree code cleanups (Pekka Enberg) +* btree index selection on rightmost pointer in `balance_non_root` (Pere Diaz Bou) +* io/linux: make syscallio the default (io_uring is really slow) (Jussi Saurio) +* Stress improvements (Pekka Enberg) +* VDBE code cleanups (Pekka Enberg) +* Memory tests to track large blob insertions (Pedro Muniz) +* Setup tracing to allow output during test runs (Preston Thorpe) +* allow insertion of multiple overflow cells (Pere Diaz Bou) +* Properly handle insertion of indexed columns (Preston Thorpe) +* VTabs: Proper handling of re-opened db files without the relevant extensions loaded (Preston Thorpe) +* Account divider cell in size while distributing cells (Pere Diaz Bou) +* Format infinite float as "Inf"/"-Inf" (jachewz) +* update sqlite download version to 2025 + remove www. (Pere Diaz Bou) +* Improve validation of btree balancing (Pere Diaz Bou) +* Aggregation without group by produces incorrect results for scalars (Ihor Andrianov) +* Dot command completion (Pedro Muniz) +* Allow reading altered tables by defaulting to null in Column insn (Preston Thorpe) +* docs(readme): update discord link (Jamie Barton) +* More VDBE cleanups (Pekka Enberg) +* Request load page on `insert_into_page` (Pere Diaz Bou) +* core/vdbe: Rename execute_insn_* to op_* (Pekka Enberg) +* Remove RWLock from Shared wal state (Pere Diaz Bou) +* VDBE with indirect function dispatch (Pere Diaz Bou) + +### Fixed + +* Fix truncation of error output in tests (Pedro Muniz) +* Fix Unary Negate Operation on Blobs (Pedro Muniz) +* Fix incompatibility `AND` Operation (Pedro Muniz) +* Fix: comment out incorrect assert in fuzz (Pedro Muniz) +* Fix two issues with indexes (Jussi Saurio) +* Fuzz fix some operations (Pedro Muniz) +* simulator: updates to bug base, refactors (Alperen Keleş) +* Fix overwrite cell with size less than cell size (Pere Diaz Bou) +* Fix `EXPLAIN` to be case insensitive (Pedro Muniz) +* core: Fix syscall VFS on Linux (Pekka Enberg) +* Index insert fixes (Pere Diaz Bou) +* Decrease page count on balancing fixes (Pere Diaz Bou) +* Remainder fixes (jachewz) +* Fix virtual table translation issues (Preston Thorpe) +* Fix overflow position in write_page() (Lâm Hoàng Phúc) + ## 0.0.18 - 2025-04-02 ### Added diff --git a/COMPAT.md b/COMPAT.md index 7fff65b87..799411193 100644 --- a/COMPAT.md +++ b/COMPAT.md @@ -4,6 +4,8 @@ This document describes the compatibility of Limbo with SQLite. ## Table of contents +- [Limbo compatibility with SQLite](#limbo-compatibility-with-sqlite) + - [Table of contents](#table-of-contents) - [Overview](#overview) - [Features](#features) - [Limitations](#limitations) @@ -41,7 +43,6 @@ Limbo aims to be fully compatible with SQLite, with opt-in features not supporte * ⛔️ Concurrent access from multiple processes is not supported. * ⛔️ Savepoints are not supported. * ⛔️ Triggers are not supported. -* ⛔️ Indexes are not supported. 
* ⛔️ Views are not supported. * ⛔️ Vacuum is not supported. @@ -56,15 +57,16 @@ Limbo aims to be fully compatible with SQLite, with opt-in features not supporte | ATTACH DATABASE | No | | | BEGIN TRANSACTION | Partial | Transaction names are not supported. | | COMMIT TRANSACTION | Partial | Transaction names are not supported. | -| CREATE INDEX | No | | +| CREATE INDEX | Yes | | | CREATE TABLE | Partial | | +| CREATE TABLE ... STRICT | Yes | | | CREATE TRIGGER | No | | | CREATE VIEW | No | | -| CREATE VIRTUAL TABLE | No | | +| CREATE VIRTUAL TABLE | Yes | | | DELETE | Yes | | | DETACH DATABASE | No | | | DROP INDEX | No | | -| DROP TABLE | No | | +| DROP TABLE | Yes | | | DROP TRIGGER | No | | | DROP VIEW | No | | | END TRANSACTION | Partial | Alias for `COMMIT TRANSACTION` | @@ -198,7 +200,7 @@ Feature support of [sqlite expr syntax](https://www.sqlite.org/lang_expr.html). | (NOT) MATCH | No | | | IS (NOT) | Yes | | | IS (NOT) DISTINCT FROM | Yes | | -| (NOT) BETWEEN ... AND ... | No | | +| (NOT) BETWEEN ... AND ... | Yes | Expression is rewritten in the optimizer | | (NOT) IN (subquery) | No | | | (NOT) EXISTS (subquery) | No | | | CASE WHEN THEN ELSE END | Yes | | @@ -226,8 +228,8 @@ Feature support of [sqlite expr syntax](https://www.sqlite.org/lang_expr.html). | length(X) | Yes | | | like(X,Y) | Yes | | | like(X,Y,Z) | Yes | | -| likelihood(X,Y) | No | | -| likely(X) | No | | +| likelihood(X,Y) | Yes | | +| likely(X) | Yes | | | load_extension(X) | Yes | sqlite3 extensions not yet supported | | load_extension(X,Y) | No | | | lower(X) | Yes | | @@ -325,10 +327,10 @@ Feature support of [sqlite expr syntax](https://www.sqlite.org/lang_expr.html). | date() | Yes | partially supports modifiers | | time() | Yes | partially supports modifiers | | datetime() | Yes | partially supports modifiers | -| julianday() | Partial | does not support modifiers | +| julianday() | Yes | partially support modifiers | | unixepoch() | Partial | does not support modifiers | | strftime() | Yes | partially supports modifiers | -| timediff() | No | | +| timediff() | Yes | partially supports modifiers | Modifiers: @@ -425,6 +427,7 @@ Modifiers: | BitNot | Yes | | | BitOr | Yes | | | Blob | Yes | | +| BeginSubrtn | Yes | | | Checkpoint | No | | | Clear | No | | | Close | No | | @@ -460,7 +463,7 @@ Modifiers: | HaltIfNull | No | | | IdxDelete | No | | | IdxGE | Yes | | -| IdxInsert | No | | +| IdxInsert | Yes | | | IdxLE | Yes | | | IdxLT | Yes | | | IdxRowid | No | | @@ -472,9 +475,7 @@ Modifiers: | IncrVacuum | No | | | Init | Yes | | | InitCoroutine | Yes | | -| Insert | No | | -| InsertAsync | Yes | | -| InsertAwait | Yes | | +| Insert | Yes | | | InsertInt | No | | | Int64 | No | | | Integer | Yes | | @@ -495,9 +496,7 @@ Modifiers: | MustBeInt | Yes | | | Ne | Yes | | | NewRowid | Yes | | -| Next | No | | -| NextAsync | Yes | | -| NextAwait | Yes | | +| Next | Yes | | | Noop | Yes | | | Not | Yes | | | NotExists | Yes | | @@ -505,23 +504,18 @@ Modifiers: | NotNull | Yes | | | Null | Yes | | | NullRow | Yes | | -| Once | No | | -| OpenAutoindex | No | | -| OpenEphemeral | No | | +| Once | Yes | | +| OpenAutoindex | Yes | | +| OpenEphemeral | Yes | | | OpenPseudo | Yes | | | OpenRead | Yes | | -| OpenReadAsync | Yes | | -| OpenWrite | No | | -| OpenWriteAsync | Yes | | -| OpenWriteAwait | Yes | | +| OpenWrite | Yes | | | Or | Yes | | | Pagecount | Partial| no temp databases | | Param | No | | | ParseSchema | No | | | Permutation | No | | -| Prev | No | | -| PrevAsync | Yes | | -| PrevAwait | Yes | | +| Prev | 
Yes | | | Program | No | | | ReadCookie | Partial| no temp databases, only user_version supported | | Real | Yes | | @@ -531,8 +525,6 @@ Modifiers: | ResultRow | Yes | | | Return | Yes | | | Rewind | Yes | | -| RewindAsync | Yes | | -| RewindAwait | Yes | | | RowData | No | | | RowId | Yes | | | RowKey | No | | @@ -548,6 +540,7 @@ Modifiers: | SeekLe | No | | | SeekLt | No | | | SeekRowid | Yes | | +| SeekEnd | Yes | | | Sequence | No | | | SetCookie | No | | | ShiftLeft | Yes | | @@ -574,10 +567,10 @@ Modifiers: | VBegin | No | | | VColumn | Yes | | | VCreate | Yes | | -| VDestroy | No | | +| VDestroy | Yes | | | VFilter | Yes | | | VNext | Yes | | -| VOpen | Yes |VOpenAsync| +| VOpen | Yes | | | VRename | No | | | VUpdate | Yes | | | Vacuum | No | | diff --git a/Cargo.lock b/Cargo.lock index c6e5eab5f..d4f6c43df 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -344,6 +344,7 @@ dependencies = [ "iana-time-zone", "js-sys", "num-traits", + "serde", "wasm-bindgen", "windows-link", ] @@ -397,6 +398,18 @@ dependencies = [ "strsim", ] +[[package]] +name = "clap_complete" +version = "4.5.47" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c06f5378ea264ad4f82bbc826628b5aad714a75abf6ece087e923010eb937fb6" +dependencies = [ + "clap", + "clap_lex", + "is_executable", + "shlex", +] + [[package]] name = "clap_derive" version = "4.5.32" @@ -499,7 +512,7 @@ checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" [[package]] name = "core_tester" -version = "0.0.19-pre.4" +version = "0.0.19" dependencies = [ "anyhow", "assert_cmd", @@ -571,6 +584,15 @@ dependencies = [ "itertools", ] +[[package]] +name = "crossbeam-channel" +version = "0.5.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06ba6d68e24814cb8de6bb986db8222d3a027d15872cabc0d18817bc3c0e4471" +dependencies = [ + "crossbeam-utils", +] + [[package]] name = "crossbeam-deque" version = "0.8.6" @@ -711,7 +733,16 @@ version = "5.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "44c45a9d03d6676652bcb5e724c7e988de1acad23a711b5217ab9cbecbec2225" dependencies = [ - "dirs-sys", + "dirs-sys 0.4.1", +] + +[[package]] +name = "dirs" +version = "6.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3e8aa94d75141228480295a7d0e7feb620b1a5ad9f12bc40be62411e38cce4e" +dependencies = [ + "dirs-sys 0.5.0", ] [[package]] @@ -722,10 +753,22 @@ checksum = "520f05a5cbd335fae5a99ff7a6ab8627577660ee5cfd6a94a6a929b52ff0321c" dependencies = [ "libc", "option-ext", - "redox_users", + "redox_users 0.4.6", "windows-sys 0.48.0", ] +[[package]] +name = "dirs-sys" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e01a3366d27ee9890022452ee61b2b63a67e6f13f58900b651ff5665f0bb1fab" +dependencies = [ + "libc", + "option-ext", + "redox_users 0.5.0", + "windows-sys 0.59.0", +] + [[package]] name = "displaydoc" version = "0.2.5" @@ -868,7 +911,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0ce92ff622d6dadf7349484f42c93271a0d49b7cc4d466a936405bacbe10aa78" dependencies = [ "cfg-if", - "rustix 1.0.3", + "rustix 1.0.7", "windows-sys 0.59.0", ] @@ -1369,11 +1412,12 @@ dependencies = [ [[package]] name = "io-uring" -version = "0.6.4" +version = "0.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "595a0399f411a508feb2ec1e970a4a30c249351e30208960d58298de8660b0e5" +checksum = 
"3c2f96dfbc20c12b9b4f12eef60472d8c29b9c3f29463570dcb47e4a48551168" dependencies = [ - "bitflags 1.3.2", + "bitflags 2.9.0", + "cfg-if", "libc", ] @@ -1400,6 +1444,15 @@ version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7655c9839580ee829dfacba1d1278c2b7883e50a277ff7541299489d6bdfdc45" +[[package]] +name = "is_executable" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d4a1b5bad6f9072935961dfbf1cced2f3d129963d091b6f69f007fe04e758ae2" +dependencies = [ + "winapi", +] + [[package]] name = "is_terminal_polyfill" version = "1.70.1" @@ -1488,9 +1541,9 @@ dependencies = [ [[package]] name = "julian_day_converter" -version = "0.4.4" +version = "0.4.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1aa5652b85ab018289638c6b924db618da9edd2ddfff7fa0ec38a8b51a9192d3" +checksum = "f2987f71b89b85c812c8484cbf0c5d7912589e77bfdc66fd3e52f760e7859f16" dependencies = [ "chrono", ] @@ -1523,9 +1576,9 @@ checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" [[package]] name = "libc" -version = "0.2.171" +version = "0.2.172" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c19937216e9d3aa9956d9bb8dfc0b0c8beb6058fc4f7a4dc4d850edf86a237d6" +checksum = "d750af042f7ef4f724306de029d18836c26c1765a54a6a3f094cbd23a7267ffa" [[package]] name = "libgit2-sys" @@ -1557,9 +1610,9 @@ checksum = "8355be11b20d696c8f18f6cc018c4e372165b1fa8126cef092399c9951984ffa" [[package]] name = "libmimalloc-sys" -version = "0.1.40" +version = "0.1.42" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "07d0e07885d6a754b9c7993f2625187ad694ee985d60f23355ff0e7077261502" +checksum = "ec9d6fac27761dabcd4ee73571cdb06b7022dc99089acbe5435691edffaac0f4" dependencies = [ "cc", "libc", @@ -1601,7 +1654,7 @@ dependencies = [ [[package]] name = "limbo" -version = "0.0.19-pre.4" +version = "0.0.19" dependencies = [ "limbo_core", "thiserror 2.0.12", @@ -1610,14 +1663,14 @@ dependencies = [ [[package]] name = "limbo-go" -version = "0.0.19-pre.4" +version = "0.0.19" dependencies = [ "limbo_core", ] [[package]] name = "limbo-java" -version = "0.0.19-pre.4" +version = "0.0.19" dependencies = [ "jni", "limbo_core", @@ -1626,7 +1679,7 @@ dependencies = [ [[package]] name = "limbo-wasm" -version = "0.0.19-pre.4" +version = "0.0.19" dependencies = [ "console_error_panic_hook", "getrandom 0.2.15", @@ -1639,28 +1692,32 @@ dependencies = [ [[package]] name = "limbo_cli" -version = "0.0.19-pre.4" +version = "0.0.19" dependencies = [ "anyhow", "cfg-if", "clap", + "clap_complete", "comfy-table", "csv", "ctrlc", - "dirs", + "dirs 5.0.1", "env_logger 0.10.2", + "libc", "limbo_core", "miette", "nu-ansi-term 0.50.1", "rustyline", + "shlex", "syntect", "tracing", + "tracing-appender", "tracing-subscriber", ] [[package]] name = "limbo_completion" -version = "0.0.19-pre.4" +version = "0.0.19" dependencies = [ "limbo_ext", "mimalloc", @@ -1668,8 +1725,9 @@ dependencies = [ [[package]] name = "limbo_core" -version = "0.0.19-pre.4" +version = "0.0.19" dependencies = [ + "bitflags 2.9.0", "built", "cfg_block", "chrono", @@ -1710,7 +1768,7 @@ dependencies = [ "regex-syntax 0.8.5", "rstest", "rusqlite", - "rustix 0.38.44", + "rustix 1.0.7", "ryu", "strum", "tempfile", @@ -1721,7 +1779,7 @@ dependencies = [ [[package]] name = "limbo_crypto" -version = "0.0.19-pre.4" +version = "0.0.19" dependencies = [ "blake3", "data-encoding", @@ -1734,7 +1792,7 @@ dependencies = [ [[package]] name = 
"limbo_ext" -version = "0.0.19-pre.4" +version = "0.0.19" dependencies = [ "chrono", "getrandom 0.3.2", @@ -1743,7 +1801,7 @@ dependencies = [ [[package]] name = "limbo_ext_tests" -version = "0.0.19-pre.4" +version = "0.0.19" dependencies = [ "env_logger 0.11.7", "lazy_static", @@ -1754,7 +1812,7 @@ dependencies = [ [[package]] name = "limbo_ipaddr" -version = "0.0.19-pre.4" +version = "0.0.19" dependencies = [ "ipnetwork", "limbo_ext", @@ -1763,7 +1821,7 @@ dependencies = [ [[package]] name = "limbo_macros" -version = "0.0.19-pre.4" +version = "0.0.19" dependencies = [ "proc-macro2", "quote", @@ -1772,7 +1830,7 @@ dependencies = [ [[package]] name = "limbo_node" -version = "0.0.19-pre.4" +version = "0.0.19" dependencies = [ "limbo_core", "napi", @@ -1782,7 +1840,7 @@ dependencies = [ [[package]] name = "limbo_percentile" -version = "0.0.19-pre.4" +version = "0.0.19" dependencies = [ "limbo_ext", "mimalloc", @@ -1790,7 +1848,7 @@ dependencies = [ [[package]] name = "limbo_regexp" -version = "0.0.19-pre.4" +version = "0.0.19" dependencies = [ "limbo_ext", "mimalloc", @@ -1799,7 +1857,7 @@ dependencies = [ [[package]] name = "limbo_series" -version = "0.0.19-pre.4" +version = "0.0.19" dependencies = [ "limbo_ext", "mimalloc", @@ -1809,10 +1867,12 @@ dependencies = [ [[package]] name = "limbo_sim" -version = "0.0.19-pre.4" +version = "0.0.19" dependencies = [ "anarchist-readable-name-generator-lib", + "chrono", "clap", + "dirs 6.0.0", "env_logger 0.10.2", "limbo_core", "log", @@ -1824,12 +1884,11 @@ dependencies = [ "rusqlite", "serde", "serde_json", - "tempfile", ] [[package]] name = "limbo_sqlite3" -version = "0.0.19-pre.4" +version = "0.0.19" dependencies = [ "env_logger 0.11.7", "libc", @@ -1839,7 +1898,7 @@ dependencies = [ [[package]] name = "limbo_sqlite3_parser" -version = "0.0.19-pre.4" +version = "0.0.19" dependencies = [ "bitflags 2.9.0", "cc", @@ -1859,18 +1918,22 @@ dependencies = [ [[package]] name = "limbo_stress" -version = "0.0.19-pre.4" +version = "0.0.19" dependencies = [ + "anarchist-readable-name-generator-lib", "antithesis_sdk", "clap", + "hex", "limbo", - "serde_json", "tokio", + "tracing", + "tracing-appender", + "tracing-subscriber", ] [[package]] name = "limbo_time" -version = "0.0.19-pre.4" +version = "0.0.19" dependencies = [ "chrono", "limbo_ext", @@ -1882,7 +1945,7 @@ dependencies = [ [[package]] name = "limbo_uuid" -version = "0.0.19-pre.4" +version = "0.0.19" dependencies = [ "limbo_ext", "mimalloc", @@ -1951,9 +2014,9 @@ checksum = "13dc2df351e3202783a1fe0d44375f7295ffb4049267b0f3018346dc122a1d94" [[package]] name = "lru" -version = "0.13.0" +version = "0.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "227748d55f2f0ab4735d87fd623798cb6b664512fe979705f829c9f81c934465" +checksum = "9f8cc7106155f10bdf99a6f379688f543ad6596a415375b36a59a054ceda1198" dependencies = [ "hashbrown", ] @@ -1999,9 +2062,9 @@ dependencies = [ [[package]] name = "miette" -version = "7.5.0" +version = "7.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a955165f87b37fd1862df2a59547ac542c77ef6d17c666f619d1ad22dd89484" +checksum = "5f98efec8807c63c752b5bd61f862c165c115b0a35685bdcfd9238c7aeb592b7" dependencies = [ "backtrace", "backtrace-ext", @@ -2013,15 +2076,14 @@ dependencies = [ "supports-unicode", "terminal_size", "textwrap", - "thiserror 1.0.69", "unicode-width 0.1.14", ] [[package]] name = "miette-derive" -version = "7.5.0" +version = "7.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum 
= "bf45bf44ab49be92fd1227a3be6fc6f617f1a337c06af54981048574d8783147" +checksum = "db5b29714e950dbb20d5e6f74f9dcec4edbcc1067bb7f8ed198c097b8c1a818b" dependencies = [ "proc-macro2", "quote", @@ -2030,9 +2092,9 @@ dependencies = [ [[package]] name = "mimalloc" -version = "0.1.44" +version = "0.1.46" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "99585191385958383e13f6b822e6b6d8d9cf928e7d286ceb092da92b43c87bc1" +checksum = "995942f432bbb4822a7e9c3faa87a695185b0d09273ba85f097b54f4e458f2af" dependencies = [ "libmimalloc-sys", ] @@ -2238,9 +2300,9 @@ dependencies = [ [[package]] name = "once_cell" -version = "1.21.1" +version = "1.21.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d75b0bedcc4fe52caa0e03d9f1151a323e4aa5e2d78ba3580400cd3c9e2bc4bc" +checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" [[package]] name = "onig" @@ -2521,7 +2583,7 @@ dependencies = [ [[package]] name = "py-limbo" -version = "0.0.19-pre.4" +version = "0.0.19" dependencies = [ "anyhow", "limbo_core", @@ -2532,9 +2594,9 @@ dependencies = [ [[package]] name = "pyo3" -version = "0.24.0" +version = "0.24.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f1c6c3591120564d64db2261bec5f910ae454f01def849b9c22835a84695e86" +checksum = "17da310086b068fbdcefbba30aeb3721d5bb9af8db4987d6735b2183ca567229" dependencies = [ "anyhow", "cfg-if", @@ -2551,9 +2613,9 @@ dependencies = [ [[package]] name = "pyo3-build-config" -version = "0.24.0" +version = "0.24.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e9b6c2b34cf71427ea37c7001aefbaeb85886a074795e35f161f5aecc7620a7a" +checksum = "e27165889bd793000a098bb966adc4300c312497ea25cf7a690a9f0ac5aa5fc1" dependencies = [ "once_cell", "target-lexicon", @@ -2561,9 +2623,9 @@ dependencies = [ [[package]] name = "pyo3-ffi" -version = "0.24.0" +version = "0.24.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5507651906a46432cdda02cd02dd0319f6064f1374c9147c45b978621d2c3a9c" +checksum = "05280526e1dbf6b420062f3ef228b78c0c54ba94e157f5cb724a609d0f2faabc" dependencies = [ "libc", "pyo3-build-config", @@ -2571,9 +2633,9 @@ dependencies = [ [[package]] name = "pyo3-macros" -version = "0.24.0" +version = "0.24.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b0d394b5b4fd8d97d48336bb0dd2aebabad39f1d294edd6bcd2cccf2eefe6f42" +checksum = "5c3ce5686aa4d3f63359a5100c62a127c9f15e8398e5fdeb5deef1fed5cd5f44" dependencies = [ "proc-macro2", "pyo3-macros-backend", @@ -2583,9 +2645,9 @@ dependencies = [ [[package]] name = "pyo3-macros-backend" -version = "0.24.0" +version = "0.24.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fd72da09cfa943b1080f621f024d2ef7e2773df7badd51aa30a2be1f8caa7c8e" +checksum = "f4cf6faa0cbfb0ed08e89beb8103ae9724eb4750e3a78084ba4017cbe94f3855" dependencies = [ "heck", "proc-macro2", @@ -2759,6 +2821,17 @@ dependencies = [ "thiserror 1.0.69", ] +[[package]] +name = "redox_users" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dd6f9d3d47bdd2ad6945c5015a226ec6155d0bcdfd8f7cd29f86b71f8de99d2b" +dependencies = [ + "getrandom 0.2.15", + "libredox", + "thiserror 2.0.12", +] + [[package]] name = "regex" version = "1.11.1" @@ -2928,9 +3001,9 @@ dependencies = [ [[package]] name = "rustix" -version = "1.0.3" +version = "1.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"e56a18552996ac8d29ecc3b190b4fdbb2d91ca4ec396de7bbffaf43f3d637e96" +checksum = "c71e83d6afe7ff64890ec6b71d6a69bb8a610ab78ce364b3352876bb4c801266" dependencies = [ "bitflags 2.9.0", "errno", @@ -3093,6 +3166,15 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "sqlparser_bench" +version = "0.1.0" +dependencies = [ + "criterion", + "fallible-iterator", + "limbo_sqlite3_parser", +] + [[package]] name = "stable_deref_trait" version = "1.2.0" @@ -3247,7 +3329,7 @@ dependencies = [ "fastrand", "getrandom 0.3.2", "once_cell", - "rustix 1.0.3", + "rustix 1.0.7", "windows-sys 0.59.0", ] @@ -3266,7 +3348,7 @@ version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "45c6481c4829e4cc63825e62c49186a34538b7b2750b73b266581ffb612fb5ed" dependencies = [ - "rustix 1.0.3", + "rustix 1.0.7", "windows-sys 0.59.0", ] @@ -3449,6 +3531,18 @@ dependencies = [ "tracing-core", ] +[[package]] +name = "tracing-appender" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3566e8ce28cc0a3fe42519fc80e6b4c943cc4c8cef275620eb8dac2d3d4e06cf" +dependencies = [ + "crossbeam-channel", + "thiserror 1.0.69", + "time", + "tracing-subscriber", +] + [[package]] name = "tracing-attributes" version = "0.1.28" diff --git a/Cargo.toml b/Cargo.toml index edbae0cec..5178b9fb1 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -25,56 +25,31 @@ members = [ "sqlite3", "stress", "tests", + "vendored/sqlite3-parser/sqlparser_bench", ] exclude = ["perf/latency/limbo"] [workspace.package] -version = "0.0.19-pre.4" +version = "0.0.19" authors = ["the Limbo authors"] edition = "2021" license = "MIT" repository = "https://github.com/tursodatabase/limbo" [workspace.dependencies] -limbo_completion = { path = "extensions/completion", version = "0.0.19-pre.4" } -limbo_core = { path = "core", version = "0.0.19-pre.4" } -limbo_crypto = { path = "extensions/crypto", version = "0.0.19-pre.4" } -limbo_ext = { path = "extensions/core", version = "0.0.19-pre.4" } -limbo_ext_tests = { path = "extensions/tests", version = "0.0.19-pre.4" } -limbo_ipaddr = { path = "extensions/ipaddr", version = "0.0.19-pre.4" } -limbo_macros = { path = "macros", version = "0.0.19-pre.4" } -limbo_percentile = { path = "extensions/percentile", version = "0.0.19-pre.4" } -limbo_regexp = { path = "extensions/regexp", version = "0.0.19-pre.4" } -limbo_series = { path = "extensions/series", version = "0.0.19-pre.4" } -limbo_sqlite3_parser = { path = "vendored/sqlite3-parser", version = "0.0.19-pre.4" } -limbo_time = { path = "extensions/time", version = "0.0.19-pre.4" } -limbo_uuid = { path = "extensions/uuid", version = "0.0.19-pre.4" } - -# Config for 'cargo dist' -[workspace.metadata.dist] -# The preferred cargo-dist version to use in CI (Cargo.toml SemVer syntax) -cargo-dist-version = "0.21.0" -# CI backends to support -ci = "github" -# The installers to generate for each app -installers = ["shell", "powershell"] -# Target platforms to build apps for (Rust target-triple syntax) -targets = [ - "aarch64-apple-darwin", - "x86_64-apple-darwin", - "x86_64-unknown-linux-gnu", - "x86_64-pc-windows-msvc", -] -# Which actions to run on pull requests -pr-run-mode = "plan" -# Path that installers should place binaries in -install-path = "~/.limbo" -# Whether to install an updater program -install-updater = true -# Whether to consider the binaries in a package for distribution (defaults true) -dist = false -# Whether to enable GitHub Attestations -github-attestations = true +limbo_completion = { path 
= "extensions/completion", version = "0.0.19" } +limbo_core = { path = "core", version = "0.0.19" } +limbo_crypto = { path = "extensions/crypto", version = "0.0.19" } +limbo_ext = { path = "extensions/core", version = "0.0.19" } +limbo_ext_tests = { path = "extensions/tests", version = "0.0.19" } +limbo_ipaddr = { path = "extensions/ipaddr", version = "0.0.19" } +limbo_macros = { path = "macros", version = "0.0.19" } +limbo_percentile = { path = "extensions/percentile", version = "0.0.19" } +limbo_regexp = { path = "extensions/regexp", version = "0.0.19" } +limbo_series = { path = "extensions/series", version = "0.0.19" } +limbo_sqlite3_parser = { path = "vendored/sqlite3-parser", version = "0.0.19" } +limbo_time = { path = "extensions/time", version = "0.0.19" } +limbo_uuid = { path = "extensions/uuid", version = "0.0.19" } [profile.release] debug = "line-tables-only" @@ -82,6 +57,13 @@ codegen-units = 1 panic = "abort" lto = true +[profile.antithesis] +inherits = "release" +debug = true +codegen-units = 1 +panic = "abort" +lto = true + [profile.bench-profile] inherits = "release" debug = true diff --git a/Dockerfile.antithesis b/Dockerfile.antithesis index 056ad0947..b3ce828d5 100644 --- a/Dockerfile.antithesis +++ b/Dockerfile.antithesis @@ -14,6 +14,7 @@ COPY ./Cargo.lock ./Cargo.lock COPY ./Cargo.toml ./Cargo.toml COPY ./bindings/go ./bindings/go/ COPY ./bindings/java ./bindings/java/ +COPY ./bindings/javascript ./bindings/javascript/ COPY ./bindings/python ./bindings/python/ COPY ./bindings/rust ./bindings/rust/ COPY ./bindings/wasm ./bindings/wasm/ @@ -51,7 +52,7 @@ COPY --from=planner /app/vendored ./vendored/ RUN if [ "$antithesis" = "true" ]; then \ cp /opt/antithesis/libvoidstar.so /usr/lib/libvoidstar.so && \ export RUSTFLAGS="-Ccodegen-units=1 -Cpasses=sancov-module -Cllvm-args=-sanitizer-coverage-level=3 -Cllvm-args=-sanitizer-coverage-trace-pc-guard -Clink-args=-Wl,--build-id -L/usr/lib/ -lvoidstar" && \ - cargo build --bin limbo_stress --release; \ + cargo build --bin limbo_stress; \ else \ cargo build --bin limbo_stress --release; \ fi @@ -61,7 +62,8 @@ RUN if [ "$antithesis" = "true" ]; then \ # FROM debian:bullseye-slim AS runtime -RUN apt-get update && apt-get install -y bash && rm -rf /var/lib/apt/lists/* +RUN apt-get update && apt-get install -y bash curl xz-utils python3 sqlite3 bc binutils pip && rm -rf /var/lib/apt/lists/* +RUN pip install antithesis pylimbo WORKDIR /app EXPOSE 8080 @@ -69,5 +71,15 @@ COPY --from=builder /usr/lib/libvoidstar.so* /usr/lib/ COPY --from=builder /app/target/release/limbo_stress /bin/limbo_stress COPY stress/docker-entrypoint.sh /bin RUN chmod +x /bin/docker-entrypoint.sh + +COPY ./antithesis-tests/bank-test/*.py /opt/antithesis/test/v1/bank-test/ +COPY ./antithesis-tests/stress-composer/*.py /opt/antithesis/test/v1/stress-composer/ +COPY ./antithesis-tests/stress /opt/antithesis/test/v1/stress +RUN chmod 777 -R /opt/antithesis/test/v1 + +RUN mkdir /opt/antithesis/catalog +RUN ln -s /opt/antithesis/test/v1/bank-test/*.py /opt/antithesis/catalog + +ENV RUST_BACKTRACE=1 + ENTRYPOINT ["/bin/docker-entrypoint.sh"] -CMD ["/bin/limbo_stress"] diff --git a/Makefile b/Makefile index 5202112d1..db3c3acdb 100644 --- a/Makefile +++ b/Makefile @@ -62,16 +62,19 @@ limbo-wasm: cargo build --package limbo-wasm --target wasm32-wasi .PHONY: limbo-wasm -test: limbo test-compat test-vector test-sqlite3 test-shell test-extensions +uv-sync: + uv sync --all-packages +.PHONE: uv-sync + +test: limbo uv-sync test-compat test-vector test-sqlite3 test-shell 
test-extensions test-memory test-write test-update test-constraint .PHONY: test -test-extensions: limbo - cargo build --package limbo_regexp - ./testing/cli_tests/extensions.py +test-extensions: limbo uv-sync + uv run --project limbo_test test-extensions .PHONY: test-extensions -test-shell: limbo - SQLITE_EXEC=$(SQLITE_EXEC) ./testing/cli_tests/cli_test_cases.py +test-shell: limbo uv-sync + SQLITE_EXEC=$(SQLITE_EXEC) uv run --project limbo_test test-shell .PHONY: test-shell test-compat: @@ -94,6 +97,26 @@ test-json: SQLITE_EXEC=$(SQLITE_EXEC) ./testing/json.test .PHONY: test-json +test-memory: limbo uv-sync + SQLITE_EXEC=$(SQLITE_EXEC) uv run --project limbo_test test-memory +.PHONY: test-memory + +test-write: limbo uv-sync + SQLITE_EXEC=$(SQLITE_EXEC) uv run --project limbo_test test-write +.PHONY: test-write + +test-update: limbo uv-sync + SQLITE_EXEC=$(SQLITE_EXEC) uv run --project limbo_test test-update +.PHONY: test-update + +test-constraint: limbo uv-sync + SQLITE_EXEC=$(SQLITE_EXEC) uv run --project limbo_test test-constraint +.PHONY: test-constraint + +bench-vfs: uv-sync + cargo build --release + uv run --project limbo_test bench-vfs "$(SQL)" "$(N)" + clickbench: ./perf/clickbench/benchmark.sh .PHONY: clickbench diff --git a/PERF.md b/PERF.md index 40edcf7ea..0eda689e5 100644 --- a/PERF.md +++ b/PERF.md @@ -32,3 +32,41 @@ make clickbench This will build Limbo in release mode, create a database, and run the benchmarks with a small subset of the Clickbench dataset. It will run the queries for both Limbo and SQLite, and print the results. + + + +## Comparing VFSs/IO Back-ends (io_uring | syscall) + +```shell +make bench-vfs SQL="select * from users;" N=500 +``` + +The naive script will build and run Limbo in release mode and execute the given SQL (against a copy of the `testing/testing.db` file) +`N` times with each `vfs`. This is not meant to be a definitive or thorough performance benchmark but serves to compare the two. + + +## TPC-H + +1. Clone the Tarantool TPC-H benchmarking tool: + +```shell +git clone git@github.com:tarantool/tpch.git +``` + +2. Patch the benchmark runner script: + +```patch +diff --git a/bench_queries.sh b/bench_queries.sh +index 6b894f9..c808e9a 100755 +--- a/bench_queries.sh ++++ b/bench_queries.sh +@@ -4,7 +4,7 @@ function check_q { + local query=queries/$*.sql + ( + echo $query +- time ( sqlite3 TPC-H.db < $query > /dev/null ) ++ time ( ../../limbo/target/release/limbo -m list TPC-H.db < $query > /dev/null ) + ) + } +``` + diff --git a/README.md b/README.md index e23823c3e..255843d80 100644 --- a/README.md +++ b/README.md @@ -10,7 +10,9 @@

- PyPI + PyPI + PyPI + PyPI
@@ -45,7 +47,7 @@ In the future, we will be also working on:
You can install the latest `limbo` release with: -```shell +```shell curl --proto '=https' --tlsv1.2 -LsSf \ https://github.com/tursodatabase/limbo/releases/latest/download/limbo_cli-installer.sh | sh ``` @@ -72,6 +74,24 @@ cargo run ``` +

+🦀 Rust +
+ +```console +cargo add limbo +``` + +Example usage: + +```rust +let db = Builder::new_local("sqlite.db").build().await?; +let conn = db.connect()?; + +let res = conn.query("SELECT * FROM users", ()).await?; +``` +
+
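The added Rust snippet above is not a complete program; a minimal self-contained sketch built around it could look like the following (assuming the crate exposes `Builder` at its root and the example runs under `tokio` — both assumptions, not confirmed by this diff):

```rust
// Scaffold around the README snippet. Only `Builder::new_local`, `build`,
// `connect`, and `query` are taken from the snippet itself; the imports,
// runtime, and error handling are assumptions for illustration.
use limbo::Builder;

#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Open (or create) a local database file.
    let db = Builder::new_local("sqlite.db").build().await?;
    let conn = db.connect()?;

    // `()` supplies an empty set of bound parameters.
    let _rows = conn.query("SELECT * FROM users", ()).await?;
    // Row iteration is not shown in the README snippet, so it is omitted here.
    Ok(())
}
```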
✨ JavaScript
@@ -144,7 +164,7 @@ defer stmt.Close() rows, _ = stmt.Query() for rows.Next() { - var id int + var id int var username string _ := rows.Scan(&id, &username) fmt.Printf("User: ID: %d, Username: %s\n", id, username) @@ -153,7 +173,7 @@ for rows.Next() {
- + ☕️ Java
@@ -190,3 +210,11 @@ terms or conditions. [contribution guide]: https://github.com/tursodatabase/limbo/blob/main/CONTRIBUTING.md [MIT license]: https://github.com/tursodatabase/limbo/blob/main/LICENSE.md + +## Contributors + +Thanks to all the contributors to Limbo! + + + + diff --git a/antithesis-tests/bank-test/anytime_validate.py b/antithesis-tests/bank-test/anytime_validate.py new file mode 100755 index 000000000..8bfb11304 --- /dev/null +++ b/antithesis-tests/bank-test/anytime_validate.py @@ -0,0 +1,26 @@ +#!/usr/bin/env -S python3 -u + +import limbo +from antithesis.random import get_random +from antithesis.assertions import always + +con = limbo.connect("bank_test.db") +cur = con.cursor() + +initial_state = cur.execute(f''' + SELECT * FROM initial_state +''').fetchone() + +curr_total = cur.execute(f''' + SELECT SUM(balance) AS total FROM accounts; +''').fetchone() + +always( + initial_state[1] == curr_total[0], + '[Anytime] Initial balance always equals current balance', + { + 'init_bal': initial_state[1], + 'curr_bal': curr_total[0] + } +) + diff --git a/antithesis-tests/bank-test/eventually_validate.py b/antithesis-tests/bank-test/eventually_validate.py new file mode 100755 index 000000000..413d04aae --- /dev/null +++ b/antithesis-tests/bank-test/eventually_validate.py @@ -0,0 +1,26 @@ +#!/usr/bin/env -S python3 -u + +import limbo +from antithesis.random import get_random +from antithesis.assertions import always + +con = limbo.connect("bank_test.db") +cur = con.cursor() + +initial_state = cur.execute(f''' + SELECT * FROM initial_state +''').fetchone() + +curr_total = cur.execute(f''' + SELECT SUM(balance) AS total FROM accounts; +''').fetchone() + +always( + initial_state[1] == curr_total[0], + '[Eventually] Initial balance always equals current balance', + { + 'init_bal': initial_state[1], + 'curr_bal': curr_total[0] + } +) + diff --git a/antithesis-tests/bank-test/finally_validate.py b/antithesis-tests/bank-test/finally_validate.py new file mode 100755 index 000000000..fa90b15f8 --- /dev/null +++ b/antithesis-tests/bank-test/finally_validate.py @@ -0,0 +1,26 @@ +#!/usr/bin/env -S python3 -u + +import limbo +from antithesis.random import get_random +from antithesis.assertions import always + +con = limbo.connect("bank_test.db") +cur = con.cursor() + +initial_state = cur.execute(f''' + SELECT * FROM initial_state +''').fetchone() + +curr_total = cur.execute(f''' + SELECT SUM(balance) AS total FROM accounts; +''').fetchone() + +always( + initial_state[1] == curr_total[0], + '[Finally] Initial balance always equals current balance', + { + 'init_bal': initial_state[1], + 'curr_bal': curr_total[0] + } +) + diff --git a/antithesis-tests/bank-test/first_setup.py b/antithesis-tests/bank-test/first_setup.py new file mode 100755 index 000000000..86580315d --- /dev/null +++ b/antithesis-tests/bank-test/first_setup.py @@ -0,0 +1,47 @@ +#!/usr/bin/env -S python3 -u + +import limbo +from antithesis.random import get_random + +con = limbo.connect("bank_test.db") +cur = con.cursor() + +# drop accounts table if it exists and create a new table +cur.execute(f''' + DROP TABLE IF EXISTS accounts; +''') + +cur.execute(f''' + CREATE TABLE accounts ( + account_id INTEGER PRIMARY KEY AUTOINCREMENT, + balance REAL NOT NULL DEFAULT 0.0 + ); +''') + +# randomly create up to 100 accounts with a balance up to 1e9 +total = 0 +num_accts = get_random() % 100 + 1 +for i in range(num_accts): + bal = get_random() % 1e9 + total += bal + cur.execute(f''' + INSERT INTO accounts (balance) + VALUES ({bal}) + ''') + +# 
drop initial_state table if it exists and create a new table +cur.execute(f''' + DROP TABLE IF EXISTS initial_state; +''') +cur.execute(f''' + CREATE TABLE initial_state ( + num_accts INTEGER, + total REAL + ); +''') + +# store initial state in the table +cur.execute(f''' + INSERT INTO initial_state (num_accts, total) + VALUES ({num_accts}, {total}) +''') \ No newline at end of file diff --git a/antithesis-tests/bank-test/parallel_driver_generate_transaction.py b/antithesis-tests/bank-test/parallel_driver_generate_transaction.py new file mode 100755 index 000000000..9e96260ba --- /dev/null +++ b/antithesis-tests/bank-test/parallel_driver_generate_transaction.py @@ -0,0 +1,54 @@ +#!/usr/bin/env -S python3 -u + +import limbo +import logging +from logging.handlers import RotatingFileHandler +from antithesis.random import get_random + +handler = RotatingFileHandler(filename='bank_test.log', mode='a', maxBytes=1*1024*1024, backupCount=5, encoding=None, delay=0) +handler.setLevel(logging.INFO) + +logger = logging.getLogger('root') +logger.setLevel(logging.INFO) + +logger.addHandler(handler) + +con = limbo.connect("bank_test.db") +cur = con.cursor() + +length = cur.execute("SELECT num_accts FROM initial_state").fetchone()[0] + +def transaction(): + # check that sender and recipient are different + sender = get_random() % length + 1 + recipient = get_random() % length + 1 + if sender != recipient: + # get a random value to transfer between accounts + value = get_random() % 1e9 + + logger.info(f"Sender ID: {sender} | Recipient ID: {recipient} | Txn Val: {value}") + + cur.execute("BEGIN TRANSACTION;") + + # subtract value from balance of the sender account + cur.execute(f''' + UPDATE accounts + SET balance = balance - {value} + WHERE account_id = {sender}; + ''') + + # add value to balance of the recipient account + cur.execute(f''' + UPDATE accounts + SET balance = balance + {value} + WHERE account_id = {recipient}; + ''') + + cur.execute("COMMIT;") + +# run up to 100 transactions +iterations = get_random() % 100 +# logger.info(f"Starting {iterations} iterations") +for i in range(iterations): + transaction() +# logger.info(f"Finished {iterations} iterations") diff --git a/antithesis-tests/stress-composer/first_setup.py b/antithesis-tests/stress-composer/first_setup.py new file mode 100755 index 000000000..cccf5e015 --- /dev/null +++ b/antithesis-tests/stress-composer/first_setup.py @@ -0,0 +1,75 @@ +#!/usr/bin/env -S python3 -u + +import json +import glob +import os +import limbo +from antithesis.random import get_random, random_choice + +constraints = ['NOT NULL', 'UNIQUE', ''] +data_type = ['INTEGER', 'REAL', 'TEXT', 'BLOB', 'NUMERIC'] + +# remove any existing db files +for f in glob.glob('*.db'): + try: + os.remove(f) + except OSError: + pass + +for f in glob.glob('*.db-wal'): + try: + os.remove(f) + except OSError: + pass + +# store initial states in a separate db +con_init = limbo.connect('init_state.db') +cur_init = con_init.cursor() +cur_init.execute('CREATE TABLE schemas (schema TEXT, tbl INT PRIMARY KEY)') +cur_init.execute('CREATE TABLE tables (count INT)') + +con = limbo.connect('stress_composer.db') +cur = con.cursor() + +tbl_count = max(1, get_random() % 10) + +cur_init.execute(f'INSERT INTO tables (count) VALUES ({tbl_count})') + +schemas = [] +for i in range(tbl_count): + col_count = max(1, get_random() % 10) + pk = get_random() % col_count + + schema = { + 'table': i, + 'colCount': col_count, + 'pk': pk + } + + cols = [] + cols_str = '' + for j in range(col_count): + col_data_type = 
random_choice(data_type) + col_constraint_1 = random_choice(constraints) + col_constraint_2 = random_choice(constraints) + + col = f'col_{j} {col_data_type} {col_constraint_1} {col_constraint_2 if col_constraint_2 != col_constraint_1 else ""}' if j != pk else f'col_{j} {col_data_type} PRIMARY KEY NOT NULL' + + cols.append(col) + + schema[f'col_{j}'] = { + 'data_type': col_data_type, + 'constraint1': col_constraint_1 if j != pk else 'PRIMARY KEY', + 'constraint2': col_constraint_2 if col_constraint_1 != col_constraint_2 else "" if j != pk else 'NOT NULL', + } + + cols_str = ', '.join(cols) + + schemas.append(schema) + cur_init.execute(f"INSERT INTO schemas (schema, tbl) VALUES ('{json.dumps(schema)}', {i})") + + cur.execute(f''' + CREATE TABLE tbl_{i} ({cols_str}) + ''') + +print(f'DB Schemas\n------------\n{json.dumps(schemas, indent=2)}') \ No newline at end of file diff --git a/antithesis-tests/stress-composer/parallel_driver_delete.py b/antithesis-tests/stress-composer/parallel_driver_delete.py new file mode 100755 index 000000000..6d0331f56 --- /dev/null +++ b/antithesis-tests/stress-composer/parallel_driver_delete.py @@ -0,0 +1,33 @@ +#!/usr/bin/env -S python3 -u + +import json +import limbo +from utils import generate_random_value +from antithesis.random import get_random + +# Get initial state +con_init = limbo.connect('init_state.db') +cur_init = con_init.cursor() + +tbl_len = cur_init.execute('SELECT count FROM tables').fetchone()[0] +selected_tbl = get_random() % tbl_len +tbl_schema = json.loads(cur_init.execute(f'SELECT schema FROM schemas WHERE tbl = {selected_tbl}').fetchone()[0]) + +# get primary key column +pk = tbl_schema['pk'] +# get non-pk columns +cols = [f'col_{col}' for col in range(tbl_schema['colCount']) if col != pk] + +con = limbo.connect('stress_composer.db') +cur = con.cursor() + +deletions = get_random() % 100 +print(f'Attempt to delete {deletions} rows in tbl_{selected_tbl}...') + +for i in range(deletions): + where_clause = f"col_{pk} = {generate_random_value(tbl_schema[f'col_{pk}']['data_type'])}" + + cur.execute(f''' + DELETE FROM tbl_{selected_tbl} WHERE {where_clause} + ''') + diff --git a/antithesis-tests/stress-composer/parallel_driver_insert.py b/antithesis-tests/stress-composer/parallel_driver_insert.py new file mode 100755 index 000000000..89d4daea0 --- /dev/null +++ b/antithesis-tests/stress-composer/parallel_driver_insert.py @@ -0,0 +1,31 @@ +#!/usr/bin/env -S python3 -u + +import json +import limbo +from utils import generate_random_value +from antithesis.random import get_random + + +# Get initial state +con_init = limbo.connect('init_state.db') +cur_init = con_init.cursor() + +tbl_len = cur_init.execute('SELECT count FROM tables').fetchone()[0] +selected_tbl = get_random() % tbl_len +tbl_schema = json.loads(cur_init.execute(f'SELECT schema FROM schemas WHERE tbl = {selected_tbl}').fetchone()[0]) +cols = ', '.join([f'col_{col}' for col in range(tbl_schema['colCount'])]) + +con = limbo.connect('stress_composer.db') +cur = con.cursor() + +# insert up to 100 rows in the selected table +insertions = get_random() % 100 +print(f'Inserting {insertions} rows...') + +for i in range(insertions): + values = [generate_random_value(tbl_schema[f'col_{col}']['data_type']) for col in range(tbl_schema['colCount'])] + cur.execute(f''' + INSERT INTO tbl_{selected_tbl} ({cols}) + VALUES ({', '.join(values)}) + ''') + diff --git a/antithesis-tests/stress-composer/parallel_driver_update.py b/antithesis-tests/stress-composer/parallel_driver_update.py new file mode 
100755 index 000000000..fc707cb8b --- /dev/null +++ b/antithesis-tests/stress-composer/parallel_driver_update.py @@ -0,0 +1,45 @@ +#!/usr/bin/env -S python3 -u + +import json +import limbo +from utils import generate_random_value +from antithesis.random import get_random + +# Get initial state +con_init = limbo.connect('init_state.db') +cur_init = con_init.cursor() + +tbl_len = cur_init.execute('SELECT count FROM tables').fetchone()[0] +selected_tbl = get_random() % tbl_len +tbl_schema = json.loads(cur_init.execute(f'SELECT schema FROM schemas WHERE tbl = {selected_tbl}').fetchone()[0]) + +# get primary key column +pk = tbl_schema['pk'] +# get non-pk columns +cols = [f'col_{col}' for col in range(tbl_schema['colCount']) if col != pk] +# print(cols) +con = limbo.connect('stress_composer.db') +cur = con.cursor() + +# update up to 100 rows in the selected table +updates = get_random() % 100 +print(f'Attempt to update {updates} rows in tbl_{selected_tbl}...') + +for i in range(updates): + set_clause = '' + if tbl_schema['colCount'] == 1: + set_clause = f"col_{pk} = {generate_random_value(tbl_schema[f'col_{pk}']['data_type'])}" + else: + values = [] + for col in cols: + # print(col) + values.append(f"{col} = {generate_random_value(tbl_schema[col]['data_type'])}") + set_clause = ', '.join(values) + + where_clause = f"col_{pk} = {generate_random_value(tbl_schema[f'col_{pk}']['data_type'])}" + # print(where_clause) + + cur.execute(f''' + UPDATE tbl_{selected_tbl} SET {set_clause} WHERE {where_clause} + ''') + diff --git a/antithesis-tests/stress-composer/utils.py b/antithesis-tests/stress-composer/utils.py new file mode 100755 index 000000000..358e44670 --- /dev/null +++ b/antithesis-tests/stress-composer/utils.py @@ -0,0 +1,19 @@ +import string +from antithesis.random import get_random, random_choice + +def generate_random_identifier(type: str, num: int): + return f'{type}_{get_random() % num}' + +def generate_random_value(type_str): + if type_str == 'INTEGER': + return str(get_random() % 100) + elif type_str == 'REAL': + return '{:.2f}'.format(get_random() % 100 / 100.0) + elif type_str == 'TEXT': + return f"'{''.join(random_choice(string.ascii_lowercase) for _ in range(5))}'" + elif type_str == 'BLOB': + return f"x'{''.join(random_choice(string.ascii_lowercase) for _ in range(5)).encode().hex()}'" + elif type_str == 'NUMERIC': + return str(get_random() % 100) + else: + return 'NULL' diff --git a/antithesis-tests/stress/singleton_driver_stress.sh b/antithesis-tests/stress/singleton_driver_stress.sh new file mode 100755 index 000000000..06f27223f --- /dev/null +++ b/antithesis-tests/stress/singleton_driver_stress.sh @@ -0,0 +1,3 @@ +#!/usr/bin/env bash + +/bin/limbo_stress \ No newline at end of file diff --git a/bindings/java/rs_src/limbo_statement.rs b/bindings/java/rs_src/limbo_statement.rs index b28ff55b1..c49469cd6 100644 --- a/bindings/java/rs_src/limbo_statement.rs +++ b/bindings/java/rs_src/limbo_statement.rs @@ -138,7 +138,7 @@ pub extern "system" fn Java_tech_turso_core_LimboStatement_columns<'local>( for i in 0..num_columns { let column_name = stmt.stmt.get_column_name(i); - let str = env.new_string(column_name.as_str()).unwrap(); + let str = env.new_string(column_name.into_owned()).unwrap(); env.set_object_array_element(&obj_arr, i as i32, str) .unwrap(); } diff --git a/bindings/javascript/npm/darwin-universal/package.json b/bindings/javascript/npm/darwin-universal/package.json index a404de79b..bee9cf13b 100644 --- a/bindings/javascript/npm/darwin-universal/package.json +++
b/bindings/javascript/npm/darwin-universal/package.json @@ -1,6 +1,6 @@ { "name": "@tursodatabase/limbo-darwin-universal", - "version": "0.0.19-pre.4", + "version": "0.0.19", "repository": { "type": "git", "url": "https://github.com/tursodatabase/limbo" diff --git a/bindings/javascript/npm/linux-x64-gnu/package.json b/bindings/javascript/npm/linux-x64-gnu/package.json index 3030e3ac3..a34ccda73 100644 --- a/bindings/javascript/npm/linux-x64-gnu/package.json +++ b/bindings/javascript/npm/linux-x64-gnu/package.json @@ -1,6 +1,6 @@ { "name": "@tursodatabase/limbo-linux-x64-gnu", - "version": "0.0.19-pre.4", + "version": "0.0.19", "repository": { "type": "git", "url": "https://github.com/tursodatabase/limbo" diff --git a/bindings/javascript/npm/win32-x64-msvc/package.json b/bindings/javascript/npm/win32-x64-msvc/package.json index 3461f0719..c4bc40bb6 100644 --- a/bindings/javascript/npm/win32-x64-msvc/package.json +++ b/bindings/javascript/npm/win32-x64-msvc/package.json @@ -1,6 +1,6 @@ { "name": "@tursodatabase/limbo-win32-x64-msvc", - "version": "0.0.19-pre.4", + "version": "0.0.19", "repository": { "type": "git", "url": "https://github.com/tursodatabase/limbo" diff --git a/bindings/javascript/package.json b/bindings/javascript/package.json index 574884692..62c5c27e3 100644 --- a/bindings/javascript/package.json +++ b/bindings/javascript/package.json @@ -1,6 +1,6 @@ { "name": "@tursodatabase/limbo", - "version": "0.0.19-pre.4", + "version": "0.0.19", "repository": { "type": "git", "url": "https://github.com/tursodatabase/limbo" diff --git a/bindings/javascript/src/lib.rs b/bindings/javascript/src/lib.rs index 614b17677..aa18b208c 100644 --- a/bindings/javascript/src/lib.rs +++ b/bindings/javascript/src/lib.rs @@ -4,6 +4,7 @@ use std::cell::RefCell; use std::rc::Rc; use std::sync::Arc; +use limbo_core::{maybe_init_database_file, Clock, Instant}; use napi::{Env, JsUnknown, Result as NapiResult}; use napi_derive::napi; @@ -28,20 +29,9 @@ impl Database { let file = io .open_file(&path, limbo_core::OpenFlags::Create, false) .unwrap(); - limbo_core::maybe_init_database_file(&file, &io).unwrap(); + maybe_init_database_file(&file, &io).unwrap(); let db_file = Arc::new(DatabaseFile::new(file)); - let db_header = limbo_core::Pager::begin_open(db_file.clone()).unwrap(); - - // ensure db header is there - io.run_once().unwrap(); - - let page_size = db_header.lock().page_size; - - let wal_path = format!("{}-wal", path); - let wal_shared = - limbo_core::WalFileShared::open_shared(&io, wal_path.as_str(), page_size).unwrap(); - - let db = limbo_core::Database::open(io, db_file, wal_shared, false).unwrap(); + let db = limbo_core::Database::open(io, &path, db_file, false).unwrap(); let conn = db.connect().unwrap(); Self { memory, @@ -152,6 +142,12 @@ impl limbo_core::DatabaseStorage for DatabaseFile { struct IO {} +impl Clock for IO { + fn now(&self) -> Instant { + todo!() + } +} + impl limbo_core::IO for IO { fn open_file( &self, @@ -170,7 +166,7 @@ impl limbo_core::IO for IO { todo!(); } - fn get_current_time(&self) -> String { - todo!(); + fn get_memory_io(&self) -> Arc { + Arc::new(limbo_core::MemoryIO::new()) } } diff --git a/bindings/python/Cargo.toml b/bindings/python/Cargo.toml index 402a3a760..4a8eaef59 100644 --- a/bindings/python/Cargo.toml +++ b/bindings/python/Cargo.toml @@ -18,7 +18,7 @@ extension-module = ["pyo3/extension-module"] [dependencies] anyhow = "1.0" limbo_core = { path = "../../core", features = ["io_uring"] } -pyo3 = { version = "0.24.0", features = ["anyhow"] } +pyo3 = { version = 
"0.24.1", features = ["anyhow"] } [build-dependencies] version_check = "0.9.5" diff --git a/bindings/rust/src/lib.rs b/bindings/rust/src/lib.rs index fe653fdb5..e537932c4 100644 --- a/bindings/rust/src/lib.rs +++ b/bindings/rust/src/lib.rs @@ -6,6 +6,7 @@ pub use value::Value; pub use params::params_from_iter; use crate::params::*; +use std::fmt::Debug; use std::num::NonZero; use std::rc::Rc; use std::sync::{Arc, Mutex}; @@ -16,11 +17,13 @@ pub enum Error { ToSqlConversionFailure(BoxError), #[error("Mutex lock error: {0}")] MutexError(String), + #[error("SQL execution failure: `{0}`")] + SqlExecutionFailure(String), } impl From for Error { - fn from(_err: limbo_core::LimboError) -> Self { - todo!(); + fn from(err: limbo_core::LimboError) -> Self { + Error::SqlExecutionFailure(err.to_string()) } } @@ -55,6 +58,7 @@ impl Builder { } } +#[derive(Clone)] pub struct Database { inner: Arc, } @@ -62,6 +66,12 @@ pub struct Database { unsafe impl Send for Database {} unsafe impl Sync for Database {} +impl Debug for Database { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("Database").finish() + } +} + impl Database { pub fn connect(&self) -> Result { let conn = self.inner.connect()?; @@ -119,6 +129,14 @@ pub struct Statement { inner: Arc>, } +impl Clone for Statement { + fn clone(&self) -> Self { + Self { + inner: Arc::clone(&self.inner), + } + } +} + unsafe impl Send for Statement {} unsafe impl Sync for Statement {} @@ -143,6 +161,10 @@ impl Statement { } pub async fn execute(&mut self, params: impl IntoParams) -> Result { + { + // Reset the statement before executing + self.inner.lock().unwrap().reset(); + } let params = params.into_params()?; match params { params::Params::None => (), @@ -180,6 +202,39 @@ impl Statement { } } } + + pub fn columns(&self) -> Vec { + let stmt = self.inner.lock().unwrap(); + + let n = stmt.num_columns(); + + let mut cols = Vec::with_capacity(n); + + for i in 0..n { + let name = stmt.get_column_name(i).into_owned(); + cols.push(Column { + name, + decl_type: None, // TODO + }); + } + + cols + } +} + +pub struct Column { + name: String, + decl_type: Option, +} + +impl Column { + pub fn name(&self) -> &str { + &self.name + } + + pub fn decl_type(&self) -> Option<&str> { + self.decl_type.as_deref() + } } pub trait IntoValue { @@ -198,6 +253,14 @@ pub struct Rows { inner: Arc>, } +impl Clone for Rows { + fn clone(&self) -> Self { + Self { + inner: Arc::clone(&self.inner), + } + } +} + unsafe impl Send for Rows {} unsafe impl Sync for Rows {} @@ -220,6 +283,7 @@ impl Rows { } } +#[derive(Debug)] pub struct Row { values: Vec, } @@ -238,4 +302,8 @@ impl Row { limbo_core::OwnedValue::Blob(items) => Ok(Value::Blob(items.to_vec())), } } + + pub fn column_count(&self) -> usize { + self.values.len() + } } diff --git a/bindings/wasm/lib.rs b/bindings/wasm/lib.rs index 3a5819efc..02f9a2e35 100644 --- a/bindings/wasm/lib.rs +++ b/bindings/wasm/lib.rs @@ -1,5 +1,5 @@ use js_sys::{Array, Object}; -use limbo_core::{maybe_init_database_file, OpenFlags, Pager, Result, WalFileShared}; +use limbo_core::{maybe_init_database_file, Clock, Instant, OpenFlags, Result}; use std::cell::RefCell; use std::rc::Rc; use std::sync::Arc; @@ -17,22 +17,10 @@ impl Database { #[wasm_bindgen(constructor)] pub fn new(path: &str) -> Database { let io: Arc = Arc::new(PlatformIO { vfs: VFS::new() }); - let file = io - .open_file(path, limbo_core::OpenFlags::Create, false) - .unwrap(); + let file = io.open_file(path, OpenFlags::Create, false).unwrap(); 
maybe_init_database_file(&file, &io).unwrap(); let db_file = Arc::new(DatabaseFile::new(file)); - let db_header = Pager::begin_open(db_file.clone()).unwrap(); - - // ensure db header is there - io.run_once().unwrap(); - - let page_size = db_header.lock().page_size; - - let wal_path = format!("{}-wal", path); - let wal_shared = WalFileShared::open_shared(&io, wal_path.as_str(), page_size).unwrap(); - - let db = limbo_core::Database::open(io, db_file, wal_shared, false).unwrap(); + let db = limbo_core::Database::open(io, path, db_file, false).unwrap(); let conn = db.connect().unwrap(); Database { db, conn } } @@ -269,6 +257,18 @@ pub struct PlatformIO { unsafe impl Send for PlatformIO {} unsafe impl Sync for PlatformIO {} +impl Clock for PlatformIO { + fn now(&self) -> Instant { + let date = Date::new(); + let ms_since_epoch = date.getTime(); + + Instant { + secs: (ms_since_epoch / 1000.0) as i64, + micros: ((ms_since_epoch % 1000.0) * 1000.0) as u32, + } + } +} + impl limbo_core::IO for PlatformIO { fn open_file( &self, @@ -292,9 +292,8 @@ impl limbo_core::IO for PlatformIO { (random_f64 * i64::MAX as f64) as i64 } - fn get_current_time(&self) -> String { - let date = Date::new(); - date.toISOString() + fn get_memory_io(&self) -> Arc { + Arc::new(limbo_core::MemoryIO::new()) } } @@ -312,6 +311,9 @@ extern "C" { #[wasm_bindgen(method, getter)] fn toISOString(this: &Date) -> String; + + #[wasm_bindgen(method)] + fn getTime(this: &Date) -> f64; } pub struct DatabaseFile { diff --git a/bindings/wasm/package-lock.json b/bindings/wasm/package-lock.json index 663a8217d..b357a1ef1 100644 --- a/bindings/wasm/package-lock.json +++ b/bindings/wasm/package-lock.json @@ -1,12 +1,12 @@ { "name": "limbo-wasm", - "version": "0.0.19-pre.4", + "version": "0.0.19", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "limbo-wasm", - "version": "0.0.19-pre.4", + "version": "0.0.19", "license": "MIT", "devDependencies": { "@playwright/test": "^1.49.1", diff --git a/bindings/wasm/package.json b/bindings/wasm/package.json index 77d9f1af8..9f519b652 100644 --- a/bindings/wasm/package.json +++ b/bindings/wasm/package.json @@ -3,7 +3,7 @@ "collaborators": [ "the Limbo authors" ], - "version": "0.0.19-pre.4", + "version": "0.0.19", "license": "MIT", "repository": { "type": "git", diff --git a/bindings/wasm/test-limbo-pkg/package-lock.json b/bindings/wasm/test-limbo-pkg/package-lock.json index 48584c018..4500e3f1c 100644 --- a/bindings/wasm/test-limbo-pkg/package-lock.json +++ b/bindings/wasm/test-limbo-pkg/package-lock.json @@ -6,17 +6,18 @@ "": { "name": "test-limbo", "dependencies": { - "limbo-wasm": "file:../limbo-wasm-0.0.11.tgz" + "limbo-wasm": ".." 
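The wasm Clock impl above derives limbo's split-second Instant from JS Date.getTime() milliseconds. A quick std-only check of that arithmetic (local Instant struct with the field layout assumed from the diff):

```rust
// Local copy of the (secs, micros) split, for checking the conversion only.
struct Instant { secs: i64, micros: u32 }

fn from_ms_since_epoch(ms: f64) -> Instant {
    Instant {
        secs: (ms / 1000.0) as i64,              // whole seconds
        micros: ((ms % 1000.0) * 1000.0) as u32, // leftover milliseconds as microseconds
    }
}

fn main() {
    let t = from_ms_since_epoch(1_700_000_000_123.5);
    assert_eq!(t.secs, 1_700_000_000);
    assert_eq!(t.micros, 123_500);
}
```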
}, "devDependencies": { - "vite": "^6.0.7", + "vite": "^6.2.6", "vite-plugin-wasm": "^3.4.1" } }, + "..": {}, "node_modules/@esbuild/aix-ppc64": { - "version": "0.24.2", - "resolved": "https://registry.npmjs.org/@esbuild/aix-ppc64/-/aix-ppc64-0.24.2.tgz", - "integrity": "sha512-thpVCb/rhxE/BnMLQ7GReQLLN8q9qbHmI55F4489/ByVg2aQaQ6kbcLb6FHkocZzQhxc4gx0sCk0tJkKBFzDhA==", + "version": "0.25.2", + "resolved": "https://registry.npmjs.org/@esbuild/aix-ppc64/-/aix-ppc64-0.25.2.tgz", + "integrity": "sha512-wCIboOL2yXZym2cgm6mlA742s9QeJ8DjGVaL39dLN4rRwrOgOyYSnOaFPhKZGLb2ngj4EyfAFjsNJwPXZvseag==", "cpu": [ "ppc64" ], @@ -31,9 +32,9 @@ } }, "node_modules/@esbuild/android-arm": { - "version": "0.24.2", - "resolved": "https://registry.npmjs.org/@esbuild/android-arm/-/android-arm-0.24.2.tgz", - "integrity": "sha512-tmwl4hJkCfNHwFB3nBa8z1Uy3ypZpxqxfTQOcHX+xRByyYgunVbZ9MzUUfb0RxaHIMnbHagwAxuTL+tnNM+1/Q==", + "version": "0.25.2", + "resolved": "https://registry.npmjs.org/@esbuild/android-arm/-/android-arm-0.25.2.tgz", + "integrity": "sha512-NQhH7jFstVY5x8CKbcfa166GoV0EFkaPkCKBQkdPJFvo5u+nGXLEH/ooniLb3QI8Fk58YAx7nsPLozUWfCBOJA==", "cpu": [ "arm" ], @@ -48,9 +49,9 @@ } }, "node_modules/@esbuild/android-arm64": { - "version": "0.24.2", - "resolved": "https://registry.npmjs.org/@esbuild/android-arm64/-/android-arm64-0.24.2.tgz", - "integrity": "sha512-cNLgeqCqV8WxfcTIOeL4OAtSmL8JjcN6m09XIgro1Wi7cF4t/THaWEa7eL5CMoMBdjoHOTh/vwTO/o2TRXIyzg==", + "version": "0.25.2", + "resolved": "https://registry.npmjs.org/@esbuild/android-arm64/-/android-arm64-0.25.2.tgz", + "integrity": "sha512-5ZAX5xOmTligeBaeNEPnPaeEuah53Id2tX4c2CVP3JaROTH+j4fnfHCkr1PjXMd78hMst+TlkfKcW/DlTq0i4w==", "cpu": [ "arm64" ], @@ -65,9 +66,9 @@ } }, "node_modules/@esbuild/android-x64": { - "version": "0.24.2", - "resolved": "https://registry.npmjs.org/@esbuild/android-x64/-/android-x64-0.24.2.tgz", - "integrity": "sha512-B6Q0YQDqMx9D7rvIcsXfmJfvUYLoP722bgfBlO5cGvNVb5V/+Y7nhBE3mHV9OpxBf4eAS2S68KZztiPaWq4XYw==", + "version": "0.25.2", + "resolved": "https://registry.npmjs.org/@esbuild/android-x64/-/android-x64-0.25.2.tgz", + "integrity": "sha512-Ffcx+nnma8Sge4jzddPHCZVRvIfQ0kMsUsCMcJRHkGJ1cDmhe4SsrYIjLUKn1xpHZybmOqCWwB0zQvsjdEHtkg==", "cpu": [ "x64" ], @@ -82,9 +83,9 @@ } }, "node_modules/@esbuild/darwin-arm64": { - "version": "0.24.2", - "resolved": "https://registry.npmjs.org/@esbuild/darwin-arm64/-/darwin-arm64-0.24.2.tgz", - "integrity": "sha512-kj3AnYWc+CekmZnS5IPu9D+HWtUI49hbnyqk0FLEJDbzCIQt7hg7ucF1SQAilhtYpIujfaHr6O0UHlzzSPdOeA==", + "version": "0.25.2", + "resolved": "https://registry.npmjs.org/@esbuild/darwin-arm64/-/darwin-arm64-0.25.2.tgz", + "integrity": "sha512-MpM6LUVTXAzOvN4KbjzU/q5smzryuoNjlriAIx+06RpecwCkL9JpenNzpKd2YMzLJFOdPqBpuub6eVRP5IgiSA==", "cpu": [ "arm64" ], @@ -99,9 +100,9 @@ } }, "node_modules/@esbuild/darwin-x64": { - "version": "0.24.2", - "resolved": "https://registry.npmjs.org/@esbuild/darwin-x64/-/darwin-x64-0.24.2.tgz", - "integrity": "sha512-WeSrmwwHaPkNR5H3yYfowhZcbriGqooyu3zI/3GGpF8AyUdsrrP0X6KumITGA9WOyiJavnGZUwPGvxvwfWPHIA==", + "version": "0.25.2", + "resolved": "https://registry.npmjs.org/@esbuild/darwin-x64/-/darwin-x64-0.25.2.tgz", + "integrity": "sha512-5eRPrTX7wFyuWe8FqEFPG2cU0+butQQVNcT4sVipqjLYQjjh8a8+vUTfgBKM88ObB85ahsnTwF7PSIt6PG+QkA==", "cpu": [ "x64" ], @@ -116,9 +117,9 @@ } }, "node_modules/@esbuild/freebsd-arm64": { - "version": "0.24.2", - "resolved": "https://registry.npmjs.org/@esbuild/freebsd-arm64/-/freebsd-arm64-0.24.2.tgz", - "integrity": 
"sha512-UN8HXjtJ0k/Mj6a9+5u6+2eZ2ERD7Edt1Q9IZiB5UZAIdPnVKDoG7mdTVGhHJIeEml60JteamR3qhsr1r8gXvg==", + "version": "0.25.2", + "resolved": "https://registry.npmjs.org/@esbuild/freebsd-arm64/-/freebsd-arm64-0.25.2.tgz", + "integrity": "sha512-mLwm4vXKiQ2UTSX4+ImyiPdiHjiZhIaE9QvC7sw0tZ6HoNMjYAqQpGyui5VRIi5sGd+uWq940gdCbY3VLvsO1w==", "cpu": [ "arm64" ], @@ -133,9 +134,9 @@ } }, "node_modules/@esbuild/freebsd-x64": { - "version": "0.24.2", - "resolved": "https://registry.npmjs.org/@esbuild/freebsd-x64/-/freebsd-x64-0.24.2.tgz", - "integrity": "sha512-TvW7wE/89PYW+IevEJXZ5sF6gJRDY/14hyIGFXdIucxCsbRmLUcjseQu1SyTko+2idmCw94TgyaEZi9HUSOe3Q==", + "version": "0.25.2", + "resolved": "https://registry.npmjs.org/@esbuild/freebsd-x64/-/freebsd-x64-0.25.2.tgz", + "integrity": "sha512-6qyyn6TjayJSwGpm8J9QYYGQcRgc90nmfdUb0O7pp1s4lTY+9D0H9O02v5JqGApUyiHOtkz6+1hZNvNtEhbwRQ==", "cpu": [ "x64" ], @@ -150,9 +151,9 @@ } }, "node_modules/@esbuild/linux-arm": { - "version": "0.24.2", - "resolved": "https://registry.npmjs.org/@esbuild/linux-arm/-/linux-arm-0.24.2.tgz", - "integrity": "sha512-n0WRM/gWIdU29J57hJyUdIsk0WarGd6To0s+Y+LwvlC55wt+GT/OgkwoXCXvIue1i1sSNWblHEig00GBWiJgfA==", + "version": "0.25.2", + "resolved": "https://registry.npmjs.org/@esbuild/linux-arm/-/linux-arm-0.25.2.tgz", + "integrity": "sha512-UHBRgJcmjJv5oeQF8EpTRZs/1knq6loLxTsjc3nxO9eXAPDLcWW55flrMVc97qFPbmZP31ta1AZVUKQzKTzb0g==", "cpu": [ "arm" ], @@ -167,9 +168,9 @@ } }, "node_modules/@esbuild/linux-arm64": { - "version": "0.24.2", - "resolved": "https://registry.npmjs.org/@esbuild/linux-arm64/-/linux-arm64-0.24.2.tgz", - "integrity": "sha512-7HnAD6074BW43YvvUmE/35Id9/NB7BeX5EoNkK9obndmZBUk8xmJJeU7DwmUeN7tkysslb2eSl6CTrYz6oEMQg==", + "version": "0.25.2", + "resolved": "https://registry.npmjs.org/@esbuild/linux-arm64/-/linux-arm64-0.25.2.tgz", + "integrity": "sha512-gq/sjLsOyMT19I8obBISvhoYiZIAaGF8JpeXu1u8yPv8BE5HlWYobmlsfijFIZ9hIVGYkbdFhEqC0NvM4kNO0g==", "cpu": [ "arm64" ], @@ -184,9 +185,9 @@ } }, "node_modules/@esbuild/linux-ia32": { - "version": "0.24.2", - "resolved": "https://registry.npmjs.org/@esbuild/linux-ia32/-/linux-ia32-0.24.2.tgz", - "integrity": "sha512-sfv0tGPQhcZOgTKO3oBE9xpHuUqguHvSo4jl+wjnKwFpapx+vUDcawbwPNuBIAYdRAvIDBfZVvXprIj3HA+Ugw==", + "version": "0.25.2", + "resolved": "https://registry.npmjs.org/@esbuild/linux-ia32/-/linux-ia32-0.25.2.tgz", + "integrity": "sha512-bBYCv9obgW2cBP+2ZWfjYTU+f5cxRoGGQ5SeDbYdFCAZpYWrfjjfYwvUpP8MlKbP0nwZ5gyOU/0aUzZ5HWPuvQ==", "cpu": [ "ia32" ], @@ -201,9 +202,9 @@ } }, "node_modules/@esbuild/linux-loong64": { - "version": "0.24.2", - "resolved": "https://registry.npmjs.org/@esbuild/linux-loong64/-/linux-loong64-0.24.2.tgz", - "integrity": "sha512-CN9AZr8kEndGooS35ntToZLTQLHEjtVB5n7dl8ZcTZMonJ7CCfStrYhrzF97eAecqVbVJ7APOEe18RPI4KLhwQ==", + "version": "0.25.2", + "resolved": "https://registry.npmjs.org/@esbuild/linux-loong64/-/linux-loong64-0.25.2.tgz", + "integrity": "sha512-SHNGiKtvnU2dBlM5D8CXRFdd+6etgZ9dXfaPCeJtz+37PIUlixvlIhI23L5khKXs3DIzAn9V8v+qb1TRKrgT5w==", "cpu": [ "loong64" ], @@ -218,9 +219,9 @@ } }, "node_modules/@esbuild/linux-mips64el": { - "version": "0.24.2", - "resolved": "https://registry.npmjs.org/@esbuild/linux-mips64el/-/linux-mips64el-0.24.2.tgz", - "integrity": "sha512-iMkk7qr/wl3exJATwkISxI7kTcmHKE+BlymIAbHO8xanq/TjHaaVThFF6ipWzPHryoFsesNQJPE/3wFJw4+huw==", + "version": "0.25.2", + "resolved": "https://registry.npmjs.org/@esbuild/linux-mips64el/-/linux-mips64el-0.25.2.tgz", + "integrity": 
"sha512-hDDRlzE6rPeoj+5fsADqdUZl1OzqDYow4TB4Y/3PlKBD0ph1e6uPHzIQcv2Z65u2K0kpeByIyAjCmjn1hJgG0Q==", "cpu": [ "mips64el" ], @@ -235,9 +236,9 @@ } }, "node_modules/@esbuild/linux-ppc64": { - "version": "0.24.2", - "resolved": "https://registry.npmjs.org/@esbuild/linux-ppc64/-/linux-ppc64-0.24.2.tgz", - "integrity": "sha512-shsVrgCZ57Vr2L8mm39kO5PPIb+843FStGt7sGGoqiiWYconSxwTiuswC1VJZLCjNiMLAMh34jg4VSEQb+iEbw==", + "version": "0.25.2", + "resolved": "https://registry.npmjs.org/@esbuild/linux-ppc64/-/linux-ppc64-0.25.2.tgz", + "integrity": "sha512-tsHu2RRSWzipmUi9UBDEzc0nLc4HtpZEI5Ba+Omms5456x5WaNuiG3u7xh5AO6sipnJ9r4cRWQB2tUjPyIkc6g==", "cpu": [ "ppc64" ], @@ -252,9 +253,9 @@ } }, "node_modules/@esbuild/linux-riscv64": { - "version": "0.24.2", - "resolved": "https://registry.npmjs.org/@esbuild/linux-riscv64/-/linux-riscv64-0.24.2.tgz", - "integrity": "sha512-4eSFWnU9Hhd68fW16GD0TINewo1L6dRrB+oLNNbYyMUAeOD2yCK5KXGK1GH4qD/kT+bTEXjsyTCiJGHPZ3eM9Q==", + "version": "0.25.2", + "resolved": "https://registry.npmjs.org/@esbuild/linux-riscv64/-/linux-riscv64-0.25.2.tgz", + "integrity": "sha512-k4LtpgV7NJQOml/10uPU0s4SAXGnowi5qBSjaLWMojNCUICNu7TshqHLAEbkBdAszL5TabfvQ48kK84hyFzjnw==", "cpu": [ "riscv64" ], @@ -269,9 +270,9 @@ } }, "node_modules/@esbuild/linux-s390x": { - "version": "0.24.2", - "resolved": "https://registry.npmjs.org/@esbuild/linux-s390x/-/linux-s390x-0.24.2.tgz", - "integrity": "sha512-S0Bh0A53b0YHL2XEXC20bHLuGMOhFDO6GN4b3YjRLK//Ep3ql3erpNcPlEFed93hsQAjAQDNsvcK+hV90FubSw==", + "version": "0.25.2", + "resolved": "https://registry.npmjs.org/@esbuild/linux-s390x/-/linux-s390x-0.25.2.tgz", + "integrity": "sha512-GRa4IshOdvKY7M/rDpRR3gkiTNp34M0eLTaC1a08gNrh4u488aPhuZOCpkF6+2wl3zAN7L7XIpOFBhnaE3/Q8Q==", "cpu": [ "s390x" ], @@ -286,9 +287,9 @@ } }, "node_modules/@esbuild/linux-x64": { - "version": "0.24.2", - "resolved": "https://registry.npmjs.org/@esbuild/linux-x64/-/linux-x64-0.24.2.tgz", - "integrity": "sha512-8Qi4nQcCTbLnK9WoMjdC9NiTG6/E38RNICU6sUNqK0QFxCYgoARqVqxdFmWkdonVsvGqWhmm7MO0jyTqLqwj0Q==", + "version": "0.25.2", + "resolved": "https://registry.npmjs.org/@esbuild/linux-x64/-/linux-x64-0.25.2.tgz", + "integrity": "sha512-QInHERlqpTTZ4FRB0fROQWXcYRD64lAoiegezDunLpalZMjcUcld3YzZmVJ2H/Cp0wJRZ8Xtjtj0cEHhYc/uUg==", "cpu": [ "x64" ], @@ -303,9 +304,9 @@ } }, "node_modules/@esbuild/netbsd-arm64": { - "version": "0.24.2", - "resolved": "https://registry.npmjs.org/@esbuild/netbsd-arm64/-/netbsd-arm64-0.24.2.tgz", - "integrity": "sha512-wuLK/VztRRpMt9zyHSazyCVdCXlpHkKm34WUyinD2lzK07FAHTq0KQvZZlXikNWkDGoT6x3TD51jKQ7gMVpopw==", + "version": "0.25.2", + "resolved": "https://registry.npmjs.org/@esbuild/netbsd-arm64/-/netbsd-arm64-0.25.2.tgz", + "integrity": "sha512-talAIBoY5M8vHc6EeI2WW9d/CkiO9MQJ0IOWX8hrLhxGbro/vBXJvaQXefW2cP0z0nQVTdQ/eNyGFV1GSKrxfw==", "cpu": [ "arm64" ], @@ -320,9 +321,9 @@ } }, "node_modules/@esbuild/netbsd-x64": { - "version": "0.24.2", - "resolved": "https://registry.npmjs.org/@esbuild/netbsd-x64/-/netbsd-x64-0.24.2.tgz", - "integrity": "sha512-VefFaQUc4FMmJuAxmIHgUmfNiLXY438XrL4GDNV1Y1H/RW3qow68xTwjZKfj/+Plp9NANmzbH5R40Meudu8mmw==", + "version": "0.25.2", + "resolved": "https://registry.npmjs.org/@esbuild/netbsd-x64/-/netbsd-x64-0.25.2.tgz", + "integrity": "sha512-voZT9Z+tpOxrvfKFyfDYPc4DO4rk06qamv1a/fkuzHpiVBMOhpjK+vBmWM8J1eiB3OLSMFYNaOaBNLXGChf5tg==", "cpu": [ "x64" ], @@ -337,9 +338,9 @@ } }, "node_modules/@esbuild/openbsd-arm64": { - "version": "0.24.2", - "resolved": "https://registry.npmjs.org/@esbuild/openbsd-arm64/-/openbsd-arm64-0.24.2.tgz", - "integrity": 
"sha512-YQbi46SBct6iKnszhSvdluqDmxCJA+Pu280Av9WICNwQmMxV7nLRHZfjQzwbPs3jeWnuAhE9Jy0NrnJ12Oz+0A==", + "version": "0.25.2", + "resolved": "https://registry.npmjs.org/@esbuild/openbsd-arm64/-/openbsd-arm64-0.25.2.tgz", + "integrity": "sha512-dcXYOC6NXOqcykeDlwId9kB6OkPUxOEqU+rkrYVqJbK2hagWOMrsTGsMr8+rW02M+d5Op5NNlgMmjzecaRf7Tg==", "cpu": [ "arm64" ], @@ -354,9 +355,9 @@ } }, "node_modules/@esbuild/openbsd-x64": { - "version": "0.24.2", - "resolved": "https://registry.npmjs.org/@esbuild/openbsd-x64/-/openbsd-x64-0.24.2.tgz", - "integrity": "sha512-+iDS6zpNM6EnJyWv0bMGLWSWeXGN/HTaF/LXHXHwejGsVi+ooqDfMCCTerNFxEkM3wYVcExkeGXNqshc9iMaOA==", + "version": "0.25.2", + "resolved": "https://registry.npmjs.org/@esbuild/openbsd-x64/-/openbsd-x64-0.25.2.tgz", + "integrity": "sha512-t/TkWwahkH0Tsgoq1Ju7QfgGhArkGLkF1uYz8nQS/PPFlXbP5YgRpqQR3ARRiC2iXoLTWFxc6DJMSK10dVXluw==", "cpu": [ "x64" ], @@ -371,9 +372,9 @@ } }, "node_modules/@esbuild/sunos-x64": { - "version": "0.24.2", - "resolved": "https://registry.npmjs.org/@esbuild/sunos-x64/-/sunos-x64-0.24.2.tgz", - "integrity": "sha512-hTdsW27jcktEvpwNHJU4ZwWFGkz2zRJUz8pvddmXPtXDzVKTTINmlmga3ZzwcuMpUvLw7JkLy9QLKyGpD2Yxig==", + "version": "0.25.2", + "resolved": "https://registry.npmjs.org/@esbuild/sunos-x64/-/sunos-x64-0.25.2.tgz", + "integrity": "sha512-cfZH1co2+imVdWCjd+D1gf9NjkchVhhdpgb1q5y6Hcv9TP6Zi9ZG/beI3ig8TvwT9lH9dlxLq5MQBBgwuj4xvA==", "cpu": [ "x64" ], @@ -388,9 +389,9 @@ } }, "node_modules/@esbuild/win32-arm64": { - "version": "0.24.2", - "resolved": "https://registry.npmjs.org/@esbuild/win32-arm64/-/win32-arm64-0.24.2.tgz", - "integrity": "sha512-LihEQ2BBKVFLOC9ZItT9iFprsE9tqjDjnbulhHoFxYQtQfai7qfluVODIYxt1PgdoyQkz23+01rzwNwYfutxUQ==", + "version": "0.25.2", + "resolved": "https://registry.npmjs.org/@esbuild/win32-arm64/-/win32-arm64-0.25.2.tgz", + "integrity": "sha512-7Loyjh+D/Nx/sOTzV8vfbB3GJuHdOQyrOryFdZvPHLf42Tk9ivBU5Aedi7iyX+x6rbn2Mh68T4qq1SDqJBQO5Q==", "cpu": [ "arm64" ], @@ -405,9 +406,9 @@ } }, "node_modules/@esbuild/win32-ia32": { - "version": "0.24.2", - "resolved": "https://registry.npmjs.org/@esbuild/win32-ia32/-/win32-ia32-0.24.2.tgz", - "integrity": "sha512-q+iGUwfs8tncmFC9pcnD5IvRHAzmbwQ3GPS5/ceCyHdjXubwQWI12MKWSNSMYLJMq23/IUCvJMS76PDqXe1fxA==", + "version": "0.25.2", + "resolved": "https://registry.npmjs.org/@esbuild/win32-ia32/-/win32-ia32-0.25.2.tgz", + "integrity": "sha512-WRJgsz9un0nqZJ4MfhabxaD9Ft8KioqU3JMinOTvobbX6MOSUigSBlogP8QB3uxpJDsFS6yN+3FDBdqE5lg9kg==", "cpu": [ "ia32" ], @@ -422,9 +423,9 @@ } }, "node_modules/@esbuild/win32-x64": { - "version": "0.24.2", - "resolved": "https://registry.npmjs.org/@esbuild/win32-x64/-/win32-x64-0.24.2.tgz", - "integrity": "sha512-7VTgWzgMGvup6aSqDPLiW5zHaxYJGTO4OokMjIlrCtf+VpEL+cXKtCvg723iguPYI5oaUNdS+/V7OU2gvXVWEg==", + "version": "0.25.2", + "resolved": "https://registry.npmjs.org/@esbuild/win32-x64/-/win32-x64-0.25.2.tgz", + "integrity": "sha512-kM3HKb16VIXZyIeVrM1ygYmZBKybX8N4p754bw390wGO3Tf2j4L2/WYL+4suWujpgf6GBYs3jv7TyUivdd05JA==", "cpu": [ "x64" ], @@ -712,9 +713,9 @@ "license": "MIT" }, "node_modules/esbuild": { - "version": "0.24.2", - "resolved": "https://registry.npmjs.org/esbuild/-/esbuild-0.24.2.tgz", - "integrity": "sha512-+9egpBW8I3CD5XPe0n6BfT5fxLzxrlDzqydF3aviG+9ni1lDC/OvMHcxqEFV0+LANZG5R1bFMWfUrjVsdwxJvA==", + "version": "0.25.2", + "resolved": "https://registry.npmjs.org/esbuild/-/esbuild-0.25.2.tgz", + "integrity": "sha512-16854zccKPnC+toMywC+uKNeYSv+/eXkevRAfwRD/G9Cleq66m8XFIrigkbvauLLlCfDL45Q2cWegSg53gGBnQ==", "dev": true, "hasInstallScript": true, "license": "MIT", @@ 
-725,31 +726,31 @@ "node": ">=18" }, "optionalDependencies": { - "@esbuild/aix-ppc64": "0.24.2", - "@esbuild/android-arm": "0.24.2", - "@esbuild/android-arm64": "0.24.2", - "@esbuild/android-x64": "0.24.2", - "@esbuild/darwin-arm64": "0.24.2", - "@esbuild/darwin-x64": "0.24.2", - "@esbuild/freebsd-arm64": "0.24.2", - "@esbuild/freebsd-x64": "0.24.2", - "@esbuild/linux-arm": "0.24.2", - "@esbuild/linux-arm64": "0.24.2", - "@esbuild/linux-ia32": "0.24.2", - "@esbuild/linux-loong64": "0.24.2", - "@esbuild/linux-mips64el": "0.24.2", - "@esbuild/linux-ppc64": "0.24.2", - "@esbuild/linux-riscv64": "0.24.2", - "@esbuild/linux-s390x": "0.24.2", - "@esbuild/linux-x64": "0.24.2", - "@esbuild/netbsd-arm64": "0.24.2", - "@esbuild/netbsd-x64": "0.24.2", - "@esbuild/openbsd-arm64": "0.24.2", - "@esbuild/openbsd-x64": "0.24.2", - "@esbuild/sunos-x64": "0.24.2", - "@esbuild/win32-arm64": "0.24.2", - "@esbuild/win32-ia32": "0.24.2", - "@esbuild/win32-x64": "0.24.2" + "@esbuild/aix-ppc64": "0.25.2", + "@esbuild/android-arm": "0.25.2", + "@esbuild/android-arm64": "0.25.2", + "@esbuild/android-x64": "0.25.2", + "@esbuild/darwin-arm64": "0.25.2", + "@esbuild/darwin-x64": "0.25.2", + "@esbuild/freebsd-arm64": "0.25.2", + "@esbuild/freebsd-x64": "0.25.2", + "@esbuild/linux-arm": "0.25.2", + "@esbuild/linux-arm64": "0.25.2", + "@esbuild/linux-ia32": "0.25.2", + "@esbuild/linux-loong64": "0.25.2", + "@esbuild/linux-mips64el": "0.25.2", + "@esbuild/linux-ppc64": "0.25.2", + "@esbuild/linux-riscv64": "0.25.2", + "@esbuild/linux-s390x": "0.25.2", + "@esbuild/linux-x64": "0.25.2", + "@esbuild/netbsd-arm64": "0.25.2", + "@esbuild/netbsd-x64": "0.25.2", + "@esbuild/openbsd-arm64": "0.25.2", + "@esbuild/openbsd-x64": "0.25.2", + "@esbuild/sunos-x64": "0.25.2", + "@esbuild/win32-arm64": "0.25.2", + "@esbuild/win32-ia32": "0.25.2", + "@esbuild/win32-x64": "0.25.2" } }, "node_modules/fsevents": { @@ -768,14 +769,13 @@ } }, "node_modules/limbo-wasm": { - "version": "0.0.11", - "resolved": "file:../limbo-wasm-0.0.11.tgz", - "integrity": "sha512-Gxs1kqnCKbfwWjTSWaNQzh954DltmDK28j4EmzDEm/7NZtmwnbfeBj92pS3yJVeQpXuu6zQtaDAS0pYAhi3Q0w==" + "resolved": "..", + "link": true }, "node_modules/nanoid": { - "version": "3.3.8", - "resolved": "https://registry.npmjs.org/nanoid/-/nanoid-3.3.8.tgz", - "integrity": "sha512-WNLf5Sd8oZxOm+TzppcYk8gVOgP+l58xNy58D0nbUnOxOWRWvlcCV4kUF7ltmI6PsrLl/BgKEyS4mqsGChFN0w==", + "version": "3.3.11", + "resolved": "https://registry.npmjs.org/nanoid/-/nanoid-3.3.11.tgz", + "integrity": "sha512-N8SpfPUnUp1bK+PMYW8qSWdl9U+wwNWI4QKxOYDy9JAro3WMX7p2OeVRF9v+347pnakNevPmiHhNmZ2HbFA76w==", "dev": true, "funding": [ { @@ -799,9 +799,9 @@ "license": "ISC" }, "node_modules/postcss": { - "version": "8.5.1", - "resolved": "https://registry.npmjs.org/postcss/-/postcss-8.5.1.tgz", - "integrity": "sha512-6oz2beyjc5VMn/KV1pPw8fliQkhBXrVn1Z3TVyqZxU8kZpzEKhBdmCFqI6ZbmGtamQvQGuU1sgPTk8ZrXDD7jQ==", + "version": "8.5.3", + "resolved": "https://registry.npmjs.org/postcss/-/postcss-8.5.3.tgz", + "integrity": "sha512-dle9A3yYxlBSrt8Fu+IpjGT8SY8hN0mlaA6GY8t0P5PjIOZemULz/E2Bnm/2dcUOena75OTNkHI76uZBNUUq3A==", "dev": true, "funding": [ { @@ -877,15 +877,15 @@ } }, "node_modules/vite": { - "version": "6.0.7", - "resolved": "https://registry.npmjs.org/vite/-/vite-6.0.7.tgz", - "integrity": "sha512-RDt8r/7qx9940f8FcOIAH9PTViRrghKaK2K1jY3RaAURrEUbm9Du1mJ72G+jlhtG3WwodnfzY8ORQZbBavZEAQ==", + "version": "6.2.6", + "resolved": "https://registry.npmjs.org/vite/-/vite-6.2.6.tgz", + "integrity": 
"sha512-9xpjNl3kR4rVDZgPNdTL0/c6ao4km69a/2ihNQbcANz8RuCOK3hQBmLSJf3bRKVQjVMda+YvizNE8AwvogcPbw==", "dev": true, "license": "MIT", "dependencies": { - "esbuild": "^0.24.2", - "postcss": "^8.4.49", - "rollup": "^4.23.0" + "esbuild": "^0.25.0", + "postcss": "^8.5.3", + "rollup": "^4.30.1" }, "bin": { "vite": "bin/vite.js" diff --git a/bindings/wasm/test-limbo-pkg/package.json b/bindings/wasm/test-limbo-pkg/package.json index 9d64cc94c..96bbaa647 100644 --- a/bindings/wasm/test-limbo-pkg/package.json +++ b/bindings/wasm/test-limbo-pkg/package.json @@ -9,7 +9,7 @@ "dev": "vite" }, "devDependencies": { - "vite": "^6.0.7", + "vite": "^6.2.6", "vite-plugin-wasm": "^3.4.1" } } diff --git a/cli/Cargo.toml b/cli/Cargo.toml index 2a16f2dd0..253c08b45 100644 --- a/cli/Cargo.toml +++ b/cli/Cargo.toml @@ -20,25 +20,28 @@ path = "main.rs" [dependencies] anyhow = "1.0.75" +cfg-if = "1.0.0" clap = { version = "4.5.31", features = ["derive"] } +clap_complete = { version = "=4.5.47", features = ["unstable-dynamic"] } comfy-table = "7.1.4" +csv = "1.3.1" +ctrlc = "3.4.4" dirs = "5.0.1" env_logger = "0.10.1" +libc = "0.2.172" limbo_core = { path = "../core", default-features = true, features = [ "completion", ] } +miette = { version = "7.4.0", features = ["fancy"] } +nu-ansi-term = "0.50.1" rustyline = { version = "15.0.0", default-features = true, features = [ "derive", ] } -ctrlc = "3.4.4" -csv = "1.3.1" -miette = { version = "7.4.0", features = ["fancy"] } -cfg-if = "1.0.0" -tracing-subscriber = { version = "0.3.19", features = ["env-filter"] } -tracing = "0.1.41" +shlex = "1.3.0" syntect = "5.2.0" -nu-ansi-term = "0.50.1" - +tracing = "0.1.41" +tracing-appender = "0.2.3" +tracing-subscriber = { version = "0.3.19", features = ["env-filter"] } [features] default = ["io_uring"] @@ -46,4 +49,3 @@ io_uring = ["limbo_core/io_uring"] [build-dependencies] syntect = "5.2.0" - diff --git a/cli/app.rs b/cli/app.rs index f82c587bd..e1f0351a5 100644 --- a/cli/app.rs +++ b/cli/app.rs @@ -1,23 +1,31 @@ use crate::{ - commands::{args::EchoMode, import::ImportFile, Command, CommandParser}, + commands::{ + args::{EchoMode, TimerMode}, + import::ImportFile, + Command, CommandParser, + }, helper::LimboHelper, input::{get_io, get_writer, DbLocation, OutputMode, Settings}, opcodes_dictionary::OPCODE_DESCRIPTIONS, + HISTORY_FILE, }; use comfy_table::{Attribute, Cell, CellAlignment, Color, ContentArrangement, Row, Table}; use limbo_core::{Database, LimboError, OwnedValue, Statement, StepResult}; +use tracing_appender::non_blocking::WorkerGuard; +use tracing_subscriber::{layer::SubscriberExt, util::SubscriberInitExt, EnvFilter}; use clap::Parser; -use rustyline::{history::DefaultHistory, Editor}; +use rustyline::{error::ReadlineError, history::DefaultHistory, Editor}; use std::{ fmt, - io::{self, Write}, + io::{self, BufRead as _, Write}, path::PathBuf, rc::Rc, sync::{ atomic::{AtomicUsize, Ordering}, Arc, }, + time::{Duration, Instant}, }; #[derive(Parser)] @@ -49,11 +57,13 @@ pub struct Opts { pub vfs: Option, #[clap(long, help = "Enable experimental MVCC feature")] pub experimental_mvcc: bool, + #[clap(short = 't', long, help = "specify output file for log traces")] + pub tracing_output: Option, } const PROMPT: &str = "limbo> "; -pub struct Limbo<'a> { +pub struct Limbo { pub prompt: String, io: Arc, writer: Box, @@ -61,7 +71,12 @@ pub struct Limbo<'a> { pub interrupt_count: Arc, input_buff: String, opts: Settings, - pub rl: &'a mut Editor, + pub rl: Option>, +} + +struct QueryStatistics { + io_time_elapsed_samples: Vec, + 
execute_time_elapsed_samples: Vec<Duration>, } macro_rules! query_internal { @@ -91,8 +106,8 @@ static COLORS: &[Color] = &[Color::Green, Color::Black, Color::Grey]; -impl<'a> Limbo<'a> { - pub fn new(rl: &'a mut rustyline::Editor<LimboHelper, DefaultHistory>) -> anyhow::Result<Self> { +impl Limbo { + pub fn new() -> anyhow::Result<Self> { let opts = Opts::parse(); let db_file = opts .database @@ -119,8 +134,6 @@ ) }; let conn = db.connect()?; - let h = LimboHelper::new(conn.clone(), io.clone()); - rl.set_helper(Some(h)); let interrupt_count = Arc::new(AtomicUsize::new(0)); { let interrupt_count: Arc<AtomicUsize> = Arc::clone(&interrupt_count); @@ -130,6 +143,8 @@ }) .expect("Error setting Ctrl-C handler"); } + let sql = opts.sql.clone(); + let quiet = opts.quiet; let mut app = Self { prompt: PROMPT.to_string(), io, @@ -137,21 +152,32 @@ conn, interrupt_count, input_buff: String::new(), - opts: Settings::from(&opts), - rl, + opts: Settings::from(opts), + rl: None, }; - - if opts.sql.is_some() { - app.handle_first_input(opts.sql.as_ref().unwrap()); - } - if !opts.quiet { - app.write_fmt(format_args!("Limbo v{}", env!("CARGO_PKG_VERSION")))?; - app.writeln("Enter \".help\" for usage hints.")?; - app.display_in_memory()?; - } + app.first_run(sql, quiet)?; Ok(app) } + pub fn with_readline(mut self, mut rl: Editor<LimboHelper, DefaultHistory>) -> Self { + let h = LimboHelper::new(self.conn.clone(), self.io.clone()); + rl.set_helper(Some(h)); + self.rl = Some(rl); + self + } + + fn first_run(&mut self, sql: Option<String>, quiet: bool) -> io::Result<()> { + if let Some(sql) = sql { + self.handle_first_input(&sql); + } + if !quiet { + self.write_fmt(format_args!("Limbo v{}", env!("CARGO_PKG_VERSION")))?; + self.writeln("Enter \".help\" for usage hints.")?; + self.display_in_memory()?; + } + Ok(()) + } + fn handle_first_input(&mut self, cmd: &str) { if cmd.trim().starts_with('.') { self.handle_dot_command(&cmd[1..]); @@ -381,24 +407,84 @@ let _ = self.writeln(input); } - if input.trim_start().starts_with("explain") { - if let Ok(Some(stmt)) = self.conn.query(input) { - let _ = self.writeln(stmt.explain().as_bytes()); + let start = Instant::now(); + let mut stats = QueryStatistics { + io_time_elapsed_samples: vec![], + execute_time_elapsed_samples: vec![], + }; + // TODO: this is a quick fix. One idea for doing case-insensitive comparisons is to use + Uncased or Unicase.
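For the `.timer` feature these samples feed, the hunk that follows formats each Duration at the coarsest unit that is at least 1, and reports avg/total per sample set. A compact, std-only sketch of those two helpers (assumed to match the closures in the diff):

```rust
use std::time::Duration;

// Human-readable duration: pick the largest unit with a nonzero count.
fn elapsed_as_str(d: Duration) -> String {
    if d.as_secs() >= 1 { format!("{} s", d.as_secs_f64()) }
    else if d.as_millis() >= 1 { format!("{} ms", d.as_millis()) }
    else if d.as_micros() >= 1 { format!("{} us", d.as_micros()) }
    else { format!("{} ns", d.as_nanos()) }
}

// avg/total summary, as printed under "query execution stats".
fn sample_stats(name: &str, samples: &[Duration]) -> String {
    if samples.is_empty() { return format!("{name}: No samples available"); }
    let total: Duration = samples.iter().sum();
    let avg = total / samples.len() as u32;
    format!("{name}: avg={}, total={}", elapsed_as_str(avg), elapsed_as_str(total))
}

fn main() {
    let io = vec![Duration::from_micros(250), Duration::from_micros(750)];
    println!("{}", sample_stats("I/O", &io)); // I/O: avg=500 us, total=1 ms
}
```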
+ let temp = input.to_lowercase(); + if temp.trim_start().starts_with("explain") { + match self.conn.query(input) { + Ok(Some(stmt)) => { + let _ = self.writeln(stmt.explain().as_bytes()); + } + Err(e) => { + let _ = self.writeln(e.to_string()); + } + _ => {} } } else { let conn = self.conn.clone(); let runner = conn.query_runner(input.as_bytes()); for output in runner { - if self.print_query_result(input, output).is_err() { + if self + .print_query_result(input, output, Some(&mut stats)) + .is_err() + { break; } } } + self.print_query_performance_stats(start, stats); self.reset_input(); } - fn reset_line(&mut self, line: &str) -> rustyline::Result<()> { - self.rl.add_history_entry(line.to_owned())?; + fn print_query_performance_stats(&mut self, start: Instant, stats: QueryStatistics) { + let elapsed_as_str = |duration: Duration| { + if duration.as_secs() >= 1 { + format!("{} s", duration.as_secs_f64()) + } else if duration.as_millis() >= 1 { + format!("{} ms", duration.as_millis() as f64) + } else if duration.as_micros() >= 1 { + format!("{} us", duration.as_micros() as f64) + } else { + format!("{} ns", duration.as_nanos()) + } + }; + let sample_stats_as_str = |name: &str, samples: Vec<Duration>| { + if samples.is_empty() { + return format!("{}: No samples available", name); + } + let avg_time_spent = samples.iter().sum::<Duration>() / samples.len() as u32; + let total_time = samples.iter().fold(Duration::ZERO, |acc, x| acc + *x); + format!( + "{}: avg={}, total={}", + name, + elapsed_as_str(avg_time_spent), + elapsed_as_str(total_time), + ) + }; + if self.opts.timer { + let _ = self.writeln("Command stats:\n----------------------------"); + let _ = self.writeln(format!( + "total: {} (this includes parsing/coloring of cli app)\n", + elapsed_as_str(start.elapsed()) + )); + + let _ = self.writeln("query execution stats:\n----------------------------"); + let _ = self.writeln(sample_stats_as_str( + "Execution", + stats.execute_time_elapsed_samples, + )); + let _ = self.writeln(sample_stats_as_str("I/O", stats.io_time_elapsed_samples)); + } + } + + fn reset_line(&mut self, _line: &str) -> rustyline::Result<()> { + // Entry is auto added to history + // self.rl.add_history_entry(line.to_owned())?; self.interrupt_count.store(0, Ordering::SeqCst); Ok(()) } @@ -426,7 +512,7 @@ let conn = self.conn.clone(); let runner = conn.query_runner(after_comment.as_bytes()); for output in runner { - if let Err(e) = self.print_query_result(after_comment, output) { + if let Err(e) = self.print_query_result(after_comment, output, None) { let _ = self.writeln(e.to_string()); } } @@ -467,15 +553,18 @@ } match CommandParser::try_parse_from(args) { Err(err) => { - let _ = self.write_fmt(format_args!("{err}")); + // Let clap print with styled colors instead + let _ = err.print(); } Ok(cmd) => match cmd.command { Command::Exit(args) => { + self.save_history(); std::process::exit(args.code); } Command::Quit => { let _ = self.writeln("Exiting Limbo SQL Shell."); let _ = self.close_conn(); + self.save_history(); std::process::exit(0) } Command::Open(args) => { @@ -554,6 +643,17 @@ let _ = self.writeln(v); }); } + Command::ListIndexes(args) => { + if let Err(e) = self.display_indexes(args.tbl_name) { + let _ = self.writeln(e.to_string()); + } + } + Command::Timer(timer_mode) => { + self.opts.timer = match timer_mode.mode { + TimerMode::On => true, + TimerMode::Off => false, + }; + } }, } } @@ -562,6 +662,7 @@ &mut self, sql: &str, mut output: Result<Option<Statement>,
LimboError>, + mut statistics: Option<&mut QueryStatistics>, ) -> anyhow::Result<()> { match output { Ok(Some(ref mut rows)) => match self.opts.output_mode { @@ -571,8 +672,13 @@ impl<'a> Limbo<'a> { return Ok(()); } + let start = Instant::now(); + match rows.step() { Ok(StepResult::Row) => { + if let Some(ref mut stats) = statistics { + stats.execute_time_elapsed_samples.push(start.elapsed()); + } let row = rows.row().unwrap(); for (i, value) in row.get_values().enumerate() { if i > 0 { @@ -587,17 +693,30 @@ impl<'a> Limbo<'a> { let _ = self.writeln(""); } Ok(StepResult::IO) => { + let start = Instant::now(); self.io.run_once()?; + if let Some(ref mut stats) = statistics { + stats.io_time_elapsed_samples.push(start.elapsed()); + } } Ok(StepResult::Interrupt) => break, Ok(StepResult::Done) => { + if let Some(ref mut stats) = statistics { + stats.execute_time_elapsed_samples.push(start.elapsed()); + } break; } Ok(StepResult::Busy) => { + if let Some(ref mut stats) = statistics { + stats.execute_time_elapsed_samples.push(start.elapsed()); + } let _ = self.writeln("database is busy"); break; } Err(err) => { + if let Some(ref mut stats) = statistics { + stats.execute_time_elapsed_samples.push(start.elapsed()); + } let _ = self.writeln(err.to_string()); break; } @@ -625,8 +744,12 @@ impl<'a> Limbo<'a> { table.set_header(header); } loop { + let start = Instant::now(); match rows.step() { Ok(StepResult::Row) => { + if let Some(ref mut stats) = statistics { + stats.execute_time_elapsed_samples.push(start.elapsed()); + } let record = rows.row().unwrap(); let mut row = Row::new(); row.max_height(1); @@ -657,35 +780,52 @@ impl<'a> Limbo<'a> { table.add_row(row); } Ok(StepResult::IO) => { + let start = Instant::now(); self.io.run_once()?; + if let Some(ref mut stats) = statistics { + stats.io_time_elapsed_samples.push(start.elapsed()); + } + } + Ok(StepResult::Interrupt) => { + if let Some(ref mut stats) = statistics { + stats.execute_time_elapsed_samples.push(start.elapsed()); + } + break; + } + Ok(StepResult::Done) => { + if let Some(ref mut stats) = statistics { + stats.execute_time_elapsed_samples.push(start.elapsed()); + } + break; } - Ok(StepResult::Interrupt) => break, - Ok(StepResult::Done) => break, Ok(StepResult::Busy) => { + if let Some(ref mut stats) = statistics { + stats.execute_time_elapsed_samples.push(start.elapsed()); + } let _ = self.writeln("database is busy"); break; } Err(err) => { - let _ = self.write_fmt(format_args!( - "{:?}", - miette::Error::from(err).with_source_code(sql.to_owned()) - )); + if let Some(ref mut stats) = statistics { + stats.execute_time_elapsed_samples.push(start.elapsed()); + } + let report = + miette::Error::from(err).with_source_code(sql.to_owned()); + let _ = self.write_fmt(format_args!("{:?}", report)); break; } } } - if table.header().is_some() { + if !table.is_empty() { let _ = self.write_fmt(format_args!("{}", table)); } } }, Ok(None) => {} Err(err) => { - let _ = self.write_fmt(format_args!( - "{:?}", - miette::Error::from(err).with_source_code(sql.to_owned()) - )); + let report = miette::Error::from(err).with_source_code(sql.to_owned()); + let _ = self.write_fmt(format_args!("{:?}", report)); anyhow::bail!("We have to throw here, even if we printed error"); } } @@ -694,6 +834,37 @@ impl<'a> Limbo<'a> { Ok(()) } + pub fn init_tracing(&mut self) -> Result { + let ((non_blocking, guard), should_emit_ansi) = + if let Some(file) = &self.opts.tracing_output { + ( + tracing_appender::non_blocking( + std::fs::File::options() + .append(true) + .create(true) 
+ .open(file)?, + ), + false, + ) + } else { + (tracing_appender::non_blocking(std::io::stderr()), true) + }; + if let Err(e) = tracing_subscriber::registry() + .with( + tracing_subscriber::fmt::layer() + .with_writer(non_blocking) + .with_line_number(true) + .with_thread_ids(true) + .with_ansi(should_emit_ansi), + ) + .with(EnvFilter::from_default_env()) + .try_init() + { + println!("Unable to setup tracing appender: {:?}", e); + } + Ok(guard) + } + fn display_schema(&mut self, table: Option<&str>) -> anyhow::Result<()> { let sql = match table { Some(table_name) => format!( @@ -752,6 +923,55 @@ impl<'a> Limbo<'a> { Ok(()) } + fn display_indexes(&mut self, maybe_table: Option) -> anyhow::Result<()> { + let sql = match maybe_table { + Some(ref tbl_name) => format!( + "SELECT name FROM sqlite_schema WHERE type='index' AND tbl_name = '{}' ORDER BY 1", + tbl_name + ), + None => String::from("SELECT name FROM sqlite_schema WHERE type='index' ORDER BY 1"), + }; + + match self.conn.query(&sql) { + Ok(Some(ref mut rows)) => { + let mut indexes = String::new(); + loop { + match rows.step()? { + StepResult::Row => { + let row = rows.row().unwrap(); + if let Ok(OwnedValue::Text(idx)) = row.get::<&OwnedValue>(0) { + indexes.push_str(idx.as_str()); + indexes.push(' '); + } + } + StepResult::IO => { + self.io.run_once()?; + } + StepResult::Interrupt => break, + StepResult::Done => break, + StepResult::Busy => { + let _ = self.writeln("database is busy"); + break; + } + } + } + if !indexes.is_empty() { + let _ = self.writeln(indexes.trim_end()); + } + } + Err(err) => { + if err.to_string().contains("no such table: sqlite_schema") { + return Err(anyhow::anyhow!("Unable to access database schema. The database may be using an older SQLite version or may not be properly initialized.")); + } else { + return Err(anyhow::anyhow!("Error querying schema: {}", err)); + } + } + Ok(None) => {} + } + + Ok(()) + } + fn display_tables(&mut self, pattern: Option<&str>) -> anyhow::Result<()> { let sql = match pattern { Some(pattern) => format!( @@ -822,4 +1042,38 @@ impl<'a> Limbo<'a> { self.run_query(buff.as_str()); self.reset_input(); } + + pub fn readline(&mut self) -> Result { + if let Some(rl) = &mut self.rl { + Ok(rl.readline(&self.prompt)?) + } else { + let mut input = String::new(); + println!(""); + let mut reader = std::io::stdin().lock(); + if reader.read_line(&mut input)? 
== 0 { + return Err(ReadlineError::Eof.into()); + } + // Remove trailing newline + if input.ends_with('\n') { + input.pop(); + if input.ends_with('\r') { + input.pop(); + } + } + + Ok(input) + } + } + + fn save_history(&mut self) { + if let Some(rl) = &mut self.rl { + let _ = rl.save_history(HISTORY_FILE.as_path()); + } + } +} + +impl Drop for Limbo { + fn drop(&mut self) { + self.save_history() + } } diff --git a/cli/commands/args.rs b/cli/commands/args.rs index e0fd10994..4c36e6ef6 100644 --- a/cli/commands/args.rs +++ b/cli/commands/args.rs @@ -1,6 +1,13 @@ use clap::{Args, ValueEnum}; +use clap_complete::{ArgValueCompleter, CompletionCandidate, PathCompleter}; -use crate::input::OutputMode; +use crate::{input::OutputMode, opcodes_dictionary::OPCODE_DESCRIPTIONS}; + +#[derive(Debug, Clone, Args)] +pub struct IndexesArgs { + /// Name of table + pub tbl_name: Option<String>, +} #[derive(Debug, Clone, Args)] pub struct ExitArgs { @@ -12,13 +19,17 @@ pub struct OpenArgs { /// Path to open database + #[arg(add = ArgValueCompleter::new(PathCompleter::file()))] pub path: String, + // TODO see how to have this completed with the output of List Vfs function + // Currently not possible to pass arbitrary /// Name of VFS pub vfs_name: Option<String>, } #[derive(Debug, Clone, Args)] pub struct SchemaArgs { + // TODO depends on PRAGMA table_list for completions /// Table name to visualize schema pub table_name: Option<String>, } @@ -26,6 +37,7 @@ pub struct SetOutputArgs { /// File path to send output to + #[arg(add = ArgValueCompleter::new(PathCompleter::file()))] pub path: Option<String>, } @@ -35,15 +47,40 @@ pub struct OutputModeArgs { pub mode: OutputMode, } +fn opcodes_completer(current: &std::ffi::OsStr) -> Vec<CompletionCandidate> { + let mut completions = vec![]; + + let Some(current) = current.to_str() else { + return completions; + }; + + let current = current.to_lowercase(); + + let opcodes = &OPCODE_DESCRIPTIONS; + + for op in opcodes { + // TODO: if someone knows how to do a case-insensitive prefix match in Rust + // without converting the String to lowercase first, please fix this.
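One std-only answer to the TODO above: compare byte prefixes with eq_ignore_ascii_case, which needs no lowercase copies (a sketch for the reviewer, not wired into this diff):

```rust
// Allocation-free, ASCII case-insensitive prefix test.
fn starts_with_ignore_ascii_case(haystack: &str, prefix: &str) -> bool {
    haystack.len() >= prefix.len()
        && haystack.as_bytes()[..prefix.len()].eq_ignore_ascii_case(prefix.as_bytes())
}

fn main() {
    assert!(starts_with_ignore_ascii_case("OpenRead", "openr"));
    assert!(!starts_with_ignore_ascii_case("OpenRead", "close"));
}
```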
+ let op_name = op.name.to_ascii_lowercase(); + if op_name.starts_with(&current) { + completions.push(CompletionCandidate::new(op.name).help(Some(op.description.into()))); + } + } + + completions +} + #[derive(Debug, Clone, Args)] pub struct OpcodesArgs { /// Opcode to display description + #[arg(add = ArgValueCompleter::new(opcodes_completer))] pub opcode: Option<String>, } #[derive(Debug, Clone, Args)] pub struct CwdArgs { /// Target directory + #[arg(add = ArgValueCompleter::new(PathCompleter::dir()))] pub directory: String, } @@ -72,11 +109,18 @@ pub struct TablesArgs { #[derive(Debug, Clone, Args)] pub struct LoadExtensionArgs { /// Path to extension file + #[arg(add = ArgValueCompleter::new(PathCompleter::file()))] pub path: String, } -#[derive(Debug, Clone, Args)] -pub struct ListVfsArgs { - /// Path to extension file - pub path: String, +#[derive(Debug, ValueEnum, Clone)] +pub enum TimerMode { + On, + Off, +} + +#[derive(Debug, Clone, Args)] +pub struct TimerArgs { + #[arg(value_enum)] + pub mode: TimerMode, } diff --git a/cli/commands/import.rs b/cli/commands/import.rs index df33500dd..38ec5df45 100644 --- a/cli/commands/import.rs +++ b/cli/commands/import.rs @@ -1,4 +1,5 @@ use clap::Args; +use clap_complete::{ArgValueCompleter, PathCompleter}; use limbo_core::Connection; use std::{fs::File, io::Write, path::PathBuf, rc::Rc, sync::Arc}; @@ -13,6 +14,7 @@ pub struct ImportArgs { /// Skip the first N rows of input #[arg(long, default_value = "0")] skip: u64, + #[arg(add = ArgValueCompleter::new(PathCompleter::file()))] file: PathBuf, table: String, } diff --git a/cli/commands/mod.rs b/cli/commands/mod.rs index 33261a860..bd94c6051 100644 --- a/cli/commands/mod.rs +++ b/cli/commands/mod.rs @@ -2,8 +2,8 @@ pub mod args; pub mod import; use args::{ - CwdArgs, EchoArgs, ExitArgs, LoadExtensionArgs, NullValueArgs, OpcodesArgs, OpenArgs, - OutputModeArgs, SchemaArgs, SetOutputArgs, TablesArgs, + CwdArgs, EchoArgs, ExitArgs, IndexesArgs, LoadExtensionArgs, NullValueArgs, OpcodesArgs, + OpenArgs, OutputModeArgs, SchemaArgs, SetOutputArgs, TablesArgs, TimerArgs, }; use clap::Parser; use import::ImportArgs; @@ -35,9 +35,6 @@ pub enum Command { /// Open a database file #[command(display_name = ".open")] Open(OpenArgs), - /// Print this message or the help of the given subcommand(s) - // #[command(display_name = ".help")] - // Help, /// Display schema for a table #[command(display_name = ".schema")] Schema(SchemaArgs), @@ -75,6 +72,11 @@ pub enum Command { /// List vfs modules available #[command(name = "vfslist", display_name = ".vfslist")] ListVfs, /// Show names of indexes + #[command(name = "indexes", display_name = ".indexes")] + ListIndexes(IndexesArgs), + #[command(name = "timer", display_name = ".timer")] + Timer(TimerArgs), } const _HELP_TEMPLATE: &str = "{before-help}{name} diff --git a/cli/helper.rs b/cli/helper.rs index 90549dd11..70194234d 100644 --- a/cli/helper.rs +++ b/cli/helper.rs @@ -1,12 +1,18 @@ -use std::rc::Rc; -use std::sync::Arc; - +use clap::Parser; use limbo_core::{Connection, StepResult}; use nu_ansi_term::{Color, Style}; use rustyline::completion::{extract_word, Completer, Pair}; use rustyline::highlight::Highlighter; use rustyline::hint::HistoryHinter; use rustyline::{Completer, Helper, Hinter, Validator}; +use shlex::Shlex; +use std::cell::RefCell; +use std::marker::PhantomData; +use std::rc::Rc; +use std::sync::Arc; +use std::{ffi::OsString, path::PathBuf, str::FromStr as _}; + +use crate::commands::CommandParser; macro_rules!
try_result { ($expr:expr, $err:expr) => { @@ -20,7 +26,7 @@ macro_rules! try_result { #[derive(Helper, Completer, Hinter, Validator)] pub struct LimboHelper { #[rustyline(Completer)] - completer: SqlCompleter, + completer: SqlCompleter, #[rustyline(Hinter)] hinter: HistoryHinter, } @@ -77,57 +83,70 @@ impl Highlighter for LimboHelper { } } -pub struct SqlCompleter { +pub struct SqlCompleter { conn: Rc, io: Arc, + // Has to be a ref cell as Rustyline takes immutable reference to self + // This problem would be solved with Reedline as it uses &mut self for completions + cmd: RefCell, + _cmd_phantom: PhantomData, } -impl SqlCompleter { +impl SqlCompleter { pub fn new(conn: Rc, io: Arc) -> Self { - Self { conn, io } - } -} - -// Got this from the FilenameCompleter. -// TODO have to see what chars break words in Sqlite -cfg_if::cfg_if! { - if #[cfg(unix)] { - // rl_basic_word_break_characters, rl_completer_word_break_characters - const fn default_break_chars(c : char) -> bool { - matches!(c, ' ' | '\t' | '\n' | '"' | '\\' | '\'' | '`' | '@' | '$' | '>' | '<' | '=' | ';' | '|' | '&' | - '{' | '(' | '\0') + Self { + conn, + io, + cmd: C::command().into(), + _cmd_phantom: PhantomData::default(), } - const ESCAPE_CHAR: Option = Some('\\'); - // In double quotes, not all break_chars need to be escaped - // https://www.gnu.org/software/bash/manual/html_node/Double-Quotes.html - #[allow(dead_code)] - const fn double_quotes_special_chars(c: char) -> bool { matches!(c, '"' | '$' | '\\' | '`') } - } else if #[cfg(windows)] { - // Remove \ to make file completion works on windows - const fn default_break_chars(c: char) -> bool { - matches!(c, ' ' | '\t' | '\n' | '"' | '\'' | '`' | '@' | '$' | '>' | '<' | '=' | ';' | '|' | '&' | '{' | - '(' | '\0') - } - const ESCAPE_CHAR: Option = None; - #[allow(dead_code)] - const fn double_quotes_special_chars(c: char) -> bool { c == '"' } // TODO Validate: only '"' ? 
- } else if #[cfg(target_arch = "wasm32")] { - const fn default_break_chars(c: char) -> bool { false } - const ESCAPE_CHAR: Option = None; - #[allow(dead_code)] - const fn double_quotes_special_chars(c: char) -> bool { false } } -} -impl Completer for SqlCompleter { - type Candidate = Pair; - - fn complete( + fn dot_completion( &self, - line: &str, - pos: usize, - _ctx: &rustyline::Context<'_>, - ) -> rustyline::Result<(usize, Vec)> { + mut line: &str, + mut pos: usize, + ) -> rustyline::Result<(usize, Vec)> { + // TODO maybe check to see if the line is empty and then just output the command names + line = &line[1..]; + pos = pos - 1; + + let (prefix_pos, _) = extract_word(line, pos, ESCAPE_CHAR, default_break_chars); + + let args = Shlex::new(line); + let mut args = std::iter::once("".to_owned()) + .chain(args) + .map(OsString::from) + .collect::>(); + if line.ends_with(' ') { + args.push(OsString::new()); + } + let arg_index = args.len() - 1; + // dbg!(&pos, line, &args, arg_index); + + let mut cmd = self.cmd.borrow_mut(); + match clap_complete::engine::complete( + &mut cmd, + args, + arg_index, + PathBuf::from_str(".").ok().as_deref(), + ) { + Ok(candidates) => { + let candidates = candidates + .iter() + .map(|candidate| Pair { + display: candidate.get_value().to_string_lossy().into_owned(), + replacement: candidate.get_value().to_string_lossy().into_owned(), + }) + .collect::>(); + + Ok((prefix_pos + 1, candidates)) + } + Err(_) => Ok((prefix_pos + 1, Vec::new())), + } + } + + fn sql_completion(&self, line: &str, pos: usize) -> rustyline::Result<(usize, Vec)> { // TODO: have to differentiate words if they are enclosed in single of double quotes let (prefix_pos, prefix) = extract_word(line, pos, ESCAPE_CHAR, default_break_chars); let mut candidates = Vec::new(); @@ -167,3 +186,51 @@ impl Completer for SqlCompleter { Ok((prefix_pos, candidates)) } } + +// Got this from the FilenameCompleter. +// TODO have to see what chars break words in Sqlite +cfg_if::cfg_if! { + if #[cfg(unix)] { + // rl_basic_word_break_characters, rl_completer_word_break_characters + const fn default_break_chars(c : char) -> bool { + matches!(c, ' ' | '\t' | '\n' | '"' | '\\' | '\'' | '`' | '@' | '$' | '>' | '<' | '=' | ';' | '|' | '&' | + '{' | '(' | '\0') + } + const ESCAPE_CHAR: Option = Some('\\'); + // In double quotes, not all break_chars need to be escaped + // https://www.gnu.org/software/bash/manual/html_node/Double-Quotes.html + #[allow(dead_code)] + const fn double_quotes_special_chars(c: char) -> bool { matches!(c, '"' | '$' | '\\' | '`') } + } else if #[cfg(windows)] { + // Remove \ to make file completion works on windows + const fn default_break_chars(c: char) -> bool { + matches!(c, ' ' | '\t' | '\n' | '"' | '\'' | '`' | '@' | '$' | '>' | '<' | '=' | ';' | '|' | '&' | '{' | + '(' | '\0') + } + const ESCAPE_CHAR: Option = None; + #[allow(dead_code)] + const fn double_quotes_special_chars(c: char) -> bool { c == '"' } // TODO Validate: only '"' ? 
+ } else if #[cfg(target_arch = "wasm32")] { + const fn default_break_chars(c: char) -> bool { false } + const ESCAPE_CHAR: Option = None; + #[allow(dead_code)] + const fn double_quotes_special_chars(c: char) -> bool { false } + } +} + +impl Completer for SqlCompleter { + type Candidate = Pair; + + fn complete( + &self, + line: &str, + pos: usize, + _ctx: &rustyline::Context<'_>, + ) -> rustyline::Result<(usize, Vec)> { + if line.starts_with(".") { + self.dot_completion(line, pos) + } else { + self.sql_completion(line, pos) + } + } +} diff --git a/cli/input.rs b/cli/input.rs index 4361394c0..e20d5a71a 100644 --- a/cli/input.rs +++ b/cli/input.rs @@ -43,7 +43,7 @@ impl Default for Io { true => { #[cfg(all(target_os = "linux", feature = "io_uring"))] { - Io::IoUring + Io::Syscall // FIXME: make io_uring faster so it can be the default } #[cfg(any( not(target_os = "linux"), @@ -81,28 +81,32 @@ pub struct Settings { pub echo: bool, pub is_stdout: bool, pub io: Io, + pub tracing_output: Option, + pub timer: bool, } -impl From<&Opts> for Settings { - fn from(opts: &Opts) -> Self { +impl From for Settings { + fn from(opts: Opts) -> Self { Self { null_value: String::new(), output_mode: opts.output_mode, echo: false, is_stdout: opts.output.is_empty(), - output_filename: opts.output.clone(), + output_filename: opts.output, db_file: opts .database .as_ref() .map_or(":memory:".to_string(), |p| p.to_string_lossy().to_string()), io: match opts.vfs.as_ref().unwrap_or(&String::new()).as_str() { - "memory" => Io::Memory, + "memory" | ":memory:" => Io::Memory, "syscall" => Io::Syscall, #[cfg(all(target_os = "linux", feature = "io_uring"))] "io_uring" => Io::IoUring, "" => Io::default(), vfs => Io::External(vfs.to_string()), }, + tracing_output: opts.tracing_output, + timer: false, } } } @@ -214,6 +218,8 @@ pub const AFTER_HELP_MSG: &str = r#"Usage Examples: 13. To list all available VFS: .listvfs +14. To show names of indexes: + .indexes ?TABLE? Note: - All SQL commands must end with a semicolon (;). 
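For reference, here is the argv preparation that dot_completion performs before handing the buffer to clap_complete, condensed into a runnable sketch: prepend an empty argv[0], shell-split with shlex, and append an empty token when the cursor follows a space so the engine knows a new argument is being started (uses the shlex crate added to cli/Cargo.toml; the clap_complete call itself is elided):

```rust
use std::ffi::OsString;

// Tokenize a dot-command line the way dot_completion does.
fn completion_args(line: &str) -> (Vec<OsString>, usize) {
    let words = shlex::Shlex::new(line);
    let mut args: Vec<OsString> = std::iter::once(String::new()) // fake argv[0]
        .chain(words)
        .map(OsString::from)
        .collect();
    if line.ends_with(' ') {
        args.push(OsString::new()); // cursor is starting a fresh argument
    }
    let arg_index = args.len() - 1; // complete the last token
    (args, arg_index)
}

fn main() {
    let (args, idx) = completion_args("open my.db ");
    assert_eq!(args.len(), 4); // "", "open", "my.db", ""
    assert_eq!(idx, 3);
}
```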
diff --git a/cli/main.rs b/cli/main.rs index 4e8eca02a..82eb64953 100644 --- a/cli/main.rs +++ b/cli/main.rs @@ -6,33 +6,39 @@ mod input; mod opcodes_dictionary; use rustyline::{error::ReadlineError, Config, Editor}; -use std::sync::atomic::Ordering; -use tracing_subscriber::{layer::SubscriberExt, util::SubscriberInitExt, EnvFilter}; +use std::{ + path::PathBuf, + sync::{atomic::Ordering, LazyLock}, +}; fn rustyline_config() -> Config { Config::builder() .completion_type(rustyline::CompletionType::List) + .auto_add_history(true) .build() } +pub static HOME_DIR: LazyLock = + LazyLock::new(|| dirs::home_dir().expect("Could not determine home directory")); + +pub static HISTORY_FILE: LazyLock = LazyLock::new(|| HOME_DIR.join(".limbo_history")); + fn main() -> anyhow::Result<()> { - let mut rl = Editor::with_config(rustyline_config())?; - tracing_subscriber::registry() - .with( - tracing_subscriber::fmt::layer() - .with_line_number(true) - .with_thread_ids(true), - ) - .with(EnvFilter::from_default_env()) - .init(); - let mut app = app::Limbo::new(&mut rl)?; - let home = dirs::home_dir().expect("Could not determine home directory"); - let history_file = home.join(".limbo_history"); - if history_file.exists() { - app.rl.load_history(history_file.as_path())?; + let mut app = app::Limbo::new()?; + let _guard = app.init_tracing()?; + + if std::io::IsTerminal::is_terminal(&std::io::stdin()) { + let mut rl = Editor::with_config(rustyline_config())?; + if HISTORY_FILE.exists() { + rl.load_history(HISTORY_FILE.as_path())?; + } + app = app.with_readline(rl); + } else { + tracing::debug!("not in tty"); } + loop { - let readline = app.rl.readline(&app.prompt); + let readline = app.readline(); match readline { Ok(line) => match app.handle_input_line(line.trim()) { Ok(_) => {} @@ -62,6 +68,5 @@ fn main() -> anyhow::Result<()> { } } } - rl.save_history(history_file.as_path())?; Ok(()) } diff --git a/core/Cargo.toml b/core/Cargo.toml index 4d294d397..d51a3440f 100644 --- a/core/Cargo.toml +++ b/core/Cargo.toml @@ -28,16 +28,17 @@ ipaddr = ["limbo_ipaddr/static"] completion = ["limbo_completion/static"] testvfs = ["limbo_ext_tests/static"] static = ["limbo_ext/static"] +fuzz = [] [target.'cfg(target_os = "linux")'.dependencies] -io-uring = { version = "0.6.1", optional = true } +io-uring = { version = "0.7.5", optional = true } [target.'cfg(target_family = "unix")'.dependencies] -polling = "3.7.2" -rustix = "0.38.34" +polling = "3.7.4" +rustix = { version = "1.0.5", features = ["fs"]} [target.'cfg(not(target_family = "wasm"))'.dependencies] -mimalloc = { version = "0.1", default-features = false } +mimalloc = { version = "0.1.46", default-features = false } libloading = "0.8.6" [dependencies] @@ -45,7 +46,7 @@ limbo_ext = { workspace = true, features = ["core_only"] } cfg_block = "0.1.1" fallible-iterator = "0.3.0" hex = "0.4.3" -libc = { version = "0.2.155", optional = true } +libc = { version = "0.2.172", optional = true } limbo_sqlite3_parser = { workspace = true } thiserror = "1.0.61" getrandom = { version = "0.2.15" } @@ -54,7 +55,7 @@ regex-syntax = { version = "0.8.5", default-features = false, features = [ "unicode", ] } chrono = { version = "0.4.38", default-features = false, features = ["clock"] } -julian_day_converter = "0.4.4" +julian_day_converter = "0.4.5" rand = "0.8.5" libm = "0.2" limbo_macros = { workspace = true } @@ -67,12 +68,13 @@ limbo_series = { workspace = true, optional = true, features = ["static"] } limbo_ipaddr = { workspace = true, optional = true, features = ["static"] } 
limbo_completion = { workspace = true, optional = true, features = ["static"] } limbo_ext_tests = { workspace = true, optional = true, features = ["static"] } -miette = "7.4.0" +miette = "7.6.0" strum = "0.26" parking_lot = "0.12.3" crossbeam-skiplist = "0.1.3" tracing = "0.1.41" ryu = "1.0.19" +bitflags = "2.9.0" [build-dependencies] chrono = { version = "0.4.38", default-features = false } @@ -96,7 +98,7 @@ rand = "0.8.5" # Required for quickcheck rand_chacha = "0.9.0" env_logger = "0.11.6" test-log = { version = "0.2.17", features = ["trace"] } -lru = "0.13.0" +lru = "0.14.0" [[bench]] name = "benchmark" diff --git a/core/error.rs b/core/error.rs index 3a1fd8112..1eca50305 100644 --- a/core/error.rs +++ b/core/error.rs @@ -1,5 +1,3 @@ -use std::num::NonZero; - use thiserror::Error; #[derive(Debug, Error, miette::Diagnostic)] @@ -49,12 +47,12 @@ pub enum LimboError { Constraint(String), #[error("Extension error: {0}")] ExtensionError(String), - #[error("Unbound parameter at index {0}")] - Unbound(NonZero<usize>), #[error("Runtime error: integer overflow")] IntegerOverflow, #[error("Schema is locked for write")] SchemaLocked, + #[error("Database Connection is read-only")] + ReadOnly, } #[macro_export] diff --git a/core/ext/dynamic.rs b/core/ext/dynamic.rs index df342caca..17138f268 100644 --- a/core/ext/dynamic.rs +++ b/core/ext/dynamic.rs @@ -6,6 +6,7 @@ use libloading::{Library, Symbol}; use limbo_ext::{ExtensionApi, ExtensionApiRef, ExtensionEntryPoint, ResultCode, VfsImpl}; use std::{ ffi::{c_char, CString}, + rc::Rc, sync::{Arc, Mutex, OnceLock}, }; @@ -29,7 +30,10 @@ unsafe impl Send for VfsMod {} unsafe impl Sync for VfsMod {} impl Connection { - pub fn load_extension<P: AsRef<Path>>(&self, path: P) -> crate::Result<()> { + pub fn load_extension<P: AsRef<Path>>( + self: &Rc<Self>, + path: P, + ) -> crate::Result<()> { use limbo_ext::ExtensionApiRef; let api = Box::new(self.build_limbo_ext()); @@ -44,7 +48,15 @@ let result_code = unsafe { entry(api_ptr) }; if result_code.is_ok() { let extensions = get_extension_libraries(); - extensions.lock().unwrap().push((Arc::new(lib), api_ref)); + extensions + .lock() + .map_err(|_| { + LimboError::ExtensionError("Error locking extension libraries".to_string()) + })?
+ .push((Arc::new(lib), api_ref)); + { + self.parse_schema_rows()?; + } Ok(()) } else { if !api_ptr.is_null() { diff --git a/core/ext/mod.rs b/core/ext/mod.rs index 270bee682..939fe3e05 100644 --- a/core/ext/mod.rs +++ b/core/ext/mod.rs @@ -89,12 +89,12 @@ impl Database { path: &str, vfs: &str, ) -> crate::Result<(Arc, Arc)> { - use crate::{MemoryIO, PlatformIO}; + use crate::{MemoryIO, SyscallIO}; use dynamic::get_vfs_modules; let io: Arc = match vfs { "memory" => Arc::new(MemoryIO::new()), - "syscall" => Arc::new(PlatformIO::new()?), + "syscall" => Arc::new(SyscallIO::new()?), #[cfg(all(target_os = "linux", feature = "io_uring"))] "io_uring" => Arc::new(UringIO::new()?), other => match get_vfs_modules().iter().find(|v| v.0 == vfs) { diff --git a/core/function.rs b/core/function.rs index 8f27b1a5b..5a436465e 100644 --- a/core/function.rs +++ b/core/function.rs @@ -10,6 +10,12 @@ pub struct ExternalFunc { pub func: ExtFunc, } +impl ExternalFunc { + pub fn is_deterministic(&self) -> bool { + false // external functions can be whatever so let's just default to false + } +} + #[derive(Debug, Clone)] pub enum ExtFunc { Scalar(ScalarFunction), @@ -98,6 +104,13 @@ pub enum JsonFunc { JsonQuote, } +#[cfg(feature = "json")] +impl JsonFunc { + pub fn is_deterministic(&self) -> bool { + true + } +} + #[cfg(feature = "json")] impl Display for JsonFunc { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { @@ -145,6 +158,12 @@ pub enum VectorFunc { VectorDistanceCos, } +impl VectorFunc { + pub fn is_deterministic(&self) -> bool { + true + } +} + impl Display for VectorFunc { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { let str = match self { @@ -198,6 +217,10 @@ impl PartialEq for AggFunc { } impl AggFunc { + pub fn is_deterministic(&self) -> bool { + false // consider aggregate functions nondeterministic since they depend on the number of rows, not only the input arguments + } + pub fn num_args(&self) -> usize { match self { Self::Avg => 1, @@ -292,6 +315,68 @@ pub enum ScalarFunc { LoadExtension, StrfTime, Printf, + Likely, + TimeDiff, + Likelihood, +} + +impl ScalarFunc { + pub fn is_deterministic(&self) -> bool { + match self { + ScalarFunc::Cast => true, + ScalarFunc::Changes => false, // depends on DB state + ScalarFunc::Char => true, + ScalarFunc::Coalesce => true, + ScalarFunc::Concat => true, + ScalarFunc::ConcatWs => true, + ScalarFunc::Glob => true, + ScalarFunc::IfNull => true, + ScalarFunc::Iif => true, + ScalarFunc::Instr => true, + ScalarFunc::Like => true, + ScalarFunc::Abs => true, + ScalarFunc::Upper => true, + ScalarFunc::Lower => true, + ScalarFunc::Random => false, // duh + ScalarFunc::RandomBlob => false, // duh + ScalarFunc::Trim => true, + ScalarFunc::LTrim => true, + ScalarFunc::RTrim => true, + ScalarFunc::Round => true, + ScalarFunc::Length => true, + ScalarFunc::OctetLength => true, + ScalarFunc::Min => true, + ScalarFunc::Max => true, + ScalarFunc::Nullif => true, + ScalarFunc::Sign => true, + ScalarFunc::Substr => true, + ScalarFunc::Substring => true, + ScalarFunc::Soundex => true, + ScalarFunc::Date => false, + ScalarFunc::Time => false, + ScalarFunc::TotalChanges => false, + ScalarFunc::DateTime => false, + ScalarFunc::Typeof => true, + ScalarFunc::Unicode => true, + ScalarFunc::Quote => true, + ScalarFunc::SqliteVersion => true, + ScalarFunc::SqliteSourceId => true, + ScalarFunc::UnixEpoch => false, + ScalarFunc::JulianDay => false, + ScalarFunc::Hex => true, + ScalarFunc::Unhex => true, + ScalarFunc::ZeroBlob => true, + ScalarFunc::LastInsertRowid 
=> false, + ScalarFunc::Replace => true, + #[cfg(feature = "fs")] + ScalarFunc::LoadExtension => true, + ScalarFunc::StrfTime => false, + ScalarFunc::Printf => false, + ScalarFunc::Likely => true, + ScalarFunc::TimeDiff => false, + ScalarFunc::Likelihood => true, + } + } } impl Display for ScalarFunc { @@ -346,6 +431,9 @@ impl Display for ScalarFunc { Self::LoadExtension => "load_extension".to_string(), Self::StrfTime => "strftime".to_string(), Self::Printf => "printf".to_string(), + Self::Likely => "likely".to_string(), + Self::TimeDiff => "timediff".to_string(), + Self::Likelihood => "likelihood".to_string(), }; write!(f, "{}", str) } @@ -392,6 +480,9 @@ pub enum MathFuncArity { } impl MathFunc { + pub fn is_deterministic(&self) -> bool { + true + } pub fn arity(&self) -> MathFuncArity { match self { Self::Pi => MathFuncArity::Nullary, @@ -495,6 +586,17 @@ pub struct FuncCtx { } impl Func { + pub fn is_deterministic(&self) -> bool { + match self { + Self::Agg(agg_func) => agg_func.is_deterministic(), + Self::Scalar(scalar_func) => scalar_func.is_deterministic(), + Self::Math(math_func) => math_func.is_deterministic(), + Self::Vector(vector_func) => vector_func.is_deterministic(), + #[cfg(feature = "json")] + Self::Json(json_func) => json_func.is_deterministic(), + Self::External(external_func) => external_func.is_deterministic(), + } + } pub fn resolve_function(name: &str, arg_count: usize) -> Result { match name { "avg" => { @@ -553,6 +655,12 @@ impl Func { } Ok(Self::Agg(AggFunc::Total)) } + "timediff" => { + if arg_count != 2 { + crate::bail_parse_error!("wrong number of arguments to function {}()", name) + } + Ok(Self::Scalar(ScalarFunc::TimeDiff)) + } #[cfg(feature = "json")] "jsonb_group_array" => Ok(Self::Agg(AggFunc::JsonbGroupArray)), #[cfg(feature = "json")] @@ -596,6 +704,8 @@ impl Func { "sqlite_version" => Ok(Self::Scalar(ScalarFunc::SqliteVersion)), "sqlite_source_id" => Ok(Self::Scalar(ScalarFunc::SqliteSourceId)), "replace" => Ok(Self::Scalar(ScalarFunc::Replace)), + "likely" => Ok(Self::Scalar(ScalarFunc::Likely)), + "likelihood" => Ok(Self::Scalar(ScalarFunc::Likelihood)), #[cfg(feature = "json")] "json" => Ok(Self::Json(JsonFunc::Json)), #[cfg(feature = "json")] diff --git a/core/functions/datetime.rs b/core/functions/datetime.rs index 294fbfb2d..b58527f40 100644 --- a/core/functions/datetime.rs +++ b/core/functions/datetime.rs @@ -46,21 +46,13 @@ enum DateTimeOutput { DateTime, // Holds the format string StrfTime(String), + JuliaDay, } fn exec_datetime(values: &[Register], output_type: DateTimeOutput) -> OwnedValue { if values.is_empty() { let now = parse_naive_date_time(&OwnedValue::build_text("now")).unwrap(); - - let formatted_str = match output_type { - DateTimeOutput::DateTime => now.format("%Y-%m-%d %H:%M:%S").to_string(), - DateTimeOutput::Time => now.format("%H:%M:%S").to_string(), - DateTimeOutput::Date => now.format("%Y-%m-%d").to_string(), - DateTimeOutput::StrfTime(ref format_str) => strftime_format(&now, format_str), - }; - - // Parse here - return OwnedValue::build_text(&formatted_str); + return format_dt(now, output_type, false); } if let Some(mut dt) = parse_naive_date_time(values[0].get_owned_value()) { // if successful, treat subsequent entries as modifiers @@ -91,28 +83,32 @@ fn modify_dt(dt: &mut NaiveDateTime, mods: &[Register], output_type: DateTimeOut if is_leap_second(dt) || *dt > get_max_datetime_exclusive() { return OwnedValue::build_text(""); } - let formatted = format_dt(*dt, output_type, subsec_requested); - 
OwnedValue::build_text(&formatted) + format_dt(*dt, output_type, subsec_requested) } -fn format_dt(dt: NaiveDateTime, output_type: DateTimeOutput, subsec: bool) -> String { +fn format_dt(dt: NaiveDateTime, output_type: DateTimeOutput, subsec: bool) -> OwnedValue { match output_type { - DateTimeOutput::Date => dt.format("%Y-%m-%d").to_string(), + DateTimeOutput::Date => OwnedValue::from_text(dt.format("%Y-%m-%d").to_string().as_str()), DateTimeOutput::Time => { - if subsec { + let t = if subsec { dt.format("%H:%M:%S%.3f").to_string() } else { dt.format("%H:%M:%S").to_string() - } + }; + OwnedValue::from_text(t.as_str()) } DateTimeOutput::DateTime => { - if subsec { + let t = if subsec { dt.format("%Y-%m-%d %H:%M:%S%.3f").to_string() } else { dt.format("%Y-%m-%d %H:%M:%S").to_string() - } + }; + OwnedValue::from_text(t.as_str()) } - DateTimeOutput::StrfTime(format_str) => strftime_format(&dt, &format_str), + DateTimeOutput::StrfTime(format_str) => { + OwnedValue::from_text(strftime_format(&dt, &format_str).as_str()) + } + DateTimeOutput::JuliaDay => OwnedValue::Float(to_julian_day_exact(&dt)), } } @@ -325,14 +321,8 @@ fn last_day_in_month(year: i32, month: u32) -> u32 { 28 } -pub fn exec_julianday(time_value: &OwnedValue) -> Result { - let dt = parse_naive_date_time(time_value); - match dt { - // if we did something heinous like: parse::().unwrap().to_string() - // that would solve the precision issue, but dear lord... - Some(dt) => Ok(format!("{:.1$}", to_julian_day_exact(&dt), 8)), - None => Ok(String::new()), - } +pub fn exec_julianday(values: &[Register]) -> OwnedValue { + exec_datetime(values, DateTimeOutput::JuliaDay) } fn to_julian_day_exact(dt: &NaiveDateTime) -> f64 { @@ -656,6 +646,61 @@ fn parse_modifier(modifier: &str) -> Result { } } +pub fn exec_timediff(values: &[Register]) -> OwnedValue { + if values.len() < 2 { + return OwnedValue::Null; + } + + let start = parse_naive_date_time(values[0].get_owned_value()); + let end = parse_naive_date_time(values[1].get_owned_value()); + + match (start, end) { + (Some(start), Some(end)) => { + let duration = start.signed_duration_since(end); + format_time_duration(&duration) + } + _ => OwnedValue::Null, + } +} + +/// Format the time duration as +/-YYYY-MM-DD HH:MM:SS.SSS as per SQLite's timediff() function +fn format_time_duration(duration: &chrono::Duration) -> OwnedValue { + let is_negative = duration.num_seconds() < 0; + + let abs_duration = if is_negative { + -duration.clone() + } else { + duration.clone() + }; + + let total_seconds = abs_duration.num_seconds(); + let hours = (total_seconds % 86400) / 3600; + let minutes = (total_seconds % 3600) / 60; + let seconds = total_seconds % 60; + + let days = total_seconds / 86400; + let years = days / 365; + let remaining_days = days % 365; + let months = 0; + + let total_millis = abs_duration.num_milliseconds(); + let millis = total_millis % 1000; + + let result = format!( + "{}{:04}-{:02}-{:02} {:02}:{:02}:{:02}.{:03}", + if is_negative { "-" } else { "+" }, + years, + months, + remaining_days, + hours, + minutes, + seconds, + millis + ); + + OwnedValue::build_text(&result) +} + #[cfg(test)] mod tests { use super::*; @@ -1642,4 +1687,67 @@ mod tests { #[test] fn test_strftime() {} + + #[test] + fn test_exec_timediff() { + let start = OwnedValue::build_text("12:00:00"); + let end = OwnedValue::build_text("14:30:45"); + let expected = OwnedValue::build_text("-0000-00-00 02:30:45.000"); + assert_eq!( + exec_timediff(&[Register::OwnedValue(start), Register::OwnedValue(end)]), + expected + ); + 
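+
+ // Added example (not from the original suite): per format_time_duration
+ // above, the result is negative when start is earlier than end, and a
+ // whole-day gap lands in the DD field.
+ let start = OwnedValue::build_text("2023-05-10 00:00:00");
+ let end = OwnedValue::build_text("2023-05-08 00:00:00");
+ let expected = OwnedValue::build_text("+0000-00-02 00:00:00.000");
+ assert_eq!(
+ exec_timediff(&[Register::OwnedValue(start), Register::OwnedValue(end)]),
+ expected
+ );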
+ let start = OwnedValue::build_text("14:30:45"); + let end = OwnedValue::build_text("12:00:00"); + let expected = OwnedValue::build_text("+0000-00-00 02:30:45.000"); + assert_eq!( + exec_timediff(&[Register::OwnedValue(start), Register::OwnedValue(end)]), + expected + ); + + let start = OwnedValue::build_text("12:00:01.300"); + let end = OwnedValue::build_text("12:00:00.500"); + let expected = OwnedValue::build_text("+0000-00-00 00:00:00.800"); + assert_eq!( + exec_timediff(&[Register::OwnedValue(start), Register::OwnedValue(end)]), + expected + ); + + let start = OwnedValue::build_text("13:30:00"); + let end = OwnedValue::build_text("16:45:30"); + let expected = OwnedValue::build_text("-0000-00-00 03:15:30.000"); + assert_eq!( + exec_timediff(&[Register::OwnedValue(start), Register::OwnedValue(end)]), + expected + ); + + let start = OwnedValue::build_text("2023-05-10 23:30:00"); + let end = OwnedValue::build_text("2023-05-11 01:15:00"); + let expected = OwnedValue::build_text("-0000-00-00 01:45:00.000"); + assert_eq!( + exec_timediff(&[Register::OwnedValue(start), Register::OwnedValue(end)]), + expected + ); + + let start = OwnedValue::Null; + let end = OwnedValue::build_text("12:00:00"); + let expected = OwnedValue::Null; + assert_eq!( + exec_timediff(&[Register::OwnedValue(start), Register::OwnedValue(end)]), + expected + ); + + let start = OwnedValue::build_text("not a time"); + let end = OwnedValue::build_text("12:00:00"); + let expected = OwnedValue::Null; + assert_eq!( + exec_timediff(&[Register::OwnedValue(start), Register::OwnedValue(end)]), + expected + ); + + let start = OwnedValue::build_text("12:00:00"); + let expected = OwnedValue::Null; + assert_eq!(exec_timediff(&[Register::OwnedValue(start)]), expected); + } } diff --git a/core/io/clock.rs b/core/io/clock.rs new file mode 100644 index 000000000..3a38ad955 --- /dev/null +++ b/core/io/clock.rs @@ -0,0 +1,9 @@ +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] +pub struct Instant { + pub secs: i64, + pub micros: u32, +} + +pub trait Clock { + fn now(&self) -> Instant; +} diff --git a/core/io/generic.rs b/core/io/generic.rs index 1c0a5b6ff..aab5f2687 100644 --- a/core/io/generic.rs +++ b/core/io/generic.rs @@ -1,4 +1,5 @@ -use crate::{Completion, File, LimboError, OpenFlags, Result, IO}; +use super::MemoryIO; +use crate::{Clock, Completion, File, Instant, LimboError, OpenFlags, Result, IO}; use std::cell::RefCell; use std::io::{Read, Seek, Write}; use std::sync::Arc; @@ -19,13 +20,18 @@ unsafe impl Sync for GenericIO {} impl IO for GenericIO { fn open_file(&self, path: &str, flags: OpenFlags, _direct: bool) -> Result> { trace!("open_file(path = {})", path); - let file = std::fs::OpenOptions::new() - .read(true) - .write(true) - .create(matches!(flags, OpenFlags::Create)) - .open(path)?; + let mut file = std::fs::File::options(); + file.read(true); + + if !flags.contains(OpenFlags::ReadOnly) { + file.write(true); + file.create(flags.contains(OpenFlags::Create)); + } + + let file = file.open(path)?; Ok(Arc::new(GenericFile { file: RefCell::new(file), + memory_io: Arc::new(MemoryIO::new()), })) } @@ -39,13 +45,24 @@ impl IO for GenericIO { i64::from_ne_bytes(buf) } - fn get_current_time(&self) -> String { - chrono::Local::now().format("%Y-%m-%d %H:%M:%S").to_string() + fn get_memory_io(&self) -> Arc { + Arc::new(MemoryIO::new()) + } +} + +impl Clock for GenericIO { + fn now(&self) -> Instant { + let now = chrono::Local::now(); + Instant { + secs: now.timestamp(), + micros: now.timestamp_subsec_micros(), + } } } pub 
struct GenericFile { file: RefCell, + memory_io: Arc, } unsafe impl Send for GenericFile {} diff --git a/core/io/io_uring.rs b/core/io/io_uring.rs index cca473790..25d6aa33e 100644 --- a/core/io/io_uring.rs +++ b/core/io/io_uring.rs @@ -1,5 +1,6 @@ use super::{common, Completion, File, OpenFlags, WriteCompletion, IO}; -use crate::{LimboError, Result}; +use crate::io::clock::{Clock, Instant}; +use crate::{LimboError, MemoryIO, Result}; use rustix::fs::{self, FlockOperation, OFlags}; use rustix::io_uring::iovec; use std::cell::RefCell; @@ -138,11 +139,15 @@ impl WrappedIOUring { impl IO for UringIO { fn open_file(&self, path: &str, flags: OpenFlags, direct: bool) -> Result> { trace!("open_file(path = {})", path); - let file = std::fs::File::options() - .read(true) - .write(true) - .create(matches!(flags, OpenFlags::Create)) - .open(path)?; + let mut file = std::fs::File::options(); + file.read(true); + + if !flags.contains(OpenFlags::ReadOnly) { + file.write(true); + file.create(flags.contains(OpenFlags::Create)); + } + + let file = file.open(path)?; // Let's attempt to enable direct I/O. Not all filesystems support it // so ignore any errors. let fd = file.as_fd(); @@ -157,7 +162,7 @@ impl IO for UringIO { file, }); if std::env::var(common::ENV_DISABLE_FILE_LOCK).is_err() { - uring_file.lock_file(true)?; + uring_file.lock_file(!flags.contains(OpenFlags::ReadOnly))?; } Ok(uring_file) } @@ -197,8 +202,18 @@ impl IO for UringIO { i64::from_ne_bytes(buf) } - fn get_current_time(&self) -> String { - chrono::Local::now().format("%Y-%m-%d %H:%M:%S").to_string() + fn get_memory_io(&self) -> Arc { + Arc::new(MemoryIO::new()) + } +} + +impl Clock for UringIO { + fn now(&self) -> Instant { + let now = chrono::Local::now(); + Instant { + secs: now.timestamp(), + micros: now.timestamp_subsec_micros(), + } } } diff --git a/core/io/memory.rs b/core/io/memory.rs index c9519845d..9cc56a5e3 100644 --- a/core/io/memory.rs +++ b/core/io/memory.rs @@ -1,6 +1,7 @@ -use super::{Buffer, Completion, File, OpenFlags, IO}; +use super::{Buffer, Clock, Completion, File, OpenFlags, IO}; use crate::Result; +use crate::io::clock::Instant; use std::{ cell::{Cell, RefCell, UnsafeCell}, collections::BTreeMap, @@ -29,6 +30,16 @@ impl Default for MemoryIO { } } +impl Clock for MemoryIO { + fn now(&self) -> Instant { + let now = chrono::Local::now(); + Instant { + secs: now.timestamp(), + micros: now.timestamp_subsec_micros(), + } + } +} + impl IO for MemoryIO { fn open_file(&self, _path: &str, _flags: OpenFlags, _direct: bool) -> Result> { Ok(Arc::new(MemoryFile { @@ -48,8 +59,8 @@ impl IO for MemoryIO { i64::from_ne_bytes(buf) } - fn get_current_time(&self) -> String { - chrono::Local::now().format("%Y-%m-%d %H:%M:%S").to_string() + fn get_memory_io(&self) -> Arc { + Arc::new(MemoryIO::new()) } } diff --git a/core/io/mod.rs b/core/io/mod.rs index 7eb8845bb..6f75e9bea 100644 --- a/core/io/mod.rs +++ b/core/io/mod.rs @@ -1,4 +1,5 @@ use crate::Result; +use bitflags::bitflags; use cfg_block::cfg_block; use std::fmt; use std::sync::Arc; @@ -19,29 +20,31 @@ pub trait File: Send + Sync { fn size(&self) -> Result; } -#[derive(Copy, Clone)] -pub enum OpenFlags { - None, - Create, -} +#[derive(Debug, Copy, Clone, PartialEq)] +pub struct OpenFlags(i32); -impl OpenFlags { - pub fn to_flags(&self) -> i32 { - match self { - Self::None => 0, - Self::Create => 1, - } +bitflags! 
{ + impl OpenFlags: i32 { + const None = 0b00000000; + const Create = 0b0000001; + const ReadOnly = 0b0000010; } } -pub trait IO: Send + Sync { +impl Default for OpenFlags { + fn default() -> Self { + Self::Create + } +} + +pub trait IO: Clock + Send + Sync { fn open_file(&self, path: &str, flags: OpenFlags, direct: bool) -> Result>; fn run_once(&self) -> Result<()>; fn generate_random_number(&self) -> i64; - fn get_current_time(&self) -> String; + fn get_memory_io(&self) -> Arc; } pub type Complete = dyn Fn(Arc>); @@ -191,7 +194,8 @@ cfg_block! { mod unix; #[cfg(feature = "fs")] pub use unix::UnixIO; - pub use io_uring::UringIO as PlatformIO; + pub use unix::UnixIO as SyscallIO; + pub use unix::UnixIO as PlatformIO; } #[cfg(any(all(target_os = "linux",not(feature = "io_uring")), target_os = "macos"))] { @@ -199,16 +203,19 @@ cfg_block! { #[cfg(feature = "fs")] pub use unix::UnixIO; pub use unix::UnixIO as PlatformIO; + pub use PlatformIO as SyscallIO; } #[cfg(target_os = "windows")] { mod windows; pub use windows::WindowsIO as PlatformIO; + pub use PlatformIO as SyscallIO; } #[cfg(not(any(target_os = "linux", target_os = "macos", target_os = "windows")))] { mod generic; pub use generic::GenericIO as PlatformIO; + pub use PlatformIO as SyscallIO; } } @@ -216,4 +223,6 @@ mod memory; #[cfg(feature = "fs")] mod vfs; pub use memory::MemoryIO; +pub mod clock; mod common; +pub use clock::Clock; diff --git a/core/io/unix.rs b/core/io/unix.rs index bbeb1266b..721ba20f3 100644 --- a/core/io/unix.rs +++ b/core/io/unix.rs @@ -2,7 +2,8 @@ use crate::error::LimboError; use crate::io::common; use crate::Result; -use super::{Completion, File, OpenFlags, IO}; +use super::{Completion, File, MemoryIO, OpenFlags, IO}; +use crate::io::clock::{Clock, Instant}; use polling::{Event, Events, Poller}; use rustix::{ fd::{AsFd, AsRawFd}, @@ -183,15 +184,28 @@ impl UnixIO { } } +impl Clock for UnixIO { + fn now(&self) -> Instant { + let now = chrono::Local::now(); + Instant { + secs: now.timestamp(), + micros: now.timestamp_subsec_micros(), + } + } +} + impl IO for UnixIO { fn open_file(&self, path: &str, flags: OpenFlags, _direct: bool) -> Result> { trace!("open_file(path = {})", path); - let file = std::fs::File::options() - .read(true) - .custom_flags(OFlags::NONBLOCK.bits() as i32) - .write(true) - .create(matches!(flags, OpenFlags::Create)) - .open(path)?; + let mut file = std::fs::File::options(); + file.read(true).custom_flags(OFlags::NONBLOCK.bits() as i32); + + if !flags.contains(OpenFlags::ReadOnly) { + file.write(true); + file.create(flags.contains(OpenFlags::Create)); + } + + let file = file.open(path)?; #[allow(clippy::arc_with_non_send_sync)] let unix_file = Arc::new(UnixFile { @@ -200,7 +214,7 @@ impl IO for UnixIO { callbacks: BorrowedCallbacks(self.callbacks.as_mut().into()), }); if std::env::var(common::ENV_DISABLE_FILE_LOCK).is_err() { - unix_file.lock_file(true)?; + unix_file.lock_file(!flags.contains(OpenFlags::ReadOnly))?; } Ok(unix_file) } @@ -248,8 +262,8 @@ impl IO for UnixIO { i64::from_ne_bytes(buf) } - fn get_current_time(&self) -> String { - chrono::Local::now().format("%Y-%m-%d %H:%M:%S").to_string() + fn get_memory_io(&self) -> Arc { + Arc::new(MemoryIO::new()) } } diff --git a/core/io/vfs.rs b/core/io/vfs.rs index f3cac9d30..d02f7d345 100644 --- a/core/io/vfs.rs +++ b/core/io/vfs.rs @@ -1,11 +1,21 @@ +use super::{Buffer, Completion, File, MemoryIO, OpenFlags, IO}; use crate::ext::VfsMod; +use crate::io::clock::{Clock, Instant}; use crate::{LimboError, Result}; use 
limbo_ext::{VfsFileImpl, VfsImpl}; use std::cell::RefCell; use std::ffi::{c_void, CString}; use std::sync::Arc; -use super::{Buffer, Completion, File, OpenFlags, IO}; +impl Clock for VfsMod { + fn now(&self) -> Instant { + let now = chrono::Local::now(); + Instant { + secs: now.timestamp(), + micros: now.timestamp_subsec_micros(), + } + } +} impl IO for VfsMod { fn open_file(&self, path: &str, flags: OpenFlags, direct: bool) -> Result> { @@ -14,7 +24,7 @@ impl IO for VfsMod { })?; let ctx = self.ctx as *mut c_void; let vfs = unsafe { &*self.ctx }; - let file = unsafe { (vfs.open)(ctx, c_path.as_ptr(), flags.to_flags(), direct) }; + let file = unsafe { (vfs.open)(ctx, c_path.as_ptr(), flags.0, direct) }; if file.is_null() { return Err(LimboError::ExtensionError("File not found".to_string())); } @@ -41,6 +51,13 @@ impl IO for VfsMod { unsafe { (vfs.gen_random_number)() } } + fn get_memory_io(&self) -> Arc { + Arc::new(MemoryIO::new()) + } +} + +impl VfsMod { + #[allow(dead_code)] // used in FFI call fn get_current_time(&self) -> String { if self.ctx.is_null() { return "".to_string(); diff --git a/core/io/windows.rs b/core/io/windows.rs index 9bfd523a9..a329abc14 100644 --- a/core/io/windows.rs +++ b/core/io/windows.rs @@ -1,9 +1,9 @@ -use crate::{Completion, File, LimboError, OpenFlags, Result, IO}; +use super::MemoryIO; +use crate::{Clock, Completion, File, Instant, LimboError, OpenFlags, Result, IO}; use std::cell::RefCell; use std::io::{Read, Seek, Write}; use std::sync::Arc; use tracing::{debug, trace}; - pub struct WindowsIO {} impl WindowsIO { @@ -19,11 +19,15 @@ unsafe impl Sync for WindowsIO {} impl IO for WindowsIO { fn open_file(&self, path: &str, flags: OpenFlags, direct: bool) -> Result> { trace!("open_file(path = {})", path); - let file = std::fs::File::options() - .read(true) - .write(true) - .create(matches!(flags, OpenFlags::Create)) - .open(path)?; + let mut file = std::fs::File::options(); + file.read(true); + + if !flags.contains(OpenFlags::ReadOnly) { + file.write(true); + file.create(flags.contains(OpenFlags::Create)); + } + + let file = file.open(path)?; Ok(Arc::new(WindowsFile { file: RefCell::new(file), })) @@ -39,8 +43,18 @@ impl IO for WindowsIO { i64::from_ne_bytes(buf) } - fn get_current_time(&self) -> String { - chrono::Local::now().format("%Y-%m-%d %H:%M:%S").to_string() + fn get_memory_io(&self) -> Arc { + Arc::new(MemoryIO::new()) + } +} + +impl Clock for WindowsIO { + fn now(&self) -> Instant { + let now = chrono::Local::now(); + Instant { + secs: now.timestamp(), + micros: now.timestamp_subsec_micros(), + } } } diff --git a/core/lib.rs b/core/lib.rs index e827c3d0d..2f7ab0577 100644 --- a/core/lib.rs +++ b/core/lib.rs @@ -20,6 +20,12 @@ mod util; mod vdbe; mod vector; +#[cfg(feature = "fuzz")] +pub mod numeric; + +#[cfg(not(feature = "fuzz"))] +mod numeric; + #[cfg(not(target_family = "wasm"))] #[global_allocator] static GLOBAL: mimalloc::MiMalloc = mimalloc::MiMalloc; @@ -27,12 +33,15 @@ static GLOBAL: mimalloc::MiMalloc = mimalloc::MiMalloc; use crate::{fast_lock::SpinLock, translate::optimizer::optimize_plan}; pub use error::LimboError; use fallible_iterator::FallibleIterator; +pub use io::clock::{Clock, Instant}; #[cfg(all(feature = "fs", target_family = "unix"))] pub use io::UnixIO; #[cfg(all(feature = "fs", target_os = "linux", feature = "io_uring"))] pub use io::UringIO; -pub use io::{Buffer, Completion, File, MemoryIO, OpenFlags, PlatformIO, WriteCompletion, IO}; -use limbo_ext::{ResultCode, VTabKind, VTabModuleImpl}; +pub use io::{ + Buffer, 
Completion, File, MemoryIO, OpenFlags, PlatformIO, SyscallIO, WriteCompletion, IO, +}; +use limbo_ext::{ConstraintInfo, IndexInfo, OrderByInfo, ResultCode, VTabKind, VTabModuleImpl}; use limbo_sqlite3_parser::{ast, ast::Cmd, lexer::sql::Parser}; use parking_lot::RwLock; use schema::{Column, Schema}; @@ -66,20 +75,19 @@ pub use types::OwnedValue; pub use types::RefValue; use util::{columns_from_create_table_body, parse_schema_rows}; use vdbe::{builder::QueryMode, VTabOpaqueCursor}; - pub type Result = std::result::Result; pub static DATABASE_VERSION: OnceLock = OnceLock::new(); -#[derive(Clone, PartialEq, Eq)] +#[derive(Clone, Copy, PartialEq, Eq)] enum TransactionState { Write, Read, None, } -pub(crate) type MvStore = crate::mvcc::MvStore; +pub(crate) type MvStore = mvcc::MvStore; -pub(crate) type MvCursor = crate::mvcc::cursor::ScanCursor; +pub(crate) type MvCursor = mvcc::cursor::ScanCursor; pub struct Database { mv_store: Option>, @@ -88,11 +96,12 @@ pub struct Database { header: Arc>, db_file: Arc, io: Arc, - page_size: u16, + page_size: u32, // Shared structures of a Database are the parts that are common to multiple threads that might // create DB connections. shared_page_cache: Arc>, shared_wal: Arc>, + open_flags: OpenFlags, } unsafe impl Send for Database {} @@ -101,53 +110,74 @@ unsafe impl Sync for Database {} impl Database { #[cfg(feature = "fs")] pub fn open_file(io: Arc, path: &str, enable_mvcc: bool) -> Result> { - use storage::wal::WalFileShared; + Self::open_file_with_flags(io, path, OpenFlags::default(), enable_mvcc) + } - let file = io.open_file(path, OpenFlags::Create, true)?; + #[cfg(feature = "fs")] + pub fn open_file_with_flags( + io: Arc, + path: &str, + flags: OpenFlags, + enable_mvcc: bool, + ) -> Result> { + let file = io.open_file(path, flags, true)?; maybe_init_database_file(&file, &io)?; let db_file = Arc::new(DatabaseFile::new(file)); - let wal_path = format!("{}-wal", path); - let db_header = Pager::begin_open(db_file.clone())?; - io.run_once()?; - let page_size = db_header.lock().page_size; - let wal_shared = WalFileShared::open_shared(&io, wal_path.as_str(), page_size)?; - Self::open(io, db_file, wal_shared, enable_mvcc) + Self::open_with_flags(io, path, db_file, flags, enable_mvcc) } #[allow(clippy::arc_with_non_send_sync)] pub fn open( io: Arc, + path: &str, db_file: Arc, - shared_wal: Arc>, + enable_mvcc: bool, + ) -> Result> { + Self::open_with_flags(io, path, db_file, OpenFlags::default(), enable_mvcc) + } + + #[allow(clippy::arc_with_non_send_sync)] + pub fn open_with_flags( + io: Arc, + path: &str, + db_file: Arc, + flags: OpenFlags, enable_mvcc: bool, ) -> Result> { let db_header = Pager::begin_open(db_file.clone())?; + // ensure db header is there io.run_once()?; + + let page_size = db_header.lock().get_page_size(); + let wal_path = format!("{}-wal", path); + let shared_wal = WalFileShared::open_shared(&io, wal_path.as_str(), page_size)?; + DATABASE_VERSION.get_or_init(|| { let version = db_header.lock().version_number; version.to_string() }); + let mv_store = if enable_mvcc { Some(Rc::new(MvStore::new( - crate::mvcc::LocalClock::new(), - crate::mvcc::persistent_storage::Storage::new_noop(), + mvcc::LocalClock::new(), + mvcc::persistent_storage::Storage::new_noop(), ))) } else { None }; + let shared_page_cache = Arc::new(RwLock::new(DumbLruPageCache::new(10))); - let page_size = db_header.lock().page_size; - let header = db_header; let schema = Arc::new(RwLock::new(Schema::new())); let db = Database { mv_store, schema: schema.clone(), - header: 
header.clone(), + header: db_header.clone(), shared_page_cache: shared_page_cache.clone(), shared_wal: shared_wal.clone(), db_file, io: io.clone(), page_size, + open_flags: flags, }; let db = Arc::new(db); { @@ -158,7 +188,13 @@ impl Database { .try_write() .expect("lock on schema should succeed first try"); let syms = conn.syms.borrow(); - parse_schema_rows(rows, &mut schema, io, syms.deref(), None)?; + if let Err(LimboError::ExtensionError(e)) = + parse_schema_rows(rows, &mut schema, io, &syms, None) + { + // this means that a vtab exists and we no longer have the module loaded. we print + // a warning to the user to load the module + eprintln!("Warning: {}", e); + } } Ok(db) } @@ -168,14 +204,14 @@ impl Database { let wal = Rc::new(RefCell::new(WalFile::new( self.io.clone(), - self.page_size as usize, + self.page_size, self.shared_wal.clone(), buffer_pool.clone(), ))); let pager = Rc::new(Pager::finish_open( self.header.clone(), self.db_file.clone(), - wal, + Some(wal), self.io.clone(), self.shared_page_cache.clone(), buffer_pool, @@ -186,9 +222,9 @@ impl Database { schema: self.schema.clone(), header: self.header.clone(), last_insert_rowid: Cell::new(0), - auto_commit: RefCell::new(true), + auto_commit: Cell::new(true), mv_transactions: RefCell::new(Vec::new()), - transaction_state: RefCell::new(TransactionState::None), + transaction_state: Cell::new(TransactionState::None), last_change: Cell::new(0), syms: RefCell::new(SymbolTable::new()), total_changes: Cell::new(0), @@ -204,12 +240,12 @@ impl Database { #[cfg(feature = "fs")] #[allow(clippy::arc_with_non_send_sync)] pub fn open_new(path: &str, vfs: &str) -> Result<(Arc, Arc)> { - let vfsmods = crate::ext::add_builtin_vfs_extensions(None)?; + let vfsmods = ext::add_builtin_vfs_extensions(None)?; let io: Arc = match vfsmods.iter().find(|v| v.0 == vfs).map(|v| v.1.clone()) { Some(vfs) => vfs, None => match vfs.trim() { "memory" => Arc::new(MemoryIO::new()), - "syscall" => Arc::new(PlatformIO::new()?), + "syscall" => Arc::new(SyscallIO::new()?), #[cfg(all(target_os = "linux", feature = "io_uring"))] "io_uring" => Arc::new(UringIO::new()?), other => { @@ -231,7 +267,7 @@ pub fn maybe_init_database_file(file: &Arc, io: &Arc) -> Resul let db_header = DatabaseHeader::default(); let page1 = allocate_page( 1, - &Rc::new(BufferPool::new(db_header.page_size as usize)), + &Rc::new(BufferPool::new(db_header.get_page_size() as usize)), DATABASE_HEADER_SIZE, ); { @@ -243,7 +279,7 @@ pub fn maybe_init_database_file(file: &Arc, io: &Arc) -> Resul &page1, storage::sqlite3_ondisk::PageType::TableLeaf, DATABASE_HEADER_SIZE, - db_header.page_size - db_header.reserved_space as u16, + (db_header.get_page_size() - db_header.reserved_space as u32) as u16, ); let contents = page1.get().contents.as_mut().unwrap(); @@ -278,9 +314,9 @@ pub struct Connection { pager: Rc, schema: Arc>, header: Arc>, - auto_commit: RefCell, + auto_commit: Cell, mv_transactions: RefCell>, - transaction_state: RefCell, + transaction_state: Cell, last_insert_rowid: Cell, last_change: Cell, total_changes: Cell, @@ -517,7 +553,26 @@ impl Connection { } pub fn get_auto_commit(&self) -> bool { - *self.auto_commit.borrow() + self.auto_commit.get() + } + + pub fn parse_schema_rows(self: &Rc) -> Result<()> { + let rows = self.query("SELECT * FROM sqlite_schema")?; + let mut schema = self + .schema + .try_write() + .expect("lock on schema should succeed first try"); + { + let syms = self.syms.borrow(); + if let Err(LimboError::ExtensionError(e)) = + parse_schema_rows(rows, &mut schema, 
self.pager.io.clone(), &syms, None) + { + // this means that a vtab exists and we no longer have the module loaded. we print + // a warning to the user to load the module + eprintln!("Warning: {}", e); + } + } + Ok(()) } } @@ -564,7 +619,7 @@ impl Statement { self.program.result_columns.len() } - pub fn get_column_name(&self, idx: usize) -> Cow { + pub fn get_column_name(&self, idx: usize) -> Cow { let column = &self.program.result_columns[idx]; match column.name(&self.program.table_references) { Some(name) => Cow::Borrowed(name), @@ -607,12 +662,28 @@ pub struct VirtualTable { args: Option>, pub implementation: Rc, columns: Vec, + kind: VTabKind, } impl VirtualTable { pub(crate) fn rowid(&self, cursor: &VTabOpaqueCursor) -> i64 { unsafe { (self.implementation.rowid)(cursor.as_ptr()) } } + + pub(crate) fn best_index( + &self, + constraints: &[ConstraintInfo], + order_by: &[OrderByInfo], + ) -> IndexInfo { + unsafe { + IndexInfo::from_ffi((self.implementation.best_idx)( + constraints.as_ptr(), + constraints.len() as i32, + order_by.as_ptr(), + order_by.len() as i32, + )) + } + } /// takes ownership of the provided Args pub(crate) fn from_args( tbl_name: Option<&str>, @@ -630,7 +701,7 @@ impl VirtualTable { module_name )))?; if let VTabKind::VirtualTable = kind { - if module.module_kind != VTabKind::VirtualTable { + if module.module_kind == VTabKind::TableValuedFunction { return Err(LimboError::ExtensionError(format!( "{} is not a virtual table module", module_name @@ -648,6 +719,7 @@ impl VirtualTable { implementation: module.implementation.clone(), columns, args: exprs, + kind, }); return Ok(vtab); } @@ -661,21 +733,30 @@ impl VirtualTable { VTabOpaqueCursor::new(cursor) } + #[tracing::instrument(skip(cursor))] pub fn filter( &self, cursor: &VTabOpaqueCursor, + idx_num: i32, + idx_str: Option, arg_count: usize, - args: Vec, + args: Vec, ) -> Result { - let mut filter_args = Vec::with_capacity(arg_count); - for i in 0..arg_count { - let ownedvalue_arg = args.get(i).unwrap(); - filter_args.push(ownedvalue_arg.to_ffi()); - } + tracing::trace!("xFilter"); + let c_idx_str = idx_str + .map(|s| std::ffi::CString::new(s).unwrap()) + .map(|cstr| cstr.into_raw()) + .unwrap_or(std::ptr::null_mut()); let rc = unsafe { - (self.implementation.filter)(cursor.as_ptr(), arg_count as i32, filter_args.as_ptr()) + (self.implementation.filter)( + cursor.as_ptr(), + arg_count as i32, + args.as_ptr(), + c_idx_str, + idx_num, + ) }; - for arg in filter_args { + for arg in args { unsafe { arg.__free_internal_type(); } @@ -725,6 +806,19 @@ impl VirtualTable { _ => Err(LimboError::ExtensionError(rc.to_string())), } } + + pub fn destroy(&self) -> Result<()> { + let implementation = self.implementation.as_ref(); + let rc = unsafe { + (self.implementation.destroy)( + implementation as *const VTabModuleImpl as *const std::ffi::c_void, + ) + }; + match rc { + ResultCode::OK => Ok(()), + _ => Err(LimboError::ExtensionError(rc.to_string())), + } + } } pub(crate) struct SymbolTable { diff --git a/core/numeric.rs b/core/numeric.rs new file mode 100644 index 000000000..e736edbc4 --- /dev/null +++ b/core/numeric.rs @@ -0,0 +1,575 @@ +use crate::OwnedValue; + +mod nonnan; + +use nonnan::NonNan; + +// TODO: Remove when https://github.com/rust-lang/libs-team/issues/230 is available +trait SaturatingShl { + fn saturating_shl(self, rhs: u32) -> Self; +} + +impl SaturatingShl for i64 { + fn saturating_shl(self, rhs: u32) -> Self { + if rhs >= Self::BITS { + 0 + } else { + self << rhs + } + } +} + +// TODO: Remove when 
https://github.com/rust-lang/libs-team/issues/230 is available +trait SaturatingShr { + fn saturating_shr(self, rhs: u32) -> Self; +} + +impl SaturatingShr for i64 { + fn saturating_shr(self, rhs: u32) -> Self { + if rhs >= Self::BITS { + if self >= 0 { + 0 + } else { + -1 + } + } else { + self >> rhs + } + } +} + +#[derive(Debug, Clone, Copy)] +pub enum Numeric { + Null, + Integer(i64), + Float(NonNan), +} + +impl Numeric { + pub fn try_into_bool(&self) -> Option { + match self { + Numeric::Null => None, + Numeric::Integer(0) => Some(false), + Numeric::Float(non_nan) if *non_nan == 0.0 => Some(false), + _ => Some(true), + } + } +} + +impl From for NullableInteger { + fn from(value: Numeric) -> Self { + match value { + Numeric::Null => NullableInteger::Null, + Numeric::Integer(v) => NullableInteger::Integer(v), + Numeric::Float(v) => NullableInteger::Integer(f64::from(v) as i64), + } + } +} + +impl From for OwnedValue { + fn from(value: Numeric) -> Self { + match value { + Numeric::Null => OwnedValue::Null, + Numeric::Integer(v) => OwnedValue::Integer(v), + Numeric::Float(v) => OwnedValue::Float(v.into()), + } + } +} + +impl> From for Numeric { + fn from(value: T) -> Self { + let text = value.as_ref(); + + match str_to_f64(text) { + None => Self::Integer(0), + Some(StrToF64::Fractional(value)) => Self::Float(value), + Some(StrToF64::Decimal(real)) => { + let integer = str_to_i64(text).unwrap_or(0); + + if real == integer as f64 { + Self::Integer(integer) + } else { + Self::Float(real) + } + } + } + } +} + +impl From for Numeric { + fn from(value: OwnedValue) -> Self { + Self::from(&value) + } +} +impl From<&OwnedValue> for Numeric { + fn from(value: &OwnedValue) -> Self { + match value { + OwnedValue::Null => Self::Null, + OwnedValue::Integer(v) => Self::Integer(*v), + OwnedValue::Float(v) => match NonNan::new(*v) { + Some(v) => Self::Float(v), + None => Self::Null, + }, + OwnedValue::Text(text) => Numeric::from(text.as_str()), + OwnedValue::Blob(blob) => { + let text = String::from_utf8_lossy(blob.as_slice()); + Numeric::from(&text) + } + } + } +} + +impl std::ops::Add for Numeric { + type Output = Self; + + fn add(self, rhs: Self) -> Self::Output { + match (self, rhs) { + (Numeric::Null, _) | (_, Numeric::Null) => Numeric::Null, + (Numeric::Integer(lhs), Numeric::Integer(rhs)) => match lhs.checked_add(rhs) { + None => Numeric::Float(lhs.into()) + Numeric::Float(rhs.into()), + Some(i) => Numeric::Integer(i), + }, + (Numeric::Float(lhs), Numeric::Float(rhs)) => match lhs + rhs { + Some(v) => Numeric::Float(v), + None => Numeric::Null, + }, + (f @ Numeric::Float(_), Numeric::Integer(i)) + | (Numeric::Integer(i), f @ Numeric::Float(_)) => f + Numeric::Float(i.into()), + } + } +} + +impl std::ops::Sub for Numeric { + type Output = Self; + + fn sub(self, rhs: Self) -> Self::Output { + match (self, rhs) { + (Numeric::Null, _) | (_, Numeric::Null) => Numeric::Null, + (Numeric::Float(lhs), Numeric::Float(rhs)) => match lhs - rhs { + Some(v) => Numeric::Float(v), + None => Numeric::Null, + }, + (Numeric::Integer(lhs), Numeric::Integer(rhs)) => match lhs.checked_sub(rhs) { + None => Numeric::Float(lhs.into()) - Numeric::Float(rhs.into()), + Some(i) => Numeric::Integer(i), + }, + (f @ Numeric::Float(_), Numeric::Integer(i)) => f - Numeric::Float(i.into()), + (Numeric::Integer(i), f @ Numeric::Float(_)) => Numeric::Float(i.into()) - f, + } + } +} + +impl std::ops::Mul for Numeric { + type Output = Self; + + fn mul(self, rhs: Self) -> Self::Output { + match (self, rhs) { + (Numeric::Null, _) | (_, 
Numeric::Null) => Numeric::Null, + (Numeric::Float(lhs), Numeric::Float(rhs)) => match lhs * rhs { + Some(v) => Numeric::Float(v), + None => Numeric::Null, + }, + (Numeric::Integer(lhs), Numeric::Integer(rhs)) => match lhs.checked_mul(rhs) { + None => Numeric::Float(lhs.into()) * Numeric::Float(rhs.into()), + Some(i) => Numeric::Integer(i), + }, + (f @ Numeric::Float(_), Numeric::Integer(i)) + | (Numeric::Integer(i), f @ Numeric::Float(_)) => f * Numeric::Float(i.into()), + } + } +} + +impl std::ops::Div for Numeric { + type Output = Self; + + fn div(self, rhs: Self) -> Self::Output { + match (self, rhs) { + (Numeric::Null, _) | (_, Numeric::Null) => Numeric::Null, + (Numeric::Float(lhs), Numeric::Float(rhs)) => match lhs / rhs { + Some(v) if rhs != 0.0 => Numeric::Float(v), + _ => Numeric::Null, + }, + (Numeric::Integer(lhs), Numeric::Integer(rhs)) => match lhs.checked_div(rhs) { + None => Numeric::Float(lhs.into()) / Numeric::Float(rhs.into()), + Some(v) => Numeric::Integer(v), + }, + (f @ Numeric::Float(_), Numeric::Integer(i)) => f / Numeric::Float(i.into()), + (Numeric::Integer(i), f @ Numeric::Float(_)) => Numeric::Float(i.into()) / f, + } + } +} + +impl std::ops::Neg for Numeric { + type Output = Self; + + fn neg(self) -> Self::Output { + match self { + Numeric::Null => Numeric::Null, + Numeric::Integer(v) => match v.checked_neg() { + None => -Numeric::Float(v.into()), + Some(i) => Numeric::Integer(i), + }, + Numeric::Float(v) => Numeric::Float(-v), + } + } +} + +#[derive(Debug)] +pub enum NullableInteger { + Null, + Integer(i64), +} + +impl From for OwnedValue { + fn from(value: NullableInteger) -> Self { + match value { + NullableInteger::Null => OwnedValue::Null, + NullableInteger::Integer(v) => OwnedValue::Integer(v), + } + } +} + +impl> From for NullableInteger { + fn from(value: T) -> Self { + Self::Integer(str_to_i64(value.as_ref()).unwrap_or(0)) + } +} + +impl From for NullableInteger { + fn from(value: OwnedValue) -> Self { + Self::from(&value) + } +} + +impl From<&OwnedValue> for NullableInteger { + fn from(value: &OwnedValue) -> Self { + match value { + OwnedValue::Null => Self::Null, + OwnedValue::Integer(v) => Self::Integer(*v), + OwnedValue::Float(v) => Self::Integer(*v as i64), + OwnedValue::Text(text) => Self::from(text.as_str()), + OwnedValue::Blob(blob) => { + let text = String::from_utf8_lossy(blob.as_slice()); + Self::from(text) + } + } + } +} + +impl std::ops::Not for NullableInteger { + type Output = Self; + + fn not(self) -> Self::Output { + match self { + NullableInteger::Null => NullableInteger::Null, + NullableInteger::Integer(lhs) => NullableInteger::Integer(!lhs), + } + } +} + +impl std::ops::BitAnd for NullableInteger { + type Output = Self; + + fn bitand(self, rhs: Self) -> Self::Output { + match (self, rhs) { + (NullableInteger::Null, _) | (_, NullableInteger::Null) => NullableInteger::Null, + (NullableInteger::Integer(lhs), NullableInteger::Integer(rhs)) => { + NullableInteger::Integer(lhs & rhs) + } + } + } +} + +impl std::ops::BitOr for NullableInteger { + type Output = Self; + + fn bitor(self, rhs: Self) -> Self::Output { + match (self, rhs) { + (NullableInteger::Null, _) | (_, NullableInteger::Null) => NullableInteger::Null, + (NullableInteger::Integer(lhs), NullableInteger::Integer(rhs)) => { + NullableInteger::Integer(lhs | rhs) + } + } + } +} + +impl std::ops::Shl for NullableInteger { + type Output = Self; + + fn shl(self, rhs: Self) -> Self::Output { + match (self, rhs) { + (NullableInteger::Null, _) | (_, NullableInteger::Null) => 
NullableInteger::Null, + (NullableInteger::Integer(lhs), NullableInteger::Integer(rhs)) => { + NullableInteger::Integer(if rhs.is_positive() { + lhs.saturating_shl(rhs.try_into().unwrap_or(u32::MAX)) + } else { + lhs.saturating_shr(rhs.saturating_abs().try_into().unwrap_or(u32::MAX)) + }) + } + } + } +} + +impl std::ops::Shr for NullableInteger { + type Output = Self; + + fn shr(self, rhs: Self) -> Self::Output { + match (self, rhs) { + (NullableInteger::Null, _) | (_, NullableInteger::Null) => NullableInteger::Null, + (NullableInteger::Integer(lhs), NullableInteger::Integer(rhs)) => { + NullableInteger::Integer(if rhs.is_positive() { + lhs.saturating_shr(rhs.try_into().unwrap_or(u32::MAX)) + } else { + lhs.saturating_shl(rhs.saturating_abs().try_into().unwrap_or(u32::MAX)) + }) + } + } + } +} + +impl std::ops::Rem for NullableInteger { + type Output = Self; + + fn rem(self, rhs: Self) -> Self::Output { + match (self, rhs) { + (NullableInteger::Null, _) | (_, NullableInteger::Null) => NullableInteger::Null, + (_, NullableInteger::Integer(0)) => NullableInteger::Null, + (lhs, NullableInteger::Integer(-1)) => lhs % NullableInteger::Integer(1), + (NullableInteger::Integer(lhs), NullableInteger::Integer(rhs)) => { + NullableInteger::Integer(lhs % rhs) + } + } + } +} + +// Maximum u64 that can survive a f64 round trip +const MAX_EXACT: u64 = u64::MAX << 11; + +const VERTICAL_TAB: char = '\u{b}'; + +/// Encapsulates Dekker's arithmetic for higher precision. This is spiritually the same as using a +/// f128 for arithmetic, but cross platform and compatible with sqlite. +#[derive(Debug, Clone, Copy)] +struct DoubleDouble(f64, f64); + +impl From for DoubleDouble { + fn from(value: u64) -> Self { + let r = value as f64; + + // If the value is smaller than MAX_EXACT, the error isn't significant + let rr = if r <= MAX_EXACT as f64 { + let round_tripped = value as f64 as u64; + let sign = if value >= round_tripped { 1.0 } else { -1.0 }; + + // Error term is the signed distance of the round tripped value and itself + sign * value.abs_diff(round_tripped) as f64 + } else { + 0.0 + }; + + DoubleDouble(r, rr) + } +} + +impl From for f64 { + fn from(DoubleDouble(a, aa): DoubleDouble) -> Self { + a + aa + } +} + +impl std::ops::Mul for DoubleDouble { + type Output = Self; + + /// Double-Double multiplication. (self.0, self.1) *= (rhs.0, rhs.1) + /// + /// Reference: + /// T. J. Dekker, "A Floating-Point Technique for Extending the Available Precision". + /// 1971-07-26. 
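+ ///
+ /// A sketch of the split (added note, not from the paper): the
+ /// `u64::MAX << 26` mask below zeroes the low 26 mantissa bits, so each
+ /// operand becomes a high half plus a small remainder; the partial
+ /// products of the halves are then (nearly) exact in f64, and the
+ /// rounding error of `p + q` is recovered into the second component.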
+ /// + fn mul(self, rhs: Self) -> Self::Output { + // TODO: Better variable naming + + let mask = u64::MAX << 26; + + let hx = f64::from_bits(self.0.to_bits() & mask); + let tx = self.0 - hx; + + let hy = f64::from_bits(rhs.0.to_bits() & mask); + let ty = rhs.0 - hy; + + let p = hx * hy; + let q = hx * ty + tx * hy; + + let c = p + q; + let cc = p - c + q + tx * ty; + let cc = self.0 * rhs.1 + self.1 * rhs.0 + cc; + + let r = c + cc; + let rr = (c - r) + cc; + + DoubleDouble(r, rr) + } +} + +impl std::ops::MulAssign for DoubleDouble { + fn mul_assign(&mut self, rhs: Self) { + *self = self.clone() * rhs; + } +} + +pub fn str_to_i64(input: impl AsRef) -> Option { + let input = input + .as_ref() + .trim_matches(|ch: char| ch.is_ascii_whitespace() || ch == VERTICAL_TAB); + + let mut iter = input.chars().enumerate().peekable(); + + iter.next_if(|(_, ch)| matches!(ch, '+' | '-')); + let Some((end, _)) = iter.take_while(|(_, ch)| ch.is_ascii_digit()).last() else { + return Some(0); + }; + + input[0..=end].parse::().map_or_else( + |err| match err.kind() { + std::num::IntErrorKind::PosOverflow => Some(i64::MAX), + std::num::IntErrorKind::NegOverflow => Some(i64::MIN), + std::num::IntErrorKind::Empty => unreachable!(), + _ => Some(0), + }, + Some, + ) +} + +pub enum StrToF64 { + Fractional(NonNan), + Decimal(NonNan), +} + +pub fn str_to_f64(input: impl AsRef) -> Option { + let mut input = input + .as_ref() + .trim_matches(|ch: char| ch.is_ascii_whitespace() || ch == VERTICAL_TAB) + .chars() + .peekable(); + + let sign = match input.next_if(|ch| matches!(ch, '-' | '+')) { + Some('-') => -1.0, + _ => 1.0, + }; + + let mut had_digits = false; + let mut is_fractional = false; + + if matches!(input.peek(), Some('e' | 'E')) { + return None; + } + + let mut significant: u64 = 0; + + // Copy as many significant digits as we can + while let Some(digit) = input.peek().and_then(|ch| ch.to_digit(10)) { + had_digits = true; + + match significant + .checked_mul(10) + .and_then(|v| v.checked_add(digit as u64)) + { + Some(new) => significant = new, + None => break, + } + + input.next(); + } + + let mut exponent = 0; + + // Increment the exponent for every non significant digit we skipped + while input.next_if(char::is_ascii_digit).is_some() { + exponent += 1 + } + + if input.next_if(|ch| matches!(ch, '.')).is_some() { + if had_digits || input.peek().is_some_and(char::is_ascii_digit) { + is_fractional = true + } + + while let Some(digit) = input.peek().and_then(|ch| ch.to_digit(10)) { + if significant < (u64::MAX - 9) / 10 { + significant = significant * 10 + digit as u64; + exponent -= 1; + } + + input.next(); + } + }; + + if input.next_if(|ch| matches!(ch, 'e' | 'E')).is_some() { + let sign = match input.next_if(|ch| matches!(ch, '-' | '+')) { + Some('-') => -1, + _ => 1, + }; + + if input.peek().is_some_and(char::is_ascii_digit) { + is_fractional = true + } + + let e = input.map_while(|ch| ch.to_digit(10)).fold(0, |acc, digit| { + if acc < 1000 { + acc * 10 + digit as i32 + } else { + 1000 + } + }); + + exponent += sign * e; + }; + + while exponent.is_positive() && significant < MAX_EXACT / 10 { + significant *= 10; + exponent -= 1; + } + + while exponent.is_negative() && significant % 10 == 0 { + significant /= 10; + exponent += 1; + } + + let mut result = DoubleDouble::from(significant); + + if exponent > 0 { + while exponent >= 100 { + exponent -= 100; + result *= DoubleDouble(1.0e+100, -1.5902891109759918046e+83); + } + while exponent >= 10 { + exponent -= 10; + result *= DoubleDouble(1.0e+10, 0.0); + } + 
while exponent >= 1 { + exponent -= 1; + result *= DoubleDouble(1.0e+01, 0.0); + } + } else { + while exponent <= -100 { + exponent += 100; + result *= DoubleDouble(1.0e-100, -1.99918998026028836196e-117); + } + while exponent <= -10 { + exponent += 10; + result *= DoubleDouble(1.0e-10, -3.6432197315497741579e-27); + } + while exponent <= -1 { + exponent += 1; + result *= DoubleDouble(1.0e-01, -5.5511151231257827021e-18); + } + } + + let result = NonNan::new(f64::from(result) * sign) + .unwrap_or_else(|| NonNan::new(sign * f64::INFINITY).unwrap()); + + Some(if is_fractional { + StrToF64::Fractional(result) + } else { + StrToF64::Decimal(result) + }) +} diff --git a/core/numeric/nonnan.rs b/core/numeric/nonnan.rs new file mode 100644 index 000000000..5ae6a1f34 --- /dev/null +++ b/core/numeric/nonnan.rs @@ -0,0 +1,105 @@ +#[repr(transparent)] +#[derive(Debug, Clone, Copy, PartialEq)] +pub struct NonNan(f64); + +impl NonNan { + pub fn new(value: f64) -> Option { + if value.is_nan() { + return None; + } + + Some(NonNan(value)) + } +} + +impl PartialEq for f64 { + fn eq(&self, other: &NonNan) -> bool { + *self == other.0 + } +} + +impl PartialEq for NonNan { + fn eq(&self, other: &f64) -> bool { + self.0 == *other + } +} + +impl PartialOrd for NonNan { + fn partial_cmp(&self, other: &f64) -> Option { + self.0.partial_cmp(other) + } +} + +impl PartialOrd for f64 { + fn partial_cmp(&self, other: &NonNan) -> Option { + self.partial_cmp(&other.0) + } +} + +impl From for NonNan { + fn from(value: i64) -> Self { + NonNan(value as f64) + } +} + +impl From for f64 { + fn from(value: NonNan) -> Self { + value.0 + } +} + +impl std::ops::Deref for NonNan { + type Target = f64; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +impl std::ops::Add for NonNan { + type Output = Option; + + fn add(self, rhs: Self) -> Self::Output { + Self::new(self.0 + rhs.0) + } +} + +impl std::ops::Sub for NonNan { + type Output = Option; + + fn sub(self, rhs: Self) -> Self::Output { + Self::new(self.0 - rhs.0) + } +} + +impl std::ops::Mul for NonNan { + type Output = Option; + + fn mul(self, rhs: Self) -> Self::Output { + Self::new(self.0 * rhs.0) + } +} + +impl std::ops::Div for NonNan { + type Output = Option; + + fn div(self, rhs: Self) -> Self::Output { + Self::new(self.0 / rhs.0) + } +} + +impl std::ops::Rem for NonNan { + type Output = Option; + + fn rem(self, rhs: Self) -> Self::Output { + Self::new(self.0 % rhs.0) + } +} + +impl std::ops::Neg for NonNan { + type Output = Self; + + fn neg(self) -> Self::Output { + Self(-self.0) + } +} diff --git a/core/schema.rs b/core/schema.rs index fbec7627f..42c619693 100644 --- a/core/schema.rs +++ b/core/schema.rs @@ -1,8 +1,8 @@ -use crate::VirtualTable; use crate::{util::normalize_ident, Result}; +use crate::{LimboError, VirtualTable}; use core::fmt; use fallible_iterator::FallibleIterator; -use limbo_sqlite3_parser::ast::{Expr, Literal, TableOptions}; +use limbo_sqlite3_parser::ast::{Expr, Literal, SortOrder, TableOptions}; use limbo_sqlite3_parser::{ ast::{Cmd, CreateTableBody, QualifiedName, ResultColumn, Stmt}, lexer::sql::Parser, @@ -30,6 +30,13 @@ impl Schema { Self { tables, indexes } } + pub fn is_unique_idx_name(&self, name: &str) -> bool { + !self + .indexes + .iter() + .any(|idx| idx.1.iter().any(|i| i.name == name)) + } + pub fn add_btree_table(&mut self, table: Rc) { let name = normalize_ident(&table.name); self.tables.insert(name, Table::BTree(table).into()); @@ -74,6 +81,14 @@ impl Schema { .map_or_else(|| &[] as &[Arc], |v| v.as_slice()) } + pub fn 
get_index(&self, table_name: &str, index_name: &str) -> Option<&Arc> { + let name = normalize_ident(table_name); + self.indexes + .get(&name)? + .iter() + .find(|index| index.name == index_name) + } + pub fn remove_indices_for_table(&mut self, table_name: &str) { let name = normalize_ident(table_name); self.indexes.remove(&name); @@ -151,15 +166,16 @@ impl PartialEq for Table { pub struct BTreeTable { pub root_page: usize, pub name: String, - pub primary_key_column_names: Vec, + pub primary_key_columns: Vec<(String, SortOrder)>, pub columns: Vec, pub has_rowid: bool, + pub is_strict: bool, } impl BTreeTable { pub fn get_rowid_alias_column(&self) -> Option<(usize, &Column)> { - if self.primary_key_column_names.len() == 1 { - let (idx, col) = self.get_column(&self.primary_key_column_names[0]).unwrap(); + if self.primary_key_columns.len() == 1 { + let (idx, col) = self.get_column(&self.primary_key_columns[0].0).unwrap(); if self.column_is_rowid_alias(col) { return Some((idx, col)); } @@ -171,6 +187,10 @@ impl BTreeTable { col.is_rowid_alias } + /// Returns the column position and column for a given column name. + /// Returns None if the column name is not found. + /// E.g. if table is CREATE TABLE t(a, b, c) + /// then get_column("b") returns (1, &Column { .. }) pub fn get_column(&self, name: &str) -> Option<(usize, &Column)> { let name = normalize_ident(name); for (i, column) in self.columns.iter().enumerate() { @@ -209,7 +229,7 @@ impl BTreeTable { } } -#[derive(Debug)] +#[derive(Debug, Default)] pub struct PseudoTable { pub columns: Vec, } @@ -245,12 +265,6 @@ impl PseudoTable { } } -impl Default for PseudoTable { - fn default() -> Self { - Self::new() - } -} - fn create_table( tbl_name: QualifiedName, body: CreateTableBody, @@ -259,14 +273,16 @@ fn create_table( let table_name = normalize_ident(&tbl_name.name.0); trace!("Creating table {}", table_name); let mut has_rowid = true; - let mut primary_key_column_names = vec![]; + let mut primary_key_columns = vec![]; let mut cols = vec![]; + let is_strict: bool; match body { CreateTableBody::ColumnsAndConstraints { columns, constraints, options, } => { + is_strict = options.contains(TableOptions::STRICT); if let Some(constraints) = constraints { for c in constraints { if let limbo_sqlite3_parser::ast::TableConstraint::PrimaryKey { @@ -274,7 +290,7 @@ fn create_table( } = c.constraint { for column in columns { - primary_key_column_names.push(match column.expr { + let col_name = match column.expr { Expr::Id(id) => normalize_ident(&id.0), Expr::Literal(Literal::String(value)) => { value.trim_matches('\'').to_owned() @@ -282,7 +298,9 @@ fn create_table( _ => { todo!("Unsupported primary key expression"); } - }); + }; + primary_key_columns + .push((col_name, column.order.unwrap_or(SortOrder::Asc))); } } } @@ -339,10 +357,17 @@ fn create_table( let mut default = None; let mut primary_key = false; let mut notnull = false; + let mut order = SortOrder::Asc; for c_def in &col_def.constraints { match &c_def.constraint { - limbo_sqlite3_parser::ast::ColumnConstraint::PrimaryKey { .. } => { + limbo_sqlite3_parser::ast::ColumnConstraint::PrimaryKey { + order: o, + .. + } => { primary_key = true; + if let Some(o) = o { + order = o.clone(); + } } limbo_sqlite3_parser::ast::ColumnConstraint::NotNull { .. 
} => { notnull = true; @@ -355,8 +380,11 @@ fn create_table( } if primary_key { - primary_key_column_names.push(name.clone()); - } else if primary_key_column_names.contains(&name) { + primary_key_columns.push((name.clone(), order)); + } else if primary_key_columns + .iter() + .any(|(col_name, _)| col_name == &name) + { primary_key = true; } @@ -378,7 +406,7 @@ fn create_table( }; // flip is_rowid_alias back to false if the table has multiple primary keys // or if the table has no rowid - if !has_rowid || primary_key_column_names.len() > 1 { + if !has_rowid || primary_key_columns.len() > 1 { for col in cols.iter_mut() { col.is_rowid_alias = false; } @@ -387,8 +415,9 @@ fn create_table( root_page, name: table_name, has_rowid, - primary_key_column_names, + primary_key_columns, columns: cols, + is_strict, }) } @@ -455,7 +484,7 @@ pub fn affinity(datatype: &str) -> Affinity { } // Rule 3: BLOB or empty -> BLOB affinity (historically called NONE) - if datatype.contains("BLOB") || datatype.is_empty() { + if datatype.contains("BLOB") || datatype.is_empty() || datatype.contains("ANY") { return Affinity::Blob; } @@ -478,26 +507,72 @@ pub enum Type { Blob, } +/// # SQLite Column Type Affinities +/// /// Each column in an SQLite 3 database is assigned one of the following type affinities: /// -/// TEXT -/// NUMERIC -/// INTEGER -/// REAL -/// BLOB -/// (Historical note: The "BLOB" type affinity used to be called "NONE". But that term was easy to confuse with "no affinity" and so it was renamed.) +/// - **TEXT** +/// - **NUMERIC** +/// - **INTEGER** +/// - **REAL** +/// - **BLOB** /// -/// A column with TEXT affinity stores all data using storage classes NULL, TEXT or BLOB. If numerical data is inserted into a column with TEXT affinity it is converted into text form before being stored. +/// > **Note:** Historically, the "BLOB" type affinity was called "NONE". However, this term was renamed to avoid confusion with "no affinity". /// -/// A column with NUMERIC affinity may contain values using all five storage classes. When text data is inserted into a NUMERIC column, the storage class of the text is converted to INTEGER or REAL (in order of preference) if the text is a well-formed integer or real literal, respectively. If the TEXT value is a well-formed integer literal that is too large to fit in a 64-bit signed integer, it is converted to REAL. For conversions between TEXT and REAL storage classes, only the first 15 significant decimal digits of the number are preserved. If the TEXT value is not a well-formed integer or real literal, then the value is stored as TEXT. For the purposes of this paragraph, hexadecimal integer literals are not considered well-formed and are stored as TEXT. (This is done for historical compatibility with versions of SQLite prior to version 3.8.6 2014-08-15 where hexadecimal integer literals were first introduced into SQLite.) If a floating point value that can be represented exactly as an integer is inserted into a column with NUMERIC affinity, the value is converted into an integer. No attempt is made to convert NULL or BLOB values. +/// ## Affinity Descriptions /// -/// A string might look like a floating-point literal with a decimal point and/or exponent notation but as long as the value can be expressed as an integer, the NUMERIC affinity will convert it into an integer. Hence, the string '3.0e+5' is stored in a column with NUMERIC affinity as the integer 300000, not as the floating point value 300000.0. 
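To make the conversion preference described here concrete, below is a minimal, illustrative Rust sketch of NUMERIC affinity's text-coercion rules: try INTEGER first, then REAL, collapse exactly-integral reals back to integers, and keep hex or malformed literals as TEXT. `CoercedValue` and `coerce_numeric` are hypothetical names for illustration only, not part of this diff:

```rust
// Illustrative sketch of NUMERIC affinity's text coercion; hypothetical
// names, not Limbo's implementation.
#[derive(Debug, PartialEq)]
enum CoercedValue {
    Integer(i64),
    Real(f64),
    Text(String),
}

fn coerce_numeric(text: &str) -> CoercedValue {
    // Prefer INTEGER when the text is a well-formed integer literal that fits in i64.
    if let Ok(i) = text.parse::<i64>() {
        return CoercedValue::Integer(i);
    }
    // Otherwise try REAL; a real literal with an exactly integral value
    // (e.g. '3.0e+5') is stored as an integer.
    if let Ok(f) = text.parse::<f64>() {
        if f.is_finite() && f.fract() == 0.0 && f.abs() < i64::MAX as f64 {
            return CoercedValue::Integer(f as i64);
        }
        return CoercedValue::Real(f);
    }
    // Not a well-formed numeric literal (including hex like '0x10'): keep TEXT.
    CoercedValue::Text(text.to_owned())
}

fn main() {
    assert_eq!(coerce_numeric("3.0e+5"), CoercedValue::Integer(300_000));
    assert_eq!(coerce_numeric("1.25"), CoercedValue::Real(1.25));
    assert_eq!(coerce_numeric("0x10"), CoercedValue::Text("0x10".into()));
}
```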
+/// ### **TEXT** +/// - Stores data using the NULL, TEXT, or BLOB storage classes. +/// - Numerical data inserted into a column with TEXT affinity is converted into text form before being stored. +/// - **Example:** +/// ```sql +/// CREATE TABLE example (col TEXT); +/// INSERT INTO example (col) VALUES (123); -- Stored as '123' (text) +/// SELECT typeof(col) FROM example; -- Returns 'text' +/// ``` /// -/// A column that uses INTEGER affinity behaves the same as a column with NUMERIC affinity. The difference between INTEGER and NUMERIC affinity is only evident in a CAST expression: The expression "CAST(4.0 AS INT)" returns an integer 4, whereas "CAST(4.0 AS NUMERIC)" leaves the value as a floating-point 4.0. +/// ### **NUMERIC** +/// - Can store values using all five storage classes. +/// - Text data is converted to INTEGER or REAL (in that order of preference) if it is a well-formed integer or real literal. +/// - If the text represents an integer too large for a 64-bit signed integer, it is converted to REAL. +/// - If the text is not a well-formed literal, it is stored as TEXT. +/// - Hexadecimal integer literals are stored as TEXT for historical compatibility. +/// - Floating-point values that can be exactly represented as integers are converted to integers. +/// - **Example:** +/// ```sql +/// CREATE TABLE example (col NUMERIC); +/// INSERT INTO example (col) VALUES ('3.0e+5'); -- Stored as 300000 (integer) +/// SELECT typeof(col) FROM example; -- Returns 'integer' +/// ``` /// -/// A column with REAL affinity behaves like a column with NUMERIC affinity except that it forces integer values into floating point representation. (As an internal optimization, small floating point values with no fractional component and stored in columns with REAL affinity are written to disk as integers in order to take up less space and are automatically converted back into floating point as the value is read out. This optimization is completely invisible at the SQL level and can only be detected by examining the raw bits of the database file.) +/// ### **INTEGER** +/// - Behaves like NUMERIC affinity but differs in `CAST` expressions. +/// - **Example:** +/// ```sql +/// CREATE TABLE example (col INTEGER); +/// INSERT INTO example (col) VALUES (4.0); -- Stored as 4 (integer) +/// SELECT typeof(col) FROM example; -- Returns 'integer' +/// ``` /// -/// A column with affinity BLOB does not prefer one storage class over another and no attempt is made to coerce data from one storage class into another. +/// ### **REAL** +/// - Similar to NUMERIC affinity but forces integer values into floating-point representation. +/// - **Optimization:** Small floating-point values with no fractional component may be stored as integers on disk to save space. This is invisible at the SQL level. +/// - **Example:** +/// ```sql +/// CREATE TABLE example (col REAL); +/// INSERT INTO example (col) VALUES (4); -- Stored as 4.0 (real) +/// SELECT typeof(col) FROM example; -- Returns 'real' +/// ``` +/// +/// ### **BLOB** +/// - Does not prefer any storage class. +/// - No coercion is performed between storage classes. 
+/// - **Example:** +/// ```sql +/// CREATE TABLE example (col BLOB); +/// INSERT INTO example (col) VALUES (x'1234'); -- Stored as a binary blob +/// SELECT typeof(col) FROM example; -- Returns 'blob' +/// ``` #[derive(Debug, Clone, Copy, PartialEq)] pub enum Affinity { Integer, @@ -507,11 +582,11 @@ pub enum Affinity { Numeric, } -pub const SQLITE_AFF_TEXT: char = 'a'; -pub const SQLITE_AFF_NONE: char = 'b'; // Historically called NONE, but it's the same as BLOB -pub const SQLITE_AFF_NUMERIC: char = 'c'; -pub const SQLITE_AFF_INTEGER: char = 'd'; -pub const SQLITE_AFF_REAL: char = 'e'; +pub const SQLITE_AFF_NONE: char = 'A'; // Historically called NONE, but it's the same as BLOB +pub const SQLITE_AFF_TEXT: char = 'B'; +pub const SQLITE_AFF_NUMERIC: char = 'C'; +pub const SQLITE_AFF_INTEGER: char = 'D'; +pub const SQLITE_AFF_REAL: char = 'E'; impl Affinity { /// This is meant to be used in opcodes like Eq, which state: @@ -530,6 +605,20 @@ impl Affinity { Affinity::Numeric => SQLITE_AFF_NUMERIC, } } + + pub fn from_char(char: char) -> Result<Affinity> { + match char { + SQLITE_AFF_INTEGER => Ok(Affinity::Integer), + SQLITE_AFF_TEXT => Ok(Affinity::Text), + SQLITE_AFF_NONE => Ok(Affinity::Blob), + SQLITE_AFF_REAL => Ok(Affinity::Real), + SQLITE_AFF_NUMERIC => Ok(Affinity::Numeric), + _ => Err(LimboError::InternalError(format!( + "Invalid affinity character: {}", + char + ))), + } + } } impl fmt::Display for Type { @@ -551,7 +640,8 @@ pub fn sqlite_schema_table() -> BTreeTable { root_page: 1, name: "sqlite_schema".to_string(), has_rowid: true, - primary_key_column_names: vec![], + is_strict: false, + primary_key_columns: vec![], columns: vec![ Column { name: Some("type".to_string()), @@ -610,23 +700,24 @@ pub struct Index { pub root_page: usize, pub columns: Vec<IndexColumn>, pub unique: bool, + pub ephemeral: bool, } #[allow(dead_code)] #[derive(Debug, Clone)] pub struct IndexColumn { pub name: String, - pub order: Order, -} - -#[derive(Debug, Clone, PartialEq)] -pub enum Order { - Ascending, - Descending, + pub order: SortOrder, + /// the position of the column in the source table. + /// for example: + /// CREATE TABLE t(a,b,c) + /// CREATE INDEX idx ON t(b) + /// b.pos_in_table == 1 + pub pos_in_table: usize, } impl Index { - pub fn from_sql(sql: &str, root_page: usize) -> Result<Index> { + pub fn from_sql(sql: &str, root_page: usize, table: &BTreeTable) -> Result<Index> { let mut parser = Parser::new(sql.as_bytes()); let cmd = parser.next()?; match cmd { @@ -638,23 +729,28 @@ .. 
})) => { let index_name = normalize_ident(&idx_name.name.0); - let index_columns = columns - .into_iter() - .map(|col| IndexColumn { - name: normalize_ident(&col.expr.to_string()), - order: match col.order { - Some(limbo_sqlite3_parser::ast::SortOrder::Asc) => Order::Ascending, - Some(limbo_sqlite3_parser::ast::SortOrder::Desc) => Order::Descending, - None => Order::Ascending, - }, - }) - .collect(); + let mut index_columns = Vec::with_capacity(columns.len()); + for col in columns.into_iter() { + let name = normalize_ident(&col.expr.to_string()); + let Some((pos_in_table, _)) = table.get_column(&name) else { + return Err(crate::LimboError::InternalError(format!( + "Column {} is in index {} but not found in table {}", + name, index_name, table.name + ))); + }; + index_columns.push(IndexColumn { + name, + order: col.order.unwrap_or(SortOrder::Asc), + pos_in_table, + }); + } Ok(Index { name: index_name, table_name: normalize_ident(&tbl_name.0), root_page, columns: index_columns, unique, + ephemeral: false, }) } _ => todo!("Expected create index statement"), @@ -666,26 +762,27 @@ index_name: &str, root_page: usize, ) -> Result<Index> { - if table.primary_key_column_names.is_empty() { + if table.primary_key_columns.is_empty() { return Err(crate::LimboError::InternalError( "Cannot create automatic index for table without primary key".to_string(), )); } let index_columns = table - .primary_key_column_names + .primary_key_columns .iter() - .map(|col_name| { + .map(|(col_name, order)| { // Verify that each primary key column exists in the table - if table.get_column(col_name).is_none() { + let Some((pos_in_table, _)) = table.get_column(col_name) else { return Err(crate::LimboError::InternalError(format!( - "Primary key column {} not found in table {}", - col_name, table.name + "Column {} is in index {} but not found in table {}", + col_name, index_name, table.name ))); - } + }; Ok(IndexColumn { name: normalize_ident(col_name), - order: Order::Ascending, // Primary key indexes are always ascending + order: order.clone(), + pos_in_table, }) }) .collect::<Result<Vec<_>>>()?; @@ -696,8 +793,21 @@ root_page, columns: index_columns, unique: true, // Primary key indexes are always unique + ephemeral: false, }) } + + /// Given a column position in the table, return the position in the index. + /// Returns None if the column is not found in the index. 
+ /// For example, given: + /// CREATE TABLE t(a, b, c) + /// CREATE INDEX idx ON t(b) + /// then column_table_pos_to_index_pos(1) returns Some(0) + pub fn column_table_pos_to_index_pos(&self, table_pos: usize) -> Option<usize> { + self.columns + .iter() + .position(|c| c.pos_in_table == table_pos) + } } #[cfg(test)] @@ -818,8 +928,8 @@ mod tests { let column = table.get_column("c").unwrap().1; assert!(!column.primary_key, "column 'c' shouldn't be a primary key"); assert_eq!( - vec!["a"], - table.primary_key_column_names, + vec![("a".to_string(), SortOrder::Asc)], + table.primary_key_columns, "primary key column names should be ['a']" ); Ok(()) @@ -836,8 +946,11 @@ let column = table.get_column("c").unwrap().1; assert!(!column.primary_key, "column 'c' shouldn't be a primary key"); assert_eq!( - vec!["a", "b"], - table.primary_key_column_names, + vec![ + ("a".to_string(), SortOrder::Asc), + ("b".to_string(), SortOrder::Asc) + ], + table.primary_key_columns, "primary key column names should be ['a', 'b']" ); Ok(()) } #[test] pub fn test_primary_key_separate_single() -> Result<()> { - let sql = r#"CREATE TABLE t1 (a INTEGER, b TEXT, c REAL, PRIMARY KEY(a));"#; + let sql = r#"CREATE TABLE t1 (a INTEGER, b TEXT, c REAL, PRIMARY KEY(a desc));"#; let table = BTreeTable::from_sql(sql, 0)?; let column = table.get_column("a").unwrap().1; assert!(column.primary_key, "column 'a' should be a primary key"); @@ -854,8 +967,8 @@ let column = table.get_column("c").unwrap().1; assert!(!column.primary_key, "column 'c' shouldn't be a primary key"); assert_eq!( - vec!["a"], - table.primary_key_column_names, + vec![("a".to_string(), SortOrder::Desc)], + table.primary_key_columns, "primary key column names should be ['a']" ); Ok(()) } #[test] pub fn test_primary_key_separate_multiple() -> Result<()> { - let sql = r#"CREATE TABLE t1 (a INTEGER, b TEXT, c REAL, PRIMARY KEY(a, b));"#; + let sql = r#"CREATE TABLE t1 (a INTEGER, b TEXT, c REAL, PRIMARY KEY(a, b desc));"#; let table = BTreeTable::from_sql(sql, 0)?; let column = table.get_column("a").unwrap().1; assert!(column.primary_key, "column 'a' should be a primary key"); @@ -872,8 +985,11 @@ let column = table.get_column("c").unwrap().1; assert!(!column.primary_key, "column 'c' shouldn't be a primary key"); assert_eq!( - vec!["a", "b"], - table.primary_key_column_names, + vec![ + ("a".to_string(), SortOrder::Asc), + ("b".to_string(), SortOrder::Desc) + ], + table.primary_key_columns, "primary key column names should be ['a', 'b']" ); Ok(()) @@ -890,8 +1006,8 @@ let column = table.get_column("c").unwrap().1; assert!(!column.primary_key, "column 'c' shouldn't be a primary key"); assert_eq!( - vec!["a"], - table.primary_key_column_names, + vec![("a".to_string(), SortOrder::Asc)], + table.primary_key_columns, "primary key column names should be ['a']" ); Ok(()) @@ -907,8 +1023,8 @@ let column = table.get_column("c").unwrap().1; assert!(!column.primary_key, "column 'c' shouldn't be a primary key"); assert_eq!( - vec!["a"], - table.primary_key_column_names, + vec![("a".to_string(), SortOrder::Asc)], + table.primary_key_columns, "primary key column names should be ['a']" ); Ok(()) @@ -1012,7 +1128,7 @@ assert!(index.unique); assert_eq!(index.columns.len(), 1); assert_eq!(index.columns[0].name, "a"); - assert!(matches!(index.columns[0].order, Order::Ascending)); + assert!(matches!(index.columns[0].order, SortOrder::Asc)); Ok(()) } @@ -1029,8 
+1145,8 @@ mod tests { assert_eq!(index.columns.len(), 2); assert_eq!(index.columns[0].name, "a"); assert_eq!(index.columns[1].name, "b"); - assert!(matches!(index.columns[0].order, Order::Ascending)); - assert!(matches!(index.columns[1].order, Order::Ascending)); + assert!(matches!(index.columns[0].order, SortOrder::Asc)); + assert!(matches!(index.columns[1].order, SortOrder::Asc)); Ok(()) } @@ -1055,7 +1171,8 @@ mod tests { root_page: 0, name: "t1".to_string(), has_rowid: true, - primary_key_column_names: vec!["nonexistent".to_string()], + is_strict: false, + primary_key_columns: vec![("nonexistent".to_string(), SortOrder::Asc)], columns: vec![Column { name: Some("a".to_string()), ty: Type::Integer, diff --git a/core/storage/btree.rs b/core/storage/btree.rs index 558a6eee3..0523edcb8 100644 --- a/core/storage/btree.rs +++ b/core/storage/btree.rs @@ -1,51 +1,84 @@ -use tracing::debug; - -use crate::storage::pager::Pager; -use crate::storage::sqlite3_ondisk::{ - read_u32, read_varint, BTreeCell, PageContent, PageType, TableInteriorCell, TableLeafCell, -}; -use crate::MvCursor; - -use crate::types::{ - compare_immutable, CursorResult, ImmutableRecord, OwnedValue, RefValue, SeekKey, SeekOp, -}; -use crate::{return_corrupt, LimboError, Result}; - -use std::cell::{Cell, Ref, RefCell}; -use std::pin::Pin; -use std::rc::Rc; - -use super::pager::PageRef; -use super::sqlite3_ondisk::{ - write_varint_to_vec, IndexInteriorCell, IndexLeafCell, OverflowCell, DATABASE_HEADER_SIZE, +use crate::{ + schema::Index, + storage::{ + pager::Pager, + sqlite3_ondisk::{ + read_u32, read_varint, BTreeCell, PageContent, PageType, TableInteriorCell, + TableLeafCell, + }, + }, + translate::plan::IterationDirection, + types::IndexKeySortOrder, + MvCursor, }; -/* - These are offsets of fields in the header of a b-tree page. -*/ +use crate::{ + return_corrupt, + types::{ + compare_immutable, CursorResult, ImmutableRecord, OwnedValue, RefValue, SeekKey, SeekOp, + }, + LimboError, Result, +}; -/// type of btree page -> u8 -const PAGE_HEADER_OFFSET_PAGE_TYPE: usize = 0; -/// pointer to first freeblock -> u16 -/// The second field of the b-tree page header is the offset of the first freeblock, or zero if there are no freeblocks on the page. -/// A freeblock is a structure used to identify unallocated space within a b-tree page. -/// Freeblocks are organized as a chain. +#[cfg(debug_assertions)] +use std::collections::HashSet; +use std::{ + cell::{Cell, Ref, RefCell}, + cmp::Ordering, + pin::Pin, + rc::Rc, +}; + +use super::{ + pager::PageRef, + sqlite3_ondisk::{ + read_record, write_varint_to_vec, IndexInteriorCell, IndexLeafCell, OverflowCell, + DATABASE_HEADER_SIZE, + }, +}; + +/// The B-Tree page header is 12 bytes for interior pages and 8 bytes for leaf pages. /// -/// To be clear, freeblocks do not mean the regular unallocated free space to the left of the cell content area pointer, but instead -/// blocks of at least 4 bytes WITHIN the cell content area that are not in use due to e.g. deletions. -const PAGE_HEADER_OFFSET_FIRST_FREEBLOCK: usize = 1; -/// number of cells in the page -> u16 -const PAGE_HEADER_OFFSET_CELL_COUNT: usize = 3; -/// pointer to first byte of cell allocated content from top -> u16 -/// SQLite strives to place cells as far toward the end of the b-tree page as it can, -/// in order to leave space for future growth of the cell pointer array. 
-/// = the cell content area pointer moves leftward as cells are added to the page -const PAGE_HEADER_OFFSET_CELL_CONTENT_AREA: usize = 5; -/// number of fragmented bytes -> u8 -/// Fragments are isolated groups of 1, 2, or 3 unused bytes within the cell content area. -const PAGE_HEADER_OFFSET_FRAGMENTED_BYTES_COUNT: usize = 7; -/// if internalnode, pointer right most pointer (saved separately from cells) -> u32 -const PAGE_HEADER_OFFSET_RIGHTMOST_PTR: usize = 8; +/// +--------+-----------------+-----------------+-----------------+--------+----- ..... ----+ +/// | Page | First Freeblock | Cell Count | Cell Content | Frag. | Right-most | +/// | Type | Offset | | Area Start | Bytes | pointer | +/// +--------+-----------------+-----------------+-----------------+--------+----- ..... ----+ +/// 0 1 2 3 4 5 6 7 8 11 +/// +pub mod offset { + /// Type of the B-Tree page (u8). + pub const BTREE_PAGE_TYPE: usize = 0; + + /// A pointer to the first freeblock (u16). + /// + /// This field of the B-Tree page header is an offset to the first freeblock, or zero if + /// there are no freeblocks on the page. A freeblock is a structure used to identify + /// unallocated space within a B-Tree page, organized as a chain. + /// + /// Please note that freeblocks do not mean the regular unallocated free space to the left + /// of the cell content area pointer, but instead blocks of at least 4 + /// bytes WITHIN the cell content area that are not in use due to e.g. + /// deletions. + pub const BTREE_FIRST_FREEBLOCK: usize = 1; + + /// The number of cells in the page (u16). + pub const BTREE_CELL_COUNT: usize = 3; + + /// A pointer to first byte of cell allocated content from top (u16). + /// + /// SQLite strives to place cells as far toward the end of the b-tree page as it can, in + /// order to leave space for future growth of the cell pointer array. This means that the + /// cell content area pointer moves leftward as cells are added to the page. + pub const BTREE_CELL_CONTENT_AREA: usize = 5; + + /// The number of fragmented bytes (u8). + /// + /// Fragments are isolated groups of 1, 2, or 3 unused bytes within the cell content area. + pub const BTREE_FRAGMENTED_BYTES_COUNT: usize = 7; + + /// The right-most pointer (saved separately from cells) (u32) + pub const BTREE_RIGHTMOST_PTR: usize = 8; +} /// Maximum depth of an SQLite B-Tree structure. Any B-Tree deeper than /// this will be declared corrupt. This value is calculated based on a @@ -112,6 +145,12 @@ struct DestroyInfo { state: DestroyState, } +#[derive(Debug, Clone)] +enum DeleteSavepoint { + Rowid(u64), + Payload(ImmutableRecord), +} + #[derive(Debug, Clone)] enum DeleteState { Start, @@ -126,21 +165,13 @@ enum DeleteState { cell_idx: usize, original_child_pointer: Option, }, - DropCell { - cell_idx: usize, - }, CheckNeedsBalancing, - StartBalancing { - target_rowid: u64, - }, WaitForBalancingToComplete { - target_rowid: u64, + target_key: DeleteSavepoint, }, SeekAfterBalancing { - target_rowid: u64, + target_key: DeleteSavepoint, }, - StackRetreat, - Finish, } #[derive(Clone)] @@ -169,14 +200,65 @@ enum ReadPayloadOverflow { }, } +#[derive(Clone, Debug)] +pub enum BTreeKey<'a> { + TableRowId((u64, Option<&'a ImmutableRecord>)), + IndexKey(&'a ImmutableRecord), +} + +impl BTreeKey<'_> { + /// Create a new table rowid key from a rowid and an optional immutable record. + /// The record is optional because it may not be available when the key is created. 
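A hedged usage sketch of the two key flavors, assuming an `ImmutableRecord` is already in scope (`maybe_rowid` is the module-private helper defined in the impl below, so this only compiles inside the same module):

```rust
// Sketch: building the two BTreeKey variants at a call site.
fn demo_keys(rowid: u64, record: &ImmutableRecord) {
    // Table b-trees are keyed by rowid; the payload may not exist yet
    // (e.g. during a seek), hence the Option.
    let table_key = BTreeKey::new_table_rowid(rowid, Some(record));
    assert_eq!(table_key.maybe_rowid(), Some(rowid));

    // Index b-trees are keyed by the serialized record itself.
    let index_key = BTreeKey::new_index_key(record);
    assert_eq!(index_key.maybe_rowid(), None);
}
```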
+ pub fn new_table_rowid(rowid: u64, record: Option<&ImmutableRecord>) -> BTreeKey<'_> { + BTreeKey::TableRowId((rowid, record)) + } + + /// Create a new index key from an immutable record. + pub fn new_index_key(record: &ImmutableRecord) -> BTreeKey<'_> { + BTreeKey::IndexKey(record) + } + + /// Get the record, if present. Index will always be present. + fn get_record(&self) -> Option<&'_ ImmutableRecord> { + match self { + BTreeKey::TableRowId((_, record)) => *record, + BTreeKey::IndexKey(record) => Some(record), + } + } + + /// Get the rowid, if present. Index will never be present. + fn maybe_rowid(&self) -> Option<u64> { + match self { + BTreeKey::TableRowId((rowid, _)) => Some(*rowid), + BTreeKey::IndexKey(_) => None, + } + } + + /// Assert that the key is an integer rowid and return it. + fn to_rowid(&self) -> u64 { + match self { + BTreeKey::TableRowId((rowid, _)) => *rowid, + BTreeKey::IndexKey(_) => panic!("BTreeKey::to_rowid called on IndexKey"), + } + } + + /// Assert that the key is an index key and return it. + fn to_index_key_values(&self) -> &'_ Vec<RefValue> { + match self { + BTreeKey::TableRowId(_) => panic!("BTreeKey::to_index_key called on TableRowId"), + BTreeKey::IndexKey(key) => key.get_values(), + } + } +} + #[derive(Clone)] struct BalanceInfo { - /// Old pages being balanced. - pages_to_balance: Vec<PageRef>, - /// Bookkeeping of the rightmost pointer so the PAGE_HEADER_OFFSET_RIGHTMOST_PTR can be updated. + /// Old pages being balanced. We can have maximum 3 pages being balanced at the same time. + pages_to_balance: [Option<PageRef>; 3], + /// Bookkeeping of the rightmost pointer so the offset::BTREE_RIGHTMOST_PTR can be updated. rightmost_pointer: *mut u8, - /// Divider cells of old pages - divider_cells: Vec<Vec<u8>>, + /// Divider cells of old pages. We can have maximum 2 divider cells because of 3 pages. + divider_cells: [Option<Vec<u8>>; 2], /// Number of siblings being used to balance sibling_count: usize, /// First divider cell to remove that marks the first sibling @@ -282,30 +364,7 @@ pub struct BTreeCursor { /// Reusable immutable record, used to allow better allocation strategy. reusable_immutable_record: RefCell<Option<ImmutableRecord>>, empty_record: Cell<bool>, -} - -/// Stack of pages representing the tree traversal order. /// current_page represents the current page being used in the tree and current_page - 1 would be /// the parent. Using current_page + 1 or higher is undefined behaviour. -struct PageStack { - /// Pointer to the current page being consumed - current_page: Cell<i32>, - /// List of pages in the stack. Root page will be in index 0 - stack: RefCell<[Option<PageRef>; BTCURSOR_MAX_DEPTH + 1]>, - /// List of cell indices in the stack. - /// cell_indices[current_page] is the current cell index being consumed. Similarly - /// cell_indices[current_page-1] is the cell index of the parent of the current page - /// that we save in case of going back up. 
- /// There are two points that need special attention: - /// If cell_indices[current_page] = -1, it indicates that the current iteration has reached the start of the current_page - /// If cell_indices[current_page] = `cell_count`, it means that the current iteration has reached the end of the current_page - cell_indices: RefCell<[i32; BTCURSOR_MAX_DEPTH + 1]>, -} - -struct CellArray { - cells: Vec<&'static mut [u8]>, // TODO(pere): make this with references - - number_of_cells_per_page: Vec<u16>, // number of cells in each page + pub index_key_sort_order: IndexKeySortOrder, } impl BTreeCursor { @@ -330,9 +389,22 @@ }, reusable_immutable_record: RefCell::new(None), empty_record: Cell::new(true), + index_key_sort_order: IndexKeySortOrder::default(), } } + pub fn new_index( + mv_cursor: Option<Rc<RefCell<MvCursor>>>, + pager: Rc<Pager>, + root_page: usize, + index: &Index, + ) -> Self { + let index_key_sort_order = IndexKeySortOrder::from_index(index); + let mut cursor = Self::new(mv_cursor, pager, root_page); + cursor.index_key_sort_order = index_key_sort_order; + cursor + } + /// Check if the table is empty. /// This is done by checking if the root page has no cells. fn is_empty_table(&self) -> Result<CursorResult<bool>> { @@ -349,7 +421,10 @@ /// Move the cursor to the previous record and return it. /// Used in backwards iteration. - fn get_prev_record(&mut self) -> Result<CursorResult<Option<u64>>> { + fn get_prev_record( + &mut self, + predicate: Option<(SeekKey<'_>, SeekOp)>, + ) -> Result<CursorResult<Option<u64>>> { loop { let page = self.stack.top(); let cell_idx = self.stack.current_cell_index(); @@ -358,11 +433,11 @@ // todo: find a better way to flag moved to end or begin of page if self.stack.current_cell_index_less_than_min() { loop { - if self.stack.current_cell_index() > 0 { - self.stack.retreat(); + if self.stack.current_cell_index() >= 0 { break; } if self.stack.has_parent() { + self.going_upwards = true; self.stack.pop(); } else { // moved to begin of btree @@ -374,11 +449,6 @@ } let cell_idx = cell_idx as usize; - tracing::trace!( - "get_prev_record current id={} cell={}", - page.get().id, - cell_idx - ); return_if_locked!(page); if !page.is_loaded() { self.pager.load_page(page.clone())?; @@ -387,13 +457,24 @@ let contents = page.get().contents.as_ref().unwrap(); let cell_count = contents.cell_count(); + + // If we are at the end of the page and we haven't just come back from the right child, + // we now need to move to the rightmost child. 
+ if cell_idx as i32 == i32::MAX && !self.going_upwards { + let rightmost_pointer = contents.rightmost_pointer(); + if let Some(rightmost_pointer) = rightmost_pointer { + self.stack + .push_backwards(self.pager.read_page(rightmost_pointer as usize)?); + continue; + } + } + let cell_idx = if cell_idx >= cell_count { self.stack.set_cell_index(cell_count as i32 - 1); cell_count - 1 } else { cell_idx }; - let cell = contents.cell_get( cell_idx, payload_overflow_threshold_max(contents.page_type(), self.usable_space() as u16), @@ -407,9 +488,7 @@ impl BTreeCursor { _rowid, }) => { let mem_page = self.pager.read_page(_left_child_page as usize)?; - self.stack.push(mem_page); - // use cell_index = i32::MAX to tell next loop to go to the end of the current page - self.stack.set_cell_index(i32::MAX); + self.stack.push_backwards(mem_page); continue; } BTreeCell::TableLeafCell(TableLeafCell { @@ -429,8 +508,141 @@ impl BTreeCursor { self.stack.retreat(); return Ok(CursorResult::Ok(Some(_rowid))); } - BTreeCell::IndexInteriorCell(_) => todo!(), - BTreeCell::IndexLeafCell(_) => todo!(), + BTreeCell::IndexInteriorCell(IndexInteriorCell { + payload, + left_child_page, + first_overflow_page, + payload_size, + }) => { + if !self.going_upwards { + // In backwards iteration, if we haven't just moved to this interior node from the + // right child, but instead are about to move to the left child, we need to retreat + // so that we don't come back to this node again. + // For example: + // this parent: key 666 + // left child has: key 663, key 664, key 665 + // we need to move to the previous parent (with e.g. key 662) when iterating backwards. + self.stack.retreat(); + let mem_page = self.pager.read_page(left_child_page as usize)?; + self.stack.push(mem_page); + // use cell_index = i32::MAX to tell next loop to go to the end of the current page + self.stack.set_cell_index(i32::MAX); + continue; + } + if let Some(next_page) = first_overflow_page { + return_if_io!(self.process_overflow_read(payload, next_page, payload_size)) + } else { + crate::storage::sqlite3_ondisk::read_record( + payload, + self.get_immutable_record_or_create().as_mut().unwrap(), + )? + }; + + // Going upwards = we just moved to an interior cell from the right child. + // On the first pass we must take the record from the interior cell (since unlike table btrees, index interior cells have payloads) + // We then mark going_upwards=false so that we go back down the tree on the next invocation. 
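Conceptually, the branches above implement a reverse in-order walk: right subtree first, then the interior cell's own key, then the left subtree. A recursive toy sketch of the same visit order, using a hypothetical `Node` type (the real cursor does this iteratively via the page stack and the `going_upwards` flag):

```rust
// Toy sketch of backwards index iteration as reverse in-order traversal.
// `Node` is hypothetical; interior cells carry real keys, unlike table b-trees.
enum Node {
    Leaf(Vec<i64>),
    // keys[i] sits between children[i] and children[i + 1].
    Interior { children: Vec<Node>, keys: Vec<i64> },
}

fn reverse_in_order(node: &Node, out: &mut Vec<i64>) {
    match node {
        Node::Leaf(keys) => out.extend(keys.iter().rev()),
        Node::Interior { children, keys } => {
            // Visit the rightmost child first, then the divider key
            // (the "came up from the right child" case), then its left sibling.
            for i in (0..children.len()).rev() {
                reverse_in_order(&children[i], out);
                if i > 0 {
                    out.push(keys[i - 1]);
                }
            }
        }
    }
}

fn main() {
    let tree = Node::Interior {
        children: vec![Node::Leaf(vec![1, 2]), Node::Leaf(vec![4, 5])],
        keys: vec![3],
    };
    let mut out = Vec::new();
    reverse_in_order(&tree, &mut out);
    assert_eq!(out, vec![5, 4, 3, 2, 1]);
}
```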
+ self.going_upwards = false; + if predicate.is_none() { + let rowid = match self.get_immutable_record().as_ref().unwrap().last_value() + { + Some(RefValue::Integer(rowid)) => *rowid as u64, + _ => unreachable!("index cells should have an integer rowid"), + }; + return Ok(CursorResult::Ok(Some(rowid))); + } + + let (key, op) = predicate.as_ref().unwrap(); + let SeekKey::IndexKey(index_key) = key else { + unreachable!("index seek key should be a record"); + }; + let order = { + let record = self.get_immutable_record(); + let record = record.as_ref().unwrap(); + let record_values = record.get_values(); + let record_slice_same_num_cols = + &record_values[..index_key.get_values().len()]; + let order = compare_immutable( + record_slice_same_num_cols, + index_key.get_values(), + self.index_key_sort_order, + ); + order + }; + + let found = match op { + SeekOp::EQ => order.is_eq(), + SeekOp::LE => order.is_le(), + SeekOp::LT => order.is_lt(), + _ => unreachable!("Seek GT/GE should not happen in get_prev_record() because we are iterating backwards"), + }; + if found { + let rowid = match self.get_immutable_record().as_ref().unwrap().last_value() + { + Some(RefValue::Integer(rowid)) => *rowid as u64, + _ => unreachable!("index cells should have an integer rowid"), + }; + return Ok(CursorResult::Ok(Some(rowid))); + } else { + continue; + } + } + BTreeCell::IndexLeafCell(IndexLeafCell { + payload, + first_overflow_page, + payload_size, + }) => { + if let Some(next_page) = first_overflow_page { + return_if_io!(self.process_overflow_read(payload, next_page, payload_size)) + } else { + crate::storage::sqlite3_ondisk::read_record( + payload, + self.get_immutable_record_or_create().as_mut().unwrap(), + )? + }; + + self.stack.retreat(); + if predicate.is_none() { + let rowid = match self.get_immutable_record().as_ref().unwrap().last_value() + { + Some(RefValue::Integer(rowid)) => *rowid as u64, + _ => unreachable!("index cells should have an integer rowid"), + }; + return Ok(CursorResult::Ok(Some(rowid))); + } + let (key, op) = predicate.as_ref().unwrap(); + let SeekKey::IndexKey(index_key) = key else { + unreachable!("index seek key should be a record"); + }; + let order = { + let record = self.get_immutable_record(); + let record = record.as_ref().unwrap(); + let record_values = record.get_values(); + let record_slice_same_num_cols = + &record_values[..index_key.get_values().len()]; + let order = compare_immutable( + record_slice_same_num_cols, + index_key.get_values(), + self.index_key_sort_order, + ); + order + }; + let found = match op { + SeekOp::EQ => order.is_eq(), + SeekOp::LE => order.is_le(), + SeekOp::LT => order.is_lt(), + _ => unreachable!("Seek GT/GE should not happen in get_prev_record() because we are iterating backwards"), + }; + if found { + let rowid = match self.get_immutable_record().as_ref().unwrap().last_value() + { + Some(RefValue::Integer(rowid)) => *rowid as u64, + _ => unreachable!("index cells should have an integer rowid"), + }; + return Ok(CursorResult::Ok(Some(rowid))); + } else { + continue; + } + } } } } @@ -554,7 +766,7 @@ impl BTreeCursor { } None => { if has_parent { - debug!("moving simple upwards"); + tracing::trace!("moving simple upwards"); self.going_upwards = true; self.stack.pop(); continue; @@ -569,7 +781,7 @@ impl BTreeCursor { // end let has_parent = self.stack.current() > 0; if has_parent { - debug!("moving upwards"); + tracing::debug!("moving upwards"); self.going_upwards = true; self.stack.pop(); continue; @@ -599,8 +811,8 @@ impl BTreeCursor { 
BTreeCell::TableLeafCell(TableLeafCell { _rowid, _payload, - first_overflow_page, payload_size, + payload_size, + first_overflow_page, }) => { assert!(predicate.is_none()); if let Some(next_page) = first_overflow_page { @@ -657,14 +869,23 @@ let SeekKey::IndexKey(index_key) = key else { unreachable!("index seek key should be a record"); }; - let order = compare_immutable( - &self.get_immutable_record().as_ref().unwrap().get_values(), - index_key.get_values(), - ); + let order = { + let record = self.get_immutable_record(); + let record = record.as_ref().unwrap(); + let record_slice_same_num_cols = + &record.get_values()[..index_key.get_values().len()]; + let order = compare_immutable( + record_slice_same_num_cols, + index_key.get_values(), + self.index_key_sort_order, + ); + order + }; let found = match op { SeekOp::GT => order.is_gt(), SeekOp::GE => order.is_ge(), SeekOp::EQ => order.is_eq(), + _ => unreachable!("Seek LE/LT should not happen in get_next_record() because we are iterating forwards"), }; if found { let rowid = match self.get_immutable_record().as_ref().unwrap().last_value() @@ -708,14 +929,23 @@ let SeekKey::IndexKey(index_key) = key else { unreachable!("index seek key should be a record"); }; - let order = compare_immutable( - &self.get_immutable_record().as_ref().unwrap().get_values(), - index_key.get_values(), - ); + let order = { + let record = self.get_immutable_record(); + let record = record.as_ref().unwrap(); + let record_slice_same_num_cols = + &record.get_values()[..index_key.get_values().len()]; + let order = compare_immutable( + record_slice_same_num_cols, + index_key.get_values(), + self.index_key_sort_order, + ); + order + }; let found = match op { SeekOp::GT => order.is_lt(), SeekOp::GE => order.is_le(), SeekOp::EQ => order.is_le(), + _ => todo!("not implemented: {:?}", op), }; if found { let rowid = match self.get_immutable_record().as_ref().unwrap().last_value() @@ -737,129 +967,14 @@ /// or e.g. find the first record greater than the seek key in a range query (e.g. SELECT * FROM table WHERE col > 10). /// We don't include the rowid in the comparison and that's why the last value from the record is not included. fn do_seek(&mut self, key: SeekKey<'_>, op: SeekOp) -> Result<CursorResult<Option<u64>>> { - return_if_io!(self.move_to(key.clone(), op.clone())); - - { - let page = self.stack.top(); - return_if_locked!(page); - - let contents = page.get().contents.as_ref().unwrap(); - - for cell_idx in 0..contents.cell_count() { - let cell = contents.cell_get( - cell_idx, - payload_overflow_threshold_max( - contents.page_type(), - self.usable_space() as u16, - ), - payload_overflow_threshold_min( - contents.page_type(), - self.usable_space() as u16, - ), - self.usable_space(), - )?; - match &cell { - BTreeCell::TableLeafCell(TableLeafCell { - _rowid: cell_rowid, - _payload: payload, - first_overflow_page, - payload_size, - }) => { - let SeekKey::TableRowId(rowid_key) = key else { - unreachable!("table seek key should be a rowid"); - }; - let found = match op { - SeekOp::GT => *cell_rowid > rowid_key, - SeekOp::GE => *cell_rowid >= rowid_key, - SeekOp::EQ => *cell_rowid == rowid_key, - }; - if found { - if let Some(next_page) = first_overflow_page { - return_if_io!(self.process_overflow_read( - payload, - *next_page, - *payload_size - )) - } else { - crate::storage::sqlite3_ondisk::read_record( - payload, - self.get_immutable_record_or_create().as_mut().unwrap(), - )? 
- }; - self.stack.advance(); - return Ok(CursorResult::Ok(Some(*cell_rowid))); - } else { - self.stack.advance(); - } - } - BTreeCell::IndexLeafCell(IndexLeafCell { - payload, - first_overflow_page, - payload_size, - }) => { - let SeekKey::IndexKey(index_key) = key else { - unreachable!("index seek key should be a record"); - }; - if let Some(next_page) = first_overflow_page { - return_if_io!(self.process_overflow_read( - payload, - *next_page, - *payload_size - )) - } else { - crate::storage::sqlite3_ondisk::read_record( - payload, - self.get_immutable_record_or_create().as_mut().unwrap(), - )? - }; - let record = self.get_immutable_record(); - let record = record.as_ref().unwrap(); - let order = compare_immutable( - &record.get_values().as_slice()[..record.len() - 1], - &index_key.get_values().as_slice()[..], - ); - let found = match op { - SeekOp::GT => order.is_gt(), - SeekOp::GE => order.is_ge(), - SeekOp::EQ => order.is_eq(), - }; - self.stack.advance(); - if found { - let rowid = match record.last_value() { - Some(RefValue::Integer(rowid)) => *rowid as u64, - _ => unreachable!("index cells should have an integer rowid"), - }; - return Ok(CursorResult::Ok(Some(rowid))); - } - } - cell_type => { - unreachable!("unexpected cell type: {:?}", cell_type); - } - } + match key { + SeekKey::TableRowId(rowid) => { + return self.tablebtree_seek(rowid, op); + } + SeekKey::IndexKey(index_key) => { + return self.indexbtree_seek(index_key, op); } } - - // We have now iterated over all cells in the leaf page and found no match. - let is_index = matches!(key, SeekKey::IndexKey(_)); - if is_index { - // Unlike tables, indexes store payloads in interior cells as well. self.move_to() always moves to a leaf page, so there are cases where we need to - // move back up to the parent interior cell and get the next record from there to perform a correct seek. - // an example of how this can occur: - // - // we do an index seek for key K with cmp = SeekOp::GT, meaning we want to seek to the first key that is greater than K. - // in self.move_to(), we encounter an interior cell with key K' = K+2, and move the left child page, which is a leaf page. - // the reason we move to the left child page is that we know that in an index, all keys in the left child page are less than K' i.e. less than K+2, - // meaning that the left subtree may contain a key greater than K, e.g. K+1. however, it is possible that it doesn't, in which case the correct - // next key is K+2, which is in the parent interior cell. - // - // In the seek() method, once we have landed in the leaf page and find that there is no cell with a key greater than K, - // if we were to return Ok(CursorResult::Ok((None, None))), self.record would be None, which is incorrect, because we already know - // that there is a record with a key greater than K (K' = K+2) in the parent interior cell. Hence, we need to move back up the tree - // and get the next matching record from there. - return self.get_next_record(Some((key, op))); - } - - Ok(CursorResult::Ok(None)) } /// Move the cursor to the root page of the btree. @@ -902,9 +1017,589 @@ impl BTreeCursor { } } + /// Specialized version of move_to() for table btrees. 
+ fn tablebtree_move_to(&mut self, rowid: u64, seek_op: SeekOp) -> Result<CursorResult<()>> { + let iter_dir = seek_op.iteration_direction(); + 'outer: loop { + let page = self.stack.top(); + return_if_locked!(page); + let contents = page.get().contents.as_ref().unwrap(); + if contents.is_leaf() { + return Ok(CursorResult::Ok(())); + } + + let cell_count = contents.cell_count(); + let mut min: isize = 0; + let mut max: isize = cell_count as isize - 1; + let mut leftmost_matching_cell = None; + loop { + if min > max { + if let Some(leftmost_matching_cell) = leftmost_matching_cell { + let left_child_page = contents.cell_table_interior_read_left_child_page( + leftmost_matching_cell as usize, + )?; + // If we found our target rowid in the left subtree, + // we need to move the parent cell pointer forwards or backwards depending on the iteration direction. + // For example: since the internal node contains the max rowid of the left subtree, we need to move the + // parent pointer backwards in backwards iteration so that we don't come back to the parent again. + // E.g. + // this parent: rowid 666 + // left child has: 664,665,666 + // we need to move to the previous parent (with e.g. rowid 663) when iterating backwards. + let index_change = + -1 + (iter_dir == IterationDirection::Forwards) as i32 * 2; + self.stack + .set_cell_index(leftmost_matching_cell as i32 + index_change); + let mem_page = self.pager.read_page(left_child_page as usize)?; + self.stack.push(mem_page); + continue 'outer; + } + self.stack.set_cell_index(cell_count as i32 + 1); + match contents.rightmost_pointer() { + Some(right_most_pointer) => { + let mem_page = self.pager.read_page(right_most_pointer as usize)?; + self.stack.push(mem_page); + continue 'outer; + } + None => { + unreachable!("we shall not go back up! The only way is down the slope"); + } + } + } + let cur_cell_idx = (min + max) >> 1; // rustc generates extra insns for (min+max)/2 due to them being isize. we know min&max are >=0 here. + let cell_rowid = contents.cell_table_interior_read_rowid(cur_cell_idx as usize)?; + // in sqlite btrees left child pages have <= keys. + // table btrees can have a duplicate rowid in the interior cell, so for example if we are looking for rowid=10, + // and we find an interior cell with rowid=10, we need to move to the left page since (due to the <= rule of sqlite btrees) + // the left page may have a rowid=10. + // Logic table for determining if target leaf page is in left subtree + // + // Forwards iteration (looking for first match in tree): + // OP | Current Cell vs Seek Key | Action? | Explanation + // GT | > | go left | First > key is in left subtree + // GT | = or < | go right | First > key is in right subtree + // GE | > or = | go left | First >= key is in left subtree + // GE | < | go right | First >= key is in right subtree + // + // Backwards iteration (looking for last match in tree): + // OP | Current Cell vs Seek Key | Action? 
| Explanation + // LE | > or = | go left | Last <= key is in left subtree + // LE | < | go right | Last <= key is in right subtree + // LT | > or = | go left | Last < key is in left subtree + // LT | < | go right? | Last < key is in right subtree, except if cell rowid is exactly 1 less + // + // No iteration (point query): + // EQ | > or = | go left | Last = key is in left subtree + // EQ | < | go right | Last = key is in right subtree + let is_on_left = match seek_op { + SeekOp::GT => cell_rowid > rowid, + SeekOp::GE => cell_rowid >= rowid, + SeekOp::LE => cell_rowid >= rowid, + SeekOp::LT => cell_rowid + 1 >= rowid, + SeekOp::EQ => cell_rowid >= rowid, + }; + if is_on_left { + leftmost_matching_cell = Some(cur_cell_idx as usize); + max = cur_cell_idx - 1; + } else { + min = cur_cell_idx + 1; + } + } + } + } + + /// Specialized version of move_to() for index btrees. + fn indexbtree_move_to( + &mut self, + index_key: &ImmutableRecord, + cmp: SeekOp, + ) -> Result<CursorResult<()>> { + let iter_dir = cmp.iteration_direction(); + 'outer: loop { + let page = self.stack.top(); + return_if_locked!(page); + let contents = page.get().contents.as_ref().unwrap(); + if contents.is_leaf() { + return Ok(CursorResult::Ok(())); + } + + let cell_count = contents.cell_count(); + let mut min: isize = 0; + let mut max: isize = cell_count as isize - 1; + let mut leftmost_matching_cell = None; + loop { + if min > max { + let Some(leftmost_matching_cell) = leftmost_matching_cell else { + self.stack.set_cell_index(contents.cell_count() as i32 + 1); + match contents.rightmost_pointer() { + Some(right_most_pointer) => { + let mem_page = self.pager.read_page(right_most_pointer as usize)?; + self.stack.push(mem_page); + continue 'outer; + } + None => { + unreachable!( + "we shall not go back up! The only way is down the slope" + ); + } + } + }; + let matching_cell = contents.cell_get( + leftmost_matching_cell, + payload_overflow_threshold_max( + contents.page_type(), + self.usable_space() as u16, + ), + payload_overflow_threshold_min( + contents.page_type(), + self.usable_space() as u16, + ), + self.usable_space(), + )?; + self.stack.set_cell_index(leftmost_matching_cell as i32); + // we don't advance in case of forward iteration and index tree internal nodes because we will visit this node going up. + // in backwards iteration, we must retreat because otherwise we would unnecessarily visit this node again. + // Example: + // this parent: key 666, and we found the target key in the left child. + // left child has: key 663, key 664, key 665 + // we need to move to the previous parent (with e.g. key 662) when iterating backwards so that we don't end up back here again. + if iter_dir == IterationDirection::Backwards { + self.stack.retreat(); + } + let BTreeCell::IndexInteriorCell(IndexInteriorCell { + left_child_page, .. + }) = &matching_cell + else { + unreachable!("unexpected cell type: {:?}", matching_cell); + }; + + let mem_page = self.pager.read_page(*left_child_page as usize)?; + self.stack.push(mem_page); + continue 'outer; + } + + let cur_cell_idx = (min + max) >> 1; // rustc generates extra insns for (min+max)/2 due to them being isize. we know min&max are >=0 here. 
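All of these move/seek routines share one binary-search invariant: find the leftmost (or, mirrored for backwards iteration, rightmost) cell satisfying a monotone predicate, remembering the best candidate as the window shrinks. A standalone sketch of the forward case, with hypothetical names:

```rust
// Sketch of the "leftmost matching cell" binary search used by the cursor.
// Requires `pred` to be monotone over the sorted keys (false..false, true..true).
fn leftmost_matching(keys: &[i64], pred: impl Fn(i64) -> bool) -> Option<usize> {
    let (mut min, mut max) = (0isize, keys.len() as isize - 1);
    let mut leftmost = None;
    while min <= max {
        let mid = (min + max) >> 1;
        if pred(keys[mid as usize]) {
            leftmost = Some(mid as usize); // remember candidate, keep looking left
            max = mid - 1;
        } else {
            min = mid + 1;
        }
    }
    leftmost
}

fn main() {
    // First key >= 7 in a sorted interior page.
    let keys = [2, 5, 7, 7, 9];
    assert_eq!(leftmost_matching(&keys, |k| k >= 7), Some(2));
    assert_eq!(leftmost_matching(&keys, |k| k >= 10), None);
}
```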
+ self.stack.set_cell_index(cur_cell_idx as i32); + let cell = contents.cell_get( + cur_cell_idx as usize, + payload_overflow_threshold_max( + contents.page_type(), + self.usable_space() as u16, + ), + payload_overflow_threshold_min( + contents.page_type(), + self.usable_space() as u16, + ), + self.usable_space(), + )?; + let BTreeCell::IndexInteriorCell(IndexInteriorCell { + payload, + payload_size, + first_overflow_page, + .. + }) = &cell + else { + unreachable!("unexpected cell type: {:?}", cell); + }; + + if let Some(next_page) = first_overflow_page { + return_if_io!(self.process_overflow_read(payload, *next_page, *payload_size)) + } else { + crate::storage::sqlite3_ondisk::read_record( + payload, + self.get_immutable_record_or_create().as_mut().unwrap(), + )? + }; + let record = self.get_immutable_record(); + let record = record.as_ref().unwrap(); + let record_slice_equal_number_of_cols = + &record.get_values().as_slice()[..index_key.get_values().len()]; + let interior_cell_vs_index_key = compare_immutable( + record_slice_equal_number_of_cols, + index_key.get_values(), + self.index_key_sort_order, + ); + // in sqlite btrees left child pages have <= keys. + // in general, in forwards iteration we want to find the first key that matches the seek condition. + // in backwards iteration we want to find the last key that matches the seek condition. + // + // Logic table for determining if target leaf page is in left subtree. + // For index b-trees this is a bit more complicated since the interior cells contain payloads (the key is the payload). + // and for non-unique indexes there might be several cells with the same key. + // + // Forwards iteration (looking for first match in tree): + // OP | Current Cell vs Seek Key | Action? | Explanation + // GT | > | go left | First > key could be exactly this one, or in left subtree + // GT | = or < | go right | First > key must be in right subtree + // GE | > | go left | First >= key could be exactly this one, or in left subtree + // GE | = | go left | First >= key could be exactly this one, or in left subtree + // GE | < | go right | First >= key must be in right subtree + // + // Backwards iteration (looking for last match in tree): + // OP | Current Cell vs Seek Key | Action? | Explanation + // LE | > | go left | Last <= key must be in left subtree + // LE | = | go right | Last <= key is either this one, or somewhere to the right of this one. 
So we need to go right to make sure + // LE | < | go right | Last <= key must be in right subtree + // LT | > | go left | Last < key must be in left subtree + // LT | = | go left | Last < key must be in left subtree since we want strictly less than + // LT | < | go right | Last < key could be exactly this one, or in right subtree + // + // No iteration (point query): + // EQ | > | go left | First = key must be in left subtree + // EQ | = | go left | First = key could be exactly this one, or in left subtree + // EQ | < | go right | First = key must be in right subtree + + let target_leaf_page_is_in_left_subtree = match cmp { + SeekOp::GT => interior_cell_vs_index_key.is_gt(), + SeekOp::GE => interior_cell_vs_index_key.is_ge(), + SeekOp::EQ => interior_cell_vs_index_key.is_ge(), + SeekOp::LE => interior_cell_vs_index_key.is_gt(), + SeekOp::LT => interior_cell_vs_index_key.is_ge(), + }; + + if target_leaf_page_is_in_left_subtree { + leftmost_matching_cell = Some(cur_cell_idx as usize); + max = cur_cell_idx - 1; + } else { + min = cur_cell_idx + 1; + } + } + } + } + + /// Specialized version of do_seek() for table btrees that uses binary search instead + /// of iterating cells in order. + fn tablebtree_seek( + &mut self, + rowid: u64, + seek_op: SeekOp, + ) -> Result<CursorResult<Option<u64>>> { + assert!(self.mv_cursor.is_none()); + self.move_to_root(); + return_if_io!(self.tablebtree_move_to(rowid, seek_op)); + let page = self.stack.top(); + return_if_locked!(page); + let contents = page.get().contents.as_ref().unwrap(); + assert!( + contents.is_leaf(), + "tablebtree_seek() called on non-leaf page" + ); + let iter_dir = seek_op.iteration_direction(); + + let cell_count = contents.cell_count(); + let mut min: isize = 0; + let mut max: isize = cell_count as isize - 1; + + // If iter dir is forwards, we want the first cell that matches; + // If iter dir is backwards, we want the last cell that matches. + let mut nearest_matching_cell = None; + loop { + if min > max { + let Some(nearest_matching_cell) = nearest_matching_cell else { + return Ok(CursorResult::Ok(None)); + }; + let matching_cell = contents.cell_get( + nearest_matching_cell, + payload_overflow_threshold_max( + contents.page_type(), + self.usable_space() as u16, + ), + payload_overflow_threshold_min( + contents.page_type(), + self.usable_space() as u16, + ), + self.usable_space(), + )?; + let BTreeCell::TableLeafCell(TableLeafCell { + _rowid: cell_rowid, + _payload, + first_overflow_page, + payload_size, + .. + }) = matching_cell + else { + unreachable!("unexpected cell type: {:?}", matching_cell); + }; + + return_if_io!(self.read_record_w_possible_overflow( + _payload, + first_overflow_page, + payload_size + )); + let cell_idx = if iter_dir == IterationDirection::Forwards { + nearest_matching_cell as i32 + 1 + } else { + nearest_matching_cell as i32 - 1 + }; + self.stack.set_cell_index(cell_idx as i32); + return Ok(CursorResult::Ok(Some(cell_rowid))); + } + + let cur_cell_idx = (min + max) >> 1; // rustc generates extra insns for (min+max)/2 due to them being isize. we know min&max are >=0 here. 
+ let cell_rowid = contents.cell_table_leaf_read_rowid(cur_cell_idx as usize)?; + + let cmp = cell_rowid.cmp(&rowid); + + let found = match seek_op { + SeekOp::GT => cmp.is_gt(), + SeekOp::GE => cmp.is_ge(), + SeekOp::EQ => cmp.is_eq(), + SeekOp::LE => cmp.is_le(), + SeekOp::LT => cmp.is_lt(), + }; + + // rowids are unique, so we can return the rowid immediately + if found && SeekOp::EQ == seek_op { + let cur_cell = contents.cell_get( + cur_cell_idx as usize, + payload_overflow_threshold_max( + contents.page_type(), + self.usable_space() as u16, + ), + payload_overflow_threshold_min( + contents.page_type(), + self.usable_space() as u16, + ), + self.usable_space(), + )?; + let BTreeCell::TableLeafCell(TableLeafCell { + _rowid: _, + _payload, + first_overflow_page, + payload_size, + .. + }) = cur_cell + else { + unreachable!("unexpected cell type: {:?}", cur_cell); + }; + return_if_io!(self.read_record_w_possible_overflow( + _payload, + first_overflow_page, + payload_size + )); + let cell_idx = if iter_dir == IterationDirection::Forwards { + cur_cell_idx + 1 + } else { + cur_cell_idx - 1 + }; + self.stack.set_cell_index(cell_idx as i32); + return Ok(CursorResult::Ok(Some(cell_rowid))); + } + + if found { + match iter_dir { + IterationDirection::Forwards => { + nearest_matching_cell = Some(cur_cell_idx as usize); + max = cur_cell_idx - 1; + } + IterationDirection::Backwards => { + nearest_matching_cell = Some(cur_cell_idx as usize); + min = cur_cell_idx + 1; + } + } + } else { + if cmp.is_gt() { + max = cur_cell_idx - 1; + } else if cmp.is_lt() { + min = cur_cell_idx + 1; + } else { + match iter_dir { + IterationDirection::Forwards => { + min = cur_cell_idx + 1; + } + IterationDirection::Backwards => { + max = cur_cell_idx - 1; + } + } + } + } + } + } + + fn indexbtree_seek( + &mut self, + key: &ImmutableRecord, + seek_op: SeekOp, + ) -> Result<CursorResult<Option<u64>>> { + self.move_to_root(); + return_if_io!(self.indexbtree_move_to(key, seek_op)); + + let page = self.stack.top(); + return_if_locked!(page); + + let contents = page.get().contents.as_ref().unwrap(); + + let cell_count = contents.cell_count(); + let mut min: isize = 0; + let mut max: isize = cell_count as isize - 1; + + let iter_dir = seek_op.iteration_direction(); + + // If iter dir is forwards, we want the first cell that matches; + // If iter dir is backwards, we want the last cell that matches. + let mut nearest_matching_cell = None; + loop { + if min > max { + let Some(nearest_matching_cell) = nearest_matching_cell else { + // We have now iterated over all cells in the leaf page and found no match. + // Unlike tables, indexes store payloads in interior cells as well. self.move_to() always moves to a leaf page, so there are cases where we need to + // move back up to the parent interior cell and get the next record from there to perform a correct seek. + // an example of how this can occur: + // + // we do an index seek for key K with cmp = SeekOp::GT, meaning we want to seek to the first key that is greater than K. + // in self.move_to(), we encounter an interior cell with key K' = K+2, and move to the left child page, which is a leaf page. + // the reason we move to the left child page is that we know that in an index, all keys in the left child page are less than K' i.e. less than K+2, + // meaning that the left subtree may contain a key greater than K, e.g. K+1. however, it is possible that it doesn't, in which case the correct + // next key is K+2, which is in the parent interior cell. 
+ // + // In the seek() method, once we have landed in the leaf page and find that there is no cell with a key greater than K, + // if we were to return Ok(CursorResult::Ok((None, None))), self.record would be None, which is incorrect, because we already know + // that there is a record with a key greater than K (K' = K+2) in the parent interior cell. Hence, we need to move back up the tree + // and get the next matching record from there. + match seek_op.iteration_direction() { + IterationDirection::Forwards => { + self.stack.set_cell_index(cell_count as i32); + return self.get_next_record(Some((SeekKey::IndexKey(key), seek_op))); + } + IterationDirection::Backwards => { + self.stack.set_cell_index(-1); + return self.get_prev_record(Some((SeekKey::IndexKey(key), seek_op))); + } + } + }; + let cell = contents.cell_get( + nearest_matching_cell as usize, + payload_overflow_threshold_max( + contents.page_type(), + self.usable_space() as u16, + ), + payload_overflow_threshold_min( + contents.page_type(), + self.usable_space() as u16, + ), + self.usable_space(), + )?; + + let BTreeCell::IndexLeafCell(IndexLeafCell { + payload, + first_overflow_page, + payload_size, + }) = &cell + else { + unreachable!("unexpected cell type: {:?}", cell); + }; + + if let Some(next_page) = first_overflow_page { + return_if_io!(self.process_overflow_read(payload, *next_page, *payload_size)) + } else { + crate::storage::sqlite3_ondisk::read_record( + payload, + self.get_immutable_record_or_create().as_mut().unwrap(), + )? + } + let record = self.get_immutable_record(); + let record = record.as_ref().unwrap(); + let rowid = match record.last_value() { + Some(RefValue::Integer(rowid)) => *rowid as u64, + _ => unreachable!("index cells should have an integer rowid"), + }; + self.stack.set_cell_index(nearest_matching_cell as i32); + self.stack.next_cell_in_direction(iter_dir); + return Ok(CursorResult::Ok(Some(rowid))); + } + + let cur_cell_idx = (min + max) >> 1; // rustc generates extra insns for (min+max)/2 due to them being isize. we know min&max are >=0 here. + self.stack.set_cell_index(cur_cell_idx as i32); + + let cell = contents.cell_get( + cur_cell_idx as usize, + payload_overflow_threshold_max(contents.page_type(), self.usable_space() as u16), + payload_overflow_threshold_min(contents.page_type(), self.usable_space() as u16), + self.usable_space(), + )?; + let BTreeCell::IndexLeafCell(IndexLeafCell { + payload, + first_overflow_page, + payload_size, + }) = &cell + else { + unreachable!("unexpected cell type: {:?}", cell); + }; + + if let Some(next_page) = first_overflow_page { + return_if_io!(self.process_overflow_read(payload, *next_page, *payload_size)) + } else { + crate::storage::sqlite3_ondisk::read_record( + payload, + self.get_immutable_record_or_create().as_mut().unwrap(), + )? 
+                };
+                let record = self.get_immutable_record();
+                let record = record.as_ref().unwrap();
+                let record_slice_equal_number_of_cols =
+                    &record.get_values().as_slice()[..key.get_values().len()];
+                let cmp = compare_immutable(
+                    record_slice_equal_number_of_cols,
+                    key.get_values(),
+                    self.index_key_sort_order,
+                );
+                let found = match seek_op {
+                    SeekOp::GT => cmp.is_gt(),
+                    SeekOp::GE => cmp.is_ge(),
+                    SeekOp::EQ => cmp.is_eq(),
+                    SeekOp::LE => cmp.is_le(),
+                    SeekOp::LT => cmp.is_lt(),
+                };
+                if found {
+                    match iter_dir {
+                        IterationDirection::Forwards => {
+                            nearest_matching_cell = Some(cur_cell_idx as usize);
+                            max = cur_cell_idx - 1;
+                        }
+                        IterationDirection::Backwards => {
+                            nearest_matching_cell = Some(cur_cell_idx as usize);
+                            min = cur_cell_idx + 1;
+                        }
+                    }
+                } else {
+                    if cmp.is_gt() {
+                        max = cur_cell_idx - 1;
+                    } else if cmp.is_lt() {
+                        min = cur_cell_idx + 1;
+                    } else {
+                        match iter_dir {
+                            IterationDirection::Forwards => {
+                                min = cur_cell_idx + 1;
+                            }
+                            IterationDirection::Backwards => {
+                                max = cur_cell_idx - 1;
+                            }
+                        }
+                    }
+                }
+            }
+        }
+
+        fn read_record_w_possible_overflow(
+            &mut self,
+            payload: &'static [u8],
+            next_page: Option<u32>,
+            payload_size: u64,
+        ) -> Result<CursorResult<()>> {
+            if let Some(next_page) = next_page {
+                self.process_overflow_read(payload, next_page, payload_size)
+            } else {
+                crate::storage::sqlite3_ondisk::read_record(
+                    payload,
+                    self.get_immutable_record_or_create().as_mut().unwrap(),
+                )?;
+                Ok(CursorResult::Ok(()))
+            }
+        }
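Both tablebtree_seek above and indexbtree_seek rely on the same binary-search pattern: a found flag derived from the SeekOp, plus direction-dependent shrinking of min/max so that the first (Forwards) or last (Backwards) matching cell wins. A self-contained sketch of that pattern over plain integer keys follows; SeekOp and IterationDirection are re-declared here purely for illustration, and this is not the cursor code itself.

    #[derive(Clone, Copy)]
    enum SeekOp { GT, GE, EQ, LE, LT }

    #[derive(Clone, Copy)]
    enum IterationDirection { Forwards, Backwards }

    /// Find the first (Forwards) or last (Backwards) index in a sorted slice
    /// whose key satisfies the seek predicate, mirroring the loop above.
    fn seek_index(keys: &[i64], target: i64, op: SeekOp, dir: IterationDirection) -> Option<usize> {
        let (mut min, mut max) = (0isize, keys.len() as isize - 1);
        let mut nearest = None;
        while min <= max {
            let mid = (min + max) >> 1;
            let cmp = keys[mid as usize].cmp(&target);
            let found = match op {
                SeekOp::GT => cmp.is_gt(),
                SeekOp::GE => cmp.is_ge(),
                SeekOp::EQ => cmp.is_eq(),
                SeekOp::LE => cmp.is_le(),
                SeekOp::LT => cmp.is_lt(),
            };
            if found {
                nearest = Some(mid as usize);
                match dir {
                    // keep searching left for the first match
                    IterationDirection::Forwards => max = mid - 1,
                    // keep searching right for the last match
                    IterationDirection::Backwards => min = mid + 1,
                }
            } else if cmp.is_gt() {
                max = mid - 1;
            } else if cmp.is_lt() {
                min = mid + 1;
            } else {
                match dir {
                    IterationDirection::Forwards => min = mid + 1,
                    IterationDirection::Backwards => max = mid - 1,
                }
            }
        }
        nearest
    }

    fn main() {
        let keys = [1, 3, 5, 7];
        // First key greater than 4 is 5, at index 2.
        assert_eq!(seek_index(&keys, 4, SeekOp::GT, IterationDirection::Forwards), Some(2));
        // Last key less than 4 is 3, at index 1.
        assert_eq!(seek_index(&keys, 4, SeekOp::LT, IterationDirection::Backwards), Some(1));
    }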
     pub fn move_to(&mut self, key: SeekKey<'_>, cmp: SeekOp) -> Result<CursorResult<()>> {
         assert!(self.mv_cursor.is_none());
-        tracing::debug!("move_to(key={:?} cmp={:?})", key, cmp);
+        tracing::trace!("move_to(key={:?} cmp={:?})", key, cmp);
         // For a table with N rows, we can find any row by row id in O(log(N)) time by starting at the root page and following the B-tree pointers.
         // B-trees consist of interior pages and leaf pages. Interior pages contain pointers to other pages, while leaf pages contain the actual row data.
         //
@@ -930,130 +1625,23 @@ impl BTreeCursor {
         // 6. If we find the cell, we return the record. Otherwise, we return an empty result.
         self.move_to_root();

-        loop {
-            let page = self.stack.top();
-            return_if_locked!(page);
-
-            let contents = page.get().contents.as_ref().unwrap();
-            if contents.is_leaf() {
-                return Ok(CursorResult::Ok(()));
+        match key {
+            SeekKey::TableRowId(rowid_key) => {
+                return self.tablebtree_move_to(rowid_key, cmp);
             }
-
-            let mut found_cell = false;
-            for cell_idx in 0..contents.cell_count() {
-                match &contents.cell_get(
-                    cell_idx,
-                    payload_overflow_threshold_max(
-                        contents.page_type(),
-                        self.usable_space() as u16,
-                    ),
-                    payload_overflow_threshold_min(
-                        contents.page_type(),
-                        self.usable_space() as u16,
-                    ),
-                    self.usable_space(),
-                )? {
-                    BTreeCell::TableInteriorCell(TableInteriorCell {
-                        _left_child_page,
-                        _rowid,
-                    }) => {
-                        let SeekKey::TableRowId(rowid_key) = key else {
-                            unreachable!("table seek key should be a rowid");
-                        };
-                        let target_leaf_page_is_in_left_subtree = match cmp {
-                            SeekOp::GT => rowid_key < *_rowid,
-                            SeekOp::GE => rowid_key <= *_rowid,
-                            SeekOp::EQ => rowid_key <= *_rowid,
-                        };
-                        self.stack.advance();
-                        if target_leaf_page_is_in_left_subtree {
-                            let mem_page = self.pager.read_page(*_left_child_page as usize)?;
-                            self.stack.push(mem_page);
-                            found_cell = true;
-                            break;
-                        }
-                    }
-                    BTreeCell::TableLeafCell(TableLeafCell {
-                        _rowid: _,
-                        _payload: _,
-                        first_overflow_page: _,
-                        ..
-                    }) => {
-                        unreachable!(
-                            "we don't iterate leaf cells while trying to move to a leaf cell"
-                        );
-                    }
-                    BTreeCell::IndexInteriorCell(IndexInteriorCell {
-                        left_child_page,
-                        payload,
-                        first_overflow_page,
-                        payload_size,
-                    }) => {
-                        let SeekKey::IndexKey(index_key) = key else {
-                            unreachable!("index seek key should be a record");
-                        };
-                        if let Some(next_page) = first_overflow_page {
-                            return_if_io!(self.process_overflow_read(
-                                payload,
-                                *next_page,
-                                *payload_size
-                            ))
-                        } else {
-                            crate::storage::sqlite3_ondisk::read_record(
-                                payload,
-                                self.get_immutable_record_or_create().as_mut().unwrap(),
-                            )?
-                        };
-                        let order = compare_immutable(
-                            index_key.get_values(),
-                            self.get_immutable_record().as_ref().unwrap().get_values(),
-                        );
-                        let target_leaf_page_is_in_the_left_subtree = match cmp {
-                            SeekOp::GT => order.is_lt(),
-                            SeekOp::GE => order.is_le(),
-                            SeekOp::EQ => order.is_le(),
-                        };
-                        if target_leaf_page_is_in_the_left_subtree {
-                            // we don't advance in case of index tree internal nodes because we will visit this node going up
-                            let mem_page = self.pager.read_page(*left_child_page as usize)?;
-                            self.stack.push(mem_page);
-                            found_cell = true;
-                            break;
-                        } else {
-                            self.stack.advance();
-                        }
-                    }
-                    BTreeCell::IndexLeafCell(_) => {
-                        unreachable!(
-                            "we don't iterate leaf cells while trying to move to a leaf cell"
-                        );
-                    }
-                }
-            }
-
-            if !found_cell {
-                match contents.rightmost_pointer() {
-                    Some(right_most_pointer) => {
-                        self.stack.advance();
-                        let mem_page = self.pager.read_page(right_most_pointer as usize)?;
-                        self.stack.push(mem_page);
-                        continue;
-                    }
-                    None => {
-                        unreachable!("we shall not go back up! The only way is down the slope");
-                    }
-                }
+            SeekKey::IndexKey(index_key) => {
+                return self.indexbtree_move_to(index_key, cmp);
             }
         }
     }

     /// Insert a record into the btree.
     /// If the insert operation overflows the page, it will be split and the btree will be balanced.
-    fn insert_into_page(
-        &mut self,
-        key: &OwnedValue,
-        record: &ImmutableRecord,
-    ) -> Result<CursorResult<()>> {
+    fn insert_into_page(&mut self, bkey: &BTreeKey) -> Result<CursorResult<()>> {
+        let record = bkey
+            .get_record()
+            .expect("expected record present on insert");
+
         if let CursorState::None = &self.state {
             self.state = CursorState::Write(WriteInfo::new());
         }
@@ -1069,10 +1657,6 @@ impl BTreeCursor {
                 WriteState::Start => {
                     let page = self.stack.top();
                     return_if_locked_maybe_load!(self.pager, page);
-                    let int_key = match key {
-                        OwnedValue::Integer(i) => *i as u64,
-                        _ => unreachable!("btree tables are indexed by integers!"),
-                    };

                    // get page and find cell
                    let (cell_idx, page_type) = {
@@ -1082,23 +1666,27 @@
                        self.pager.add_dirty(page.get().id);
                        let page = page.get().contents.as_mut().unwrap();
-                        assert!(matches!(page.page_type(), PageType::TableLeaf));
+                        assert!(matches!(
+                            page.page_type(),
+                            PageType::TableLeaf | PageType::IndexLeaf
+                        ));

                        // find cell
-                        (self.find_cell(page, int_key), page.page_type())
+                        (self.find_cell(page, bkey), page.page_type())
                    };
                    tracing::debug!("insert_into_page(cell_idx={})", cell_idx);

                    // if the cell index is less than the total cells, check: if it's an existing
                    // rowid, we are going to update / overwrite the cell
                    if cell_idx < page.get_contents().cell_count() {
-                        if let BTreeCell::TableLeafCell(tbl_leaf) = page.get_contents().cell_get(
+                        match page.get_contents().cell_get(
                            cell_idx,
                            payload_overflow_threshold_max(page_type, self.usable_space() as u16),
                            payload_overflow_threshold_min(page_type, self.usable_space() as u16),
                            self.usable_space(),
                        )?
                        {
-                            if tbl_leaf._rowid == int_key {
+                            BTreeCell::TableLeafCell(tbl_leaf) => {
+                                if tbl_leaf._rowid == bkey.to_rowid() {
                                    tracing::debug!("insert_into_page: found exact match with cell_idx={cell_idx}, overwriting");
                                    self.overwrite_cell(page.clone(), cell_idx, record)?;
                                    self.state
@@ -1108,12 +1696,38 @@ impl BTreeCursor {
                                    continue;
                                }
                            }
+                            BTreeCell::IndexLeafCell(idx_leaf) => {
+                                read_record(
+                                    idx_leaf.payload,
+                                    self.get_immutable_record_or_create().as_mut().unwrap(),
+                                )
+                                .expect("failed to read record");
+                                if compare_immutable(
+                                    record.get_values(),
+                                    self.get_immutable_record()
+                                        .as_ref()
+                                        .unwrap()
+                                        .get_values(),
+                                    self.index_key_sort_order,
+                                ) == Ordering::Equal
+                                {
+                                    tracing::debug!("insert_into_page: found exact match with cell_idx={cell_idx}, overwriting");
+                                    self.overwrite_cell(page.clone(), cell_idx, record)?;
+                                    self.state
+                                        .mut_write_info()
+                                        .expect("expected write info")
+                                        .state = WriteState::Finish;
+                                    continue;
+                                }
+                            }
+                            other => panic!("unexpected cell type, expected TableLeaf or IndexLeaf, found: {:?}", other),
+                        }
                    }

                    // insert cell
                    let mut cell_payload: Vec<u8> = Vec::with_capacity(record.len() + 4);
                    fill_cell_payload(
                        page_type,
-                        Some(int_key),
+                        bkey.maybe_rowid(),
                        &mut cell_payload,
                        record,
                        self.usable_space() as u16,
@@ -1123,7 +1737,7 @@
                    // insert
                    let overflow = {
                        let contents = page.get().contents.as_mut().unwrap();
-                        debug!(
+                        tracing::debug!(
                            "insert_into_page(overflow, cell_count={})",
                            contents.cell_count()
                        );
@@ -1188,9 +1802,20 @@
                    let current_page = self.stack.top();
                    {
                        // check if we don't need to balance
-                        // don't continue if there are no overflow cells
+                        // stop balancing if:
+                        // - the current page is not overfull (it has no overflow cells), AND
+                        // - either the page is the root, or the free space on the page is at
+                        //   most 2/3rds of the total usable space on the page
+                        //
+                        // https://github.com/sqlite/sqlite/blob/0aa95099f5003dc99f599ab77ac0004950b281ef/src/btree.c#L9064-L9071
                        let page = current_page.get().contents.as_mut().unwrap();
-                        if page.overflow_cells.is_empty() {
+                        let usable_space = self.usable_space();
+                        let free_space = compute_free_space(page, usable_space as u16);
+                        if page.overflow_cells.is_empty()
+                            && (!self.stack.has_parent()
+                                || free_space as usize * 3 <= usable_space * 2)
+                        {
                            let write_info = self.state.mut_write_info().unwrap();
                            write_info.state = WriteState::Finish;
                            return Ok(CursorResult::Ok(()));
@@ -1204,7 +1829,6 @@
                    let write_info = self.state.mut_write_info().unwrap();
                    write_info.state = WriteState::BalanceNonRoot;
                    self.stack.pop();
-                    self.stack.retreat();
                    return_if_io!(self.balance_non_root());
                }
                WriteState::BalanceNonRoot | WriteState::BalanceNonRootWaitLoadPages => {
@@ -1229,16 +1853,20 @@
            WriteState::BalanceStart => todo!(),
            WriteState::BalanceNonRoot => {
                let parent_page = self.stack.top();
-                if parent_page.is_locked() {
-                    return Ok(CursorResult::IO);
-                }
                return_if_locked_maybe_load!(self.pager, parent_page);
+                // If `move_to` moved to the rightmost page, the cell index will be out of
+                // bounds, i.e. cell_count + 1. In any other case, `move_to` leaves the
+                // index correct.
+                if self.stack.current_cell_index() as usize
+                    == parent_page.get_contents().cell_count() + 1
+                {
+                    self.stack.retreat();
+                }
                parent_page.set_dirty();
                self.pager.add_dirty(parent_page.get().id);
                let parent_contents = parent_page.get().contents.as_ref().unwrap();
                let page_to_balance_idx = self.stack.current_cell_index() as usize;

-                debug!(
+                tracing::debug!(
                    "balance_non_root(parent_id={} page_to_balance_idx={})",
                    parent_page.get().id,
                    page_to_balance_idx
@@ -1248,7 +1876,7 @@
                    PageType::IndexInterior | PageType::TableInterior
                ));
                // Part 1: Find the sibling pages to balance
-                let mut pages_to_balance = vec![];
+                let mut pages_to_balance: [Option<PageRef>; 3] = [const { None }; 3];
                let number_of_cells_in_parent =
                    parent_contents.cell_count() + parent_contents.overflow_cells.len();
@@ -1256,7 +1884,12 @@
                    parent_contents.overflow_cells.is_empty(),
                    "balancing child page with overflowed parent not yet implemented"
                );
-                assert!(page_to_balance_idx <= parent_contents.cell_count());
+                assert!(
+                    page_to_balance_idx <= parent_contents.cell_count(),
+                    "page_to_balance_idx={} is out of bounds for parent cell count {}",
+                    page_to_balance_idx,
+                    number_of_cells_in_parent
+                );
                // As there will be at maximum 3 pages used to balance:
                // sibling_pointer is the index representing one of those 3 pages, and we initialize it to the last possible page.
                // next_divider is the first divider that contains the first page of the 3 pages.
@@ -1311,8 +1944,12 @@
                let current_sibling = sibling_pointer;
                for i in (0..=current_sibling).rev() {
                    let page = self.pager.read_page(pgno as usize)?;
-                    debug_validate_cells!(&page.get_contents(), self.usable_space() as u16);
-                    pages_to_balance.push(page);
+                    #[cfg(debug_assertions)]
+                    {
+                        return_if_locked!(page);
+                        debug_validate_cells!(&page.get_contents(), self.usable_space() as u16);
+                    }
+                    pages_to_balance[i].replace(page);
                    assert_eq!(
                        parent_contents.overflow_cells.len(),
                        0,
@@ -1345,14 +1982,16 @@
                        }
                    };
                }
-                // Reverse in order to keep the right order
-                pages_to_balance.reverse();

                #[cfg(debug_assertions)]
                {
-                    let page_type_of_siblings = pages_to_balance[0].get_contents().page_type();
-                    for page in &pages_to_balance {
-                        let contents = page.get_contents();
+                    let page_type_of_siblings = pages_to_balance[0]
+                        .as_ref()
+                        .unwrap()
+                        .get_contents()
+                        .page_type();
+                    for page in pages_to_balance.iter().take(sibling_count) {
+                        let contents = page.as_ref().unwrap().get_contents();
                        debug_validate_cells!(&contents, self.usable_space() as u16);
                        assert_eq!(contents.page_type(), page_type_of_siblings);
                    }
@@ -1364,7 +2003,7 @@
                    .replace(Some(BalanceInfo {
                        pages_to_balance,
                        rightmost_pointer: right_pointer,
-                        divider_cells: Vec::new(),
+                        divider_cells: [const { None }; 2],
                        sibling_count,
                        first_divider_cell: first_cell_divider,
                    }));
@@ -1380,34 +2019,40 @@
                let all_loaded = balance_info
                    .pages_to_balance
                    .iter()
-                    .all(|page| !page.is_locked());
+                    .take(balance_info.sibling_count)
+                    .all(|page| !page.as_ref().unwrap().is_locked());
                if !all_loaded {
                    return Ok(CursorResult::IO);
                }
                // Now do real balancing
                let parent_page = self.stack.top();
                let parent_contents = parent_page.get_contents();
+                let parent_is_root = !self.stack.has_parent();
+
                assert!(
                    parent_contents.overflow_cells.is_empty(),
                    "overflow parent not yet implemented"
                );
-                // Get divider cells and max_cells
+                /* 1. Get divider cells and max_cells */
                let mut max_cells = 0;
-                let mut pages_to_balance_new = Vec::new();
+                // we only need maximum 5 pages to balance 3 pages
+                let mut pages_to_balance_new: [Option<PageRef>; 5] = [const { None }; 5];
                for i in (0..balance_info.sibling_count).rev() {
-                    let sibling_page = &balance_info.pages_to_balance[i];
+                    let sibling_page = balance_info.pages_to_balance[i].as_ref().unwrap();
                    let sibling_contents = sibling_page.get_contents();
                    sibling_page.set_dirty();
                    self.pager.add_dirty(sibling_page.get().id);
                    max_cells += sibling_contents.cell_count();
                    max_cells += sibling_contents.overflow_cells.len();
-                    if i == 0 {
-                        // we don't have left sibling from this one so we break
-                        break;
+
+                    // Right pointer is not dropped, we simply update it at the end. This could be a divider cell that points
+                    // to the last page in the list of pages to balance or this could be the rightmost pointer that points to a page.
+                    if i == balance_info.sibling_count - 1 {
+                        continue;
                    }
                    // Since we know we have a left sibling, take the divider that points to left sibling of this page
-                    let cell_idx = balance_info.first_divider_cell + i - 1;
+                    let cell_idx = balance_info.first_divider_cell + i;
                    let (cell_start, cell_len) = parent_contents.cell_get_raw_region(
                        cell_idx,
                        payload_overflow_threshold_max(
@@ -1424,8 +2069,15 @@
                    let cell_buf = &buf[cell_start..cell_start + cell_len];
                    max_cells += 1;

+                    tracing::debug!(
+                        "balance_non_root(drop_divider_cell, first_divider_cell={}, divider_cell={}, left_pointer={})",
+                        balance_info.first_divider_cell,
+                        i,
+                        read_u32(cell_buf, 0)
+                    );
+
                    // TODO(pere): make this reference and not copy
-                    balance_info.divider_cells.push(cell_buf.to_vec());
+                    balance_info.divider_cells[i].replace(cell_buf.to_vec());
                    tracing::trace!(
                        "dropping divider cell from parent cell_idx={} count={}",
                        cell_idx,
@@ -1433,29 +2085,33 @@
                    );
                    drop_cell(parent_contents, cell_idx, self.usable_space() as u16)?;
                }
-                assert_eq!(
-                    balance_info.divider_cells.len(),
-                    balance_info.sibling_count - 1,
-                    "the number of pages balancing must be divided by one less divider"
-                );
-                // Reverse divider cells to be in order
-                balance_info.divider_cells.reverse();
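Step 2 below builds a CellArray in which number_of_cells_per_page is a running prefix sum rather than a per-page count: entry i is one past the index of the last cell assigned to page i, which is what cell_count(i) returns and why cells[cell_count(i)] is the divider that follows page i. A small sketch of that invariant (a simplified stand-in, not the actual struct; the real code also accounts for divider cells stored between pages when the siblings are not table leaves):

    // Simplified stand-in for CellArray's prefix-sum bookkeeping (illustration only).
    struct CellArray {
        cells: Vec<Vec<u8>>,                // all cells to distribute, in key order
        number_of_cells_per_page: [u16; 5], // prefix sums: one past the last cell of page i
    }

    impl CellArray {
        // Index one past the last cell assigned to `page_idx` (mirrors cell_count(i)).
        fn cell_count(&self, page_idx: usize) -> usize {
            self.number_of_cells_per_page[page_idx] as usize
        }
    }

    fn main() {
        let cells: Vec<Vec<u8>> = (0..6u8).map(|i| vec![i]).collect();
        // Pages 0, 1, 2 hold two cells each, so the prefix sums are 2, 4, 6.
        let arr = CellArray { cells, number_of_cells_per_page: [2, 4, 6, 0, 0] };
        // Page 1 owns cells[arr.cell_count(0)..arr.cell_count(1)], i.e. two cells.
        assert_eq!(arr.cell_count(1) - arr.cell_count(0), 2);
    }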
+                /* 2. Initialize CellArray with all the cells used for distribution, this includes divider cells if !leaf. */
                let mut cell_array = CellArray {
                    cells: Vec::with_capacity(max_cells),
-                    number_of_cells_per_page: Vec::new(),
+                    number_of_cells_per_page: [0; 5],
                };
                let cells_capacity_start = cell_array.cells.capacity();

                let mut total_cells_inserted = 0;
                // count_cells_in_old_pages is the prefix sum of cells of each page
-                let mut count_cells_in_old_pages = Vec::new();
+                let mut count_cells_in_old_pages: [u16; 5] = [0; 5];

-                let page_type = balance_info.pages_to_balance[0].get_contents().page_type();
+                let page_type = balance_info.pages_to_balance[0]
+                    .as_ref()
+                    .unwrap()
+                    .get_contents()
+                    .page_type();
+                tracing::debug!("balance_non_root(page_type={:?})", page_type);
                let leaf_data = matches!(page_type, PageType::TableLeaf);
                let leaf = matches!(page_type, PageType::TableLeaf | PageType::IndexLeaf);
-                for (i, old_page) in balance_info.pages_to_balance.iter().enumerate() {
-                    let old_page_contents = old_page.get_contents();
+                for (i, old_page) in balance_info
+                    .pages_to_balance
+                    .iter()
+                    .take(balance_info.sibling_count)
+                    .enumerate()
+                {
+                    let old_page_contents = old_page.as_ref().unwrap().get_contents();
                    debug_validate_cells!(&old_page_contents, self.usable_space() as u16);
                    for cell_idx in 0..old_page_contents.cell_count() {
                        let (cell_start, cell_len) = old_page_contents.cell_get_raw_region(
@@ -1477,10 +2133,6 @@
                    }
                    // Insert overflow cells into correct place
                    let offset = total_cells_inserted;
-                    assert!(
-                        old_page_contents.overflow_cells.len() <= 1,
-                        "todo: check this works for more than one overflow cell"
-                    );
                    for overflow_cell in old_page_contents.overflow_cells.iter_mut() {
                        cell_array.cells.insert(
                            offset + overflow_cell.index,
                            to_static_buf(&mut overflow_cell.payload),
                        );
                    }

-                    count_cells_in_old_pages.push(cell_array.cells.len() as u16);
+                    count_cells_in_old_pages[i] = cell_array.cells.len() as u16;

                    let mut cells_inserted =
                        old_page_contents.cell_count() + old_page_contents.overflow_cells.len();

-                    if i < balance_info.pages_to_balance.len() - 1 && !leaf_data {
+                    if i < balance_info.sibling_count - 1 && !leaf_data {
                        // If we are an index page or an interior table page we need to take the divider cell too.
                        // But we don't need the last divider as it will remain the same.
-                        let divider_cell = &mut balance_info.divider_cells[i];
+                        let mut divider_cell = balance_info.divider_cells[i]
+                            .as_mut()
+                            .unwrap()
+                            .as_mut_slice();
                        // TODO(pere): in case the old pages are leaf pages (i.e. index leaf pages), we need to strip page pointers
                        // from divider cells in index interior pages (parent) because those should not be included.
                        cells_inserted += 1;
-                        cell_array.cells.push(to_static_buf(divider_cell.as_mut()));
+                        if !leaf {
+                            // This divider cell needs to be updated with new left pointer,
+                            let right_pointer = old_page_contents.rightmost_pointer().unwrap();
+                            divider_cell[..4].copy_from_slice(&right_pointer.to_be_bytes());
+                        } else {
+                            // index leaf
+                            assert!(divider_cell.len() >= 4);
+                            // let's strip the page pointer
+                            divider_cell = &mut divider_cell[4..];
+                        }
+                        cell_array.cells.push(to_static_buf(divider_cell));
                    }
                    total_cells_inserted += cells_inserted;
                }
@@ -1510,58 +2175,76 @@
                    cells_capacity_start,
                    "calculation of max cells was wrong"
                );
+
+                // Let's copy all cells for later checks
+                #[cfg(debug_assertions)]
+                let mut cells_debug = Vec::new();
                #[cfg(debug_assertions)]
                {
                    for cell in &cell_array.cells {
-                        assert!(cell.len() >= 4);
-
-                        if leaf_data {
-                            assert!(cell[0] != 0, "payload is {:?}", cell);
+                        cells_debug.push(cell.to_vec());
+                        if leaf {
+                            assert!(cell[0] != 0)
                        }
                    }
                }

-                // calculate how many pages to allocate
-                let mut new_page_sizes = Vec::new();
+                #[cfg(debug_assertions)]
+                validate_cells_after_insertion(&cell_array, leaf_data);
+
+                /* 3. Initialize the current size of every page including overflow cells and divider cells that might be included. */
+                let mut new_page_sizes: [i64; 5] = [0; 5];
                let leaf_correction = if leaf { 4 } else { 0 };
-                // number of bytes beyond header, different from global usableSapce which inccludes
+                // number of bytes beyond header, different from global usableSpace which includes
                // header
                let usable_space = self.usable_space() - 12 + leaf_correction;
                for i in 0..balance_info.sibling_count {
-                    cell_array
-                        .number_of_cells_per_page
-                        .push(count_cells_in_old_pages[i]);
+                    cell_array.number_of_cells_per_page[i] = count_cells_in_old_pages[i];
-                    let page = &balance_info.pages_to_balance[i];
+                    let page = &balance_info.pages_to_balance[i].as_ref().unwrap();
                    let page_contents = page.get_contents();
                    let free_space = compute_free_space(page_contents, self.usable_space() as u16);

-                    new_page_sizes.push(usable_space as u16 - free_space);
+                    new_page_sizes[i] = usable_space as i64 - free_space as i64;
                    for overflow in &page_contents.overflow_cells {
-                        let size = new_page_sizes.last_mut().unwrap();
                        // 2 to account for the pointer
-                        *size += 2 + overflow.payload.len() as u16;
+                        new_page_sizes[i] += 2 + overflow.payload.len() as i64;
+                    }
+                    if !leaf && i < balance_info.sibling_count - 1 {
+                        // Account for divider cell which is included in this page.
+                        new_page_sizes[i] +=
+                            cell_array.cells[cell_array.cell_count(i)].len() as i64;
+                    }
                }

+                /* 4. Now let's try to move cells to the left, stacking them without exceeding the maximum size of a page.
+                There are two cases:
+                * If the current page has too many cells, it will move them to the next page.
+                * If it still has space, it will take cells from the right if it can.
+                Here there is a caveat. Taking a cell from the right might take cells from page i+1, i+2, i+3, so not necessarily
+                an adjacent page. But we decrease the size of the adjacent page if we move from the right. This might cause an intermittent state
+                where a page can have size < 0.
+                This will also calculate how many pages are required to balance the cells and store it in sibling_count_new.
+ */ // Try to pack as many cells to the left let mut sibling_count_new = balance_info.sibling_count; let mut i = 0; while i < sibling_count_new { // First try to move cells to the right if they do not fit - while new_page_sizes[i] > usable_space as u16 { + while new_page_sizes[i] > usable_space as i64 { let needs_new_page = i + 1 >= sibling_count_new; if needs_new_page { - sibling_count_new += 1; - new_page_sizes.push(0); - cell_array - .number_of_cells_per_page - .push(cell_array.cells.len() as u16); + sibling_count_new = i + 2; assert!( sibling_count_new <= 5, "it is corrupt to require more than 5 pages to balance 3 siblings" ); + + new_page_sizes[sibling_count_new - 1] = 0; + cell_array.number_of_cells_per_page[sibling_count_new - 1] = + cell_array.cells.len() as u16; } let size_of_cell_to_remove_from_left = - 2 + cell_array.cells[cell_array.cell_count(i) - 1].len() as u16; + 2 + cell_array.cells[cell_array.cell_count(i) - 1].len() as i64; new_page_sizes[i] -= size_of_cell_to_remove_from_left; let size_of_cell_to_move_right = if !leaf_data { if cell_array.number_of_cells_per_page[i] @@ -1569,23 +2252,23 @@ impl BTreeCursor { { // This means we move to the right page the divider cell and we // promote left cell to divider - 2 + cell_array.cells[cell_array.cell_count(i)].len() as u16 + 2 + cell_array.cells[cell_array.cell_count(i)].len() as i64 } else { 0 } } else { size_of_cell_to_remove_from_left }; - new_page_sizes[i + 1] += size_of_cell_to_move_right; + new_page_sizes[i + 1] += size_of_cell_to_move_right as i64; cell_array.number_of_cells_per_page[i] -= 1; } // Now try to take from the right if we didn't have enough while cell_array.number_of_cells_per_page[i] < cell_array.cells.len() as u16 { let size_of_cell_to_remove_from_right = - 2 + cell_array.cells[cell_array.cell_count(i)].len() as u16; + 2 + cell_array.cells[cell_array.cell_count(i)].len() as i64; let can_take = new_page_sizes[i] + size_of_cell_to_remove_from_right - > usable_space as u16; + > usable_space as i64; if can_take { break; } @@ -1596,7 +2279,7 @@ impl BTreeCursor { if cell_array.number_of_cells_per_page[i] < cell_array.cells.len() as u16 { - 2 + cell_array.cells[cell_array.cell_count(i)].len() as u16 + 2 + cell_array.cells[cell_array.cell_count(i)].len() as i64 } else { 0 } @@ -1607,16 +2290,20 @@ impl BTreeCursor { new_page_sizes[i + 1] -= size_of_cell_to_remove_from_right; } - let we_still_need_another_page = + // Check if this page contains up to the last cell. If this happens it means we really just need up to this page. + // Let's update the number of new pages to be up to this page (i+1) + let page_completes_all_cells = cell_array.number_of_cells_per_page[i] >= cell_array.cells.len() as u16; - if we_still_need_another_page { + if page_completes_all_cells { sibling_count_new = i + 1; + break; } i += 1; if i >= sibling_count_new { break; } } + tracing::debug!( "balance_non_root(sibling_count={}, sibling_count_new={}, cells={})", balance_info.sibling_count, @@ -1624,6 +2311,10 @@ impl BTreeCursor { cell_array.cells.len() ); + /* 5. Balance pages starting from a left stacked cell state and move them to right trying to maintain a balanced state + where we only move from left to right if it will not unbalance both pages, meaning moving left to right won't make + right page bigger than left page. + */ // Comment borrowed from SQLite src/btree.c // The packing computed by the previous block is biased toward the siblings // on the left side (siblings with smaller keys). 
The left siblings are
                // ...
@@ -1642,8 +2333,8 @@
                // the same we add to right (we don't add divider to right).
                let mut cell_right = cell_left + 1 - leaf_data as u16;
                loop {
-                    let cell_left_size = cell_array.cell_size(cell_left as usize);
-                    let cell_right_size = cell_array.cell_size(cell_right as usize);
+                    let cell_left_size = cell_array.cell_size(cell_left as usize) as i64;
+                    let cell_right_size = cell_array.cell_size(cell_right as usize) as i64;
                    // TODO: add assert nMaxCells

                    let pointer_size = if i == sibling_count_new - 1 { 0 } else { 2 };
@@ -1679,47 +2370,97 @@
                // Allocate pages or set dirty if not needed
                for i in 0..sibling_count_new {
                    if i < balance_info.sibling_count {
-                        balance_info.pages_to_balance[i].set_dirty();
-                        pages_to_balance_new.push(balance_info.pages_to_balance[i].clone());
+                        let page = balance_info.pages_to_balance[i].as_ref().unwrap();
+                        page.set_dirty();
+                        pages_to_balance_new[i].replace(page.clone());
                    } else {
                        let page = self.pager.do_allocate_page(page_type, 0);
-                        pages_to_balance_new.push(page);
+                        pages_to_balance_new[i].replace(page);
                        // Since this page didn't exist before, we can set it to cells length as it
                        // marks them as empty since it is a prefix sum of cells.
-                        count_cells_in_old_pages.push(cell_array.cells.len() as u16);
+                        count_cells_in_old_pages[i] = cell_array.cells.len() as u16;
                    }
                }
                // Reassign page numbers in increasing order
-                let mut page_numbers = Vec::new();
-                for page in pages_to_balance_new.iter() {
-                    page_numbers.push(page.get().id);
-                }
-                page_numbers.sort();
-                for (page, new_id) in pages_to_balance_new.iter().zip(page_numbers) {
-                    if new_id != page.get().id {
-                        page.get().id = new_id;
-                        self.pager.put_loaded_page(new_id, page.clone());
+                {
+                    let mut page_numbers: [usize; 5] = [0; 5];
+                    for (i, page) in pages_to_balance_new
+                        .iter()
+                        .take(sibling_count_new)
+                        .enumerate()
+                    {
+                        page_numbers[i] = page.as_ref().unwrap().get().id;
+                    }
+                    page_numbers.sort();
+                    for (page, new_id) in pages_to_balance_new
+                        .iter()
+                        .take(sibling_count_new)
+                        .rev()
+                        .zip(page_numbers.iter().rev().take(sibling_count_new))
+                    {
+                        let page = page.as_ref().unwrap();
+                        if *new_id != page.get().id {
+                            page.get().id = *new_id;
+                            self.pager.put_loaded_page(*new_id, page.clone());
+                        }
+                    }
+
+                    #[cfg(debug_assertions)]
+                    {
+                        tracing::debug!(
+                            "balance_non_root(parent page_id={})",
+                            parent_page.get().id
+                        );
+                        for page in pages_to_balance_new.iter().take(sibling_count_new) {
+                            tracing::debug!(
+                                "balance_non_root(new_sibling page_id={})",
+                                page.as_ref().unwrap().get().id
+                            );
+                        }
                    }
                }
-                // Write right pointer in parent page to point to new rightmost page
-                let right_page_id = pages_to_balance_new.last().unwrap().get().id as u32;
+                // pages_pointed_to lets us verify, in debug builds, that we did in fact create
+                // divider cells for all the new pages and that the rightmost pointer points to
+                // the last page.
+                #[cfg(debug_assertions)]
+                let mut pages_pointed_to = HashSet::new();
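The rightmost pointer written next, like every left-child pointer in the divider cells, is a raw 4-byte big-endian page number. A minimal round-trip sketch of that encoding (read_u32 is re-implemented here for illustration; the real helper lives elsewhere in this crate):

    // Illustration of the 4-byte big-endian page-number encoding used for
    // rightmost and left-child pointers in btree pages.
    fn read_u32(buf: &[u8], pos: usize) -> u32 {
        u32::from_be_bytes([buf[pos], buf[pos + 1], buf[pos + 2], buf[pos + 3]])
    }

    fn main() {
        let right_page_id: u32 = 42;
        let mut ptr = [0u8; 4];
        ptr.copy_from_slice(&right_page_id.to_be_bytes());
        assert_eq!(read_u32(&ptr, 0), 42);
    }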
+                // Write right pointer in parent page to point to new rightmost page. Keep in mind
+                // we update the rightmost pointer first because inserting cells could defragment the
+                // parent page, therefore invalidating the pointer.
+                let right_page_id = pages_to_balance_new[sibling_count_new - 1]
+                    .as_ref()
+                    .unwrap()
+                    .get()
+                    .id as u32;
                let rightmost_pointer = balance_info.rightmost_pointer;
                let rightmost_pointer =
                    unsafe { std::slice::from_raw_parts_mut(rightmost_pointer, 4) };
                rightmost_pointer[0..4].copy_from_slice(&right_page_id.to_be_bytes());
+                #[cfg(debug_assertions)]
+                pages_pointed_to.insert(right_page_id);
+                tracing::debug!(
+                    "balance_non_root(rightmost_pointer_update, rightmost_pointer={})",
+                    right_page_id
+                );
+
+                /* 6. Update parent pointers. Update right pointer and insert divider cells with newly created distribution of cells */
                // Ensure right-child pointer of the right-most new sibling page points to the page
                // that was originally in that place.
                let is_leaf_page = matches!(page_type, PageType::TableLeaf | PageType::IndexLeaf);
                if !is_leaf_page {
-                    let last_page = balance_info.pages_to_balance.last().unwrap();
+                    let last_page = balance_info.pages_to_balance[balance_info.sibling_count - 1]
+                        .as_ref()
+                        .unwrap();
                    let right_pointer = last_page.get_contents().rightmost_pointer().unwrap();
-                    let new_last_page = pages_to_balance_new.last().unwrap();
+                    let new_last_page = pages_to_balance_new[sibling_count_new - 1]
+                        .as_ref()
+                        .unwrap();
                    new_last_page
                        .get_contents()
-                        .write_u32(PAGE_HEADER_OFFSET_RIGHTMOST_PTR, right_pointer);
+                        .write_u32(offset::BTREE_RIGHTMOST_PTR, right_pointer);
                }
                // TODO: pointer map update (vacuum support)
                // Update divider cells in parent
@@ -1729,6 +2470,7 @@
                    .take(sibling_count_new - 1)
                /* do not take last page */
                {
+                    let page = page.as_ref().unwrap();
                    let divider_cell_idx = cell_array.cell_count(i);
                    let mut divider_cell = &mut cell_array.cells[divider_cell_idx];
                    // FIXME: don't use auxiliary space, could be done without allocations
@@ -1738,10 +2480,17 @@
                        // Make this page's rightmost pointer point to pointer of divider cell before modification
                        let previous_pointer_divider = read_u32(&divider_cell, 0);
                        page.get_contents()
-                            .write_u32(PAGE_HEADER_OFFSET_RIGHTMOST_PTR, previous_pointer_divider);
+                            .write_u32(offset::BTREE_RIGHTMOST_PTR, previous_pointer_divider);
                        // divider cell now points to this page
-                        divider_cell[0..4].copy_from_slice(&(page.get().id as u32).to_be_bytes());
-                        new_divider_cell.extend_from_slice(divider_cell);
+                        new_divider_cell.extend_from_slice(&(page.get().id as u32).to_be_bytes());
+                        // now copy the rest of the divider cell:
+                        // Table Interior page:
+                        //   * varint rowid
+                        // Index Interior page:
+                        //   * varint payload size
+                        //   * payload
+                        //   * first overflow page (u32 optional)
+                        new_divider_cell.extend_from_slice(&divider_cell[4..]);
                    } else if leaf_data {
                        // Leaf table
                        // FIXME: not needed conversion
@@ -1754,10 +2503,29 @@
                        write_varint_to_vec(rowid, &mut new_divider_cell);
                    } else {
                        // Leaf index
+                        new_divider_cell.extend_from_slice(&(page.get().id as u32).to_be_bytes());
                        new_divider_cell.extend_from_slice(divider_cell);
                    }
+
+                    let left_pointer = read_u32(&new_divider_cell[..4], 0);
+                    assert!(left_pointer != parent_page.get().id as u32);
+                    #[cfg(debug_assertions)]
+                    pages_pointed_to.insert(left_pointer);
+                    tracing::debug!(
+                        "balance_non_root(insert_divider_cell, first_divider_cell={}, divider_cell={}, left_pointer={})",
+                        balance_info.first_divider_cell,
+                        i,
+                        left_pointer
+                    );
+                    assert_eq!(left_pointer, page.get().id as u32);
+                    // FIXME: remove this lock
+                    assert!(
+                        left_pointer <= self.pager.db_header.lock().database_size,
+                        "invalid page number divider left pointer {} > database number of pages",
+ left_pointer, + ); // FIXME: defragment shouldn't be needed - defragment_page(parent_contents, self.usable_space() as u16); + // defragment_page(parent_contents, self.usable_space() as u16); insert_into_cell( parent_contents, &new_divider_cell, @@ -1765,9 +2533,55 @@ impl BTreeCursor { self.usable_space() as u16, ) .unwrap(); + #[cfg(debug_assertions)] + self.validate_balance_non_root_divider_cell_insertion( + balance_info, + parent_contents, + i, + page, + ); } - // TODO: update pages - let mut done = vec![false; sibling_count_new]; + tracing::debug!( + "balance_non_root(parent_overflow={})", + parent_contents.overflow_cells.len() + ); + + #[cfg(debug_assertions)] + { + // Let's ensure every page is pointed to by the divider cell or the rightmost pointer. + for page in pages_to_balance_new.iter().take(sibling_count_new) { + let page = page.as_ref().unwrap(); + assert!( + pages_pointed_to.contains(&(page.get().id as u32)), + "page {} not pointed to by divider cell or rightmost pointer", + page.get().id + ); + } + } + /* 7. Start real movement of cells. Next comment is borrowed from SQLite: */ + /* Now update the actual sibling pages. The order in which they are updated + ** is important, as this code needs to avoid disrupting any page from which + ** cells may still to be read. In practice, this means: + ** + ** (1) If cells are moving left (from apNew[iPg] to apNew[iPg-1]) + ** then it is not safe to update page apNew[iPg] until after + ** the left-hand sibling apNew[iPg-1] has been updated. + ** + ** (2) If cells are moving right (from apNew[iPg] to apNew[iPg+1]) + ** then it is not safe to update page apNew[iPg] until after + ** the right-hand sibling apNew[iPg+1] has been updated. + ** + ** If neither of the above apply, the page is safe to update. + ** + ** The iPg value in the following loop starts at nNew-1 goes down + ** to 0, then back up to nNew-1 again, thus making two passes over + ** the pages. On the initial downward pass, only condition (1) above + ** needs to be tested because (2) will always be true from the previous + ** step. On the upward pass, both conditions are always true, so the + ** upwards pass simply processes pages that were missed on the downward + ** pass. 
+                */
+                let mut done = [false; 5];
                for i in (1 - sibling_count_new as i64)..sibling_count_new as i64 {
                    let page_idx = i.unsigned_abs() as usize;
                    if done[page_idx] {
@@ -1782,6 +2596,7 @@
                        (0, 0, cell_array.cell_count(0))
                    } else {
                        let this_was_old_page = page_idx < balance_info.sibling_count;
+                        // We add !leaf_data because we want to skip 1 for the divider cell which is encountered between assigned pages
                        let start_old_cells = if this_was_old_page {
                            count_cells_in_old_pages[page_idx - 1] as usize
                                + (!leaf_data) as usize
@@ -1796,30 +2611,103 @@
                            cell_array.cell_count(page_idx) - start_new_cells,
                        )
                    };
-                    let page = &pages_to_balance_new[page_idx];
+                    let page = pages_to_balance_new[page_idx].as_ref().unwrap();
                    tracing::debug!("pre_edit_page(page={})", page.get().id);
-                    let page = page.get_contents();
+                    let page_contents = page.get_contents();
                    edit_page(
-                        page,
+                        page_contents,
                        start_old_cells,
                        start_new_cells,
                        number_new_cells,
                        &cell_array,
                        self.usable_space() as u16,
                    )?;
-                    debug_validate_cells!(page, self.usable_space() as u16);
+                    debug_validate_cells!(page_contents, self.usable_space() as u16);
                    tracing::trace!(
                        "edit_page page={} cells={}",
-                        pages_to_balance_new[page_idx].get().id,
-                        page.cell_count()
+                        page.get().id,
+                        page_contents.cell_count()
                    );

-                    page.overflow_cells.clear();
+                    page_contents.overflow_cells.clear();

                    done[page_idx] = true;
                }
-                // TODO: balance root
-                // TODO: free pages
+
+                // TODO: vacuum support
+                let first_child_page = pages_to_balance_new[0].as_ref().unwrap();
+                let first_child_contents = first_child_page.get_contents();
+                if parent_is_root
+                    && parent_contents.cell_count() == 0
+
+                    // this check makes sure we don't end up with negative free space
+                    && parent_contents.offset
+                        <= compute_free_space(first_child_contents, self.usable_space() as u16)
+                            as usize
+                {
+                    // From SQLite:
+                    // The root page of the b-tree now contains no cells. The only sibling
+                    // page is the right-child of the parent. Copy the contents of the
+                    // child page into the parent, decreasing the overall height of the
+                    // b-tree structure by one. This is described as the "balance-shallower"
+                    // sub-algorithm in some documentation.
+                    assert!(sibling_count_new == 1);
+                    let parent_offset = if parent_page.get().id == 1 {
+                        DATABASE_HEADER_SIZE
+                    } else {
+                        0
+                    };

+                    // From SQLite:
+                    // It is critical that the child page be defragmented before being
+                    // copied into the parent, because if the parent is page 1 then it will
+                    // be smaller than the child due to the database header, and so
+                    // all the free space needs to be up front.
+                    defragment_page(first_child_contents, self.usable_space() as u16);
+
+                    let child_top = first_child_contents.cell_content_area() as usize;
+                    let parent_buf = parent_contents.as_ptr();
+                    let child_buf = first_child_contents.as_ptr();
+                    let content_size = self.usable_space() - child_top;
+
+                    // Copy cell contents
+                    parent_buf[child_top..child_top + content_size]
+                        .copy_from_slice(&child_buf[child_top..child_top + content_size]);
+
+                    // Copy header and pointer
+                    // NOTE: don't use .cell_pointer_array_offset_and_size() because of different
+                    // header size
+                    let header_and_pointer_size = first_child_contents.header_size()
+                        + first_child_contents.cell_pointer_array_size();
+                    parent_buf[parent_offset..parent_offset + header_and_pointer_size]
+                        .copy_from_slice(
+                            &child_buf[first_child_contents.offset
+                                ..first_child_contents.offset + header_and_pointer_size],
+                        );
+
+                    self.stack.set_cell_index(0); // reset cell index, top is already parent
+                    sibling_count_new -= 1; // decrease sibling count for debugging and free at the end
+                    assert!(sibling_count_new < balance_info.sibling_count);
+                }
+
+                #[cfg(debug_assertions)]
+                self.post_balance_non_root_validation(
+                    &parent_page,
+                    balance_info,
+                    parent_contents,
+                    pages_to_balance_new,
+                    page_type,
+                    leaf_data,
+                    cells_debug,
+                    sibling_count_new,
+                    rightmost_pointer,
+                );
+
+                // We have to free pages that are not used anymore
+                for i in sibling_count_new..balance_info.sibling_count {
+                    let page = balance_info.pages_to_balance[i].as_ref().unwrap();
+                    self.pager.free_page(Some(page.clone()), page.get().id)?;
+                }
                (WriteState::BalanceStart, Ok(CursorResult::Ok(())))
            }
            WriteState::Finish => todo!(),
@@ -1833,6 +2721,525 @@
        result
    }

+    #[cfg(debug_assertions)]
+    fn validate_balance_non_root_divider_cell_insertion(
+        &self,
+        balance_info: &mut BalanceInfo,
+        parent_contents: &mut PageContent,
+        i: usize,
+        page: &std::sync::Arc<Page>,
+    ) {
+        let left_pointer = if parent_contents.overflow_cells.len() == 0 {
+            let (cell_start, cell_len) = parent_contents.cell_get_raw_region(
+                balance_info.first_divider_cell + i,
+                payload_overflow_threshold_max(
+                    parent_contents.page_type(),
+                    self.usable_space() as u16,
+                ),
+                payload_overflow_threshold_min(
+                    parent_contents.page_type(),
+                    self.usable_space() as u16,
+                ),
+                self.usable_space(),
+            );
+            tracing::debug!(
+                "balance_non_root(cell_start={}, cell_len={})",
+                cell_start,
+                cell_len
+            );
+
+            let left_pointer = read_u32(
+                &parent_contents.as_ptr()[cell_start..cell_start + cell_len],
+                0,
+            );
+            left_pointer
+        } else {
+            let mut left_pointer = None;
+            for cell in parent_contents.overflow_cells.iter() {
+                if cell.index == balance_info.first_divider_cell + i {
+                    left_pointer = Some(read_u32(&cell.payload, 0))
+                }
+            }
+            left_pointer.expect("overflow cell with divider cell was not found")
+        };
+        assert_eq!(left_pointer, page.get().id as u32, "the cell we just inserted doesn't point to the correct page. points to {}, should point to {}",
+            left_pointer,
+            page.get().id as u32
+        );
+    }
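The divider-cell handling above and the validation below both depend on the interior-cell byte layout: bytes 0..4 are the big-endian left-child page number, and everything from byte 4 on is the key material (a varint rowid for table interiors, a record payload for index interiors). A sketch of how a divider can be rebuilt to point at a new left child while keeping its key (a hypothetical helper, not part of this file):

    // Hypothetical helper: swap the left-child pointer of an interior cell,
    // keeping the key bytes (everything after the first 4 bytes) intact.
    fn rebuild_divider(new_left_child: u32, old_cell: &[u8]) -> Vec<u8> {
        assert!(old_cell.len() >= 4);
        let mut cell = Vec::with_capacity(old_cell.len());
        cell.extend_from_slice(&new_left_child.to_be_bytes()); // new pointer
        cell.extend_from_slice(&old_cell[4..]);                // key material unchanged
        cell
    }

    fn main() {
        let old = [0, 0, 0, 7, 0x05]; // points to page 7, key byte 0x05
        let new = rebuild_divider(9, &old);
        assert_eq!(new, vec![0, 0, 0, 9, 0x05]); // now points to page 9, same key
    }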
+    #[cfg(debug_assertions)]
+    fn post_balance_non_root_validation(
+        &self,
+        parent_page: &PageRef,
+        balance_info: &mut BalanceInfo,
+        parent_contents: &mut PageContent,
+        pages_to_balance_new: [Option<PageRef>; 5],
+        page_type: PageType,
+        leaf_data: bool,
+        mut cells_debug: Vec<Vec<u8>>,
+        sibling_count_new: usize,
+        rightmost_pointer: &mut [u8],
+    ) {
+        let mut valid = true;
+        let mut current_index_cell = 0;
+        for cell_idx in 0..parent_contents.cell_count() {
+            let cell = parent_contents
+                .cell_get(
+                    cell_idx,
+                    payload_overflow_threshold_max(
+                        parent_contents.page_type(),
+                        self.usable_space() as u16,
+                    ),
+                    payload_overflow_threshold_min(
+                        parent_contents.page_type(),
+                        self.usable_space() as u16,
+                    ),
+                    self.usable_space(),
+                )
+                .unwrap();
+            match cell {
+                BTreeCell::TableInteriorCell(table_interior_cell) => {
+                    let left_child_page = table_interior_cell._left_child_page;
+                    if left_child_page == parent_page.get().id as u32 {
+                        tracing::error!("balance_non_root(parent_divider_points_to_same_page, page_id={}, cell_left_child_page={})",
+                            parent_page.get().id,
+                            left_child_page,
+                        );
+                        valid = false;
+                    }
+                }
+                BTreeCell::IndexInteriorCell(index_interior_cell) => {
+                    let left_child_page = index_interior_cell.left_child_page;
+                    if left_child_page == parent_page.get().id as u32 {
+                        tracing::error!("balance_non_root(parent_divider_points_to_same_page, page_id={}, cell_left_child_page={})",
+                            parent_page.get().id,
+                            left_child_page,
+                        );
+                        valid = false;
+                    }
+                }
+                _ => {}
+            }
+        }
+        // Let's now make an in-depth check that we did in fact add all possible cells somewhere and they are not lost
+        for (page_idx, page) in pages_to_balance_new
+            .iter()
+            .take(sibling_count_new)
+            .enumerate()
+        {
+            let page = page.as_ref().unwrap();
+            let contents = page.get_contents();
+            debug_validate_cells!(contents, self.usable_space() as u16);
+            // Cells are distributed in order
+            for cell_idx in 0..contents.cell_count() {
+                let (cell_start, cell_len) = contents.cell_get_raw_region(
+                    cell_idx,
+                    payload_overflow_threshold_max(
+                        contents.page_type(),
+                        self.usable_space() as u16,
+                    ),
+                    payload_overflow_threshold_min(
+                        contents.page_type(),
+                        self.usable_space() as u16,
+                    ),
+                    self.usable_space(),
+                );
+                let buf = contents.as_ptr();
+                let cell_buf = to_static_buf(&mut buf[cell_start..cell_start + cell_len]);
+                let cell_buf_in_array = &cells_debug[current_index_cell];
+                if cell_buf != cell_buf_in_array {
+                    tracing::error!("balance_non_root(cell_not_found_debug, page_id={}, cell_in_cell_array_idx={})",
+                        page.get().id,
+                        current_index_cell,
+                    );
+                    valid = false;
+                }
+
+                let cell = crate::storage::sqlite3_ondisk::read_btree_cell(
+                    cell_buf,
+                    &page_type,
+                    0,
+                    payload_overflow_threshold_max(
+                        parent_contents.page_type(),
+                        self.usable_space() as u16,
+                    ),
+                    payload_overflow_threshold_min(
+                        parent_contents.page_type(),
+                        self.usable_space() as u16,
+                    ),
+                    self.usable_space(),
+                )
+                .unwrap();
+                match &cell {
+                    BTreeCell::TableInteriorCell(table_interior_cell) => {
+                        let left_child_page = table_interior_cell._left_child_page;
+                        if left_child_page == page.get().id as u32 {
+                            tracing::error!("balance_non_root(child_page_points_same_page, page_id={}, cell_left_child_page={}, page_idx={})",
+                                page.get().id,
+                                left_child_page,
+                                page_idx
+                            );
+                            valid = false;
+                        }
+                        if left_child_page == parent_page.get().id as u32 {
+                            tracing::error!("balance_non_root(child_page_points_parent_of_child, page_id={}, cell_left_child_page={}, page_idx={})",
page.get().id, + left_child_page, + page_idx + ); + valid = false; + } + } + BTreeCell::IndexInteriorCell(index_interior_cell) => { + let left_child_page = index_interior_cell.left_child_page; + if left_child_page == page.get().id as u32 { + tracing::error!("balance_non_root(child_page_points_same_page, page_id={}, cell_left_child_page={}, page_idx={})", + page.get().id, + left_child_page, + page_idx + ); + valid = false; + } + if left_child_page == parent_page.get().id as u32 { + tracing::error!("balance_non_root(child_page_points_parent_of_child, page_id={}, cell_left_child_page={}, page_idx={})", + page.get().id, + left_child_page, + page_idx + ); + valid = false; + } + } + _ => {} + } + current_index_cell += 1; + } + // Now check divider cells and their pointers. + let parent_buf = parent_contents.as_ptr(); + let cell_divider_idx = balance_info.first_divider_cell + page_idx; + if sibling_count_new == 0 { + // Balance-shallower case + // We need to check data in parent page + let rightmost = read_u32(rightmost_pointer, 0); + debug_validate_cells!(parent_contents, self.usable_space() as u16); + + if !pages_to_balance_new[0].is_some() { + tracing::error!( + "balance_non_root(balance_shallower_incorrect_page, page_idx={})", + 0 + ); + valid = false; + } + + for i in 1..sibling_count_new { + if pages_to_balance_new[i].is_some() { + tracing::error!( + "balance_non_root(balance_shallower_incorrect_page, page_idx={})", + i + ); + valid = false; + } + } + + if current_index_cell != cells_debug.len() + || cells_debug.len() != contents.cell_count() + || contents.cell_count() != parent_contents.cell_count() + { + tracing::error!("balance_non_root(balance_shallower_incorrect_cell_count, current_index_cell={}, cells_debug={}, cell_count={}, parent_cell_count={})", + current_index_cell, + cells_debug.len(), + contents.cell_count(), + parent_contents.cell_count() + ); + valid = false; + } + + if rightmost == page.get().id as u32 || rightmost == parent_page.get().id as u32 { + tracing::error!("balance_non_root(balance_shallower_rightmost_pointer, page_id={}, parent_page_id={}, rightmost={})", + page.get().id, + parent_page.get().id, + rightmost, + ); + valid = false; + } + + if let Some(rm) = contents.rightmost_pointer() { + if rm != rightmost { + tracing::error!("balance_non_root(balance_shallower_rightmost_pointer, page_rightmost={}, rightmost={})", + rm, + rightmost, + ); + valid = false; + } + } + + if let Some(rm) = parent_contents.rightmost_pointer() { + if rm != rightmost { + tracing::error!("balance_non_root(balance_shallower_rightmost_pointer, parent_rightmost={}, rightmost={})", + rm, + rightmost, + ); + valid = false; + } + } + + if parent_contents.page_type() != page_type { + tracing::error!("balance_non_root(balance_shallower_parent_page_type, page_type={:?}, parent_page_type={:?})", + page_type, + parent_contents.page_type() + ); + valid = false + } + + for parent_cell_idx in 0..contents.cell_count() { + let (parent_cell_start, parent_cell_len) = parent_contents.cell_get_raw_region( + parent_cell_idx, + payload_overflow_threshold_max( + parent_contents.page_type(), + self.usable_space() as u16, + ), + payload_overflow_threshold_min( + parent_contents.page_type(), + self.usable_space() as u16, + ), + self.usable_space(), + ); + + let (cell_start, cell_len) = contents.cell_get_raw_region( + parent_cell_idx, + payload_overflow_threshold_max( + contents.page_type(), + self.usable_space() as u16, + ), + payload_overflow_threshold_min( + contents.page_type(), + self.usable_space() as u16, + ), 
+                        self.usable_space(),
+                    );
+
+                    let buf = contents.as_ptr();
+                    let cell_buf = to_static_buf(&mut buf[cell_start..cell_start + cell_len]);
+                    let parent_cell_buf = to_static_buf(
+                        &mut parent_buf[parent_cell_start..parent_cell_start + parent_cell_len],
+                    );
+                    let cell_buf_in_array = &cells_debug[parent_cell_idx];
+
+                    if cell_buf != cell_buf_in_array || cell_buf != parent_cell_buf {
+                        tracing::error!("balance_non_root(balance_shallower_cell_not_found_debug, page_id={}, cell_in_cell_array_idx={})",
+                            page.get().id,
+                            parent_cell_idx,
+                        );
+                        valid = false;
+                    }
+                }
+            } else if page_idx == sibling_count_new - 1 {
+                // We only validate the parent's rightmost pointer here; we don't validate a
+                // divider cell that happens to be the last pointer, because insert_into_cell
+                // could have defragmented the page and invalidated the pointer.
+                // For the right pointer, we just check that it points to this page.
+                if cell_divider_idx == parent_contents.cell_count() {
+                    let rightmost = read_u32(rightmost_pointer, 0);
+                    if rightmost != page.get().id as u32 {
+                        tracing::error!("balance_non_root(cell_divider_right_pointer, should point to {}, but points to {})",
+                            page.get().id,
+                            rightmost
+                        );
+                        valid = false;
+                    }
+                }
+            } else {
+                // divider cell might be an overflow cell
+                let mut was_overflow = false;
+                for overflow_cell in &parent_contents.overflow_cells {
+                    if overflow_cell.index == cell_divider_idx {
+                        let left_pointer = read_u32(&overflow_cell.payload, 0);
+                        if left_pointer != page.get().id as u32 {
+                            tracing::error!("balance_non_root(cell_divider_left_pointer_overflow, should point to page_id={}, but points to {}, divider_cell={}, overflow_cells_parent={})",
+                                page.get().id,
+                                left_pointer,
+                                page_idx,
+                                parent_contents.overflow_cells.len()
+                            );
+                            valid = false;
+                        }
+                        was_overflow = true;
+                        break;
+                    }
+                }
+                if was_overflow {
+                    if !leaf_data {
+                        // remember to advance the cell index if this cell was moved to the parent
+                        current_index_cell += 1;
+                    }
+                    continue;
+                }
+                // Not an overflow cell: read the divider cell in place and check its left pointer.
+                let (cell_start, cell_len) = parent_contents.cell_get_raw_region(
+                    cell_divider_idx,
+                    payload_overflow_threshold_max(
+                        parent_contents.page_type(),
+                        self.usable_space() as u16,
+                    ),
+                    payload_overflow_threshold_min(
+                        parent_contents.page_type(),
+                        self.usable_space() as u16,
+                    ),
+                    self.usable_space(),
+                );
+                let cell_left_pointer = read_u32(&parent_buf[cell_start..cell_start + cell_len], 0);
+                if cell_left_pointer != page.get().id as u32 {
+                    tracing::error!("balance_non_root(cell_divider_left_pointer, should point to page_id={}, but points to {}, divider_cell={}, overflow_cells_parent={})",
+                        page.get().id,
+                        cell_left_pointer,
+                        page_idx,
+                        parent_contents.overflow_cells.len()
+                    );
+                    valid = false;
+                }
+                if leaf_data {
+                    // For a table leaf page, we just check that the divider cell in the parent
+                    // carries the rowid of the last cell distributed to this page. The leaf cells
+                    // themselves were already checked above, and the divider is not part of the
+                    // distributed cells, so we don't advance current_index_cell.
+                    if page_idx >= balance_info.sibling_count - 1 {
+                        // We are in the last page, so there is no divider cell to check.
+                        continue;
+                    }
+                    let cell_buf: &'static mut [u8] =
+                        to_static_buf(&mut cells_debug[current_index_cell - 1]);
+                    let cell = crate::storage::sqlite3_ondisk::read_btree_cell(
+                        cell_buf,
+                        &page_type,
+                        0,
+                        payload_overflow_threshold_max(
+                            parent_contents.page_type(),
+                            self.usable_space() as u16,
+                        ),
+                        payload_overflow_threshold_min(
+                            parent_contents.page_type(),
+                            self.usable_space() as u16,
+                        ),
+                        self.usable_space(),
+                    )
+                    .unwrap();
+                    let parent_cell = parent_contents
+                        .cell_get(
+                            cell_divider_idx,
+                            payload_overflow_threshold_max(
+                                parent_contents.page_type(),
+                                self.usable_space() as u16,
+                            ),
+                            payload_overflow_threshold_min(
+                                parent_contents.page_type(),
+                                self.usable_space() as u16,
+                            ),
+                            self.usable_space(),
+                        )
+                        .unwrap();
+                    let rowid = match cell {
+                        BTreeCell::TableLeafCell(table_leaf_cell) => table_leaf_cell._rowid,
+                        _ => unreachable!(),
+                    };
+                    let rowid_parent = match parent_cell {
+                        BTreeCell::TableInteriorCell(table_interior_cell) => {
+                            table_interior_cell._rowid
+                        }
+                        _ => unreachable!(),
+                    };
+                    if rowid_parent != rowid {
+                        tracing::error!("balance_non_root(cell_divider_rowid, page_id={}, cell_divider_idx={}, rowid_parent={}, rowid={})",
+                            page.get().id,
+                            cell_divider_idx,
+                            rowid_parent,
+                            rowid
+                        );
+                        valid = false;
+                    }
+                } else {
+                    // In any other case, we need to check that this cell was moved to the parent as a divider cell
+                    let mut was_overflow = false;
+                    for overflow_cell in &parent_contents.overflow_cells {
+                        if overflow_cell.index == cell_divider_idx {
+                            let left_pointer = read_u32(&overflow_cell.payload, 0);
+                            if left_pointer != page.get().id as u32 {
+                                tracing::error!("balance_non_root(cell_divider_divider_cell_overflow should point to page_id={}, but points to {}, divider_cell={}, overflow_cells_parent={})",
+                                    page.get().id,
+                                    left_pointer,
+                                    page_idx,
+                                    parent_contents.overflow_cells.len()
+                                );
+                                valid = false;
+                            }
+                            was_overflow = true;
+                            break;
+                        }
+                    }
+                    if was_overflow {
+                        if !leaf_data {
+                            // remember to advance the cell index if this cell was moved to the parent
+                            current_index_cell += 1;
+                        }
+                        continue;
+                    }
+                    let (parent_cell_start, parent_cell_len) = parent_contents.cell_get_raw_region(
+                        cell_divider_idx,
+                        payload_overflow_threshold_max(
+                            parent_contents.page_type(),
+                            self.usable_space() as u16,
+                        ),
+                        payload_overflow_threshold_min(
+                            parent_contents.page_type(),
+                            self.usable_space() as u16,
+                        ),
+                        self.usable_space(),
+                    );
+                    let cell_buf_in_array = &cells_debug[current_index_cell];
+                    let left_pointer = read_u32(
+                        &parent_buf[parent_cell_start..parent_cell_start + parent_cell_len],
+                        0,
+                    );
+                    if left_pointer != page.get().id as u32 {
+                        tracing::error!("balance_non_root(divider_cell_left_pointer_interior should point to page_id={}, but points to {}, divider_cell={}, overflow_cells_parent={})",
+                            page.get().id,
+                            left_pointer,
+                            page_idx,
+                            parent_contents.overflow_cells.len()
+                        );
+                        valid = false;
+                    }
+                    match page_type {
+                        PageType::TableInterior | PageType::IndexInterior => {
+                            let parent_cell_buf =
+                                &parent_buf[parent_cell_start..parent_cell_start + parent_cell_len];
+                            if parent_cell_buf[4..] != cell_buf_in_array[4..] {
+                                tracing::error!("balance_non_root(cell_divider_cell, page_id={}, cell_divider_idx={})",
+                                    page.get().id,
+                                    cell_divider_idx,
+                                );
+                                valid = false;
+                            }
+                        }
+                        PageType::IndexLeaf => {
+                            let parent_cell_buf =
+                                &parent_buf[parent_cell_start..parent_cell_start + parent_cell_len];
+                            if parent_cell_buf[4..] != cell_buf_in_array[..] {
+                                tracing::error!("balance_non_root(cell_divider_cell_index_leaf, page_id={}, cell_divider_idx={})",
+                                    page.get().id,
+                                    cell_divider_idx,
+                                );
+                                valid = false;
+                            }
+                        }
+                        _ => {
+                            unreachable!()
+                        }
+                    }
+                    current_index_cell += 1;
+                }
+            }
+        }
+        assert!(valid, "corrupted database, cells were not balanced properly");
+    }

     /// Balance the root page.
     /// This is done when the root page overflows, and we need to create a new root page.
     /// See e.g. https://en.wikipedia.org/wiki/B-tree
@@ -1880,7 +3287,11 @@
         child_buf[0..root_contents.header_size()]
             .copy_from_slice(&root_buf[offset..offset + root_contents.header_size()]);
         // Copy overflow cells
-        child_contents.overflow_cells = root_contents.overflow_cells.clone();
+        std::mem::swap(
+            &mut child_contents.overflow_cells,
+            &mut root_contents.overflow_cells,
+        );
+        root_contents.overflow_cells.clear();

         // 2. Modify root
         let new_root_page_type = match root_contents.page_type() {
@@ -1889,21 +3300,17 @@
             other => other,
         } as u8;
         // set new page type
-        root_contents.write_u8(PAGE_HEADER_OFFSET_PAGE_TYPE, new_root_page_type);
-        root_contents.write_u32(PAGE_HEADER_OFFSET_RIGHTMOST_PTR, child.get().id as u32);
-        root_contents.write_u16(
-            PAGE_HEADER_OFFSET_CELL_CONTENT_AREA,
-            self.usable_space() as u16,
-        );
-        root_contents.write_u16(PAGE_HEADER_OFFSET_CELL_COUNT, 0);
-        root_contents.write_u16(PAGE_HEADER_OFFSET_FIRST_FREEBLOCK, 0);
+        root_contents.write_u8(offset::BTREE_PAGE_TYPE, new_root_page_type);
+        root_contents.write_u32(offset::BTREE_RIGHTMOST_PTR, child.get().id as u32);
+        root_contents.write_u16(offset::BTREE_CELL_CONTENT_AREA, self.usable_space() as u16);
+        root_contents.write_u16(offset::BTREE_CELL_COUNT, 0);
+        root_contents.write_u16(offset::BTREE_FIRST_FREEBLOCK, 0);

-        root_contents.write_u8(PAGE_HEADER_OFFSET_FRAGMENTED_BYTES_COUNT, 0);
+        root_contents.write_u8(offset::BTREE_FRAGMENTED_BYTES_COUNT, 0);
         root_contents.overflow_cells.clear();
         self.root_page = root.get().id;
         self.stack.clear();
         self.stack.push(root.clone());
-        self.stack.advance();
         self.stack.push(child.clone());
     }

@@ -1912,8 +3319,7 @@
     }
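find_cell below returns the insertion point for a key: the index of the first cell whose key compares greater than or equal to the target, or cell_count if the key belongs at the end. A sketch of that contract over plain rowids (a linear scan, matching the loop below; the TODO in the code points at SQLite's more efficient record comparison):

    // Insertion-point search matching find_cell's contract (illustration only).
    fn find_cell(rowids: &[u64], int_key: u64) -> usize {
        let mut cell_idx = 0;
        while cell_idx < rowids.len() {
            if int_key <= rowids[cell_idx] {
                break; // first cell with key >= target
            }
            cell_idx += 1;
        }
        cell_idx // == rowids.len() means "append after the last cell"
    }

    fn main() {
        let rowids = [10, 20, 30];
        assert_eq!(find_cell(&rowids, 20), 1); // exact match
        assert_eq!(find_cell(&rowids, 25), 2); // between 20 and 30
        assert_eq!(find_cell(&rowids, 99), 3); // past the end
    }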
     /// Find the index of the cell in the page that contains the given rowid.
-    /// BTree tables only.
-    fn find_cell(&self, page: &PageContent, int_key: u64) -> usize {
+    fn find_cell(&self, page: &PageContent, key: &BTreeKey) -> usize {
         let mut cell_idx = 0;
         let cell_count = page.cell_count();
         while cell_idx < cell_count {
@@ -1927,22 +3333,70 @@
             .unwrap()
             {
                 BTreeCell::TableLeafCell(cell) => {
-                    if int_key <= cell._rowid {
+                    if key.to_rowid() <= cell._rowid {
                         break;
                     }
                 }
                 BTreeCell::TableInteriorCell(cell) => {
-                    if int_key <= cell._rowid {
+                    if key.to_rowid() <= cell._rowid {
                         break;
                     }
                 }
-                _ => todo!(),
+                BTreeCell::IndexInteriorCell(IndexInteriorCell { payload, .. })
+                | BTreeCell::IndexLeafCell(IndexLeafCell { payload, .. }) => {
+                    // TODO: implement efficient comparison of records
+                    // e.g. https://github.com/sqlite/sqlite/blob/master/src/vdbeaux.c#L4719
+                    read_record(
+                        payload,
+                        self.get_immutable_record_or_create().as_mut().unwrap(),
+                    )
+                    .expect("failed to read record");
+                    let order = compare_immutable(
+                        key.to_index_key_values(),
+                        self.get_immutable_record().as_ref().unwrap().get_values(),
+                        self.index_key_sort_order,
+                    );
+                    match order {
+                        Ordering::Less | Ordering::Equal => {
+                            break;
+                        }
+                        Ordering::Greater => {}
+                    }
+                }
             }
             cell_idx += 1;
         }
+        assert!(cell_idx <= cell_count);
         cell_idx
     }

+    pub fn seek_end(&mut self) -> Result<CursorResult<()>> {
+        assert!(self.mv_cursor.is_none()); // unsure about this -_-
+        self.move_to_root();
+        loop {
+            let mem_page = self.stack.top();
+            let page_id = mem_page.get().id;
+            let page = self.pager.read_page(page_id)?;
+            return_if_locked!(page);
+
+            let contents = page.get().contents.as_ref().unwrap();
+            if contents.is_leaf() {
+                // set cursor just past the last cell to append
+                self.stack.set_cell_index(contents.cell_count() as i32);
+                return Ok(CursorResult::Ok(()));
+            }
+
+            match contents.rightmost_pointer() {
+                Some(right_most_pointer) => {
+                    self.stack.set_cell_index(contents.cell_count() as i32 + 1); // invalid on interior
+                    let child = self.pager.read_page(right_most_pointer as usize)?;
+                    self.stack.push(child);
+                }
+                None => unreachable!("interior page must have rightmost pointer"),
+            }
+        }
+    }

     pub fn seek_to_last(&mut self) -> Result<CursorResult<()>> {
         return_if_io!(self.move_to_rightmost());
         let rowid = return_if_io!(self.get_next_record(None));
@@ -1995,7 +3449,7 @@

     pub fn prev(&mut self) -> Result<CursorResult<()>> {
         assert!(self.mv_cursor.is_none());
-        match self.get_prev_record()? {
+        match self.get_prev_record(None)? {
             CursorResult::Ok(rowid) => {
                 self.rowid.replace(rowid);
                 self.empty_record.replace(rowid.is_none());
@@ -2005,11 +3459,6 @@
         }
     }

-    pub fn wait_for_completion(&mut self) -> Result<()> {
-        // TODO: Wait for pager I/O to complete
-        Ok(())
-    }
-
     pub fn rowid(&self) -> Result<Option<u64>> {
         if let Some(mv_cursor) = &self.mv_cursor {
             let mv_cursor = mv_cursor.borrow();
@@ -2020,6 +3469,10 @@
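A hypothetical call site for the seek API defined next (BTreeCursor, SeekKey, SeekOp, and CursorResult as in this file; the bool "found" return value and the retry loop are assumptions based on the surrounding do_seek usage, and the I/O handling is only sketched):

    // Hypothetical usage: position the cursor at the first row with rowid >= 10.
    fn position_at_or_after(cursor: &mut BTreeCursor, rowid: u64) -> Result<bool> {
        loop {
            match cursor.seek(SeekKey::TableRowId(rowid), SeekOp::GE)? {
                // Cursor is positioned; cursor.rowid()? yields the matched rowid, if any.
                CursorResult::Ok(found) => return Ok(found),
                // Pending I/O must complete before re-entering the state machine.
                CursorResult::IO => { /* run pending pager I/O here, then retry */ }
            }
        }
    }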
+ self.set_null_flag(false); let rowid = return_if_io!(self.do_seek(key, op)); self.rowid.replace(rowid); self.empty_record.replace(rowid.is_none()); @@ -2032,28 +3485,38 @@ pub fn insert( &mut self, - key: &OwnedValue, - record: &ImmutableRecord, + key: &BTreeKey, moved_before: bool, /* Indicate whether it's necessary to traverse to find the leaf page */ ) -> Result<CursorResult<()>> { - let int_key = match key { - OwnedValue::Integer(i) => i, - _ => unreachable!("btree tables are indexed by integers!"), - }; + tracing::trace!("insert"); match &self.mv_cursor { - Some(mv_cursor) => { - let row_id = - crate::mvcc::database::RowID::new(self.table_id() as u64, *int_key as u64); - let record_buf = record.get_payload().to_vec(); - let row = crate::mvcc::database::Row::new(row_id, record_buf); - mv_cursor.borrow_mut().insert(row).unwrap(); - } - None => { - if !moved_before { - return_if_io!(self.move_to(SeekKey::TableRowId(*int_key as u64), SeekOp::EQ)); + Some(mv_cursor) => match key.maybe_rowid() { + Some(rowid) => { + let row_id = crate::mvcc::database::RowID::new(self.table_id() as u64, rowid); + let record_buf = key.get_record().unwrap().get_payload().to_vec(); + let row = crate::mvcc::database::Row::new(row_id, record_buf); + mv_cursor.borrow_mut().insert(row).unwrap(); + } + None => todo!("Support mvcc inserts with index btrees"), }, + None => { + tracing::trace!("moved {}", moved_before); + if !moved_before { + match key { + BTreeKey::IndexKey(_) => { + return_if_io!(self + .move_to(SeekKey::IndexKey(key.get_record().unwrap()), SeekOp::GE)) + } + BTreeKey::TableRowId(_) => return_if_io!( + self.move_to(SeekKey::TableRowId(key.to_rowid()), SeekOp::EQ) + ), + } + } + return_if_io!(self.insert_into_page(key)); + if key.maybe_rowid().is_some() { + let int_key = key.to_rowid(); + self.rowid.replace(Some(int_key)); } - return_if_io!(self.insert_into_page(key, record)); - self.rowid.replace(Some(*int_key as u64)); } }; Ok(CursorResult::Ok(())) @@ -2063,15 +3526,12 @@ impl BTreeCursor { /// 1. Start -> check if the rowid to be deleted is present in the page or not. If not we early return /// 2. LoadPage -> load the page. /// 3. FindCell -> find the cell to be deleted in the page. - /// 4. ClearOverflowPages -> clear overflow pages associated with the cell. here if the cell is a leaf page go to DropCell state - /// or else go to InteriorNodeReplacement + /// 4. ClearOverflowPages -> Clear the overflow pages if there are any before dropping the cell, then if we are in a leaf page we just drop the cell in place. + /// if we are in an interior page, we need to rotate keys in order to replace the current cell (InteriorNodeReplacement). /// 5. InteriorNodeReplacement -> we copy the left subtree leaf node into the deleted interior node's place. - /// 6. DropCell -> only for leaf nodes. drop the cell. - /// 7. CheckNeedsBalancing -> check if balancing is needed. If yes, move to StartBalancing else move to StackRetreat - /// 8. WaitForBalancingToComplete -> perform balancing - /// 9. SeekAfterBalancing -> adjust the cursor to a node that is closer to the deleted value. go to Finish - /// 10. StackRetreat -> perform stack retreat for cursor positioning. only when balancing is not needed. go to Finish - /// 11. Finish -> Delete operation is done. Return CursorResult(Ok()) + /// 6. WaitForBalancingToComplete -> perform balancing + /// 7. SeekAfterBalancing -> adjust the cursor to a node that is closer to the deleted value. go to Finish + /// 8. Finish -> Delete operation is done. Return CursorResult(Ok())
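+ /// Leaf deletes skip step 5: ClearOverflowPages drops the cell in place and goes straight to CheckNeedsBalancing; when no balancing is needed, the cursor simply retreats and the delete finishes.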
pub fn delete(&mut self) -> Result<CursorResult<()>> { assert!(self.mv_cursor.is_none()); @@ -2087,16 +3547,30 @@ impl BTreeCursor { let delete_info = self.state.delete_info().expect("cannot get delete info"); delete_info.state.clone() }; + tracing::debug!("delete state: {:?}", delete_state); match delete_state { DeleteState::Start => { - let _target_rowid = match self.rowid.get() { - Some(rowid) => rowid, - None => { + let page = self.stack.top(); + page.set_dirty(); + self.pager.add_dirty(page.get().id); + if matches!( + page.get_contents().page_type(), + PageType::TableLeaf | PageType::TableInterior + ) { + let _target_rowid = match self.rowid.get() { + Some(rowid) => rowid, + None => { + self.state = CursorState::None; + return Ok(CursorResult::Ok(())); + } + }; + } else { + if self.reusable_immutable_record.borrow().is_none() { self.state = CursorState::None; return Ok(CursorResult::Ok(())); } - }; + } let delete_info = self.state.mut_delete_info().unwrap(); delete_info.state = DeleteState::LoadPage; @@ -2139,6 +3613,7 @@ let original_child_pointer = match &cell { BTreeCell::TableInteriorCell(interior) => Some(interior._left_child_page), + BTreeCell::IndexInteriorCell(interior) => Some(interior.left_child_page), _ => None, }; @@ -2167,7 +3642,11 @@ original_child_pointer, }; } else { - delete_info.state = DeleteState::DropCell { cell_idx }; + let contents = page.get().contents.as_mut().unwrap(); + drop_cell(contents, cell_idx, self.usable_space() as u16)?; + + let delete_info = self.state.mut_delete_info().unwrap(); + delete_info.state = DeleteState::CheckNeedsBalancing; } } @@ -2185,27 +3664,21 @@ return_if_io!(self.prev()); let leaf_page = self.stack.top(); - return_if_locked!(leaf_page); + return_if_locked_maybe_load!(self.pager, leaf_page); + assert!( + matches!( + leaf_page.get_contents().page_type(), + PageType::TableLeaf | PageType::IndexLeaf + ), + "self.prev should have returned a leaf page" + ); - if !leaf_page.is_loaded() { - self.pager.load_page(leaf_page.clone())?; - return Ok(CursorResult::IO); - } - - let parent_page = { - self.stack.pop(); - let parent = self.stack.top(); - self.stack.push(leaf_page.clone()); - parent - }; - - if !parent_page.is_loaded() { - self.pager.load_page(parent_page.clone())?; - return Ok(CursorResult::IO); - } + let parent_page = self.stack.parent_page().unwrap(); + assert!(parent_page.is_loaded(), "parent page"); let leaf_contents = leaf_page.get().contents.as_ref().unwrap(); - let leaf_cell_idx = self.stack.current_cell_index() as usize - 1; + // The index of the cell to be removed must be the last one.
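+ // (After self.prev(), the cursor rests on the in-order predecessor, and on a leaf page that predecessor is always the page's last cell.)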
+ let leaf_cell_idx = leaf_contents.cell_count() - 1; let predecessor_cell = leaf_contents.cell_get( leaf_cell_idx, payload_overflow_threshold_max( @@ -2224,14 +3697,17 @@ let parent_contents = parent_page.get().contents.as_mut().unwrap(); - // Create an interior cell from the leaf cell + // Create an interior cell from a predecessor let mut cell_payload: Vec<u8> = Vec::new(); + let child_pointer = original_child_pointer.expect("there should be a pointer"); match predecessor_cell { BTreeCell::TableLeafCell(leaf_cell) => { - if let Some(child_pointer) = original_child_pointer { - cell_payload.extend_from_slice(&child_pointer.to_be_bytes()); - write_varint_to_vec(leaf_cell._rowid, &mut cell_payload); - } + cell_payload.extend_from_slice(&child_pointer.to_be_bytes()); + write_varint_to_vec(leaf_cell._rowid, &mut cell_payload); + } + BTreeCell::IndexLeafCell(leaf_cell) => { + cell_payload.extend_from_slice(&child_pointer.to_be_bytes()); + cell_payload.extend_from_slice(leaf_cell.payload); } _ => unreachable!("Expected table leaf cell"), } @@ -2248,61 +3724,37 @@ delete_info.state = DeleteState::CheckNeedsBalancing; } - DeleteState::DropCell { cell_idx } => { - let page = self.stack.top(); - return_if_locked!(page); - - if !page.is_loaded() { - self.pager.load_page(page.clone())?; - return Ok(CursorResult::IO); - } - - page.set_dirty(); - self.pager.add_dirty(page.get().id); - - let contents = page.get().contents.as_mut().unwrap(); - drop_cell(contents, cell_idx, self.usable_space() as u16)?; - - let delete_info = self.state.mut_delete_info().unwrap(); - delete_info.state = DeleteState::CheckNeedsBalancing; - } - DeleteState::CheckNeedsBalancing => { let page = self.stack.top(); - return_if_locked!(page); - - if !page.is_loaded() { - self.pager.load_page(page.clone())?; - return Ok(CursorResult::IO); - } + return_if_locked_maybe_load!(self.pager, page); let contents = page.get().contents.as_ref().unwrap(); let free_space = compute_free_space(contents, self.usable_space() as u16); let needs_balancing = free_space as usize * 3 > self.usable_space() * 2; - let target_rowid = self.rowid.get().unwrap(); + let target_key = if page.is_index() { + DeleteSavepoint::Payload(self.record().as_ref().unwrap().clone()) + } else { + DeleteSavepoint::Rowid(self.rowid.get().unwrap()) + }; let delete_info = self.state.mut_delete_info().unwrap(); if needs_balancing { - delete_info.state = DeleteState::StartBalancing { target_rowid }; + if delete_info.balance_write_info.is_none() { + let mut write_info = WriteInfo::new(); + write_info.state = WriteState::BalanceStart; + delete_info.balance_write_info = Some(write_info); + } + + delete_info.state = DeleteState::WaitForBalancingToComplete { target_key } } else { - delete_info.state = DeleteState::StackRetreat; + self.stack.retreat(); + self.state = CursorState::None; + return Ok(CursorResult::Ok(())); } } - DeleteState::StartBalancing { target_rowid } => { - let delete_info = self.state.mut_delete_info().unwrap(); - - if delete_info.balance_write_info.is_none() { - let mut write_info = WriteInfo::new(); - write_info.state = WriteState::BalanceStart; - delete_info.balance_write_info = Some(write_info); - } - - delete_info.state = DeleteState::WaitForBalancingToComplete { target_rowid } - } - - DeleteState::WaitForBalancingToComplete { target_rowid } => { + DeleteState::WaitForBalancingToComplete { target_key } => { let delete_info = self.state.mut_delete_info().unwrap(); // Switch the CursorState to Write state for balancing
@@ -2320,12 +3772,13 @@ impl BTreeCursor { // Move to seek state self.state = CursorState::Delete(DeleteInfo { - state: DeleteState::SeekAfterBalancing { target_rowid }, + state: DeleteState::SeekAfterBalancing { target_key }, balance_write_info: Some(write_info), }); } CursorResult::IO => { // Save balance progress and return IO let write_info = match &self.state { CursorState::Write(wi) => wi.clone(), @@ -2333,7 +3786,7 @@ }; self.state = CursorState::Delete(DeleteInfo { - state: DeleteState::WaitForBalancingToComplete { target_rowid }, + state: DeleteState::WaitForBalancingToComplete { target_key }, balance_write_info: Some(write_info), }); return Ok(CursorResult::IO); @@ -2341,22 +3794,15 @@ } } - DeleteState::SeekAfterBalancing { target_rowid } => { - return_if_io!(self.move_to(SeekKey::TableRowId(target_rowid), SeekOp::EQ)); + DeleteState::SeekAfterBalancing { target_key } => { + let key = match &target_key { + DeleteSavepoint::Rowid(rowid) => SeekKey::TableRowId(*rowid), + DeleteSavepoint::Payload(immutable_record) => { + SeekKey::IndexKey(immutable_record) + } + }; + return_if_io!(self.seek(key, SeekOp::EQ)); - let delete_info = self.state.mut_delete_info().unwrap(); - delete_info.state = DeleteState::Finish; - delete_info.balance_write_info = None; - } - - DeleteState::StackRetreat => { - self.stack.retreat(); - let delete_info = self.state.mut_delete_info().unwrap(); - delete_info.state = DeleteState::Finish; - delete_info.balance_write_info = None; - } - - DeleteState::Finish => { self.state = CursorState::None; return Ok(CursorResult::Ok(())); } @@ -2364,21 +3810,53 @@ } } + /// In outer joins, whenever the right-side table has no matching row, the query must still return a row + /// for each left-side row. In order to achieve this, we set the null flag on the right-side table cursor + /// so that it returns NULL for all columns until cleared. + #[inline(always)] pub fn set_null_flag(&mut self, flag: bool) { self.null_flag = flag; } + #[inline(always)] pub fn get_null_flag(&self) -> bool { self.null_flag } + /// Search for a key in an Index Btree.
/// When looking up an index that must be unique, we cannot compare the rowid, so only the key columns are compared. + pub fn key_exists_in_index(&mut self, key: &ImmutableRecord) -> Result<CursorResult<bool>> { + return_if_io!(self.seek(SeekKey::IndexKey(key), SeekOp::GE)); + + let record_opt = self.record(); + match record_opt.as_ref() { + Some(record) => { + // Existing record found: compare the key prefix + let existing_key = &record.get_values()[..record.count().saturating_sub(1)]; + let inserted_key_vals = &key.get_values(); + if existing_key + .iter() + .zip(inserted_key_vals.iter()) + .all(|(a, b)| a == b) + { + return Ok(CursorResult::Ok(true)); // duplicate + } + } + None => { + // Cursor not pointing at a record: table is empty or past last + return Ok(CursorResult::Ok(false)); + } + } + + Ok(CursorResult::Ok(false)) // not a duplicate + } + pub fn exists(&mut self, key: &OwnedValue) -> Result<CursorResult<bool>> { assert!(self.mv_cursor.is_none()); let int_key = match key { OwnedValue::Integer(i) => i, _ => unreachable!("btree tables are indexed by integers!"), }; - return_if_io!(self.move_to(SeekKey::TableRowId(*int_key as u64), SeekOp::EQ)); + let _ = return_if_io!(self.move_to(SeekKey::TableRowId(*int_key as u64), SeekOp::EQ)); let page = self.stack.top(); // TODO(pere): request load return_if_locked!(page); @@ -2390,7 +3868,7 @@ OwnedValue::Integer(i) => *i as u64, _ => unreachable!("btree tables are indexed by integers!"), }; - let cell_idx = self.find_cell(contents, int_key); + let cell_idx = self.find_cell(contents, &BTreeKey::new_table_rowid(int_key, None)); if cell_idx >= contents.cell_count() { Ok(CursorResult::Ok(false)) } else { @@ -2699,25 +4177,8 @@ }; // if it all fits in local space and old_local_size is enough, do an in-place overwrite - if new_payload.len() <= old_local_size { - self.overwrite_content( - page_ref.clone(), - old_offset, - &new_payload, - 0, - new_payload.len(), - )?; - let remaining = old_local_size - new_payload.len(); - if remaining > 0 { - // fill the rest with zeros - self.overwrite_content( - page_ref.clone(), - old_offset + new_payload.len(), - &[0; 1], - 0, - remaining, - )?; - } + if new_payload.len() == old_local_size { + self.overwrite_content(page_ref.clone(), old_offset, &new_payload)?; Ok(CursorResult::Ok(())) } else { // doesn't fit, drop it and insert a new one @@ -2741,36 +4202,11 @@ page_ref: PageRef, dest_offset: usize, new_payload: &[u8], - src_offset: usize, - amount: usize, ) -> Result<CursorResult<()>> { return_if_locked!(page_ref); - page_ref.set_dirty(); - self.pager.add_dirty(page_ref.get().id); let buf = page_ref.get().contents.as_mut().unwrap().as_ptr(); + buf[dest_offset..dest_offset + new_payload.len()].copy_from_slice(&new_payload); - // if new_payload doesn't have enough data, we fill with zeros - let n_data = new_payload.len().saturating_sub(src_offset); - if n_data == 0 { - // everything is zeros - for i in 0..amount { - if buf[dest_offset + i] != 0 { - buf[dest_offset + i] = 0; - } - } - } else { - let copy_len = n_data.min(amount); - // copy the overlapping portion - buf[dest_offset..dest_offset + copy_len] - .copy_from_slice(&new_payload[src_offset..src_offset + copy_len]); - - // if copy_len < amount => fill remainder with 0 - if copy_len < amount { - for i in copy_len..amount { - buf[dest_offset + i] = 0; - } - } - } Ok(CursorResult::Ok(())) } @@ -2785,6 +4221,42 @@ fn get_immutable_record(&self) -> std::cell::RefMut<'_, Option<ImmutableRecord>> { self.reusable_immutable_record.borrow_mut() } + + pub fn is_write_in_progress(&self) -> bool { + match
self.state { + CursorState::Write(_) => true, + _ => false, + } + } +} + +#[cfg(debug_assertions)] +fn validate_cells_after_insertion(cell_array: &CellArray, leaf_data: bool) { + for cell in &cell_array.cells { + assert!(cell.len() >= 4); + + if leaf_data { + assert!(cell[0] != 0, "payload is {:?}", cell); + } + } +} + +/// Stack of pages representing the tree traversal order. +/// current_page represents the current page being used in the tree and current_page - 1 would be +/// the parent. Using current_page + 1 or higher is undefined behaviour. +struct PageStack { + /// Pointer to the current page being consumed + current_page: Cell<i32>, + /// List of pages in the stack. Root page will be in index 0 + stack: RefCell<[Option<PageRef>; BTCURSOR_MAX_DEPTH + 1]>, + /// List of cell indices in the stack. + /// cell_indices[current_page] is the current cell index being consumed. Similarly + /// cell_indices[current_page-1] is the cell index of the parent of the current page + /// that we save in case of going back up. + /// There are two points that need special attention: + /// If cell_indices[current_page] = -1, it indicates that the current iteration has reached the start of the current_page + /// If cell_indices[current_page] = `cell_count`, it means that the current iteration has reached the end of the current_page + cell_indices: RefCell<[i32; BTCURSOR_MAX_DEPTH + 1]>, } impl PageStack { @@ -2792,11 +4264,12 @@ self.current_page.set(self.current_page.get() + 1); } fn decrement_current(&self) { + assert!(self.current_page.get() > 0); self.current_page.set(self.current_page.get() - 1); } /// Push a new page onto the stack. /// This effectively means traversing to a child page. - fn push(&self, page: PageRef) { + fn _push(&self, page: PageRef, starting_cell_idx: i32) { tracing::trace!( "pagestack::push(current={}, new_page_id={})", self.current_page.get(), @@ -2808,14 +4281,24 @@ current < BTCURSOR_MAX_DEPTH as i32, "corrupted database, stack is bigger than expected" ); + assert!(current >= 0); self.stack.borrow_mut()[current as usize] = Some(page); - self.cell_indices.borrow_mut()[current as usize] = 0; + self.cell_indices.borrow_mut()[current as usize] = starting_cell_idx; + } + + fn push(&self, page: PageRef) { + self._push(page, 0); + } + + fn push_backwards(&self, page: PageRef) { + self._push(page, i32::MAX); } /// Pop a page off the stack. /// This effectively means traversing back up to a parent page.
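+ /// The popped slot is cleared and its saved cell index reset to 0.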
fn pop(&self) { let current = self.current_page.get(); + assert!(current >= 0); tracing::trace!("pagestack::pop(current={})", current); self.cell_indices.borrow_mut()[current as usize] = 0; self.stack.borrow_mut()[current as usize] = None; @@ -2839,7 +4322,9 @@ /// Current page pointer being used fn current(&self) -> usize { - self.current_page.get() as usize + let current = self.current_page.get() as usize; + assert!(self.current_page.get() >= 0); + current } /// Cell index of the current page @@ -2859,16 +4344,28 @@ /// We usually advance after going traversing a new page fn advance(&self) { let current = self.current(); - tracing::trace!("advance {}", self.cell_indices.borrow()[current],); + tracing::trace!("pagestack::advance {}", self.cell_indices.borrow()[current],); self.cell_indices.borrow_mut()[current] += 1; } fn retreat(&self) { let current = self.current(); - tracing::trace!("retreat {}", self.cell_indices.borrow()[current]); + tracing::trace!("pagestack::retreat {}", self.cell_indices.borrow()[current]); self.cell_indices.borrow_mut()[current] -= 1; } + /// Move the cursor to the next cell in the current page according to the iteration direction. + fn next_cell_in_direction(&self, iteration_direction: IterationDirection) { + match iteration_direction { + IterationDirection::Forwards => { + self.advance(); + } + IterationDirection::Backwards => { + self.retreat(); + } + } + } + fn set_cell_index(&self, idx: i32) { let current = self.current(); self.cell_indices.borrow_mut()[current] = idx @@ -2881,6 +4378,25 @@ fn clear(&self) { self.current_page.set(-1); } + pub fn parent_page(&self) -> Option<PageRef> { + if self.current_page.get() > 0 { + Some( + self.stack.borrow()[self.current() - 1] + .as_ref() + .unwrap() + .clone(), + ) + } else { + None + } + } +} + +/// Used for redistributing cells during a balance operation. +struct CellArray { + cells: Vec<&'static mut [u8]>, // TODO(pere): make this with references + + number_of_cells_per_page: [u16; 5], // number of cells in each page } impl CellArray { @@ -2897,7 +4413,7 @@ fn find_free_cell(page_ref: &PageContent, usable_space: u16, amount: usize) -> Result<usize> { // NOTE: freelist is in ascending order of keys and pc // unused space is reserved bytes at the end of the page, therefore we must subtract it from maxpc - let mut prev_pc = page_ref.offset + PAGE_HEADER_OFFSET_FIRST_FREEBLOCK; + let mut prev_pc = page_ref.offset + offset::BTREE_FIRST_FREEBLOCK; let mut pc = page_ref.first_freeblock() as usize; let maxpc = usable_space as usize - amount; @@ -2919,16 +4435,16 @@ return Ok(0); } // Delete the slot from freelist and update the page's fragment count. - page_ref.write_u16(prev_pc, next); + page_ref.write_u16_no_offset(prev_pc, next); let frag = page_ref.num_frag_free_bytes() + new_size as u8; - page_ref.write_u8(PAGE_HEADER_OFFSET_FRAGMENTED_BYTES_COUNT, frag); + page_ref.write_u8(offset::BTREE_FRAGMENTED_BYTES_COUNT, frag); return Ok(pc); } else if new_size + pc > maxpc { return_corrupt!("Free block extends beyond page end"); } else { // Requested amount fits inside the current free slot so we reduce its size // to account for newly allocated space.
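+ // (The allocation is carved from the tail of the block: e.g. taking 10 bytes out of a 16-byte free block shrinks it to 6 bytes and returns pc + 6.)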
- page_ref.write_u16(pc + 2, new_size as u16); + page_ref.write_u16_no_offset(pc + 2, new_size as u16); return Ok(pc + new_size); } } @@ -2951,18 +4467,18 @@ fn find_free_cell(page_ref: &PageContent, usable_space: u16, amount: usize) -> Result<usize> { pub fn btree_init_page(page: &PageRef, page_type: PageType, offset: usize, usable_space: u16) { // setup btree page let contents = page.get(); - debug!("btree_init_page(id={}, offset={})", contents.id, offset); + tracing::debug!("btree_init_page(id={}, offset={})", contents.id, offset); let contents = contents.contents.as_mut().unwrap(); contents.offset = offset; let id = page_type as u8; - contents.write_u8(PAGE_HEADER_OFFSET_PAGE_TYPE, id); - contents.write_u16(PAGE_HEADER_OFFSET_FIRST_FREEBLOCK, 0); - contents.write_u16(PAGE_HEADER_OFFSET_CELL_COUNT, 0); + contents.write_u8(offset::BTREE_PAGE_TYPE, id); + contents.write_u16(offset::BTREE_FIRST_FREEBLOCK, 0); + contents.write_u16(offset::BTREE_CELL_COUNT, 0); - contents.write_u16(PAGE_HEADER_OFFSET_CELL_CONTENT_AREA, usable_space); + contents.write_u16(offset::BTREE_CELL_CONTENT_AREA, usable_space); - contents.write_u8(PAGE_HEADER_OFFSET_FRAGMENTED_BYTES_COUNT, 0); - contents.write_u32(PAGE_HEADER_OFFSET_RIGHTMOST_PTR, 0); + contents.write_u8(offset::BTREE_FRAGMENTED_BYTES_COUNT, 0); + contents.write_u32(offset::BTREE_RIGHTMOST_PTR, 0); } fn to_static_buf(buf: &mut [u8]) -> &'static mut [u8] { @@ -2977,7 +4493,7 @@ fn edit_page( cell_array: &CellArray, usable_space: u16, ) -> Result<()> { - tracing::trace!( + tracing::debug!( "edit_page start_old_cells={} start_new_cells={} number_new_cells={} cell_array={}", start_old_cells, start_new_cells, @@ -3059,7 +4575,7 @@ )?; debug_validate_cells!(page, usable_space); // TODO: noverflow - page.write_u16(PAGE_HEADER_OFFSET_CELL_COUNT, number_new_cells as u16); + page.write_u16(offset::BTREE_CELL_COUNT, number_new_cells as u16); Ok(()) } @@ -3089,7 +4605,7 @@ let offset = (cell_pointer.start as usize - buf_range.start as usize) as u16; let len = (cell_pointer.end as usize - cell_pointer.start as usize) as u16; free_cell_range(page, offset, len, usable_space)?; - page.write_u16(PAGE_HEADER_OFFSET_CELL_COUNT, page.cell_count() as u16 - 1); + page.write_u16(offset::BTREE_CELL_COUNT, page.cell_count() as u16 - 1); number_of_cells_removed += 1; } } @@ -3196,10 +4712,14 @@ } } if removed_fragmentation > page.num_frag_free_bytes() { - return_corrupt!("Invalid fragmentation count"); + return_corrupt!(format!( + "Invalid fragmentation count.
Had {} and removed {}", + page.num_frag_free_bytes(), + removed_fragmentation + )); } let frag = page.num_frag_free_bytes() - removed_fragmentation; - page.write_u8(PAGE_HEADER_OFFSET_FRAGMENTED_BYTES_COUNT, frag); + page.write_u8(offset::BTREE_FRAGMENTED_BYTES_COUNT, frag); pc }; @@ -3207,11 +4727,11 @@ if offset < page.cell_content_area() { return_corrupt!("Free block before content area"); } - if pointer_to_pc != page.offset as u16 + PAGE_HEADER_OFFSET_FIRST_FREEBLOCK as u16 { + if pointer_to_pc != page.offset as u16 + offset::BTREE_FIRST_FREEBLOCK as u16 { return_corrupt!("Invalid content area merge"); } - page.write_u16(PAGE_HEADER_OFFSET_FIRST_FREEBLOCK, pc); - page.write_u16(PAGE_HEADER_OFFSET_CELL_CONTENT_AREA, end); + page.write_u16(offset::BTREE_FIRST_FREEBLOCK, pc); + page.write_u16(offset::BTREE_CELL_CONTENT_AREA, end); } else { page.write_u16_no_offset(pointer_to_pc as usize, offset); page.write_u16_no_offset(offset as usize, pc); @@ -3276,10 +4796,10 @@ assert!(cbrk >= first_cell); // set new first byte of cell content - page.write_u16(PAGE_HEADER_OFFSET_CELL_CONTENT_AREA, cbrk); + page.write_u16(offset::BTREE_CELL_CONTENT_AREA, cbrk); // set free block to 0, unused space can be retrieved from gap between cell pointer end and content start - page.write_u16(PAGE_HEADER_OFFSET_FIRST_FREEBLOCK, 0); - page.write_u8(PAGE_HEADER_OFFSET_FRAGMENTED_BYTES_COUNT, 0); + page.write_u16(offset::BTREE_FIRST_FREEBLOCK, 0); + page.write_u8(offset::BTREE_FRAGMENTED_BYTES_COUNT, 0); debug_validate_cells!(page, usable_space); } @@ -3287,17 +4807,27 @@ /// Only enabled in debug mode, where we ensure that all cells are valid.
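+/// Each cell must span at least 4 bytes and end within the page's usable space; on leaf pages the first cell byte must be non-zero.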
fn debug_validate_cells_core(page: &PageContent, usable_space: u16) { for i in 0..page.cell_count() { - // println!("Debug function: i={}", i); let (offset, size) = page.cell_get_raw_region( i, payload_overflow_threshold_max(page.page_type(), usable_space), payload_overflow_threshold_min(page.page_type(), usable_space), usable_space as usize, ); + let buf = &page.as_ptr()[offset..offset + size]; + assert!( + size >= 4, + "cell size should be at least 4 bytes idx={}, cell={:?}, offset={}", + i, + buf, + offset + ); if page.is_leaf() { assert!(page.as_ptr()[offset] != 0); } - assert!(size >= 4, "cell size should be at least 4 bytes idx={}", i); + assert!( + offset + size <= usable_space as usize, + "cell spans out of usable space" + ); } } @@ -3312,8 +4842,9 @@ fn insert_into_cell( cell_idx: usize, usable_space: u16, ) -> Result<()> { + debug_validate_cells!(page, usable_space); assert!( - cell_idx <= page.cell_count(), + cell_idx <= page.cell_count() + page.overflow_cells.len(), "attempting to add cell to an incorrect place cell_idx={} cell_count={}", cell_idx, page.cell_count() @@ -3331,11 +4862,13 @@ fn insert_into_cell( } let new_cell_data_pointer = allocate_cell_space(page, payload.len() as u16, usable_space)?; - tracing::trace!( - "insert_into_cell(idx={}, pc={})", + tracing::debug!( + "insert_into_cell(idx={}, pc={}, size={})", cell_idx, - new_cell_data_pointer + new_cell_data_pointer, + payload.len() ); + assert!(new_cell_data_pointer + payload.len() as u16 <= usable_space); let buf = page.as_ptr(); // copy data @@ -3359,7 +4892,7 @@ fn insert_into_cell( // update cell count let new_n_cells = (page.cell_count() + 1) as u16; - page.write_u16(PAGE_HEADER_OFFSET_CELL_COUNT, new_n_cells); + page.write_u16(offset::BTREE_CELL_COUNT, new_n_cells); debug_validate_cells!(page, usable_space); Ok(()) } @@ -3471,12 +5004,12 @@ fn allocate_cell_space(page_ref: &PageContent, amount: u16, usable_space: u16) - if gap + 2 + amount > top { // defragment defragment_page(page_ref, usable_space); - top = page_ref.read_u16(PAGE_HEADER_OFFSET_CELL_CONTENT_AREA) as usize; + top = page_ref.read_u16(offset::BTREE_CELL_CONTENT_AREA) as usize; } top -= amount; - page_ref.write_u16(PAGE_HEADER_OFFSET_CELL_CONTENT_AREA, top as u16); + page_ref.write_u16(offset::BTREE_CELL_CONTENT_AREA, top as u16); assert!(top + amount <= usable_space as usize); Ok(top as u16) @@ -3509,7 +5042,7 @@ fn fill_cell_payload( } let payload_overflow_threshold_max = payload_overflow_threshold_max(page_type, usable_space); - debug!( + tracing::debug!( "fill_cell_payload(record_size={}, payload_overflow_threshold_max={})", record_buf.len(), payload_overflow_threshold_max @@ -3549,7 +5082,7 @@ fn fill_cell_payload( } // we still have bytes to add, we will need to allocate new overflow page - let overflow_page = allocate_overflow_page(pager.clone()); + let overflow_page = pager.allocate_overflow_page(); overflow_pages.push(overflow_page.clone()); { let id = overflow_page.get().id as u32; @@ -3572,20 +5105,6 @@ fn fill_cell_payload( assert_eq!(cell_size, cell_payload.len()); } -/// Allocate a new overflow page. -/// This is done when a cell overflows and new space is needed. 
-fn allocate_overflow_page(pager: Rc<Pager>) -> PageRef { - let page = pager.allocate_page().unwrap(); - tracing::debug!("allocate_overflow_page(id={})", page.get().id); - - // setup overflow page - let contents = page.get().contents.as_mut().unwrap(); - let buf = contents.as_ptr(); - buf.fill(0); - - page -} - /// Returns the maximum payload size (X) that can be stored directly on a b-tree page without spilling to overflow pages. /// /// For table leaf pages: X = usable_size - 35 @@ -3635,11 +5154,11 @@ fn drop_cell(page: &mut PageContent, cell_idx: usize, usable_space: u16) -> Result<()> { if page.cell_count() > 1 { shift_pointers_left(page, cell_idx); } else { - page.write_u16(PAGE_HEADER_OFFSET_CELL_CONTENT_AREA, usable_space); - page.write_u16(PAGE_HEADER_OFFSET_FIRST_FREEBLOCK, 0); - page.write_u8(PAGE_HEADER_OFFSET_FRAGMENTED_BYTES_COUNT, 0); + page.write_u16(offset::BTREE_CELL_CONTENT_AREA, usable_space); + page.write_u16(offset::BTREE_FIRST_FREEBLOCK, 0); + page.write_u8(offset::BTREE_FRAGMENTED_BYTES_COUNT, 0); } - page.write_u16(PAGE_HEADER_OFFSET_CELL_COUNT, page.cell_count() as u16 - 1); + page.write_u16(offset::BTREE_CELL_COUNT, page.cell_count() as u16 - 1); debug_validate_cells!(page, usable_space); Ok(()) } @@ -3659,29 +5178,31 @@ #[cfg(test)] mod tests { - use rand_chacha::rand_core::RngCore; - use rand_chacha::rand_core::SeedableRng; - use rand_chacha::ChaCha8Rng; + use rand::{thread_rng, Rng}; + use rand_chacha::{ + rand_core::{RngCore, SeedableRng}, + ChaCha8Rng, + }; use test_log::test; use super::*; - use crate::fast_lock::SpinLock; - use crate::io::{Buffer, Completion, MemoryIO, OpenFlags, IO}; - use crate::storage::database::DatabaseFile; - use crate::storage::page_cache::DumbLruPageCache; - use crate::storage::sqlite3_ondisk; - use crate::storage::sqlite3_ondisk::DatabaseHeader; - use crate::types::Text; - use crate::vdbe::Register; - use crate::Connection; - use crate::{BufferPool, DatabaseStorage, WalFile, WalFileShared, WriteCompletion}; - use std::cell::RefCell; - use std::collections::HashSet; - use std::mem::transmute; - use std::ops::Deref; - use std::panic; - use std::rc::Rc; - use std::sync::Arc; + use crate::{ + fast_lock::SpinLock, + io::{Buffer, Completion, MemoryIO, OpenFlags, IO}, + storage::{ + database::DatabaseFile, + page_cache::DumbLruPageCache, + pager::CreateBTreeFlags, + sqlite3_ondisk::{self, DatabaseHeader}, + }, + types::Text, + vdbe::Register, + BufferPool, Connection, DatabaseStorage, StepResult, WalFile, WalFileShared, + WriteCompletion, + }; + use std::{ + cell::RefCell, collections::HashSet, mem::transmute, ops::Deref, panic, rc::Rc, sync::Arc, + }; use tempfile::TempDir; @@ -3706,14 +5227,13 @@ let page = Arc::new(Page::new(id)); let drop_fn = Rc::new(|_| {}); - let inner = PageContent { - offset: 0, - buffer: Arc::new(RefCell::new(Buffer::new( + let inner = PageContent::new( + 0, + Arc::new(RefCell::new(Buffer::new( BufferData::new(vec![0; 4096]), drop_fn, ))), - overflow_cells: Vec::new(), - }; + ); page.get().contents.replace(inner); btree_init_page(&page, PageType::TableLeaf, 0, 4096); @@ -3853,6 +5373,14 @@ _left_child_page, ..
}) => { child_pages.push(pager.read_page(_left_child_page as usize).unwrap()); + if _left_child_page == page.id as u32 { + valid = false; + tracing::error!( + "left child page is the same as parent {}", + _left_child_page + ); + continue; + } let (child_depth, child_valid) = validate_btree(pager.clone(), _left_child_page as usize); valid &= child_valid; @@ -3860,6 +5388,10 @@ } _ => panic!("unsupported btree cell: {:?}", cell), }; + if current_depth >= 100 { + tracing::error!("depth is too big"); + return (100, false); + } depth = Some(depth.unwrap_or(current_depth + 1)); if depth != Some(current_depth + 1) { tracing::error!("depth is different for child of page {}", page_idx); @@ -3965,22 +5497,22 @@ fn empty_btree() -> (Rc<Pager>, usize) { let db_header = DatabaseHeader::default(); - let page_size = db_header.page_size as usize; + let page_size = db_header.get_page_size(); #[allow(clippy::arc_with_non_send_sync)] let io: Arc<dyn IO> = Arc::new(MemoryIO::new()); let io_file = io.open_file("test.db", OpenFlags::Create, false).unwrap(); let db_file = Arc::new(DatabaseFile::new(io_file)); - let buffer_pool = Rc::new(BufferPool::new(db_header.page_size as usize)); - let wal_shared = WalFileShared::open_shared(&io, "test.wal", db_header.page_size).unwrap(); + let buffer_pool = Rc::new(BufferPool::new(page_size as usize)); + let wal_shared = WalFileShared::open_shared(&io, "test.wal", page_size).unwrap(); let wal_file = WalFile::new(io.clone(), page_size, wal_shared, buffer_pool.clone()); let wal = Rc::new(RefCell::new(wal_file)); let page_cache = Arc::new(parking_lot::RwLock::new(DumbLruPageCache::new(10))); let pager = { let db_header = Arc::new(SpinLock::new(db_header.clone())); - Pager::finish_open(db_header, db_file, wal, io, page_cache, buffer_pool).unwrap() + Pager::finish_open(db_header, db_file, Some(wal), io, page_cache, buffer_pool).unwrap() }; let pager = Rc::new(pager); let page1 = pager.allocate_page().unwrap(); @@ -4040,25 +5572,28 @@ for (key, size) in sequence.iter() { run_until_done( || { - let key = SeekKey::TableRowId(*key as u64); + let key = SeekKey::TableRowId(*key); cursor.move_to(key, SeekOp::EQ) }, pager.deref(), ) .unwrap(); - let key = OwnedValue::Integer(*key); let value = ImmutableRecord::from_registers(&[Register::OwnedValue( OwnedValue::Blob(vec![0; *size]), )]); tracing::info!("insert key:{}", key); - run_until_done(|| cursor.insert(&key, &value, true), pager.deref()).unwrap(); + run_until_done( + || cursor.insert(&BTreeKey::new_table_rowid(*key, Some(&value)), true), + pager.deref(), + ) + .unwrap(); tracing::info!( "=========== btree ===========\n{}\n\n", format_btree(pager.clone(), root_page, 0) ); } for (key, _) in sequence.iter() { - let seek_key = SeekKey::TableRowId(*key as u64); + let seek_key = SeekKey::TableRowId(*key); assert!( matches!( cursor.seek(seek_key, SeekOp::EQ).unwrap(), @@ -4092,9 +5627,7 @@ let (pager, root_page) = empty_btree(); let mut cursor = BTreeCursor::new(None, pager.clone(), root_page); let mut keys = Vec::new(); - let seed = rng.next_u64(); tracing::info!("seed: {}", seed); - let mut rng = ChaCha8Rng::seed_from_u64(seed); for insert_id in 0..inserts { let size = size(&mut rng); let key = { @@ -4126,13 +5659,34 @@ pager.deref(), ) .unwrap(); - - let key = OwnedValue::Integer(key); let value = ImmutableRecord::from_registers(&[Register::OwnedValue( OwnedValue::Blob(vec![0; size]), )]); - run_until_done(|| cursor.insert(&key, &value, true), pager.deref()).unwrap(); - if
matches!(validate_btree(pager.clone(), root_page), (_, false)) { + let btree_before = format_btree(pager.clone(), root_page, 0); + run_until_done( + || cursor.insert(&BTreeKey::new_table_rowid(key as u64, Some(&value)), true), + pager.deref(), + ) + .unwrap(); + // FIXME: add sorted vector instead, should be okay for small amounts of keys for now :P, too lazy to fix right now + keys.sort(); + cursor.move_to_root(); + let mut valid = true; + for key in keys.iter() { + tracing::trace!("seeking key: {}", key); + run_until_done(|| cursor.next(), pager.deref()).unwrap(); + let cursor_rowid = cursor.rowid().unwrap().unwrap(); + if *key as u64 != cursor_rowid { + valid = false; + println!("key {} is not found, got {}", key, cursor_rowid); + break; + } + } + // let's validate the btree too so that we understand where the btree failed + if matches!(validate_btree(pager.clone(), root_page), (_, false)) || !valid { + let btree_after = format_btree(pager.clone(), root_page, 0); + println!("btree before:\n{}", btree_before); + println!("btree after:\n{}", btree_after); panic!("invalid btree"); } } @@ -4143,16 +5697,95 @@ if matches!(validate_btree(pager.clone(), root_page), (_, false)) { panic!("invalid btree"); } + keys.sort(); + cursor.move_to_root(); for key in keys.iter() { - let seek_key = SeekKey::TableRowId(*key as u64); - tracing::debug!("seeking key: {}", key); - let found = - run_until_done(|| cursor.seek(seek_key.clone(), SeekOp::EQ), pager.deref()) - .unwrap(); - assert!(found, "key {} is not found", key); + tracing::trace!("seeking key: {}", key); + run_until_done(|| cursor.next(), pager.deref()).unwrap(); + let cursor_rowid = cursor.rowid().unwrap().unwrap(); + assert_eq!( + *key as u64, cursor_rowid, + "key {} is not found, got {}", + key, cursor_rowid + ); } } } + + fn btree_index_insert_fuzz_run(attempts: usize, inserts: usize) { + let (mut rng, seed) = if std::env::var("SEED").is_ok() { + let seed = std::env::var("SEED").unwrap(); + let seed = seed.parse::<u64>().unwrap(); + let rng = ChaCha8Rng::seed_from_u64(seed); + (rng, seed) + } else { + rng_from_time() + }; + let mut seen = HashSet::new(); + tracing::info!("super seed: {}", seed); + for _ in 0..attempts { + let (pager, _) = empty_btree(); + let index_root_page = pager.btree_create(&CreateBTreeFlags::new_index()); + let index_root_page = index_root_page as usize; + let mut cursor = BTreeCursor::new(None, pager.clone(), index_root_page); + let mut keys = Vec::new(); + tracing::info!("seed: {}", seed); + for _ in 0..inserts { + let key = { + let result; + loop { + let cols = (0..10) + .map(|_| (rng.next_u64() % (1 << 30)) as i64) + .collect::<Vec<_>>(); + if seen.contains(&cols) { + continue; + } else { + seen.insert(cols.clone()); + } + result = cols; + break; + } + result + }; + keys.push(key.clone()); + let value = ImmutableRecord::from_registers( + &key.iter() + .map(|col| Register::OwnedValue(OwnedValue::Integer(*col))) + .collect::<Vec<_>>(), + ); + run_until_done( + || { + cursor.insert( + &BTreeKey::new_index_key(&value), + cursor.is_write_in_progress(), + ) + }, + pager.deref(), + ) + .unwrap(); + keys.sort(); + cursor.move_to_root(); + } + keys.sort(); + cursor.move_to_root(); + for key in keys.iter() { + tracing::trace!("seeking key: {:?}", key); + run_until_done(|| cursor.next(), pager.deref()).unwrap(); + let record = cursor.record(); + let record = record.as_ref().unwrap(); + let cursor_key = record.get_values(); + assert_eq!( + cursor_key, + &key.iter() + .map(|col| RefValue::Integer(*col)) + .collect::<Vec<_>>(), + "key {:?} is not
found", + key + ); + } + } + } + #[test] pub fn test_drop_odd() { let db = get_database(); @@ -4207,25 +5840,26 @@ mod tests { } #[test] - #[ignore] + pub fn btree_index_insert_fuzz_run_equal_size() { + btree_index_insert_fuzz_run(2, 1024 * 32); + } + + #[test] pub fn btree_insert_fuzz_run_random() { btree_insert_fuzz_run(128, 16, |rng| (rng.next_u32() % 4096) as usize); } #[test] - #[ignore] pub fn btree_insert_fuzz_run_small() { - btree_insert_fuzz_run(1, 1024, |rng| (rng.next_u32() % 128) as usize); + btree_insert_fuzz_run(1, 100, |rng| (rng.next_u32() % 128) as usize); } #[test] - #[ignore] pub fn btree_insert_fuzz_run_big() { btree_insert_fuzz_run(64, 32, |rng| 3 * 1024 + (rng.next_u32() % 1024) as usize); } #[test] - #[ignore] pub fn btree_insert_fuzz_run_overflow() { btree_insert_fuzz_run(64, 32, |rng| (rng.next_u32() % 32 * 1024) as usize); } @@ -4234,7 +5868,7 @@ mod tests { fn setup_test_env(database_size: u32) -> (Rc, Arc>) { let page_size = 512; let mut db_header = DatabaseHeader::default(); - db_header.page_size = page_size; + db_header.update_page_size(page_size); db_header.database_size = database_size; let db_header = Arc::new(SpinLock::new(db_header)); @@ -4266,7 +5900,7 @@ mod tests { let wal_shared = WalFileShared::open_shared(&io, "test.wal", page_size).unwrap(); let wal = Rc::new(RefCell::new(WalFile::new( io.clone(), - page_size as usize, + page_size, wal_shared, buffer_pool.clone(), ))); @@ -4275,7 +5909,7 @@ mod tests { Pager::finish_open( db_header.clone(), db_file, - wal, + Some(wal), io, Arc::new(parking_lot::RwLock::new(DumbLruPageCache::new(10))), buffer_pool, @@ -4306,7 +5940,7 @@ mod tests { let drop_fn = Rc::new(|_buf| {}); #[allow(clippy::arc_with_non_send_sync)] let buf = Arc::new(RefCell::new(Buffer::allocate( - db_header.lock().page_size as usize, + db_header.lock().get_page_size() as usize, drop_fn, ))); let write_complete = Box::new(|_| {}); @@ -4460,7 +6094,7 @@ mod tests { let contents = root_page.get().contents.as_mut().unwrap(); // Set rightmost pointer to page4 - contents.write_u32(PAGE_HEADER_OFFSET_RIGHTMOST_PTR, page4.get().id as u32); + contents.write_u32(offset::BTREE_RIGHTMOST_PTR, page4.get().id as u32); // Create a cell with pointer to page3 let cell_content = vec![ @@ -4614,15 +6248,13 @@ mod tests { let mut total_size = 0; let mut cells = Vec::new(); let usable_space = 4096; - let mut i = 1000; - // let seed = thread_rng().gen(); - // let seed = 15292777653676891381; - let seed = 9261043168681395159; + let mut i = 100000; + let seed = thread_rng().gen(); tracing::info!("seed {}", seed); let mut rng = ChaCha8Rng::seed_from_u64(seed); while i > 0 { i -= 1; - match rng.next_u64() % 3 { + match rng.next_u64() % 4 { 0 => { // allow appends with extra place to insert let cell_idx = rng.next_u64() as usize % (page.cell_count() + 1); @@ -4639,14 +6271,14 @@ mod tests { 4096, conn.pager.clone(), ); - if (free as usize) < payload.len() - 2 { + if (free as usize) < payload.len() + 2 { // do not try to insert overflow pages because they require balancing continue; } insert_into_cell(page, &payload, cell_idx, 4096).unwrap(); assert!(page.overflow_cells.is_empty()); total_size += payload.len() as u16 + 2; - cells.push(Cell { pos: i, payload }); + cells.insert(cell_idx, Cell { pos: i, payload }); } 1 => { if page.cell_count() == 0 { @@ -4666,6 +6298,13 @@ mod tests { 2 => { defragment_page(page, usable_space); } + 3 => { + // check cells + for (i, cell) in cells.iter().enumerate() { + ensure_cell(page, i, &cell.payload); + } + 
assert_eq!(page.cell_count(), cells.len()); + } _ => unreachable!(), } let free = compute_free_space(page, usable_space); @@ -4745,6 +6384,106 @@ mod tests { } } + // this test will create a tree like this: + // -page:2, ptr(right):4 + // +cells:node[rowid:14, ptr(<=):3] + // -page:3, ptr(right):0 + // +cells:leaf[rowid:11, len(payload):137, overflow:false] + // -page:4, ptr(right):0 + // +cells: + #[test] + pub fn test_drop_page_in_balancing_issue_1203() { + let db = get_database(); + let conn = db.connect().unwrap(); + + let queries = vec![ +"CREATE TABLE lustrous_petit (awesome_nomous TEXT,ambitious_amargi TEXT,fantastic_daniels BLOB,stupendous_highleyman TEXT,relaxed_crane TEXT,elegant_bromma INTEGER,proficient_castro BLOB,ambitious_liman TEXT,responsible_lusbert BLOB);", +"INSERT INTO lustrous_petit VALUES ('funny_sarambi', 'hardworking_naoumov', X'666561726C6573735F68696C6C', 'elegant_iafd', 'rousing_flag', 681399778772406122, X'706572736F6E61626C655F676F6477696E6772696D6D', 'insightful_anonymous', X'706F77657266756C5F726F636861'), ('personable_holmes', 'diligent_pera', X'686F6E6573745F64696D656E73696F6E', 'energetic_raskin', 'gleaming_federasyon', -2778469859573362611, X'656666696369656E745F6769617A', 'sensible_skirda', X'66616E7461737469635F6B656174696E67'), ('inquisitive_baedan', 'brave_sphinx', X'67656E65726F75735F6D6F6E7473656E79', 'inquisitive_syndicate', 'amiable_room', 6954857961525890638, X'7374756E6E696E675F6E6965747A73636865', 'glowing_coordinator', X'64617A7A6C696E675F7365766572696E65'), ('upbeat_foxtale', 'engaging_aktimon', X'63726561746976655F6875746368696E6773', 'ample_locura', 'creative_barrett', 6413352509911171593, X'6772697070696E675F6D696E7969', 'competitive_parissi', X'72656D61726B61626C655F77696E7374616E6C6579');", +"INSERT INTO lustrous_petit VALUES ('ambitious_berry', 'devoted_marshall', X'696E7175697369746976655F6C6172657661', 'flexible_pramen', 'outstanding_stauch', 6936508362673228293, X'6C6F76696E675F6261756572', 'charming_anonymous', X'68617264776F726B696E675F616E6E6973'), ('enchanting_cohen', 'engaging_rubel', X'686F6E6573745F70726F766F63617A696F6E65', 'humorous_robin', 'imaginative_shuzo', 4762266264295288131, X'726F7573696E675F6261796572', 'vivid_bolling', X'6F7267616E697A65645F7275696E73'), ('affectionate_resistance', 'gripping_rustamova', X'6B696E645F6C61726B696E', 'bright_boulanger', 'upbeat_ashirov', -1726815435854320541, X'61646570745F66646361', 'dazzling_tashjian', X'68617264776F726B696E675F6D6F72656C'), ('zestful_ewald', 'favorable_lewis', X'73747570656E646F75735F7368616C6966', 'bright_combustion', 'blithesome_harding', 8408539013935554176, X'62726176655F737079726F706F756C6F75', 'hilarious_finnegan', X'676976696E675F6F7267616E697A696E67'), ('blithesome_picqueray', 'sincere_william', X'636F75726167656F75735F6D69746368656C6C', 'rousing_atan', 'mirthful_katie', -429232313453215091, X'6C6F76656C795F776174616E616265', 'stupendous_mcmillan', X'666F63757365645F6B61666568'), ('incredible_kid', 'friendly_yvetot', X'706572666563745F617A697A', 'helpful_manhattan', 'shining_horrox', -4318061095860308846, X'616D626974696F75735F726F7765', 'twinkling_anarkiya', X'696D6167696E61746976655F73756D6E6572');", +"INSERT INTO lustrous_petit VALUES ('sleek_graeber', 'approachable_ghazzawi', X'62726176655F6865776974747768697465', 'adaptable_zimmer', 'polite_cohn', -5464225138957223865, X'68756D6F726F75735F736E72', 'adaptable_igualada', X'6C6F76656C795F7A686F75'), ('imaginative_rautiainen', 'magnificent_ellul', X'73706C656E6469645F726F6361', 'responsible_brown', 
'upbeat_uruguaya', -1185340834321792223, X'616D706C655F6D6470', 'philosophical_kelly', X'676976696E675F6461676865726D6172676F7369616E'), ('blithesome_darkness', 'creative_newell', X'6C757374726F75735F61706174726973', 'engaging_kids', 'charming_wark', -1752453819873942466, X'76697669645F6162657273', 'independent_barricadas', X'676C697374656E696E675F64686F6E6474'), ('productive_chardronnet', 'optimistic_karnage', X'64696C6967656E745F666F72657374', 'engaging_beggar', 'sensible_wolke', 784341549042407442, X'656E676167696E675F6265726B6F7769637A', 'blithesome_zuzenko', X'6E6963655F70726F766F63617A696F6E65');", +"INSERT INTO lustrous_petit VALUES ('shining_sagris', 'considerate_mother', X'6F70656E5F6D696E6465645F72696F74', 'polite_laufer', 'patient_mink', 2240393952789100851, X'636F75726167656F75735F6D636D696C6C616E', 'glowing_robertson', X'68656C7066756C5F73796D6F6E6473'), ('dazzling_glug', 'stupendous_poznan', X'706572736F6E61626C655F6672616E6B73', 'open_minded_ruins', 'qualified_manes', 2937238916206423261, X'696E736967687466756C5F68616B69656C', 'passionate_borl', X'616D6961626C655F6B7570656E647561'), ('wondrous_parry', 'knowledgeable_giovanni', X'6D6F76696E675F77696E6E', 'shimmering_aberlin', 'affectionate_calhoun', 702116954493913499, X'7265736F7572636566756C5F62726F6D6D61', 'propitious_mezzagarcia', X'746563686E6F6C6F676963616C5F6E6973686974616E69');", +"INSERT INTO lustrous_petit VALUES ('kind_room', 'hilarious_crow', X'6F70656E5F6D696E6465645F6B6F74616E7969', 'hardworking_petit', 'adaptable_zarrow', 2491343172109894986, X'70726F647563746976655F646563616C6F677565', 'willing_sindikalis', X'62726561746874616B696E675F6A6F7264616E');", +"INSERT INTO lustrous_petit VALUES ('confident_etrebilal', 'agreeable_shifu', X'726F6D616E7469635F7363687765697A6572', 'loving_debs', 'gripping_spooner', -3136910055229112693, X'677265676172696F75735F736B726F7A6974736B79', 'ample_ontiveros', X'7175616C69666965645F726F6D616E69656E6B6F'), ('competitive_call', 'technological_egoumenides', X'6469706C6F6D617469635F6D6F6E616768616E', 'willing_stew', 'frank_neal', -5973720171570031332, X'6C6F76696E675F6465737461', 'dazzling_gambone', X'70726F647563746976655F6D656E64656C676C6565736F6E'), ('favorable_delesalle', 'sensible_atterbury', X'666169746866756C5F64617861', 'bountiful_aldred', 'marvelous_malgraith', 5330463874397264493, X'706572666563745F7765726265', 'lustrous_anti', X'6C6F79616C5F626F6F6B6368696E'), ('stellar_corlu', 'loyal_espana', X'6D6F76696E675F7A6167', 'efficient_nelson', 'qualified_shepard', 1015518116803600464, X'737061726B6C696E675F76616E6469766572', 'loving_scoffer', X'686F6E6573745F756C72696368'), ('adaptable_taylor', 'shining_yasushi', X'696D6167696E61746976655F776974746967', 'alluring_blackmore', 'zestful_coeurderoy', -7094136731216188999, X'696D6167696E61746976655F757A63617465677569', 'gleaming_hernandez', X'6672616E6B5F646F6D696E69636B'), ('competitive_luis', 'stellar_fredericks', X'616772656561626C655F6D696368656C', 'optimistic_navarro', 'funny_hamilton', 4003895682491323194, X'6F70656E5F6D696E6465645F62656C6D6173', 'incredible_thorndycraft', X'656C6567616E745F746F6C6B69656E'), ('remarkable_parsons', 'sparkling_ulrich', X'737061726B6C696E675F6D6172696E636561', 'technological_leighlais', 'warmhearted_konok', -5789111414354869563, X'676976696E675F68657272696E67', 'adept_dabtara', X'667269656E646C795F72617070');", +"INSERT INTO lustrous_petit VALUES ('hardworking_norberg', 'approachable_winter', X'62726176655F68617474696E6768', 'imaginative_james', 'open_minded_capital', -5950508516718821688, 
X'6C757374726F75735F72616E7473', 'warmhearted_limanov', X'696E736967687466756C5F646F637472696E65'), ('generous_shatz', 'generous_finley', X'726176697368696E675F6B757A6E6574736F76', 'stunning_arrigoni', 'favorable_volcano', -8442328990977069526, X'6D6972746866756C5F616C7467656C64', 'thoughtful_zurbrugg', X'6D6972746866756C5F6D6F6E726F65'), ('frank_kerr', 'splendid_swain', X'70617373696F6E6174655F6D6470', 'flexible_dubey', 'sensible_tj', 6352949260574274181, X'656666696369656E745F6B656D736B79', 'vibrant_ege', X'736C65656B5F6272696768746F6E'), ('organized_neal', 'glistening_sugar', X'656E676167696E675F6A6F72616D', 'romantic_krieger', 'qualified_corr', -4774868512022958085, X'706572666563745F6B6F7A6172656B', 'bountiful_zaikowska', X'74686F7567687466756C5F6C6F6767616E73'), ('excellent_lydiettcarrion', 'diligent_denslow', X'666162756C6F75735F6D616E68617474616E', 'confident_tomar', 'glistening_ligt', -1134906665439009896, X'7175616C69666965645F6F6E6B656E', 'remarkable_anarkiya', X'6C6F79616C5F696E64616261'), ('passionate_melis', 'loyal_xsilent', X'68617264776F726B696E675F73637564', 'lustrous_barnes', 'nice_sugako', -4097897163377829983, X'726F6D616E7469635F6461686572', 'bright_imrie', X'73656E7369626C655F6D61726B'), ('giving_mlb', 'breathtaking_fourier', X'736C65656B5F616E61726368697374', 'glittering_malet', 'brilliant_crew', 8791228049111405793, X'626F756E746966756C5F626576656E736565', 'lovely_swords', X'70726F706974696F75735F696E656469746173'), ('honest_wright', 'qualified_rabble', X'736C65656B5F6D6172656368616C', 'shimmering_marius', 'blithesome_mckelvie', -1330737263592370654, X'6F70656E5F6D696E6465645F736D616C6C', 'energetic_gorman', X'70726F706974696F75735F6B6F74616E7969');", +"DELETE FROM lustrous_petit WHERE (ambitious_liman > 'adept_dabtaqu');", +"INSERT INTO lustrous_petit VALUES ('technological_dewey', 'fabulous_st', X'6F7074696D69737469635F73687562', 'considerate_levy', 'adaptable_kernis', 4195134012457716562, X'61646570745F736F6C6964617269646164', 'vibrant_crump', X'6C6F79616C5F72796E6572'), ('super_marjan', 'awesome_gethin', X'736C65656B5F6F737465727765696C', 'diplomatic_loidl', 'qualified_bokani', -2822676417968234733, X'6272696768745F64756E6C6170', 'creative_en', X'6D6972746866756C5F656C6F6666'), ('philosophical_malet', 'unique_garcia', X'76697669645F6E6F7262657267', 'spellbinding_fire', 'faithful_barringtonbush', -7293711848773657758, X'6272696C6C69616E745F6F6B65656665', 'gripping_guillon', X'706572736F6E61626C655F6D61726C696E7370696B65'), ('thoughtful_morefus', 'lustrous_rodriguez', X'636F6E666964656E745F67726F73736D616E726F73686368696E', 'devoted_jackson', 'propitious_karnage', -7802999054396485709, X'63617061626C655F64', 'enchanting_orwell', X'7477696E6B6C696E675F64616C616B6F676C6F75'), ('alluring_guillon', 'brilliant_pinotnoir', X'706572736F6E61626C655F6A6165636B6C65', 'open_minded_azeez', 'courageous_romania', 2126962403055072268, X'746563686E6F6C6F676963616C5F6962616E657A', 'open_minded_rosa', X'6C757374726F75735F6575726F7065'), ('courageous_kolokotronis', 'inquisitive_gahman', X'677265676172696F75735F626172726574', 'ambitious_shakur', 'fantastic_apatris', -1232732971861520864, X'737061726B6C696E675F7761746368', 'captivating_clover', X'636F6E666964656E745F736574686E65737363617374726F'), ('charming_sullivan', 'focused_congress', X'7368696D6D6572696E675F636C7562', 'wondrous_skrbina', 'giving_mendanlioglu', -6837337053772308333, X'636861726D696E675F73616C696E6173', 'rousing_hedva', X'6469706C6F6D617469635F7061796E');", + ]; + + for query in queries { + let mut stmt = 
conn.query(query).unwrap().unwrap(); + loop { + let row = stmt.step().expect("step"); + match row { + StepResult::Done => { + break; + } + _ => { + tracing::debug!("row {:?}", row); + } + } + } + } + } + + // this test will create a tree like this: + // -page:2, ptr(right):3 + // +cells: + // -page:3, ptr(right):0 + // +cells: + #[test] + pub fn test_drop_page_in_balancing_issue_1203_2() { + let db = get_database(); + let conn = db.connect().unwrap(); + + let queries = vec![ +"CREATE TABLE super_becky (engrossing_berger BLOB,plucky_chai BLOB,mirthful_asbo REAL,bountiful_jon REAL,competitive_petit REAL,engrossing_rexroth REAL);", +"INSERT INTO super_becky VALUES (X'636861726D696E675F6261796572', X'70726F647563746976655F70617269737369', 6847793643.408741, 7330361375.924953, -6586051582.891455, -6921021872.711397), (X'657863656C6C656E745F6F7267616E697A696E67', X'6C757374726F75735F73696E64696B616C6973', 9905774996.48619, 570325205.2246342, 5852346465.53047, 728566012.1968269), (X'7570626561745F73656174746C65', X'62726176655F6661756E', -2202725836.424899, 5424554426.388281, 2625872085.917082, -6657362503.808359), (X'676C6F77696E675F6D617877656C6C', X'7761726D686561727465645F726F77616E', -9610936969.793116, 4886606277.093559, -3414536174.7928505, 6898267795.317778), (X'64796E616D69635F616D616E', X'7374656C6C61725F7374657073', 3918935692.153696, 151068445.947237, 4582065669.356403, -3312668220.4789667), (X'64696C6967656E745F64757272757469', X'7175616C69666965645F6D726163686E696B', 5527271629.262201, 6068855126.044355, 289904657.13490677, 2975774820.0877323), (X'6469706C6F6D617469635F726F76657363696F', X'616C6C7572696E675F626F7474696369', 9844748192.66119, -6180276383.305578, -4137330511.025565, -478754566.79494476), (X'776F6E64726F75735F6173686572', X'6465766F7465645F6176657273696F6E', 2310211470.114773, -6129166761.628184, -2865371645.3145514, 7542428654.8645935), (X'617070726F61636861626C655F6B686F6C61', X'6C757374726F75735F6C696E6E656C6C', -4993113161.458349, 7356727284.362968, -3228937035.568404, -1779334005.5067253);", +"INSERT INTO super_becky VALUES (X'74686F7567687466756C5F726576696577', X'617765736F6D655F63726F73736579', 9401977997.012783, 8428201961.643898, 2822821303.052643, 4555601220.718847), (X'73706563746163756C61725F6B686179617469', X'616772656561626C655F61646F6E696465', 7414547022.041355, 365016845.73330307, 50682963.055828094, -9258802584.962656), (X'6C6F79616C5F656D6572736F6E', X'676C6F77696E675F626174616C6F', -5522070106.765736, 2712536599.6384163, 6631385631.869345, 1242757880.7583427), (X'68617264776F726B696E675F6F6B656C6C79', X'666162756C6F75735F66696C697373', 6682622809.9778805, 4233900041.917185, 9017477903.795563, -756846353.6034946), (X'68617264776F726B696E675F626C61756D616368656E', X'616666656374696F6E6174655F6B6F736D616E', -1146438175.3174362, -7545123696.438596, -6799494012.403366, 5646913977.971333), (X'66616E7461737469635F726F77616E', X'74686F7567687466756C5F7465727269746F72696573', -4414529784.916277, -6209371635.279242, 4491104121.288605, 2590223842.117277);", +"INSERT INTO super_becky VALUES (X'676C697374656E696E675F706F72746572', X'696E7175697369746976655F656D', 2986144164.3676434, 3495899172.5935287, -849280584.9386635, 6869709150.2699375), (X'696D6167696E61746976655F6D65726C696E6F', X'676C6F77696E675F616B74696D6F6E', 8733490615.829357, 6782649864.719433, 6926744218.74107, 1532081022.4379768), (X'6E6963655F726F73736574', X'626C69746865736F6D655F66696C697373', -839304300.0706863, 6155504968.705227, -2951592321.950267, -6254186334.572437), 
(X'636F6E666964656E745F6C69626574', X'676C696D6D6572696E675F6B6F74616E7969', -5344675223.37533, -8703794729.211002, 3987472096.020382, -7678989974.961197), (X'696D6167696E61746976655F6B61726162756C7574', X'64796E616D69635F6D6367697272', 2028227065.6995697, -7435689525.030833, 7011220815.569796, 5526665697.213846), (X'696E7175697369746976655F636C61726B', X'616666656374696F6E6174655F636C6561766572', 3016598350.546356, -3686782925.383732, 9671422351.958004, 9099319829.078941), (X'63617061626C655F746174616E6B61', X'696E6372656469626C655F6F746F6E6F6D61', 6339989259.432795, -8888997534.102034, 6855868409.475763, -2565348887.290493), (X'676F7267656F75735F6265726E657269', X'65647563617465645F6F6D6F77616C69', 6992467657.527826, -3538089391.748543, -7103111660.146708, 4019283237.3740463), (X'616772656561626C655F63756C74757265', X'73706563746163756C61725F657370616E61', 189387871.06959534, 6211851191.361202, 1786455196.9768047, 7966404387.318119);", +"INSERT INTO super_becky VALUES (X'7068696C6F736F70686963616C5F6C656967686C616973', X'666162756C6F75735F73656D696E61746F7265', 8688321500.141502, -7855144036.024546, -5234949709.573349, -9937638367.366447), (X'617070726F61636861626C655F726F677565', X'676C65616D696E675F6D7574696E79', -5351540099.744092, -3614025150.9013805, -2327775310.276925, 2223379997.077526), (X'676C696D6D6572696E675F63617263686961', X'696D6167696E61746976655F61737379616E6E', 4104832554.8371887, -5531434716.627781, 1652773397.4099865, 3884980522.1830273);", +"DELETE FROM super_becky WHERE (plucky_chai != X'7761726D686561727465645F6877616E67' AND mirthful_asbo != 9537234687.183533 AND bountiful_jon = -3538089391.748543);", +"INSERT INTO super_becky VALUES (X'706C75636B795F6D617263616E74656C', X'696D6167696E61746976655F73696D73', 9535651632.375484, 92270815.0720501, 1299048084.6248207, 6460855331.572151), (X'726F6D616E7469635F706F746C61746368', X'68756D6F726F75735F63686165686F', 9345375719.265533, 7825332230.247925, -7133157299.39028, -6939677879.6597), (X'656666696369656E745F6261676E696E69', X'63726561746976655F67726168616D', -2615470560.1954746, 6790849074.977201, -8081732985.448849, -8133707792.312794), (X'677265676172696F75735F73637564', X'7368696E696E675F67726F7570', -7996394978.2610035, -9734939565.228964, 1108439333.8481388, -5420483517.169478), (X'6C696B61626C655F6B616E6176616C6368796B', X'636F75726167656F75735F7761726669656C64', -1959869609.656724, 4176668769.239971, -8423220404.063669, 9987687878.685959), (X'657863656C6C656E745F68696C6473646F74746572', X'676C6974746572696E675F7472616D7564616E61', -5220160777.908238, 3892402687.8826714, 9803857762.617172, -1065043714.0265541), (X'6D61676E69666963656E745F717565657273', X'73757065725F717565657273', -700932053.2006226, -4706306995.253335, -5286045811.046467, 1954345265.5250092), (X'676976696E675F6275636B65726D616E6E', X'667269656E646C795F70697A7A6F6C61746F', -2186859620.9089565, -6098492099.446075, -7456845586.405931, 8796967674.444252);", +"DELETE FROM super_becky WHERE TRUE;", +"INSERT INTO super_becky VALUES (X'6F7074696D69737469635F6368616E69616C', X'656E657267657469635F6E65677261', 1683345860.4208698, 4163199322.9289455, -4192968616.7868404, -7253371206.571701), (X'616C6C7572696E675F686176656C', X'7477696E6B6C696E675F626965627579636B', -9947019174.287437, 5975899640.893995, 3844707723.8570194, -9699970750.513876), (X'6F7074696D69737469635F7A686F75', X'616D626974696F75735F636F6E6772657373', 4143738484.1081524, -2138255286.170598, 9960750454.03466, 5840575852.80299), (X'73706563746163756C61725F6A6F6E67', 
X'73656E7369626C655F616269646F72', -1767611042.9716015, -7684260477.580351, 4570634429.188147, -9222640121.140202), (X'706F6C6974655F6B657272', X'696E736967687466756C5F63686F646F726B6F6666', -635016769.5123329, -4359901288.494518, -7531565119.905825, -1180410948.6572971), (X'666C657869626C655F636F6D756E69656C6C6F', X'6E6963655F6172636F73', 8708423014.802425, -6276712625.559328, -771680766.2485523, 8639486874.113342);", +"DELETE FROM super_becky WHERE (mirthful_asbo < 9730384310.536528 AND plucky_chai < X'6E6963655F61726370B2');", +"DELETE FROM super_becky WHERE (mirthful_asbo > 6248699554.426553 AND bountiful_jon > 4124481472.333034);", +"INSERT INTO super_becky VALUES (X'676C696D6D6572696E675F77656C7368', X'64696C6967656E745F636F7262696E', 8217054003.369003, 8745594518.77864, 1928172803.2261295, -8375115534.050233), (X'616772656561626C655F6463', X'6C6F76696E675F666F72656D616E', -5483889804.871533, -8264576639.127487, 4770567289.404846, -3409172927.2573576), (X'6D617276656C6F75735F6173696D616B6F706F756C6F73', X'746563686E6F6C6F676963616C5F6A61637175696572', 2694858779.206814, -1703227425.3442516, -4504989231.263319, -3097265869.5230227), (X'73747570656E646F75735F64757075697364657269', X'68696C6172696F75735F6D75697268656164', 568174708.66469, -4878260547.265669, -9579691520.956625, 73507727.8100338), (X'626C69746865736F6D655F626C6F6B', X'61646570745F6C65696572', 7772117077.916897, 4590608571.321514, -881713470.657032, -9158405774.647465);", +"INSERT INTO super_becky VALUES (X'6772697070696E675F6573736578', X'67656E65726F75735F636875726368696C6C', -4180431825.598956, 7277443000.677654, 2499796052.7878246, -2858339306.235305), (X'756E697175655F6D6172656368616C', X'62726561746874616B696E675F636875726368696C6C', 1401354536.7625294, -611427440.2796707, -4621650430.463729, 1531473111.7482872), (X'657863656C6C656E745F66696E6C6579', X'666169746866756C5F62726F636B', -4020697828.0073624, -2833530733.19637, -7766170050.654022, 8661820959.434689);", +"INSERT INTO super_becky VALUES (X'756E697175655F6C617061797265', X'6C6F76696E675F7374617465', 7063237787.258968, -5425712581.365798, -7750509440.0141945, -7570954710.892544), (X'62726561746874616B696E675F6E65616C', X'636F75726167656F75735F61727269676F6E69', 289862394.2028198, 9690362375.014446, -4712463267.033899, 2474917855.0973473), (X'7477696E6B6C696E675F7368616B7572', X'636F75726167656F75735F636F6D6D6974746565', 5449035403.229155, -2159678989.597906, 3625606019.1150894, -3752010405.4475393);", +"INSERT INTO super_becky VALUES (X'70617373696F6E6174655F73686970776179', X'686F6E6573745F7363687765697A6572', 4193384746.165228, -2232151704.896323, 8615245520.962444, -9789090953.995636);", +"INSERT INTO super_becky VALUES (X'6C696B61626C655F69', X'6661766F7261626C655F6D626168', 6581403690.769894, 3260059398.9544716, -407118859.046051, -3155853965.2700634), (X'73696E636572655F6F72', X'616772656561626C655F617070656C6261756D', 9402938544.308651, -7595112171.758331, -7005316716.211025, -8368210960.419411);", +"INSERT INTO super_becky VALUES (X'6D617276656C6F75735F6B61736864616E', X'6E6963655F636F7272', -5976459640.85817, -3177550476.2092276, 2073318650.736992, -1363247319.9978447);", +"INSERT INTO super_becky VALUES (X'73706C656E6469645F6C616D656E646F6C61', X'677265676172696F75735F766F6E6E65677574', 6898259773.050102, 8973519699.707073, -25070632.280548096, -1845922497.9676847), (X'617765736F6D655F7365766572', X'656E657267657469635F706F746C61746368', -8750678407.717808, 5130907533.668898, -6778425327.111566, 3718982135.202587);", +"INSERT INTO super_becky VALUES 
(X'70726F706974696F75735F6D616C617465737461', X'657863656C6C656E745F65766572657474', -8846855772.62094, -6168969732.697067, -8796372709.125793, 9983557891.544613), (X'73696E636572655F6C6177', X'696E7175697369746976655F73616E647374726F6D', -6366985697.975358, 3838628702.6652164, 3680621713.3371124, -786796486.8049564), (X'706F6C6974655F676C6561736F6E', X'706C75636B795F677579616E61', -3987946379.104308, -2119148244.413993, -1448660343.6888638, -1264195510.1611118), (X'676C6974746572696E675F6C6975', X'70657273697374656E745F6F6C6976696572', 6741779968.943846, -3239809989.227495, -1026074003.5506897, 4654600514.871752);", +"DELETE FROM super_becky WHERE (engrossing_berger < X'6566651A3C70278D4E200657551D8071A1' AND competitive_petit > 1236742147.9451914);", +"INSERT INTO super_becky VALUES (X'6661766F7261626C655F726569746D616E', X'64657465726D696E65645F726974746572', -7412553243.829927, -7572665195.290464, 7879603411.222157, 3706943306.5691853), (X'70657273697374656E745F6E6F6C616E', X'676C6974746572696E675F73686570617264', 7028261282.277422, -2064164782.3494844, -5244048504.507779, -2399526243.005843), (X'6B6E6F776C6564676561626C655F70617474656E', X'70726F66696369656E745F726F7365627261756768', 3713056763.583538, 3919834206.566164, -6306779387.430006, -9939464323.995546), (X'616461707461626C655F7172757A', X'696E7175697369746976655F68617261776179', 6519349690.299835, -9977624623.820414, 7500579325.440605, -8118341251.362242);", +"INSERT INTO super_becky VALUES (X'636F6E73696465726174655F756E696F6E', X'6E6963655F6573736578', -1497385534.8720198, 9957688503.242973, 9191804202.566128, -179015615.7117195), (X'666169746866756C5F626F776C656773', X'6361707469766174696E675F6D6367697272', 893707300.1576138, 3381656294.246702, 6884723724.381908, 6248331214.701559), (X'6B6E6F776C6564676561626C655F70656E6E61', X'6B696E645F616A697468', -3335162603.6574974, 1812878172.8505402, 5115606679.658335, -5690100280.808182), (X'617765736F6D655F77696E7374616E6C6579', X'70726F706974696F75735F6361726173736F', -7395576292.503981, 4956546102.029215, -1468521769.7486448, -2968223925.60355), (X'636F75726167656F75735F77617266617265', X'74686F7567687466756C5F7361707068697265', 7052982930.566017, -9806098174.104418, -6910398936.377775, -4041963031.766964), (X'657863656C6C656E745F6B62', X'626C69746865736F6D655F666F75747A6F706F756C6F73', 6142173202.994768, 5193126957.544125, -7522202722.983735, -1659088056.594862), (X'7374756E6E696E675F6E6576616461', X'626F756E746966756C5F627572746F6E', -3822097036.7628613, -3458840259.240303, 2544472236.86788, 6928890176.466003);", +"INSERT INTO super_becky VALUES (X'706572736F6E61626C655F646D69747269', X'776F6E64726F75735F6133796F', 2651932559.0077076, 811299402.3174248, -8271909238.671928, 6761098864.189909);", +"INSERT INTO super_becky VALUES (X'726F7573696E675F6B6C6166657461', X'64617A7A6C696E675F6B6E617070', 9370628891.439335, -5923332007.253168, -2763161830.5880013, -9156194881.875952), (X'656666696369656E745F6C6576656C6C6572', X'616C6C7572696E675F706561636F7474', 3102641409.8314342, 2838360181.628153, 2466271662.169607, 1015942181.844162), (X'6469706C6F6D617469635F7065726B696E73', X'726F7573696E675F6172616269', -1551071129.022499, -8079487600.186886, 7832984580.070087, -6785993247.895652), (X'626F756E746966756C5F6D656D62657273', X'706F77657266756C5F70617269737369', 9226031830.72445, 7012021503.536997, -2297349030.108919, -2738320055.4710903), (X'676F7267656F75735F616E6172636F7469636F', X'68656C7066756C5F7765696C616E64', -8394163480.676959, -2978605095.699134, -6439355448.021704, 
9137308022.281273), (X'616666656374696F6E6174655F70726F6C65696E666F', X'706C75636B795F73616E7A', 3546758708.3524914, -1870964264.9353771, 338752565.3643894, -3908023657.299715), (X'66756E6E795F706F70756C61697265', X'6F75747374616E64696E675F626576696E67746F6E', -1533858145.408224, 6164225076.710373, 8419445987.622173, 584555253.6852646), (X'76697669645F6D7474', X'7368696D6D6572696E675F70616F6E65737361', 5512251366.193035, -8680583180.123213, -4445968638.153208, -3274009935.4229546);", +"INSERT INTO super_becky VALUES (X'7068696C6F736F70686963616C5F686F7264', X'657863656C6C656E745F67757373656C7370726F757473', -816909447.0240917, -3614686681.8786583, 7701617524.26067, -4541962047.183721), (X'616D6961626C655F69676E6174696576', X'6D61676E69666963656E745F70726F76696E6369616C69', -1318532883.847702, -4918966075.976474, -7601723171.33518, -3515747704.3847466), (X'70726F66696369656E745F32303137', X'66756E6E795F6E77', -1264540201.518032, 8227396547.578808, 6245093925.183641, -8368355328.110817);", +"INSERT INTO super_becky VALUES (X'77696C6C696E675F6E6F6B6B65', X'726F6D616E7469635F677579616E61', 6618610796.3707695, -3814565359.1524105, 1663106272.4565296, -4175107840.768817), (X'72656C617865645F7061766C6F76', X'64657465726D696E65645F63686F646F726B6F6666', -3350029338.034504, -3520837855.4619064, 3375167499.631817, -8866806483.714607), (X'616D706C655F67696464696E6773', X'667269656E646C795F6A6F686E', 1458864959.9942684, 1344208968.0486107, 9335156635.91314, -6180643697.918882), (X'72656C617865645F6C65726F79', X'636F75726167656F75735F6E6F72646772656E', -5164986537.499656, 8820065797.720875, 6146530425.891005, 6949241471.958189), (X'666F63757365645F656D6D61', X'696D6167696E61746976655F6C6F6E67', -9587619060.80035, 6128068142.184402, 6765196076.956905, 800226302.7983418);", +"INSERT INTO super_becky VALUES (X'616D626974696F75735F736F6E67', X'706572666563745F6761686D616E', 4989979180.706432, -9374266591.537058, 314459621.2820797, -3200029490.9553604), (X'666561726C6573735F626C6174', X'676C697374656E696E675F616374696F6E', -8512203612.903147, -7625581186.013805, -9711122307.234787, -301590929.32751083), (X'617765736F6D655F6669646573', X'666169746866756C5F63756E6E696E6768616D', -1428228887.9205084, 7669883854.400173, 5604446195.905277, -1509311057.9653416), (X'68756D6F726F75735F77697468647261776E', X'62726561746874616B696E675F7472617562656C', -7292778713.676636, -6728132503.529593, 2805341768.7252483, 330416975.2300949);", +"INSERT INTO super_becky VALUES (X'677265676172696F75735F696873616E', X'7374656C6C61725F686172746D616E', 8819210651.1988, 5298459883.813452, 7293544377.958424, 460475869.72971725), (X'696E736967687466756C5F62657765726E69747A', X'676C65616D696E675F64656E736C6F77', -6911957282.193239, 1754196756.2193146, -6316860403.693853, -3094020672.236368), (X'6D6972746866756C5F616D6265727261656B656C6C79', X'68756D6F726F75735F6772617665', 1785574023.0269203, -372056983.82761574, 4133719439.9538956, 9374053482.066044), (X'76697669645F736169747461', X'7761726D686561727465645F696E656469746173', 2787071361.6099434, 9663839418.553448, -5934098589.901047, -9774745509.608858), (X'61646570745F6F6375727279', X'6C696B61626C655F726569746D616E', -3098540915.1310825, 5460848322.672174, -6012867197.519758, 6769770087.661135), (X'696E646570656E64656E745F6F', X'656C6567616E745F726F6F726461', 1462542860.3143978, 3360904654.2464733, 5458876201.665213, -5522844849.529962), (X'72656D61726B61626C655F626F6B616E69', X'6F70656E5F6D696E6465645F686F72726F78', 7589481760.867031, 7970075121.546291, 7513467575.5213585, 
9663061478.289227), (X'636F6E666964656E745F6C616479', X'70617373696F6E6174655F736B726F7A6974736B79', 8266917234.53915, -7172933478.625412, 309854059.94031143, -8309837814.497616);", +"DELETE FROM super_becky WHERE (competitive_petit != 8725256604.165474 OR engrossing_rexroth > -3607424615.7839313 OR plucky_chai < X'726F7573696E675F6216E20375');", +"INSERT INTO super_becky VALUES (X'7368696E696E675F736F6C69646169726573', X'666561726C6573735F63617264616E', -170727879.20838165, 2744601113.384678, 5676912434.941502, 6757573601.657997), (X'636F75726167656F75735F706C616E636865', X'696E646570656E64656E745F636172736F6E', -6271723086.761938, -180566679.7470188, -1285774632.134449, 1359665735.7842407), (X'677265676172696F75735F7374616D61746F76', X'7374756E6E696E675F77696C64726F6F7473', -6210238866.953484, 2492683045.8287067, -9688894361.68205, 5420275482.048567), (X'696E646570656E64656E745F6F7267616E697A6572', X'676C6974746572696E675F736F72656C', 9291163783.3073, -6843003475.769236, -1320245894.772686, -5023483808.044955), (X'676C6F77696E675F6E65736963', X'676C65616D696E675F746F726D6579', 829526382.8027191, 9365690945.1316, 4761505764.826195, -4149154965.0024815), (X'616C6C7572696E675F646F637472696E65', X'6E6963655F636C6561766572', 3896644979.981762, -288600448.8016701, 9462856570.130062, -909633752.5993862);", + ]; + + for query in queries { + let mut stmt = conn.query(query).unwrap().unwrap(); + loop { + let row = stmt.step().expect("step"); + match row { + StepResult::Done => { + break; + } + _ => { + tracing::debug!("row {:?}", row); + } + } + } + } + } + #[test] pub fn test_free_space() { let db = get_database(); @@ -5005,7 +6744,6 @@ mod tests { for i in 0..10000 { let mut cursor = BTreeCursor::new(None, pager.clone(), root_page); tracing::info!("INSERT INTO t VALUES ({});", i,); - let key = OwnedValue::Integer(i); let value = ImmutableRecord::from_registers(&[Register::OwnedValue(OwnedValue::Integer(i))]); tracing::trace!("before insert {}", i); @@ -5017,7 +6755,11 @@ mod tests { pager.deref(), ) .unwrap(); - run_until_done(|| cursor.insert(&key, &value, true), pager.deref()).unwrap(); + run_until_done( + || cursor.insert(&BTreeKey::new_table_rowid(i as u64, Some(&value)), true), + pager.deref(), + ) + .unwrap(); keys.push(i); } if matches!(validate_btree(pager.clone(), root_page), (_, false)) { @@ -5081,7 +6823,6 @@ mod tests { // Insert 10,000 records in to the BTree. 
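Both regression tests above drive every statement the same way: step until `StepResult::Done`, logging any intermediate rows. Reduced to a helper, the pattern looks like this (a sketch against the `Connection`/`Statement` API used in these tests; the import path is assumed, not part of this diff):

```rust
use limbo_core::{Connection, StepResult}; // hypothetical import path

/// Run a statement to completion, logging any rows it yields along the way.
/// Mirrors the step loop repeated in the tests above.
fn exec_to_done(conn: &Connection, sql: &str) {
    let mut stmt = conn.query(sql).unwrap().unwrap();
    loop {
        match stmt.step().expect("step") {
            StepResult::Done => break,
            row => tracing::debug!("row {:?}", row),
        }
    }
}
```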
for i in 1..=10000 { let mut cursor = BTreeCursor::new(None, pager.clone(), root_page); - let key = OwnedValue::Integer(i); let value = ImmutableRecord::from_registers(&[Register::OwnedValue(OwnedValue::Text( Text::new("hello world"), ))]); @@ -5095,7 +6836,11 @@ mod tests { ) .unwrap(); - run_until_done(|| cursor.insert(&key, &value, true), pager.deref()).unwrap(); + run_until_done( + || cursor.insert(&BTreeKey::new_table_rowid(i as u64, Some(&value)), true), + pager.deref(), + ) + .unwrap(); } match validate_btree(pager.clone(), root_page) { @@ -5154,7 +6899,6 @@ mod tests { for i in 0..iterations { let mut cursor = BTreeCursor::new(None, pager.clone(), root_page); tracing::info!("INSERT INTO t VALUES ({});", i,); - let key = OwnedValue::Integer(i as i64); let value = ImmutableRecord::from_registers(&[Register::OwnedValue(OwnedValue::Text(Text { value: huge_texts[i].as_bytes().to_vec(), @@ -5173,7 +6917,11 @@ mod tests { pager.deref(), ) .unwrap(); - run_until_done(|| cursor.insert(&key, &value, true), pager.deref()).unwrap(); + run_until_done( + || cursor.insert(&BTreeKey::new_table_rowid(i as u64, Some(&value)), true), + pager.deref(), + ) + .unwrap(); tracing::debug!( "=========== btree after ===========\n{}\n\n", format_btree(pager.clone(), root_page, 0) diff --git a/core/storage/database.rs b/core/storage/database.rs index f23d2d3ee..cf8b57d8e 100644 --- a/core/storage/database.rs +++ b/core/storage/database.rs @@ -1,4 +1,3 @@ -#[cfg(feature = "fs")] use crate::error::LimboError; use crate::{io::Completion, Buffer, Result}; use std::{cell::RefCell, sync::Arc}; @@ -70,3 +69,52 @@ impl DatabaseFile { Self { file } } } + +pub struct FileMemoryStorage { + file: Arc, +} + +unsafe impl Send for FileMemoryStorage {} +unsafe impl Sync for FileMemoryStorage {} + +impl DatabaseStorage for FileMemoryStorage { + fn read_page(&self, page_idx: usize, c: Completion) -> Result<()> { + let r = match c { + Completion::Read(ref r) => r, + _ => unreachable!(), + }; + let size = r.buf().len(); + assert!(page_idx > 0); + if !(512..=65536).contains(&size) || size & (size - 1) != 0 { + return Err(LimboError::NotADB); + } + let pos = (page_idx - 1) * size; + self.file.pread(pos, c)?; + Ok(()) + } + + fn write_page( + &self, + page_idx: usize, + buffer: Arc>, + c: Completion, + ) -> Result<()> { + let buffer_size = buffer.borrow().len(); + assert!(buffer_size >= 512); + assert!(buffer_size <= 65536); + assert_eq!(buffer_size & (buffer_size - 1), 0); + let pos = (page_idx - 1) * buffer_size; + self.file.pwrite(pos, buffer, c)?; + Ok(()) + } + + fn sync(&self, c: Completion) -> Result<()> { + self.file.sync(c) + } +} + +impl FileMemoryStorage { + pub fn new(file: Arc) -> Self { + Self { file } + } +} diff --git a/core/storage/pager.rs b/core/storage/pager.rs index 70af1c8d2..36a0936a3 100644 --- a/core/storage/pager.rs +++ b/core/storage/pager.rs @@ -123,6 +123,13 @@ impl Page { tracing::debug!("clear loaded {}", self.get().id); self.get().flags.fetch_and(!PAGE_LOADED, Ordering::SeqCst); } + + pub fn is_index(&self) -> bool { + match self.get_contents().page_type() { + PageType::IndexLeaf | PageType::IndexInterior => true, + PageType::TableLeaf | PageType::TableInterior => false, + } + } } #[derive(Clone, Copy, Debug)] @@ -157,7 +164,7 @@ pub struct Pager { /// Source of the database pages. pub db_file: Arc, /// The write-ahead log (WAL) for the database. - wal: Rc>, + wal: Option>>, /// A page cache for the database. page_cache: Arc>, /// Buffer pool for temporary data storage. 
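The `FileMemoryStorage` added to `core/storage/database.rs` above maps 1-based page numbers onto flat byte offsets and rejects any buffer whose size is not a power of two in `[512, 65536]`. A self-contained sketch of just that arithmetic (the function name is illustrative, not the crate's API):

```rust
/// Byte offset of a 1-based page number, mirroring the validation in
/// FileMemoryStorage::read_page above.
fn page_offset(page_idx: usize, page_size: usize) -> Result<usize, &'static str> {
    if page_idx == 0 {
        return Err("page numbers are 1-based");
    }
    // A power of two has exactly one bit set, so `size & (size - 1)`
    // clears the lowest set bit and must yield zero.
    if !(512..=65536).contains(&page_size) || page_size & (page_size - 1) != 0 {
        return Err("not a valid SQLite page size");
    }
    Ok((page_idx - 1) * page_size)
}

fn main() {
    assert_eq!(page_offset(1, 4096), Ok(0)); // page 1 starts the file
    assert_eq!(page_offset(3, 4096), Ok(8192)); // pages 1 and 2 precede it
    assert!(page_offset(0, 4096).is_err()); // page 0 does not exist
    assert!(page_offset(2, 1000).is_err()); // 1000 is not a power of two
}
```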
@@ -183,7 +190,7 @@ impl Pager { pub fn finish_open( db_header_ref: Arc>, db_file: Arc, - wal: Rc>, + wal: Option>>, io: Arc, page_cache: Arc>, buffer_pool: Rc, @@ -206,20 +213,31 @@ impl Pager { }) } - pub fn btree_create(&self, flags: usize) -> u32 { + pub fn btree_create(&self, flags: &CreateBTreeFlags) -> u32 { let page_type = match flags { - 1 => PageType::TableLeaf, - 2 => PageType::IndexLeaf, - _ => unreachable!( - "wrong create table flags, should be 1 for table and 2 for index, got {}", - flags, - ), + _ if flags.is_table() => PageType::TableLeaf, + _ if flags.is_index() => PageType::IndexLeaf, + _ => unreachable!("Invalid flags state"), }; let page = self.do_allocate_page(page_type, 0); let id = page.get().id; id as u32 } + /// Allocate a new overflow page. + /// This is done when a cell overflows and new space is needed. + pub fn allocate_overflow_page(&self) -> PageRef { + let page = self.allocate_page().unwrap(); + tracing::debug!("Pager::allocate_overflow_page(id={})", page.get().id); + + // setup overflow page + let contents = page.get().contents.as_mut().unwrap(); + let buf = contents.as_ptr(); + buf.fill(0); + + page + } + /// Allocate a new page to the btree via the pager. /// This marks the page as dirty and writes the page header. pub fn do_allocate_page(&self, page_type: PageType, offset: usize) -> PageRef { @@ -239,33 +257,47 @@ impl Pager { /// In other words, if the page size is 512, then the reserved space size cannot exceed 32. pub fn usable_space(&self) -> usize { let db_header = self.db_header.lock(); - (db_header.page_size - db_header.reserved_space as u16) as usize + (db_header.get_page_size() - db_header.reserved_space as u32) as usize } #[inline(always)] pub fn begin_read_tx(&self) -> Result { - self.wal.borrow_mut().begin_read_tx() + if let Some(wal) = &self.wal { + return wal.borrow_mut().begin_read_tx(); + } + + Ok(LimboResult::Ok) } #[inline(always)] pub fn begin_write_tx(&self) -> Result { - self.wal.borrow_mut().begin_write_tx() + if let Some(wal) = &self.wal { + return wal.borrow_mut().begin_write_tx(); + } + + Ok(LimboResult::Ok) } pub fn end_tx(&self) -> Result { - let checkpoint_status = self.cacheflush()?; - match checkpoint_status { - CheckpointStatus::IO => Ok(checkpoint_status), - CheckpointStatus::Done(_) => { - self.wal.borrow().end_write_tx()?; - self.wal.borrow().end_read_tx()?; - Ok(checkpoint_status) - } + if let Some(wal) = &self.wal { + let checkpoint_status = self.cacheflush()?; + return match checkpoint_status { + CheckpointStatus::IO => Ok(checkpoint_status), + CheckpointStatus::Done(_) => { + wal.borrow().end_write_tx()?; + wal.borrow().end_read_tx()?; + Ok(checkpoint_status) + } + }; } + + Ok(CheckpointStatus::Done(CheckpointResult::default())) } pub fn end_read_tx(&self) -> Result<()> { - self.wal.borrow().end_read_tx()?; + if let Some(wal) = &self.wal { + wal.borrow().end_read_tx()?; + } Ok(()) } @@ -273,7 +305,11 @@ impl Pager { pub fn read_page(&self, page_idx: usize) -> Result { tracing::trace!("read_page(page_idx = {})", page_idx); let mut page_cache = self.page_cache.write(); - let page_key = PageCacheKey::new(page_idx, Some(self.wal.borrow().get_max_frame())); + let max_frame = match &self.wal { + Some(wal) => wal.borrow().get_max_frame(), + None => 0, + }; + let page_key = PageCacheKey::new(page_idx, Some(max_frame)); if let Some(page) = page_cache.get(&page_key) { tracing::trace!("read_page(page_idx = {}) = cached", page_idx); return Ok(page.clone()); @@ -281,17 +317,18 @@ impl Pager { let page = 
Arc::new(Page::new(page_idx)); page.set_locked(); - if let Some(frame_id) = self.wal.borrow().find_frame(page_idx as u64)? { - self.wal - .borrow() - .read_frame(frame_id, page.clone(), self.buffer_pool.clone())?; - { - page.set_uptodate(); + if let Some(wal) = &self.wal { + if let Some(frame_id) = wal.borrow().find_frame(page_idx as u64)? { + wal.borrow() + .read_frame(frame_id, page.clone(), self.buffer_pool.clone())?; + { + page.set_uptodate(); + } + // TODO(pere) ensure page is inserted, we should probably first insert to page cache + // and if successful, read frame or page + page_cache.insert(page_key, page.clone()); + return Ok(page); } - // TODO(pere) ensure page is inserted, we should probably first insert to page cache - // and if successful, read frame or page - page_cache.insert(page_key, page.clone()); - return Ok(page); } sqlite3_ondisk::begin_read_page( self.db_file.clone(), @@ -310,19 +347,29 @@ impl Pager { trace!("load_page(page_idx = {})", id); let mut page_cache = self.page_cache.write(); page.set_locked(); - let page_key = PageCacheKey::new(id, Some(self.wal.borrow().get_max_frame())); - if let Some(frame_id) = self.wal.borrow().find_frame(id as u64)? { - self.wal - .borrow() - .read_frame(frame_id, page.clone(), self.buffer_pool.clone())?; - { - page.set_uptodate(); + let max_frame = match &self.wal { + Some(wal) => wal.borrow().get_max_frame(), + None => 0, + }; + let page_key = PageCacheKey::new(id, Some(max_frame)); + if let Some(wal) = &self.wal { + if let Some(frame_id) = wal.borrow().find_frame(id as u64)? { + wal.borrow() + .read_frame(frame_id, page.clone(), self.buffer_pool.clone())?; + { + page.set_uptodate(); + } + // TODO(pere) ensure page is inserted + if !page_cache.contains_key(&page_key) { + page_cache.insert(page_key, page.clone()); + } + return Ok(()); } - // TODO(pere) ensure page is inserted - if !page_cache.contains_key(&page_key) { - page_cache.insert(page_key, page.clone()); - } - return Ok(()); + } + + // TODO(pere) ensure page is inserted + if !page_cache.contains_key(&page_key) { + page_cache.insert(page_key, page.clone()); } sqlite3_ondisk::begin_read_page( self.db_file.clone(), @@ -330,10 +377,7 @@ impl Pager { page.clone(), id, )?; - // TODO(pere) ensure page is inserted - if !page_cache.contains_key(&page_key) { - page_cache.insert(page_key, page.clone()); - } + Ok(()) } @@ -362,18 +406,23 @@ impl Pager { match state { FlushState::Start => { let db_size = self.db_header.lock().database_size; + let max_frame = match &self.wal { + Some(wal) => wal.borrow().get_max_frame(), + None => 0, + }; for page_id in self.dirty_pages.borrow().iter() { let mut cache = self.page_cache.write(); - let page_key = - PageCacheKey::new(*page_id, Some(self.wal.borrow().get_max_frame())); - let page = cache.get(&page_key).expect("we somehow added a page to dirty list but we didn't mark it as dirty, causing cache to drop it."); - let page_type = page.get().contents.as_ref().unwrap().maybe_page_type(); - trace!("cacheflush(page={}, page_type={:?}", page_id, page_type); - self.wal.borrow_mut().append_frame( - page.clone(), - db_size, - self.flush_info.borrow().in_flight_writes.clone(), - )?; + let page_key = PageCacheKey::new(*page_id, Some(max_frame)); + if let Some(wal) = &self.wal { + let page = cache.get(&page_key).expect("we somehow added a page to dirty list but we didn't mark it as dirty, causing cache to drop it."); + let page_type = page.get().contents.as_ref().unwrap().maybe_page_type(); + trace!("cacheflush(page={}, page_type={:?}", page_id, page_type); 
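From this point on the pager treats the WAL as optional: every call site either returns a no-op result or falls back to `max_frame = 0` when `self.wal` is `None`. The shape of that fallback, reduced to a runnable sketch (the `Wal` and `Pager` stand-ins are illustrative, not the crate's real types):

```rust
use std::cell::RefCell;
use std::rc::Rc;

struct Wal {
    max_frame: u64,
}

struct Pager {
    wal: Option<Rc<RefCell<Wal>>>,
}

impl Pager {
    /// With no WAL there are no frames, so 0 is the natural identity value
    /// for cache keys such as PageCacheKey::new(page_idx, Some(max_frame)).
    fn max_frame(&self) -> u64 {
        match &self.wal {
            Some(wal) => wal.borrow().max_frame,
            None => 0,
        }
    }
}

fn main() {
    let no_wal = Pager { wal: None };
    assert_eq!(no_wal.max_frame(), 0);

    let with_wal = Pager {
        wal: Some(Rc::new(RefCell::new(Wal { max_frame: 7 }))),
    };
    assert_eq!(with_wal.max_frame(), 7);
}
```

Centralizing the fallback in one helper like this would also collapse the repeated `match &self.wal` blocks in `read_page`, `load_page`, and `cacheflush` into a single expression.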
+ wal.borrow_mut().append_frame( + page.clone(), + db_size, + self.flush_info.borrow().in_flight_writes.clone(), + )?; + } // This page is no longer valid. // For example: // We took page with key (page_num, max_frame) -- this page is no longer valid for that max_frame so it must be invalidated. @@ -392,13 +441,16 @@ impl Pager { } } FlushState::SyncWal => { - match self.wal.borrow_mut().sync() { + let wal = self.wal.clone().ok_or(LimboError::InternalError( + "SyncWal was called without an existing wal".to_string(), + ))?; + match wal.borrow_mut().sync() { Ok(CheckpointStatus::IO) => return Ok(CheckpointStatus::IO), Ok(CheckpointStatus::Done(res)) => checkpoint_result = res, Err(e) => return Err(e), } - let should_checkpoint = self.wal.borrow().should_checkpoint(); + let should_checkpoint = wal.borrow().should_checkpoint(); if should_checkpoint { self.flush_info.borrow_mut().state = FlushState::Checkpoint; } else { @@ -440,11 +492,13 @@ impl Pager { match state { CheckpointState::Checkpoint => { let in_flight = self.checkpoint_inflight.clone(); - match self.wal.borrow_mut().checkpoint( - self, - in_flight, - CheckpointMode::Passive, - )? { + let wal = self.wal.clone().ok_or(LimboError::InternalError( + "Checkpoint was called without an existing wal".to_string(), + ))?; + match wal + .borrow_mut() + .checkpoint(self, in_flight, CheckpointMode::Passive)? + { CheckpointStatus::IO => return Ok(CheckpointStatus::IO), CheckpointStatus::Done(res) => { checkpoint_result = res; @@ -481,7 +535,7 @@ impl Pager { pub fn clear_page_cache(&self) -> CheckpointResult { let checkpoint_result: CheckpointResult; loop { - match self.wal.borrow_mut().checkpoint( + match self.wal.clone().unwrap().borrow_mut().checkpoint( self, Rc::new(RefCell::new(0)), CheckpointMode::Passive, @@ -606,8 +660,12 @@ impl Pager { page.set_dirty(); self.add_dirty(page.get().id); let mut cache = self.page_cache.write(); - let page_key = - PageCacheKey::new(page.get().id, Some(self.wal.borrow().get_max_frame())); + let max_frame = match &self.wal { + Some(wal) => wal.borrow().get_max_frame(), + None => 0, + }; + + let page_key = PageCacheKey::new(page.get().id, Some(max_frame)); cache.insert(page_key, page.clone()); } Ok(page) @@ -616,14 +674,18 @@ impl Pager { pub fn put_loaded_page(&self, id: usize, page: PageRef) { let mut cache = self.page_cache.write(); // cache insert invalidates previous page - let page_key = PageCacheKey::new(id, Some(self.wal.borrow().get_max_frame())); + let max_frame = match &self.wal { + Some(wal) => wal.borrow().get_max_frame(), + None => 0, + }; + let page_key = PageCacheKey::new(id, Some(max_frame)); cache.insert(page_key, page.clone()); page.set_loaded(); } pub fn usable_size(&self) -> usize { let db_header = self.db_header.lock(); - (db_header.page_size - db_header.reserved_space as u16) as usize + (db_header.get_page_size() - db_header.reserved_space as u32) as usize } } @@ -637,15 +699,38 @@ pub fn allocate_page(page_id: usize, buffer_pool: &Rc, offset: usize }); let buffer = Arc::new(RefCell::new(Buffer::new(buffer, drop_fn))); page.set_loaded(); - page.get().contents = Some(PageContent { - offset, - buffer, - overflow_cells: Vec::new(), - }); + page.get().contents = Some(PageContent::new(offset, buffer)); } page } +#[derive(Debug)] +pub struct CreateBTreeFlags(pub u8); +impl CreateBTreeFlags { + pub const TABLE: u8 = 0b0001; + pub const INDEX: u8 = 0b0010; + + pub fn new_table() -> Self { + Self(CreateBTreeFlags::TABLE) + } + + pub fn new_index() -> Self { + Self(CreateBTreeFlags::INDEX) + } + +
pub fn is_table(&self) -> bool { + (self.0 & CreateBTreeFlags::TABLE) != 0 + } + + pub fn is_index(&self) -> bool { + (self.0 & CreateBTreeFlags::INDEX) != 0 + } + + pub fn get_flags(&self) -> u8 { + self.0 + } +} + #[cfg(test)] mod tests { use std::sync::Arc; diff --git a/core/storage/sqlite3_ondisk.rs b/core/storage/sqlite3_ondisk.rs index d48f5b61b..fd77dd2ea 100644 --- a/core/storage/sqlite3_ondisk.rs +++ b/core/storage/sqlite3_ondisk.rs @@ -47,7 +47,9 @@ use crate::io::{Buffer, Completion, ReadCompletion, SyncCompletion, WriteComplet use crate::storage::buffer_pool::BufferPool; use crate::storage::database::DatabaseStorage; use crate::storage::pager::Pager; -use crate::types::{ImmutableRecord, RawSlice, RefValue, TextRef, TextSubtype}; +use crate::types::{ + ImmutableRecord, RawSlice, RefValue, SerialType, SerialTypeKind, TextRef, TextSubtype, +}; use crate::{File, Result}; use std::cell::RefCell; use std::mem::MaybeUninit; @@ -63,9 +65,19 @@ pub const DATABASE_HEADER_SIZE: usize = 100; // DEFAULT_CACHE_SIZE negative values mean that we store the number of pages that X KiB of memory can hold. // We can calculate the "real" cache size by dividing by the page size. const DEFAULT_CACHE_SIZE: i32 = -2000; + // Minimum number of pages that cache can hold. pub const MIN_PAGE_CACHE_SIZE: usize = 10; +/// The minimum page size in bytes. +const MIN_PAGE_SIZE: u32 = 512; + +/// The maximum page size in bytes. +const MAX_PAGE_SIZE: u32 = 65536; + +/// The default page size in bytes. +const DEFAULT_PAGE_SIZE: u16 = 4096; + /// The database header. /// The first 100 bytes of the database file comprise the database file header. /// The database file header is divided into fields as shown by the table below. @@ -77,7 +89,7 @@ pub struct DatabaseHeader { /// The database page size in bytes. Must be a power of two between 512 and 32768 inclusive, /// or the value 1 representing a page size of 65536. - pub page_size: u16, + page_size: u16, /// File format write version. 1 for legacy; 2 for WAL. write_version: u8, @@ -113,7 +125,7 @@ pub struct DatabaseHeader { pub freelist_pages: u32, /// The schema cookie. Incremented when the database schema changes. - schema_cookie: u32, + pub schema_cookie: u32, /// The schema format number. Supported formats are 1, 2, 3, and 4. schema_format: u32, @@ -168,7 +180,7 @@ pub struct WalHeader { /// WAL format version. Currently 3007000 pub file_format: u32, - /// Database page size in bytes. Power of two between 512 and 32768 inclusive + /// Database page size in bytes. Power of two between 512 and 65536 inclusive pub page_size: u32, /// Checkpoint sequence number.
Increases with each checkpoint @@ -217,7 +229,7 @@ impl Default for DatabaseHeader { fn default() -> Self { Self { magic: *b"SQLite format 3\0", - page_size: 4096, + page_size: DEFAULT_PAGE_SIZE, write_version: 2, read_version: 2, reserved_space: 0, @@ -243,6 +255,28 @@ impl Default for DatabaseHeader { } } +impl DatabaseHeader { + pub fn update_page_size(&mut self, size: u32) { + if !(MIN_PAGE_SIZE..=MAX_PAGE_SIZE).contains(&size) || (size & (size - 1) != 0) { + return; + } + + self.page_size = if size == MAX_PAGE_SIZE { + 1u16 + } else { + size as u16 + }; + } + + pub fn get_page_size(&self) -> u32 { + if self.page_size == 1 { + MAX_PAGE_SIZE + } else { + self.page_size as u32 + } + } +} + pub fn begin_read_database_header( db_file: Arc, ) -> Result>> { @@ -413,6 +447,14 @@ impl Clone for PageContent { } impl PageContent { + pub fn new(offset: usize, buffer: Arc>) -> Self { + Self { + offset, + buffer, + overflow_cells: Vec::new(), + } + } + pub fn page_type(&self) -> PageType { self.read_u8(0).try_into().unwrap() } @@ -590,6 +632,54 @@ impl PageContent { usable_size, ) } + + /// Read the rowid of a table interior cell. + #[inline(always)] + pub fn cell_table_interior_read_rowid(&self, idx: usize) -> Result { + debug_assert!(self.page_type() == PageType::TableInterior); + let buf = self.as_ptr(); + const INTERIOR_PAGE_HEADER_SIZE_BYTES: usize = 12; + let cell_pointer_array_start = INTERIOR_PAGE_HEADER_SIZE_BYTES; + let cell_pointer = cell_pointer_array_start + (idx * 2); + let cell_pointer = self.read_u16(cell_pointer) as usize; + const LEFT_CHILD_PAGE_SIZE_BYTES: usize = 4; + let (rowid, _) = read_varint(&buf[cell_pointer + LEFT_CHILD_PAGE_SIZE_BYTES..])?; + Ok(rowid) + } + + /// Read the left child page of a table interior cell. + #[inline(always)] + pub fn cell_table_interior_read_left_child_page(&self, idx: usize) -> Result { + debug_assert!(self.page_type() == PageType::TableInterior); + let buf = self.as_ptr(); + const INTERIOR_PAGE_HEADER_SIZE_BYTES: usize = 12; + let cell_pointer_array_start = INTERIOR_PAGE_HEADER_SIZE_BYTES; + let cell_pointer = cell_pointer_array_start + (idx * 2); + let cell_pointer = self.read_u16(cell_pointer) as usize; + Ok(u32::from_be_bytes([ + buf[cell_pointer], + buf[cell_pointer + 1], + buf[cell_pointer + 2], + buf[cell_pointer + 3], + ])) + } + + /// Read the rowid of a table leaf cell. + #[inline(always)] + pub fn cell_table_leaf_read_rowid(&self, idx: usize) -> Result { + debug_assert!(self.page_type() == PageType::TableLeaf); + let buf = self.as_ptr(); + const LEAF_PAGE_HEADER_SIZE_BYTES: usize = 8; + let cell_pointer_array_start = LEAF_PAGE_HEADER_SIZE_BYTES; + let cell_pointer = cell_pointer_array_start + (idx * 2); + let cell_pointer = self.read_u16(cell_pointer) as usize; + let mut pos = cell_pointer; + let (_, nr) = read_varint(&buf[pos..])?; + pos += nr; + let (rowid, _) = read_varint(&buf[pos..])?; + Ok(rowid) + } + /// The cell pointer array of a b-tree page immediately follows the b-tree page header. /// Let K be the number of cells on the btree. /// The cell pointer array consists of K 2-byte integer offsets to the cell contents. 
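The new `cell_table_interior_read_rowid` and related helpers turn that layout description into direct byte arithmetic: skip the 12-byte interior page header, index the 2-byte cell pointer array, then skip the 4-byte left-child pointer and decode a varint. The same arithmetic as a self-contained sketch, with a minimal SQLite varint decoder (illustrative stand-ins, not the crate's `read_varint`):

```rust
fn read_u16(buf: &[u8], pos: usize) -> u16 {
    u16::from_be_bytes([buf[pos], buf[pos + 1]])
}

/// Minimal SQLite varint decoder: bytes 1..=8 contribute 7 bits each
/// (high bit = continuation); a 9th byte contributes all 8 bits.
/// Returns (value, bytes consumed). Panics on truncated input.
fn read_varint(buf: &[u8]) -> (u64, usize) {
    let mut v = 0u64;
    for (i, &b) in buf.iter().enumerate().take(8) {
        v = (v << 7) | (b & 0x7f) as u64;
        if b & 0x80 == 0 {
            return (v, i + 1);
        }
    }
    ((v << 8) | buf[8] as u64, 9)
}

/// Rowid of cell `idx` on a table interior page, as in the helper above.
fn interior_cell_rowid(page: &[u8], idx: usize) -> u64 {
    const INTERIOR_HEADER: usize = 12; // interior pages have a 12-byte header
    const LEFT_CHILD: usize = 4; // cells start with a 4-byte child page number
    let cell_ptr = read_u16(page, INTERIOR_HEADER + idx * 2) as usize;
    read_varint(&page[cell_ptr + LEFT_CHILD..]).0
}

fn main() {
    // Fake one-cell interior page: the cell pointer array says the cell is
    // at offset 14; the cell is [left child = 2, rowid varint = 42].
    let mut page = vec![0u8; 32];
    page[13] = 14; // big-endian u16 cell pointer: 0x000e
    page[14..18].copy_from_slice(&2u32.to_be_bytes());
    page[18] = 42; // single-byte varint (high bit clear)
    assert_eq!(interior_cell_rowid(&page, 0), 42);
}
```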
@@ -626,9 +716,9 @@ impl PageContent { usable_size, ); if overflows { - 4 + to_read + n_payload + 4 + 4 + to_read + n_payload } else { - 4 + len_payload as usize + n_payload + 4 + 4 + len_payload as usize + n_payload } } PageType::TableInterior => { @@ -644,9 +734,9 @@ impl PageContent { usable_size, ); if overflows { - to_read + n_payload + 4 + to_read + n_payload } else { - len_payload as usize + n_payload + 4 + len_payload as usize + n_payload } } PageType::TableLeaf => { @@ -741,11 +831,7 @@ fn finish_read_page( } else { 0 }; - let inner = PageContent { - offset: pos, - buffer: buffer_ref.clone(), - overflow_cells: Vec::new(), - }; + let inner = PageContent::new(pos, buffer_ref.clone()); { page.get().contents.replace(inner); page.set_uptodate(); @@ -950,116 +1036,24 @@ fn read_payload(unread: &'static [u8], payload_size: usize) -> (&'static [u8], O } } -pub type SerialType = u64; - -pub const SERIAL_TYPE_NULL: SerialType = 0; -pub const SERIAL_TYPE_INT8: SerialType = 1; -pub const SERIAL_TYPE_BEINT16: SerialType = 2; -pub const SERIAL_TYPE_BEINT24: SerialType = 3; -pub const SERIAL_TYPE_BEINT32: SerialType = 4; -pub const SERIAL_TYPE_BEINT48: SerialType = 5; -pub const SERIAL_TYPE_BEINT64: SerialType = 6; -pub const SERIAL_TYPE_BEFLOAT64: SerialType = 7; -pub const SERIAL_TYPE_CONSTINT0: SerialType = 8; -pub const SERIAL_TYPE_CONSTINT1: SerialType = 9; - -pub trait SerialTypeExt { - fn is_null(self) -> bool; - fn is_int8(self) -> bool; - fn is_beint16(self) -> bool; - fn is_beint24(self) -> bool; - fn is_beint32(self) -> bool; - fn is_beint48(self) -> bool; - fn is_beint64(self) -> bool; - fn is_befloat64(self) -> bool; - fn is_constint0(self) -> bool; - fn is_constint1(self) -> bool; - fn is_blob(self) -> bool; - fn is_string(self) -> bool; - fn blob_size(self) -> usize; - fn string_size(self) -> usize; - fn is_valid(self) -> bool; +#[inline(always)] +pub fn validate_serial_type(value: u64) -> Result<()> { + if !SerialType::u64_is_valid_serial_type(value) { + crate::bail_corrupt_error!("Invalid serial type: {}", value); + } + Ok(()) } -impl SerialTypeExt for u64 { - fn is_null(self) -> bool { - self == SERIAL_TYPE_NULL - } - - fn is_int8(self) -> bool { - self == SERIAL_TYPE_INT8 - } - - fn is_beint16(self) -> bool { - self == SERIAL_TYPE_BEINT16 - } - - fn is_beint24(self) -> bool { - self == SERIAL_TYPE_BEINT24 - } - - fn is_beint32(self) -> bool { - self == SERIAL_TYPE_BEINT32 - } - - fn is_beint48(self) -> bool { - self == SERIAL_TYPE_BEINT48 - } - - fn is_beint64(self) -> bool { - self == SERIAL_TYPE_BEINT64 - } - - fn is_befloat64(self) -> bool { - self == SERIAL_TYPE_BEFLOAT64 - } - - fn is_constint0(self) -> bool { - self == SERIAL_TYPE_CONSTINT0 - } - - fn is_constint1(self) -> bool { - self == SERIAL_TYPE_CONSTINT1 - } - - fn is_blob(self) -> bool { - self >= 12 && self % 2 == 0 - } - - fn is_string(self) -> bool { - self >= 13 && self % 2 == 1 - } - - fn blob_size(self) -> usize { - debug_assert!(self.is_blob()); - ((self - 12) / 2) as usize - } - - fn string_size(self) -> usize { - debug_assert!(self.is_string()); - ((self - 13) / 2) as usize - } - - fn is_valid(self) -> bool { - self <= 9 || self.is_blob() || self.is_string() - } -} - -pub fn validate_serial_type(value: u64) -> Result { - if value.is_valid() { - Ok(value) - } else { - crate::bail_corrupt_error!("Invalid serial type: {}", value) - } -} - -struct SmallVec { - pub data: [std::mem::MaybeUninit; 64], +pub struct SmallVec { + /// Stack allocated data + pub data: [std::mem::MaybeUninit; N], + /// Length of the 
vector, accounting for both stack and heap allocated data pub len: usize, + /// Extra data on heap pub extra_data: Option>, } -impl SmallVec { +impl SmallVec { pub fn new() -> Self { Self { data: unsafe { std::mem::MaybeUninit::uninit().assume_init() }, @@ -1080,6 +1074,50 @@ impl SmallVec { self.len += 1; } } + + fn get_from_heap(&self, index: usize) -> T { + assert!(self.extra_data.is_some()); + assert!(index >= self.data.len()); + let extra_data_index = index - self.data.len(); + let extra_data = self.extra_data.as_ref().unwrap(); + assert!(extra_data_index < extra_data.len()); + extra_data[extra_data_index] + } + + pub fn get(&self, index: usize) -> Option { + if index >= self.len { + return None; + } + let data_is_on_stack = index < self.data.len(); + if data_is_on_stack { + // SAFETY: We know this index is initialized we checked for index < self.len earlier above. + unsafe { Some(self.data[index].assume_init()) } + } else { + Some(self.get_from_heap(index)) + } + } +} + +impl SmallVec { + pub fn iter(&self) -> SmallVecIter<'_, T, N> { + SmallVecIter { vec: self, pos: 0 } + } +} + +pub struct SmallVecIter<'a, T, const N: usize> { + vec: &'a SmallVec, + pos: usize, +} + +impl<'a, T: Default + Copy, const N: usize> Iterator for SmallVecIter<'a, T, N> { + type Item = T; + + fn next(&mut self) -> Option { + self.vec.get(self.pos).map(|item| { + self.pos += 1; + item + }) + } } pub fn read_record(payload: &[u8], reuse_immutable: &mut ImmutableRecord) -> Result<()> { @@ -1095,10 +1133,10 @@ pub fn read_record(payload: &[u8], reuse_immutable: &mut ImmutableRecord) -> Res let mut header_size = (header_size as usize) - nr; pos += nr; - let mut serial_types = SmallVec::new(); + let mut serial_types = SmallVec::::new(); while header_size > 0 { let (serial_type, nr) = read_varint(&reuse_immutable.get_payload()[pos..])?; - let serial_type = validate_serial_type(serial_type)?; + validate_serial_type(serial_type)?; serial_types.push(serial_type); pos += nr; assert!(header_size >= nr); @@ -1107,14 +1145,17 @@ pub fn read_record(payload: &[u8], reuse_immutable: &mut ImmutableRecord) -> Res for &serial_type in &serial_types.data[..serial_types.len.min(serial_types.data.len())] { let (value, n) = read_value(&reuse_immutable.get_payload()[pos..], unsafe { - *serial_type.as_ptr() + serial_type.assume_init().try_into()? })?; pos += n; reuse_immutable.add_value(value); } if let Some(extra) = serial_types.extra_data.as_ref() { for serial_type in extra { - let (value, n) = read_value(&reuse_immutable.get_payload()[pos..], *serial_type)?; + let (value, n) = read_value( + &reuse_immutable.get_payload()[pos..], + (*serial_type).try_into()?, + )?; pos += n; reuse_immutable.add_value(value); } @@ -1127,140 +1168,125 @@ pub fn read_record(payload: &[u8], reuse_immutable: &mut ImmutableRecord) -> Res /// always. 
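`validate_serial_type` now delegates to `SerialType::u64_is_valid_serial_type`, which encodes SQLite's record-format rules: serial types 0 through 9 are fixed-size scalars, 10 and 11 are reserved, even values of 12 or more are BLOBs of (N - 12) / 2 bytes, and odd values of 13 or more are text of (N - 13) / 2 bytes. A self-contained sketch of those rules (illustrative only; the crate's logic lives in the new `SerialType` type):

```rust
/// Valid serial types: the fixed-size scalars 0..=9, plus all blob/text
/// codes >= 12. Types 10 and 11 are reserved and rejected.
fn is_valid_serial_type(n: u64) -> bool {
    n <= 9 || n >= 12
}

/// Payload size in bytes for a serial type, or None if it is reserved.
fn payload_size(n: u64) -> Option<usize> {
    match n {
        0 | 8 | 9 => Some(0), // NULL, constant 0, constant 1
        1 => Some(1),         // 8-bit int
        2 => Some(2),         // big-endian 16-bit int
        3 => Some(3),         // 24-bit int
        4 => Some(4),         // 32-bit int
        5 => Some(6),         // 48-bit int
        6 | 7 => Some(8),     // 64-bit int / float
        10 | 11 => None,      // reserved
        n if n % 2 == 0 => Some(((n - 12) / 2) as usize), // blob
        n => Some(((n - 13) / 2) as usize),               // text
    }
}

fn main() {
    assert!(!is_valid_serial_type(10));
    assert_eq!(payload_size(18), Some(3)); // 3-byte blob, as in the tests below
    assert_eq!(payload_size(19), Some(3)); // 3-byte text ("ABC")
    assert_eq!(payload_size(11), None);
}
```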
#[inline(always)] pub fn read_value(buf: &[u8], serial_type: SerialType) -> Result<(RefValue, usize)> { - if serial_type.is_null() { - return Ok((RefValue::Null, 0)); - } - - if serial_type.is_int8() { - if buf.is_empty() { - crate::bail_corrupt_error!("Invalid UInt8 value"); + match serial_type.kind() { + SerialTypeKind::Null => Ok((RefValue::Null, 0)), + SerialTypeKind::I8 => { + if buf.is_empty() { + crate::bail_corrupt_error!("Invalid UInt8 value"); + } + let val = buf[0] as i8; + Ok((RefValue::Integer(val as i64), 1)) } - let val = buf[0] as i8; - return Ok((RefValue::Integer(val as i64), 1)); - } - - if serial_type.is_beint16() { - if buf.len() < 2 { - crate::bail_corrupt_error!("Invalid BEInt16 value"); + SerialTypeKind::I16 => { + if buf.len() < 2 { + crate::bail_corrupt_error!("Invalid BEInt16 value"); + } + Ok(( + RefValue::Integer(i16::from_be_bytes([buf[0], buf[1]]) as i64), + 2, + )) } - return Ok(( - RefValue::Integer(i16::from_be_bytes([buf[0], buf[1]]) as i64), - 2, - )); - } - - if serial_type.is_beint24() { - if buf.len() < 3 { - crate::bail_corrupt_error!("Invalid BEInt24 value"); + SerialTypeKind::I24 => { + if buf.len() < 3 { + crate::bail_corrupt_error!("Invalid BEInt24 value"); + } + let sign_extension = if buf[0] <= 127 { 0 } else { 255 }; + Ok(( + RefValue::Integer( + i32::from_be_bytes([sign_extension, buf[0], buf[1], buf[2]]) as i64 + ), + 3, + )) } - let sign_extension = if buf[0] <= 127 { 0 } else { 255 }; - return Ok(( - RefValue::Integer(i32::from_be_bytes([sign_extension, buf[0], buf[1], buf[2]]) as i64), - 3, - )); - } - - if serial_type.is_beint32() { - if buf.len() < 4 { - crate::bail_corrupt_error!("Invalid BEInt32 value"); + SerialTypeKind::I32 => { + if buf.len() < 4 { + crate::bail_corrupt_error!("Invalid BEInt32 value"); + } + Ok(( + RefValue::Integer(i32::from_be_bytes([buf[0], buf[1], buf[2], buf[3]]) as i64), + 4, + )) } - return Ok(( - RefValue::Integer(i32::from_be_bytes([buf[0], buf[1], buf[2], buf[3]]) as i64), - 4, - )); - } - - if serial_type.is_beint48() { - if buf.len() < 6 { - crate::bail_corrupt_error!("Invalid BEInt48 value"); + SerialTypeKind::I48 => { + if buf.len() < 6 { + crate::bail_corrupt_error!("Invalid BEInt48 value"); + } + let sign_extension = if buf[0] <= 127 { 0 } else { 255 }; + Ok(( + RefValue::Integer(i64::from_be_bytes([ + sign_extension, + sign_extension, + buf[0], + buf[1], + buf[2], + buf[3], + buf[4], + buf[5], + ])), + 6, + )) } - let sign_extension = if buf[0] <= 127 { 0 } else { 255 }; - return Ok(( - RefValue::Integer(i64::from_be_bytes([ - sign_extension, - sign_extension, - buf[0], - buf[1], - buf[2], - buf[3], - buf[4], - buf[5], - ])), - 6, - )); - } - - if serial_type.is_beint64() { - if buf.len() < 8 { - crate::bail_corrupt_error!("Invalid BEInt64 value"); + SerialTypeKind::I64 => { + if buf.len() < 8 { + crate::bail_corrupt_error!("Invalid BEInt64 value"); + } + Ok(( + RefValue::Integer(i64::from_be_bytes([ + buf[0], buf[1], buf[2], buf[3], buf[4], buf[5], buf[6], buf[7], + ])), + 8, + )) } - return Ok(( - RefValue::Integer(i64::from_be_bytes([ - buf[0], buf[1], buf[2], buf[3], buf[4], buf[5], buf[6], buf[7], - ])), - 8, - )); - } - - if serial_type.is_befloat64() { - if buf.len() < 8 { - crate::bail_corrupt_error!("Invalid BEFloat64 value"); + SerialTypeKind::F64 => { + if buf.len() < 8 { + crate::bail_corrupt_error!("Invalid BEFloat64 value"); + } + Ok(( + RefValue::Float(f64::from_be_bytes([ + buf[0], buf[1], buf[2], buf[3], buf[4], buf[5], buf[6], buf[7], + ])), + 8, + )) } - return Ok(( - 
RefValue::Float(f64::from_be_bytes([ - buf[0], buf[1], buf[2], buf[3], buf[4], buf[5], buf[6], buf[7], - ])), - 8, - )); - } - - if serial_type.is_constint0() { - return Ok((RefValue::Integer(0), 0)); - } - - if serial_type.is_constint1() { - return Ok((RefValue::Integer(1), 0)); - } - - if serial_type.is_blob() { - let n = serial_type.blob_size(); - if buf.len() < n { - crate::bail_corrupt_error!("Invalid Blob value"); + SerialTypeKind::ConstInt0 => Ok((RefValue::Integer(0), 0)), + SerialTypeKind::ConstInt1 => Ok((RefValue::Integer(1), 0)), + SerialTypeKind::Blob => { + let content_size = serial_type.size(); + if buf.len() < content_size { + crate::bail_corrupt_error!("Invalid Blob value"); + } + if content_size == 0 { + Ok((RefValue::Blob(RawSlice::new(std::ptr::null(), 0)), 0)) + } else { + let ptr = &buf[0] as *const u8; + let slice = RawSlice::new(ptr, content_size); + Ok((RefValue::Blob(slice), content_size)) + } } - if n == 0 { - return Ok((RefValue::Blob(RawSlice::new(std::ptr::null(), 0)), 0)); + SerialTypeKind::Text => { + let content_size = serial_type.size(); + if buf.len() < content_size { + crate::bail_corrupt_error!( + "Invalid String value, length {} < expected length {}", + buf.len(), + content_size + ); + } + let slice = if content_size == 0 { + RawSlice::new(std::ptr::null(), 0) + } else { + let ptr = &buf[0] as *const u8; + RawSlice::new(ptr, content_size) + }; + Ok(( + RefValue::Text(TextRef { + value: slice, + subtype: TextSubtype::Text, + }), + content_size, + )) } - let ptr = &buf[0] as *const u8; - let slice = RawSlice::new(ptr, n); - return Ok((RefValue::Blob(slice), n)); } - - if serial_type.is_string() { - let n = serial_type.string_size(); - if buf.len() < n { - crate::bail_corrupt_error!( - "Invalid String value, length {} < expected length {}", - buf.len(), - n - ); - } - let slice = if n == 0 { - RawSlice::new(std::ptr::null(), 0) - } else { - let ptr = &buf[0] as *const u8; - RawSlice::new(ptr, n) - }; - return Ok(( - RefValue::Text(TextRef { - value: slice, - subtype: TextSubtype::Text, - }), - n, - )); - } - - // This should never happen if validate_serial_type is used correctly - crate::bail_corrupt_error!("Invalid serial type: {}", serial_type) } #[inline(always)] @@ -1393,6 +1419,7 @@ pub fn begin_write_wal_frame( io: &Arc, offset: usize, page: &PageRef, + page_size: u16, db_size: u32, write_counter: Rc>, wal_header: &WalHeader, @@ -1429,15 +1456,16 @@ pub fn begin_write_wal_frame( let content_len = contents_buf.len(); buf[WAL_FRAME_HEADER_SIZE..WAL_FRAME_HEADER_SIZE + content_len] .copy_from_slice(contents_buf); - if content_len < 4096 { - buf[WAL_FRAME_HEADER_SIZE + content_len..WAL_FRAME_HEADER_SIZE + 4096].fill(0); + if content_len < page_size as usize { + buf[WAL_FRAME_HEADER_SIZE + content_len..WAL_FRAME_HEADER_SIZE + page_size as usize] + .fill(0); } let expects_be = wal_header.magic & 1; let use_native_endian = cfg!(target_endian = "big") as u32 == expects_be; let header_checksum = checksum_wal(&buf[0..8], wal_header, checksums, use_native_endian); // Only 8 bytes let final_checksum = checksum_wal( - &buf[WAL_FRAME_HEADER_SIZE..WAL_FRAME_HEADER_SIZE + 4096], + &buf[WAL_FRAME_HEADER_SIZE..WAL_FRAME_HEADER_SIZE + page_size as usize], wal_header, header_checksum, use_native_endian, @@ -1594,32 +1622,32 @@ mod tests { use rstest::rstest; #[rstest] - #[case(&[], SERIAL_TYPE_NULL, OwnedValue::Null)] - #[case(&[255], SERIAL_TYPE_INT8, OwnedValue::Integer(-1))] - #[case(&[0x12, 0x34], SERIAL_TYPE_BEINT16, OwnedValue::Integer(0x1234))] - #[case(&[0xFE], 
SERIAL_TYPE_INT8, OwnedValue::Integer(-2))] - #[case(&[0x12, 0x34, 0x56], SERIAL_TYPE_BEINT24, OwnedValue::Integer(0x123456))] - #[case(&[0x12, 0x34, 0x56, 0x78], SERIAL_TYPE_BEINT32, OwnedValue::Integer(0x12345678))] - #[case(&[0x12, 0x34, 0x56, 0x78, 0x9A, 0xBC], SERIAL_TYPE_BEINT48, OwnedValue::Integer(0x123456789ABC))] - #[case(&[0x12, 0x34, 0x56, 0x78, 0x9A, 0xBC, 0xDE, 0xFF], SERIAL_TYPE_BEINT64, OwnedValue::Integer(0x123456789ABCDEFF))] - #[case(&[0x40, 0x09, 0x21, 0xFB, 0x54, 0x44, 0x2D, 0x18], SERIAL_TYPE_BEFLOAT64, OwnedValue::Float(std::f64::consts::PI))] - #[case(&[1, 2], SERIAL_TYPE_CONSTINT0, OwnedValue::Integer(0))] - #[case(&[65, 66], SERIAL_TYPE_CONSTINT1, OwnedValue::Integer(1))] - #[case(&[1, 2, 3], 18, OwnedValue::Blob(vec![1, 2, 3].into()))] - #[case(&[], 12, OwnedValue::Blob(vec![].into()))] // empty blob - #[case(&[65, 66, 67], 19, OwnedValue::build_text("ABC"))] - #[case(&[0x80], SERIAL_TYPE_INT8, OwnedValue::Integer(-128))] - #[case(&[0x80, 0], SERIAL_TYPE_BEINT16, OwnedValue::Integer(-32768))] - #[case(&[0x80, 0, 0], SERIAL_TYPE_BEINT24, OwnedValue::Integer(-8388608))] - #[case(&[0x80, 0, 0, 0], SERIAL_TYPE_BEINT32, OwnedValue::Integer(-2147483648))] - #[case(&[0x80, 0, 0, 0, 0, 0], SERIAL_TYPE_BEINT48, OwnedValue::Integer(-140737488355328))] - #[case(&[0x80, 0, 0, 0, 0, 0, 0, 0], SERIAL_TYPE_BEINT64, OwnedValue::Integer(-9223372036854775808))] - #[case(&[0x7f], SERIAL_TYPE_INT8, OwnedValue::Integer(127))] - #[case(&[0x7f, 0xff], SERIAL_TYPE_BEINT16, OwnedValue::Integer(32767))] - #[case(&[0x7f, 0xff, 0xff], SERIAL_TYPE_BEINT24, OwnedValue::Integer(8388607))] - #[case(&[0x7f, 0xff, 0xff, 0xff], SERIAL_TYPE_BEINT32, OwnedValue::Integer(2147483647))] - #[case(&[0x7f, 0xff, 0xff, 0xff, 0xff, 0xff], SERIAL_TYPE_BEINT48, OwnedValue::Integer(140737488355327))] - #[case(&[0x7f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff], SERIAL_TYPE_BEINT64, OwnedValue::Integer(9223372036854775807))] + #[case(&[], SerialType::null(), OwnedValue::Null)] + #[case(&[255], SerialType::i8(), OwnedValue::Integer(-1))] + #[case(&[0x12, 0x34], SerialType::i16(), OwnedValue::Integer(0x1234))] + #[case(&[0xFE], SerialType::i8(), OwnedValue::Integer(-2))] + #[case(&[0x12, 0x34, 0x56], SerialType::i24(), OwnedValue::Integer(0x123456))] + #[case(&[0x12, 0x34, 0x56, 0x78], SerialType::i32(), OwnedValue::Integer(0x12345678))] + #[case(&[0x12, 0x34, 0x56, 0x78, 0x9A, 0xBC], SerialType::i48(), OwnedValue::Integer(0x123456789ABC))] + #[case(&[0x12, 0x34, 0x56, 0x78, 0x9A, 0xBC, 0xDE, 0xFF], SerialType::i64(), OwnedValue::Integer(0x123456789ABCDEFF))] + #[case(&[0x40, 0x09, 0x21, 0xFB, 0x54, 0x44, 0x2D, 0x18], SerialType::f64(), OwnedValue::Float(std::f64::consts::PI))] + #[case(&[1, 2], SerialType::const_int0(), OwnedValue::Integer(0))] + #[case(&[65, 66], SerialType::const_int1(), OwnedValue::Integer(1))] + #[case(&[1, 2, 3], SerialType::blob(3), OwnedValue::Blob(vec![1, 2, 3].into()))] + #[case(&[], SerialType::blob(0), OwnedValue::Blob(vec![].into()))] // empty blob + #[case(&[65, 66, 67], SerialType::text(3), OwnedValue::build_text("ABC"))] + #[case(&[0x80], SerialType::i8(), OwnedValue::Integer(-128))] + #[case(&[0x80, 0], SerialType::i16(), OwnedValue::Integer(-32768))] + #[case(&[0x80, 0, 0], SerialType::i24(), OwnedValue::Integer(-8388608))] + #[case(&[0x80, 0, 0, 0], SerialType::i32(), OwnedValue::Integer(-2147483648))] + #[case(&[0x80, 0, 0, 0, 0, 0], SerialType::i48(), OwnedValue::Integer(-140737488355328))] + #[case(&[0x80, 0, 0, 0, 0, 0, 0, 0], SerialType::i64(), 
OwnedValue::Integer(-9223372036854775808))] + #[case(&[0x7f], SerialType::i8(), OwnedValue::Integer(127))] + #[case(&[0x7f, 0xff], SerialType::i16(), OwnedValue::Integer(32767))] + #[case(&[0x7f, 0xff, 0xff], SerialType::i24(), OwnedValue::Integer(8388607))] + #[case(&[0x7f, 0xff, 0xff, 0xff], SerialType::i32(), OwnedValue::Integer(2147483647))] + #[case(&[0x7f, 0xff, 0xff, 0xff, 0xff, 0xff], SerialType::i48(), OwnedValue::Integer(140737488355327))] + #[case(&[0x7f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff], SerialType::i64(), OwnedValue::Integer(9223372036854775807))] fn test_read_value( #[case] buf: &[u8], #[case] serial_type: SerialType, @@ -1631,54 +1659,94 @@ mod tests { #[test] fn test_serial_type_helpers() { - assert!(SERIAL_TYPE_NULL.is_null()); - assert!(SERIAL_TYPE_INT8.is_int8()); - assert!(SERIAL_TYPE_BEINT16.is_beint16()); - assert!(SERIAL_TYPE_BEINT24.is_beint24()); - assert!(SERIAL_TYPE_BEINT32.is_beint32()); - assert!(SERIAL_TYPE_BEINT48.is_beint48()); - assert!(SERIAL_TYPE_BEINT64.is_beint64()); - assert!(SERIAL_TYPE_BEFLOAT64.is_befloat64()); - assert!(SERIAL_TYPE_CONSTINT0.is_constint0()); - assert!(SERIAL_TYPE_CONSTINT1.is_constint1()); - - assert!(12u64.is_blob()); - assert!(14u64.is_blob()); - assert!(13u64.is_string()); - assert!(15u64.is_string()); - - assert_eq!(12u64.blob_size(), 0); - assert_eq!(14u64.blob_size(), 1); - assert_eq!(16u64.blob_size(), 2); - - assert_eq!(13u64.string_size(), 0); - assert_eq!(15u64.string_size(), 1); - assert_eq!(17u64.string_size(), 2); + assert_eq!( + TryInto::::try_into(12u64).unwrap(), + SerialType::blob(0) + ); + assert_eq!( + TryInto::::try_into(14u64).unwrap(), + SerialType::blob(1) + ); + assert_eq!( + TryInto::::try_into(13u64).unwrap(), + SerialType::text(0) + ); + assert_eq!( + TryInto::::try_into(15u64).unwrap(), + SerialType::text(1) + ); + assert_eq!( + TryInto::::try_into(16u64).unwrap(), + SerialType::blob(2) + ); + assert_eq!( + TryInto::::try_into(17u64).unwrap(), + SerialType::text(2) + ); } #[rstest] - #[case(0, SERIAL_TYPE_NULL)] - #[case(1, SERIAL_TYPE_INT8)] - #[case(2, SERIAL_TYPE_BEINT16)] - #[case(3, SERIAL_TYPE_BEINT24)] - #[case(4, SERIAL_TYPE_BEINT32)] - #[case(5, SERIAL_TYPE_BEINT48)] - #[case(6, SERIAL_TYPE_BEINT64)] - #[case(7, SERIAL_TYPE_BEFLOAT64)] - #[case(8, SERIAL_TYPE_CONSTINT0)] - #[case(9, SERIAL_TYPE_CONSTINT1)] - #[case(12, 12)] // Blob(0) - #[case(13, 13)] // String(0) - #[case(14, 14)] // Blob(1) - #[case(15, 15)] // String(1) - fn test_validate_serial_type(#[case] input: u64, #[case] expected: SerialType) { - let result = validate_serial_type(input).unwrap(); + #[case(0, SerialType::null())] + #[case(1, SerialType::i8())] + #[case(2, SerialType::i16())] + #[case(3, SerialType::i24())] + #[case(4, SerialType::i32())] + #[case(5, SerialType::i48())] + #[case(6, SerialType::i64())] + #[case(7, SerialType::f64())] + #[case(8, SerialType::const_int0())] + #[case(9, SerialType::const_int1())] + #[case(12, SerialType::blob(0))] + #[case(13, SerialType::text(0))] + #[case(14, SerialType::blob(1))] + #[case(15, SerialType::text(1))] + fn test_parse_serial_type(#[case] input: u64, #[case] expected: SerialType) { + let result = SerialType::try_from(input).unwrap(); assert_eq!(result, expected); } #[test] - fn test_invalid_serial_type() { - let result = validate_serial_type(10); - assert!(result.is_err()); + fn test_validate_serial_type() { + for i in 0..=9 { + let result = validate_serial_type(i); + assert!(result.is_ok()); + } + for i in 10..=11 { + let result = validate_serial_type(i); + 
assert!(result.is_err()); + } + for i in 12..=1000 { + let result = validate_serial_type(i); + assert!(result.is_ok()); + } + } + + #[test] + fn test_smallvec_iter() { + let mut small_vec = SmallVec::::new(); + (0..8).for_each(|i| small_vec.push(i)); + + let mut iter = small_vec.iter(); + assert_eq!(iter.next(), Some(0)); + assert_eq!(iter.next(), Some(1)); + assert_eq!(iter.next(), Some(2)); + assert_eq!(iter.next(), Some(3)); + assert_eq!(iter.next(), Some(4)); + assert_eq!(iter.next(), Some(5)); + assert_eq!(iter.next(), Some(6)); + assert_eq!(iter.next(), Some(7)); + assert_eq!(iter.next(), None); + } + + #[test] + fn test_smallvec_get() { + let mut small_vec = SmallVec::::new(); + (0..8).for_each(|i| small_vec.push(i)); + + (0..8).for_each(|i| { + assert_eq!(small_vec.get(i), Some(i as i32)); + }); + + assert_eq!(small_vec.get(8), None); } } diff --git a/core/storage/wal.rs b/core/storage/wal.rs index b56246a78..fd41af51b 100644 --- a/core/storage/wal.rs +++ b/core/storage/wal.rs @@ -246,7 +246,7 @@ pub struct WalFile { sync_state: RefCell, syncing: Rc>, - page_size: usize, + page_size: u32, shared: Arc>, ongoing_checkpoint: OngoingCheckpoint, @@ -462,6 +462,7 @@ impl Wal for WalFile { &shared.file, offset, &page, + self.page_size as u16, db_size, write_counter, &header, @@ -687,7 +688,7 @@ impl Wal for WalFile { impl WalFile { pub fn new( io: Arc, - page_size: usize, + page_size: u32, shared: Arc>, buffer_pool: Rc, ) -> Self { @@ -698,11 +699,10 @@ impl WalFile { let drop_fn = Rc::new(move |buf| { buffer_pool.put(buf); }); - checkpoint_page.get().contents = Some(PageContent { - offset: 0, - buffer: Arc::new(RefCell::new(Buffer::new(buffer, drop_fn))), - overflow_cells: Vec::new(), - }); + checkpoint_page.get().contents = Some(PageContent::new( + 0, + Arc::new(RefCell::new(Buffer::new(buffer, drop_fn))), + )); } Self { io, @@ -728,7 +728,7 @@ impl WalFile { fn frame_offset(&self, frame_id: u64) -> usize { assert!(frame_id > 0, "Frame ID must be 1-based"); let page_size = self.page_size; - let page_offset = (frame_id - 1) * (page_size + WAL_FRAME_HEADER_SIZE) as u64; + let page_offset = (frame_id - 1) * (page_size + WAL_FRAME_HEADER_SIZE as u32) as u64; let offset = WAL_HEADER_SIZE as u64 + page_offset; offset as usize } @@ -743,7 +743,7 @@ impl WalFileShared { pub fn open_shared( io: &Arc, path: &str, - page_size: u16, + page_size: u32, ) -> Result>> { let file = io.open_file(path, crate::io::OpenFlags::Create, false)?; let header = if file.size()? 
> 0 { @@ -764,7 +764,7 @@ impl WalFileShared { let mut wal_header = WalHeader { magic, file_format: 3007000, - page_size: page_size as u32, + page_size, checkpoint_seq: 0, // TODO implement sequence number salt_1: io.generate_random_number() as u32, salt_2: io.generate_random_number() as u32, diff --git a/core/translate/delete.rs b/core/translate/delete.rs index 1e0d64a98..5cb38cf42 100644 --- a/core/translate/delete.rs +++ b/core/translate/delete.rs @@ -7,7 +7,7 @@ use crate::vdbe::builder::{ProgramBuilder, ProgramBuilderOpts, QueryMode}; use crate::{schema::Schema, Result, SymbolTable}; use limbo_sqlite3_parser::ast::{Expr, Limit, QualifiedName}; -use super::plan::TableReference; +use super::plan::{ColumnUsedMask, IterationDirection, TableReference}; pub fn translate_delete( query_mode: QueryMode, @@ -50,11 +50,20 @@ pub fn prepare_delete_plan( crate::bail_corrupt_error!("Table is neither a virtual table nor a btree table"); }; let name = tbl_name.name.0.as_str().to_string(); - let table_references = vec![TableReference { + let indexes = schema + .get_indices(table.get_name()) + .iter() + .cloned() + .collect(); + let mut table_references = vec![TableReference { table, identifier: name, - op: Operation::Scan { iter_dir: None }, + op: Operation::Scan { + iter_dir: IterationDirection::Forwards, + index: None, + }, join_info: None, + col_used_mask: ColumnUsedMask::new(), }]; let mut where_predicates = vec![]; @@ -62,13 +71,13 @@ pub fn prepare_delete_plan( // Parse the WHERE clause parse_where( where_clause.map(|e| *e), - &table_references, + &mut table_references, None, &mut where_predicates, )?; // Parse the LIMIT/OFFSET clause - let (resolved_limit, resolved_offset) = limit.map_or(Ok((None, None)), |l| parse_limit(*l))?; + let (resolved_limit, resolved_offset) = limit.map_or(Ok((None, None)), |l| parse_limit(&l))?; let plan = DeletePlan { table_references, @@ -78,6 +87,7 @@ pub fn prepare_delete_plan( limit: resolved_limit, offset: resolved_offset, contains_constant_false_condition: false, + indexes, }; Ok(Plan::Delete(plan)) @@ -86,7 +96,5 @@ pub fn prepare_delete_plan( fn estimate_num_instructions(plan: &DeletePlan) -> usize { let base = 20; - let num_instructions = base + plan.table_references.len() * 10; - - num_instructions + base + plan.table_references.len() * 10 } diff --git a/core/translate/emitter.rs b/core/translate/emitter.rs index e4d05bfaa..e2d3f78c4 100644 --- a/core/translate/emitter.rs +++ b/core/translate/emitter.rs @@ -1,12 +1,17 @@ // This module contains code for emitting bytecode instructions for SQL query execution. // It handles translating high-level SQL operations into low-level bytecode that can be executed by the virtual machine. +use std::rc::Rc; +use std::sync::Arc; + use limbo_sqlite3_parser::ast::{self}; use crate::function::Func; +use crate::schema::Index; use crate::translate::plan::{DeletePlan, Plan, Search}; use crate::util::exprs_are_equivalent; -use crate::vdbe::builder::ProgramBuilder; +use crate::vdbe::builder::{CursorType, ProgramBuilder}; +use crate::vdbe::insn::{IdxInsertFlags, RegisterOrLiteral}; use crate::vdbe::{insn::Insn, BranchOffset}; use crate::{Result, SymbolTable}; @@ -62,6 +67,10 @@ pub struct TranslateCtx<'a> { pub label_main_loop_end: Option, // First register of the aggregation results pub reg_agg_start: Option, + // In non-group-by statements with aggregations (e.g. SELECT foo, bar, sum(baz) FROM t), + // we want to emit the non-aggregate columns (foo and bar) only once. 
+ // This register is a flag that tracks whether we have already done that. + pub reg_nonagg_emit_once_flag: Option<usize>, // First register of the result columns of the query pub reg_result_cols_start: Option<usize>, // The register holding the limit value, if any. @@ -84,11 +93,12 @@ // This vector holds the indexes of the result columns that we need to skip. pub result_columns_to_skip_in_orderby_sorter: Option<Vec<usize>>, pub resolver: Resolver<'a>, + pub omit_predicates: Vec<usize>, } /// Used to distinguish database operations #[allow(clippy::upper_case_acronyms, dead_code)] -#[derive(Debug, Clone, Copy)] +#[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum OperationMode { SELECT, INSERT, @@ -115,6 +125,7 @@ fn prologue<'a>( labels_main_loop: (0..table_count).map(|_| LoopLabels::new(program)).collect(), label_main_loop_end: None, reg_agg_start: None, + reg_nonagg_emit_once_flag: None, reg_limit: None, reg_offset: None, reg_limit_offset_sum: None, @@ -125,11 +136,13 @@ result_column_indexes_in_orderby_sorter: (0..result_column_count).collect(), result_columns_to_skip_in_orderby_sorter: None, resolver: Resolver::new(syms), + omit_predicates: Vec::new(), }; Ok((t_ctx, init_label, start_offset)) } +#[derive(Clone, Copy, Debug)] pub enum TransactionMode { None, Read, @@ -149,8 +162,7 @@ fn epilogue( err_code: 0, description: String::new(), }); - - program.resolve_label(init_label, program.offset()); + program.preassign_label_to_next_insn(init_label); match txn_mode { TransactionMode::Read => program.emit_insn(Insn::Transaction { write: false }), @@ -243,6 +255,18 @@ pub fn emit_query<'a>( }); } + // For non-grouped aggregation queries that also have non-aggregate columns, + // we need to ensure non-aggregate columns are only emitted once. + // This flag helps track whether we've already emitted these columns.
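The check that follows implements this with a flag register. As a rough illustration of the pattern — a toy model with a plain array standing in for the register file, not the real VDBE instruction set — for a query shaped like SELECT foo, bar, sum(baz) FROM t:

    fn main() {
        // regs[0] = once-flag, regs[1..=2] = foo/bar, regs[3] = running sum(baz)
        let mut regs = [0i64; 4];
        regs[0] = 0; // Integer 0 -> flag: non-aggregate columns not yet emitted
        for (foo, bar, baz) in [(1i64, 2i64, 10i64), (1, 2, 20), (1, 2, 30)] {
            regs[3] += baz; // the aggregate step runs for every row
            if regs[0] == 0 {
                // the non-aggregate result columns are copied exactly once
                regs[1] = foo;
                regs[2] = bar;
                regs[0] = 1; // set the flag so later rows skip this block
            }
        }
        assert_eq!(regs[1..], [1, 2, 60]);
    }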
+ if !plan.aggregates.is_empty() + && plan.group_by.is_none() + && plan.result_columns.iter().any(|c| !c.contains_aggregates) + { + let flag = program.alloc_register(); + program.emit_int(0, flag); // Initialize flag to 0 (not yet emitted) + t_ctx.reg_nonagg_emit_once_flag = Some(flag); + } + // Allocate registers for result columns t_ctx.reg_result_cols_start = Some(program.alloc_registers(plan.result_columns.len())); @@ -251,8 +275,8 @@ init_order_by(program, t_ctx, order_by)?; } - if let Some(ref mut group_by) = plan.group_by { - init_group_by(program, t_ctx, group_by, &plan.aggregates)?; + if let Some(ref group_by) = plan.group_by { + init_group_by(program, t_ctx, group_by, &plan)?; } init_loop( program, @@ -275,7 +299,7 @@ condition_metadata, &t_ctx.resolver, )?; - program.resolve_label(jump_target_when_true, program.offset()); + program.preassign_label_to_next_insn(jump_target_when_true); } // Set up main query execution loop @@ -286,8 +310,7 @@ // Clean up and close the main execution loop close_loop(program, t_ctx, &plan.table_references)?; - - program.resolve_label(after_main_loop_label, program.offset()); + program.preassign_label_to_next_insn(after_main_loop_label); let mut order_by_necessary = plan.order_by.is_some() && !plan.contains_constant_false_condition; let order_by = plan.order_by.as_ref(); @@ -353,12 +376,17 @@ fn emit_program_for_delete( &plan.table_references, &plan.where_clause, )?; - emit_delete_insns(program, &mut t_ctx, &plan.table_references, &plan.limit)?; + emit_delete_insns( + program, + &mut t_ctx, + &plan.table_references, + &plan.indexes, + &plan.limit, + )?; // Clean up and close the main execution loop close_loop(program, &mut t_ctx, &plan.table_references)?; - - program.resolve_label(after_main_loop_label, program.offset()); + program.preassign_label_to_next_insn(after_main_loop_label); // Finalize program epilogue(program, init_label, start_offset, TransactionMode::Write)?; @@ -371,24 +399,28 @@ fn emit_delete_insns( program: &mut ProgramBuilder, t_ctx: &mut TranslateCtx, table_references: &[TableReference], + index_references: &[Arc<Index>], limit: &Option<isize>, ) -> Result<()> { let table_reference = table_references.first().unwrap(); let cursor_id = match &table_reference.op { Operation::Scan { .. } => program.resolve_cursor_id(&table_reference.identifier), Operation::Search(search) => match search { - Search::RowidEq { .. } | Search::RowidSearch { .. } => { + Search::RowidEq { .. } | Search::Seek { index: None, .. } => { program.resolve_cursor_id(&table_reference.identifier) } - Search::IndexSearch { index, .. } => program.resolve_cursor_id(&index.name), + Search::Seek { + index: Some(index), ..
+ } => program.resolve_cursor_id(&index.name), }, _ => return Ok(()), }; + let main_table_cursor_id = program.resolve_cursor_id(table_reference.table.get_name()); // Emit the instructions to delete the row let key_reg = program.alloc_register(); program.emit_insn(Insn::RowId { - cursor_id, + cursor_id: main_table_cursor_id, dest: key_reg, }); @@ -409,8 +441,43 @@ fn emit_delete_insns( conflict_action, }); } else { - program.emit_insn(Insn::DeleteAsync { cursor_id }); - program.emit_insn(Insn::DeleteAwait { cursor_id }); + for index in index_references { + let index_cursor_id = program.alloc_cursor_id( + Some(index.name.clone()), + crate::vdbe::builder::CursorType::BTreeIndex(index.clone()), + ); + + program.emit_insn(Insn::OpenWrite { + cursor_id: index_cursor_id, + root_page: RegisterOrLiteral::Literal(index.root_page), + }); + let num_regs = index.columns.len() + 1; + let start_reg = program.alloc_registers(num_regs); + // Emit columns that are part of the index + index + .columns + .iter() + .enumerate() + .for_each(|(reg_offset, column_index)| { + program.emit_insn(Insn::Column { + cursor_id: main_table_cursor_id, + column: column_index.pos_in_table, + dest: start_reg + reg_offset, + }); + }); + program.emit_insn(Insn::RowId { + cursor_id: main_table_cursor_id, + dest: start_reg + num_regs - 1, + }); + program.emit_insn(Insn::IdxDelete { + start_reg, + num_regs, + cursor_id: index_cursor_id, + }); + } + program.emit_insn(Insn::Delete { + cursor_id: main_table_cursor_id, + }); } if let Some(limit) = limit { let limit_reg = program.alloc_register(); @@ -442,25 +509,11 @@ fn emit_program_for_update( // Exit on LIMIT 0 if let Some(0) = plan.limit { - epilogue(program, init_label, start_offset, TransactionMode::Read)?; + epilogue(program, init_label, start_offset, TransactionMode::None)?; program.result_columns = plan.returning.unwrap_or_default(); program.table_references = plan.table_references; return Ok(()); } - let after_main_loop_label = program.allocate_label(); - t_ctx.label_main_loop_end = Some(after_main_loop_label); - if plan.contains_constant_false_condition { - program.emit_insn(Insn::Goto { - target_pc: after_main_loop_label, - }); - } - let skip_label = program.allocate_label(); - init_loop( - program, - &mut t_ctx, - &plan.table_references, - OperationMode::UPDATE, - )?; if t_ctx.reg_limit.is_none() && plan.limit.is_some() { let reg = program.alloc_register(); t_ctx.reg_limit = Some(reg); @@ -469,6 +522,50 @@ fn emit_program_for_update( dest: reg, }); program.mark_last_insn_constant(); + if t_ctx.reg_offset.is_none() && plan.offset.is_some_and(|n| n.ne(&0)) { + let reg = program.alloc_register(); + t_ctx.reg_offset = Some(reg); + program.emit_insn(Insn::Integer { + value: plan.offset.unwrap() as i64, + dest: reg, + }); + program.mark_last_insn_constant(); + let combined_reg = program.alloc_register(); + t_ctx.reg_limit_offset_sum = Some(combined_reg); + program.emit_insn(Insn::OffsetLimit { + limit_reg: t_ctx.reg_limit.unwrap(), + offset_reg: reg, + combined_reg, + }); + } + } + let after_main_loop_label = program.allocate_label(); + t_ctx.label_main_loop_end = Some(after_main_loop_label); + if plan.contains_constant_false_condition { + program.emit_insn(Insn::Goto { + target_pc: after_main_loop_label, + }); + } + + init_loop( + program, + &mut t_ctx, + &plan.table_references, + OperationMode::UPDATE, + )?; + // Open indexes for update. + let mut index_cursors = Vec::with_capacity(plan.indexes_to_update.len()); + // TODO: do not reopen if there is table reference using it. 
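Both the DELETE loop above and the UPDATE path below build index entries of the same shape: the indexed column values, in index order, followed by the table's rowid. A standalone sketch of that layout (Row and IndexDef are illustrative stand-ins, not limbo types):

    struct Row { cols: Vec<i64>, rowid: i64 }
    struct IndexDef { pos_in_table: Vec<usize> }

    // Mirrors the Column/RowId sequence emitted above: one value per index
    // column, fetched by its position in the table, with the rowid appended last.
    fn index_key(row: &Row, idx: &IndexDef) -> Vec<i64> {
        let mut key: Vec<i64> = idx.pos_in_table.iter().map(|&p| row.cols[p]).collect();
        key.push(row.rowid);
        key
    }

    fn main() {
        let row = Row { cols: vec![7, 42, 9], rowid: 3 };
        let idx = IndexDef { pos_in_table: vec![2, 0] }; // index on (c, a)
        assert_eq!(index_key(&row, &idx), vec![9, 7, 3]);
    }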
+ for index in &plan.indexes_to_update { + let index_cursor = program.alloc_cursor_id( + Some(index.table_name.clone()), + CursorType::BTreeIndex(index.clone()), + ); + program.emit_insn(Insn::OpenWrite { + cursor_id: index_cursor, + root_page: RegisterOrLiteral::Literal(index.root_page), + }); + index_cursors.push(index_cursor); } open_loop( program, @@ -476,11 +573,9 @@ &plan.table_references, &plan.where_clause, )?; - emit_update_insns(&plan, &t_ctx, program)?; - program.resolve_label(skip_label, program.offset()); + emit_update_insns(&plan, &t_ctx, program, index_cursors)?; close_loop(program, &mut t_ctx, &plan.table_references)?; - - program.resolve_label(after_main_loop_label, program.offset()); + program.preassign_label_to_next_insn(after_main_loop_label); // Finalize program epilogue(program, init_label, start_offset, TransactionMode::Write)?; @@ -493,17 +588,28 @@ fn emit_update_insns( plan: &UpdatePlan, t_ctx: &TranslateCtx, program: &mut ProgramBuilder, + index_cursors: Vec<usize>, ) -> crate::Result<()> { let table_ref = &plan.table_references.first().unwrap(); - let (cursor_id, index) = match &table_ref.op { - Operation::Scan { .. } => (program.resolve_cursor_id(&table_ref.identifier), None), + let loop_labels = t_ctx.labels_main_loop.first().unwrap(); + let (cursor_id, index, is_virtual) = match &table_ref.op { + Operation::Scan { .. } => ( + program.resolve_cursor_id(&table_ref.identifier), + None, + table_ref.virtual_table().is_some(), + ), Operation::Search(search) => match search { - &Search::RowidEq { .. } | Search::RowidSearch { .. } => { - (program.resolve_cursor_id(&table_ref.identifier), None) - } - Search::IndexSearch { index, .. } => ( + &Search::RowidEq { .. } | Search::Seek { index: None, .. } => ( + program.resolve_cursor_id(&table_ref.identifier), + None, + false, + ), + Search::Seek { + index: Some(index), .. + } => ( program.resolve_cursor_id(&table_ref.identifier), Some((index.clone(), program.resolve_cursor_id(&index.name))), + false, ), }, _ => return Ok(()), }; @@ -523,25 +629,102 @@ meta, &t_ctx.resolver, )?; - program.resolve_label(jump_target, program.offset()); + program.preassign_label_to_next_insn(jump_target); } - let first_col_reg = program.alloc_registers(table_ref.table.columns().len()); - let rowid_reg = program.alloc_register(); + let beg = program.alloc_registers( + table_ref.table.columns().len() + + if is_virtual { + 2 // two args before the relevant columns for VUpdate + } else { + 1 // rowid reg + }, + ); program.emit_insn(Insn::RowId { cursor_id, - dest: rowid_reg, + dest: beg, }); // if no rowid, we're done program.emit_insn(Insn::IsNull { - reg: rowid_reg, + reg: beg, target_pc: t_ctx.label_main_loop_end.unwrap(), }); + if is_virtual { + program.emit_insn(Insn::Copy { + src_reg: beg, + dst_reg: beg + 1, + amount: 0, + }) + } + if let Some(offset) = t_ctx.reg_offset { + program.emit_insn(Insn::IfPos { + reg: offset, + target_pc: loop_labels.next, + decrement_by: 1, + }); + } + + for cond in plan.where_clause.iter().filter(|c| c.is_constant()) { + let meta = ConditionMetadata { + jump_if_condition_is_true: false, + jump_target_when_true: BranchOffset::Placeholder, + jump_target_when_false: loop_labels.next, + }; + translate_condition_expr( + program, + &plan.table_references, + &cond.expr, + meta, + &t_ctx.resolver, + )?; + } + + // Update indexes first. Columns that are updated will be translated from an expression, and those that aren't modified will be + // read from the table.
A multi-column index key may be updated only partially. + for (index, index_cursor) in plan.indexes_to_update.iter().zip(index_cursors) { + let index_record_reg_count = index.columns.len() + 1; + let index_record_reg_start = program.alloc_registers(index_record_reg_count); + for (idx, column) in index.columns.iter().enumerate() { + if let Some((_, expr)) = plan.set_clauses.iter().find(|(i, _)| *i == idx) { + translate_expr( + program, + Some(&plan.table_references), + expr, + index_record_reg_start + idx, + &t_ctx.resolver, + )?; + } else { + program.emit_insn(Insn::Column { + cursor_id: cursor_id, + column: column.pos_in_table, + dest: index_record_reg_start + idx, + }); + } + } + program.emit_insn(Insn::RowId { + cursor_id: cursor_id, + dest: index_record_reg_start + index.columns.len(), + }); + let index_record_reg = program.alloc_register(); + program.emit_insn(Insn::MakeRecord { + start_reg: index_record_reg_start, + count: index_record_reg_count, + dest_reg: index_record_reg, + }); + program.emit_insn(Insn::IdxInsert { + cursor_id: index_cursor, + record_reg: index_record_reg, + unpacked_start: Some(index_record_reg_start), + unpacked_count: Some(index_record_reg_count as u16), + flags: IdxInsertFlags::new(), + }); + } // we scan a column at a time, loading either the column's values, or the new value // from the Set expression, into registers so we can emit a MakeRecord and update the row. + let start = if is_virtual { beg + 2 } else { beg + 1 }; for idx in 0..table_ref.columns().len() { - if let Some((idx, expr)) = plan.set_clauses.iter().find(|(i, _)| *i == idx) { - let target_reg = first_col_reg + idx; + let target_reg = start + idx; + if let Some((_, expr)) = plan.set_clauses.iter().find(|(i, _)| *i == idx) { translate_expr( program, Some(&plan.table_references), @@ -556,9 +739,17 @@ .iter() .position(|c| Some(&c.name) == table_column.name.as_ref()) }); - let dest = first_col_reg + idx; - if table_column.primary_key { - program.emit_null(dest, None); + + // don't emit null for the pkey of virtual tables;
they require the first two args + // before the 'record' to be explicitly non-null + if table_column.is_rowid_alias && !is_virtual { + program.emit_null(target_reg, None); + } else if is_virtual { + program.emit_insn(Insn::VColumn { + cursor_id, + column: idx, + dest: target_reg, + }); } else { program.emit_insn(Insn::Column { cursor_id: *index @@ -572,24 +763,42 @@ }) .unwrap_or(&cursor_id), column: column_idx_in_index.unwrap_or(idx), - dest, + dest: target_reg, }); } } } - let record_reg = program.alloc_register(); - program.emit_insn(Insn::MakeRecord { - start_reg: first_col_reg, - count: table_ref.columns().len(), - dest_reg: record_reg, - }); - program.emit_insn(Insn::InsertAsync { - cursor: cursor_id, - key_reg: rowid_reg, - record_reg, - flag: 0, - }); - program.emit_insn(Insn::InsertAwait { cursor_id }); + if let Some(btree_table) = table_ref.btree() { + if btree_table.is_strict { + program.emit_insn(Insn::TypeCheck { + start_reg: start, + count: table_ref.columns().len(), + check_generated: true, + table_reference: Rc::clone(&btree_table), + }); + } + let record_reg = program.alloc_register(); + program.emit_insn(Insn::MakeRecord { + start_reg: start, + count: table_ref.columns().len(), + dest_reg: record_reg, + }); + program.emit_insn(Insn::Insert { + cursor: cursor_id, + key_reg: beg, + record_reg, + flag: 0, + }); + } else if let Some(vtab) = table_ref.virtual_table() { + let arg_count = table_ref.columns().len() + 2; + program.emit_insn(Insn::VUpdate { + cursor_id, + arg_count, + start_reg: beg, + vtab_ptr: vtab.implementation.as_ref().ctx as usize, + conflict_action: 0u16, + }); + } if let Some(limit_reg) = t_ctx.reg_limit { program.emit_insn(Insn::DecrJumpZero { diff --git a/core/translate/expr.rs b/core/translate/expr.rs index cd384391a..dba075ac2 100644 --- a/core/translate/expr.rs +++ b/core/translate/expr.rs @@ -1,10 +1,14 @@ use limbo_sqlite3_parser::ast::{self, UnaryOperator}; +use super::emitter::Resolver; +use super::optimizer::Optimizable; +use super::plan::{Operation, TableReference}; #[cfg(feature = "json")] use crate::function::JsonFunc; use crate::function::{Func, FuncCtx, MathFuncArity, ScalarFunc, VectorFunc}; +use crate::functions::datetime; use crate::schema::{Table, Type}; -use crate::util::normalize_ident; +use crate::util::{exprs_are_equivalent, normalize_ident}; use crate::vdbe::{ builder::ProgramBuilder, insn::{CmpInsFlags, Insn}, @@ -12,9 +16,6 @@ }; use crate::Result; -use super::emitter::Resolver; -use super::plan::{Operation, TableReference}; - #[derive(Debug, Clone, Copy)] pub struct ConditionMetadata { pub jump_if_condition_is_true: bool, @@ -186,7 +187,9 @@ pub fn translate_condition_expr( resolver: &Resolver, ) -> Result<()> { match expr { - ast::Expr::Between { .. } => todo!(), + ast::Expr::Between { .. } => { + unreachable!("expression should have been rewritten in the optimizer") + } ast::Expr::Binary(lhs, ast::Operator::And, rhs) => { // In a binary AND, never jump to the parent 'jump_target_when_true' label on the first condition, because // the second condition MUST also be true.
Instead we instruct the child expression to jump to a local @@ -203,7 +206,7 @@ }, resolver, )?; - program.resolve_label(jump_target_when_true, program.offset()); + program.preassign_label_to_next_insn(jump_target_when_true); translate_condition_expr( program, referenced_tables, @@ -228,7 +231,7 @@ }, resolver, )?; - program.resolve_label(jump_target_when_false, program.offset()); + program.preassign_label_to_next_insn(jump_target_when_false); translate_condition_expr( program, referenced_tables, @@ -252,8 +255,8 @@ { let lhs_reg = program.alloc_register(); let rhs_reg = program.alloc_register(); - translate_and_mark(program, Some(referenced_tables), lhs, lhs_reg, resolver)?; - translate_and_mark(program, Some(referenced_tables), rhs, rhs_reg, resolver)?; + translate_expr(program, Some(referenced_tables), lhs, lhs_reg, resolver)?; + translate_expr(program, Some(referenced_tables), rhs, rhs_reg, resolver)?; match op { ast::Operator::Greater => { emit_cmp_insn!(program, condition_metadata, Gt, Le, lhs_reg, rhs_reg) @@ -408,7 +411,7 @@ } if !condition_metadata.jump_if_condition_is_true { - program.resolve_label(jump_target_when_true, program.offset()); + program.preassign_label_to_next_insn(jump_target_when_true); } } ast::Expr::Like { not, .. } => { @@ -476,6 +479,38 @@ Ok(()) } +/// Reason why [translate_expr_no_constant_opt()] was called. +#[derive(Debug)] +pub enum NoConstantOptReason { + /// The expression translation involves reusing register(s), + /// so hoisting those register assignments is not safe. + /// e.g. SELECT COALESCE(1, t.x, NULL) would overwrite 1 with NULL, which is invalid. + RegisterReuse, +} + +/// Translate an expression into bytecode via [translate_expr()], and forbid any constant values from being hoisted +/// into the beginning of the program. This is a good idea in most cases where +/// a register will end up being reused e.g. in a coroutine. +pub fn translate_expr_no_constant_opt( + program: &mut ProgramBuilder, + referenced_tables: Option<&[TableReference]>, + expr: &ast::Expr, + target_register: usize, + resolver: &Resolver, + deopt_reason: NoConstantOptReason, +) -> Result<usize> { + tracing::debug!( + "translate_expr_no_constant_opt: expr={:?}, deopt_reason={:?}", + expr, + deopt_reason + ); + let next_span_idx = program.constant_spans_next_idx(); + let translated = translate_expr(program, referenced_tables, expr, target_register, resolver)?; + program.constant_spans_invalidate_after(next_span_idx); + Ok(translated) +} + +/// Translate an expression into bytecode. pub fn translate_expr( program: &mut ProgramBuilder, referenced_tables: Option<&[TableReference]>, @@ -483,34 +518,51 @@ expr: &ast::Expr, target_register: usize, resolver: &Resolver, ) -> Result<usize> { + let constant_span = if expr.is_constant(resolver) { + if !program.constant_span_is_open() { + Some(program.constant_span_start()) + } else { + None + } + } else { + program.constant_span_end_all(); + None + }; + if let Some(reg) = resolver.resolve_cached_expr_reg(expr) { program.emit_insn(Insn::Copy { src_reg: reg, dst_reg: target_register, amount: 0, }); + if let Some(span) = constant_span { + program.constant_span_end(span); + } return Ok(target_register); } +
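To see why constant hoisting must be disabled in the reuse case, consider the COALESCE example from the doc comment above. A toy model in plain Rust (ordinary control flow standing in for the emitted bytecode):

    fn main() {
        // COALESCE(1, t.x, NULL): the arguments 1 and NULL are both constant,
        // and every argument is written to the SAME target register, with a
        // not-null check after each one. Evaluated in place, the constant 1
        // wins and the NULL write is never reached:
        let t_x = Some(5i64);
        let mut reg: Option<i64> = Some(1); // Integer 1 -> reg
        if reg.is_none() {
            reg = t_x; // second argument, same register
            if reg.is_none() {
                reg = None; // Null -> reg, third argument
            }
        }
        assert_eq!(reg, Some(1));
        // If both constant writes were hoisted into the program prologue, they
        // would execute back to back (Integer 1 -> reg, then Null -> reg),
        // leaving the shared register NULL before the first row is processed.
    }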
match expr { - ast::Expr::Between { .. } => todo!(), + ast::Expr::Between { .. } => { + unreachable!("expression should have been rewritten in the optimizer") + } ast::Expr::Binary(e1, op, e2) => { - // Check if both sides of the expression are identical and reuse the same register if so - if e1 == e2 { + // Check if both sides of the expression are equivalent and reuse the same register if so + if exprs_are_equivalent(e1, e2) { let shared_reg = program.alloc_register(); translate_expr(program, referenced_tables, e1, shared_reg, resolver)?; emit_binary_insn(program, op, shared_reg, shared_reg, target_register)?; - return Ok(target_register); + Ok(target_register) + } else { + let e1_reg = program.alloc_registers(2); + let e2_reg = e1_reg + 1; + + translate_expr(program, referenced_tables, e1, e1_reg, resolver)?; + translate_expr(program, referenced_tables, e2, e2_reg, resolver)?; + + emit_binary_insn(program, op, e1_reg, e2_reg, target_register)?; + Ok(target_register) } - - let e1_reg = program.alloc_registers(2); - let e2_reg = e1_reg + 1; - - translate_expr(program, referenced_tables, e1, e1_reg, resolver)?; - translate_expr(program, referenced_tables, e2, e2_reg, resolver)?; - - emit_binary_insn(program, op, e1_reg, e2_reg, target_register)?; - Ok(target_register) } ast::Expr::Case { base, @@ -541,7 +593,14 @@ )?; }; for (when_expr, then_expr) in when_then_pairs { - translate_expr(program, referenced_tables, when_expr, expr_reg, resolver)?; + translate_expr_no_constant_opt( + program, + referenced_tables, + when_expr, + expr_reg, + resolver, + NoConstantOptReason::RegisterReuse, + )?; match base_reg { // CASE 1 WHEN 0 THEN 0 ELSE 1 becomes 1==0, Ne branch to next clause Some(base_reg) => program.emit_insn(Insn::Ne { @@ -559,12 +618,13 @@ }), }; // THEN... - translate_expr( + translate_expr_no_constant_opt( program, referenced_tables, then_expr, target_register, resolver, + NoConstantOptReason::RegisterReuse, )?; program.emit_insn(Insn::Goto { target_pc: return_label, @@ -576,7 +636,14 @@ } match else_expr { Some(expr) => { - translate_expr(program, referenced_tables, expr, target_register, resolver)?; + translate_expr_no_constant_opt( + program, + referenced_tables, + expr, + target_register, + resolver, + NoConstantOptReason::RegisterReuse, + )?; } // If ELSE isn't specified, it means ELSE null.
None => { @@ -586,7 +653,7 @@ pub fn translate_expr( }); } }; - program.resolve_label(return_label, program.offset()); + program.preassign_label_to_next_insn(return_label); Ok(target_register) } ast::Expr::Cast { expr, type_name } => { @@ -772,7 +839,7 @@ pub fn translate_expr( if let Some(args) = args { for (i, arg) in args.iter().enumerate() { // register containing result of each argument expression - translate_and_mark( + translate_expr( program, referenced_tables, arg, @@ -900,12 +967,13 @@ pub fn translate_expr( // whenever a not null check succeeds, we jump to the end of the series let label_coalesce_end = program.allocate_label(); for (index, arg) in args.iter().enumerate() { - let reg = translate_expr( + let reg = translate_expr_no_constant_opt( program, referenced_tables, arg, target_register, resolver, + NoConstantOptReason::RegisterReuse, )?; if index < args.len() - 1 { program.emit_insn(Insn::NotNull { @@ -987,12 +1055,13 @@ pub fn translate_expr( }; let temp_reg = program.alloc_register(); - translate_expr( + translate_expr_no_constant_opt( program, referenced_tables, &args[0], temp_reg, resolver, + NoConstantOptReason::RegisterReuse, )?; let before_copy_label = program.allocate_label(); program.emit_insn(Insn::NotNull { @@ -1000,12 +1069,13 @@ pub fn translate_expr( target_pc: before_copy_label, }); - translate_expr( + translate_expr_no_constant_opt( program, referenced_tables, &args[1], temp_reg, resolver, + NoConstantOptReason::RegisterReuse, )?; program.resolve_label(before_copy_label, program.offset()); program.emit_insn(Insn::Copy { @@ -1025,12 +1095,13 @@ pub fn translate_expr( ), }; let temp_reg = program.alloc_register(); - translate_expr( + translate_expr_no_constant_opt( program, referenced_tables, &args[0], temp_reg, resolver, + NoConstantOptReason::RegisterReuse, )?; let jump_target_when_false = program.allocate_label(); program.emit_insn(Insn::IfNot { @@ -1038,26 +1109,28 @@ pub fn translate_expr( target_pc: jump_target_when_false, jump_if_null: true, }); - translate_expr( + translate_expr_no_constant_opt( program, referenced_tables, &args[1], target_register, resolver, + NoConstantOptReason::RegisterReuse, )?; let jump_target_result = program.allocate_label(); program.emit_insn(Insn::Goto { target_pc: jump_target_result, }); - program.resolve_label(jump_target_when_false, program.offset()); - translate_expr( + program.preassign_label_to_next_insn(jump_target_when_false); + translate_expr_no_constant_opt( program, referenced_tables, &args[2], target_register, resolver, + NoConstantOptReason::RegisterReuse, )?; - program.resolve_label(jump_target_result, program.offset()); + program.preassign_label_to_next_insn(jump_target_result); Ok(target_register) } ScalarFunc::Glob | ScalarFunc::Like => { @@ -1109,7 +1182,7 @@ pub fn translate_expr( | ScalarFunc::ZeroBlob => { let args = expect_arguments_exact!(args, 1, srf); let start_reg = program.alloc_register(); - translate_and_mark( + translate_expr( program, referenced_tables, &args[0], @@ -1128,7 +1201,7 @@ pub fn translate_expr( ScalarFunc::LoadExtension => { let args = expect_arguments_exact!(args, 1, srf); let start_reg = program.alloc_register(); - translate_and_mark( + translate_expr( program, referenced_tables, &args[0], @@ -1159,13 +1232,13 @@ pub fn translate_expr( }); Ok(target_register) } - ScalarFunc::Date | ScalarFunc::DateTime => { + ScalarFunc::Date | ScalarFunc::DateTime | ScalarFunc::JulianDay => { let start_reg = program .alloc_registers(args.as_ref().map(|x| x.len()).unwrap_or(1)); if let Some(args) 
= args { for (i, arg) in args.iter().enumerate() { // register containing result of each argument expression - translate_and_mark( + translate_expr( program, referenced_tables, arg, @@ -1244,7 +1317,7 @@ pub fn translate_expr( crate::bail_parse_error!("hex function with no arguments",); }; let start_reg = program.alloc_register(); - translate_and_mark( + translate_expr( program, referenced_tables, &args[0], @@ -1259,11 +1332,11 @@ pub fn translate_expr( }); Ok(target_register) } - ScalarFunc::UnixEpoch | ScalarFunc::JulianDay => { + ScalarFunc::UnixEpoch => { let mut start_reg = 0; match args { Some(args) if args.len() > 1 => { - crate::bail_parse_error!("epoch or julianday function with > 1 arguments. Modifiers are not yet supported."); + crate::bail_parse_error!("epoch function with > 1 arguments. Modifiers are not yet supported."); } Some(args) if args.len() == 1 => { let arg_reg = program.alloc_register(); @@ -1292,7 +1365,7 @@ pub fn translate_expr( if let Some(args) = args { for (i, arg) in args.iter().enumerate() { // register containing result of each argument expression - translate_and_mark( + translate_expr( program, referenced_tables, arg, @@ -1309,6 +1382,33 @@ pub fn translate_expr( }); Ok(target_register) } + ScalarFunc::TimeDiff => { + let args = expect_arguments_exact!(args, 2, srf); + + let start_reg = program.alloc_registers(2); + translate_expr( + program, + referenced_tables, + &args[0], + start_reg, + resolver, + )?; + translate_expr( + program, + referenced_tables, + &args[1], + start_reg + 1, + resolver, + )?; + + program.emit_insn(Insn::Function { + constant_mask: 0, + start_reg, + dest: target_register, + func: func_ctx, + }); + Ok(target_register) + } ScalarFunc::TotalChanges => { if args.is_some() { crate::bail_parse_error!( @@ -1334,7 +1434,7 @@ pub fn translate_expr( let start_reg = program.alloc_registers(args.len()); for (i, arg) in args.iter().enumerate() { - translate_and_mark( + translate_expr( program, referenced_tables, arg, @@ -1363,7 +1463,7 @@ pub fn translate_expr( }; let start_reg = program.alloc_registers(args.len()); for (i, arg) in args.iter().enumerate() { - translate_and_mark( + translate_expr( program, referenced_tables, arg, @@ -1393,7 +1493,7 @@ pub fn translate_expr( }; let start_reg = program.alloc_registers(args.len()); for (i, arg) in args.iter().enumerate() { - translate_and_mark( + translate_expr( program, referenced_tables, arg, @@ -1546,7 +1646,7 @@ pub fn translate_expr( if let Some(args) = args { for (i, arg) in args.iter().enumerate() { // register containing result of each argument expression - translate_and_mark( + translate_expr( program, referenced_tables, arg, @@ -1571,6 +1671,85 @@ pub fn translate_expr( target_register, func_ctx, ), + ScalarFunc::Likely => { + let args = if let Some(args) = args { + if args.len() != 1 { + crate::bail_parse_error!( + "likely function must have exactly 1 argument", + ); + } + args + } else { + crate::bail_parse_error!("likely function with no arguments",); + }; + let start_reg = program.alloc_register(); + translate_expr( + program, + referenced_tables, + &args[0], + start_reg, + resolver, + )?; + program.emit_insn(Insn::Function { + constant_mask: 0, + start_reg, + dest: target_register, + func: func_ctx, + }); + Ok(target_register) + } + ScalarFunc::Likelihood => { + let args = if let Some(args) = args { + if args.len() != 2 { + crate::bail_parse_error!( + "likelihood() function must have exactly 2 arguments", + ); + } + args + } else { + crate::bail_parse_error!("likelihood() function with 
no arguments",); + }; + + if let ast::Expr::Literal(ast::Literal::Numeric(ref value)) = args[1] { + if let Ok(probability) = value.parse::<f64>() { + if !(0.0..=1.0).contains(&probability) { + crate::bail_parse_error!( + "second argument of likelihood() must be between 0.0 and 1.0", + ); + } + if !value.contains('.') { + crate::bail_parse_error!( + "second argument of likelihood() must be a floating point number with decimal point", + ); + } + } else { + crate::bail_parse_error!( + "second argument of likelihood() must be a floating point constant", + ); + } + } else { + crate::bail_parse_error!( + "second argument of likelihood() must be a numeric literal", + ); + } + + let start_reg = program.alloc_register(); + translate_expr( + program, + referenced_tables, + &args[0], + start_reg, + resolver, + )?; + + program.emit_insn(Insn::Copy { + src_reg: start_reg, + dst_reg: target_register, + amount: 0, + }); + + Ok(target_register) + } } } Func::Math(math_func) => match math_func.arity() { @@ -1591,13 +1770,7 @@ MathFuncArity::Unary => { let args = expect_arguments_exact!(args, 1, math_func); let start_reg = program.alloc_register(); - translate_and_mark( - program, - referenced_tables, - &args[0], - start_reg, - resolver, - )?; + translate_expr(program, referenced_tables, &args[0], start_reg, resolver)?; program.emit_insn(Insn::Function { constant_mask: 0, start_reg, @@ -1664,41 +1837,79 @@ is_rowid_alias, } => { let table_reference = referenced_tables.as_ref().unwrap().get(*table).unwrap(); + let index = table_reference.op.index(); + let use_covering_index = table_reference.utilizes_covering_index(); match table_reference.op { // If we are reading a column from a table, we find the cursor that corresponds to // the table and read the column from the cursor. - Operation::Scan { .. } | Operation::Search(_) => match &table_reference.table { - Table::BTree(_) => { - let cursor_id = program.resolve_cursor_id(&table_reference.identifier); - if *is_rowid_alias { - program.emit_insn(Insn::RowId { - cursor_id, - dest: target_register, - }); - } else { - program.emit_insn(Insn::Column { + // If we have a covering index, we don't have an open table cursor so we read from the index cursor. + Operation::Scan { ..
} | Operation::Search(_) => { + match &table_reference.table { + Table::BTree(_) => { + let table_cursor_id = if use_covering_index { + None + } else { + Some(program.resolve_cursor_id(&table_reference.identifier)) + }; + let index_cursor_id = if let Some(index) = index { + Some(program.resolve_cursor_id(&index.name)) + } else { + None + }; + if *is_rowid_alias { + if let Some(index_cursor_id) = index_cursor_id { + program.emit_insn(Insn::IdxRowId { + cursor_id: index_cursor_id, + dest: target_register, + }); + } else if let Some(table_cursor_id) = table_cursor_id { + program.emit_insn(Insn::RowId { + cursor_id: table_cursor_id, + dest: target_register, + }); + } else { + unreachable!("Either index or table cursor must be opened"); + } + } else { + let read_cursor = if use_covering_index { + index_cursor_id + .expect("index cursor should be opened when use_covering_index=true") + } else { + table_cursor_id + .expect("table cursor should be opened when use_covering_index=false") + }; + let column = if use_covering_index { + let index = index.expect("index cursor should be opened when use_covering_index=true"); + index.column_table_pos_to_index_pos(*column).unwrap_or_else(|| { + panic!("covering index {} does not contain column number {} of table {}", index.name, column, table_reference.identifier) + }) + } else { + *column + }; + program.emit_insn(Insn::Column { + cursor_id: read_cursor, + column, + dest: target_register, + }); + } + let Some(column) = table_reference.table.get_column_at(*column) else { + crate::bail_parse_error!("column index out of bounds"); + }; + maybe_apply_affinity(column.ty, target_register, program); + Ok(target_register) + } + Table::Virtual(_) => { + let cursor_id = program.resolve_cursor_id(&table_reference.identifier); + program.emit_insn(Insn::VColumn { cursor_id, column: *column, dest: target_register, }); + Ok(target_register) } - let Some(column) = table_reference.table.get_column_at(*column) else { - crate::bail_parse_error!("column index out of bounds"); - }; - maybe_apply_affinity(column.ty, target_register, program); - Ok(target_register) + _ => unreachable!(), } - Table::Virtual(_) => { - let cursor_id = program.resolve_cursor_id(&table_reference.identifier); - program.emit_insn(Insn::VColumn { - cursor_id, - column: *column, - dest: target_register, - }); - Ok(target_register) - } - _ => unreachable!(), - }, + } // If we are reading a column from a subquery, we instead copy the column from the // subquery's result registers. Operation::Subquery { @@ -1716,11 +1927,23 @@ pub fn translate_expr( } ast::Expr::RowId { database: _, table } => { let table_reference = referenced_tables.as_ref().unwrap().get(*table).unwrap(); - let cursor_id = program.resolve_cursor_id(&table_reference.identifier); - program.emit_insn(Insn::RowId { - cursor_id, - dest: target_register, - }); + let index = table_reference.op.index(); + let use_covering_index = table_reference.utilizes_covering_index(); + if use_covering_index { + let index = + index.expect("index cursor should be opened when use_covering_index=true"); + let cursor_id = program.resolve_cursor_id(&index.name); + program.emit_insn(Insn::IdxRowId { + cursor_id, + dest: target_register, + }); + } else { + let cursor_id = program.resolve_cursor_id(&table_reference.identifier); + program.emit_insn(Insn::RowId { + cursor_id, + dest: target_register, + }); + } Ok(target_register) } ast::Expr::InList { .. 
} => todo!(), @@ -1744,8 +1967,14 @@ } ast::Expr::Literal(lit) => match lit { ast::Literal::Numeric(val) => { - let maybe_int = val.parse::<i64>(); - if let Ok(int_value) = maybe_int { + if val.starts_with("0x") || val.starts_with("0X") { + // must be a hexadecimal literal + let int_value = i64::from_str_radix(&val[2..], 16)?; + program.emit_insn(Insn::Integer { + value: int_value, + dest: target_register, + }); + } else if let Ok(int_value) = val.parse::<i64>() { program.emit_insn(Insn::Integer { value: int_value, dest: target_register, @@ -1791,9 +2020,27 @@ }); Ok(target_register) } - ast::Literal::CurrentDate => todo!(), - ast::Literal::CurrentTime => todo!(), - ast::Literal::CurrentTimestamp => todo!(), + ast::Literal::CurrentDate => { + program.emit_insn(Insn::String8 { + value: datetime::exec_date(&[]).to_string(), + dest: target_register, + }); + Ok(target_register) + } + ast::Literal::CurrentTime => { + program.emit_insn(Insn::String8 { + value: datetime::exec_time(&[]).to_string(), + dest: target_register, + }); + Ok(target_register) + } + ast::Literal::CurrentTimestamp => { + program.emit_insn(Insn::String8 { + value: datetime::exec_datetime_full(&[]).to_string(), + dest: target_register, + }); + Ok(target_register) + } }, ast::Expr::Name(_) => todo!(), ast::Expr::NotNull(_) => todo!(), @@ -1829,14 +2076,22 @@ // Special case: if we're negating "9223372036854775808", this is exactly MIN_INT64 // If we don't do this -1 * 9223372036854775808 will overflow and parse will fail // and trigger conversion to Real. - if numeric_value == "9223372036854775808" { + if numeric_value == "9223372036854775808" + || numeric_value.eq_ignore_ascii_case("0x7fffffffffffffff") + { program.emit_insn(Insn::Integer { value: i64::MIN, dest: target_register, }); } else { - let maybe_int = numeric_value.parse::<i64>(); - if let Ok(value) = maybe_int { + if numeric_value.starts_with("0x") || numeric_value.starts_with("0X") { + // must be a hexadecimal literal + let int_value = i64::from_str_radix(&numeric_value[2..], 16)?; + program.emit_insn(Insn::Integer { + value: -int_value, + dest: target_register, + }); + } else if let Ok(value) = numeric_value.parse::<i64>() { program.emit_insn(Insn::Integer { value: value * -1, dest: target_register, @@ -1852,7 +2107,7 @@ Ok(target_register) } (UnaryOperator::Negative, _) => { - let value = -1; + let value = 0; let reg = program.alloc_register(); translate_expr(program, referenced_tables, expr, reg, resolver)?; @@ -1862,7 +2117,7 @@ dest: zero_reg, }); program.mark_last_insn_constant(); - program.emit_insn(Insn::Multiply { + program.emit_insn(Insn::Subtract { lhs: zero_reg, rhs: reg, dest: target_register, @@ -1870,8 +2125,13 @@ Ok(target_register) } (UnaryOperator::BitwiseNot, ast::Expr::Literal(ast::Literal::Numeric(num_val))) => { - let maybe_int = num_val.parse::<i64>(); - if let Ok(val) = maybe_int { + if num_val.starts_with("0x") || num_val.starts_with("0X") { + let int_value = i64::from_str_radix(&num_val[2..], 16)?; + program.emit_insn(Insn::Integer { + value: !int_value, + dest: target_register, + }); + } else if let Ok(val) = num_val.parse::<i64>() { program.emit_insn(Insn::Integer { value: !val, dest: target_register, @@ -1919,7 +2179,13 @@ }); Ok(target_register) } + }?; + + if let Some(span) = constant_span { + program.constant_span_end(span); } + + Ok(target_register) } fn emit_binary_insn( @@ -2188,17 +2454,11 @@
let arg_count = if matches!(escape, Some(_)) { 3 } else { 2 }; let start_reg = program.alloc_registers(arg_count); let mut constant_mask = 0; - translate_and_mark(program, referenced_tables, lhs, start_reg + 1, resolver)?; + translate_expr(program, referenced_tables, lhs, start_reg + 1, resolver)?; let _ = translate_expr(program, referenced_tables, rhs, start_reg, resolver)?; if arg_count == 3 { if let Some(escape) = escape { - translate_and_mark( - program, - referenced_tables, - escape, - start_reg + 2, - resolver, - )?; + translate_expr(program, referenced_tables, escape, start_reg + 2, resolver)?; } } if matches!(rhs.as_ref(), ast::Expr::Literal(_)) { @@ -2303,20 +2563,6 @@ } } -pub fn translate_and_mark( - program: &mut ProgramBuilder, - referenced_tables: Option<&[TableReference]>, - expr: &ast::Expr, - target_register: usize, - resolver: &Resolver, -) -> Result<()> { - translate_expr(program, referenced_tables, expr, target_register, resolver)?; - if matches!(expr, ast::Expr::Literal(_)) { - program.mark_last_insn_constant(); - } - Ok(()) -} - /// Sanitizes a string literal by removing the single quotes at front and back /// and escaping doubled single quotes pub fn sanitize_string(input: &str) -> String { diff --git a/core/translate/group_by.rs b/core/translate/group_by.rs index 13d860f16..70b33dee1 100644 --- a/core/translate/group_by.rs +++ b/core/translate/group_by.rs @@ -1,11 +1,11 @@ use std::rc::Rc; -use limbo_sqlite3_parser::ast; +use limbo_sqlite3_parser::ast::{self, SortOrder}; use crate::{ function::AggFunc, schema::{Column, PseudoTable}, - types::{OwnedValue, Record}, + util::exprs_are_equivalent, vdbe::{ builder::{CursorType, ProgramBuilder}, insn::Insn, @@ -37,12 +37,15 @@ pub struct GroupByMetadata { pub reg_sorter_key: usize, // Register holding a flag to abort the grouping process if necessary pub reg_abort_flag: usize, - // Register holding the start of the accumulator group registers (i.e. the groups, not the aggregates) - pub reg_group_exprs_acc: usize, + // Register holding the start of the non-aggregate query members (all columns except aggregate arguments) + pub reg_non_aggregate_exprs_acc: usize, // Starting index of the register(s) that hold the comparison result between the current row and the previous row // The comparison result is used to determine if the current row belongs to the same group as the previous row // Each group by expression has a corresponding register pub reg_group_exprs_cmp: usize, + // Count of columns that are not part of the GROUP BY clause and not arguments of an aggregate function. + // This is relatively expensive to compute and needed in several places, so we compute it once and cache it here.
+ pub non_group_by_non_agg_column_count: Option<usize>, } /// Initialize resources needed for GROUP BY processing @@ -50,29 +53,30 @@ pub fn init_group_by( program: &mut ProgramBuilder, t_ctx: &mut TranslateCtx, group_by: &GroupBy, - aggregates: &[Aggregate], + plan: &SelectPlan, ) -> Result<()> { - let num_aggs = aggregates.len(); + let num_aggs = plan.aggregates.len(); + + let non_aggregate_count = plan + .result_columns + .iter() + .filter(|rc| !rc.contains_aggregates) + .count(); let sort_cursor = program.alloc_cursor_id(None, CursorType::Sorter); let reg_abort_flag = program.alloc_register(); let reg_group_exprs_cmp = program.alloc_registers(group_by.exprs.len()); - let reg_group_exprs_acc = program.alloc_registers(group_by.exprs.len()); + let reg_non_aggregate_exprs_acc = program.alloc_registers(non_aggregate_count); let reg_agg_exprs_start = program.alloc_registers(num_aggs); let reg_sorter_key = program.alloc_register(); let label_subrtn_acc_clear = program.allocate_label(); - let mut order = Vec::new(); - const ASCENDING: i64 = 0; - for _ in group_by.exprs.iter() { - order.push(OwnedValue::Integer(ASCENDING)); - } program.emit_insn(Insn::SorterOpen { cursor_id: sort_cursor, - columns: aggregates.len() + group_by.exprs.len(), - order: Record::new(order), + columns: non_aggregate_count + plan.aggregates.len(), + order: (0..group_by.exprs.len()).map(|_| SortOrder::Asc).collect(), }); program.add_comment(program.offset(), "clear group by abort flag"); @@ -110,9 +114,10 @@ label_acc_indicator_set_flag_true: program.allocate_label(), reg_subrtn_acc_clear_return_offset, reg_abort_flag, - reg_group_exprs_acc, + reg_non_aggregate_exprs_acc, reg_group_exprs_cmp, reg_sorter_key, + non_group_by_non_agg_column_count: None, }); Ok(()) }
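The sorter row layout that emit_group_by assumes below puts the GROUP BY keys first (they are the sort keys), then the remaining non-aggregate columns, then the aggregate arguments. A small sketch of the arithmetic, with illustrative values:

    fn main() {
        let group_by_count = 2; // e.g. GROUP BY a, b
        let non_group_by_non_agg_column_count = 1; // e.g. a bare column c
        let agg_args_count = 3; // e.g. sum(x), count(y), avg(z)
        // total sorter width, as computed in emit_group_by:
        let sorter_column_count =
            group_by_count + non_group_by_non_agg_column_count + agg_args_count;
        assert_eq!(sorter_column_count, 6);
        // aggregate arguments start right after all non-aggregate columns,
        // which is where the per-row aggregation step starts reading:
        let first_agg_arg_column = group_by_count + non_group_by_non_agg_column_count;
        assert_eq!(first_agg_arg_column, 3);
    }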
@@ -146,25 +151,57 @@ pub fn emit_group_by<'a>( sort_cursor, reg_group_exprs_cmp, reg_subrtn_acc_clear_return_offset, - reg_group_exprs_acc, + reg_non_aggregate_exprs_acc, reg_abort_flag, reg_sorter_key, label_subrtn_acc_clear, label_acc_indicator_set_flag_true, + non_group_by_non_agg_column_count, .. } = *t_ctx.meta_group_by.as_mut().unwrap(); - let group_by = plan.group_by.as_ref().unwrap(); - // all group by columns and all arguments of agg functions are in the sorter. - // the sort keys are the group by columns (the aggregation within groups is done based on how long the sort keys remain the same) - let sorter_column_count = group_by.exprs.len() - + plan - .aggregates .iter() - .map(|agg| agg.args.len()) - .sum::<usize>(); - // sorter column names do not matter + let agg_args_count = plan + .aggregates + .iter() + .map(|agg| agg.args.len()) + .sum::<usize>(); + let group_by_count = group_by.exprs.len(); + let non_group_by_non_agg_column_count = non_group_by_non_agg_column_count.unwrap(); + + // We need to know which GROUP BY expressions appear in the result set + let group_by_expr_in_res_cols = group_by.exprs.iter().map(|expr| { plan.result_columns .iter() .any(|e| exprs_are_equivalent(&e.expr, expr)) }); + + // Create a map from sorter column index to result register + // This helps track where each column from the sorter should be stored + let mut column_register_mapping = vec![None; group_by_count + non_group_by_non_agg_column_count]; + let mut next_reg = reg_non_aggregate_exprs_acc; + + // Map GROUP BY columns that are in the result set to registers + for (i, is_in_result) in group_by_expr_in_res_cols.clone().enumerate() { + if is_in_result { + column_register_mapping[i] = Some(next_reg); + next_reg += 1; + } + } + + // Map the remaining non-aggregate columns that are neither GROUP BY expressions nor aggregate arguments + for i in group_by_count..group_by_count + non_group_by_non_agg_column_count { + column_register_mapping[i] = Some(next_reg); + next_reg += 1; + } + + // Calculate total number of columns in the sorter + // The sorter contains all GROUP BY columns, aggregate arguments, and other columns + let sorter_column_count = agg_args_count + group_by_count + non_group_by_non_agg_column_count; + + // Create pseudo-columns for the pseudo-table + // (these are placeholders as we only care about structure, not semantics) let ty = crate::schema::Type::Null; let pseudo_columns = (0..sorter_column_count) .map(|_| Column { @@ -178,7 +215,8 @@ }) .collect::<Vec<_>>(); - // A pseudo table is a "fake" table to which we read one row at a time from the sorter + // Create a pseudo-table to read one row at a time from the sorter + // This allows us to use standard table access operations on the sorted data let pseudo_table = Rc::new(PseudoTable { columns: pseudo_columns, }); @@ -231,10 +269,21 @@ "start new group if comparison is not equal", ); // If we are at a new group, continue. If we are at the same group, jump to the aggregation step (i.e.
accumulate more values into the aggregations) + let label_jump_after_comparison = program.allocate_label(); program.emit_insn(Insn::Jump { - target_pc_lt: program.offset().add(1u32), + target_pc_lt: label_jump_after_comparison, target_pc_eq: agg_step_label, - target_pc_gt: program.offset().add(1u32), + target_pc_gt: label_jump_after_comparison, }); + + program.add_comment( + program.offset(), + "check if ended group had data, and output if so", + ); + program.resolve_label(label_jump_after_comparison, program.offset()); + program.emit_insn(Insn::Gosub { + target_pc: label_subrtn_acc_output, + return_reg: reg_subrtn_acc_output_return_offset, + }); // New group, move current group by columns into the comparison register @@ -244,15 +293,6 @@ count: group_by.exprs.len(), }); - program.add_comment( - program.offset(), - "check if ended group had data, and output if so", - ); - program.emit_insn(Insn::Gosub { - target_pc: label_subrtn_acc_output, - return_reg: reg_subrtn_acc_output_return_offset, - }); - program.add_comment(program.offset(), "check abort flag"); program.emit_insn(Insn::IfPos { reg: reg_abort_flag, @@ -266,10 +306,10 @@ return_reg: reg_subrtn_acc_clear_return_offset, }); - // Accumulate the values into the aggregations + // Process each aggregate function for the current row program.resolve_label(agg_step_label, program.offset()); let start_reg = t_ctx.reg_agg_start.unwrap(); - let mut cursor_index = group_by.exprs.len(); + let mut cursor_index = group_by_count + non_group_by_non_agg_column_count; // Skip all sorter columns that are not aggregate arguments for (i, agg) in plan.aggregates.iter().enumerate() { let agg_result_reg = start_reg + i; translate_aggregation_step_groupby( @@ -284,7 +324,8 @@ cursor_index += agg.args.len(); } - // We only emit the group by columns if we are going to start a new group (i.e.
the prev group will not accumulate any more values into the aggregations) + // We only need to store non-aggregate columns once per group + // Skip if we've already stored them for this group program.add_comment( program.offset(), "don't emit group columns if continuing existing group", @@ -295,17 +336,18 @@ pub fn emit_group_by<'a>( jump_if_null: false, }); - // Read the group by columns for a finished group - for i in 0..group_by.exprs.len() { - let key_reg = reg_group_exprs_acc + i; - let sorter_column_index = i; - program.emit_insn(Insn::Column { - cursor_id: pseudo_cursor, - column: sorter_column_index, - dest: key_reg, - }); + // Read non-aggregate columns from the current row + for (sorter_column_index, dest_reg) in column_register_mapping.iter().enumerate() { + if let Some(dest_reg) = dest_reg { + program.emit_insn(Insn::Column { + cursor_id: pseudo_cursor, + column: sorter_column_index, + dest: *dest_reg, + }); + } } + // Mark that we've stored data for this group program.resolve_label(label_acc_indicator_set_flag_true, program.offset()); program.add_comment(program.offset(), "indicate data in accumulator"); program.emit_insn(Insn::Integer { @@ -313,12 +355,12 @@ pub fn emit_group_by<'a>( dest: reg_data_in_acc_flag, }); + // Continue to the next row in the sorter program.emit_insn(Insn::SorterNext { cursor_id: sort_cursor, pc_if_next: label_grouping_loop_start, }); - - program.resolve_label(label_grouping_loop_end, program.offset()); + program.preassign_label_to_next_insn(label_grouping_loop_end); program.add_comment(program.offset(), "emit row for final group"); program.emit_insn(Insn::Gosub { @@ -340,18 +382,22 @@ pub fn emit_group_by<'a>( program.resolve_label(label_subrtn_acc_output, program.offset()); + // Only output a row if there's data in the accumulator program.add_comment(program.offset(), "output group by row subroutine start"); program.emit_insn(Insn::IfPos { reg: reg_data_in_acc_flag, target_pc: label_agg_final, decrement_by: 0, }); + + // If no data, return without outputting a row let group_by_end_without_emitting_row_label = program.allocate_label(); program.resolve_label(group_by_end_without_emitting_row_label, program.offset()); program.emit_insn(Insn::Return { return_reg: reg_subrtn_acc_output_return_offset, }); + // Finalize aggregate values for output let agg_start_reg = t_ctx.reg_agg_start.unwrap(); // Resolve the label for the start of the group by output row subroutine program.resolve_label(label_agg_final, program.offset()); @@ -363,16 +409,34 @@ pub fn emit_group_by<'a>( }); } - // we now have the group by columns in registers (group_exprs_start_register..group_exprs_start_register + group_by.len() - 1) - // and the agg results in (agg_start_reg..agg_start_reg + aggregates.len() - 1) - // we need to call translate_expr on each result column, but replace the expr with a register copy in case any part of the - // result column expression matches a) a group by column or b) an aggregation result. 
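The output path above relies on the classic VDBE subroutine pattern: Gosub records a return address in a register and jumps to the subroutine, which checks the "data in accumulator" flag and ends with Return. A minimal sketch of that control flow in plain Rust (a function call standing in for Gosub/Return):

    fn output_group(data_in_acc_flag: &mut i64, out: &mut Vec<&'static str>) {
        // Gosub lands here; IfPos checks whether the group accumulated any rows
        if *data_in_acc_flag > 0 {
            out.push("row for finished group"); // finalize aggregates and emit
            *data_in_acc_flag = 0; // cleared again before the next group starts
        }
        // Return: resume at the instruction after the Gosub
    }

    fn main() {
        let mut flag = 1; // the current group accumulated at least one row
        let mut out = Vec::new();
        output_group(&mut flag, &mut out); // emits a row
        output_group(&mut flag, &mut out); // empty group: falls through silently
        assert_eq!(out, vec!["row for finished group"]);
    }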
- for (i, expr) in group_by.exprs.iter().enumerate() { - t_ctx - .resolver - .expr_to_reg_cache - .push((expr, reg_group_exprs_acc + i)); + // Map GROUP BY expressions to their registers in the result set + for (i, (expr, is_in_result)) in group_by + .exprs + .iter() + .zip(group_by_expr_in_res_cols) + .enumerate() + { + if is_in_result { + if let Some(reg) = &column_register_mapping.get(i).and_then(|opt| *opt) { + t_ctx.resolver.expr_to_reg_cache.push((expr, *reg)); + } + } } + + // Map non-aggregate, non-GROUP BY columns to their registers + let non_agg_cols = plan + .result_columns + .iter() + .filter(|rc| !rc.contains_aggregates && !is_column_in_group_by(&rc.expr, &group_by.exprs)); + + for (idx, rc) in non_agg_cols.enumerate() { + let sorter_idx = group_by_count + idx; + if let Some(reg) = column_register_mapping.get(sorter_idx).and_then(|opt| *opt) { + t_ctx.resolver.expr_to_reg_cache.push((&rc.expr, reg)); + } + } + + // Map aggregate expressions to their result registers for (i, agg) in plan.aggregates.iter().enumerate() { t_ctx .resolver @@ -415,12 +479,18 @@ return_reg: reg_subrtn_acc_output_return_offset, }); + // Subroutine to clear accumulators for a new group program.add_comment(program.offset(), "clear accumulator subroutine start"); program.resolve_label(label_subrtn_acc_clear, program.offset()); - let start_reg = reg_group_exprs_acc; + let start_reg = reg_non_aggregate_exprs_acc; + + // Reset all accumulator registers to NULL program.emit_insn(Insn::Null { dest: start_reg, - dest_end: Some(start_reg + group_by.exprs.len() + plan.aggregates.len() - 1), + dest_end: Some( + start_reg + non_group_by_non_agg_column_count + group_by_count + plan.aggregates.len() - 1, + ), }); program.emit_insn(Insn::Integer { @@ -430,8 +500,7 @@ dest: reg_data_in_acc_flag, }); program.emit_insn(Insn::Return { return_reg: reg_subrtn_acc_clear_return_offset, }); - - program.resolve_label(label_group_by_end, program.offset()); + program.preassign_label_to_next_insn(label_group_by_end); Ok(()) } @@ -668,3 +737,9 @@ }; Ok(dest) } + +pub fn is_column_in_group_by(expr: &ast::Expr, group_by_exprs: &[ast::Expr]) -> bool { + group_by_exprs + .iter() + .any(|expr2| exprs_are_equivalent(expr, expr2)) +} diff --git a/core/translate/index.rs b/core/translate/index.rs new file mode 100644 index 000000000..063344bd8 --- /dev/null +++ b/core/translate/index.rs @@ -0,0 +1,298 @@ +use std::sync::Arc; + +use crate::{ + schema::{BTreeTable, Column, Index, IndexColumn, PseudoTable, Schema}, + storage::pager::CreateBTreeFlags, + util::normalize_ident, + vdbe::{ + builder::{CursorType, ProgramBuilder, QueryMode}, + insn::{IdxInsertFlags, Insn, RegisterOrLiteral}, + }, +}; +use limbo_sqlite3_parser::ast::{self, Expr, Id, SortOrder, SortedColumn}; + +use super::schema::{emit_schema_entry, SchemaEntryType, SQLITE_TABLEID}; + +pub fn translate_create_index( + mode: QueryMode, + unique_if_not_exists: (bool, bool), + idx_name: &str, + tbl_name: &str, + columns: &[SortedColumn], + schema: &Schema, +) -> crate::Result<ProgramBuilder> { + let idx_name = normalize_ident(idx_name); + let tbl_name = normalize_ident(tbl_name); + let mut program = ProgramBuilder::new(crate::vdbe::builder::ProgramBuilderOpts { + query_mode: mode, + num_cursors: 5, + approx_num_insns: 40, + approx_num_labels: 5, + }); + + // Check if the index is being created on a valid btree table and + // the name is globally unique in the schema.
+ if !schema.is_unique_idx_name(&idx_name) { + crate::bail_parse_error!("Error: index with name '{idx_name}' already exists."); + } + let Some(tbl) = schema.tables.get(&tbl_name) else { + crate::bail_parse_error!("Error: table '{tbl_name}' does not exist."); + }; + let Some(tbl) = tbl.btree() else { + crate::bail_parse_error!("Error: table '{tbl_name}' is not a b-tree table."); + }; + let columns = resolve_sorted_columns(&tbl, columns)?; + + // Prologue: + let init_label = program.emit_init(); + let start_offset = program.offset(); + + let idx = Arc::new(Index { + name: idx_name.clone(), + table_name: tbl.name.clone(), + root_page: 0, // we don't have access until it's created; we get it after parsing the schema table + columns: columns + .iter() + .map(|((pos_in_table, col), order)| IndexColumn { + name: col.name.as_ref().unwrap().clone(), + order: *order, + pos_in_table: *pos_in_table, + }) + .collect(), + unique: unique_if_not_exists.0, + ephemeral: false, + }); + + // Allocate the necessary cursors: + // + // 1. sqlite_schema_cursor_id - sqlite_schema table + // 2. btree_cursor_id - new index btree + // 3. table_cursor_id - table we are creating the index on + // 4. sorter_cursor_id - sorter + // 5. pseudo_cursor_id - pseudo table to store the sorted index values + let sqlite_table = schema.get_btree_table(SQLITE_TABLEID).unwrap(); + let sqlite_schema_cursor_id = program.alloc_cursor_id( + Some(SQLITE_TABLEID.to_owned()), + CursorType::BTreeTable(sqlite_table.clone()), + ); + let btree_cursor_id = program.alloc_cursor_id( + Some(idx_name.to_owned()), + CursorType::BTreeIndex(idx.clone()), + ); + let table_cursor_id = program.alloc_cursor_id( + Some(tbl_name.to_owned()), + CursorType::BTreeTable(tbl.clone()), + ); + let sorter_cursor_id = program.alloc_cursor_id(None, CursorType::Sorter); + let pseudo_table = PseudoTable::new_with_columns(tbl.columns.clone()); + let pseudo_cursor_id = program.alloc_cursor_id(None, CursorType::Pseudo(pseudo_table.into())); + + // Create a new B-Tree and store the root page index in a register + let root_page_reg = program.alloc_register(); + program.emit_insn(Insn::CreateBtree { + db: 0, + root: root_page_reg, + flags: CreateBTreeFlags::new_index(), + }); + + // open the sqlite schema table for writing and create a new entry for the index + program.emit_insn(Insn::OpenWrite { + cursor_id: sqlite_schema_cursor_id, + root_page: RegisterOrLiteral::Literal(sqlite_table.root_page), + }); + let sql = create_idx_stmt_to_sql(&tbl_name, &idx_name, unique_if_not_exists, &columns); + emit_schema_entry( + &mut program, + sqlite_schema_cursor_id, + SchemaEntryType::Index, + &idx_name, + &tbl_name, + root_page_reg, + Some(sql), + ); + + // determine the order of the columns in the index for the sorter + let order = idx.columns.iter().map(|c| c.order.clone()).collect(); + // open the sorter and the pseudo table + program.emit_insn(Insn::SorterOpen { + cursor_id: sorter_cursor_id, + columns: columns.len(), + order, + }); + let content_reg = program.alloc_register(); + program.emit_insn(Insn::OpenPseudo { + cursor_id: pseudo_cursor_id, + content_reg, + num_fields: columns.len() + 1, + }); + + // open the table we are creating the index on for reading + program.emit_insn(Insn::OpenRead { + cursor_id: table_cursor_id, + root_page: tbl.root_page, + }); + + let loop_start_label = program.allocate_label(); + let loop_end_label = program.allocate_label(); + program.emit_insn(Insn::Rewind { + cursor_id: table_cursor_id, + pc_if_empty: loop_end_label, + });
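The loop that follows deliberately goes through a sorter instead of inserting into the index b-tree directly: feeding IdxInsert keys in ascending order means every insert is an append at the SeekEnd position rather than a root-to-leaf seek. An illustrative simulation of the two phases (a plain Vec plus sort standing in for the Sorter and the b-tree):

    fn main() {
        let table = [(1i64, "b"), (2, "a"), (3, "c")]; // (rowid, indexed column)
        // Phase 1: scan the table, collecting (key, rowid) records
        // (the MakeRecord + SorterInsert loop below).
        let mut sorter: Vec<(&str, i64)> =
            table.iter().map(|&(rowid, col)| (col, rowid)).collect();
        // Phase 2: SorterSort, then drain in key order, appending to the index.
        sorter.sort();
        let mut index: Vec<(&str, i64)> = Vec::new();
        for entry in sorter {
            index.push(entry); // SeekEnd + IdxInsert is always an append here
        }
        assert_eq!(index, vec![("a", 2), ("b", 1), ("c", 3)]);
    }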
program.preassign_label_to_next_insn(loop_start_label); + + // Loop start: + // Collect index values into start_reg..rowid_reg + // emit MakeRecord (index key + rowid) into record_reg. + // + // Then insert the record into the sorter + let start_reg = program.alloc_registers(columns.len() + 1); + for (i, (col, _)) in columns.iter().enumerate() { + program.emit_insn(Insn::Column { + cursor_id: table_cursor_id, + column: col.0, + dest: start_reg + i, + }); + } + let rowid_reg = start_reg + columns.len(); + program.emit_insn(Insn::RowId { + cursor_id: table_cursor_id, + dest: rowid_reg, + }); + let record_reg = program.alloc_register(); + program.emit_insn(Insn::MakeRecord { + start_reg, + count: columns.len() + 1, + dest_reg: record_reg, + }); + program.emit_insn(Insn::SorterInsert { + cursor_id: sorter_cursor_id, + record_reg, + }); + + program.emit_insn(Insn::Next { + cursor_id: table_cursor_id, + pc_if_next: loop_start_label, + }); + program.preassign_label_to_next_insn(loop_end_label); + + // Open the index btree we created for writing to insert the + // newly sorted index records. + program.emit_insn(Insn::OpenWrite { + cursor_id: btree_cursor_id, + root_page: RegisterOrLiteral::Register(root_page_reg), + }); + + let sorted_loop_start = program.allocate_label(); + let sorted_loop_end = program.allocate_label(); + + // Sort the index records in the sorter + program.emit_insn(Insn::SorterSort { + cursor_id: sorter_cursor_id, + pc_if_empty: sorted_loop_end, + }); + program.preassign_label_to_next_insn(sorted_loop_start); + let sorted_record_reg = program.alloc_register(); + program.emit_insn(Insn::SorterData { + pseudo_cursor: pseudo_cursor_id, + cursor_id: sorter_cursor_id, + dest_reg: sorted_record_reg, + }); + + // seek to the end of the index btree to position the cursor for appending + program.emit_insn(Insn::SeekEnd { + cursor_id: btree_cursor_id, + }); + // insert new index record + program.emit_insn(Insn::IdxInsert { + cursor_id: btree_cursor_id, + record_reg: sorted_record_reg, + unpacked_start: None, // TODO: optimize with these to avoid decoding record twice + unpacked_count: None, + flags: IdxInsertFlags::new().use_seek(false), + }); + program.emit_insn(Insn::SorterNext { + cursor_id: sorter_cursor_id, + pc_if_next: sorted_loop_start, + }); + program.preassign_label_to_next_insn(sorted_loop_end); + + // End of the outer loop + // + // Keep schema table open to emit ParseSchema, close the other cursors. 
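+    // ParseSchema re-reads the row we just wrote to sqlite_schema so that the new
+    // index, with its real root page (known only after CreateBtree ran), gets
+    // registered in the in-memory Schema.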
+    program.close_cursors(&[sorter_cursor_id, table_cursor_id, btree_cursor_id]);
+
+    // TODO: SetCookie for schema change
+    //
+    // Parse the schema table to get the index root page and add new index to Schema
+    let parse_schema_where_clause = format!("name = '{}' AND type = 'index'", idx_name);
+    program.emit_insn(Insn::ParseSchema {
+        db: sqlite_schema_cursor_id,
+        where_clause: parse_schema_where_clause,
+    });
+    // Close the final sqlite_schema cursor
+    program.emit_insn(Insn::Close {
+        cursor_id: sqlite_schema_cursor_id,
+    });
+
+    // Epilogue:
+    program.emit_halt();
+    program.preassign_label_to_next_insn(init_label);
+    program.emit_transaction(true);
+    program.emit_constant_insns();
+    program.emit_goto(start_offset);
+
+    Ok(program)
+}
+
+fn resolve_sorted_columns<'a>(
+    table: &'a BTreeTable,
+    cols: &[SortedColumn],
+) -> crate::Result<Vec<((usize, &'a Column), SortOrder)>> {
+    let mut resolved = Vec::with_capacity(cols.len());
+    for sc in cols {
+        let ident = normalize_ident(match &sc.expr {
+            Expr::Id(Id(col_name)) | Expr::Name(ast::Name(col_name)) => col_name,
+            _ => crate::bail_parse_error!("Error: cannot use expressions in CREATE INDEX"),
+        });
+        let Some(col) = table.get_column(&ident) else {
+            crate::bail_parse_error!(
+                "Error: column '{ident}' does not exist in table '{}'",
+                table.name
+            );
+        };
+        resolved.push((col, sc.order.unwrap_or(SortOrder::Asc)));
+    }
+    Ok(resolved)
+}
+
+fn create_idx_stmt_to_sql(
+    tbl_name: &str,
+    idx_name: &str,
+    unique_if_not_exists: (bool, bool),
+    cols: &[((usize, &Column), SortOrder)],
+) -> String {
+    let mut sql = String::with_capacity(128);
+    sql.push_str("CREATE ");
+    if unique_if_not_exists.0 {
+        sql.push_str("UNIQUE ");
+    }
+    sql.push_str("INDEX ");
+    if unique_if_not_exists.1 {
+        sql.push_str("IF NOT EXISTS ");
+    }
+    sql.push_str(idx_name);
+    sql.push_str(" ON ");
+    sql.push_str(tbl_name);
+    sql.push_str(" (");
+    for (i, (col, order)) in cols.iter().enumerate() {
+        if i > 0 {
+            sql.push_str(", ");
+        }
+        sql.push_str(col.1.name.as_ref().unwrap());
+        if *order == SortOrder::Desc {
+            sql.push_str(" DESC");
+        }
+    }
+    sql.push(')');
+    sql
+}
diff --git a/core/translate/insert.rs b/core/translate/insert.rs
index 53368d30b..6887c43af 100644
--- a/core/translate/insert.rs
+++ b/core/translate/insert.rs
@@ -6,22 +6,22 @@ use limbo_sqlite3_parser::ast::{
 };
 
 use crate::error::SQLITE_CONSTRAINT_PRIMARYKEY;
-use crate::schema::Table;
+use crate::schema::{IndexColumn, Table};
 use crate::util::normalize_ident;
 use crate::vdbe::builder::{ProgramBuilderOpts, QueryMode};
+use crate::vdbe::insn::{IdxInsertFlags, RegisterOrLiteral};
 use crate::vdbe::BranchOffset;
 use crate::{
     schema::{Column, Schema},
-    translate::expr::translate_expr,
     vdbe::{
         builder::{CursorType, ProgramBuilder},
         insn::Insn,
     },
-    SymbolTable,
 };
-use crate::{Result, VirtualTable};
+use crate::{Result, SymbolTable, VirtualTable};
 
 use super::emitter::Resolver;
+use super::expr::{translate_expr_no_constant_opt, NoConstantOptReason};
 
 #[allow(clippy::too_many_arguments)]
 pub fn translate_insert(
@@ -82,16 +82,33 @@ pub fn translate_insert(
         Some(table_name.0.clone()),
         CursorType::BTreeTable(btree_table.clone()),
     );
+    // allocate cursor ids for each btree index cursor we'll need to populate the indexes
+    // (idx name, root_page, idx cursor id)
+    let idx_cursors = schema
+        .get_indices(&table_name.0)
+        .iter()
+        .map(|idx| {
+            (
+                &idx.name,
+                idx.root_page,
+                program.alloc_cursor_id(
+                    Some(table_name.0.clone()),
+                    CursorType::BTreeIndex(idx.clone()),
+                ),
+            )
+        })
+        .collect::<Vec<_>>();
     let root_page = btree_table.root_page;
     let 
values = match body { - InsertBody::Select(select, None) => match &select.body.select.deref() { + InsertBody::Select(select, _) => match &select.body.select.deref() { OneSelect::Values(values) => values, _ => todo!(), }, - _ => todo!(), + InsertBody::DefaultValues => &vec![vec![]], }; let column_mappings = resolve_columns_for_insert(&table, columns, values)?; + let index_col_mappings = resolve_indicies_for_insert(schema, table.as_ref(), &column_mappings)?; // Check if rowid was provided (through INTEGER PRIMARY KEY as a rowid alias) let rowid_alias_index = btree_table.columns.iter().position(|c| c.is_rowid_alias); let has_user_provided_rowid = { @@ -126,12 +143,15 @@ pub fn translate_insert( if inserting_multiple_rows { let yield_reg = program.alloc_register(); let jump_on_definition_label = program.allocate_label(); + let start_offset_label = program.allocate_label(); program.emit_insn(Insn::InitCoroutine { yield_reg, jump_on_definition: jump_on_definition_label, - start_offset: program.offset().add(1u32), + start_offset: start_offset_label, }); + program.resolve_label(start_offset_label, program.offset()); + for value in values { populate_column_registers( &mut program, @@ -148,13 +168,12 @@ pub fn translate_insert( }); } program.emit_insn(Insn::EndCoroutine { yield_reg }); - program.resolve_label(jump_on_definition_label, program.offset()); + program.preassign_label_to_next_insn(jump_on_definition_label); - program.emit_insn(Insn::OpenWriteAsync { + program.emit_insn(Insn::OpenWrite { cursor_id, - root_page, + root_page: RegisterOrLiteral::Literal(root_page), }); - program.emit_insn(Insn::OpenWriteAwait {}); // Main loop // FIXME: rollback is not implemented. E.g. if you insert 2 rows and one fails to unique constraint violation, @@ -166,11 +185,10 @@ pub fn translate_insert( }); } else { // Single row - populate registers directly - program.emit_insn(Insn::OpenWriteAsync { + program.emit_insn(Insn::OpenWrite { cursor_id, - root_page, + root_page: RegisterOrLiteral::Literal(root_page), }); - program.emit_insn(Insn::OpenWriteAwait {}); populate_column_registers( &mut program, @@ -182,7 +200,13 @@ pub fn translate_insert( &resolver, )?; } - + // Open all the index btrees for writing + for idx_cursor in idx_cursors.iter() { + program.emit_insn(Insn::OpenWrite { + cursor_id: idx_cursor.2, + root_page: idx_cursor.1.into(), + }); + } // Common record insertion logic for both single and multiple rows let check_rowid_is_integer_label = rowid_alias_reg.and(Some(program.allocate_label())); if let Some(reg) = rowid_alias_reg { @@ -246,8 +270,109 @@ pub fn translate_insert( err_code: SQLITE_CONSTRAINT_PRIMARYKEY, description: format!("{}.{}", table_name.0, rowid_column_name), }); + program.preassign_label_to_next_insn(make_record_label); + } - program.resolve_label(make_record_label, program.offset()); + match table.btree() { + Some(t) if t.is_strict => { + program.emit_insn(Insn::TypeCheck { + start_reg: column_registers_start, + count: num_cols, + check_generated: true, + table_reference: Rc::clone(&t), + }); + } + _ => (), + } + + for index_col_mapping in index_col_mappings.iter() { + // find which cursor we opened earlier for this index + let idx_cursor_id = idx_cursors + .iter() + .find(|(name, _, _)| *name == &index_col_mapping.idx_name) + .map(|(_, _, c_id)| *c_id) + .expect("no cursor found for index"); + + let num_cols = index_col_mapping.columns.len(); + // allocate scratch registers for the index columns plus rowid + let idx_start_reg = program.alloc_registers(num_cols + 1); + + // copy 
each index column from the table's column registers into these scratch regs
+        for (i, col) in index_col_mapping.columns.iter().enumerate() {
+            // copy from the table's column register over to the index's scratch register
+
+            program.emit_insn(Insn::Copy {
+                src_reg: column_registers_start + col.0,
+                dst_reg: idx_start_reg + i,
+                amount: 0,
+            });
+        }
+        // last register is the rowid
+        program.emit_insn(Insn::Copy {
+            src_reg: rowid_reg,
+            dst_reg: idx_start_reg + num_cols,
+            amount: 0,
+        });
+
+        let record_reg = program.alloc_register();
+        program.emit_insn(Insn::MakeRecord {
+            start_reg: idx_start_reg,
+            count: num_cols + 1,
+            dest_reg: record_reg,
+        });
+
+        let index = schema
+            .get_index(&table_name.0, &index_col_mapping.idx_name)
+            .expect("index should be present");
+
+        if index.unique {
+            let label_idx_insert = program.allocate_label();
+            program.emit_insn(Insn::NoConflict {
+                cursor_id: idx_cursor_id,
+                target_pc: label_idx_insert,
+                record_reg: idx_start_reg,
+                num_regs: num_cols,
+            });
+            let column_names = index_col_mapping.columns.iter().enumerate().fold(
+                String::with_capacity(50),
+                |mut accum, (idx, (index, _))| {
+                    if idx > 0 {
+                        accum.push_str(", ");
+                    }
+
+                    accum.push_str(&btree_table.name);
+                    accum.push('.');
+
+                    let name = btree_table
+                        .columns
+                        .get(*index)
+                        .unwrap()
+                        .name
+                        .as_ref()
+                        .expect("column name is None");
+                    accum.push_str(name);
+
+                    accum
+                },
+            );
+
+            program.emit_insn(Insn::Halt {
+                err_code: SQLITE_CONSTRAINT_PRIMARYKEY,
+                description: column_names,
+            });
+
+            program.resolve_label(label_idx_insert, program.offset());
+        }
+
+        // now do the actual index insertion using the unpacked registers
+        program.emit_insn(Insn::IdxInsert {
+            cursor_id: idx_cursor_id,
+            record_reg,
+            unpacked_start: Some(idx_start_reg), // TODO: enable optimization
+            unpacked_count: Some((num_cols + 1) as u16),
+            // TODO: figure out how to determine whether or not we need to seek prior to insert.
+            flags: IdxInsertFlags::new(),
+        });
     }
 
     // Create and insert the record
@@ -257,13 +382,12 @@
         dest_reg: record_register,
     });
 
-    program.emit_insn(Insn::InsertAsync {
+    program.emit_insn(Insn::Insert {
         cursor: cursor_id,
         key_reg: rowid_reg,
         record_reg: record_register,
         flag: 0,
     });
-    program.emit_insn(Insn::InsertAwait { cursor_id });
 
     if inserting_multiple_rows {
         // For multiple rows, loop back
@@ -277,8 +401,8 @@
         err_code: 0,
         description: String::new(),
     });
+    program.preassign_label_to_next_insn(init_label);
 
-    program.resolve_label(init_label, program.offset());
     program.emit_insn(Insn::Transaction { write: true });
     program.emit_constant_insns();
     program.emit_insn(Insn::Goto {
@@ -297,6 +421,8 @@ struct ColumnMapping<'a> {
     /// If Some(i), use the i-th value from the VALUES tuple
     /// If None, use NULL (column was not specified in INSERT statement)
     value_index: Option<usize>,
+    /// The default value for the column, if defined
+    default_value: Option<&'a Expr>,
 }
 
 /// Resolves how each column in a table should be populated during an INSERT.
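/// For example, `INSERT INTO t (b) VALUES (5)` on a table `t (a, b)` maps column `b`
/// to value index 0, while `a` gets no value index and falls back to its DEFAULT
/// expression (or NULL) in populate_column_registers below.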
@@ -352,6 +478,7 @@ fn resolve_columns_for_insert<'a>(
             .map(|(i, col)| ColumnMapping {
                 column: col,
                 value_index: if i < num_values { Some(i) } else { None },
+                default_value: col.default.as_ref(),
             })
             .collect());
     }
@@ -362,6 +489,7 @@
         .map(|col| ColumnMapping {
             column: col,
             value_index: None,
+            default_value: col.default.as_ref(),
         })
         .collect();
 
@@ -388,6 +516,69 @@
     Ok(mappings)
 }
 
+/// Represents how a column in an index should be populated during an INSERT.
+/// Similar to ColumnMapping above but includes the index name, as well as multiple
+/// possible value indices for each.
+#[derive(Debug, Default)]
+struct IndexColMapping {
+    idx_name: String,
+    columns: Vec<(usize, IndexColumn)>,
+    value_indicies: Vec<Option<usize>>,
+}
+
+impl IndexColMapping {
+    fn new(name: String) -> Self {
+        IndexColMapping {
+            idx_name: name,
+            ..Default::default()
+        }
+    }
+}
+
+/// Example:
+/// Table 'test': (a, b, c);
+/// Index 'idx': test(a, b);
+///________________________________
+/// Insert (a, c): (2, 3)
+/// Record: (2, NULL, 3)
+/// IndexColMapping: (a, b) = (2, NULL)
fn resolve_indicies_for_insert(
+    schema: &Schema,
+    table: &Table,
+    columns: &[ColumnMapping<'_>],
+) -> Result<Vec<IndexColMapping>> {
+    let mut index_col_mappings = Vec::new();
+    // Iterate over all indices for this table
+    for index in schema.get_indices(table.get_name()) {
+        let mut idx_map = IndexColMapping::new(index.name.clone());
+        // For each column in the index (in the order defined by the index),
+        // try to find the corresponding column in the insert's column mapping.
+        for idx_col in &index.columns {
+            let target_name = normalize_ident(idx_col.name.as_str());
+            if let Some((i, col_mapping)) = columns.iter().enumerate().find(|(_, mapping)| {
+                mapping
+                    .column
+                    .name
+                    .as_ref()
+                    .map_or(false, |name| name.eq_ignore_ascii_case(&target_name))
+            }) {
+                idx_map.columns.push((i, idx_col.clone()));
+                idx_map.value_indicies.push(col_mapping.value_index);
+            } else {
+                return Err(crate::LimboError::ParseError(format!(
+                    "Column {} not found in index {}",
+                    target_name, index.name
+                )));
+            }
+        }
+        // Add the mapping if at least one column was found.
+        if !idx_map.columns.is_empty() {
+            index_col_mappings.push(idx_map);
+        }
+    }
+    Ok(index_col_mappings)
+}
+
 /// Populates the column registers with values for a single row
 fn populate_column_registers(
     program: &mut ProgramBuilder,
@@ -413,18 +604,28 @@
         } else {
             target_reg
         };
-        translate_expr(
+        translate_expr_no_constant_opt(
             program,
             None,
             value.get(value_index).expect("value index out of bounds"),
             reg,
             resolver,
+            NoConstantOptReason::RegisterReuse,
         )?;
         if write_directly_to_rowid_reg {
             program.emit_insn(Insn::SoftNull { reg: target_reg });
         }
+    } else if let Some(default_expr) = mapping.default_value {
+        translate_expr_no_constant_opt(
+            program,
+            None,
+            default_expr,
+            target_reg,
+            resolver,
+            NoConstantOptReason::RegisterReuse,
+        )?;
     } else {
-        // Column was not specified - use NULL if it is nullable, otherwise error
+        // Column was not specified and has no DEFAULT - use NULL if it is nullable, otherwise error
         // Rowid alias columns can be NULL because we will autogenerate a rowid in that case.
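        // e.g. `INSERT INTO t (a) VALUES (1)` on `t (a, b)` with no DEFAULT for b
        // stores NULL for b, unless b is a (non rowid-alias) primary key column.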
let is_nullable = !mapping.column.primary_key || mapping.column.is_rowid_alias; if is_nullable { @@ -472,7 +673,14 @@ fn translate_virtual_table_insert( let value_registers_start = program.alloc_registers(values[0].len()); for (i, expr) in values[0].iter().enumerate() { - translate_expr(program, None, expr, value_registers_start + i, resolver)?; + translate_expr_no_constant_opt( + program, + None, + expr, + value_registers_start + i, + resolver, + NoConstantOptReason::RegisterReuse, + )?; } /* * * Inserts for virtual tables are done in a single step. @@ -526,12 +734,12 @@ fn translate_virtual_table_insert( }); let halt_label = program.allocate_label(); + program.resolve_label(halt_label, program.offset()); program.emit_insn(Insn::Halt { err_code: 0, description: String::new(), }); - program.resolve_label(halt_label, program.offset()); program.resolve_label(init_label, program.offset()); program.emit_insn(Insn::Goto { diff --git a/core/translate/main_loop.rs b/core/translate/main_loop.rs index 7b51a2328..a1cabc511 100644 --- a/core/translate/main_loop.rs +++ b/core/translate/main_loop.rs @@ -1,12 +1,16 @@ +use limbo_ext::VTabKind; use limbo_sqlite3_parser::ast; +use std::sync::Arc; + use crate::{ - schema::Table, + schema::{Index, Table}, translate::result_row::emit_select_result, + types::SeekOp, vdbe::{ - builder::{CursorType, ProgramBuilder}, - insn::{CmpInsFlags, Insn}, - BranchOffset, + builder::ProgramBuilder, + insn::{CmpInsFlags, IdxInsertFlags, Insn}, + BranchOffset, CursorID, }, Result, }; @@ -14,11 +18,16 @@ use crate::{ use super::{ aggregation::translate_aggregation_step, emitter::{OperationMode, TranslateCtx}, - expr::{translate_condition_expr, translate_expr, ConditionMetadata}, + expr::{ + translate_condition_expr, translate_expr, translate_expr_no_constant_opt, + ConditionMetadata, NoConstantOptReason, + }, + group_by::is_column_in_group_by, + optimizer::Optimizable, order_by::{order_by_sorter_insert, sorter_insert}, plan::{ - IterationDirection, Operation, Search, SelectPlan, SelectQueryType, TableReference, - WhereTerm, + convert_where_to_vtab_constraint, IterationDirection, Operation, Search, SeekDef, + SelectPlan, SelectQueryType, TableReference, WhereTerm, }, }; @@ -37,11 +46,11 @@ pub struct LeftJoinMetadata { #[derive(Debug, Clone, Copy)] pub struct LoopLabels { /// jump to the start of the loop body - loop_start: BranchOffset, - /// jump to the NextAsync instruction (or equivalent) - next: BranchOffset, + pub loop_start: BranchOffset, + /// jump to the Next instruction (or equivalent) + pub next: BranchOffset, /// jump to the end of the loop, exiting it - loop_end: BranchOffset, + pub loop_end: BranchOffset, } impl LoopLabels { @@ -77,119 +86,101 @@ pub fn init_loop( t_ctx.meta_left_joins[table_index] = Some(lj_metadata); } } + let (table_cursor_id, index_cursor_id) = table.open_cursors(program, mode)?; match &table.op { - Operation::Scan { .. } => { - let cursor_id = program.alloc_cursor_id( - Some(table.identifier.clone()), - match &table.table { - Table::BTree(_) => CursorType::BTreeTable(table.btree().unwrap().clone()), - Table::Virtual(_) => { - CursorType::VirtualTable(table.virtual_table().unwrap().clone()) - } - other => panic!("Invalid table reference type in Scan: {:?}", other), - }, - ); - match (mode, &table.table) { - (OperationMode::SELECT, Table::BTree(btree)) => { - let root_page = btree.root_page; - program.emit_insn(Insn::OpenReadAsync { + Operation::Scan { index, .. 
} => match (mode, &table.table) { + (OperationMode::SELECT, Table::BTree(btree)) => { + let root_page = btree.root_page; + if let Some(cursor_id) = table_cursor_id { + program.emit_insn(Insn::OpenRead { cursor_id, root_page, }); - program.emit_insn(Insn::OpenReadAwait {}); } - (OperationMode::DELETE, Table::BTree(btree)) => { - let root_page = btree.root_page; - program.emit_insn(Insn::OpenWriteAsync { - cursor_id, - root_page, + if let Some(index_cursor_id) = index_cursor_id { + program.emit_insn(Insn::OpenRead { + cursor_id: index_cursor_id, + root_page: index.as_ref().unwrap().root_page, }); - program.emit_insn(Insn::OpenWriteAwait {}); - } - (OperationMode::UPDATE, Table::BTree(btree)) => { - let root_page = btree.root_page; - program.emit_insn(Insn::OpenWriteAsync { - cursor_id, - root_page, - }); - program.emit_insn(Insn::OpenWriteAwait {}); - } - (OperationMode::SELECT, Table::Virtual(_)) => { - program.emit_insn(Insn::VOpenAsync { cursor_id }); - program.emit_insn(Insn::VOpenAwait {}); - } - (OperationMode::DELETE, Table::Virtual(_)) => { - program.emit_insn(Insn::VOpenAsync { cursor_id }); - program.emit_insn(Insn::VOpenAwait {}); - } - _ => { - unimplemented!() } } - } + (OperationMode::DELETE, Table::BTree(btree)) => { + let root_page = btree.root_page; + program.emit_insn(Insn::OpenWrite { + cursor_id: table_cursor_id + .expect("table cursor is always opened in OperationMode::DELETE"), + root_page: root_page.into(), + }); + } + (OperationMode::UPDATE, Table::BTree(btree)) => { + let root_page = btree.root_page; + program.emit_insn(Insn::OpenWrite { + cursor_id: table_cursor_id + .expect("table cursor is always opened in OperationMode::UPDATE"), + root_page: root_page.into(), + }); + if let Some(index_cursor_id) = index_cursor_id { + program.emit_insn(Insn::OpenWrite { + cursor_id: index_cursor_id, + root_page: index.as_ref().unwrap().root_page.into(), + }); + } + } + (_, Table::Virtual(_)) => { + if let Some(cursor_id) = table_cursor_id { + program.emit_insn(Insn::VOpen { cursor_id }); + } + } + _ => { + unimplemented!() + } + }, Operation::Search(search) => { - let table_cursor_id = program.alloc_cursor_id( - Some(table.identifier.clone()), - CursorType::BTreeTable(table.btree().unwrap().clone()), - ); - match mode { OperationMode::SELECT => { - program.emit_insn(Insn::OpenReadAsync { - cursor_id: table_cursor_id, - root_page: table.table.get_root_page(), - }); - program.emit_insn(Insn::OpenReadAwait {}); + if let Some(table_cursor_id) = table_cursor_id { + program.emit_insn(Insn::OpenRead { + cursor_id: table_cursor_id, + root_page: table.table.get_root_page(), + }); + } } - OperationMode::DELETE => { - program.emit_insn(Insn::OpenWriteAsync { + OperationMode::DELETE | OperationMode::UPDATE => { + let table_cursor_id = table_cursor_id.expect("table cursor is always opened in OperationMode::DELETE or OperationMode::UPDATE"); + program.emit_insn(Insn::OpenWrite { cursor_id: table_cursor_id, - root_page: table.table.get_root_page(), + root_page: table.table.get_root_page().into(), }); - program.emit_insn(Insn::OpenWriteAwait {}); - } - OperationMode::UPDATE => { - program.emit_insn(Insn::OpenWriteAsync { - cursor_id: table_cursor_id, - root_page: table.table.get_root_page(), - }); - program.emit_insn(Insn::OpenWriteAwait {}); } _ => { unimplemented!() } } - if let Search::IndexSearch { index, .. 
} = search {
-                let index_cursor_id = program.alloc_cursor_id(
-                    Some(index.name.clone()),
-                    CursorType::BTreeIndex(index.clone()),
-                );
-
-                match mode {
-                    OperationMode::SELECT => {
-                        program.emit_insn(Insn::OpenReadAsync {
-                            cursor_id: index_cursor_id,
-                            root_page: index.root_page,
-                        });
-                        program.emit_insn(Insn::OpenReadAwait);
-                    }
-                    OperationMode::DELETE => {
-                        program.emit_insn(Insn::OpenWriteAsync {
-                            cursor_id: index_cursor_id,
-                            root_page: index.root_page,
-                        });
-                        program.emit_insn(Insn::OpenWriteAwait {});
-                    }
-                    OperationMode::UPDATE => {
-                        program.emit_insn(Insn::OpenWriteAsync {
-                            cursor_id: index_cursor_id,
-                            root_page: index.root_page,
-                        });
-                        program.emit_insn(Insn::OpenWriteAwait {});
-                    }
-                    _ => {
-                        unimplemented!()
+            if let Search::Seek {
+                index: Some(index), ..
+            } = search
+            {
+                // Ephemeral index cursors are opened ad hoc when needed.
+                if !index.ephemeral {
+                    match mode {
+                        OperationMode::SELECT => {
+                            program.emit_insn(Insn::OpenRead {
+                                cursor_id: index_cursor_id
+                                    .expect("index cursor is always opened in Seek with index"),
+                                root_page: index.root_page,
+                            });
+                        }
+                        OperationMode::UPDATE | OperationMode::DELETE => {
+                            program.emit_insn(Insn::OpenWrite {
+                                cursor_id: index_cursor_id
+                                    .expect("index cursor is always opened in Seek with index"),
+                                root_page: index.root_page.into(),
+                            });
+                        }
+                        _ => {
+                            unimplemented!()
+                        }
+                    }
+                }
+            }
         }
@@ -202,7 +193,7 @@ pub fn init_loop(
 }
 
 /// Set up the main query execution loop
-/// For example in the case of a nested table scan, this means emitting the RewindAsync instruction
+/// For example in the case of a nested table scan, this means emitting the Rewind instruction
 /// for all tables involved, outermost first.
 pub fn open_loop(
     program: &mut ProgramBuilder,
@@ -233,6 +224,8 @@ pub fn open_loop(
         }
     }
 
+    let (table_cursor_id, index_cursor_id) = table.resolve_cursors(program)?;
+
     match &table.op {
         Operation::Subquery { plan, .. } => {
             let (yield_reg, coroutine_implementation_start) = match &plan.query_type {
@@ -248,7 +241,7 @@ pub fn open_loop(
                 jump_on_definition: BranchOffset::Offset(0),
                 start_offset: coroutine_implementation_start,
             });
-            program.resolve_label(loop_start, program.offset());
+            program.preassign_label_to_next_insn(loop_start);
             // A subquery within the main loop of a parent query has no cursor, so instead of advancing the cursor,
             // it emits a Yield which jumps back to the main loop of the subquery itself to retrieve the next row.
             // When the subquery coroutine completes, this instruction jumps to the label at the top of the termination_label_stack,
             program.emit_insn(Insn::Yield {
                 yield_reg,
                 end_offset: loop_end,
             });
 
-            // These are predicates evaluated outside of the subquery,
-            // so they are translated here.
-            // E.g. SELECT foo FROM (SELECT bar as foo FROM t1) sub WHERE sub.foo > 10
             for cond in predicates
                 .iter()
                 .filter(|cond| cond.should_eval_at_loop(table_index))
@@ -278,213 +268,137 @@ pub fn open_loop(
                     condition_metadata,
                     &t_ctx.resolver,
                 )?;
-                program.resolve_label(jump_target_when_true, program.offset());
+                program.preassign_label_to_next_insn(jump_target_when_true);
             }
         }
-        Operation::Scan { iter_dir } => {
-            let cursor_id = program.resolve_cursor_id(&table.identifier);
-
+        Operation::Scan { iter_dir, .. 
} => {
+            let iteration_cursor_id = index_cursor_id.unwrap_or_else(|| {
+                table_cursor_id.expect("Either index or table cursor must be opened")
+            });
             if !matches!(&table.table, Table::Virtual(_)) {
-                if iter_dir
-                    .as_ref()
-                    .is_some_and(|dir| *dir == IterationDirection::Backwards)
-                {
-                    program.emit_insn(Insn::LastAsync { cursor_id });
+                if *iter_dir == IterationDirection::Backwards {
+                    program.emit_insn(Insn::Last {
+                        cursor_id: iteration_cursor_id,
+                        pc_if_empty: loop_end,
+                    });
                 } else {
-                    program.emit_insn(Insn::RewindAsync { cursor_id });
+                    program.emit_insn(Insn::Rewind {
+                        cursor_id: iteration_cursor_id,
+                        pc_if_empty: loop_end,
+                    });
                 }
-            }
-            match &table.table {
-                Table::BTree(_) => program.emit_insn(
-                    if iter_dir
-                        .as_ref()
-                        .is_some_and(|dir| *dir == IterationDirection::Backwards)
-                    {
-                        Insn::LastAwait {
-                            cursor_id,
-                            pc_if_empty: loop_end,
+                program.preassign_label_to_next_insn(loop_start);
+            } else if let Some(vtab) = table.virtual_table() {
+                let (start_reg, count, maybe_idx_str, maybe_idx_int) = if vtab
+                    .kind
+                    .eq(&VTabKind::VirtualTable)
+                {
+                    // Virtual-table (non-TVF) modules can receive constraints via xBestIndex.
+                    // They return information that we then pass to the VFilter operation.
+                    // We forward every predicate that touches vtab columns.
+                    //
+                    // vtab.col = literal (always usable)
+                    // vtab.col = outer_table.col (usable, because outer_table is already positioned)
+                    // vtab.col = later_table.col (forwarded with usable = false)
+                    //
+                    // xBestIndex decides which ones it wants by setting argvIndex and whether the
+                    // core layer may omit them (omit = true).
+                    // We then materialise the RHS/LHS into registers before issuing VFilter.
+                    let converted_constraints = predicates
+                        .iter()
+                        .filter(|p| p.should_eval_at_loop(table_index))
+                        .enumerate()
+                        .filter_map(|(i, p)| {
+                            // Build ConstraintInfo from the predicates
+                            convert_where_to_vtab_constraint(p, table_index, i)
+                        })
+                        .collect::<Vec<_>>();
+                    // TODO: get proper order_by information to pass to the vtab.
+                    // maybe encode more info on t_ctx? we need: [col_idx, is_descending]
+                    let index_info = vtab.best_index(&converted_constraints, &[]);
+
+                    // Determine the number of VFilter arguments (constraints with an argv_index).
+                    let args_needed = index_info
+                        .constraint_usages
+                        .iter()
+                        .filter(|u| u.argv_index.is_some())
+                        .count();
+                    let start_reg = program.alloc_registers(args_needed);
+
+                    // For each constraint used by best_index, translate the opposite side.
+                    for (i, usage) in index_info.constraint_usages.iter().enumerate() {
+                        if let Some(argv_index) = usage.argv_index {
+                            if let Some(cinfo) = converted_constraints.get(i) {
+                                let (pred_idx, is_rhs) = cinfo.unpack_plan_info();
+                                if let ast::Expr::Binary(lhs, _, rhs) =
+                                    &predicates[pred_idx].expr
+                                {
+                                    // translate the opposite side of the referenced vtab column
+                                    let expr = if is_rhs { lhs } else { rhs };
+                                    // argv_index is 1-based; adjust to get the proper register offset.
+                                    let target_reg = start_reg + (argv_index - 1) as usize;
+                                    translate_expr(
+                                        program,
+                                        Some(tables),
+                                        expr,
+                                        target_reg,
+                                        &t_ctx.resolver,
+                                    )?;
+                                    if cinfo.usable && usage.omit {
+                                        t_ctx.omit_predicates.push(pred_idx)
+                                    }
+                                }
+                            }
                         }
+                    }
+                    // If best_index provided an idx_str, translate it.
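+                    // (idx_str and idx_num are opaque to the core layer; they are handed
+                    // back to the module unchanged via VFilter so it can recall which
+                    // plan xBestIndex chose.)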
+ let maybe_idx_str = if let Some(idx_str) = index_info.idx_str { + let reg = program.alloc_register(); + program.emit_insn(Insn::String8 { + dest: reg, + value: idx_str, + }); + Some(reg) } else { - Insn::RewindAwait { - cursor_id, - pc_if_empty: loop_end, - } - }, - ), - Table::Virtual(ref table) => { - let start_reg = program - .alloc_registers(table.args.as_ref().map(|a| a.len()).unwrap_or(0)); - let mut cur_reg = start_reg; - let args = match table.args.as_ref() { + None + }; + ( + start_reg, + args_needed, + maybe_idx_str, + Some(index_info.idx_num), + ) + } else { + // For table-valued functions: translate the table args. + let args = match vtab.args.as_ref() { Some(args) => args, None => &vec![], }; + let start_reg = program.alloc_registers(args.len()); + let mut cur_reg = start_reg; for arg in args { let reg = cur_reg; cur_reg += 1; let _ = translate_expr(program, Some(tables), arg, reg, &t_ctx.resolver)?; } - program.emit_insn(Insn::VFilter { - cursor_id, - pc_if_empty: loop_end, - arg_count: table.args.as_ref().map_or(0, |args| args.len()), - args_reg: start_reg, - }); - } - other => panic!("Unsupported table reference type: {:?}", other), - } - program.resolve_label(loop_start, program.offset()); - - for cond in predicates - .iter() - .filter(|cond| cond.should_eval_at_loop(table_index)) - { - let jump_target_when_true = program.allocate_label(); - let condition_metadata = ConditionMetadata { - jump_if_condition_is_true: false, - jump_target_when_true, - jump_target_when_false: next, - }; - translate_condition_expr( - program, - tables, - &cond.expr, - condition_metadata, - &t_ctx.resolver, - )?; - program.resolve_label(jump_target_when_true, program.offset()); - } - } - Operation::Search(search) => { - let table_cursor_id = program.resolve_cursor_id(&table.identifier); - // Open the loop for the index search. - // Rowid equality point lookups are handled with a SeekRowid instruction which does not loop, since it is a single row lookup. - if !matches!(search, Search::RowidEq { .. }) { - let index_cursor_id = if let Search::IndexSearch { index, .. } = search { - Some(program.resolve_cursor_id(&index.name)) - } else { - None - }; - let cmp_reg = program.alloc_register(); - let (cmp_expr, cmp_op) = match search { - Search::IndexSearch { - cmp_expr, cmp_op, .. - } => (cmp_expr, cmp_op), - Search::RowidSearch { cmp_expr, cmp_op } => (cmp_expr, cmp_op), - Search::RowidEq { .. } => unreachable!(), + (start_reg, args.len(), None, None) }; - // TODO this only handles ascending indexes - match cmp_op { - ast::Operator::Equals - | ast::Operator::Greater - | ast::Operator::GreaterEquals => { - translate_expr( - program, - Some(tables), - &cmp_expr.expr, - cmp_reg, - &t_ctx.resolver, - )?; - } - ast::Operator::Less | ast::Operator::LessEquals => { - program.emit_insn(Insn::Null { - dest: cmp_reg, - dest_end: None, - }); - } - _ => unreachable!(), - } - // If we try to seek to a key that is not present in the table/index, we exit the loop entirely. 
- program.emit_insn(match cmp_op { - ast::Operator::Equals | ast::Operator::GreaterEquals => Insn::SeekGE { - is_index: index_cursor_id.is_some(), - cursor_id: index_cursor_id.unwrap_or(table_cursor_id), - start_reg: cmp_reg, - num_regs: 1, - target_pc: loop_end, - }, - ast::Operator::Greater - | ast::Operator::Less - | ast::Operator::LessEquals => Insn::SeekGT { - is_index: index_cursor_id.is_some(), - cursor_id: index_cursor_id.unwrap_or(table_cursor_id), - start_reg: cmp_reg, - num_regs: 1, - target_pc: loop_end, - }, - _ => unreachable!(), + // Emit VFilter with the computed arguments. + program.emit_insn(Insn::VFilter { + cursor_id: table_cursor_id + .expect("Virtual tables do not support covering indexes"), + arg_count: count, + args_reg: start_reg, + idx_str: maybe_idx_str, + idx_num: maybe_idx_int.unwrap_or(0) as usize, + pc_if_empty: loop_end, }); - if *cmp_op == ast::Operator::Less || *cmp_op == ast::Operator::LessEquals { - translate_expr( - program, - Some(tables), - &cmp_expr.expr, - cmp_reg, - &t_ctx.resolver, - )?; - } - - program.resolve_label(loop_start, program.offset()); - // TODO: We are currently only handling ascending indexes. - // For conditions like index_key > 10, we have already sought to the first key greater than 10, and can just scan forward. - // For conditions like index_key < 10, we are at the beginning of the index, and will scan forward and emit IdxGE(10) with a conditional jump to the end. - // For conditions like index_key = 10, we have already sought to the first key greater than or equal to 10, and can just scan forward and emit IdxGT(10) with a conditional jump to the end. - // For conditions like index_key >= 10, we have already sought to the first key greater than or equal to 10, and can just scan forward. - // For conditions like index_key <= 10, we are at the beginning of the index, and will scan forward and emit IdxGT(10) with a conditional jump to the end. - // For conditions like index_key != 10, TODO. probably the optimal way is not to use an index at all. - // - // For primary key searches we emit RowId and then compare it to the seek value. 
-
-            match cmp_op {
-                ast::Operator::Equals | ast::Operator::LessEquals => {
-                    if let Some(index_cursor_id) = index_cursor_id {
-                        program.emit_insn(Insn::IdxGT {
-                            cursor_id: index_cursor_id,
-                            start_reg: cmp_reg,
-                            num_regs: 1,
-                            target_pc: loop_end,
-                        });
-                    } else {
-                        let rowid_reg = program.alloc_register();
-                        program.emit_insn(Insn::RowId {
-                            cursor_id: table_cursor_id,
-                            dest: rowid_reg,
-                        });
-                        program.emit_insn(Insn::Gt {
-                            lhs: rowid_reg,
-                            rhs: cmp_reg,
-                            target_pc: loop_end,
-                            flags: CmpInsFlags::default(),
-                        });
-                    }
-                }
-                ast::Operator::Less => {
-                    if let Some(index_cursor_id) = index_cursor_id {
-                        program.emit_insn(Insn::IdxGE {
-                            cursor_id: index_cursor_id,
-                            start_reg: cmp_reg,
-                            num_regs: 1,
-                            target_pc: loop_end,
-                        });
-                    } else {
-                        let rowid_reg = program.alloc_register();
-                        program.emit_insn(Insn::RowId {
-                            cursor_id: table_cursor_id,
-                            dest: rowid_reg,
-                        });
-                        program.emit_insn(Insn::Ge {
-                            lhs: rowid_reg,
-                            rhs: cmp_reg,
-                            target_pc: loop_end,
-                            flags: CmpInsFlags::default(),
-                        });
-                    }
-                }
-                _ => {}
-            }
+            program.preassign_label_to_next_insn(loop_start);
+            }
+            if let Some(table_cursor_id) = table_cursor_id {
                 if let Some(index_cursor_id) = index_cursor_id {
                     program.emit_insn(Insn::DeferredSeek {
                         index_cursor_id,
@@ -493,6 +407,28 @@ pub fn open_loop(
                 }
             }
 
+            for (_, cond) in predicates.iter().enumerate().filter(|(i, cond)| {
+                cond.should_eval_at_loop(table_index) && !t_ctx.omit_predicates.contains(i)
+            }) {
+                let jump_target_when_true = program.allocate_label();
+                let condition_metadata = ConditionMetadata {
+                    jump_if_condition_is_true: false,
+                    jump_target_when_true,
+                    jump_target_when_false: next,
+                };
+                translate_condition_expr(
+                    program,
+                    tables,
+                    &cond.expr,
+                    condition_metadata,
+                    &t_ctx.resolver,
+                )?;
+                program.preassign_label_to_next_insn(jump_target_when_true);
+            }
+        }
+        Operation::Search(search) => {
+            // Open the loop for the index search.
+            // Rowid equality point lookups are handled with a SeekRowid instruction which does not loop, since it is a single row lookup.
             if let Search::RowidEq { cmp_expr } = search {
                 let src_reg = program.alloc_register();
                 translate_expr(
@@ -503,11 +439,81 @@ pub fn open_loop(
                     &t_ctx.resolver,
                 )?;
                 program.emit_insn(Insn::SeekRowid {
-                    cursor_id: table_cursor_id,
+                    cursor_id: table_cursor_id
+                        .expect("Search::RowidEq requires a table cursor"),
                     src_reg,
                     target_pc: next,
                 });
+            } else {
+                // Otherwise, it's an index/rowid scan, i.e. first a seek is performed and then a scan until the comparison expression is not satisfied anymore.
+                let index_cursor_id = if let Search::Seek {
+                    index: Some(index), ..
+                } = search
+                {
+                    if index.ephemeral {
+                        let table_has_rowid = if let Table::BTree(btree) = &table.table {
+                            btree.has_rowid
+                        } else {
+                            false
+                        };
+                        Some(emit_autoindex(
+                            program,
+                            &index,
+                            table_cursor_id
+                                .expect("an ephemeral index must have a source table cursor"),
+                            index_cursor_id
+                                .expect("an ephemeral index must have an index cursor"),
+                            table_has_rowid,
+                        )?)
+                    } else {
+                        index_cursor_id
+                    }
+                } else {
+                    index_cursor_id
+                };
+
+                let is_index = index_cursor_id.is_some();
+                let seek_cursor_id = index_cursor_id.unwrap_or_else(|| {
+                    table_cursor_id.expect("Either index or table cursor must be opened")
+                });
+                let Search::Seek { seek_def, .. 
} = search else { + unreachable!("Rowid equality point lookup should have been handled above"); + }; + + let start_reg = program.alloc_registers(seek_def.key.len()); + emit_seek( + program, + tables, + seek_def, + t_ctx, + seek_cursor_id, + start_reg, + loop_end, + is_index, + )?; + emit_seek_termination( + program, + tables, + seek_def, + t_ctx, + seek_cursor_id, + start_reg, + loop_start, + loop_end, + is_index, + )?; + + if let Some(index_cursor_id) = index_cursor_id { + if let Some(table_cursor_id) = table_cursor_id { + // Don't do a btree table seek until it's actually necessary to read from the table. + program.emit_insn(Insn::DeferredSeek { + index_cursor_id, + table_cursor_id, + }); + } + } } + for cond in predicates .iter() .filter(|cond| cond.should_eval_at_loop(table_index)) @@ -525,7 +531,7 @@ pub fn open_loop( condition_metadata, &t_ctx.resolver, )?; - program.resolve_label(jump_target_when_true, program.offset()); + program.preassign_label_to_next_insn(jump_target_when_true); } } } @@ -597,19 +603,46 @@ fn emit_loop_source( ) -> Result<()> { match emit_target { LoopEmitTarget::GroupBySorter => { + // This function creates a sorter for GROUP BY operations by allocating registers and + // translating expressions for three types of columns: + // 1) GROUP BY columns (used as sorting keys) + // 2) non-aggregate, non-GROUP BY columns + // 3) aggregate function arguments let group_by = plan.group_by.as_ref().unwrap(); let aggregates = &plan.aggregates; - let sort_keys_count = group_by.exprs.len(); + + // Identify columns in the result set that are neither in GROUP BY nor contain aggregates + let non_group_by_non_agg_expr = plan + .result_columns + .iter() + .filter(|rc| { + !rc.contains_aggregates && !is_column_in_group_by(&rc.expr, &group_by.exprs) + }) + .map(|rc| &rc.expr); + let non_agg_count = non_group_by_non_agg_expr.clone().count(); + // Store the count of non-GROUP BY, non-aggregate columns in the metadata + // This will be used later during aggregation processing + t_ctx.meta_group_by.as_mut().map(|meta| { + meta.non_group_by_non_agg_column_count = Some(non_agg_count); + meta + }); + + // Calculate the total number of arguments used across all aggregate functions let aggregate_arguments_count = plan .aggregates .iter() .map(|agg| agg.args.len()) .sum::(); - let column_count = sort_keys_count + aggregate_arguments_count; + + // Calculate total number of registers needed for all columns in the sorter + let column_count = group_by.exprs.len() + aggregate_arguments_count + non_agg_count; + + // Allocate a contiguous block of registers for all columns let start_reg = program.alloc_registers(column_count); let mut cur_reg = start_reg; - // The group by sorter rows will contain the grouping keys first. They are also the sort keys. + // Step 1: Process GROUP BY columns first + // These will be the first columns in the sorter and serve as sort keys for expr in group_by.exprs.iter() { let key_reg = cur_reg; cur_reg += 1; @@ -621,14 +654,28 @@ fn emit_loop_source( &t_ctx.resolver, )?; } - // Then we have the aggregate arguments. 
+ + // Step 2: Process columns that aren't part of GROUP BY and don't contain aggregates + // Example: SELECT col1, col2, SUM(col3) FROM table GROUP BY col1 + // Here col2 would be processed in this loop if it's in the result set + for expr in non_group_by_non_agg_expr { + let key_reg = cur_reg; + cur_reg += 1; + translate_expr( + program, + Some(&plan.table_references), + expr, + key_reg, + &t_ctx.resolver, + )?; + } + + // Step 3: Process arguments for all aggregate functions + // For each aggregate, translate all its argument expressions for agg in aggregates.iter() { - // Here we are collecting scalars for the group by sorter, which will include - // both the group by expressions and the aggregate arguments. - // e.g. in `select u.first_name, sum(u.age) from users group by u.first_name` - // the sorter will have two scalars: u.first_name and u.age. - // these are then sorted by u.first_name, and for each u.first_name, we sum the u.age. - // the actual aggregation is done later. + // For a query like: SELECT group_col, SUM(val1), AVG(val2) FROM table GROUP BY group_col + // we'll process val1 and val2 here, storing them in the sorter so they're available + // when computing the aggregates after sorting by group_col for expr in agg.args.iter() { let agg_reg = cur_reg; cur_reg += 1; @@ -642,9 +689,6 @@ fn emit_loop_source( } } - // TODO: although it's less often useful, SQLite does allow for expressions in the SELECT that are not part of a GROUP BY or aggregate. - // We currently ignore those and only emit the GROUP BY keys and aggregate arguments. This should be fixed. - let group_by_metadata = t_ctx.meta_group_by.as_ref().unwrap(); sorter_insert( @@ -677,14 +721,31 @@ fn emit_loop_source( &t_ctx.resolver, )?; } - for (i, rc) in plan.result_columns.iter().enumerate() { - if rc.contains_aggregates { - // Do nothing, aggregates are computed above - // if this result column is e.g. something like sum(x) + 1 or length(sum(x)), we do not want to translate that (+1) or length() yet, - // it will be computed after the aggregations are finalized. - continue; - } - let reg = start_reg + num_aggs + i; + + let label_emit_nonagg_only_once = if let Some(flag) = t_ctx.reg_nonagg_emit_once_flag { + let if_label = program.allocate_label(); + program.emit_insn(Insn::If { + reg: flag, + target_pc: if_label, + jump_if_null: false, + }); + Some(if_label) + } else { + None + }; + + let col_start = t_ctx.reg_result_cols_start.unwrap(); + + // Process only non-aggregate columns + let non_agg_columns = plan + .result_columns + .iter() + .enumerate() + .filter(|(_, rc)| !rc.contains_aggregates); + + for (i, rc) in non_agg_columns { + let reg = col_start + i; + translate_expr( program, Some(&plan.table_references), @@ -693,6 +754,12 @@ fn emit_loop_source( &t_ctx.resolver, )?; } + if let Some(label) = label_emit_nonagg_only_once { + program.resolve_label(label, program.offset()); + let flag = t_ctx.reg_nonagg_emit_once_flag.unwrap(); + program.emit_int(1, flag); + } + Ok(()) } LoopEmitTarget::QueryResult => { @@ -719,7 +786,7 @@ fn emit_loop_source( } /// Closes the loop for a given source operator. -/// For example in the case of a nested table scan, this means emitting the NextAsync instruction +/// For example in the case of a nested table scan, this means emitting the Next instruction /// for all tables involved, innermost first. 
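+/// After the loop-end label this also emits the LEFT JOIN NULL-row logic: if the
+/// match flag was never set, the right table's cursors emit a NullRow and we jump
+/// back to produce a row padded with NULLs for the right table.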
pub fn close_loop( program: &mut ProgramBuilder, @@ -741,74 +808,77 @@ pub fn close_loop( .get(table_index) .expect("source has no loop labels"); + let (table_cursor_id, index_cursor_id) = table.resolve_cursors(program)?; + match &table.op { Operation::Subquery { .. } => { program.resolve_label(loop_labels.next, program.offset()); - // A subquery has no cursor to call NextAsync on, so it just emits a Goto + // A subquery has no cursor to call Next on, so it just emits a Goto // to the Yield instruction, which in turn jumps back to the main loop of the subquery, // so that the next row from the subquery can be read. program.emit_insn(Insn::Goto { target_pc: loop_labels.loop_start, }); + program.preassign_label_to_next_insn(loop_labels.loop_end); } Operation::Scan { iter_dir, .. } => { program.resolve_label(loop_labels.next, program.offset()); - let cursor_id = program.resolve_cursor_id(&table.identifier); + let iteration_cursor_id = index_cursor_id.unwrap_or_else(|| { + table_cursor_id.expect("Either index or table cursor must be opened") + }); match &table.table { Table::BTree(_) => { - if iter_dir - .as_ref() - .is_some_and(|dir| *dir == IterationDirection::Backwards) - { - program.emit_insn(Insn::PrevAsync { cursor_id }); - } else { - program.emit_insn(Insn::NextAsync { cursor_id }); - } - if iter_dir - .as_ref() - .is_some_and(|dir| *dir == IterationDirection::Backwards) - { - program.emit_insn(Insn::PrevAwait { - cursor_id, - pc_if_next: loop_labels.loop_start, + if *iter_dir == IterationDirection::Backwards { + program.emit_insn(Insn::Prev { + cursor_id: iteration_cursor_id, + pc_if_prev: loop_labels.loop_start, }); } else { - program.emit_insn(Insn::NextAwait { - cursor_id, + program.emit_insn(Insn::Next { + cursor_id: iteration_cursor_id, pc_if_next: loop_labels.loop_start, }); } } Table::Virtual(_) => { program.emit_insn(Insn::VNext { - cursor_id, + cursor_id: table_cursor_id + .expect("Virtual tables do not support covering indexes"), pc_if_next: loop_labels.loop_start, }); } other => unreachable!("Unsupported table reference type: {:?}", other), } + program.preassign_label_to_next_insn(loop_labels.loop_end); } Operation::Search(search) => { program.resolve_label(loop_labels.next, program.offset()); - // Rowid equality point lookups are handled with a SeekRowid instruction which does not loop, so there is no need to emit a NextAsync instruction. + let iteration_cursor_id = index_cursor_id.unwrap_or_else(|| { + table_cursor_id.expect("Either index or table cursor must be opened") + }); + // Rowid equality point lookups are handled with a SeekRowid instruction which does not loop, so there is no need to emit a Next instruction. if !matches!(search, Search::RowidEq { .. }) { - let cursor_id = match search { - Search::IndexSearch { index, .. } => program.resolve_cursor_id(&index.name), - Search::RowidSearch { .. } => program.resolve_cursor_id(&table.identifier), + let iter_dir = match search { + Search::Seek { seek_def, .. } => seek_def.iter_dir, Search::RowidEq { .. 
} => unreachable!(), }; - program.emit_insn(Insn::NextAsync { cursor_id }); - program.emit_insn(Insn::NextAwait { - cursor_id, - pc_if_next: loop_labels.loop_start, - }); + if iter_dir == IterationDirection::Backwards { + program.emit_insn(Insn::Prev { + cursor_id: iteration_cursor_id, + pc_if_prev: loop_labels.loop_start, + }); + } else { + program.emit_insn(Insn::Next { + cursor_id: iteration_cursor_id, + pc_if_next: loop_labels.loop_start, + }); + } } + program.preassign_label_to_next_insn(loop_labels.loop_end); } } - program.resolve_label(loop_labels.loop_end, program.offset()); - // Handle OUTER JOIN logic. The reason this comes after the "loop end" mark is that we may need to still jump back // and emit a row with NULLs for the right table, and then jump back to the next row of the left table. if let Some(join_info) = table.join_info.as_ref() { @@ -819,24 +889,26 @@ pub fn close_loop( // If the left join match flag has been set to 1, we jump to the next row on the outer table, // i.e. continue to the next row of t1 in our example. program.resolve_label(lj_meta.label_match_flag_check_value, program.offset()); - let jump_offset = program.offset().add(3u32); + let label_when_right_table_notnull = program.allocate_label(); program.emit_insn(Insn::IfPos { reg: lj_meta.reg_match_flag, - target_pc: jump_offset, + target_pc: label_when_right_table_notnull, decrement_by: 0, }); // If the left join match flag is still 0, it means there was no match on the right table, // but since it's a LEFT JOIN, we still need to emit a row with NULLs for the right table. // In that case, we now enter the routine that does exactly that. - // First we set the right table cursor's "pseudo null bit" on, which means any Insn::Column will return NULL - let right_cursor_id = match &table.op { - Operation::Scan { .. } => program.resolve_cursor_id(&table.identifier), - Operation::Search { .. } => program.resolve_cursor_id(&table.identifier), - _ => unreachable!(), - }; - program.emit_insn(Insn::NullRow { - cursor_id: right_cursor_id, - }); + // First we set the right table cursor's "pseudo null bit" on, which means any Insn::Column will return NULL. + // This needs to be set for both the table and the index cursor, if present, + // since even if the iteration cursor is the index cursor, it might fetch values from the table cursor. + [table_cursor_id, index_cursor_id] + .iter() + .filter_map(|maybe_cursor_id| maybe_cursor_id.as_ref()) + .for_each(|cursor_id| { + program.emit_insn(Insn::NullRow { + cursor_id: *cursor_id, + }); + }); // Then we jump to setting the left join match flag to 1 again, // but this time the right table cursor will set everything to null. // This leads to emitting a row with cols from the left + nulls from the right, @@ -846,10 +918,319 @@ pub fn close_loop( program.emit_insn(Insn::Goto { target_pc: lj_meta.label_match_flag_set_true, }); - - assert_eq!(program.offset(), jump_offset); + program.preassign_label_to_next_insn(label_when_right_table_notnull); } } } Ok(()) } + +/// Emits instructions for an index seek. See e.g. [crate::translate::plan::SeekDef] +/// for more details about the seek definition. +/// +/// Index seeks always position the cursor to the first row that matches the seek key, +/// and then continue to emit rows until the termination condition is reached, +/// see [emit_seek_termination] below. +/// +/// If either 1. the seek finds no rows or 2. the termination condition is reached, +/// the loop for that given table/index is fully exited. 
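+///
+/// For example, `SELECT * FROM t WHERE x > 10` scanning forwards over an index on x
+/// seeks with SeekGT to the first entry with x > 10, and then iterates with Next
+/// until the termination condition (if any) jumps to the loop end.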
+#[allow(clippy::too_many_arguments)]
+fn emit_seek(
+    program: &mut ProgramBuilder,
+    tables: &[TableReference],
+    seek_def: &SeekDef,
+    t_ctx: &mut TranslateCtx,
+    seek_cursor_id: usize,
+    start_reg: usize,
+    loop_end: BranchOffset,
+    is_index: bool,
+) -> Result<()> {
+    let Some(seek) = seek_def.seek.as_ref() else {
+        // If there is no seek key, we start from the first or last row of the index,
+        // depending on the iteration direction.
+        match seek_def.iter_dir {
+            IterationDirection::Forwards => {
+                program.emit_insn(Insn::Rewind {
+                    cursor_id: seek_cursor_id,
+                    pc_if_empty: loop_end,
+                });
+            }
+            IterationDirection::Backwards => {
+                program.emit_insn(Insn::Last {
+                    cursor_id: seek_cursor_id,
+                    pc_if_empty: loop_end,
+                });
+            }
+        }
+        return Ok(());
+    };
+    // We allocated registers for the full index key, but our seek key might not use the full index key.
+    // See [crate::translate::optimizer::build_seek_def] for more details about in which cases we do and don't use the full index key.
+    for i in 0..seek_def.key.len() {
+        let reg = start_reg + i;
+        if i >= seek.len {
+            if seek.null_pad {
+                program.emit_insn(Insn::Null {
+                    dest: reg,
+                    dest_end: None,
+                });
+            }
+        } else {
+            let expr = &seek_def.key[i].0;
+            translate_expr_no_constant_opt(
+                program,
+                Some(tables),
+                &expr,
+                reg,
+                &t_ctx.resolver,
+                NoConstantOptReason::RegisterReuse,
+            )?;
+            // If the seek key column is not verifiably non-NULL, we need to check whether it is NULL,
+            // and if so, jump to the loop end.
+            // This is to avoid returning rows for e.g. SELECT * FROM t WHERE t.x > NULL,
+            // which would erroneously return all rows from t, as NULL is lower than any non-NULL value in index key comparisons.
+            if !expr.is_nonnull(tables) {
+                program.emit_insn(Insn::IsNull {
+                    reg,
+                    target_pc: loop_end,
+                });
+            }
+        }
+    }
+    let num_regs = if seek.null_pad {
+        seek_def.key.len()
+    } else {
+        seek.len
+    };
+    match seek.op {
+        SeekOp::GE => program.emit_insn(Insn::SeekGE {
+            is_index,
+            cursor_id: seek_cursor_id,
+            start_reg,
+            num_regs,
+            target_pc: loop_end,
+        }),
+        SeekOp::GT => program.emit_insn(Insn::SeekGT {
+            is_index,
+            cursor_id: seek_cursor_id,
+            start_reg,
+            num_regs,
+            target_pc: loop_end,
+        }),
+        SeekOp::LE => program.emit_insn(Insn::SeekLE {
+            is_index,
+            cursor_id: seek_cursor_id,
+            start_reg,
+            num_regs,
+            target_pc: loop_end,
+        }),
+        SeekOp::LT => program.emit_insn(Insn::SeekLT {
+            is_index,
+            cursor_id: seek_cursor_id,
+            start_reg,
+            num_regs,
+            target_pc: loop_end,
+        }),
+        SeekOp::EQ => panic!("An index seek is never EQ"),
+    };
+
+    Ok(())
+}
+
+/// Emits instructions for an index seek termination. See e.g. [crate::translate::plan::SeekDef]
+/// for more details about the seek definition.
+///
+/// Index seeks always position the cursor to the first row that matches the seek key
+/// (see [emit_seek] above), and then continue to emit rows until the termination condition
+/// (if any) is reached.
+///
+/// If the termination condition is not present, the cursor is fully scanned to the end.
+#[allow(clippy::too_many_arguments)]
+fn emit_seek_termination(
+    program: &mut ProgramBuilder,
+    tables: &[TableReference],
+    seek_def: &SeekDef,
+    t_ctx: &mut TranslateCtx,
+    seek_cursor_id: usize,
+    start_reg: usize,
+    loop_start: BranchOffset,
+    loop_end: BranchOffset,
+    is_index: bool,
+) -> Result<()> {
+    let Some(termination) = seek_def.termination.as_ref() else {
+        program.preassign_label_to_next_insn(loop_start);
+        return Ok(());
+    };
+
+    // How many non-NULL values were used for seeking.
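+    // (The termination key may be longer than the seek key; any suffix that was not
+    // already materialised for the seek is translated into registers below.)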
+ let seek_len = seek_def.seek.as_ref().map_or(0, |seek| seek.len); + + // How many values will be used for the termination condition. + let num_regs = if termination.null_pad { + seek_def.key.len() + } else { + termination.len + }; + for i in 0..seek_def.key.len() { + let reg = start_reg + i; + let is_last = i == seek_def.key.len() - 1; + + // For all index key values apart from the last one, we are guaranteed to use the same values + // as were used for the seek, so we don't need to emit them again. + if i < seek_len && !is_last { + continue; + } + // For the last index key value, we need to emit a NULL if the termination condition is NULL-padded. + // See [SeekKey::null_pad] and [crate::translate::optimizer::build_seek_def] for why this is the case. + if i >= termination.len && !termination.null_pad { + continue; + } + if is_last && termination.null_pad { + program.emit_insn(Insn::Null { + dest: reg, + dest_end: None, + }); + // if the seek key is shorter than the termination key, we need to translate the remaining suffix of the termination key. + // if not, we just reuse what was emitted for the seek. + } else if seek_len < termination.len { + translate_expr_no_constant_opt( + program, + Some(tables), + &seek_def.key[i].0, + reg, + &t_ctx.resolver, + NoConstantOptReason::RegisterReuse, + )?; + } + } + program.preassign_label_to_next_insn(loop_start); + let mut rowid_reg = None; + if !is_index { + rowid_reg = Some(program.alloc_register()); + program.emit_insn(Insn::RowId { + cursor_id: seek_cursor_id, + dest: rowid_reg.unwrap(), + }); + } + + match (is_index, termination.op) { + (true, SeekOp::GE) => program.emit_insn(Insn::IdxGE { + cursor_id: seek_cursor_id, + start_reg, + num_regs, + target_pc: loop_end, + }), + (true, SeekOp::GT) => program.emit_insn(Insn::IdxGT { + cursor_id: seek_cursor_id, + start_reg, + num_regs, + target_pc: loop_end, + }), + (true, SeekOp::LE) => program.emit_insn(Insn::IdxLE { + cursor_id: seek_cursor_id, + start_reg, + num_regs, + target_pc: loop_end, + }), + (true, SeekOp::LT) => program.emit_insn(Insn::IdxLT { + cursor_id: seek_cursor_id, + start_reg, + num_regs, + target_pc: loop_end, + }), + (false, SeekOp::GE) => program.emit_insn(Insn::Ge { + lhs: rowid_reg.unwrap(), + rhs: start_reg, + target_pc: loop_end, + flags: CmpInsFlags::default(), + }), + (false, SeekOp::GT) => program.emit_insn(Insn::Gt { + lhs: rowid_reg.unwrap(), + rhs: start_reg, + target_pc: loop_end, + flags: CmpInsFlags::default(), + }), + (false, SeekOp::LE) => program.emit_insn(Insn::Le { + lhs: rowid_reg.unwrap(), + rhs: start_reg, + target_pc: loop_end, + flags: CmpInsFlags::default(), + }), + (false, SeekOp::LT) => program.emit_insn(Insn::Lt { + lhs: rowid_reg.unwrap(), + rhs: start_reg, + target_pc: loop_end, + flags: CmpInsFlags::default(), + }), + (_, SeekOp::EQ) => { + panic!("An index termination condition is never EQ") + } + }; + + Ok(()) +} + +/// Open an ephemeral index cursor and build an automatic index on a table. +/// This is used as a last-resort to avoid a nested full table scan +/// Returns the cursor id of the ephemeral index cursor. +fn emit_autoindex( + program: &mut ProgramBuilder, + index: &Arc, + table_cursor_id: CursorID, + index_cursor_id: CursorID, + table_has_rowid: bool, +) -> Result { + assert!(index.ephemeral, "Index {} is not ephemeral", index.name); + let label_ephemeral_build_end = program.allocate_label(); + // Since this typically happens in an inner loop, we only build it once. 
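+    // (Insn::Once falls through on first execution and jumps to
+    // label_ephemeral_build_end on every re-entry, skipping the rebuild.)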
+ program.emit_insn(Insn::Once { + target_pc_when_reentered: label_ephemeral_build_end, + }); + program.emit_insn(Insn::OpenAutoindex { + cursor_id: index_cursor_id, + }); + // Rewind source table + let label_ephemeral_build_loop_start = program.allocate_label(); + program.emit_insn(Insn::Rewind { + cursor_id: table_cursor_id, + pc_if_empty: label_ephemeral_build_loop_start, + }); + program.preassign_label_to_next_insn(label_ephemeral_build_loop_start); + // Emit all columns from source table that are needed in the ephemeral index. + // Also reserve a register for the rowid if the source table has rowids. + let num_regs_to_reserve = index.columns.len() + table_has_rowid as usize; + let ephemeral_cols_start_reg = program.alloc_registers(num_regs_to_reserve); + for (i, col) in index.columns.iter().enumerate() { + let reg = ephemeral_cols_start_reg + i; + program.emit_insn(Insn::Column { + cursor_id: table_cursor_id, + column: col.pos_in_table, + dest: reg, + }); + } + if table_has_rowid { + program.emit_insn(Insn::RowId { + cursor_id: table_cursor_id, + dest: ephemeral_cols_start_reg + index.columns.len(), + }); + } + let record_reg = program.alloc_register(); + program.emit_insn(Insn::MakeRecord { + start_reg: ephemeral_cols_start_reg, + count: num_regs_to_reserve, + dest_reg: record_reg, + }); + program.emit_insn(Insn::IdxInsert { + cursor_id: index_cursor_id, + record_reg, + unpacked_start: Some(ephemeral_cols_start_reg), + unpacked_count: Some(num_regs_to_reserve as u16), + flags: IdxInsertFlags::new().use_seek(false), + }); + program.emit_insn(Insn::Next { + cursor_id: table_cursor_id, + pc_if_next: label_ephemeral_build_loop_start, + }); + program.preassign_label_to_next_insn(label_ephemeral_build_end); + Ok(index_cursor_id) +} diff --git a/core/translate/mod.rs b/core/translate/mod.rs index 739ae5f03..b05e78a6c 100644 --- a/core/translate/mod.rs +++ b/core/translate/mod.rs @@ -12,6 +12,7 @@ pub(crate) mod delete; pub(crate) mod emitter; pub(crate) mod expr; pub(crate) mod group_by; +pub(crate) mod index; pub(crate) mod insert; pub(crate) mod main_loop; pub(crate) mod optimizer; @@ -34,6 +35,7 @@ use crate::translate::delete::translate_delete; use crate::vdbe::builder::{ProgramBuilder, ProgramBuilderOpts, QueryMode}; use crate::vdbe::Program; use crate::{bail_parse_error, Connection, Result, SymbolTable}; +use index::translate_create_index; use insert::translate_insert; use limbo_sqlite3_parser::ast::{self, Delete, Insert}; use schema::{translate_create_table, translate_create_virtual_table, translate_drop_table}; @@ -61,7 +63,24 @@ pub fn translate( ast::Stmt::Attach { .. } => bail_parse_error!("ATTACH not supported yet"), ast::Stmt::Begin(tx_type, tx_name) => translate_tx_begin(tx_type, tx_name)?, ast::Stmt::Commit(tx_name) => translate_tx_commit(tx_name)?, - ast::Stmt::CreateIndex { .. } => bail_parse_error!("CREATE INDEX not supported yet"), + ast::Stmt::CreateIndex { + unique, + if_not_exists, + idx_name, + tbl_name, + columns, + .. + } => { + change_cnt_on = true; + translate_create_index( + query_mode, + (unique, if_not_exists), + &idx_name.name.0, + &tbl_name.0, + &columns, + schema, + )? + } ast::Stmt::CreateTable { temporary, if_not_exists, @@ -78,7 +97,7 @@ pub fn translate( ast::Stmt::CreateTrigger { .. } => bail_parse_error!("CREATE TRIGGER not supported yet"), ast::Stmt::CreateView { .. } => bail_parse_error!("CREATE VIEW not supported yet"), ast::Stmt::CreateVirtualTable(vtab) => { - translate_create_virtual_table(*vtab, schema, query_mode)? 
+            translate_create_virtual_table(*vtab, schema, query_mode, &syms)?
         }
         ast::Stmt::Delete(delete) => {
             let Delete {
diff --git a/core/translate/optimizer.rs b/core/translate/optimizer.rs
index 6fa7f9619..8520f3e9d 100644
--- a/core/translate/optimizer.rs
+++ b/core/translate/optimizer.rs
@@ -1,15 +1,22 @@
-use std::{collections::HashMap, sync::Arc};
+use std::{cmp::Ordering, collections::HashMap, sync::Arc};
 
-use limbo_sqlite3_parser::ast;
+use limbo_sqlite3_parser::ast::{self, Expr, SortOrder};
 
 use crate::{
-    schema::{Index, Schema},
+    schema::{Index, IndexColumn, Schema},
+    translate::plan::TerminationKey,
+    types::SeekOp,
+    util::exprs_are_equivalent,
     Result,
 };
 
-use super::plan::{
-    DeletePlan, Direction, IterationDirection, Operation, Plan, Search, SelectPlan, TableReference,
-    UpdatePlan, WhereTerm,
+use super::{
+    emitter::Resolver,
+    plan::{
+        DeletePlan, EvalAt, GroupBy, IterationDirection, Operation, Plan, Search, SeekDef, SeekKey,
+        SelectPlan, TableReference, UpdatePlan, WhereTerm,
+    },
+    planner::determine_where_to_eval_expr,
 };
 
 pub fn optimize_plan(plan: &mut Plan, schema: &Schema) -> Result<()> {
@@ -39,9 +46,11 @@ fn optimize_select_plan(plan: &mut SelectPlan, schema: &Schema) -> Result<()> {
         &mut plan.table_references,
         &schema.indexes,
         &mut plan.where_clause,
+        &mut plan.order_by,
+        &plan.group_by,
     )?;
 
-    eliminate_unnecessary_orderby(plan, schema)?;
+    eliminate_orderby_like_groupby(plan)?;
 
     Ok(())
}
@@ -59,6 +68,8 @@ fn optimize_delete_plan(plan: &mut DeletePlan, schema: &Schema) -> Result<()> {
         &mut plan.table_references,
         &schema.indexes,
         &mut plan.where_clause,
+        &mut plan.order_by,
+        &None,
     )?;
 
     Ok(())
@@ -76,6 +87,8 @@ fn optimize_update_plan(plan: &mut UpdatePlan, schema: &Schema) -> Result<()> {
         &mut plan.table_references,
         &schema.indexes,
         &mut plan.where_clause,
+        &mut plan.order_by,
+        &None,
     )?;
     Ok(())
}
@@ -90,95 +103,264 @@ fn optimize_subqueries(plan: &mut SelectPlan, schema: &Schema) -> Result<()> {
     Ok(())
}
 
-fn query_is_already_ordered_by(
-    table_references: &[TableReference],
-    key: &mut ast::Expr,
-    available_indexes: &HashMap<String, Vec<Arc<Index>>>,
-) -> Result<bool> {
-    let first_table = table_references.first();
-    if first_table.is_none() {
-        return Ok(false);
+fn eliminate_orderby_like_groupby(plan: &mut SelectPlan) -> Result<()> {
+    if plan.order_by.is_none() | plan.group_by.is_none() {
+        return Ok(());
     }
-    let table_reference = first_table.unwrap();
-    match &table_reference.op {
-        Operation::Scan { .. } => Ok(key.is_rowid_alias_of(0)),
-        Operation::Search(search) => match search {
-            Search::RowidEq { .. } => Ok(key.is_rowid_alias_of(0)),
-            Search::RowidSearch { .. } => Ok(key.is_rowid_alias_of(0)),
-            Search::IndexSearch { index, .. } => {
-                let index_rc = key.check_index_scan(0, table_reference, available_indexes)?;
-                let index_is_the_same = index_rc
-                    .map(|irc| Arc::ptr_eq(index, &irc))
-                    .unwrap_or(false);
                Ok(index_is_the_same)
-            }
-        },
-        _ => Ok(false),
+    if plan.table_references.len() == 0 {
+        return Ok(());
     }
+
+    let order_by_clauses = plan.order_by.as_mut().unwrap();
+    // TODO: let's make the group by sorter aware of the order by directions so we don't need to skip
+    // descending terms.
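+    // For example (illustrative only): `SELECT a, COUNT(*) FROM t GROUP BY a ORDER BY a`
+    // can drop its ORDER BY below, because the group by sorter already emits groups in
+    // ascending key order, whereas `... ORDER BY a DESC` cannot, and bails out here.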
+    if order_by_clauses
+        .iter()
+        .any(|(_, dir)| matches!(dir, SortOrder::Desc))
+    {
+        return Ok(());
+    }
+    let group_by_clauses = plan.group_by.as_mut().unwrap();
+    // all order by terms must be in the group by clause for order by to be eliminated
+    if !order_by_clauses.iter().all(|(o_expr, _)| {
+        group_by_clauses
+            .exprs
+            .iter()
+            .any(|g_expr| exprs_are_equivalent(g_expr, o_expr))
+    }) {
+        return Ok(());
+    }
+
+    // reorder group by terms so that they match the order by terms
+    // this way the group by sorter will effectively do the order by sorter's job and
+    // we can remove the order by clause
+    group_by_clauses.exprs.sort_by_key(|g_expr| {
+        order_by_clauses
+            .iter()
+            .position(|(o_expr, _)| exprs_are_equivalent(o_expr, g_expr))
+            .unwrap_or(usize::MAX)
+    });
+
+    plan.order_by = None;
+    Ok(())
}
 
-fn eliminate_unnecessary_orderby(plan: &mut SelectPlan, schema: &Schema) -> Result<()> {
-    if plan.order_by.is_none() {
-        return Ok(());
+/// Eliminate unnecessary ORDER BY clauses.
+/// Returns true if the ORDER BY clause was eliminated.
+fn eliminate_unnecessary_orderby(
+    table_references: &mut [TableReference],
+    available_indexes: &HashMap<String, Vec<Arc<Index>>>,
+    order_by: &mut Option<Vec<(ast::Expr, SortOrder)>>,
+    group_by: &Option<GroupBy>,
+) -> Result<bool> {
+    let Some(order) = order_by else {
+        return Ok(false);
+    };
+    let Some(first_table_reference) = table_references.first_mut() else {
+        return Ok(false);
+    };
+    let Some(btree_table) = first_table_reference.btree() else {
+        return Ok(false);
+    };
+    // If a GROUP BY clause is present, we can't rely on already-ordered columns, because GROUP BY reorders the data.
+    // This early return prevents the elimination of ORDER BY when GROUP BY exists, as sorting must be applied after grouping.
+    // And if the ORDER BY clause duplicates GROUP BY, we handle it later in fn eliminate_orderby_like_groupby.
+    if group_by.is_some() {
+        return Ok(false);
     }
-    if plan.table_references.is_empty() {
-        return Ok(());
+    let Operation::Scan {
+        index, iter_dir, ..
+    } = &mut first_table_reference.op
+    else {
+        return Ok(false);
+    };
+
+    assert!(
+        index.is_none(),
+        "Nothing should have transformed the scan to use an index yet"
+    );
+
+    // Special case: if ordering by just the rowid, we can remove the ORDER BY clause
+    if order.len() == 1 && order[0].0.is_rowid_alias_of(0) {
+        *iter_dir = match order[0].1 {
+            SortOrder::Asc => IterationDirection::Forwards,
+            SortOrder::Desc => IterationDirection::Backwards,
+        };
+        *order_by = None;
+        return Ok(true);
     }
-    let o = plan.order_by.as_mut().unwrap();
 
-    if o.len() != 1 {
-        // TODO: handle multiple order by keys
-        return Ok(());
+    // Find the best matching index for the ORDER BY columns
+    let table_name = &btree_table.name;
+    let mut best_index = (None, 0);
 
+    for (_, indexes) in available_indexes.iter() {
+        for index_candidate in indexes.iter().filter(|i| &i.table_name == table_name) {
+            let matching_columns = index_candidate.columns.iter().enumerate().take_while(|(i, c)| {
+                if let Some((Expr::Column { table, column, .. }, _)) = order.get(*i) {
+                    let col_idx_in_table = btree_table
+                        .columns
+                        .iter()
+                        .position(|tc| tc.name.as_ref() == Some(&c.name));
+                    matches!(col_idx_in_table, Some(col_idx) if *table == 0 && *column == col_idx)
+                } else {
+                    false
+                }
+            }).count();
+
+            if matching_columns > best_index.1 {
+                best_index = (Some(index_candidate), matching_columns);
+            }
+        }
    }
 
-    let (key, direction) = o.first_mut().unwrap();
+    let Some(matching_index) = best_index.0 else {
+        return Ok(false);
+    };
+    let match_count = best_index.1;
 
-    let already_ordered =
-        query_is_already_ordered_by(&plan.table_references, key, &schema.indexes)?;
+    // If we found a matching index, use it for scanning
+    *index = Some(matching_index.clone());
+    // If the order by direction matches the index direction, we can iterate the index in forwards order.
+    // If they don't, we must iterate the index in backwards order.
+    let index_direction = &matching_index.columns.first().as_ref().unwrap().order;
+    *iter_dir = match (index_direction, order[0].1) {
+        (SortOrder::Asc, SortOrder::Asc) | (SortOrder::Desc, SortOrder::Desc) => {
+            IterationDirection::Forwards
+        }
+        (SortOrder::Asc, SortOrder::Desc) | (SortOrder::Desc, SortOrder::Asc) => {
+            IterationDirection::Backwards
+        }
+    };
 
-    if already_ordered {
-        push_scan_direction(&mut plan.table_references[0], direction);
-        plan.order_by = None;
+    // If the index covers all ORDER BY columns, and one of the following applies:
+    // - the ORDER BY directions exactly match the index orderings,
+    // - the ORDER BY directions are the exact opposite of the index orderings,
+    // we can remove the ORDER BY clause.
+    if match_count == order.len() {
+        let full_match = {
+            let mut all_match_forward = true;
+            let mut all_match_reverse = true;
+            for (i, (_, direction)) in order.iter().enumerate() {
+                match (&matching_index.columns[i].order, direction) {
+                    (SortOrder::Asc, SortOrder::Asc) | (SortOrder::Desc, SortOrder::Desc) => {
+                        all_match_reverse = false;
+                    }
+                    (SortOrder::Asc, SortOrder::Desc) | (SortOrder::Desc, SortOrder::Asc) => {
+                        all_match_forward = false;
+                    }
+                }
+            }
+            all_match_forward || all_match_reverse
+        };
+        if full_match {
+            *order_by = None;
+        }
    }
 
-    Ok(())
+    Ok(order_by.is_none())
}
 
/**
 * Use indexes where possible.
- * Right now we make decisions about using indexes ONLY based on condition expressions, not e.g. ORDER BY or others.
- * This is just because we are WIP.
 *
 * When this function is called, condition expressions from both the actual WHERE clause and the JOIN clauses are in the where_clause vector.
 * If we find a condition that can be used to index scan, we pop it off from the where_clause vector and put it into a Search operation.
 * We put it there simply because it makes it a bit easier to track during translation.
+ *
+ * In this function we also try to eliminate ORDER BY clauses if there is an index that satisfies the ORDER BY clause.
 */
fn use_indexes(
    table_references: &mut [TableReference],
    available_indexes: &HashMap<String, Vec<Arc<Index>>>,
    where_clause: &mut Vec<WhereTerm>,
+    order_by: &mut Option<Vec<(ast::Expr, SortOrder)>>,
+    group_by: &Option<GroupBy>,
) -> Result<()> {
-    if where_clause.is_empty() {
-        return Ok(());
-    }
+    // Try to use indexes for eliminating ORDER BY clauses
+    let did_eliminate_orderby =
+        eliminate_unnecessary_orderby(table_references, available_indexes, order_by, group_by)?;
 
-    'outer: for (table_index, table_reference) in table_references.iter_mut().enumerate() {
-        if let Operation::Scan { .. } = &mut table_reference.op {
-            let mut i = 0;
-            while i < where_clause.len() {
-                let cond = where_clause.get_mut(i).unwrap();
-                if let Some(index_search) = try_extract_index_search_expression(
-                    cond,
-                    table_index,
-                    table_reference,
-                    available_indexes,
-                )? {
-                    where_clause.remove(i);
-                    table_reference.op = Operation::Search(index_search);
-                    continue 'outer;
+    // Try to use indexes for WHERE conditions
+    for (table_index, table_reference) in table_references.iter_mut().enumerate() {
+        if matches!(table_reference.op, Operation::Scan { .. }) {
+            let index = if let Operation::Scan { index, .. } = &table_reference.op {
+                Option::clone(index)
+            } else {
+                None
+            };
+            match index {
+                // If we decided to eliminate ORDER BY using an index, let's constrain our search to only that index
+                Some(index) => {
+                    let available_indexes = available_indexes
+                        .values()
+                        .flatten()
+                        .filter(|i| i.name == index.name)
+                        .cloned()
+                        .collect::<Vec<_>>();
+                    if let Some(search) = try_extract_index_search_from_where_clause(
+                        where_clause,
+                        table_index,
+                        table_reference,
+                        &available_indexes,
+                    )? {
+                        table_reference.op = Operation::Search(search);
+                    }
+                }
+                None => {
+                    let table_name = table_reference.table.get_name();
+
+                    // If we can utilize the rowid alias of the table, let's preferentially always use it for now.
+                    let mut i = 0;
+                    while i < where_clause.len() {
+                        if let Some(search) = try_extract_rowid_search_expression(
+                            &mut where_clause[i],
+                            table_index,
+                            table_reference,
+                        )? {
+                            where_clause.remove(i);
+                            table_reference.op = Operation::Search(search);
+                            continue;
+                        } else {
+                            i += 1;
+                        }
+                    }
+                    if did_eliminate_orderby && table_index == 0 {
+                        // If we already made the decision to remove ORDER BY based on the Rowid (e.g. ORDER BY id), then skip this.
+                        // It would be possible to analyze the index and see if the covering index would retain the ordering guarantee,
+                        // but we just don't do that yet.
+                        continue;
+                    }
+                    let placeholder = vec![];
+                    let mut usable_indexes_ref = &placeholder;
+                    if let Some(indexes) = available_indexes.get(table_name) {
+                        usable_indexes_ref = indexes;
+                    }
+                    if let Some(search) = try_extract_index_search_from_where_clause(
+                        where_clause,
+                        table_index,
+                        table_reference,
+                        usable_indexes_ref,
+                    )? {
+                        table_reference.op = Operation::Search(search);
+                    }
+                }
+            }
+        }
+
+        // Finally, if there's no other reason to use an index, but an index covers the columns used in the query, let's use it.
+        if let Some(indexes) = available_indexes.get(table_reference.table.get_name()) {
+            for index_candidate in indexes.iter() {
+                let is_covering = table_reference.index_is_covering(index_candidate);
+                if let Operation::Scan { index, .. } = &mut table_reference.op {
+                    if index.is_some() {
+                        continue;
+                    }
+                    if is_covering {
+                        *index = Some(index_candidate.clone());
+                        break;
+                    }
                 }
-                i += 1;
             }
         }
    }
@@ -221,20 +403,6 @@ fn eliminate_constant_conditions(
    Ok(ConstantConditionEliminationResult::Continue)
}
 
-fn push_scan_direction(table: &mut TableReference, direction: &Direction) {
-    if let Operation::Scan {
-        ref mut iter_dir, ..
-    } = table.op
-    {
-        if iter_dir.is_none() {
-            match direction {
-                Direction::Ascending => *iter_dir = Some(IterationDirection::Forwards),
-                Direction::Descending => *iter_dir = Some(IterationDirection::Backwards),
-            }
-        }
-    }
-}
-
 fn rewrite_exprs_select(plan: &mut SelectPlan) -> Result<()> {
     for rc in plan.result_columns.iter_mut() {
         rewrite_expr(&mut rc.expr)?;
@@ -287,7 +455,7 @@ fn rewrite_exprs_update(plan: &mut UpdatePlan) -> Result<()> {
}
 
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
-pub enum ConstantPredicate {
+pub enum AlwaysTrueOrFalse {
    AlwaysTrue,
    AlwaysFalse,
}
@@ -297,25 +465,22 @@ pub enum ConstantPredicate {
   Implemented for ast::Expr
 */
pub trait Optimizable {
-    // if the expression is a constant expression e.g. '1', returns the constant condition
-    fn check_constant(&self) -> Result<Option<ConstantPredicate>>;
+    // if the expression is a constant expression that, when evaluated as a condition, is always true or false,
+    // return an [AlwaysTrueOrFalse].
+    fn check_always_true_or_false(&self) -> Result<Option<AlwaysTrueOrFalse>>;
    fn is_always_true(&self) -> Result<bool> {
        Ok(self
-            .check_constant()?
-            .map_or(false, |c| c == ConstantPredicate::AlwaysTrue))
+            .check_always_true_or_false()?
+            .map_or(false, |c| c == AlwaysTrueOrFalse::AlwaysTrue))
    }
    fn is_always_false(&self) -> Result<bool> {
        Ok(self
-            .check_constant()?
-            .map_or(false, |c| c == ConstantPredicate::AlwaysFalse))
+            .check_always_true_or_false()?
+            .map_or(false, |c| c == AlwaysTrueOrFalse::AlwaysFalse))
    }
+    fn is_constant(&self, resolver: &Resolver<'_>) -> bool;
    fn is_rowid_alias_of(&self, table_index: usize) -> bool;
-    fn check_index_scan(
-        &mut self,
-        table_index: usize,
-        table_reference: &TableReference,
-        available_indexes: &HashMap<String, Vec<Arc<Index>>>,
-    ) -> Result<Option<Arc<Index>>>;
+    fn is_nonnull(&self, tables: &[TableReference]) -> bool;
}
 
impl Optimizable for ast::Expr {
@@ -329,95 +494,189 @@ impl Optimizable for ast::Expr {
            _ => false,
        }
    }
-    fn check_index_scan(
-        &mut self,
-        table_index: usize,
-        table_reference: &TableReference,
-        available_indexes: &HashMap<String, Vec<Arc<Index>>>,
-    ) -> Result<Option<Arc<Index>>> {
+    /// Returns true if the expression is (verifiably) non-NULL.
+    /// It might still be non-NULL even if we return false; we just
+    /// weren't able to prove it.
+    /// This function is currently very conservative, and will return false
+    /// for any expression where we aren't sure and didn't bother to find out
+    /// by writing more complex code.
+    fn is_nonnull(&self, tables: &[TableReference]) -> bool {
        match self {
-            Self::Column { table, column, .. } => {
-                if *table != table_index {
-                    return Ok(None);
-                }
-                let Some(available_indexes_for_table) =
-                    available_indexes.get(table_reference.table.get_name())
-                else {
-                    return Ok(None);
-                };
-                let Some(column) = table_reference.table.get_column_at(*column) else {
-                    return Ok(None);
-                };
-                for index in available_indexes_for_table.iter() {
-                    if let Some(name) = column.name.as_ref() {
-                        if &index.columns.first().unwrap().name == name {
-                            return Ok(Some(index.clone()));
-                        }
-                    }
-                }
-                Ok(None)
+            Expr::Between {
+                lhs, start, end, ..
+            } => lhs.is_nonnull(tables) && start.is_nonnull(tables) && end.is_nonnull(tables),
+            Expr::Binary(expr, _, expr1) => expr.is_nonnull(tables) && expr1.is_nonnull(tables),
+            Expr::Case {
+                base,
+                when_then_pairs,
+                else_expr,
+                ..
+ } => { + base.as_ref().map_or(true, |base| base.is_nonnull(tables)) + && when_then_pairs + .iter() + .all(|(_, then)| then.is_nonnull(tables)) + && else_expr + .as_ref() + .map_or(true, |else_expr| else_expr.is_nonnull(tables)) } - Self::Binary(lhs, op, rhs) => { - // Only consider index scans for binary ops that are comparisons. - // e.g. "t1.id = t2.id" is a valid index scan, but "t1.id + 1" is not. - // - // TODO/optimization: consider detecting index scan on e.g. table t1 in - // "WHERE t1.id + 1 = t2.id" - // here the Expr could be rewritten to "t1.id = t2.id - 1" - // and then t1.id could be used as an index key. - if !matches!( - *op, - ast::Operator::Equals - | ast::Operator::Greater - | ast::Operator::GreaterEquals - | ast::Operator::Less - | ast::Operator::LessEquals - ) { - return Ok(None); - } - let lhs_index = - lhs.check_index_scan(table_index, &table_reference, available_indexes)?; - if lhs_index.is_some() { - return Ok(lhs_index); - } - let rhs_index = - rhs.check_index_scan(table_index, &table_reference, available_indexes)?; - if rhs_index.is_some() { - // swap lhs and rhs - let swapped_operator = match *op { - ast::Operator::Equals => ast::Operator::Equals, - ast::Operator::Greater => ast::Operator::Less, - ast::Operator::GreaterEquals => ast::Operator::LessEquals, - ast::Operator::Less => ast::Operator::Greater, - ast::Operator::LessEquals => ast::Operator::GreaterEquals, - _ => unreachable!(), - }; - let lhs_new = rhs.take_ownership(); - let rhs_new = lhs.take_ownership(); - *self = Self::Binary(Box::new(lhs_new), swapped_operator, Box::new(rhs_new)); - return Ok(rhs_index); - } - Ok(None) + Expr::Cast { expr, .. } => expr.is_nonnull(tables), + Expr::Collate(expr, _) => expr.is_nonnull(tables), + Expr::DoublyQualified(..) => { + panic!("Do not call is_nonnull before DoublyQualified has been rewritten as Column") } - _ => Ok(None), + Expr::Exists(..) => false, + Expr::FunctionCall { .. } => false, + Expr::FunctionCallStar { .. } => false, + Expr::Id(..) => panic!("Do not call is_nonnull before Id has been rewritten as Column"), + Expr::Column { + table, + column, + is_rowid_alias, + .. + } => { + if *is_rowid_alias { + return true; + } + + let table_ref = &tables[*table]; + let columns = table_ref.columns(); + let column = &columns[*column]; + return column.primary_key || column.notnull; + } + Expr::RowId { .. } => true, + Expr::InList { lhs, rhs, .. } => { + lhs.is_nonnull(tables) + && rhs + .as_ref() + .map_or(true, |rhs| rhs.iter().all(|rhs| rhs.is_nonnull(tables))) + } + Expr::InSelect { .. } => false, + Expr::InTable { .. } => false, + Expr::IsNull(..) => true, + Expr::Like { lhs, rhs, .. } => lhs.is_nonnull(tables) && rhs.is_nonnull(tables), + Expr::Literal(literal) => match literal { + ast::Literal::Numeric(_) => true, + ast::Literal::String(_) => true, + ast::Literal::Blob(_) => true, + ast::Literal::Keyword(_) => true, + ast::Literal::Null => false, + ast::Literal::CurrentDate => true, + ast::Literal::CurrentTime => true, + ast::Literal::CurrentTimestamp => true, + }, + Expr::Name(..) => false, + Expr::NotNull(..) => true, + Expr::Parenthesized(exprs) => exprs.iter().all(|expr| expr.is_nonnull(tables)), + Expr::Qualified(..) => { + panic!("Do not call is_nonnull before Qualified has been rewritten as Column") + } + Expr::Raise(..) => false, + Expr::Subquery(..) => false, + Expr::Unary(_, expr) => expr.is_nonnull(tables), + Expr::Variable(..) => false, } } - fn check_constant(&self) -> Result> { + /// Returns true if the expression is a constant i.e. 
does not depend on variables or columns etc. + fn is_constant(&self, resolver: &Resolver<'_>) -> bool { + match self { + Expr::Between { + lhs, start, end, .. + } => { + lhs.is_constant(resolver) + && start.is_constant(resolver) + && end.is_constant(resolver) + } + Expr::Binary(expr, _, expr1) => { + expr.is_constant(resolver) && expr1.is_constant(resolver) + } + Expr::Case { + base, + when_then_pairs, + else_expr, + } => { + base.as_ref() + .map_or(true, |base| base.is_constant(resolver)) + && when_then_pairs.iter().all(|(when, then)| { + when.is_constant(resolver) && then.is_constant(resolver) + }) + && else_expr + .as_ref() + .map_or(true, |else_expr| else_expr.is_constant(resolver)) + } + Expr::Cast { expr, .. } => expr.is_constant(resolver), + Expr::Collate(expr, _) => expr.is_constant(resolver), + Expr::DoublyQualified(_, _, _) => { + panic!("DoublyQualified should have been rewritten as Column") + } + Expr::Exists(_) => false, + Expr::FunctionCall { args, name, .. } => { + let Some(func) = + resolver.resolve_function(&name.0, args.as_ref().map_or(0, |args| args.len())) + else { + return false; + }; + func.is_deterministic() + && args.as_ref().map_or(true, |args| { + args.iter().all(|arg| arg.is_constant(resolver)) + }) + } + Expr::FunctionCallStar { .. } => false, + Expr::Id(_) => panic!("Id should have been rewritten as Column"), + Expr::Column { .. } => false, + Expr::RowId { .. } => false, + Expr::InList { lhs, rhs, .. } => { + lhs.is_constant(resolver) + && rhs + .as_ref() + .map_or(true, |rhs| rhs.iter().all(|rhs| rhs.is_constant(resolver))) + } + Expr::InSelect { .. } => { + false // might be constant, too annoying to check subqueries etc. implement later + } + Expr::InTable { .. } => false, + Expr::IsNull(expr) => expr.is_constant(resolver), + Expr::Like { + lhs, rhs, escape, .. 
+ } => { + lhs.is_constant(resolver) + && rhs.is_constant(resolver) + && escape + .as_ref() + .map_or(true, |escape| escape.is_constant(resolver)) + } + Expr::Literal(_) => true, + Expr::Name(_) => false, + Expr::NotNull(expr) => expr.is_constant(resolver), + Expr::Parenthesized(exprs) => exprs.iter().all(|expr| expr.is_constant(resolver)), + Expr::Qualified(_, _) => { + panic!("Qualified should have been rewritten as Column") + } + Expr::Raise(_, expr) => expr + .as_ref() + .map_or(true, |expr| expr.is_constant(resolver)), + Expr::Subquery(_) => false, + Expr::Unary(_, expr) => expr.is_constant(resolver), + Expr::Variable(_) => false, + } + } + /// Returns true if the expression is a constant expression that, when evaluated as a condition, is always true or false + fn check_always_true_or_false(&self) -> Result> { match self { Self::Literal(lit) => match lit { ast::Literal::Numeric(b) => { if let Ok(int_value) = b.parse::() { return Ok(Some(if int_value == 0 { - ConstantPredicate::AlwaysFalse + AlwaysTrueOrFalse::AlwaysFalse } else { - ConstantPredicate::AlwaysTrue + AlwaysTrueOrFalse::AlwaysTrue })); } if let Ok(float_value) = b.parse::() { return Ok(Some(if float_value == 0.0 { - ConstantPredicate::AlwaysFalse + AlwaysTrueOrFalse::AlwaysFalse } else { - ConstantPredicate::AlwaysTrue + AlwaysTrueOrFalse::AlwaysTrue })); } @@ -427,35 +686,35 @@ impl Optimizable for ast::Expr { let without_quotes = s.trim_matches('\''); if let Ok(int_value) = without_quotes.parse::() { return Ok(Some(if int_value == 0 { - ConstantPredicate::AlwaysFalse + AlwaysTrueOrFalse::AlwaysFalse } else { - ConstantPredicate::AlwaysTrue + AlwaysTrueOrFalse::AlwaysTrue })); } if let Ok(float_value) = without_quotes.parse::() { return Ok(Some(if float_value == 0.0 { - ConstantPredicate::AlwaysFalse + AlwaysTrueOrFalse::AlwaysFalse } else { - ConstantPredicate::AlwaysTrue + AlwaysTrueOrFalse::AlwaysTrue })); } - Ok(Some(ConstantPredicate::AlwaysFalse)) + Ok(Some(AlwaysTrueOrFalse::AlwaysFalse)) } _ => Ok(None), }, Self::Unary(op, expr) => { if *op == ast::UnaryOperator::Not { - let trivial = expr.check_constant()?; + let trivial = expr.check_always_true_or_false()?; return Ok(trivial.map(|t| match t { - ConstantPredicate::AlwaysTrue => ConstantPredicate::AlwaysFalse, - ConstantPredicate::AlwaysFalse => ConstantPredicate::AlwaysTrue, + AlwaysTrueOrFalse::AlwaysTrue => AlwaysTrueOrFalse::AlwaysFalse, + AlwaysTrueOrFalse::AlwaysFalse => AlwaysTrueOrFalse::AlwaysTrue, })); } if *op == ast::UnaryOperator::Negative { - let trivial = expr.check_constant()?; + let trivial = expr.check_always_true_or_false()?; return Ok(trivial); } @@ -464,50 +723,50 @@ impl Optimizable for ast::Expr { Self::InList { lhs: _, not, rhs } => { if rhs.is_none() { return Ok(Some(if *not { - ConstantPredicate::AlwaysTrue + AlwaysTrueOrFalse::AlwaysTrue } else { - ConstantPredicate::AlwaysFalse + AlwaysTrueOrFalse::AlwaysFalse })); } let rhs = rhs.as_ref().unwrap(); if rhs.is_empty() { return Ok(Some(if *not { - ConstantPredicate::AlwaysTrue + AlwaysTrueOrFalse::AlwaysTrue } else { - ConstantPredicate::AlwaysFalse + AlwaysTrueOrFalse::AlwaysFalse })); } Ok(None) } Self::Binary(lhs, op, rhs) => { - let lhs_trivial = lhs.check_constant()?; - let rhs_trivial = rhs.check_constant()?; + let lhs_trivial = lhs.check_always_true_or_false()?; + let rhs_trivial = rhs.check_always_true_or_false()?; match op { ast::Operator::And => { - if lhs_trivial == Some(ConstantPredicate::AlwaysFalse) - || rhs_trivial == Some(ConstantPredicate::AlwaysFalse) + if lhs_trivial == 
+                        if lhs_trivial == Some(AlwaysTrueOrFalse::AlwaysFalse)
+                            || rhs_trivial == Some(AlwaysTrueOrFalse::AlwaysFalse)
                        {
-                            return Ok(Some(ConstantPredicate::AlwaysFalse));
+                            return Ok(Some(AlwaysTrueOrFalse::AlwaysFalse));
                        }
-                        if lhs_trivial == Some(ConstantPredicate::AlwaysTrue)
-                            && rhs_trivial == Some(ConstantPredicate::AlwaysTrue)
+                        if lhs_trivial == Some(AlwaysTrueOrFalse::AlwaysTrue)
+                            && rhs_trivial == Some(AlwaysTrueOrFalse::AlwaysTrue)
                        {
-                            return Ok(Some(ConstantPredicate::AlwaysTrue));
+                            return Ok(Some(AlwaysTrueOrFalse::AlwaysTrue));
                        }
 
                        Ok(None)
                    }
                    ast::Operator::Or => {
-                        if lhs_trivial == Some(ConstantPredicate::AlwaysTrue)
-                            || rhs_trivial == Some(ConstantPredicate::AlwaysTrue)
+                        if lhs_trivial == Some(AlwaysTrueOrFalse::AlwaysTrue)
+                            || rhs_trivial == Some(AlwaysTrueOrFalse::AlwaysTrue)
                        {
-                            return Ok(Some(ConstantPredicate::AlwaysTrue));
+                            return Ok(Some(AlwaysTrueOrFalse::AlwaysTrue));
                        }
-                        if lhs_trivial == Some(ConstantPredicate::AlwaysFalse)
-                            && rhs_trivial == Some(ConstantPredicate::AlwaysFalse)
+                        if lhs_trivial == Some(AlwaysTrueOrFalse::AlwaysFalse)
+                            && rhs_trivial == Some(AlwaysTrueOrFalse::AlwaysFalse)
                        {
-                            return Ok(Some(ConstantPredicate::AlwaysFalse));
+                            return Ok(Some(AlwaysTrueOrFalse::AlwaysFalse));
                        }
 
                        Ok(None)
@@ -531,17 +790,985 @@ fn opposite_cmp_op(op: ast::Operator) -> ast::Operator {
    }
}
 
-pub fn try_extract_index_search_expression(
+/// Struct used for scoring index scans
+/// Currently we just estimate cost in a really dumb way,
+/// i.e. no statistics are used.
+struct IndexScore {
+    index: Option<Arc<Index>>,
+    cost: f64,
+    constraints: Vec<IndexConstraint>,
+}
+
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+struct IndexInfo {
+    unique: bool,
+    column_count: usize,
+}
+
+const ESTIMATED_HARDCODED_ROWS_PER_TABLE: f64 = 1000.0;
+
+/// Unbelievably dumb cost estimate for rows scanned by an index scan.
+fn dumb_cost_estimator(
+    index_info: Option<IndexInfo>,
+    constraints: &[IndexConstraint],
+    is_inner_loop: bool,
+    is_ephemeral: bool,
+) -> f64 {
+    // assume that the outer table always does a full table scan :)
+    // this discourages building ephemeral indexes on the outer table
+    // (since a scan reads TABLE_ROWS rows, so an ephemeral index on the outer table would both read TABLE_ROWS rows to build the index and then seek the index)
+    // but encourages building it on the inner table because it's only built once but the inner loop is run as many times as the outer loop has iterations.
+    let loop_multiplier = if is_inner_loop {
+        ESTIMATED_HARDCODED_ROWS_PER_TABLE
+    } else {
+        1.0
+    };
+
+    // If we are building an ephemeral index, we assume we will scan the entire source table to build it.
+    // Non-ephemeral indexes don't need to be built.
+    let cost_to_build_index = is_ephemeral as usize as f64 * ESTIMATED_HARDCODED_ROWS_PER_TABLE;
+
+    let Some(index_info) = index_info else {
+        return cost_to_build_index + ESTIMATED_HARDCODED_ROWS_PER_TABLE * loop_multiplier;
+    };
+
+    let final_constraint_is_range = constraints
+        .last()
+        .map_or(false, |c| c.operator != ast::Operator::Equals);
+    let equalities_count = constraints
+        .iter()
+        .take(if final_constraint_is_range {
+            constraints.len() - 1
+        } else {
+            constraints.len()
+        })
+        .count() as f64;
+
+    let selectivity = match (
+        index_info.unique,
+        index_info.column_count as f64,
+        equalities_count,
+    ) {
+        // no equalities: let's assume range query selectivity is 0.4. If the final constraint is not a range and there are no equalities, a full table scan is incoming
+        (_, _, 0.0) => {
+            if final_constraint_is_range {
+                0.4
+            } else {
+                1.0
+            }
+        }
+        // on a unique index, if we have equalities across all index columns, assume very high selectivity
+        (true, index_cols, eq_count) if eq_count == index_cols => 0.01 * eq_count,
+        // some equalities: let's assume each equality has a selectivity of 0.1 and range query selectivity is 0.4
+        (_, _, eq_count) => (eq_count * 0.1) * if final_constraint_is_range { 0.4 } else { 1.0 },
+    };
+    cost_to_build_index + selectivity * ESTIMATED_HARDCODED_ROWS_PER_TABLE * loop_multiplier
+}
+
+/// Try to extract an index search from the WHERE clause
+/// Returns an optional [Search] struct if an index search can be extracted, otherwise returns None.
+pub fn try_extract_index_search_from_where_clause(
+    where_clause: &mut Vec<WhereTerm>,
+    table_index: usize,
+    table_reference: &TableReference,
+    table_indexes: &[Arc<Index>],
+) -> Result<Option<Search>> {
+    // If there are no WHERE terms, we can't extract a search
+    if where_clause.is_empty() {
+        return Ok(None);
+    }
+
+    let iter_dir = if let Operation::Scan { iter_dir, .. } = &table_reference.op {
+        *iter_dir
+    } else {
+        return Ok(None);
+    };
+
+    // Find all potential index constraints
+    // For WHERE terms to be used to constrain an index scan, they must:
+    // 1. refer to columns in the table that the index is on
+    // 2. be a binary comparison expression
+    // 3. constrain the index columns in the order that they appear in the index
+    //    - e.g. if the index is on (a,b,c) then we can use all of "a = 1 AND b = 2 AND c = 3" to constrain the index scan,
+    //    - but if the where clause is "a = 1 and c = 3" then we can only use "a = 1".
+    let cost_of_full_table_scan = dumb_cost_estimator(None, &[], table_index != 0, false);
+    let mut constraints_cur = vec![];
+    let mut best_index = IndexScore {
+        index: None,
+        cost: cost_of_full_table_scan,
+        constraints: vec![],
+    };
+
+    for index in table_indexes {
+        // Check how many terms in the where clause constrain the index in column order
+        find_index_constraints(where_clause, table_index, index, &mut constraints_cur)?;
+        // naive scoring since we don't have statistics: prefer the index where we can use the most columns
+        // e.g. if we can use all columns of an index on (a,b), it's better than an index of (c,d,e) where we can only use c.
+        let cost = dumb_cost_estimator(
+            Some(IndexInfo {
+                unique: index.unique,
+                column_count: index.columns.len(),
+            }),
+            &constraints_cur,
+            table_index != 0,
+            false,
+        );
+        if cost < best_index.cost {
+            best_index.index = Some(Arc::clone(index));
+            best_index.cost = cost;
+            best_index.constraints.clear();
+            best_index.constraints.append(&mut constraints_cur);
+        }
+    }
+
+    // We haven't found a persistent btree index that is any better than a full table scan;
+    // let's see if building an ephemeral index would be better.
+    if best_index.index.is_none() {
+        let (ephemeral_cost, constraints_with_col_idx, mut constraints_without_col_idx) =
+            ephemeral_index_estimate_cost(where_clause, table_reference, table_index);
+        if ephemeral_cost < best_index.cost {
+            // ephemeral index makes sense, so let's build it now.
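+            // A rough worked example of this trade-off (all numbers come from the hardcoded
+            // estimator above, not from statistics): for an inner-loop table with one
+            // equality constraint, a full scan costs 1000 * 1000 = 1_000_000 row reads,
+            // while the ephemeral index costs 1000 (build) + 0.1 * 1000 * 1000 = 101_000,
+            // so the index wins despite having to be built first.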
+            // ephemeral columns are: columns from the table_reference, constraints first, then the rest
+            let ephemeral_index =
+                ephemeral_index_build(table_reference, table_index, &constraints_with_col_idx);
+            best_index.index = Some(Arc::new(ephemeral_index));
+            best_index.cost = ephemeral_cost;
+            best_index.constraints.clear();
+            best_index
+                .constraints
+                .append(&mut constraints_without_col_idx);
+        }
+    }
+
+    if best_index.index.is_none() {
+        return Ok(None);
+    }
+
+    // Build the seek definition
+    let seek_def =
+        build_seek_def_from_index_constraints(&best_index.constraints, iter_dir, where_clause)?;
+
+    // Remove the used terms from the where_clause since they are now part of the seek definition
+    // Sort terms by position in descending order to avoid shifting indices during removal
+    best_index.constraints.sort_by(|a, b| {
+        b.position_in_where_clause
+            .0
+            .cmp(&a.position_in_where_clause.0)
+    });
+
+    for constraint in best_index.constraints.iter() {
+        where_clause.remove(constraint.position_in_where_clause.0);
+    }
+
+    return Ok(Some(Search::Seek {
+        index: best_index.index,
+        seek_def,
+    }));
+}
+
+fn ephemeral_index_estimate_cost(
+    where_clause: &mut Vec<WhereTerm>,
+    table_reference: &TableReference,
+    table_index: usize,
+) -> (f64, Vec<(usize, IndexConstraint)>, Vec<IndexConstraint>) {
+    let mut constraints_with_col_idx: Vec<(usize, IndexConstraint)> = where_clause
+        .iter()
+        .enumerate()
+        .filter(|(_, term)| is_potential_index_constraint(term, table_index))
+        .filter_map(|(i, term)| {
+            let Ok(ast::Expr::Binary(lhs, operator, rhs)) = unwrap_parens(&term.expr) else {
+                panic!("expected binary expression");
+            };
+            if let ast::Expr::Column { table, column, .. } = lhs.as_ref() {
+                if *table == table_index {
+                    return Some((
+                        *column,
+                        IndexConstraint {
+                            position_in_where_clause: (i, BinaryExprSide::Rhs),
+                            operator: *operator,
+                            index_column_sort_order: SortOrder::Asc,
+                        },
+                    ));
+                }
+            }
+            if let ast::Expr::Column { table, column, .. } = rhs.as_ref() {
+                if *table == table_index {
+                    return Some((
+                        *column,
+                        IndexConstraint {
+                            position_in_where_clause: (i, BinaryExprSide::Lhs),
+                            operator: opposite_cmp_op(*operator),
+                            index_column_sort_order: SortOrder::Asc,
+                        },
+                    ));
+                }
+            }
+            None
+        })
+        .collect();
+    // sort equalities first
+    constraints_with_col_idx.sort_by(|a, _| {
+        if a.1.operator == ast::Operator::Equals {
+            Ordering::Less
+        } else {
+            Ordering::Equal
+        }
+    });
+    // drop everything after the first inequality
+    constraints_with_col_idx.truncate(
+        constraints_with_col_idx
+            .iter()
+            .position(|c| c.1.operator != ast::Operator::Equals)
+            .unwrap_or(constraints_with_col_idx.len()),
+    );
+
+    let ephemeral_column_count = table_reference
+        .columns()
+        .iter()
+        .enumerate()
+        .filter(|(i, _)| table_reference.column_is_used(*i))
+        .count();
+
+    let constraints_without_col_idx = constraints_with_col_idx
+        .iter()
+        .cloned()
+        .map(|(_, c)| c)
+        .collect::<Vec<_>>();
+    let ephemeral_cost = dumb_cost_estimator(
+        Some(IndexInfo {
+            unique: false,
+            column_count: ephemeral_column_count,
+        }),
+        &constraints_without_col_idx,
+        table_index != 0,
+        true,
+    );
+    (
+        ephemeral_cost,
+        constraints_with_col_idx,
+        constraints_without_col_idx,
+    )
+}
+
+fn ephemeral_index_build(
+    table_reference: &TableReference,
+    table_index: usize,
+    index_constraints: &[(usize, IndexConstraint)],
+) -> Index {
+    let mut ephemeral_columns: Vec<IndexColumn> = table_reference
+        .columns()
+        .iter()
+        .enumerate()
+        .map(|(i, c)| IndexColumn {
+            name: c.name.clone().unwrap(),
+            order: SortOrder::Asc,
+            pos_in_table: i,
+        })
+        // only include columns that are used in the query
+        .filter(|c| table_reference.column_is_used(c.pos_in_table))
+        .collect();
+    // sort so that constraints come first, then the rest in whatever order they were in the table
+    ephemeral_columns.sort_by(|a, b| {
+        let a_constraint = index_constraints
+            .iter()
+            .enumerate()
+            .find(|(_, c)| c.0 == a.pos_in_table);
+        let b_constraint = index_constraints
+            .iter()
+            .enumerate()
+            .find(|(_, c)| c.0 == b.pos_in_table);
+        match (a_constraint, b_constraint) {
+            (Some(_), None) => Ordering::Less,
+            (None, Some(_)) => Ordering::Greater,
+            (Some((a_idx, _)), Some((b_idx, _))) => a_idx.cmp(&b_idx),
+            (None, None) => Ordering::Equal,
+        }
+    });
+    let ephemeral_index = Index {
+        name: format!(
+            "ephemeral_{}_{}",
+            table_reference.table.get_name(),
+            table_index
+        ),
+        columns: ephemeral_columns,
+        unique: false,
+        ephemeral: true,
+        table_name: table_reference.table.get_name().to_string(),
+        root_page: 0,
+    };
+
+    ephemeral_index
+}
+
+#[derive(Debug, Clone)]
+/// A representation of an expression in a [WhereTerm] that can potentially be used as part of an index seek key.
+/// For example, if there is an index on table T(x,y) and another index on table U(z), and the where clause is "WHERE x > 10 AND 20 = z",
+/// the index constraints are:
+/// - x > 10 ==> IndexConstraint { position_in_where_clause: (0, [BinaryExprSide::Rhs]), operator: [ast::Operator::Greater] }
+/// - 20 = z ==> IndexConstraint { position_in_where_clause: (1, [BinaryExprSide::Lhs]), operator: [ast::Operator::Equals] }
+pub struct IndexConstraint {
+    position_in_where_clause: (usize, BinaryExprSide),
+    operator: ast::Operator,
+    index_column_sort_order: SortOrder,
+}
+
+/// Helper enum for [IndexConstraint] to indicate which side of a binary comparison expression is being compared to the index column.
+/// For example, if the where clause is "WHERE x = 10" and there's an index on x,
+/// the [IndexConstraint] for the where clause term "x = 10" will have a [BinaryExprSide::Rhs]
+/// because the right hand side expression "10" is being compared to the index column "x".
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+enum BinaryExprSide {
+    Lhs,
+    Rhs,
+}
+
+/// Recursively unwrap parentheses from an expression
+/// e.g. (((t.x > 5))) -> t.x > 5
+fn unwrap_parens<T>(expr: T) -> Result<T>
+where
+    T: UnwrapParens,
+{
+    expr.unwrap_parens()
+}
+
+trait UnwrapParens {
+    fn unwrap_parens(self) -> Result<Self>
+    where
+        Self: Sized;
+}
+
+impl UnwrapParens for &ast::Expr {
+    fn unwrap_parens(self) -> Result<Self> {
+        match self {
+            ast::Expr::Column { .. } => Ok(self),
+            ast::Expr::Parenthesized(exprs) => match exprs.len() {
+                1 => unwrap_parens(exprs.first().unwrap()),
+                _ => crate::bail_parse_error!("expected single expression in parentheses"),
+            },
+            _ => Ok(self),
+        }
+    }
+}
+
+impl UnwrapParens for ast::Expr {
+    fn unwrap_parens(self) -> Result<Self> {
+        match self {
+            ast::Expr::Column { .. } => Ok(self),
+            ast::Expr::Parenthesized(mut exprs) => match exprs.len() {
+                1 => unwrap_parens(exprs.pop().unwrap()),
+                _ => crate::bail_parse_error!("expected single expression in parentheses"),
+            },
+            _ => Ok(self),
+        }
+    }
+}
+
+/// Get the position of a column in an index
+/// For example, if there is an index on table T(x,y) then y's position in the index is 1.
+fn get_column_position_in_index(
+    expr: &ast::Expr,
+    table_index: usize,
+    index: &Arc<Index>,
+) -> Result<Option<usize>> {
+    let ast::Expr::Column { table, column, .. } = unwrap_parens(expr)? else {
+        return Ok(None);
+    };
+    if *table != table_index {
+        return Ok(None);
+    }
+    Ok(index.column_table_pos_to_index_pos(*column))
+}
+
+fn is_potential_index_constraint(term: &WhereTerm, table_index: usize) -> bool {
+    // Skip terms that cannot be evaluated at this table's loop level
+    if !term.should_eval_at_loop(table_index) {
+        return false;
+    }
+    // Skip terms that are not binary comparisons
+    let Ok(ast::Expr::Binary(lhs, operator, rhs)) = unwrap_parens(&term.expr) else {
+        return false;
+    };
+    // Only consider index scans for binary ops that are comparisons
+    if !matches!(
+        *operator,
+        ast::Operator::Equals
+            | ast::Operator::Greater
+            | ast::Operator::GreaterEquals
+            | ast::Operator::Less
+            | ast::Operator::LessEquals
+    ) {
+        return false;
+    }
+
+    // If both lhs and rhs refer to columns from this table, we can't use this constraint
+    // because we can't use the index to satisfy the condition.
+    // Examples:
+    // - WHERE t.x > t.y
+    // - WHERE t.x + 1 > t.y - 5
+    // - WHERE t.x = (t.x)
+    let Ok(eval_at_left) = determine_where_to_eval_expr(&lhs) else {
+        return false;
+    };
+    let Ok(eval_at_right) = determine_where_to_eval_expr(&rhs) else {
+        return false;
+    };
+    if eval_at_left == EvalAt::Loop(table_index) && eval_at_right == EvalAt::Loop(table_index) {
+        return false;
+    }
+    true
+}
+
+/// Find all [IndexConstraint]s for a given WHERE clause
+/// Constraints are appended as long as they constrain the index in column order.
+/// E.g. for index (a,b,c) to be fully used, there must be a [WhereTerm] for each of a, b, and c.
+/// If e.g. only a and c are present, then only the first column 'a' of the index will be used.
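+/// A hypothetical walk-through (our example, not from the original docs): with an index
+/// on (a,b,c) and `WHERE a = 1 AND c < 3 AND b >= 2`, the scan below matches a = 1, then
+/// b >= 2, then also finds c < 3; the truncation step afterwards keeps only [a = 1, b >= 2]
+/// because b's constraint is a range, and c < 3 stays in the where clause as a regular filter.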
+fn find_index_constraints(
+    where_clause: &mut Vec<WhereTerm>,
+    table_index: usize,
+    index: &Arc<Index>,
+    out_constraints: &mut Vec<IndexConstraint>,
+) -> Result<()> {
+    for position_in_index in 0..index.columns.len() {
+        let mut found = false;
+        for (position_in_where_clause, term) in where_clause.iter().enumerate() {
+            if !is_potential_index_constraint(term, table_index) {
+                continue;
+            }
+
+            let ast::Expr::Binary(lhs, operator, rhs) = unwrap_parens(&term.expr)? else {
+                panic!("expected binary expression");
+            };
+
+            // Check if lhs is a column that is in the i'th position of the index
+            if Some(position_in_index) == get_column_position_in_index(lhs, table_index, index)? {
+                out_constraints.push(IndexConstraint {
+                    operator: *operator,
+                    position_in_where_clause: (position_in_where_clause, BinaryExprSide::Rhs),
+                    index_column_sort_order: index.columns[position_in_index].order,
+                });
+                found = true;
+                break;
+            }
+            // Check if rhs is a column that is in the i'th position of the index
+            if Some(position_in_index) == get_column_position_in_index(rhs, table_index, index)? {
+                out_constraints.push(IndexConstraint {
+                    operator: opposite_cmp_op(*operator), // swap the operator since e.g. if condition is 5 >= x, we want to use x <= 5
+                    position_in_where_clause: (position_in_where_clause, BinaryExprSide::Lhs),
+                    index_column_sort_order: index.columns[position_in_index].order,
+                });
+                found = true;
+                break;
+            }
+        }
+        if !found {
+            // Expressions must constrain index columns in index definition order. If we didn't find a constraint for the i'th index column,
+            // then we stop here and return the constraints we have found so far.
+            break;
+        }
+    }
+
+    // In a multicolumn index, only the last term can have a nonequality expression.
+    // For example, imagine an index on (x,y) and the where clause is "WHERE x > 10 AND y > 20";
+    // We can't use GT(x: 10,y: 20) as the seek key, because the first row greater than (x: 10,y: 20)
+    // might be e.g. (x: 10,y: 21), which does not satisfy the where clause, but a row after that e.g. (x: 11,y: 21) does.
+    // So:
+    // - in this case only GT(x: 10) can be used as the seek key, and we must emit a regular condition expression for y > 20 while scanning.
+    // On the other hand, if the where clause is "WHERE x = 10 AND y > 20", we can use GT(x=10,y=20) as the seek key,
+    // because any rows where (x=10,y=20) < ROW < (x=11) will match the where clause.
+    for i in 0..out_constraints.len() {
+        if out_constraints[i].operator != ast::Operator::Equals {
+            out_constraints.truncate(i + 1);
+            break;
+        }
+    }
+
+    Ok(())
+}
+
+/// Build a [SeekDef] for a given list of [IndexConstraint]s
+pub fn build_seek_def_from_index_constraints(
+    constraints: &[IndexConstraint],
+    iter_dir: IterationDirection,
+    where_clause: &mut Vec<WhereTerm>,
+) -> Result<SeekDef> {
+    assert!(
+        !constraints.is_empty(),
+        "cannot build seek def from empty list of index constraints"
+    );
+    // Extract the key values and operators
+    let mut key = Vec::with_capacity(constraints.len());
+
+    for constraint in constraints {
+        // Extract the other expression from the binary WhereTerm (i.e. the one being compared to the index column)
+        let (idx, side) = constraint.position_in_where_clause;
+        let where_term = &mut where_clause[idx];
+        let ast::Expr::Binary(lhs, _, rhs) = unwrap_parens(where_term.expr.take_ownership())?
+        else {
+            crate::bail_parse_error!("expected binary expression");
+        };
+        let cmp_expr = if side == BinaryExprSide::Lhs {
+            *lhs
+        } else {
+            *rhs
+        };
+        key.push((cmp_expr, constraint.index_column_sort_order));
+    }
+
+    // We know that all but possibly the last term are equalities, so we can use the operator of the last term
+    // to form the SeekOp
+    let op = constraints.last().unwrap().operator;
+
+    build_seek_def(op, iter_dir, key)
+}
+
+/// Build a [SeekDef] for a given comparison operator and index key.
+/// To be usable as a seek key, all but potentially the last term must be equalities.
+/// The last term can be a nonequality.
+/// The comparison operator referred to by `op` is the operator of the last term.
+///
+/// There are two parts to the seek definition:
+/// 1. The [SeekKey], which specifies the key that we will use to seek to the first row that matches the index key.
+/// 2. The [TerminationKey], which specifies the key that we will use to terminate the index scan that follows the seek.
+///
+/// There are some nuances to how, and which parts of, the index key can be used in the [SeekKey] and [TerminationKey],
+/// depending on the operator and iteration direction. This function explains those nuances inline when dealing with
+/// each case.
+///
+/// But to illustrate the general idea, consider the following examples:
+///
+/// 1. For example, having two conditions like (x>10 AND y>20) cannot be used as a valid [SeekKey] GT(x:10, y:20)
+/// because the first row greater than (x:10, y:20) might be (x:10, y:21), which does not satisfy the where clause.
+/// In this case, only GT(x:10) must be used as the [SeekKey], and rows with y <= 20 must be filtered as a regular condition expression for each value of x.
+///
+/// 2. In contrast, having (x=10 AND y>20) forms a valid index key GT(x:10, y:20) because after the seek, we can simply terminate as soon as x > 10,
+/// i.e. use GT(x:10, y:20) as the [SeekKey] and GT(x:10) as the [TerminationKey].
+///
+/// The preceding examples are for an ascending index. The logic is similar for descending indexes, but an important distinction is that
+/// since a descending index is laid out in reverse order, the comparison operators are reversed, e.g. LT becomes GT, LE becomes GE, etc.
+/// So when you see e.g. a SeekOp::GT below for a descending index, it actually means that we are seeking the first row where the index key is LESS than the seek key.
+///
+fn build_seek_def(
+    op: ast::Operator,
+    iter_dir: IterationDirection,
+    key: Vec<(ast::Expr, SortOrder)>,
+) -> Result<SeekDef> {
+    let key_len = key.len();
+    let sort_order_of_last_key = key.last().unwrap().1;
+
+    // For the commented examples below, keep in mind that since a descending index is laid out in reverse order, the comparison operators are reversed, e.g. LT becomes GT, LE becomes GE, etc.
+    // Also keep in mind that index keys are compared based on the number of columns given, so for example:
+    // - if key is GT(x:10), then (x=10, y=usize::MAX) is not GT because only X is compared. (x=11, y=<any>) is GT.
+    // - if key is GT(x:10, y:20), then (x=10, y=21) is GT because both X and Y are compared.
+    // - if key is GT(x:10, y:NULL), then (x=10, y=0) is GT because NULL is always LT in index key comparisons.
+    Ok(match (iter_dir, op) {
+        // Forwards, EQ:
+        // Example: (x=10 AND y=20)
+        // Seek key: start from the first GE(x:10, y:20)
+        // Termination key: end at the first GT(x:10, y:20)
+        // Ascending vs descending doesn't matter because all the comparisons are equalities.
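+        // As a concrete sketch of this arm (register contents are illustrative): for
+        // WHERE x = 10 AND y = 20 on a two-column index, [emit_seek] positions the cursor
+        // with SeekGE over the registers [10, 20], and the scan loop stops once IdxGT over
+        // those same two registers fires, so exactly the (x=10, y=20) rows are visited.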
+ (IterationDirection::Forwards, ast::Operator::Equals) => SeekDef { + key, + iter_dir, + seek: Some(SeekKey { + len: key_len, + null_pad: false, + op: SeekOp::GE, + }), + termination: Some(TerminationKey { + len: key_len, + null_pad: false, + op: SeekOp::GT, + }), + }, + // Forwards, GT: + // Ascending index example: (x=10 AND y>20) + // Seek key: start from the first GT(x:10, y:20), e.g. (x=10, y=21) + // Termination key: end at the first GT(x:10), e.g. (x=11, y=0) + // + // Descending index example: (x=10 AND y>20) + // Seek key: start from the first LE(x:10), e.g. (x=10, y=usize::MAX), so reversed -> GE(x:10) + // Termination key: end at the first LE(x:10, y:20), e.g. (x=10, y=20) so reversed -> GE(x:10, y:20) + (IterationDirection::Forwards, ast::Operator::Greater) => { + let (seek_key_len, termination_key_len, seek_op, termination_op) = + if sort_order_of_last_key == SortOrder::Asc { + (key_len, key_len - 1, SeekOp::GT, SeekOp::GT) + } else { + ( + key_len - 1, + key_len, + SeekOp::LE.reverse(), + SeekOp::LE.reverse(), + ) + }; + SeekDef { + key, + iter_dir, + seek: if seek_key_len > 0 { + Some(SeekKey { + len: seek_key_len, + op: seek_op, + null_pad: false, + }) + } else { + None + }, + termination: if termination_key_len > 0 { + Some(TerminationKey { + len: termination_key_len, + op: termination_op, + null_pad: false, + }) + } else { + None + }, + } + } + // Forwards, GE: + // Ascending index example: (x=10 AND y>=20) + // Seek key: start from the first GE(x:10, y:20), e.g. (x=10, y=20) + // Termination key: end at the first GT(x:10), e.g. (x=11, y=0) + // + // Descending index example: (x=10 AND y>=20) + // Seek key: start from the first LE(x:10), e.g. (x=10, y=usize::MAX), so reversed -> GE(x:10) + // Termination key: end at the first LT(x:10, y:20), e.g. (x=10, y=19), so reversed -> GT(x:10, y:20) + (IterationDirection::Forwards, ast::Operator::GreaterEquals) => { + let (seek_key_len, termination_key_len, seek_op, termination_op) = + if sort_order_of_last_key == SortOrder::Asc { + (key_len, key_len - 1, SeekOp::GE, SeekOp::GT) + } else { + ( + key_len - 1, + key_len, + SeekOp::LE.reverse(), + SeekOp::LT.reverse(), + ) + }; + SeekDef { + key, + iter_dir, + seek: if seek_key_len > 0 { + Some(SeekKey { + len: seek_key_len, + op: seek_op, + null_pad: false, + }) + } else { + None + }, + termination: if termination_key_len > 0 { + Some(TerminationKey { + len: termination_key_len, + op: termination_op, + null_pad: false, + }) + } else { + None + }, + } + } + // Forwards, LT: + // Ascending index example: (x=10 AND y<20) + // Seek key: start from the first GT(x:10, y: NULL), e.g. (x=10, y=0) + // Termination key: end at the first GE(x:10, y:20), e.g. (x=10, y=20) + // + // Descending index example: (x=10 AND y<20) + // Seek key: start from the first LT(x:10, y:20), e.g. (x=10, y=19), so reversed -> GT(x:10, y:20) + // Termination key: end at the first LT(x:10), e.g. (x=9, y=usize::MAX), so reversed -> GE(x:10, NULL); i.e. 
GE the smallest possible (x=10, y) combination (NULL is always LT)
+        (IterationDirection::Forwards, ast::Operator::Less) => {
+            let (seek_key_len, termination_key_len, seek_op, termination_op) =
+                if sort_order_of_last_key == SortOrder::Asc {
+                    (key_len - 1, key_len, SeekOp::GT, SeekOp::GE)
+                } else {
+                    (key_len, key_len - 1, SeekOp::GT, SeekOp::GE)
+                };
+            SeekDef {
+                key,
+                iter_dir,
+                seek: if seek_key_len > 0 {
+                    Some(SeekKey {
+                        len: seek_key_len,
+                        op: seek_op,
+                        null_pad: sort_order_of_last_key == SortOrder::Asc,
+                    })
+                } else {
+                    None
+                },
+                termination: if termination_key_len > 0 {
+                    Some(TerminationKey {
+                        len: termination_key_len,
+                        op: termination_op,
+                        null_pad: sort_order_of_last_key == SortOrder::Desc,
+                    })
+                } else {
+                    None
+                },
+            }
+        }
+        // Forwards, LE:
+        // Ascending index example: (x=10 AND y<=20)
+        // Seek key: start from the first GE(x:10, y:NULL), e.g. (x=10, y=0)
+        // Termination key: end at the first GT(x:10, y:20), e.g. (x=10, y=21)
+        //
+        // Descending index example: (x=10 AND y<=20)
+        // Seek key: start from the first LE(x:10, y:20), e.g. (x=10, y=20) so reversed -> GE(x:10, y:20)
+        // Termination key: end at the first LT(x:10), e.g. (x=9, y=usize::MAX), so reversed -> GE(x:10, NULL); i.e. GE the smallest possible (x=10, y) combination (NULL is always LT)
+        (IterationDirection::Forwards, ast::Operator::LessEquals) => {
+            let (seek_key_len, termination_key_len, seek_op, termination_op) =
+                if sort_order_of_last_key == SortOrder::Asc {
+                    (key_len - 1, key_len, SeekOp::GT, SeekOp::GT)
+                } else {
+                    (
+                        key_len,
+                        key_len - 1,
+                        SeekOp::LE.reverse(),
+                        SeekOp::LE.reverse(),
+                    )
+                };
+            SeekDef {
+                key,
+                iter_dir,
+                seek: if seek_key_len > 0 {
+                    Some(SeekKey {
+                        len: seek_key_len,
+                        op: seek_op,
+                        null_pad: sort_order_of_last_key == SortOrder::Asc,
+                    })
+                } else {
+                    None
+                },
+                termination: if termination_key_len > 0 {
+                    Some(TerminationKey {
+                        len: termination_key_len,
+                        op: termination_op,
+                        null_pad: sort_order_of_last_key == SortOrder::Desc,
+                    })
+                } else {
+                    None
+                },
+            }
+        }
+        // Backwards, EQ:
+        // Example: (x=10 AND y=20)
+        // Seek key: start from the last LE(x:10, y:20)
+        // Termination key: end at the first LT(x:10, y:20)
+        // Ascending vs descending doesn't matter because all the comparisons are equalities.
+        (IterationDirection::Backwards, ast::Operator::Equals) => SeekDef {
+            key,
+            iter_dir,
+            seek: Some(SeekKey {
+                len: key_len,
+                op: SeekOp::LE,
+                null_pad: false,
+            }),
+            termination: Some(TerminationKey {
+                len: key_len,
+                op: SeekOp::LT,
+                null_pad: false,
+            }),
+        },
+        // Backwards, LT:
+        // Ascending index example: (x=10 AND y<20)
+        // Seek key: start from the last LT(x:10, y:20), e.g. (x=10, y=19)
+        // Termination key: end at the first LE(x:10, NULL), e.g. (x=9, y=usize::MAX)
+        //
+        // Descending index example: (x=10 AND y<20)
+        // Seek key: start from the last GT(x:10, y:NULL), e.g. (x=10, y=0) so reversed -> LT(x:10, NULL)
+        // Termination key: end at the first GE(x:10, y:20), e.g. (x=10, y=20) so reversed -> LE(x:10, y:20)
+        (IterationDirection::Backwards, ast::Operator::Less) => {
+            let (seek_key_len, termination_key_len, seek_op, termination_op) =
+                if sort_order_of_last_key == SortOrder::Asc {
+                    (key_len, key_len - 1, SeekOp::LT, SeekOp::LE)
+                } else {
+                    (
+                        key_len - 1,
+                        key_len,
+                        SeekOp::GT.reverse(),
+                        SeekOp::GE.reverse(),
+                    )
+                };
+            SeekDef {
+                key,
+                iter_dir,
+                seek: if seek_key_len > 0 {
+                    Some(SeekKey {
+                        len: seek_key_len,
+                        op: seek_op,
+                        null_pad: sort_order_of_last_key == SortOrder::Desc,
+                    })
+                } else {
+                    None
+                },
+                termination: if termination_key_len > 0 {
+                    Some(TerminationKey {
+                        len: termination_key_len,
+                        op: termination_op,
+                        null_pad: sort_order_of_last_key == SortOrder::Asc,
+                    })
+                } else {
+                    None
+                },
+            }
+        }
+        // Backwards, LE:
+        // Ascending index example: (x=10 AND y<=20)
+        // Seek key: start from the last LE(x:10, y:20), e.g. (x=10, y=20)
+        // Termination key: end at the first LT(x:10, NULL), e.g. (x=9, y=usize::MAX)
+        //
+        // Descending index example: (x=10 AND y<=20)
+        // Seek key: start from the last GT(x:10, NULL), e.g. (x=10, y=0) so reversed -> LT(x:10, NULL)
+        // Termination key: end at the first GT(x:10, y:20), e.g. (x=10, y=21) so reversed -> LT(x:10, y:20)
+        (IterationDirection::Backwards, ast::Operator::LessEquals) => {
+            let (seek_key_len, termination_key_len, seek_op, termination_op) =
+                if sort_order_of_last_key == SortOrder::Asc {
+                    (key_len, key_len - 1, SeekOp::LE, SeekOp::LE)
+                } else {
+                    (
+                        key_len - 1,
+                        key_len,
+                        SeekOp::GT.reverse(),
+                        SeekOp::GT.reverse(),
+                    )
+                };
+            SeekDef {
+                key,
+                iter_dir,
+                seek: if seek_key_len > 0 {
+                    Some(SeekKey {
+                        len: seek_key_len,
+                        op: seek_op,
+                        null_pad: sort_order_of_last_key == SortOrder::Desc,
+                    })
+                } else {
+                    None
+                },
+                termination: if termination_key_len > 0 {
+                    Some(TerminationKey {
+                        len: termination_key_len,
+                        op: termination_op,
+                        null_pad: sort_order_of_last_key == SortOrder::Asc,
+                    })
+                } else {
+                    None
+                },
+            }
+        }
+        // Backwards, GT:
+        // Ascending index example: (x=10 AND y>20)
+        // Seek key: start from the last LE(x:10), e.g. (x=10, y=usize::MAX)
+        // Termination key: end at the first LE(x:10, y:20), e.g. (x=10, y=20)
+        //
+        // Descending index example: (x=10 AND y>20)
+        // Seek key: start from the last GT(x:10, y:20), e.g. (x=10, y=21) so reversed -> LT(x:10, y:20)
+        // Termination key: end at the first GT(x:10), e.g. (x=11, y=0) so reversed -> LT(x:10)
+        (IterationDirection::Backwards, ast::Operator::Greater) => {
+            let (seek_key_len, termination_key_len, seek_op, termination_op) =
+                if sort_order_of_last_key == SortOrder::Asc {
+                    (key_len - 1, key_len, SeekOp::LE, SeekOp::LE)
+                } else {
+                    (
+                        key_len,
+                        key_len - 1,
+                        SeekOp::GT.reverse(),
+                        SeekOp::GT.reverse(),
+                    )
+                };
+            SeekDef {
+                key,
+                iter_dir,
+                seek: if seek_key_len > 0 {
+                    Some(SeekKey {
+                        len: seek_key_len,
+                        op: seek_op,
+                        null_pad: false,
+                    })
+                } else {
+                    None
+                },
+                termination: if termination_key_len > 0 {
+                    Some(TerminationKey {
+                        len: termination_key_len,
+                        op: termination_op,
+                        null_pad: false,
+                    })
+                } else {
+                    None
+                },
+            }
+        }
+        // Backwards, GE:
+        // Ascending index example: (x=10 AND y>=20)
+        // Seek key: start from the last LE(x:10), e.g. (x=10, y=usize::MAX)
+        // Termination key: end at the first LT(x:10, y:20), e.g. (x=10, y=19)
+        //
+        // Descending index example: (x=10 AND y>=20)
+        // Seek key: start from the last GE(x:10, y:20), e.g. (x=10, y=20) so reversed -> LE(x:10, y:20)
+        // Termination key: end at the first GT(x:10), e.g. (x=11, y=0) so reversed -> LT(x:10)
+        (IterationDirection::Backwards, ast::Operator::GreaterEquals) => {
+            let (seek_key_len, termination_key_len, seek_op, termination_op) =
+                if sort_order_of_last_key == SortOrder::Asc {
+                    (key_len - 1, key_len, SeekOp::LE, SeekOp::LT)
+                } else {
+                    (
+                        key_len,
+                        key_len - 1,
+                        SeekOp::GE.reverse(),
+                        SeekOp::GT.reverse(),
+                    )
+                };
+            SeekDef {
+                key,
+                iter_dir,
+                seek: if seek_key_len > 0 {
+                    Some(SeekKey {
+                        len: seek_key_len,
+                        op: seek_op,
+                        null_pad: false,
+                    })
+                } else {
+                    None
+                },
+                termination: if termination_key_len > 0 {
+                    Some(TerminationKey {
+                        len: termination_key_len,
+                        op: termination_op,
+                        null_pad: false,
+                    })
+                } else {
+                    None
+                },
+            }
+        }
+        (_, op) => {
+            crate::bail_parse_error!("build_seek_def: invalid operator: {:?}", op,)
+        }
+    })
+}
+
+pub fn try_extract_rowid_search_expression(
     cond: &mut WhereTerm,
     table_index: usize,
     table_reference: &TableReference,
-    available_indexes: &HashMap<String, Vec<Arc<Index>>>,
 ) -> Result<Option<Search>> {
+    let iter_dir = if let Operation::Scan { iter_dir, .. } = &table_reference.op {
+        *iter_dir
+    } else {
+        return Ok(None);
+    };
     if !cond.should_eval_at_loop(table_index) {
         return Ok(None);
     }
     match &mut cond.expr {
         ast::Expr::Binary(lhs, operator, rhs) => {
+            // If both lhs and rhs refer to columns from this table, we can't perform a rowid seek
+            // Examples:
+            // - WHERE t.x > t.y
+            // - WHERE t.x + 1 > t.y - 5
+            // - WHERE t.x = (t.x)
+            if determine_where_to_eval_expr(lhs)? == EvalAt::Loop(table_index)
+                && determine_where_to_eval_expr(rhs)? == EvalAt::Loop(table_index)
+            {
+                return Ok(None);
+            }
             if lhs.is_rowid_alias_of(table_index) {
                 match operator {
                     ast::Operator::Equals => {
@@ -559,13 +1786,11 @@ pub fn try_extract_index_search_expression(
                     | ast::Operator::Greater
                     | ast::Operator::Less
                     | ast::Operator::LessEquals => {
                         let rhs_owned = rhs.take_ownership();
-                        return Ok(Some(Search::RowidSearch {
-                            cmp_op: *operator,
-                            cmp_expr: WhereTerm {
-                                expr: rhs_owned,
-                                from_outer_join: cond.from_outer_join,
-                                eval_at: cond.eval_at,
-                            },
+                        let seek_def =
+                            build_seek_def(*operator, iter_dir, vec![(rhs_owned, SortOrder::Asc)])?;
+                        return Ok(Some(Search::Seek {
+                            index: None,
+                            seek_def,
                         }));
                     }
                     _ => {}
@@ -589,61 +1814,12 @@ pub fn try_extract_index_search_expression(
                     | ast::Operator::Less
                     | ast::Operator::LessEquals => {
                         let lhs_owned = lhs.take_ownership();
-                        return Ok(Some(Search::RowidSearch {
-                            cmp_op: opposite_cmp_op(*operator),
-                            cmp_expr: WhereTerm {
-                                expr: lhs_owned,
-                                from_outer_join: cond.from_outer_join,
-                                eval_at: cond.eval_at,
-                            },
-                        }));
-                    }
-                    _ => {}
-                }
-            }
-
-            if let Some(index_rc) =
-                lhs.check_index_scan(table_index, &table_reference, available_indexes)?
-            {
-                match operator {
-                    ast::Operator::Equals
-                    | ast::Operator::Greater
-                    | ast::Operator::GreaterEquals
-                    | ast::Operator::Less
-                    | ast::Operator::LessEquals => {
-                        let rhs_owned = rhs.take_ownership();
-                        return Ok(Some(Search::IndexSearch {
-                            index: index_rc,
-                            cmp_op: *operator,
-                            cmp_expr: WhereTerm {
-                                expr: rhs_owned,
-                                from_outer_join: cond.from_outer_join,
-                                eval_at: cond.eval_at,
-                            },
-                        }));
-                    }
-                    _ => {}
-                }
-            }
-
-            if let Some(index_rc) =
-                rhs.check_index_scan(table_index, &table_reference, available_indexes)?
-            {
-                match operator {
-                    ast::Operator::Equals
-                    | ast::Operator::Greater
-                    | ast::Operator::GreaterEquals
-                    | ast::Operator::Less
-                    | ast::Operator::LessEquals => {
-                        let lhs_owned = lhs.take_ownership();
-                        return Ok(Some(Search::IndexSearch {
-                            index: index_rc,
-                            cmp_op: opposite_cmp_op(*operator),
-                            cmp_expr: WhereTerm {
-                                expr: lhs_owned,
-                                from_outer_join: cond.from_outer_join,
-                                eval_at: cond.eval_at,
-                            },
+                        let op = opposite_cmp_op(*operator);
+                        let seek_def =
+                            build_seek_def(op, iter_dir, vec![(lhs_owned, SortOrder::Asc)])?;
+                        return Ok(Some(Search::Seek {
+                            index: None,
+                            seek_def,
                         }));
                     }
                     _ => {}
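
As a standalone illustration of the seek/termination semantics documented above (this sketch is not part of the diff; the index data and the Option-based NULL model are made up), here is the Forwards + Less case on an ascending (x, y) index, where the seek key is the NULL-padded prefix GT(10, NULL) and the scan terminates at the first entry GE(10, 20):

// Rust sketch: NULL modeled as Option::None, which sorts lowest, matching
// "NULL is always LT in index key comparisons" from the comments above.
fn main() {
    // Index on (x, y) ascending, stored in btree order.
    let index: Vec<(i64, Option<i64>)> = vec![
        (9, Some(5)),
        (10, None),
        (10, Some(0)),
        (10, Some(19)),
        (10, Some(20)),
        (11, Some(1)),
    ];
    // WHERE x = 10 AND y < 20, iterating forwards:
    let seek_key = (10, None); // 1-column prefix, NULL-padded
    let start = index.iter().position(|e| *e > seek_key).unwrap();
    let term_key = (10, Some(20)); // full key; stop at first entry GE it
    let hits: Vec<_> = index[start..].iter().take_while(|e| **e < term_key).collect();
    assert_eq!(hits, vec![&(10, Some(0)), &(10, Some(19))]);
}
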
diff --git a/core/translate/order_by.rs b/core/translate/order_by.rs
index 9793afdc9..0a43e6683 100644
--- a/core/translate/order_by.rs
+++ b/core/translate/order_by.rs
@@ -1,10 +1,9 @@
 use std::rc::Rc;
 
-use limbo_sqlite3_parser::ast;
+use limbo_sqlite3_parser::ast::{self, SortOrder};
 
 use crate::{
     schema::{Column, PseudoTable},
-    types::{OwnedValue, Record},
     util::exprs_are_equivalent,
     vdbe::{
         builder::{CursorType, ProgramBuilder},
@@ -16,7 +15,7 @@ use crate::{
 use super::{
     emitter::TranslateCtx,
     expr::translate_expr,
-    plan::{Direction, ResultSetColumn, SelectPlan},
+    plan::{ResultSetColumn, SelectPlan},
     result_row::{emit_offset, emit_result_row_and_limit},
 };
 
@@ -33,21 +32,17 @@ pub struct SortMetadata {
 pub fn init_order_by(
     program: &mut ProgramBuilder,
     t_ctx: &mut TranslateCtx,
-    order_by: &[(ast::Expr, Direction)],
+    order_by: &[(ast::Expr, SortOrder)],
 ) -> Result<()> {
     let sort_cursor = program.alloc_cursor_id(None, CursorType::Sorter);
     t_ctx.meta_sort = Some(SortMetadata {
         sort_cursor,
         reg_sorter_data: program.alloc_register(),
     });
-    let mut order = Vec::new();
-    for (_, direction) in order_by.iter() {
-        order.push(OwnedValue::Integer(*direction as i64));
-    }
     program.emit_insn(Insn::SorterOpen {
         cursor_id: sort_cursor,
         columns: order_by.len(),
-        order: Record::new(order),
+        order: order_by.iter().map(|(_, direction)| *direction).collect(),
     });
     Ok(())
 }
@@ -124,8 +119,8 @@ pub fn emit_order_by(
         cursor_id: sort_cursor,
         pc_if_empty: sort_loop_end_label,
     });
+    program.preassign_label_to_next_insn(sort_loop_start_label);
 
-    program.resolve_label(sort_loop_start_label, program.offset());
     emit_offset(program, t_ctx, plan, sort_loop_next_label)?;
 
     program.emit_insn(Insn::SorterData {
@@ -154,8 +149,7 @@ pub fn emit_order_by(
         cursor_id: sort_cursor,
         pc_if_next: sort_loop_start_label,
     });
-
-    program.resolve_label(sort_loop_end_label, program.offset());
+    program.preassign_label_to_next_insn(sort_loop_end_label);
 
     Ok(())
 }
@@ -258,7 +252,7 @@ pub fn sorter_insert(
 ///
 /// If any result columns can be skipped, this returns list of 2-tuples of (SkippedResultColumnIndex: usize, ResultColumnIndexInOrderBySorter: usize)
 pub fn order_by_deduplicate_result_columns(
-    order_by: &[(ast::Expr, Direction)],
+    order_by: &[(ast::Expr, SortOrder)],
     result_columns: &[ResultSetColumn],
 ) -> Option<Vec<(usize, usize)>> {
     let mut result_column_remapping: Option<Vec<(usize, usize)>> = None;
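
The SorterOpen change above passes the per-column ASC/DESC flags straight through instead of encoding them into a Record. For illustration, a minimal sketch (local stand-in types, not the crate's sorter) of how such an order list drives a multi-column comparison:

use std::cmp::Ordering;

#[derive(Clone, Copy, PartialEq)]
enum SortOrder { Asc, Desc }

// Compare two rows column by column, flipping the result for DESC columns.
fn compare_rows(l: &[i64], r: &[i64], order: &[SortOrder]) -> Ordering {
    for ((a, b), ord) in l.iter().zip(r).zip(order) {
        let c = a.cmp(b);
        if c != Ordering::Equal {
            return if *ord == SortOrder::Desc { c.reverse() } else { c };
        }
    }
    Ordering::Equal
}

fn main() {
    let mut rows = vec![vec![1, 10], vec![1, 20], vec![2, 5]];
    let order = [SortOrder::Asc, SortOrder::Desc]; // ORDER BY c0 ASC, c1 DESC
    rows.sort_by(|l, r| compare_rows(l, r, &order));
    assert_eq!(rows, vec![vec![1, 20], vec![1, 10], vec![2, 5]]);
}
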
diff --git a/core/translate/plan.rs b/core/translate/plan.rs
index 3d93548de..079e99908 100644
--- a/core/translate/plan.rs
+++ b/core/translate/plan.rs
@@ -1,5 +1,6 @@
 use core::fmt;
-use limbo_sqlite3_parser::ast;
+use limbo_ext::{ConstraintInfo, ConstraintOp};
+use limbo_sqlite3_parser::ast::{self, SortOrder};
 use std::{
     cmp::Ordering,
     fmt::{Display, Formatter},
@@ -7,13 +8,22 @@ use std::{
     sync::Arc,
 };
 
-use crate::schema::{PseudoTable, Type};
 use crate::{
     function::AggFunc,
     schema::{BTreeTable, Column, Index, Table},
-    vdbe::BranchOffset,
-    VirtualTable,
+    vdbe::{
+        builder::{CursorType, ProgramBuilder},
+        BranchOffset, CursorID,
+    },
+    Result, VirtualTable,
 };
+use crate::{
+    schema::{PseudoTable, Type},
+    types::SeekOp,
+    util::can_pushdown_predicate,
+};
+
+use super::emitter::OperationMode;
 
 #[derive(Debug, Clone)]
 pub struct ResultSetColumn {
@@ -24,13 +34,26 @@ pub struct ResultSetColumn {
 }
 
 impl ResultSetColumn {
-    pub fn name<'a>(&'a self, tables: &'a [TableReference]) -> Option<&'a String> {
+    pub fn name<'a>(&'a self, tables: &'a [TableReference]) -> Option<&'a str> {
         if let Some(alias) = &self.alias {
             return Some(alias);
         }
         match &self.expr {
             ast::Expr::Column { table, column, .. } => {
-                tables[*table].columns()[*column].name.as_ref()
+                tables[*table].columns()[*column].name.as_deref()
+            }
+            ast::Expr::RowId { table, .. } => {
+                // If there is a rowid alias column, use its name
+                if let Table::BTree(table) = &tables[*table].table {
+                    if let Some(rowid_alias_column) = table.get_rowid_alias_column() {
+                        if let Some(name) = &rowid_alias_column.1.name {
+                            return Some(name);
+                        }
+                    }
+                }
+
+                // If there is no rowid alias, use "rowid".
+                Some("rowid")
             }
             _ => None,
        }
@@ -72,6 +95,114 @@ impl WhereTerm {
     }
 }
 
+use crate::ast::{Expr, Operator};
+
+// This function takes an operator and returns the operator you would obtain if the operands were swapped.
+// e.g. given "literal < column", which is not the canonical order for constraint pushdown,
+// this function will return > so that the expression can be treated as if it were written "column > literal".
+fn reverse_operator(op: &Operator) -> Option<Operator> {
+    match op {
+        Operator::Equals => Some(Operator::Equals),
+        Operator::Less => Some(Operator::Greater),
+        Operator::LessEquals => Some(Operator::GreaterEquals),
+        Operator::Greater => Some(Operator::Less),
+        Operator::GreaterEquals => Some(Operator::LessEquals),
+        Operator::NotEquals => Some(Operator::NotEquals),
+        Operator::Is => Some(Operator::Is),
+        Operator::IsNot => Some(Operator::IsNot),
+        _ => None,
+    }
+}
+
+fn to_ext_constraint_op(op: &Operator) -> Option<ConstraintOp> {
+    match op {
+        Operator::Equals => Some(ConstraintOp::Eq),
+        Operator::Less => Some(ConstraintOp::Lt),
+        Operator::LessEquals => Some(ConstraintOp::Le),
+        Operator::Greater => Some(ConstraintOp::Gt),
+        Operator::GreaterEquals => Some(ConstraintOp::Ge),
+        Operator::NotEquals => Some(ConstraintOp::Ne),
+        _ => None,
+    }
+}
+
+/// This function takes a WhereTerm for a select involving a VTab at index 'table_index'.
+/// It determines whether the term involves the given table and whether it can
+/// be converted into a ConstraintInfo which can be passed to the vtab module's xBestIndex
+/// method, which will possibly calculate some information to improve the query plan that we can send
+/// back to it as arguments for the VFilter operation. For example, given:
+/// 'SELECT key, value FROM vtab WHERE key = 'some_key';'
+/// we need to send the OwnedValue('some_key') as an argument to VFilter, and possibly omit it from
+/// the filtration in the vdbe layer.
+pub fn convert_where_to_vtab_constraint(
+    term: &WhereTerm,
+    table_index: usize,
+    pred_idx: usize,
+) -> Option<ConstraintInfo> {
+    if term.from_outer_join {
+        return None;
+    }
+    let Expr::Binary(lhs, op, rhs) = &term.expr else {
+        return None;
+    };
+    let expr_is_ready = |e: &Expr| -> bool { can_pushdown_predicate(e, table_index) };
+    let (vcol_idx, op_for_vtab, usable, is_rhs) = match (&**lhs, &**rhs) {
+        (
+            Expr::Column {
+                table: tbl_l,
+                column: col_l,
+                ..
+            },
+            Expr::Column {
+                table: tbl_r,
+                column: col_r,
+                ..
+            },
+        ) => {
+            // one side must be the virtual table
+            let vtab_on_l = *tbl_l == table_index;
+            let vtab_on_r = *tbl_r == table_index;
+            if vtab_on_l == vtab_on_r {
+                return None; // either both or none -> not convertible
+            }
+
+            if vtab_on_l {
+                // vtab on left side: operator unchanged
+                let usable = *tbl_r < table_index; // usable if the other table is already positioned
+                (col_l, op, usable, false)
+            } else {
+                // vtab on right side of the expr: reverse operator
+                let usable = *tbl_l < table_index;
+                (col_r, &reverse_operator(op).unwrap_or(*op), usable, true)
+            }
+        }
+        (Expr::Column { table, column, .. }, other) if *table == table_index => {
+            (
+                column,
+                op,
+                expr_is_ready(other), // literal / earlier-table / deterministic func ?
+                false,
+            )
+        }
+        (other, Expr::Column { table, column, .. }) if *table == table_index => (
+            column,
+            &reverse_operator(op).unwrap_or(*op),
+            expr_is_ready(other),
+            true,
+        ),
+
+        _ => return None, // does not involve the virtual table at all
+    };
+
+    Some(ConstraintInfo {
+        column_index: *vcol_idx as u32,
+        op: to_ext_constraint_op(op_for_vtab)?,
+        usable,
+        plan_info: ConstraintInfo::pack_plan_info(pred_idx as u32, is_rhs),
+    })
+}
 
 /// The loop index where to evaluate the condition.
 /// For example, in `SELECT * FROM u JOIN p WHERE u.id = 5`, the condition can already be evaluated at the first loop (idx 0),
 /// because that is the rightmost table that it references.
@@ -136,7 +267,7 @@ pub struct SelectPlan {
     /// group by clause
     pub group_by: Option<GroupBy>,
     /// order by clause
-    pub order_by: Option<Vec<(ast::Expr, Direction)>>,
+    pub order_by: Option<Vec<(ast::Expr, SortOrder)>>,
     /// all the aggregates collected from the result columns, order by, and (TODO) having clauses
     pub aggregates: Vec<Aggregate>,
     /// limit clause
@@ -159,13 +290,15 @@ pub struct DeletePlan {
     /// where clause split into a vec at 'AND' boundaries.
     pub where_clause: Vec<WhereTerm>,
     /// order by clause
-    pub order_by: Option<Vec<(ast::Expr, Direction)>>,
+    pub order_by: Option<Vec<(ast::Expr, SortOrder)>>,
     /// limit clause
     pub limit: Option<isize>,
     /// offset clause
     pub offset: Option<isize>,
     /// query contains a constant condition that is always false
     pub contains_constant_false_condition: bool,
+    /// Indexes that must be updated by the delete operation.
+    pub indexes: Vec<Arc<Index>>,
 }
 
 #[derive(Debug, Clone)]
@@ -175,13 +308,14 @@ pub struct UpdatePlan {
     // (column index, new value) pairs
     pub set_clauses: Vec<(usize, ast::Expr)>,
     pub where_clause: Vec<WhereTerm>,
-    pub order_by: Option<Vec<(ast::Expr, Direction)>>,
-    // TODO: support OFFSET
+    pub order_by: Option<Vec<(ast::Expr, SortOrder)>>,
     pub limit: Option<isize>,
+    pub offset: Option<isize>,
     // TODO: optional RETURNING clause
     pub returning: Option<Vec<ResultSetColumn>>,
     // whether the WHERE clause is always false
     pub contains_constant_false_condition: bool,
+    pub indexes_to_update: Vec<Arc<Index>>,
 }
 
 #[derive(Clone, Copy, Debug, PartialEq, Eq)]
@@ -253,18 +387,54 @@ pub struct TableReference {
     pub identifier: String,
     /// The join info for this table reference, if it is the right side of a join (which all except the first table reference have)
     pub join_info: Option<JoinInfo>,
+    /// Bitmask of columns that are referenced in the query.
+    /// Used to decide whether a covering index can be used.
+    pub col_used_mask: ColumnUsedMask,
+}
+
+#[derive(Clone, Debug, PartialEq, Eq)]
+#[repr(transparent)]
+pub struct ColumnUsedMask(u128);
+
+impl ColumnUsedMask {
+    pub fn new() -> Self {
+        Self(0)
+    }
+
+    pub fn set(&mut self, index: usize) {
+        assert!(
+            index < 128,
+            "ColumnUsedMask only supports up to 128 columns"
+        );
+        self.0 |= 1 << index;
+    }
+
+    pub fn get(&self, index: usize) -> bool {
+        assert!(
+            index < 128,
+            "ColumnUsedMask only supports up to 128 columns"
+        );
+        self.0 & (1 << index) != 0
+    }
+
+    pub fn contains_all_set_bits_of(&self, other: &Self) -> bool {
+        self.0 & other.0 == other.0
+    }
+
+    pub fn is_empty(&self) -> bool {
+        self.0 == 0
+    }
 }
 
 #[derive(Clone, Debug)]
 pub enum Operation {
     // Scan operation
     // This operation is used to scan a table.
-    // The iter_dir are uset to indicate the direction of the iterator.
-    // The use of Option for iter_dir is aimed at implementing a conservative optimization strategy: it only pushes
-    // iter_dir down to Scan when iter_dir is None, to prevent potential result set errors caused by multiple
-    // assignments. for more detailed discussions, please refer to https://github.com/tursodatabase/limbo/pull/376
+    // The iter_dir is used to indicate the direction of the iterator.
     Scan {
-        iter_dir: Option<IterationDirection>,
+        iter_dir: IterationDirection,
+        /// The index that we are using to scan the table, if any.
+        index: Option<Arc<Index>>,
     },
     // Search operation
     // This operation is used to search for a row in a table using an index
@@ -279,6 +449,17 @@ pub enum Operation {
     },
 }
 
+impl Operation {
+    pub fn index(&self) -> Option<&Arc<Index>> {
+        match self {
+            Operation::Scan { index, .. } => index.as_ref(),
+            Operation::Search(Search::RowidEq { .. }) => None,
+            Operation::Search(Search::Seek { index, .. }) => index.as_ref(),
+            Operation::Subquery { .. } => None,
+        }
+    }
+}
+
 impl TableReference {
     /// Returns the btree table for this table reference, if it is a BTreeTable.
     pub fn btree(&self) -> Option<Arc<BTreeTable>> {
@@ -300,7 +481,7 @@ impl TableReference {
         plan.result_columns
             .iter()
             .map(|rc| Column {
-                name: rc.name(&plan.table_references).map(String::clone),
+                name: rc.name(&plan.table_references).map(String::from),
                 ty: Type::Text, // FIXME: infer proper type
                 ty_str: "TEXT".to_string(),
                 is_rowid_alias: false,
@@ -318,12 +499,172 @@ impl TableReference {
             table,
             identifier: identifier.clone(),
             join_info,
+            col_used_mask: ColumnUsedMask::new(),
         }
     }
 
     pub fn columns(&self) -> &[Column] {
         self.table.columns()
     }
+
+    /// Mark a column as used in the query.
+    /// This is used to determine whether a covering index can be used.
+    pub fn mark_column_used(&mut self, index: usize) {
+        self.col_used_mask.set(index);
+    }
+
+    /// Open the necessary cursors for this table reference.
+    /// Generally a table cursor is always opened unless a SELECT query can use a covering index.
+    /// An index cursor is opened if an index is used in any way for reading data from the table.
+    pub fn open_cursors(
+        &self,
+        program: &mut ProgramBuilder,
+        mode: OperationMode,
+    ) -> Result<(Option<CursorID>, Option<CursorID>)> {
+        let index = self.op.index();
+        match &self.table {
+            Table::BTree(btree) => {
+                let use_covering_index = self.utilizes_covering_index();
+                let index_is_ephemeral = index.map_or(false, |index| index.ephemeral);
+                let table_not_required =
+                    OperationMode::SELECT == mode && use_covering_index && !index_is_ephemeral;
+                let table_cursor_id = if table_not_required {
+                    None
+                } else {
+                    Some(program.alloc_cursor_id(
+                        Some(self.identifier.clone()),
+                        CursorType::BTreeTable(btree.clone()),
+                    ))
+                };
+                let index_cursor_id = if let Some(index) = index {
+                    Some(program.alloc_cursor_id(
+                        Some(index.name.clone()),
+                        CursorType::BTreeIndex(index.clone()),
+                    ))
+                } else {
+                    None
+                };
+                Ok((table_cursor_id, index_cursor_id))
+            }
+            Table::Virtual(virtual_table) => {
+                let table_cursor_id = Some(program.alloc_cursor_id(
+                    Some(self.identifier.clone()),
+                    CursorType::VirtualTable(virtual_table.clone()),
+                ));
+                let index_cursor_id = None;
+                Ok((table_cursor_id, index_cursor_id))
+            }
+            Table::Pseudo(_) => Ok((None, None)),
+        }
+    }
+
+    /// Resolve the already opened cursors for this table reference.
+    pub fn resolve_cursors(
+        &self,
+        program: &mut ProgramBuilder,
+    ) -> Result<(Option<CursorID>, Option<CursorID>)> {
+        let index = self.op.index();
+        let table_cursor_id = program.resolve_cursor_id_safe(&self.identifier);
+        let index_cursor_id = index.map(|index| program.resolve_cursor_id(&index.name));
+        Ok((table_cursor_id, index_cursor_id))
+    }
+
+    /// Returns true if a given index is a covering index for this [TableReference].
+    pub fn index_is_covering(&self, index: &Index) -> bool {
+        let Table::BTree(btree) = &self.table else {
+            return false;
+        };
+        if self.col_used_mask.is_empty() {
+            return false;
+        }
+        let mut index_cols_mask = ColumnUsedMask::new();
+        for col in index.columns.iter() {
+            index_cols_mask.set(col.pos_in_table);
+        }
+
+        // If a table has a rowid (i.e. is not a WITHOUT ROWID table), the index is guaranteed to contain the rowid as well.
+        if btree.has_rowid {
+            if let Some(pos_of_rowid_alias_col) = btree.get_rowid_alias_column().map(|(pos, _)| pos)
+            {
+                let mut empty_mask = ColumnUsedMask::new();
+                empty_mask.set(pos_of_rowid_alias_col);
+                if self.col_used_mask == empty_mask {
+                    // However if the index would be ONLY used for the rowid, then let's not bother using it to cover the query.
+                    // Example: if the query is SELECT id FROM t, and id is a rowid alias, then let's rather just scan the table
+                    // instead of an index.
+                    return false;
+                }
+                index_cols_mask.set(pos_of_rowid_alias_col);
+            }
+        }
+
+        index_cols_mask.contains_all_set_bits_of(&self.col_used_mask)
+    }
+
+    /// Returns true if the index selected for use with this [TableReference] is a covering index,
+    /// meaning that it contains all the columns that are referenced in the query.
+    pub fn utilizes_covering_index(&self) -> bool {
+        let Some(index) = self.op.index() else {
+            return false;
+        };
+        self.index_is_covering(index.as_ref())
+    }
+
+    pub fn column_is_used(&self, index: usize) -> bool {
+        self.col_used_mask.get(index)
+    }
+}
+
+/// A definition of a rowid/index search.
+///
+/// [SeekKey] is the condition that is used to seek to a specific row in a table/index.
+/// [TerminationKey] is the condition that is used to terminate the search after a seek.
+#[derive(Debug, Clone)]
+pub struct SeekDef {
+    /// The key to use when seeking and when terminating the scan that follows the seek.
+    /// For example, given:
+    /// - CREATE INDEX i ON t (x, y desc)
+    /// - SELECT * FROM t WHERE x = 1 AND y >= 30
+    /// The key is [(1, ASC), (30, DESC)]
+    pub key: Vec<(ast::Expr, SortOrder)>,
+    /// The condition to use when seeking. See [SeekKey] for more details.
+    pub seek: Option<SeekKey>,
+    /// The condition to use when terminating the scan that follows the seek. See [TerminationKey] for more details.
+    pub termination: Option<TerminationKey>,
+    /// The direction of the scan that follows the seek.
+    pub iter_dir: IterationDirection,
+}
+
+/// A condition to use when seeking.
+#[derive(Debug, Clone)]
+pub struct SeekKey {
+    /// How many columns from [SeekDef::key] are used in seeking.
+    pub len: usize,
+    /// Whether to NULL pad the last column of the seek key to match the length of [SeekDef::key].
+    /// The reason it is done is that sometimes our full index key is not used in seeking,
+    /// but we want to find the lowest value that matches the non-null prefix of the key.
+    /// For example, given:
+    /// - CREATE INDEX i ON t (x, y)
+    /// - SELECT * FROM t WHERE x = 1 AND y < 30
+    /// We want to seek to the first row where x = 1, and then iterate forwards.
+    /// In this case, the seek key is GT(1, NULL) since NULL is always LT in index key comparisons.
+    /// We can't use just GT(1) because in index key comparisons, only the given number of columns are compared,
+    /// so this means any index keys with (x=1) will compare equal, e.g. (x=1, y=usize::MAX) will compare equal to the seek key (x:1)
+    pub null_pad: bool,
+    /// The comparison operator to use when seeking.
+    pub op: SeekOp,
+}
+
+#[derive(Debug, Clone)]
+/// A condition to use when terminating the scan that follows a seek.
+pub struct TerminationKey {
+    /// How many columns from [SeekDef::key] are used in terminating the scan that follows the seek.
+    pub len: usize,
+    /// Whether to NULL pad the last column of the termination key to match the length of [SeekDef::key].
+    /// See [SeekKey::null_pad].
+    pub null_pad: bool,
+    /// The comparison operator to use when terminating the scan that follows the seek.
+    pub op: SeekOp,
 }
 
 /// An enum that represents a search operation that can be used to search for a row in a table using an index
@@ -333,32 +674,11 @@ impl TableReference {
 pub enum Search {
     /// A rowid equality point lookup. This is a special case that uses the SeekRowid bytecode instruction and does not loop.
     RowidEq { cmp_expr: WhereTerm },
-    /// A rowid search. Uses bytecode instructions like SeekGT, SeekGE etc.
-    RowidSearch {
-        cmp_op: ast::Operator,
-        cmp_expr: WhereTerm,
+    /// A search on a table btree (via `rowid`) or a secondary index search. Uses bytecode instructions like SeekGE, SeekGT etc.
+    Seek {
+        index: Option<Arc<Index>>,
+        seek_def: SeekDef,
     },
-    /// A secondary index search. Uses bytecode instructions like SeekGE, SeekGT etc.
-    IndexSearch {
-        index: Arc<Index>,
-        cmp_op: ast::Operator,
-        cmp_expr: WhereTerm,
-    },
-}
-
-#[derive(Clone, Copy, Debug, PartialEq)]
-pub enum Direction {
-    Ascending,
-    Descending,
-}
-
-impl Display for Direction {
-    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
-        match self {
-            Direction::Ascending => write!(f, "ASC"),
-            Direction::Descending => write!(f, "DESC"),
-        }
-    }
 }
 
 #[derive(Clone, Debug, PartialEq)]
@@ -419,14 +739,16 @@ impl Display for SelectPlan {
                         writeln!(f, "{}SCAN {}", indent, table_name)?;
                     }
                     Operation::Search(search) => match search {
-                        Search::RowidEq { .. } | Search::RowidSearch { .. } => {
+                        Search::RowidEq { .. } | Search::Seek { index: None, .. } => {
                             writeln!(
                                 f,
                                 "{}SEARCH {} USING INTEGER PRIMARY KEY (rowid=?)",
                                 indent, reference.identifier
                             )?;
                         }
-                        Search::IndexSearch { index, .. } => {
+                        Search::Seek {
+                            index: Some(index), ..
+                        } => {
                            writeln!(
                                f,
                                "{}SEARCH {} USING INDEX {}",
@@ -508,14 +830,16 @@ impl fmt::Display for UpdatePlan {
                        }
                    }
                    Operation::Search(search) => match search {
-                        Search::RowidEq { .. } | Search::RowidSearch { .. } => {
+                        Search::RowidEq { .. } | Search::Seek { index: None, .. } => {
                            writeln!(
                                f,
                                "{}SEARCH {} USING INTEGER PRIMARY KEY (rowid=?)",
                                indent, reference.identifier
                            )?;
                        }
-                        Search::IndexSearch { index, .. } => {
+                        Search::Seek {
+                            index: Some(index), ..
+                        } => {
                            writeln!(
                                f,
                                "{}SEARCH {} USING INDEX {}",
@@ -534,7 +858,16 @@ impl fmt::Display for UpdatePlan {
        if let Some(order_by) = &self.order_by {
            writeln!(f, "ORDER BY:")?;
            for (expr, dir) in order_by {
-                writeln!(f, " - {} {}", expr, dir)?;
+                writeln!(
+                    f,
+                    " - {} {}",
+                    expr,
+                    if *dir == SortOrder::Asc {
+                        "ASC"
+                    } else {
+                        "DESC"
+                    }
+                )?;
            }
        }
        if let Some(limit) = self.limit {
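
The covering-index decision above reduces to two bit operations on ColumnUsedMask. A minimal standalone sketch (a local re-implementation of the methods added in plan.rs, not the crate's code):

struct Mask(u128);

impl Mask {
    fn set(&mut self, i: usize) {
        self.0 |= 1 << i;
    }
    // An index covers the query iff every used column bit is also set in the index's mask.
    fn contains_all_set_bits_of(&self, other: &Mask) -> bool {
        self.0 & other.0 == other.0
    }
}

fn main() {
    // Query references columns 0 and 2; the index contains columns 0, 1 and 2.
    let mut used = Mask(0);
    used.set(0);
    used.set(2);
    let mut index_cols = Mask(0);
    for pos in [0, 1, 2] {
        index_cols.set(pos);
    }
    assert!(index_cols.contains_all_set_bits_of(&used)); // covering
}
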
diff --git a/core/translate/planner.rs b/core/translate/planner.rs
index 953a15e59..f1d7aaeea 100644
--- a/core/translate/planner.rs
+++ b/core/translate/planner.rs
@@ -1,7 +1,7 @@
 use super::{
     plan::{
-        Aggregate, EvalAt, JoinInfo, Operation, Plan, ResultSetColumn, SelectPlan, SelectQueryType,
-        TableReference, WhereTerm,
+        Aggregate, ColumnUsedMask, EvalAt, IterationDirection, JoinInfo, Operation, Plan,
+        ResultSetColumn, SelectPlan, SelectQueryType, TableReference, WhereTerm,
     },
     select::prepare_select_plan,
     SymbolTable,
@@ -85,7 +85,7 @@ pub fn resolve_aggregates(expr: &Expr, aggs: &mut Vec<Aggregate>) -> bool {
 
 pub fn bind_column_references(
     expr: &mut Expr,
-    referenced_tables: &[TableReference],
+    referenced_tables: &mut [TableReference],
     result_columns: Option<&[ResultSetColumn]>,
 ) -> Result<()> {
     match expr {
@@ -128,6 +128,7 @@ pub fn bind_column_references(
                         column: col_idx,
                         is_rowid_alias,
                     };
+                    referenced_tables[tbl_idx].mark_column_used(col_idx);
                     return Ok(());
                 }
 
@@ -178,6 +179,7 @@ pub fn bind_column_references(
                 column: col_idx.unwrap(),
                 is_rowid_alias: col.is_rowid_alias,
             };
+            referenced_tables[tbl_idx].mark_column_used(col_idx.unwrap());
             Ok(())
         }
         Expr::Between {
@@ -320,10 +322,14 @@ fn parse_from_clause_table<'a>(
             ));
         };
         scope.tables.push(TableReference {
-            op: Operation::Scan { iter_dir: None },
+            op: Operation::Scan {
+                iter_dir: IterationDirection::Forwards,
+                index: None,
+            },
             table: tbl_ref,
             identifier: alias.unwrap_or(normalized_qualified_name),
             join_info: None,
+            col_used_mask: ColumnUsedMask::new(),
         });
         return Ok(());
     };
@@ -399,10 +405,14 @@ fn parse_from_clause_table<'a>(
         .unwrap_or(normalized_name.to_string());
 
     scope.tables.push(TableReference {
-        op: Operation::Scan { iter_dir: None },
+        op: Operation::Scan {
+            iter_dir: IterationDirection::Forwards,
+            index: None,
+        },
         join_info: None,
         table: Table::Virtual(vtab),
         identifier: alias,
+        col_used_mask: ColumnUsedMask::new(),
     });
 
     Ok(())
@@ -533,7 +543,7 @@ pub fn parse_from<'a>(
 
 pub fn parse_where(
     where_clause: Option<ast::Expr>,
-    table_references: &[TableReference],
+    table_references: &mut [TableReference],
     result_columns: Option<&[ResultSetColumn]>,
     out_where_clause: &mut Vec<WhereTerm>,
 ) -> Result<()> {
@@ -564,7 +574,7 @@ pub fn parse_where(
    For expressions not referencing any tables (e.g. constants), this is before the main loop is opened,
    because they do not need any table data.
 */
-fn determine_where_to_eval_expr<'a>(predicate: &'a ast::Expr) -> Result<EvalAt> {
+pub fn determine_where_to_eval_expr<'a>(predicate: &'a ast::Expr) -> Result<EvalAt> {
     let mut eval_at: EvalAt = EvalAt::BeforeLoop;
     match predicate {
         ast::Expr::Binary(e1, _, e2) => {
@@ -752,7 +762,7 @@ fn parse_join<'a>(
         let mut preds = vec![];
         break_predicate_at_and_boundaries(expr, &mut preds);
         for predicate in preds.iter_mut() {
-            bind_column_references(predicate, &scope.tables, None)?;
+            bind_column_references(predicate, &mut scope.tables, None)?;
         }
         for pred in preds {
             let cur_table_idx = scope.tables.len() - 1;
@@ -826,6 +836,11 @@ fn parse_join<'a>(
                     is_rowid_alias: right_col.is_rowid_alias,
                 }),
             );
+
+            let left_table = scope.tables.get_mut(left_table_idx).unwrap();
+            left_table.mark_column_used(left_col_idx);
+            let right_table = scope.tables.get_mut(cur_table_idx).unwrap();
+            right_table.mark_column_used(right_col_idx);
             let eval_at = if outer {
                 EvalAt::Loop(cur_table_idx)
             } else {
@@ -850,30 +865,33 @@ fn parse_join<'a>(
     Ok(())
 }
 
-pub fn parse_limit(limit: Limit) -> Result<(Option<isize>, Option<isize>)> {
-    let offset_val = match limit.offset {
+pub fn parse_limit(limit: &Limit) -> Result<(Option<isize>, Option<isize>)> {
+    let offset_val = match &limit.offset {
         Some(offset_expr) => match offset_expr {
             Expr::Literal(ast::Literal::Numeric(n)) => n.parse().ok(),
             // If OFFSET is negative, the result is as if OFFSET is zero
-            Expr::Unary(UnaryOperator::Negative, expr) => match *expr {
-                Expr::Literal(ast::Literal::Numeric(n)) => n.parse::<isize>().ok().map(|num| -num),
-                _ => crate::bail_parse_error!("Invalid OFFSET clause"),
-            },
+            Expr::Unary(UnaryOperator::Negative, expr) => {
+                if let Expr::Literal(ast::Literal::Numeric(ref n)) = &**expr {
+                    n.parse::<isize>().ok().map(|num| -num)
+                } else {
+                    crate::bail_parse_error!("Invalid OFFSET clause");
+                }
+            }
             _ => crate::bail_parse_error!("Invalid OFFSET clause"),
         },
         None => Some(0),
     };
 
-    if let Expr::Literal(ast::Literal::Numeric(n)) = limit.expr {
+    if let Expr::Literal(ast::Literal::Numeric(n)) = &limit.expr {
         Ok((n.parse().ok(), offset_val))
-    } else if let Expr::Unary(UnaryOperator::Negative, expr) = limit.expr {
-        if let Expr::Literal(ast::Literal::Numeric(n)) = *expr {
+    } else if let Expr::Unary(UnaryOperator::Negative, expr) = &limit.expr {
+        if let Expr::Literal(ast::Literal::Numeric(n)) = &**expr {
             let limit_val = n.parse::<isize>().ok().map(|num| -num);
             Ok((limit_val, offset_val))
         } else {
             crate::bail_parse_error!("Invalid LIMIT clause");
         }
-    } else if let Expr::Id(id) = limit.expr {
+    } else if let Expr::Id(id) = &limit.expr {
         if id.0.eq_ignore_ascii_case("true") {
             Ok((Some(1), offset_val))
         } else if id.0.eq_ignore_ascii_case("false") {
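
A quick standalone sketch of the literal handling parse_limit implements (per the comments in the diff; the isize width here is an assumption matching the reconstructed plan fields, and the helper name is made up): negative OFFSET literals parse through and are treated as zero at execution time, and the identifiers true/false are accepted as LIMIT 1 / LIMIT 0.

fn limit_from_identifier(id: &str) -> Option<isize> {
    if id.eq_ignore_ascii_case("true") {
        Some(1)
    } else if id.eq_ignore_ascii_case("false") {
        Some(0)
    } else {
        None
    }
}

fn main() {
    assert_eq!("10".parse::<isize>().ok(), Some(10));
    // OFFSET -3 parses as Some(-3); per the comment above, it then behaves as OFFSET 0.
    assert_eq!("3".parse::<isize>().ok().map(|n| -n), Some(-3));
    assert_eq!(limit_from_identifier("TRUE"), Some(1));
    assert_eq!(limit_from_identifier("false"), Some(0));
    assert_eq!(limit_from_identifier("maybe"), None);
}
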
diff --git a/core/translate/pragma.rs b/core/translate/pragma.rs
index 668c1f214..4e49a55d1 100644
--- a/core/translate/pragma.rs
+++ b/core/translate/pragma.rs
@@ -29,7 +29,7 @@ fn list_pragmas(
     }
 
     program.emit_halt();
-    program.resolve_label(init_label, program.offset());
+    program.preassign_label_to_next_insn(init_label);
     program.emit_constant_insns();
     program.emit_goto(start_offset);
 }
@@ -104,7 +104,7 @@ pub fn translate_pragma(
         },
     };
     program.emit_halt();
-    program.resolve_label(init_label, program.offset());
+    program.preassign_label_to_next_insn(init_label);
     program.emit_transaction(write);
     program.emit_constant_insns();
     program.emit_goto(start_offset);
@@ -154,12 +154,19 @@ fn update_pragma(
             // TODO: Implement updating user_version
             todo!("updating user_version not yet implemented")
         }
+        PragmaName::SchemaVersion => {
+            // TODO: Implement updating schema_version
+            todo!("updating schema_version not yet implemented")
+        }
         PragmaName::TableInfo => {
             // because we need control over the write parameter for the transaction,
             // this should be unreachable. We have to force-call query_pragma before
             // getting here
             unreachable!();
         }
+        PragmaName::PageSize => {
+            todo!("updating page_size is not yet implemented")
+        }
     }
 }
@@ -249,7 +256,6 @@ fn query_pragma(
         }
         PragmaName::UserVersion => {
-            program.emit_transaction(false);
             program.emit_insn(Insn::ReadCookie {
                 db: 0,
                 dest: register,
                 cookie: Cookie::UserVersion,
             });
             program.emit_result_row(register, 1);
         }
+        PragmaName::SchemaVersion => {
+            program.emit_insn(Insn::ReadCookie {
+                db: 0,
+                dest: register,
+                cookie: Cookie::SchemaVersion,
+            });
+            program.emit_result_row(register, 1);
+        }
+        PragmaName::PageSize => {
+            program.emit_int(database_header.lock().get_page_size().into(), register);
+            program.emit_result_row(register, 1);
+        }
     }
 
     Ok(())
todo!("updating schema_version not yet implemented") + } PragmaName::TableInfo => { // because we need control over the write parameter for the transaction, // this should be unreachable. We have to force-call query_pragma before // getting here unreachable!(); } + PragmaName::PageSize => { + todo!("updating page_size is not yet implemented") + } } } @@ -249,7 +256,6 @@ fn query_pragma( } } PragmaName::UserVersion => { - program.emit_transaction(false); program.emit_insn(Insn::ReadCookie { db: 0, dest: register, @@ -257,6 +263,18 @@ fn query_pragma( }); program.emit_result_row(register, 1); } + PragmaName::SchemaVersion => { + program.emit_insn(Insn::ReadCookie { + db: 0, + dest: register, + cookie: Cookie::SchemaVersion, + }); + program.emit_result_row(register, 1); + } + PragmaName::PageSize => { + program.emit_int(database_header.lock().get_page_size().into(), register); + program.emit_result_row(register, 1); + } } Ok(()) diff --git a/core/translate/result_row.rs b/core/translate/result_row.rs index ad8454c25..dc24cee67 100644 --- a/core/translate/result_row.rs +++ b/core/translate/result_row.rs @@ -25,7 +25,16 @@ pub fn emit_select_result( } let start_reg = t_ctx.reg_result_cols_start.unwrap(); - for (i, rc) in plan.result_columns.iter().enumerate() { + for (i, rc) in plan.result_columns.iter().enumerate().filter(|(_, rc)| { + // For aggregate queries, we handle columns differently; example: select id, first_name, sum(age) from users limit 1; + // 1. Columns with aggregates (e.g., sum(age)) are computed in each iteration of aggregation + // 2. Non-aggregate columns (e.g., id, first_name) are only computed once in the first iteration + // This filter ensures we only emit expressions for non aggregate columns once, + // preserving previously calculated values while updating aggregate results + // For all other queries where reg_nonagg_emit_once_flag is none we do nothing. 
diff --git a/core/translate/schema.rs b/core/translate/schema.rs
index a887bdef8..2d746d520 100644
--- a/core/translate/schema.rs
+++ b/core/translate/schema.rs
@@ -1,7 +1,11 @@
 use std::fmt::Display;
+use std::rc::Rc;
 
 use crate::ast;
+use crate::ext::VTabImpl;
 use crate::schema::Schema;
+use crate::schema::Table;
+use crate::storage::pager::CreateBTreeFlags;
 use crate::translate::ProgramBuilder;
 use crate::translate::ProgramBuilderOpts;
 use crate::translate::QueryMode;
@@ -9,8 +13,10 @@ use crate::util::PRIMARY_KEY_AUTOMATIC_INDEX_NAME_PREFIX;
 use crate::vdbe::builder::CursorType;
 use crate::vdbe::insn::{CmpInsFlags, Insn};
 use crate::LimboError;
+use crate::SymbolTable;
 use crate::{bail_parse_error, Result};
 
+use limbo_ext::VTabKind;
 use limbo_sqlite3_parser::ast::{fmt::ToTokens, CreateVirtualTable};
 
 pub fn translate_create_table(
@@ -35,7 +41,7 @@ pub fn translate_create_table(
         let init_label = program.emit_init();
         let start_offset = program.offset();
         program.emit_halt();
-        program.resolve_label(init_label, program.offset());
+        program.preassign_label_to_next_insn(init_label);
         program.emit_transaction(true);
         program.emit_constant_insns();
         program.emit_goto(start_offset);
@@ -60,7 +66,7 @@ pub fn translate_create_table(
     program.emit_insn(Insn::CreateBtree {
         db: 0,
         root: table_root_reg,
-        flags: 1, // Table leaf page
+        flags: CreateBTreeFlags::new_table(),
     });
 
     // Create an automatic index B-tree if needed
@@ -92,7 +98,7 @@ pub fn translate_create_table(
         program.emit_insn(Insn::CreateBtree {
             db: 0,
             root: index_root_reg,
-            flags: 2, // Index leaf page
+            flags: CreateBTreeFlags::new_index(),
         });
     }
 
@@ -101,11 +107,10 @@ pub fn translate_create_table(
         Some(SQLITE_TABLEID.to_owned()),
         CursorType::BTreeTable(table.clone()),
     );
-    program.emit_insn(Insn::OpenWriteAsync {
+    program.emit_insn(Insn::OpenWrite {
         cursor_id: sqlite_schema_cursor_id,
-        root_page: 1,
+        root_page: 1usize.into(),
     });
-    program.emit_insn(Insn::OpenWriteAwait {});
 
     // Add the table entry to sqlite_schema
     emit_schema_entry(
@@ -147,7 +152,7 @@ pub fn translate_create_table(
 
     // TODO: SqlExec
     program.emit_halt();
-    program.resolve_label(init_label, program.offset());
+    program.preassign_label_to_next_insn(init_label);
     program.emit_transaction(true);
     program.emit_constant_insns();
     program.emit_goto(start_offset);
@@ -155,8 +160,8 @@ pub fn translate_create_table(
     Ok(program)
 }
 
-#[derive(Debug)]
-enum SchemaEntryType {
+#[derive(Debug, Clone, Copy)]
+pub enum SchemaEntryType {
     Table,
     Index,
 }
@@ -169,9 +174,9 @@ impl SchemaEntryType {
         }
     }
 }
-const SQLITE_TABLEID: &str = "sqlite_schema";
+pub const SQLITE_TABLEID: &str = "sqlite_schema";
 
-fn emit_schema_entry(
+pub fn emit_schema_entry(
     program: &mut ProgramBuilder,
     sqlite_schema_cursor_id: usize,
     entry_type: SchemaEntryType,
@@ -219,15 +224,12 @@ fn emit_schema_entry(
         dest_reg: record_reg,
     });
 
-    program.emit_insn(Insn::InsertAsync {
+    program.emit_insn(Insn::Insert {
         cursor: sqlite_schema_cursor_id,
         key_reg: rowid_reg,
         record_reg,
         flag: 0,
     });
-    program.emit_insn(Insn::InsertAwait {
-        cursor_id: sqlite_schema_cursor_id,
-    });
 }
 
 struct PrimaryKeyColumnInfo<'a> {
@@ -398,7 +400,7 @@ fn create_table_body_to_str(tbl_name: &ast::QualifiedName, body: &ast::CreateTab
     sql
 }
 
-fn create_vtable_body_to_str(vtab: &CreateVirtualTable) -> String {
+fn create_vtable_body_to_str(vtab: &CreateVirtualTable, module: Rc<VTabImpl>) -> String {
     let args = if let Some(args) = &vtab.args {
         args.iter()
             .map(|arg| arg.to_string())
@@ -412,8 +414,25 @@ fn create_vtable_body_to_str(vtab: &CreateVirtualTable) -> String {
     } else {
         ""
     };
+    let ext_args = vtab
+        .args
+        .as_ref()
+        .unwrap_or(&vec![])
+        .iter()
+        .map(|a| limbo_ext::Value::from_text(a.to_string()))
+        .collect::<Vec<_>>();
+    let schema = module
+        .implementation
+        .init_schema(ext_args)
+        .unwrap_or_default();
+    let vtab_args = if let Some(first_paren) = schema.find('(') {
+        let closing_paren = schema.rfind(')').unwrap_or_default();
+        &schema[first_paren..=closing_paren]
+    } else {
+        "()"
+    };
     format!(
-        "CREATE VIRTUAL TABLE {} {} USING {}{}",
+        "CREATE VIRTUAL TABLE {} {} USING {}{}\n /*{}{}*/",
         vtab.tbl_name.name.0,
         if_not_exists,
         vtab.module_name.0,
@@ -421,7 +440,9 @@ fn create_vtable_body_to_str(vtab: &CreateVirtualTable) -> String {
             String::new()
         } else {
             format!("({})", args)
-        }
+        },
+        vtab.tbl_name.name.0,
+        vtab_args
     )
 }
 
@@ -429,6 +450,7 @@ pub fn translate_create_virtual_table(
     vtab: CreateVirtualTable,
     schema: &Schema,
     query_mode: QueryMode,
+    syms: &SymbolTable,
 ) -> Result<ProgramBuilder> {
     let ast::CreateVirtualTable {
         if_not_exists,
@@ -440,7 +462,12 @@ pub fn translate_create_virtual_table(
     let table_name = tbl_name.name.0.clone();
     let module_name_str = module_name.0.clone();
     let args_vec = args.clone().unwrap_or_default();
-
+    let Some(vtab_module) = syms.vtab_modules.get(&module_name_str) else {
+        bail_parse_error!("no such module: {}", module_name_str);
+    };
+    if !vtab_module.module_kind.eq(&VTabKind::VirtualTable) {
+        bail_parse_error!("module {} is not a virtual table", module_name_str);
+    };
     if schema.get_table(&table_name).is_some() && *if_not_exists {
         let mut program = ProgramBuilder::new(ProgramBuilderOpts {
             query_mode,
@@ -450,7 +477,7 @@ pub fn translate_create_virtual_table(
         });
         let init_label = program.emit_init();
         program.emit_halt();
-        program.resolve_label(init_label, program.offset());
+        program.preassign_label_to_next_insn(init_label);
         program.emit_transaction(true);
         program.emit_constant_insns();
         return Ok(program);
@@ -462,10 +489,10 @@ pub fn translate_create_virtual_table(
         approx_num_insns: 40,
         approx_num_labels: 2,
     });
-
+    let init_label = program.emit_init();
+    let start_offset = program.offset();
     let module_name_reg = program.emit_string8_new_reg(module_name_str.clone());
     let table_name_reg = program.emit_string8_new_reg(table_name.clone());
-
     let args_reg = if !args_vec.is_empty() {
         let args_start = program.alloc_register();
@@ -491,19 +518,17 @@ pub fn translate_create_virtual_table(
         table_name: table_name_reg,
         args_reg,
     });
-
     let table = schema.get_btree_table(SQLITE_TABLEID).unwrap();
     let sqlite_schema_cursor_id = program.alloc_cursor_id(
         Some(SQLITE_TABLEID.to_owned()),
         CursorType::BTreeTable(table.clone()),
     );
-    program.emit_insn(Insn::OpenWriteAsync {
+    program.emit_insn(Insn::OpenWrite {
         cursor_id: sqlite_schema_cursor_id,
-        root_page: 1,
+        root_page: 1usize.into(),
     });
-    program.emit_insn(Insn::OpenWriteAwait {});
 
-    let sql = create_vtable_body_to_str(&vtab);
+    let sql = create_vtable_body_to_str(&vtab, vtab_module.clone());
     emit_schema_entry(
         &mut program,
         sqlite_schema_cursor_id,
@@ -520,10 +545,8 @@ pub fn translate_create_virtual_table(
         where_clause: parse_schema_where_clause,
     });
 
-    let init_label = program.emit_init();
-    let start_offset = program.offset();
     program.emit_halt();
-    program.resolve_label(init_label, program.offset());
+    program.preassign_label_to_next_insn(init_label);
     program.emit_transaction(true);
     program.emit_constant_insns();
     program.emit_goto(start_offset);
@@ -543,13 +566,13 @@ pub fn translate_drop_table(
         approx_num_insns: 30,
         approx_num_labels: 1,
     });
-    let table = schema.get_btree_table(tbl_name.name.0.as_str());
+    let table = schema.get_table(tbl_name.name.0.as_str());
     if table.is_none() {
         if if_exists {
             let init_label = program.emit_init();
             let start_offset = program.offset();
             program.emit_halt();
-            program.resolve_label(init_label, program.offset());
+            program.preassign_label_to_next_insn(init_label);
             program.emit_transaction(true);
             program.emit_constant_insns();
             program.emit_goto(start_offset);
@@ -558,6 +581,7 @@ pub fn translate_drop_table(
         }
         bail_parse_error!("No such table: {}", tbl_name.name.0.as_str());
     }
+
     let table = table.unwrap(); // safe since we just checked for None
 
     let init_label = program.emit_init();
@@ -573,31 +597,27 @@ pub fn translate_drop_table(
     let row_id_reg = program.alloc_register(); // r5
 
     let table_name = "sqlite_schema";
-    let schema_table = schema.get_btree_table(&table_name).unwrap();
+    let schema_table = schema.get_btree_table(table_name).unwrap();
     let sqlite_schema_cursor_id = program.alloc_cursor_id(
         Some(table_name.to_string()),
         CursorType::BTreeTable(schema_table.clone()),
     );
-    program.emit_insn(Insn::OpenWriteAsync {
+    program.emit_insn(Insn::OpenWrite {
         cursor_id: sqlite_schema_cursor_id,
-        root_page: 1,
+        root_page: 1usize.into(),
     });
-    program.emit_insn(Insn::OpenWriteAwait {});
 
     // 1. Remove all entries from the schema table related to the table we are dropping, except for triggers
 
     // loop to beginning of schema table
-    program.emit_insn(Insn::RewindAsync {
-        cursor_id: sqlite_schema_cursor_id,
-    });
     let end_metadata_label = program.allocate_label();
-    program.emit_insn(Insn::RewindAwait {
+    let metadata_loop = program.allocate_label();
+    program.emit_insn(Insn::Rewind {
         cursor_id: sqlite_schema_cursor_id,
         pc_if_empty: end_metadata_label,
     });
+    program.preassign_label_to_next_insn(metadata_loop);
 
     // start loop on schema table
-    let metadata_loop = program.allocate_label();
-    program.resolve_label(metadata_loop, program.offset());
     program.emit_insn(Insn::Column {
         cursor_id: sqlite_schema_cursor_id,
         column: 2,
@@ -625,22 +645,16 @@ pub fn translate_drop_table(
         cursor_id: sqlite_schema_cursor_id,
         dest: row_id_reg,
     });
-    program.emit_insn(Insn::DeleteAsync {
-        cursor_id: sqlite_schema_cursor_id,
-    });
-    program.emit_insn(Insn::DeleteAwait {
+    program.emit_insn(Insn::Delete {
         cursor_id: sqlite_schema_cursor_id,
     });
     program.resolve_label(next_label, program.offset());
-    program.emit_insn(Insn::NextAsync {
-        cursor_id: sqlite_schema_cursor_id,
-    });
-    program.emit_insn(Insn::NextAwait {
+    program.emit_insn(Insn::Next {
         cursor_id: sqlite_schema_cursor_id,
         pc_if_next: metadata_loop,
     });
-    program.resolve_label(end_metadata_label, program.offset());
+    program.preassign_label_to_next_insn(end_metadata_label);
+
     // end of loop on schema table
 
     // 2. Destroy the indices within a loop
@@ -663,11 +677,31 @@ pub fn translate_drop_table(
     }
 
     // 3. Destroy the table structure
-    program.emit_insn(Insn::Destroy {
-        root: table.root_page,
-        former_root_reg: 0, // no autovacuum (https://www.sqlite.org/opcode.html#Destroy)
-        is_temp: 0,
-    });
+    match table.as_ref() {
+        Table::BTree(table) => {
+            program.emit_insn(Insn::Destroy {
+                root: table.root_page,
+                former_root_reg: 0, // no autovacuum (https://www.sqlite.org/opcode.html#Destroy)
+                is_temp: 0,
+            });
+        }
+        Table::Virtual(vtab) => {
+            // TableValuedFunctions are not stored in the schema as tables, so this check
+            // is a safeguard in case that behavior changes in the future, and mirrors what
+            // SQLite does.
+            if matches!(vtab.kind, limbo_ext::VTabKind::TableValuedFunction) {
+                return Err(crate::LimboError::ParseError(format!(
+                    "table {} may not be dropped",
+                    vtab.name
+                )));
+            }
+            program.emit_insn(Insn::VDestroy {
+                table_name: vtab.name.clone(),
+                db: 0, // TODO change this for multiple databases
+            });
+        }
+        Table::Pseudo(..) => unimplemented!(),
+    };
 
     let r6 = program.alloc_register();
     let r7 = program.alloc_register();
@@ -689,7 +723,7 @@ pub fn translate_drop_table(
 
     // end of the program
     program.emit_halt();
-    program.resolve_label(init_label, program.offset());
+    program.preassign_label_to_next_insn(init_label);
     program.emit_transaction(true);
     program.emit_constant_insns();
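
For illustration, the argument-extraction step in create_vtable_body_to_str above reduces to a small slice operation over the schema string returned by the module's init_schema. A standalone sketch of that step (local helper name is made up):

// Keep only the parenthesized column list of a CREATE TABLE schema string,
// falling back to "()" when no parentheses are present, as in the diff above.
fn vtab_args(schema: &str) -> &str {
    if let Some(first_paren) = schema.find('(') {
        let closing_paren = schema.rfind(')').unwrap_or_default();
        &schema[first_paren..=closing_paren]
    } else {
        "()"
    }
}

fn main() {
    assert_eq!(vtab_args("CREATE TABLE t(a, b TEXT)"), "(a, b TEXT)");
    assert_eq!(vtab_args("no parens here"), "()");
}
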
diff --git a/core/translate/select.rs b/core/translate/select.rs
index fe7656f16..b1eb613bb 100644
--- a/core/translate/select.rs
+++ b/core/translate/select.rs
@@ -3,7 +3,7 @@ use super::plan::{select_star, Operation, Search, SelectQueryType};
 use super::planner::Scope;
 use crate::function::{AggFunc, ExtFunc, Func};
 use crate::translate::optimizer::optimize_plan;
-use crate::translate::plan::{Aggregate, Direction, GroupBy, Plan, ResultSetColumn, SelectPlan};
+use crate::translate::plan::{Aggregate, GroupBy, Plan, ResultSetColumn, SelectPlan};
 use crate::translate::planner::{
     bind_column_references, break_predicate_at_and_boundaries, parse_from, parse_limit,
     parse_where, resolve_aggregates,
@@ -104,12 +104,17 @@ pub fn prepare_select_plan<'a>(
             match column {
                 ResultColumn::Star => {
                     select_star(&plan.table_references, &mut plan.result_columns);
+                    for table in plan.table_references.iter_mut() {
+                        for idx in 0..table.columns().len() {
+                            table.mark_column_used(idx);
+                        }
+                    }
                 }
                 ResultColumn::TableStar(name) => {
                     let name_normalized = normalize_ident(name.0.as_str());
                     let referenced_table = plan
                         .table_references
-                        .iter()
+                        .iter_mut()
                         .enumerate()
                         .find(|(_, t)| t.identifier == name_normalized);
 
@@ -117,23 +122,29 @@ pub fn prepare_select_plan<'a>(
                         crate::bail_parse_error!("Table {} not found", name.0);
                     }
                     let (table_index, table) = referenced_table.unwrap();
-                    for (idx, col) in table.columns().iter().enumerate() {
+                    let num_columns = table.columns().len();
+                    for idx in 0..num_columns {
+                        let is_rowid_alias = {
+                            let columns = table.columns();
+                            columns[idx].is_rowid_alias
+                        };
                         plan.result_columns.push(ResultSetColumn {
                             expr: ast::Expr::Column {
                                 database: None, // TODO: support different databases
                                 table: table_index,
                                 column: idx,
-                                is_rowid_alias: col.is_rowid_alias,
+                                is_rowid_alias,
                             },
                             alias: None,
                             contains_aggregates: false,
                         });
+                        table.mark_column_used(idx);
                     }
                 }
                 ResultColumn::Expr(ref mut expr, maybe_alias) => {
                     bind_column_references(
                         expr,
-                        &plan.table_references,
+                        &mut plan.table_references,
                         Some(&plan.result_columns),
                     )?;
                     match expr {
@@ -293,7 +304,7 @@ pub fn prepare_select_plan<'a>(
             // Parse the actual WHERE clause and add its conditions to the plan WHERE clause that already contains the join conditions.
             parse_where(
                 where_clause,
-                &plan.table_references,
+                &mut plan.table_references,
                 Some(&plan.result_columns),
                 &mut plan.where_clause,
             )?;
@@ -303,7 +314,7 @@ pub fn prepare_select_plan<'a>(
                     replace_column_number_with_copy_of_column_expr(expr, &plan.result_columns)?;
                     bind_column_references(
                         expr,
-                        &plan.table_references,
+                        &mut plan.table_references,
                         Some(&plan.result_columns),
                     )?;
                 }
@@ -316,7 +327,7 @@ pub fn prepare_select_plan<'a>(
                 for expr in predicates.iter_mut() {
                     bind_column_references(
                         expr,
-                        &plan.table_references,
+                        &mut plan.table_references,
                         Some(&plan.result_columns),
                     )?;
                     let contains_aggregates =
@@ -352,25 +363,19 @@ pub fn prepare_select_plan<'a>(
 
                 bind_column_references(
                     &mut o.expr,
-                    &plan.table_references,
+                    &mut plan.table_references,
                     Some(&plan.result_columns),
                 )?;
                 resolve_aggregates(&o.expr, &mut plan.aggregates);
 
-                key.push((
-                    o.expr,
-                    o.order.map_or(Direction::Ascending, |o| match o {
-                        ast::SortOrder::Asc => Direction::Ascending,
-                        ast::SortOrder::Desc => Direction::Descending,
-                    }),
-                ));
+                key.push((o.expr, o.order.unwrap_or(ast::SortOrder::Asc)));
             }
             plan.order_by = Some(key);
         }
 
         // Parse the LIMIT/OFFSET clause
         (plan.limit, plan.offset) =
-            select.limit.map_or(Ok((None, None)), |l| parse_limit(*l))?;
+            select.limit.map_or(Ok((None, None)), |l| parse_limit(&l))?;
 
         // Return the unoptimized query plan
         Ok(Plan::Select(plan))
@@ -411,8 +416,8 @@ fn count_plan_required_cursors(plan: &SelectPlan) -> usize {
             .map(|t| match &t.op {
                 Operation::Scan { .. } => 1,
                 Operation::Search(search) => match search {
-                    Search::RowidEq { .. } | Search::RowidSearch { .. } => 1,
-                    Search::IndexSearch { .. } => 2, // btree cursor and index cursor
+                    Search::RowidEq { .. } => 1,
+                    Search::Seek { index, .. } => 1 + index.is_some() as usize,
                 },
                 Operation::Subquery { plan, .. } => count_plan_required_cursors(plan),
            })
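
The unified Seek variant makes the cursor arithmetic above a one-liner: a seek through a secondary index needs a table cursor plus an index cursor, while a rowid seek needs only the table btree. A trivial standalone sketch of that counting rule:

// Mirrors: Search::Seek { index, .. } => 1 + index.is_some() as usize
fn cursors_for_seek(has_index: bool) -> usize {
    1 + has_index as usize
}

fn main() {
    assert_eq!(cursors_for_seek(false), 1); // rowid seek: table btree only
    assert_eq!(cursors_for_seek(true), 2); // index seek: table + index cursors
}
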
diff --git a/core/translate/subquery.rs b/core/translate/subquery.rs
index 1730312be..17434edc2 100644
--- a/core/translate/subquery.rs
+++ b/core/translate/subquery.rs
@@ -52,7 +52,7 @@ pub fn emit_subquery<'a>(
     t_ctx: &mut TranslateCtx<'a>,
 ) -> Result<usize> {
     let yield_reg = program.alloc_register();
-    let coroutine_implementation_start_offset = program.offset().add(1u32);
+    let coroutine_implementation_start_offset = program.allocate_label();
     match &mut plan.query_type {
         SelectQueryType::Subquery {
             yield_reg: y,
@@ -75,6 +75,7 @@ pub fn emit_subquery<'a>(
         meta_left_joins: (0..plan.table_references.len()).map(|_| None).collect(),
         meta_sort: None,
         reg_agg_start: None,
+        reg_nonagg_emit_once_flag: None,
         reg_result_cols_start: None,
         result_column_indexes_in_orderby_sorter: (0..plan.result_columns.len()).collect(),
         result_columns_to_skip_in_orderby_sorter: None,
@@ -82,6 +83,7 @@ pub fn emit_subquery<'a>(
         reg_offset: plan.offset.map(|_| program.alloc_register()),
         reg_limit_offset_sum: plan.offset.map(|_| program.alloc_register()),
         resolver: Resolver::new(t_ctx.resolver.symbol_table),
+        omit_predicates: Vec::new(),
     };
     let subquery_body_end_label = program.allocate_label();
     program.emit_insn(Insn::InitCoroutine {
@@ -89,6 +91,7 @@ pub fn emit_subquery<'a>(
         jump_on_definition: subquery_body_end_label,
         start_offset: coroutine_implementation_start_offset,
     });
+    program.preassign_label_to_next_insn(coroutine_implementation_start_offset);
     // Normally we mark each LIMIT value as a constant insn that is emitted only once, but in the case of a subquery,
     // we need to initialize it every time the subquery is run; otherwise subsequent runs of the subquery will already
     // have the LIMIT counter at 0, and will never return rows.
@@ -101,6 +104,6 @@ pub fn emit_subquery<'a>(
     let result_column_start_reg = emit_query(program, plan, &mut metadata)?;
     program.resolve_label(end_coroutine_label, program.offset());
     program.emit_insn(Insn::EndCoroutine { yield_reg });
-    program.resolve_label(subquery_body_end_label, program.offset());
+    program.preassign_label_to_next_insn(subquery_body_end_label);
 
     Ok(result_column_start_reg)
 }
diff --git a/core/translate/transaction.rs b/core/translate/transaction.rs
index 60e00e73b..11c0a8a10 100644
--- a/core/translate/transaction.rs
+++ b/core/translate/transaction.rs
@@ -33,7 +33,7 @@ pub fn translate_tx_begin(
         }
     }
     program.emit_halt();
-    program.resolve_label(init_label, program.offset());
+    program.preassign_label_to_next_insn(init_label);
     program.emit_goto(start_offset);
     Ok(program)
 }
@@ -52,7 +52,7 @@ pub fn translate_tx_commit(_tx_name: Option<ast::Name>) -> Result<ProgramBuilder> {
         rollback: false,
     });
     program.emit_halt();
-    program.resolve_label(init_label, program.offset());
+    program.preassign_label_to_next_insn(init_label);
     program.emit_goto(start_offset);
     Ok(program)
 }
diff --git a/core/translate/update.rs b/core/translate/update.rs
index a9388ee13..308a0cec1 100644
--- a/core/translate/update.rs
+++ b/core/translate/update.rs
@@ -11,9 +11,10 @@ use limbo_sqlite3_parser::ast::{self, Expr, ResultColumn, SortOrder, Update};
 use super::emitter::emit_program;
 use super::optimizer::optimize_plan;
 use super::plan::{
-    Direction, IterationDirection, Plan, ResultSetColumn, TableReference, UpdatePlan,
+    ColumnUsedMask, IterationDirection, Plan, ResultSetColumn, TableReference, UpdatePlan,
 };
-use super::planner::{bind_column_references, parse_limit, parse_where};
+use super::planner::bind_column_references;
+use super::planner::{parse_limit, parse_where};
 
 /*
 * Update is simple. By default we scan the table, and for each row, we check the WHERE
@@ -64,35 +65,50 @@ pub fn translate_update(
 }
 
 pub fn prepare_update_plan(schema: &Schema, body: &mut Update) -> crate::Result<Plan> {
+    if body.with.is_some() {
+        bail_parse_error!("WITH clause is not supported");
+    }
+    if body.or_conflict.is_some() {
+        bail_parse_error!("ON CONFLICT clause is not supported");
+    }
     let table_name = &body.tbl_name.name;
     let table = match schema.get_table(table_name.0.as_str()) {
         Some(table) => table,
         None => bail_parse_error!("Parse error: no such table: {}", table_name),
     };
-    let Some(btree_table) = table.btree() else {
-        bail_parse_error!("Error: {} is not a btree table", table_name);
-    };
-    let iter_dir: Option<IterationDirection> = body.order_by.as_ref().and_then(|order_by| {
-        order_by.first().and_then(|ob| {
-            ob.order.map(|o| match o {
-                SortOrder::Asc => IterationDirection::Forwards,
-                SortOrder::Desc => IterationDirection::Backwards,
+    let iter_dir = body
+        .order_by
+        .as_ref()
+        .and_then(|order_by| {
+            order_by.first().and_then(|ob| {
+                ob.order.map(|o| match o {
+                    SortOrder::Asc => IterationDirection::Forwards,
+                    SortOrder::Desc => IterationDirection::Backwards,
+                })
             })
         })
-    });
-    let table_references = vec![TableReference {
-        table: Table::BTree(btree_table.clone()),
+        .unwrap_or(IterationDirection::Forwards);
+    let mut table_references = vec![TableReference {
+        table: match table.as_ref() {
+            Table::Virtual(vtab) => Table::Virtual(vtab.clone()),
+            Table::BTree(btree_table) => Table::BTree(btree_table.clone()),
+            _ => unreachable!(),
+        },
         identifier: table_name.0.clone(),
-        op: Operation::Scan { iter_dir },
+        op: Operation::Scan {
+            iter_dir,
+            index: None,
+        },
         join_info: None,
+        col_used_mask: ColumnUsedMask::new(),
     }];
     let set_clauses = body
         .sets
         .iter_mut()
         .map(|set| {
             let ident = normalize_ident(set.col_names[0].0.as_str());
-            let col_index = btree_table
-                .columns
+            let col_index = table
+                .columns()
                 .iter()
                 .enumerate()
                 .find_map(|(i, col)| {
@@ -108,7 +124,7 @@ pub fn prepare_update_plan(schema: &Schema, body: &mut Update) -> crate::Result<
                 ))
             })?;
 
-            let _ = bind_column_references(&mut set.expr, &table_references, None);
+            let _ = bind_column_references(&mut set.expr, &mut table_references, None);
             Ok((col_index, set.expr.clone()))
         })
         .collect::<Result<Vec<_>, crate::LimboError>>()?;
@@ -118,7 +134,7 @@ pub fn prepare_update_plan(schema: &Schema, body: &mut Update) -> crate::Result<
     if let Some(returning) = &mut body.returning {
         for rc in returning.iter_mut() {
             if let ResultColumn::Expr(expr, alias) = rc {
-                bind_column_references(expr, &table_references, None)?;
+                bind_column_references(expr, &mut table_references, None)?;
                 result_columns.push(ResultSetColumn {
                     expr: expr.clone(),
                     alias: alias.as_ref().and_then(|a| {
@@ -138,30 +154,39 @@ pub fn prepare_update_plan(schema: &Schema, body: &mut Update) -> crate::Result<
     let order_by = body.order_by.as_ref().map(|order| {
         order
             .iter()
-            .map(|o| {
-                (
-                    o.expr.clone(),
-                    o.order
-                        .map(|s| match s {
-                            SortOrder::Asc => Direction::Ascending,
-                            SortOrder::Desc => Direction::Descending,
-                        })
-                        .unwrap_or(Direction::Ascending),
-                )
-            })
+            .map(|o| (o.expr.clone(), o.order.unwrap_or(SortOrder::Asc)))
             .collect()
     });
 
+    // Parse the WHERE clause
     parse_where(
         body.where_clause.as_ref().map(|w| *w.clone()),
-        &table_references,
+        &mut table_references,
         Some(&result_columns),
         &mut where_clause,
     )?;
-    let limit = if let Some(Ok((limit, _))) = body.limit.as_ref().map(|l| parse_limit(*l.clone())) {
-        limit
-    } else {
-        None
-    };
+
+    // Parse the LIMIT/OFFSET clause
+    let (limit, offset) = body
+        .limit
+        .as_ref()
+        .map(|l| parse_limit(l))
+        .unwrap_or(Ok((None, None)))?;
+
+    // Determine which indexes need to be updated by checking whether any column
+    // referenced in the SET clauses is contained in an index.
+    let indexes = schema.get_indices(&table_name.0);
+    let indexes_to_update = indexes
+        .iter()
+        .filter(|index| {
+            index.columns.iter().any(|index_column| {
+                set_clauses
+                    .iter()
+                    .any(|(set_index_column, _)| index_column.pos_in_table == *set_index_column)
+            })
+        })
+        .cloned()
+        .collect();
+
     Ok(Plan::Update(UpdatePlan {
         table_references,
         set_clauses,
@@ -169,6 +194,8 @@ pub fn prepare_update_plan(schema: &Schema, body: &mut Update) -> crate::Result<
         returning: Some(result_columns),
         order_by,
         limit,
+        offset,
         contains_constant_false_condition: false,
+        indexes_to_update,
     }))
 }
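
The index-selection filter at the end of prepare_update_plan is worth seeing in isolation: an index must be maintained by the UPDATE iff any of its columns appears in a SET clause. A minimal standalone sketch (stand-in types; the real set_clauses pair a column index with an ast::Expr):

struct IndexColumn {
    pos_in_table: usize,
}
struct Index {
    columns: Vec<IndexColumn>,
}

// Mirrors the .filter(...) predicate in prepare_update_plan above.
fn needs_update(index: &Index, set_clauses: &[(usize, &str)]) -> bool {
    index.columns.iter().any(|ic| {
        set_clauses
            .iter()
            .any(|(pos, _)| ic.pos_in_table == *pos)
    })
}

fn main() {
    let idx = Index {
        columns: vec![IndexColumn { pos_in_table: 1 }],
    };
    assert!(needs_update(&idx, &[(1, "expr")])); // SET touches indexed column 1
    assert!(!needs_update(&idx, &[(0, "expr")])); // SET touches only column 0
}
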
diff --git a/core/types.rs b/core/types.rs
index 9e7869b76..3d2faa4f8 100644
--- a/core/types.rs
+++ b/core/types.rs
@@ -1,10 +1,13 @@
 use limbo_ext::{AggCtx, FinalizeFunction, StepFunction};
+use limbo_sqlite3_parser::ast::SortOrder;
 
 use crate::error::LimboError;
 use crate::ext::{ExtValue, ExtValueType};
 use crate::pseudo::PseudoCursor;
+use crate::schema::Index;
 use crate::storage::btree::BTreeCursor;
 use crate::storage::sqlite3_ondisk::write_varint;
+use crate::translate::plan::IterationDirection;
 use crate::vdbe::sorter::Sorter;
 use crate::vdbe::{Register, VTabOpaqueCursor};
 use crate::Result;
@@ -22,6 +25,20 @@ pub enum OwnedValueType {
     Error,
 }
 
+impl Display for OwnedValueType {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        let value = match self {
+            Self::Null => "NULL",
+            Self::Integer => "INT",
+            Self::Float => "REAL",
+            Self::Blob => "BLOB",
+            Self::Text => "TEXT",
+            Self::Error => "ERROR",
+        };
+        write!(f, "{}", value)
+    }
+}
+
 #[derive(Debug, Clone, PartialEq)]
 pub enum TextSubtype {
     Text,
@@ -69,6 +86,15 @@ impl Text {
     }
 }
 
+impl From<String> for Text {
+    fn from(value: String) -> Self {
+        Text {
+            value: value.into_bytes(),
+            subtype: TextSubtype::Text,
+        }
+    }
+}
+
 impl TextRef {
     pub fn as_str(&self) -> &str {
         unsafe { std::str::from_utf8_unchecked(self.value.to_slice()) }
@@ -145,13 +171,13 @@ impl OwnedValue {
             OwnedValue::Null => {}
             OwnedValue::Integer(i) => {
                 let serial_type = SerialType::from(self);
-                match serial_type {
-                    SerialType::I8 => out.extend_from_slice(&(*i as i8).to_be_bytes()),
-                    SerialType::I16 => out.extend_from_slice(&(*i as i16).to_be_bytes()),
-                    SerialType::I24 => out.extend_from_slice(&(*i as i32).to_be_bytes()[1..]), // remove most significant byte
-                    SerialType::I32 => out.extend_from_slice(&(*i as i32).to_be_bytes()),
-                    SerialType::I48 => out.extend_from_slice(&i.to_be_bytes()[2..]), // remove 2 most significant bytes
-                    SerialType::I64 => out.extend_from_slice(&i.to_be_bytes()),
+                match serial_type.kind() {
+                    SerialTypeKind::I8 => out.extend_from_slice(&(*i as i8).to_be_bytes()),
+                    SerialTypeKind::I16 => out.extend_from_slice(&(*i as i16).to_be_bytes()),
+                    SerialTypeKind::I24 => out.extend_from_slice(&(*i as i32).to_be_bytes()[1..]), // remove most significant byte
+                    SerialTypeKind::I32 => out.extend_from_slice(&(*i as i32).to_be_bytes()),
+                    SerialTypeKind::I48 => out.extend_from_slice(&i.to_be_bytes()[2..]), // remove 2 most significant bytes
+                    SerialTypeKind::I64 => out.extend_from_slice(&i.to_be_bytes()),
                     _ => unreachable!(),
                 }
             }
@@ -197,6 +223,12 @@ impl Display for OwnedValue {
             }
             Self::Float(fl) => {
                 let fl = *fl;
+                if fl == f64::INFINITY {
+                    return write!(f, "Inf");
+                }
+                if fl == f64::NEG_INFINITY {
+                    return write!(f, "-Inf");
+                }
                 if fl.is_nan() {
                     return write!(f, "");
                 }
@@ -732,6 +764,10 @@ impl ImmutableRecord {
         &self.values[idx]
     }
 
+    pub fn get_value_opt(&self, idx: usize) -> Option<&RefValue> {
+        self.values.get(idx)
+    }
+
     pub fn len(&self) -> usize {
         self.values.len()
     }
@@ -750,18 +786,7 @@ impl ImmutableRecord {
             let n = write_varint(&mut serial_type_buf[0..], serial_type.into());
             serials.push((serial_type_buf, n));
 
-            let value_size = match serial_type {
-                SerialType::Null => 0,
-                SerialType::I8 => 1,
-                SerialType::I16 => 2,
-                SerialType::I24 => 3,
-                SerialType::I32 => 4,
-                SerialType::I48 => 6,
-                SerialType::I64 => 8,
-                SerialType::F64 => 8,
-                SerialType::Text { content_size } => content_size,
-                SerialType::Blob { content_size } => content_size,
-            };
+            let value_size = serial_type.size();
 
             size_header += n;
             size_values += value_size;
@@ -808,16 +833,17 @@ impl ImmutableRecord {
                 OwnedValue::Integer(i) => {
                     values.push(RefValue::Integer(*i));
                     let serial_type = SerialType::from(value);
-                    match serial_type {
-                        SerialType::I8 => writer.extend_from_slice(&(*i as i8).to_be_bytes()),
-                        SerialType::I16 => writer.extend_from_slice(&(*i as i16).to_be_bytes()),
-                        SerialType::I24 => {
+                    match serial_type.kind() {
+                        SerialTypeKind::ConstInt0 | SerialTypeKind::ConstInt1 => {}
+                        SerialTypeKind::I8 => writer.extend_from_slice(&(*i as i8).to_be_bytes()),
+                        SerialTypeKind::I16 => writer.extend_from_slice(&(*i as i16).to_be_bytes()),
+                        SerialTypeKind::I24 => {
                             writer.extend_from_slice(&(*i as i32).to_be_bytes()[1..])
                         } // remove most significant byte
-                        SerialType::I32 => writer.extend_from_slice(&(*i as i32).to_be_bytes()),
-                        SerialType::I48 => writer.extend_from_slice(&i.to_be_bytes()[2..]), // remove 2 most significant bytes
-                        SerialType::I64 => writer.extend_from_slice(&i.to_be_bytes()),
-                        _ => unreachable!(),
+                        SerialTypeKind::I32 => writer.extend_from_slice(&(*i as i32).to_be_bytes()),
+                        SerialTypeKind::I48 => writer.extend_from_slice(&i.to_be_bytes()[2..]), // remove 2 most significant bytes
+                        SerialTypeKind::I64 => writer.extend_from_slice(&i.to_be_bytes()),
+                        other => panic!("Serial type is not an integer: {:?}", other),
                     }
                 }
                 OwnedValue::Float(f) => {
@@ -877,6 +903,26 @@ impl ImmutableRecord {
     }
 }
 
+impl Display for ImmutableRecord {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        for value in &self.values {
+            match value {
+                RefValue::Null => write!(f, "NULL")?,
+                RefValue::Integer(i) => write!(f, "Integer({})", *i)?,
+                RefValue::Float(flo) => write!(f, "Float({})", *flo)?,
+                RefValue::Text(text_ref) => write!(f, "Text({})", text_ref.as_str())?,
+                RefValue::Blob(raw_slice) => {
+                    write!(f, "Blob({})", String::from_utf8_lossy(raw_slice.to_slice()))?
+                }
+            }
+            if value != self.values.last().unwrap() {
+                write!(f, ", ")?;
+            }
+        }
+        Ok(())
+    }
+}
+
 impl Clone for ImmutableRecord {
     fn clone(&self) -> Self {
         let mut new_values = Vec::new();
@@ -1009,8 +1055,66 @@ impl PartialOrd for RefValue {
     }
 }
 
-pub fn compare_immutable(l: &[RefValue], r: &[RefValue]) -> std::cmp::Ordering {
-    l.partial_cmp(r).unwrap()
+/// A bitfield that represents the comparison spec for index keys.
+/// Since indexed columns can individually specify ASC/DESC, each key must
+/// be compared differently.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
+#[repr(transparent)]
+pub struct IndexKeySortOrder(u64);
+
+impl IndexKeySortOrder {
+    pub fn get_sort_order_for_col(&self, column_idx: usize) -> SortOrder {
+        assert!(column_idx < 64, "column index out of range: {}", column_idx);
+        match self.0 & (1 << column_idx) {
+            0 => SortOrder::Asc,
+            _ => SortOrder::Desc,
+        }
+    }
+
+    pub fn from_index(index: &Index) -> Self {
+        let mut spec = 0;
+        for (i, column) in index.columns.iter().enumerate() {
+            spec |= ((column.order == SortOrder::Desc) as u64) << i;
+        }
+        IndexKeySortOrder(spec)
+    }
+
+    pub fn from_list(order: &[SortOrder]) -> Self {
+        let mut spec = 0;
+        for (i, order) in order.iter().enumerate() {
+            spec |= ((*order == SortOrder::Desc) as u64) << i;
+        }
+        IndexKeySortOrder(spec)
+    }
+
+    pub fn default() -> Self {
+        Self(0)
+    }
+}
+
+impl Default for IndexKeySortOrder {
+    fn default() -> Self {
+        Self::default()
+    }
+}
+
+pub fn compare_immutable(
+    l: &[RefValue],
+    r: &[RefValue],
+    index_key_sort_order: IndexKeySortOrder,
+) -> std::cmp::Ordering {
+    assert_eq!(l.len(), r.len());
+    for (i, (l, r)) in l.iter().zip(r).enumerate() {
+        let column_order = index_key_sort_order.get_sort_order_for_col(i);
+        let cmp = l.partial_cmp(r).unwrap();
+        if !cmp.is_eq() {
+            return match column_order {
+                SortOrder::Asc => cmp,
+                SortOrder::Desc => cmp.reverse(),
+            };
+        }
+    }
+    std::cmp::Ordering::Equal
 }
 
 const I8_LOW: i64 = -128;
@@ -1027,7 +1131,11 @@ const I48_HIGH: i64 = 140737488355327;
 /// Sqlite Serial Types
 /// https://www.sqlite.org/fileformat.html#record_format
 #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
-enum SerialType {
+#[repr(transparent)]
+pub struct SerialType(u64);
+
+#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
+pub enum SerialTypeKind {
     Null,
     I8,
     I16,
@@ -1036,47 +1144,154 @@ enum SerialType {
     I48,
     I64,
     F64,
-    Text { content_size: usize },
-    Blob { content_size: usize },
+    ConstInt0,
+    ConstInt1,
+    Text,
+    Blob,
+}
+
+impl SerialType {
+    #[inline(always)]
+    pub fn u64_is_valid_serial_type(n: u64) -> bool {
+        n != 10 && n != 11
+    }
+
+    const NULL: Self = Self(0);
+    const I8: Self = Self(1);
+    const I16: Self = Self(2);
+    const I24: Self = Self(3);
+    const I32: Self = Self(4);
+    const I48: Self = Self(5);
+    const I64: Self = Self(6);
+    const F64: Self = Self(7);
+    const CONST_INT0: Self = Self(8);
+    const CONST_INT1: Self = Self(9);
+
+    pub fn null() -> Self {
+        Self::NULL
+    }
+
+    pub fn i8() -> Self {
+        Self::I8
+    }
+
+    pub fn i16() -> Self {
+        Self::I16
+    }
+
+    pub fn i24() -> Self {
+        Self::I24
+    }
+
+    pub fn i32() -> Self {
+        Self::I32
+    }
+
+    pub fn i48() -> Self {
+        Self::I48
+    }
+
+    pub fn i64() -> Self {
+        Self::I64
+    }
+
+    pub fn f64() -> Self {
+        Self::F64
+    }
+
+    pub fn const_int0() -> Self {
+        Self::CONST_INT0
+    }
+
+    pub fn const_int1() -> Self {
+        Self::CONST_INT1
+    }
+
+    pub fn blob(size: u64) -> Self {
+        Self(12 + size * 2)
+    }
+
+    pub fn text(size: u64) -> Self {
+        Self(13 + size * 2)
+    }
+
+    pub fn kind(&self) -> SerialTypeKind {
+        match self.0 {
+            0 => SerialTypeKind::Null,
+            1 => SerialTypeKind::I8,
+            2 => SerialTypeKind::I16,
+            3 => SerialTypeKind::I24,
+            4 => SerialTypeKind::I32,
+            5 => SerialTypeKind::I48,
+            6 => SerialTypeKind::I64,
+            7 => SerialTypeKind::F64,
+            8 => SerialTypeKind::ConstInt0,
+            9 => SerialTypeKind::ConstInt1,
+            n if n >= 12 => match n % 2 {
+                0 => SerialTypeKind::Blob,
+                1 => SerialTypeKind::Text,
+                _ => unreachable!(),
+            },
+            _ => unreachable!(),
+        }
+    }
+
+    pub fn size(&self) -> usize {
+        match 
self.kind() { + SerialTypeKind::Null => 0, + SerialTypeKind::I8 => 1, + SerialTypeKind::I16 => 2, + SerialTypeKind::I24 => 3, + SerialTypeKind::I32 => 4, + SerialTypeKind::I48 => 6, + SerialTypeKind::I64 => 8, + SerialTypeKind::F64 => 8, + SerialTypeKind::ConstInt0 => 0, + SerialTypeKind::ConstInt1 => 0, + SerialTypeKind::Text => (self.0 as usize - 13) / 2, + SerialTypeKind::Blob => (self.0 as usize - 12) / 2, + } + } } impl From<&OwnedValue> for SerialType { fn from(value: &OwnedValue) -> Self { match value { - OwnedValue::Null => SerialType::Null, + OwnedValue::Null => SerialType::null(), OwnedValue::Integer(i) => match i { - i if *i >= I8_LOW && *i <= I8_HIGH => SerialType::I8, - i if *i >= I16_LOW && *i <= I16_HIGH => SerialType::I16, - i if *i >= I24_LOW && *i <= I24_HIGH => SerialType::I24, - i if *i >= I32_LOW && *i <= I32_HIGH => SerialType::I32, - i if *i >= I48_LOW && *i <= I48_HIGH => SerialType::I48, - _ => SerialType::I64, - }, - OwnedValue::Float(_) => SerialType::F64, - OwnedValue::Text(t) => SerialType::Text { - content_size: t.value.len(), - }, - OwnedValue::Blob(b) => SerialType::Blob { - content_size: b.len(), + 0 => SerialType::const_int0(), + 1 => SerialType::const_int1(), + i if *i >= I8_LOW && *i <= I8_HIGH => SerialType::i8(), + i if *i >= I16_LOW && *i <= I16_HIGH => SerialType::i16(), + i if *i >= I24_LOW && *i <= I24_HIGH => SerialType::i24(), + i if *i >= I32_LOW && *i <= I32_HIGH => SerialType::i32(), + i if *i >= I48_LOW && *i <= I48_HIGH => SerialType::i48(), + _ => SerialType::i64(), }, + OwnedValue::Float(_) => SerialType::f64(), + OwnedValue::Text(t) => SerialType::text(t.value.len() as u64), + OwnedValue::Blob(b) => SerialType::blob(b.len() as u64), } } } impl From for u64 { fn from(serial_type: SerialType) -> Self { - match serial_type { - SerialType::Null => 0, - SerialType::I8 => 1, - SerialType::I16 => 2, - SerialType::I24 => 3, - SerialType::I32 => 4, - SerialType::I48 => 5, - SerialType::I64 => 6, - SerialType::F64 => 7, - SerialType::Text { content_size } => (content_size * 2 + 13) as u64, - SerialType::Blob { content_size } => (content_size * 2 + 12) as u64, + serial_type.0 + } +} + +impl TryFrom for SerialType { + type Error = LimboError; + + fn try_from(uint: u64) -> Result { + if uint == 10 || uint == 11 { + return Err(LimboError::Corrupt(format!( + "Invalid serial type: {}", + uint + ))); } + Ok(SerialType(uint)) } } @@ -1104,13 +1319,15 @@ impl Record { OwnedValue::Null => {} OwnedValue::Integer(i) => { let serial_type = SerialType::from(value); - match serial_type { - SerialType::I8 => buf.extend_from_slice(&(*i as i8).to_be_bytes()), - SerialType::I16 => buf.extend_from_slice(&(*i as i16).to_be_bytes()), - SerialType::I24 => buf.extend_from_slice(&(*i as i32).to_be_bytes()[1..]), // remove most significant byte - SerialType::I32 => buf.extend_from_slice(&(*i as i32).to_be_bytes()), - SerialType::I48 => buf.extend_from_slice(&i.to_be_bytes()[2..]), // remove 2 most significant bytes - SerialType::I64 => buf.extend_from_slice(&i.to_be_bytes()), + match serial_type.kind() { + SerialTypeKind::I8 => buf.extend_from_slice(&(*i as i8).to_be_bytes()), + SerialTypeKind::I16 => buf.extend_from_slice(&(*i as i16).to_be_bytes()), + SerialTypeKind::I24 => { + buf.extend_from_slice(&(*i as i32).to_be_bytes()[1..]) + } // remove most significant byte + SerialTypeKind::I32 => buf.extend_from_slice(&(*i as i32).to_be_bytes()), + SerialTypeKind::I48 => buf.extend_from_slice(&i.to_be_bytes()[2..]), // remove 2 most significant bytes + SerialTypeKind::I64 => 
buf.extend_from_slice(&i.to_be_bytes()), _ => unreachable!(), } } @@ -1193,11 +1410,43 @@ pub enum CursorResult { IO, } -#[derive(Clone, PartialEq, Debug)] +#[derive(Clone, Copy, PartialEq, Eq, Debug)] +/// The match condition of a table/index seek. pub enum SeekOp { EQ, GE, GT, + LE, + LT, +} + +impl SeekOp { + /// A given seek op implies an iteration direction. + /// + /// For example, a seek with SeekOp::GT implies: + /// Find the first table/index key that compares greater than the seek key + /// -> used in forwards iteration. + /// + /// A seek with SeekOp::LE implies: + /// Find the last table/index key that compares less than or equal to the seek key + /// -> used in backwards iteration. + #[inline(always)] + pub fn iteration_direction(&self) -> IterationDirection { + match self { + SeekOp::EQ | SeekOp::GE | SeekOp::GT => IterationDirection::Forwards, + SeekOp::LE | SeekOp::LT => IterationDirection::Backwards, + } + } + + pub fn reverse(&self) -> Self { + match self { + SeekOp::EQ => SeekOp::EQ, + SeekOp::GE => SeekOp::LE, + SeekOp::GT => SeekOp::LT, + SeekOp::LE => SeekOp::GE, + SeekOp::LT => SeekOp::GT, + } + } } #[derive(Clone, PartialEq, Debug)] @@ -1234,7 +1483,7 @@ mod tests { // First byte should be header size assert_eq!(header[0], header_length as u8); // Second byte should be serial type for NULL - assert_eq!(header[1] as u64, u64::from(SerialType::Null)); + assert_eq!(header[1] as u64, u64::from(SerialType::null())); // Check that the buffer is empty after the header assert_eq!(buf.len(), header_length); } @@ -1258,12 +1507,12 @@ mod tests { assert_eq!(header[0], header_length as u8); // Header should be larger than number of values // Check that correct serial types were chosen - assert_eq!(header[1] as u64, u64::from(SerialType::I8)); - assert_eq!(header[2] as u64, u64::from(SerialType::I16)); - assert_eq!(header[3] as u64, u64::from(SerialType::I24)); - assert_eq!(header[4] as u64, u64::from(SerialType::I32)); - assert_eq!(header[5] as u64, u64::from(SerialType::I48)); - assert_eq!(header[6] as u64, u64::from(SerialType::I64)); + assert_eq!(header[1] as u64, u64::from(SerialType::i8())); + assert_eq!(header[2] as u64, u64::from(SerialType::i16())); + assert_eq!(header[3] as u64, u64::from(SerialType::i24())); + assert_eq!(header[4] as u64, u64::from(SerialType::i32())); + assert_eq!(header[5] as u64, u64::from(SerialType::i48())); + assert_eq!(header[6] as u64, u64::from(SerialType::i64())); // test that the bytes after the header can be interpreted as the correct values let mut cur_offset = header_length; @@ -1326,7 +1575,7 @@ mod tests { // First byte should be header size assert_eq!(header[0], header_length as u8); // Second byte should be serial type for FLOAT - assert_eq!(header[1] as u64, u64::from(SerialType::F64)); + assert_eq!(header[1] as u64, u64::from(SerialType::f64())); // Check that the bytes after the header can be interpreted as the float let float_bytes = &buf[header_length..header_length + size_of::()]; let float = f64::from_be_bytes(float_bytes.try_into().unwrap()); @@ -1390,11 +1639,11 @@ mod tests { // First byte should be header size assert_eq!(header[0], header_length as u8); // Second byte should be serial type for NULL - assert_eq!(header[1] as u64, u64::from(SerialType::Null)); + assert_eq!(header[1] as u64, u64::from(SerialType::null())); // Third byte should be serial type for I8 - assert_eq!(header[2] as u64, u64::from(SerialType::I8)); + assert_eq!(header[2] as u64, u64::from(SerialType::i8())); // Fourth byte should be serial type for 
F64 - assert_eq!(header[3] as u64, u64::from(SerialType::F64)); + assert_eq!(header[3] as u64, u64::from(SerialType::f64())); // Fifth byte should be serial type for TEXT, which is (len * 2 + 13) assert_eq!(header[4] as u64, (4 * 2 + 13) as u64); diff --git a/core/util.rs b/core/util.rs index 13b53bcb3..4e79d151f 100644 --- a/core/util.rs +++ b/core/util.rs @@ -2,6 +2,7 @@ use limbo_sqlite3_parser::ast::{self, CreateTableBody, Expr, FunctionTail, Liter use std::{rc::Rc, sync::Arc}; use crate::{ + function::Func, schema::{self, Column, Schema, Type}, types::{OwnedValue, OwnedValueType}, LimboError, OpenFlags, Result, Statement, StepResult, SymbolTable, IO, @@ -36,6 +37,21 @@ pub fn normalize_ident(identifier: &str) -> String { pub const PRIMARY_KEY_AUTOMATIC_INDEX_NAME_PREFIX: &str = "sqlite_autoindex_"; +enum UnparsedIndex { + /// CREATE INDEX idx ON table_name(sql) + FromSql { + table_name: String, + root_page: usize, + sql: String, + }, + /// Implicitly created index due to primary key constraints (or UNIQUE, but not implemented) + FromConstraint { + name: String, + table_name: String, + root_page: usize, + }, +} + pub fn parse_schema_rows( rows: Option<Statement>, schema: &mut Schema, @@ -45,7 +61,7 @@ ) -> Result<()> { if let Some(mut rows) = rows { rows.set_mv_tx_id(mv_tx_id); - let mut automatic_indexes = Vec::new(); + let mut unparsed_indexes = Vec::with_capacity(10); loop { match rows.step()? { StepResult::Row => { @@ -58,9 +74,37 @@ "table" => { let root_page: i64 = row.get::<i64>(3)?; let sql: &str = row.get::<&str>(4)?; - if root_page == 0 && sql.to_lowercase().contains("virtual") { + if root_page == 0 && sql.to_lowercase().contains("create virtual") { let name: &str = row.get::<&str>(1)?; - let vtab = syms.vtabs.get(name).unwrap().clone(); + // a virtual table is found in the sqlite_schema, but it's no + // longer in the in-memory schema. We need to recreate it if + // the module is loaded in the symbol table. + let vtab = if let Some(vtab) = syms.vtabs.get(name) { + vtab.clone() + } else { + let mod_name = module_name_from_sql(sql)?; + if let Some(vmod) = syms.vtab_modules.get(mod_name) { + if let limbo_ext::VTabKind::VirtualTable = vmod.module_kind + { + crate::VirtualTable::from_args( + Some(name), + mod_name, + module_args_from_sql(sql)?, + syms, + vmod.module_kind, + None, + )? + } else { + return Err(LimboError::Corrupt(format!("Table valued function: {name} registered as virtual table in schema"))); + } + } else { + // the extension isn't loaded, so we return an error. + return Err(LimboError::ExtensionError(format!( + "Virtual table module '{}' not found\nPlease load extension", + &mod_name + ))); + } + }; schema.add_virtual_table(vtab); } else { let table = schema::BTreeTable::from_sql(sql, root_page as usize)?; @@ -71,21 +115,24 @@ let root_page: i64 = row.get::<i64>(3)?; match row.get::<&str>(4) { Ok(sql) => { - let index = schema::Index::from_sql(sql, root_page as usize)?; - schema.add_index(Arc::new(index)); + unparsed_indexes.push(UnparsedIndex::FromSql { + table_name: row.get::<&str>(2)?.to_string(), + root_page: root_page as usize, + sql: sql.to_string(), + }); } _ => { // Automatic index on primary key, e.g. 
// table|foo|foo|2|CREATE TABLE foo (a text PRIMARY KEY, b) // index|sqlite_autoindex_foo_1|foo|3| - let index_name = row.get::<&str>(1)?; - let table_name = row.get::<&str>(2)?; + let index_name = row.get::<&str>(1)?.to_string(); + let table_name = row.get::<&str>(2)?.to_string(); let root_page = row.get::(3)?; - automatic_indexes.push(( - index_name.to_string(), - table_name.to_string(), - root_page, - )); + unparsed_indexes.push(UnparsedIndex::FromConstraint { + name: index_name, + table_name, + root_page: root_page as usize, + }); } } } @@ -102,12 +149,31 @@ pub fn parse_schema_rows( StepResult::Busy => break, } } - for (index_name, table_name, root_page) in automatic_indexes { - // We need to process these after all tables are loaded into memory due to the schema.get_table() call - let table = schema.get_btree_table(&table_name).unwrap(); - let index = - schema::Index::automatic_from_primary_key(&table, &index_name, root_page as usize)?; - schema.add_index(Arc::new(index)); + for unparsed_index in unparsed_indexes { + match unparsed_index { + UnparsedIndex::FromSql { + table_name, + root_page, + sql, + } => { + let table = schema.get_btree_table(&table_name).unwrap(); + let index = schema::Index::from_sql(&sql, root_page as usize, table.as_ref())?; + schema.add_index(Arc::new(index)); + } + UnparsedIndex::FromConstraint { + name, + table_name, + root_page, + } => { + let table = schema.get_btree_table(&table_name).unwrap(); + let index = schema::Index::automatic_from_primary_key( + table.as_ref(), + &name, + root_page as usize, + )?; + schema.add_index(Arc::new(index)); + } + } } } Ok(()) @@ -132,6 +198,99 @@ pub fn check_ident_equivalency(ident1: &str, ident2: &str) -> bool { strip_quotes(ident1).eq_ignore_ascii_case(strip_quotes(ident2)) } +fn module_name_from_sql(sql: &str) -> Result<&str> { + if let Some(start) = sql.find("USING") { + let start = start + 6; + // stop at the first space, semicolon, or parenthesis + let end = sql[start..] + .find(|c: char| c.is_whitespace() || c == ';' || c == '(') + .unwrap_or(sql.len() - start) + + start; + Ok(sql[start..end].trim()) + } else { + Err(LimboError::InvalidArgument( + "Expected 'USING' in module name".to_string(), + )) + } +} + +// CREATE VIRTUAL TABLE table_name USING module_name(arg1, arg2, ...); +// CREATE VIRTUAL TABLE table_name USING module_name; +fn module_args_from_sql(sql: &str) -> Result> { + if !sql.contains('(') { + return Ok(vec![]); + } + let start = sql.find('(').ok_or_else(|| { + LimboError::InvalidArgument("Expected '(' in module argument list".to_string()) + })? 
+ 1; + let end = sql.rfind(')').ok_or_else(|| { + LimboError::InvalidArgument("Expected ')' in module argument list".to_string()) + })?; + + let mut args = Vec::new(); + let mut current_arg = String::new(); + let mut chars = sql[start..end].chars().peekable(); + let mut in_quotes = false; + + while let Some(c) = chars.next() { + match c { + '\'' => { + if in_quotes { + if chars.peek() == Some(&'\'') { + // Escaped quote + current_arg.push('\''); + chars.next(); + } else { + in_quotes = false; + args.push(limbo_ext::Value::from_text(current_arg.trim().to_string())); + current_arg.clear(); + // Skip until comma or end + while let Some(&nc) = chars.peek() { + if nc == ',' { + chars.next(); // Consume comma + break; + } else if nc.is_whitespace() { + chars.next(); + } else { + return Err(LimboError::InvalidArgument( + "Unexpected characters after quoted argument".to_string(), + )); + } + } + } + } else { + in_quotes = true; + } + } + ',' => { + if !in_quotes { + if !current_arg.trim().is_empty() { + args.push(limbo_ext::Value::from_text(current_arg.trim().to_string())); + current_arg.clear(); + } + } else { + current_arg.push(c); + } + } + _ => { + current_arg.push(c); + } + } + } + + if !current_arg.trim().is_empty() && !in_quotes { + args.push(limbo_ext::Value::from_text(current_arg.trim().to_string())); + } + + if in_quotes { + return Err(LimboError::InvalidArgument( + "Unterminated string literal in module arguments".to_string(), + )); + } + + Ok(args) +} + pub fn check_literal_equivalency(lhs: &Literal, rhs: &Literal) -> bool { match (lhs, rhs) { (Literal::Numeric(n1), Literal::Numeric(n2)) => cmp_numeric_strings(n1, n2), @@ -278,7 +437,11 @@ pub fn exprs_are_equivalent(expr1: &Expr, expr2: &Expr) -> bool { (Expr::Unary(op1, expr1), Expr::Unary(op2, expr2)) => { op1 == op2 && exprs_are_equivalent(expr1, expr2) } - (Expr::Variable(var1), Expr::Variable(var2)) => var1 == var2, + // Variables that are not bound to a specific value, are treated as NULL + // https://sqlite.org/lang_expr.html#varparam + (Expr::Variable(var), Expr::Variable(var2)) if var == "" && var2 == "" => false, + // Named variables can be compared by their name + (Expr::Variable(val), Expr::Variable(val2)) => val == val2, (Expr::Parenthesized(exprs1), Expr::Parenthesized(exprs2)) => { exprs1.len() == exprs2.len() && exprs1 @@ -403,6 +566,39 @@ pub fn columns_from_create_table_body(body: &ast::CreateTableBody) -> crate::Res .collect::>()) } +/// This function checks if a given expression is a constant value that can be pushed down to the database engine. +/// It is expected to be called with the other half of a binary expression with an Expr::Column +pub fn can_pushdown_predicate(expr: &Expr, table_idx: usize) -> bool { + match expr { + Expr::Literal(_) => true, + Expr::Column { table, .. } => *table <= table_idx, + Expr::Binary(lhs, _, rhs) => { + can_pushdown_predicate(lhs, table_idx) && can_pushdown_predicate(rhs, table_idx) + } + Expr::Parenthesized(exprs) => can_pushdown_predicate(exprs.first().unwrap(), table_idx), + Expr::Unary(_, expr) => can_pushdown_predicate(expr, table_idx), + Expr::FunctionCall { args, name, .. } => { + let function = crate::function::Func::resolve_function( + &name.0, + args.as_ref().map_or(0, |a| a.len()), + ); + // is deterministic + matches!(function, Ok(Func::Scalar(_))) + } + Expr::Like { lhs, rhs, .. } => { + can_pushdown_predicate(lhs, table_idx) && can_pushdown_predicate(rhs, table_idx) + } + Expr::Between { + lhs, start, end, .. 
+ } => { + can_pushdown_predicate(lhs, table_idx) + && can_pushdown_predicate(start, table_idx) + && can_pushdown_predicate(end, table_idx) + } + _ => false, + } +} + #[derive(Debug, Default, PartialEq)] pub struct OpenOptions<'a> { /// The authority component of the URI. may be 'localhost' or empty @@ -716,11 +912,10 @@ fn parse_numeric_str(text: &str) -> Result<(OwnedValueType, &str), ()> { let text = text.trim(); let bytes = text.as_bytes(); - if bytes.is_empty() - || bytes[0] == b'e' - || bytes[0] == b'E' - || (bytes[0] == b'.' && (bytes[1] == b'e' || bytes[1] == b'E')) - { + if matches!( + bytes, + [] | [b'e', ..] | [b'E', ..] | [b'.', b'e' | b'E', ..] + ) { return Err(()); } @@ -824,6 +1019,24 @@ pub mod tests { assert_eq!(normalize_ident("\"foo\""), "foo"); } + #[test] + fn test_anonymous_variable_comparison() { + let expr1 = Expr::Variable("".to_string()); + let expr2 = Expr::Variable("".to_string()); + assert!(!exprs_are_equivalent(&expr1, &expr2)); + } + + #[test] + fn test_named_variable_comparison() { + let expr1 = Expr::Variable("1".to_string()); + let expr2 = Expr::Variable("1".to_string()); + assert!(exprs_are_equivalent(&expr1, &expr2)); + + let expr1 = Expr::Variable("1".to_string()); + let expr2 = Expr::Variable("2".to_string()); + assert!(!exprs_are_equivalent(&expr1, &expr2)); + } + #[test] fn test_basic_addition_exprs_are_equivalent() { let expr1 = Expr::Binary( @@ -1632,4 +1845,88 @@ pub mod tests { Ok((OwnedValueType::Float, "1.23e4")) ); } + + #[test] + fn test_module_name_basic() { + let sql = "CREATE VIRTUAL TABLE x USING y;"; + assert_eq!(module_name_from_sql(sql).unwrap(), "y"); + } + + #[test] + fn test_module_name_with_args() { + let sql = "CREATE VIRTUAL TABLE x USING modname('a', 'b');"; + assert_eq!(module_name_from_sql(sql).unwrap(), "modname"); + } + + #[test] + fn test_module_name_missing_using() { + let sql = "CREATE VIRTUAL TABLE x (a, b);"; + assert!(module_name_from_sql(sql).is_err()); + } + + #[test] + fn test_module_name_no_semicolon() { + let sql = "CREATE VIRTUAL TABLE x USING limbo(a, b)"; + assert_eq!(module_name_from_sql(sql).unwrap(), "limbo"); + } + + #[test] + fn test_module_name_no_semicolon_or_args() { + let sql = "CREATE VIRTUAL TABLE x USING limbo"; + assert_eq!(module_name_from_sql(sql).unwrap(), "limbo"); + } + + #[test] + fn test_module_args_none() { + let sql = "CREATE VIRTUAL TABLE x USING modname;"; + let args = module_args_from_sql(sql).unwrap(); + assert_eq!(args.len(), 0); + } + + #[test] + fn test_module_args_basic() { + let sql = "CREATE VIRTUAL TABLE x USING modname('arg1', 'arg2');"; + let args = module_args_from_sql(sql).unwrap(); + assert_eq!(args.len(), 2); + assert_eq!("arg1", args[0].to_text().unwrap()); + assert_eq!("arg2", args[1].to_text().unwrap()); + for arg in args { + unsafe { arg.__free_internal_type() } + } + } + + #[test] + fn test_module_args_with_escaped_quote() { + let sql = "CREATE VIRTUAL TABLE x USING modname('a''b', 'c');"; + let args = module_args_from_sql(sql).unwrap(); + assert_eq!(args.len(), 2); + assert_eq!(args[0].to_text().unwrap(), "a'b"); + assert_eq!(args[1].to_text().unwrap(), "c"); + for arg in args { + unsafe { arg.__free_internal_type() } + } + } + + #[test] + fn test_module_args_unterminated_string() { + let sql = "CREATE VIRTUAL TABLE x USING modname('arg1, 'arg2');"; + assert!(module_args_from_sql(sql).is_err()); + } + + #[test] + fn test_module_args_extra_garbage_after_quote() { + let sql = "CREATE VIRTUAL TABLE x USING modname('arg1'x);"; + 
assert!(module_args_from_sql(sql).is_err()); + } + + #[test] + fn test_module_args_trailing_comma() { + let sql = "CREATE VIRTUAL TABLE x USING modname('arg1',);"; + let args = module_args_from_sql(sql).unwrap(); + assert_eq!(args.len(), 1); + assert_eq!("arg1", args[0].to_text().unwrap()); + for arg in args { + unsafe { arg.__free_internal_type() } + } + } } diff --git a/core/vdbe/builder.rs b/core/vdbe/builder.rs index 058fc8aab..6be6fdad8 100644 --- a/core/vdbe/builder.rs +++ b/core/vdbe/builder.rs @@ -1,6 +1,6 @@ use std::{ cell::Cell, - collections::HashMap, + cmp::Ordering, rc::{Rc, Weak}, sync::Arc, }; @@ -16,24 +16,25 @@ use crate::{ Connection, VirtualTable, }; -use super::{BranchOffset, CursorID, Insn, InsnFunction, InsnReference, Program}; +use super::{BranchOffset, CursorID, Insn, InsnFunction, InsnReference, JumpTarget, Program}; #[allow(dead_code)] pub struct ProgramBuilder { next_free_register: usize, next_free_cursor_id: usize, - insns: Vec<(Insn, InsnFunction)>, - // for temporarily storing instructions that will be put after Transaction opcode - constant_insns: Vec<(Insn, InsnFunction)>, - // Vector of labels which must be assigned to next emitted instruction - next_insn_labels: Vec<BranchOffset>, + /// Instruction, the function to execute it with, and its original index in the vector. + insns: Vec<(Insn, InsnFunction, usize)>, + /// A span of instructions from (offset_start_inclusive, offset_end_inclusive), + /// that are deemed to be compile-time constant and can be hoisted out of loops + /// so that they get evaluated only once at the start of the program. + pub constant_spans: Vec<(usize, usize)>, // Cursors that are referenced by the program. Indexed by CursorID. pub cursor_ref: Vec<(Option<String>, CursorType)>, /// A vector where index=label number, value=resolved offset. Resolved in build(). - label_to_resolved_offset: Vec<Option<InsnReference>>, + label_to_resolved_offset: Vec<Option<(InsnReference, JumpTarget)>>, // Bitmask of cursors that have emitted a SeekRowid instruction. seekrowid_emitted_bitmask: u64, // map of instruction index to manual comment (used in EXPLAIN only) - comments: Option<HashMap<InsnReference, &'static str>>, + comments: Option<Vec<(InsnReference, &'static str)>>, pub parameters: Parameters, pub result_columns: Vec<ResultSetColumn>, pub table_references: Vec<TableReference>, @@ -82,13 +83,12 @@ impl ProgramBuilder { next_free_register: 1, next_free_cursor_id: 0, insns: Vec::with_capacity(opts.approx_num_insns), - next_insn_labels: Vec::with_capacity(2), cursor_ref: Vec::with_capacity(opts.num_cursors), - constant_insns: Vec::new(), + constant_spans: Vec::new(), label_to_resolved_offset: Vec::with_capacity(opts.approx_num_labels), seekrowid_emitted_bitmask: 0, comments: if opts.query_mode == QueryMode::Explain { - Some(HashMap::new()) + Some(Vec::new()) } else { None }, @@ -98,6 +98,56 @@ impl ProgramBuilder { } } + /// Start a new constant span. The next instruction to be emitted will be the first + /// instruction in the span. + pub fn constant_span_start(&mut self) -> usize { + let span = self.constant_spans.len(); + let start = self.insns.len(); + self.constant_spans.push((start, usize::MAX)); + span + } + + /// End the current constant span. The last instruction that was emitted is the last + /// instruction in the span. + pub fn constant_span_end(&mut self, span_idx: usize) { + let span = &mut self.constant_spans[span_idx]; + if span.1 == usize::MAX { + span.1 = self.insns.len().saturating_sub(1); + } + } + + /// End all constant spans that are currently open. 
This is used to handle edge cases + /// where we think a parent expression is constant, but we decide during the evaluation + /// of one of its children that it is not. + pub fn constant_span_end_all(&mut self) { + for span in self.constant_spans.iter_mut() { + if span.1 == usize::MAX { + span.1 = self.insns.len().saturating_sub(1); + } + } + } + + /// Check if there is a constant span that is currently open. + pub fn constant_span_is_open(&self) -> bool { + self.constant_spans + .last() + .map_or(false, |(_, end)| *end == usize::MAX) + } + + /// Get the index of the next constant span. + /// Used in [crate::translate::expr::translate_expr_no_constant_opt()] to invalidate + /// all constant spans after the given index. + pub fn constant_spans_next_idx(&self) -> usize { + self.constant_spans.len() + } + + /// Invalidate all constant spans after the given index. This is used when we want to + /// be sure that constant optimization is never used for translating a given expression. + /// See [crate::translate::expr::translate_expr_no_constant_opt()] for more details. + pub fn constant_spans_invalidate_after(&mut self, idx: usize) { + self.constant_spans.truncate(idx); + } + pub fn alloc_register(&mut self) -> usize { let reg = self.next_free_register; self.next_free_register += 1; @@ -123,12 +173,14 @@ impl ProgramBuilder { } pub fn emit_insn(&mut self, insn: Insn) { - for label in self.next_insn_labels.drain(..) { - self.label_to_resolved_offset[label.to_label_value() as usize] = - Some(self.insns.len() as InsnReference); - } let function = insn.to_function(); - self.insns.push((insn, function)); + self.insns.push((insn, function, self.insns.len())); + } + + pub fn close_cursors(&mut self, cursors: &[CursorID]) { + for cursor in cursors { + self.emit_insn(Insn::Close { cursor_id: *cursor }); + } } pub fn emit_string8(&mut self, value: String, dest: usize) { @@ -194,20 +246,69 @@ impl ProgramBuilder { pub fn add_comment(&mut self, insn_index: BranchOffset, comment: &'static str) { if let Some(comments) = &mut self.comments { - comments.insert(insn_index.to_offset_int(), comment); + comments.push((insn_index.to_offset_int(), comment)); } } - // Emit an instruction that will be put at the end of the program (after Transaction statement). - // This is useful for instructions that otherwise will be unnecessarily repeated in a loop. - // Example: In `SELECT * from users where name='John'`, it is unnecessary to set r[1]='John' as we SCAN users table. - // We could simply set it once before the SCAN started. pub fn mark_last_insn_constant(&mut self) { - self.constant_insns.push(self.insns.pop().unwrap()); + if self.constant_span_is_open() { + // no need to mark this insn as constant as the surrounding parent expression is already constant + return; + } + + let prev = self.insns.len().saturating_sub(1); + self.constant_spans.push((prev, prev)); } pub fn emit_constant_insns(&mut self) { - self.insns.append(&mut self.constant_insns); + // move compile-time constant instructions to the end of the program, where they are executed once after Init jumps to it. + // any label_to_resolved_offset that points to an instruction within any moved constant span should be updated to point to the new location. + + // the instruction reordering can be done by sorting the insns, so that the ordering is: + // 1. if insn not in any constant span, it stays where it is + // 2. if insn is in a constant span, it is after other insns, except those that are in a later constant span + // 3. 
within a single constant span the order is preserved + self.insns.sort_by(|(_, _, index_a), (_, _, index_b)| { + let a_span = self + .constant_spans + .iter() + .find(|span| span.0 <= *index_a && span.1 >= *index_a); + let b_span = self + .constant_spans + .iter() + .find(|span| span.0 <= *index_b && span.1 >= *index_b); + if a_span.is_some() && b_span.is_some() { + a_span.unwrap().0.cmp(&b_span.unwrap().0) + } else if a_span.is_some() { + Ordering::Greater + } else if b_span.is_some() { + Ordering::Less + } else { + Ordering::Equal + } + }); + for resolved_offset in self.label_to_resolved_offset.iter_mut() { + if let Some((old_offset, target)) = resolved_offset { + let new_offset = self + .insns + .iter() + .position(|(_, _, index)| *old_offset == *index as u32) + .unwrap() as u32; + *resolved_offset = Some((new_offset, *target)); + } + } + + // Fix comments to refer to new locations + if let Some(comments) = &mut self.comments { + for (old_offset, _) in comments.iter_mut() { + let new_offset = self + .insns + .iter() + .position(|(_, _, index)| *old_offset == *index as u32) + .expect("comment must exist") as u32; + *old_offset = new_offset; + } + } } pub fn offset(&self) -> BranchOffset { @@ -220,18 +321,42 @@ impl ProgramBuilder { BranchOffset::Label(label_n as u32) } - // Effectively a GOTO without the need to emit an explicit GOTO instruction. - // Useful when you know you need to jump to "the next part", but the exact offset is unknowable - // at the time of emitting the instruction. + /// Resolve a label to whatever instruction follows the one that was + /// last emitted. + /// + /// Use this when your use case is: "the program should jump to whatever instruction + /// follows the one that was previously emitted", and you don't care exactly + /// which instruction that is. Examples include "the start of a loop", or + /// "after the loop ends". + /// + /// It is important to handle those cases this way, because the precise + /// instruction that follows any given instruction might change due to + /// reordering the emitted instructions. + #[inline] pub fn preassign_label_to_next_insn(&mut self, label: BranchOffset) { - self.next_insn_labels.push(label); + assert!(label.is_label(), "BranchOffset {:?} is not a label", label); + self._resolve_label(label, self.offset().sub(1u32), JumpTarget::AfterThisInsn); } + /// Resolve a label to exactly the instruction that was last emitted. + /// + /// Use this when your use case is: "the program should jump to the exact instruction + /// that was last emitted", and you don't care WHERE exactly that ends up being + /// once the order of the bytecode of the program is finalized. Examples include + /// "jump to the Halt instruction", or "jump to the Next instruction of a loop". + #[inline] pub fn resolve_label(&mut self, label: BranchOffset, to_offset: BranchOffset) { + self._resolve_label(label, to_offset, JumpTarget::ExactlyThisInsn); + } + + fn _resolve_label(&mut self, label: BranchOffset, to_offset: BranchOffset, target: JumpTarget) { assert!(matches!(label, BranchOffset::Label(_))); assert!(matches!(to_offset, BranchOffset::Offset(_))); - self.label_to_resolved_offset[label.to_label_value() as usize] = - Some(to_offset.to_offset_int()); + let BranchOffset::Label(label_number) = label else { + unreachable!("Label is not a label"); + }; + self.label_to_resolved_offset[label_number as usize] = + Some((to_offset.to_offset_int(), target)); } /// Resolve unresolved labels to a specific offset in the instruction list. 
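// Illustration (not part of the patch): how the two resolution modes above are
// meant to be used. A minimal sketch, assuming a ProgramBuilder named `builder`
// and a label-allocation method (written here as allocate_label) are in scope.
// ExactlyThisInsn pins a label to a specific emitted instruction, while
// AfterThisInsn survives the constant-span reordering done in
// emit_constant_insns(), because the +1 is only applied when labels are
// materialized in resolve_labels() (shown in the next hunk):
//
//     let after_loop = builder.allocate_label();
//     builder.emit_insn(Insn::Goto { target_pc: after_loop });
//     /* ... emit the loop body ... */
//     // Whatever instruction is emitted next becomes the jump target,
//     // even if reordering later changes which instruction that is:
//     builder.preassign_label_to_next_insn(after_loop);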
@@ -242,19 +367,25 @@ impl ProgramBuilder { pub fn resolve_labels(&mut self) { let resolve = |pc: &mut BranchOffset, insn_name: &str| { if let BranchOffset::Label(label) = pc { - let to_offset = self - .label_to_resolved_offset - .get(*label as usize) - .unwrap_or_else(|| { - panic!("Reference to undefined label in {}: {}", insn_name, label) - }); + let Some(Some((to_offset, target))) = + self.label_to_resolved_offset.get(*label as usize) + else { + panic!( + "Reference to undefined or unresolved label in {}: {}", + insn_name, label + ); + }; *pc = BranchOffset::Offset( to_offset - .unwrap_or_else(|| panic!("Unresolved label in {}: {}", insn_name, label)), + + if *target == JumpTarget::ExactlyThisInsn { + 0 + } else { + 1 + }, ); } }; - for (insn, _) in self.insns.iter_mut() { + for (insn, _, _) in self.insns.iter_mut() { match insn { Insn::Init { target_pc } => { resolve(target_pc, "Init"); @@ -321,17 +452,11 @@ impl ProgramBuilder { } => { resolve(target_pc, "IfNot"); } - Insn::RewindAwait { - cursor_id: _cursor_id, - pc_if_empty, - } => { - resolve(pc_if_empty, "RewindAwait"); + Insn::Rewind { pc_if_empty, .. } => { + resolve(pc_if_empty, "Rewind"); } - Insn::LastAwait { - cursor_id: _cursor_id, - pc_if_empty, - } => { - resolve(pc_if_empty, "LastAwait"); + Insn::Last { pc_if_empty, .. } => { + resolve(pc_if_empty, "Last"); } Insn::Goto { target_pc } => { resolve(target_pc, "Goto"); @@ -360,18 +485,25 @@ impl ProgramBuilder { Insn::IfPos { target_pc, .. } => { resolve(target_pc, "IfPos"); } - Insn::NextAwait { pc_if_next, .. } => { - resolve(pc_if_next, "NextAwait"); + Insn::Next { pc_if_next, .. } => { + resolve(pc_if_next, "Next"); } - Insn::PrevAwait { pc_if_next, .. } => { - resolve(pc_if_next, "PrevAwait"); + Insn::Once { + target_pc_when_reentered, + .. + } => { + resolve(target_pc_when_reentered, "Once"); + } + Insn::Prev { pc_if_prev, .. } => { + resolve(pc_if_prev, "Prev"); } Insn::InitCoroutine { yield_reg: _, jump_on_definition, - start_offset: _, + start_offset, } => { resolve(jump_on_definition, "InitCoroutine"); + resolve(start_offset, "InitCoroutine"); } Insn::NotExists { cursor: _, @@ -407,6 +539,12 @@ impl ProgramBuilder { Insn::SeekGT { target_pc, .. } => { resolve(target_pc, "SeekGT"); } + Insn::SeekLE { target_pc, .. } => { + resolve(target_pc, "SeekLE"); + } + Insn::SeekLT { target_pc, .. } => { + resolve(target_pc, "SeekLT"); + } Insn::IdxGE { target_pc, .. } => { resolve(target_pc, "IdxGE"); } @@ -428,6 +566,9 @@ impl ProgramBuilder { Insn::VFilter { pc_if_empty, .. } => { resolve(pc_if_empty, "VFilter"); } + Insn::NoConflict { target_pc, .. 
} => { + resolve(target_pc, "NoConflict"); + } _ => {} } } @@ -435,15 +576,17 @@ impl ProgramBuilder { } // translate table to cursor id + pub fn resolve_cursor_id_safe(&self, table_identifier: &str) -> Option { + self.cursor_ref.iter().position(|(t_ident, _)| { + t_ident + .as_ref() + .is_some_and(|ident| ident == table_identifier) + }) + } + pub fn resolve_cursor_id(&self, table_identifier: &str) -> CursorID { - self.cursor_ref - .iter() - .position(|(t_ident, _)| { - t_ident - .as_ref() - .is_some_and(|ident| ident == table_identifier) - }) - .unwrap() + self.resolve_cursor_id_safe(table_identifier) + .unwrap_or_else(|| panic!("Cursor not found: {}", table_identifier)) } pub fn build( @@ -453,15 +596,15 @@ impl ProgramBuilder { change_cnt_on: bool, ) -> Program { self.resolve_labels(); - assert!( - self.constant_insns.is_empty(), - "constant_insns is not empty when build() is called, did you forget to call emit_constant_insns()?" - ); self.parameters.list.dedup(); Program { max_registers: self.next_free_register, - insns: self.insns, + insns: self + .insns + .into_iter() + .map(|(insn, function, _)| (insn, function)) + .collect(), cursor_ref: self.cursor_ref, database_header, comments: self.comments, diff --git a/core/vdbe/execute.rs b/core/vdbe/execute.rs index 31a64d491..01339c3b3 100644 --- a/core/vdbe/execute.rs +++ b/core/vdbe/execute.rs @@ -1,44 +1,65 @@ #![allow(unused_variables)] -use crate::error::{LimboError, SQLITE_CONSTRAINT_PRIMARYKEY}; -use crate::ext::ExtValue; -use crate::function::{AggFunc, ExtFunc, MathFunc, MathFuncArity, ScalarFunc, VectorFunc}; -use crate::functions::datetime::{ - exec_date, exec_datetime_full, exec_julianday, exec_strftime, exec_time, exec_unixepoch, +use crate::numeric::{NullableInteger, Numeric}; +use crate::storage::database::FileMemoryStorage; +use crate::storage::page_cache::DumbLruPageCache; +use crate::storage::pager::CreateBTreeFlags; +use crate::types::ImmutableRecord; +use crate::{ + error::{LimboError, SQLITE_CONSTRAINT, SQLITE_CONSTRAINT_PRIMARYKEY}, + ext::ExtValue, + function::{AggFunc, ExtFunc, MathFunc, MathFuncArity, ScalarFunc, VectorFunc}, + functions::{ + datetime::{ + exec_date, exec_datetime_full, exec_julianday, exec_strftime, exec_time, exec_unixepoch, + }, + printf::exec_printf, + }, + types::compare_immutable, }; -use crate::functions::printf::exec_printf; -use std::{borrow::BorrowMut, rc::Rc}; +use std::{borrow::BorrowMut, rc::Rc, sync::Arc}; -use crate::pseudo::PseudoCursor; -use crate::result::LimboResult; -use crate::schema::{affinity, Affinity}; -use crate::storage::btree::BTreeCursor; -use crate::storage::wal::CheckpointResult; -use crate::types::{ - AggContext, Cursor, CursorResult, ExternalAggState, OwnedValue, SeekKey, SeekOp, -}; -use crate::util::{ - cast_real_to_integer, cast_text_to_integer, cast_text_to_numeric, cast_text_to_real, - checked_cast_text_to_numeric, parse_schema_rows, RoundToPrecision, -}; -use crate::vdbe::builder::CursorType; -use crate::vdbe::insn::Insn; -use crate::vector::{vector32, vector64, vector_distance_cos, vector_extract}; +use crate::{pseudo::PseudoCursor, result::LimboResult}; -use crate::{info, MvCursor, RefValue, Row, StepResult, TransactionState}; - -use super::insn::{ - exec_add, exec_and, exec_bit_and, exec_bit_not, exec_bit_or, exec_boolean_not, exec_concat, - exec_divide, exec_multiply, exec_or, exec_remainder, exec_shift_left, exec_shift_right, - exec_subtract, Cookie, +use crate::{ + schema::{affinity, Affinity}, + storage::btree::{BTreeCursor, BTreeKey}, }; -use 
super::HaltState; + +use crate::{ + storage::wal::CheckpointResult, + types::{ + AggContext, Cursor, CursorResult, ExternalAggState, OwnedValue, OwnedValueType, SeekKey, + SeekOp, + }, + util::{ + cast_real_to_integer, cast_text_to_integer, cast_text_to_numeric, cast_text_to_real, + checked_cast_text_to_numeric, parse_schema_rows, RoundToPrecision, + }, + vdbe::{ + builder::CursorType, + insn::{IdxInsertFlags, Insn}, + }, + vector::{vector32, vector64, vector_distance_cos, vector_extract}, +}; + +use crate::{ + info, maybe_init_database_file, BufferPool, MvCursor, OpenFlags, RefValue, Row, StepResult, + TransactionState, IO, +}; + +use super::{ + insn::{Cookie, RegisterOrLiteral}, + HaltState, +}; +use parking_lot::RwLock; use rand::thread_rng; -use super::likeop::{construct_like_escape_arg, exec_glob, exec_like_with_escape}; -use super::sorter::Sorter; +use super::{ + likeop::{construct_like_escape_arg, exec_glob, exec_like_with_escape}, + sorter::Sorter, +}; use regex::{Regex, RegexBuilder}; -use std::cell::RefCell; -use std::collections::HashMap; +use std::{cell::RefCell, collections::HashMap}; #[cfg(feature = "json")] use crate::{ @@ -795,14 +816,14 @@ pub fn op_if_not( Ok(InsnFunctionStepResult::Step) } -pub fn op_open_read_async( +pub fn op_open_read( program: &Program, state: &mut ProgramState, insn: &Insn, pager: &Rc, mv_store: Option<&Rc>, ) -> Result { - let Insn::OpenReadAsync { + let Insn::OpenRead { cursor_id, root_page, } = insn @@ -821,59 +842,50 @@ pub fn op_open_read_async( } None => None, }; - let cursor = BTreeCursor::new(mv_cursor, pager.clone(), *root_page); let mut cursors = state.cursors.borrow_mut(); match cursor_type { CursorType::BTreeTable(_) => { + let cursor = BTreeCursor::new(mv_cursor, pager.clone(), *root_page); cursors .get_mut(*cursor_id) .unwrap() .replace(Cursor::new_btree(cursor)); } - CursorType::BTreeIndex(_) => { + CursorType::BTreeIndex(index) => { + let cursor = + BTreeCursor::new_index(mv_cursor, pager.clone(), *root_page, index.as_ref()); cursors .get_mut(*cursor_id) .unwrap() .replace(Cursor::new_btree(cursor)); } CursorType::Pseudo(_) => { - panic!("OpenReadAsync on pseudo cursor"); + panic!("OpenRead on pseudo cursor"); } CursorType::Sorter => { - panic!("OpenReadAsync on sorter cursor"); + panic!("OpenRead on sorter cursor"); } CursorType::VirtualTable(_) => { - panic!("OpenReadAsync on virtual table cursor, use Insn:VOpenAsync instead"); + panic!("OpenRead on virtual table cursor, use Insn:VOpen instead"); } } state.pc += 1; Ok(InsnFunctionStepResult::Step) } -pub fn op_open_read_await( +pub fn op_vopen( program: &Program, state: &mut ProgramState, insn: &Insn, pager: &Rc, mv_store: Option<&Rc>, ) -> Result { - state.pc += 1; - Ok(InsnFunctionStepResult::Step) -} - -pub fn op_vopen_async( - program: &Program, - state: &mut ProgramState, - insn: &Insn, - pager: &Rc, - mv_store: Option<&Rc>, -) -> Result { - let Insn::VOpenAsync { cursor_id } = insn else { + let Insn::VOpen { cursor_id } = insn else { unreachable!("unexpected Insn {:?}", insn) }; let (_, cursor_type) = program.cursor_ref.get(*cursor_id).unwrap(); let CursorType::VirtualTable(virtual_table) = cursor_type else { - panic!("VOpenAsync on non-virtual table cursor"); + panic!("VOpen on non-virtual table cursor"); }; let cursor = virtual_table.open()?; state @@ -917,12 +929,21 @@ pub fn op_vcreate( "Failed to upgrade Connection".to_string(), )); }; + let mod_type = conn + .syms + .borrow() + .vtab_modules + .get(&module_name) + .ok_or_else(|| { + 
crate::LimboError::ExtensionError(format!("Module {} not found", module_name)) + })? + .module_kind; let table = crate::VirtualTable::from_args( Some(&table_name), &module_name, args, &conn.syms.borrow(), - limbo_ext::VTabKind::VirtualTable, + mod_type, None, )?; { @@ -935,17 +956,6 @@ pub fn op_vcreate( Ok(InsnFunctionStepResult::Step) } -pub fn op_vopen_await( - program: &Program, - state: &mut ProgramState, - insn: &Insn, - pager: &Rc, - mv_store: Option<&Rc>, -) -> Result { - state.pc += 1; - Ok(InsnFunctionStepResult::Step) -} - pub fn op_vfilter( program: &Program, state: &mut ProgramState, @@ -958,6 +968,8 @@ pub fn op_vfilter( pc_if_empty, arg_count, args_reg, + idx_str, + idx_num, } = insn else { unreachable!("unexpected Insn {:?}", insn) @@ -969,11 +981,21 @@ pub fn op_vfilter( let has_rows = { let mut cursor = state.get_cursor(*cursor_id); let cursor = cursor.as_virtual_mut(); - let mut args = Vec::new(); + let mut args = Vec::with_capacity(*arg_count); for i in 0..*arg_count { - args.push(state.registers[args_reg + i].get_owned_value().clone()); + args.push( + state.registers[args_reg + i] + .get_owned_value() + .clone() + .to_ffi(), + ); } - virtual_table.filter(cursor, *arg_count, args)? + let idx_str = if let Some(idx_str) = idx_str { + Some(state.registers[*idx_str].get_owned_value().to_string()) + } else { + None + }; + virtual_table.filter(cursor, *idx_num as i32, idx_str, *arg_count, args)? }; if !has_rows { state.pc = pc_if_empty.to_offset_int(); @@ -1092,7 +1114,7 @@ pub fn op_vnext( }; let (_, cursor_type) = program.cursor_ref.get(*cursor_id).unwrap(); let CursorType::VirtualTable(virtual_table) = cursor_type else { - panic!("VNextAsync on non-virtual table cursor"); + panic!("VNext on non-virtual table cursor"); }; let has_more = { let mut cursor = state.get_cursor(*cursor_id); @@ -1107,6 +1129,35 @@ pub fn op_vnext( Ok(InsnFunctionStepResult::Step) } +pub fn op_vdestroy( + program: &Program, + state: &mut ProgramState, + insn: &Insn, + pager: &Rc, + mv_store: Option<&Rc>, +) -> Result { + let Insn::VDestroy { db, table_name } = insn else { + unreachable!("unexpected Insn {:?}", insn) + }; + let Some(conn) = program.connection.upgrade() else { + return Err(crate::LimboError::ExtensionError( + "Failed to upgrade Connection".to_string(), + )); + }; + + { + let Some(vtab) = conn.syms.borrow_mut().vtabs.remove(table_name) else { + return Err(crate::LimboError::InternalError( + "Could not find Virtual Table to Destroy".to_string(), + )); + }; + vtab.destroy()?; + } + + state.pc += 1; + Ok(InsnFunctionStepResult::Step) +} + pub fn op_open_pseudo( program: &Program, state: &mut ProgramState, @@ -1134,53 +1185,14 @@ pub fn op_open_pseudo( Ok(InsnFunctionStepResult::Step) } -pub fn op_rewind_async( +pub fn op_rewind( program: &Program, state: &mut ProgramState, insn: &Insn, pager: &Rc, mv_store: Option<&Rc>, ) -> Result { - let Insn::RewindAsync { cursor_id } = insn else { - unreachable!("unexpected Insn {:?}", insn) - }; - { - let mut cursor = - must_be_btree_cursor!(*cursor_id, program.cursor_ref, state, "RewindAsync"); - let cursor = cursor.as_btree_mut(); - return_if_io!(cursor.rewind()); - } - state.pc += 1; - Ok(InsnFunctionStepResult::Step) -} - -pub fn op_last_async( - program: &Program, - state: &mut ProgramState, - insn: &Insn, - pager: &Rc, - mv_store: Option<&Rc>, -) -> Result { - let Insn::LastAsync { cursor_id } = insn else { - unreachable!("unexpected Insn {:?}", insn) - }; - { - let mut cursor = must_be_btree_cursor!(*cursor_id, program.cursor_ref, state, 
"LastAsync"); - let cursor = cursor.as_btree_mut(); - return_if_io!(cursor.last()); - } - state.pc += 1; - Ok(InsnFunctionStepResult::Step) -} - -pub fn op_last_await( - program: &Program, - state: &mut ProgramState, - insn: &Insn, - pager: &Rc, - mv_store: Option<&Rc>, -) -> Result { - let Insn::LastAwait { + let Insn::Rewind { cursor_id, pc_if_empty, } = insn @@ -1189,9 +1201,9 @@ pub fn op_last_await( }; assert!(pc_if_empty.is_offset()); let is_empty = { - let mut cursor = must_be_btree_cursor!(*cursor_id, program.cursor_ref, state, "LastAwait"); + let mut cursor = must_be_btree_cursor!(*cursor_id, program.cursor_ref, state, "Rewind"); let cursor = cursor.as_btree_mut(); - cursor.wait_for_completion()?; + return_if_io!(cursor.rewind()); cursor.is_empty() }; if is_empty { @@ -1202,14 +1214,14 @@ pub fn op_last_await( Ok(InsnFunctionStepResult::Step) } -pub fn op_rewind_await( +pub fn op_last( program: &Program, state: &mut ProgramState, insn: &Insn, pager: &Rc, mv_store: Option<&Rc>, ) -> Result { - let Insn::RewindAwait { + let Insn::Last { cursor_id, pc_if_empty, } = insn @@ -1218,10 +1230,9 @@ pub fn op_rewind_await( }; assert!(pc_if_empty.is_offset()); let is_empty = { - let mut cursor = - must_be_btree_cursor!(*cursor_id, program.cursor_ref, state, "RewindAwait"); + let mut cursor = must_be_btree_cursor!(*cursor_id, program.cursor_ref, state, "Last"); let cursor = cursor.as_btree_mut(); - cursor.wait_for_completion()?; + return_if_io!(cursor.last()); cursor.is_empty() }; if is_empty { @@ -1278,7 +1289,10 @@ pub fn op_column( if cursor.get_null_flag() { RefValue::Null } else { - record.get_value(*column).clone() + match record.get_value_opt(*column) { + Some(val) => val.clone(), + None => RefValue::Null, + } } } else { RefValue::Null @@ -1305,10 +1319,14 @@ pub fn op_column( let record = { let mut cursor = state.get_cursor(*cursor_id); let cursor = cursor.as_sorter_mut(); - cursor.record().map(|r| r.clone()) + cursor.record().cloned() }; if let Some(record) = record { - state.registers[*dest] = Register::OwnedValue(record.get_value(*column).to_owned()); + state.registers[*dest] = + Register::OwnedValue(match record.get_value_opt(*column) { + Some(val) => val.to_owned(), + None => OwnedValue::Null, + }); } else { state.registers[*dest] = Register::OwnedValue(OwnedValue::Null); } @@ -1334,6 +1352,68 @@ pub fn op_column( Ok(InsnFunctionStepResult::Step) } +pub fn op_type_check( + program: &Program, + state: &mut ProgramState, + insn: &Insn, + pager: &Rc, + mv_store: Option<&Rc>, +) -> Result { + let Insn::TypeCheck { + start_reg, + count, + check_generated, + table_reference, + } = insn + else { + unreachable!("unexpected Insn {:?}", insn) + }; + assert_eq!(table_reference.is_strict, true); + state.registers[*start_reg..*start_reg + *count] + .iter_mut() + .zip(table_reference.columns.iter()) + .try_for_each(|(reg, col)| { + // INT PRIMARY KEY is not row_id_alias so we throw error if this col is NULL + if !col.is_rowid_alias + && col.primary_key + && matches!(reg.get_owned_value(), OwnedValue::Null) + { + bail_constraint_error!( + "NOT NULL constraint failed: {}.{} ({})", + &table_reference.name, + col.name.as_ref().map(|s| s.as_str()).unwrap_or(""), + SQLITE_CONSTRAINT + ) + } else if col.is_rowid_alias && matches!(reg.get_owned_value(), OwnedValue::Null) { + // Handle INTEGER PRIMARY KEY for null as usual (Rowid will be auto-assigned) + return Ok(()); + } + let col_affinity = col.affinity(); + let ty_str = col.ty_str.as_str(); + let applied = apply_affinity_char(reg, col_affinity); + 
let value_type = reg.get_owned_value().value_type(); + match (ty_str, value_type) { + ("INTEGER" | "INT", OwnedValueType::Integer) => {} + ("REAL", OwnedValueType::Float) => {} + ("BLOB", OwnedValueType::Blob) => {} + ("TEXT", OwnedValueType::Text) => {} + ("ANY", _) => {} + (t, v) => bail_constraint_error!( + "cannot store {} value in {} column {}.{} ({})", + v, + t, + &table_reference.name, + col.name.as_ref().map(|s| s.as_str()).unwrap_or(""), + SQLITE_CONSTRAINT + ), + }; + Ok(()) + })?; + + state.pc += 1; + Ok(InsnFunctionStepResult::Step) +} + pub fn op_make_record( program: &Program, state: &mut ProgramState, @@ -1369,60 +1449,19 @@ pub fn op_result_row( values: &state.registers[*start_reg] as *const Register, count: *count, }; - state.result_row = Some(row); state.pc += 1; return Ok(InsnFunctionStepResult::Row); } -pub fn op_next_async( +pub fn op_next( program: &Program, state: &mut ProgramState, insn: &Insn, pager: &Rc, mv_store: Option<&Rc>, ) -> Result { - let Insn::NextAsync { cursor_id } = insn else { - unreachable!("unexpected Insn {:?}", insn) - }; - { - let mut cursor = must_be_btree_cursor!(*cursor_id, program.cursor_ref, state, "NextAsync"); - let cursor = cursor.as_btree_mut(); - cursor.set_null_flag(false); - return_if_io!(cursor.next()); - } - state.pc += 1; - Ok(InsnFunctionStepResult::Step) -} - -pub fn op_prev_async( - program: &Program, - state: &mut ProgramState, - insn: &Insn, - pager: &Rc, - mv_store: Option<&Rc>, -) -> Result { - let Insn::PrevAsync { cursor_id } = insn else { - unreachable!("unexpected Insn {:?}", insn) - }; - { - let mut cursor = must_be_btree_cursor!(*cursor_id, program.cursor_ref, state, "PrevAsync"); - let cursor = cursor.as_btree_mut(); - cursor.set_null_flag(false); - return_if_io!(cursor.prev()); - } - state.pc += 1; - Ok(InsnFunctionStepResult::Step) -} - -pub fn op_prev_await( - program: &Program, - state: &mut ProgramState, - insn: &Insn, - pager: &Rc, - mv_store: Option<&Rc>, -) -> Result { - let Insn::PrevAwait { + let Insn::Next { cursor_id, pc_if_next, } = insn @@ -1431,9 +1470,11 @@ pub fn op_prev_await( }; assert!(pc_if_next.is_offset()); let is_empty = { - let mut cursor = must_be_btree_cursor!(*cursor_id, program.cursor_ref, state, "PrevAwait"); + let mut cursor = must_be_btree_cursor!(*cursor_id, program.cursor_ref, state, "Next"); let cursor = cursor.as_btree_mut(); - cursor.wait_for_completion()?; + cursor.set_null_flag(false); + return_if_io!(cursor.next()); + cursor.is_empty() }; if !is_empty { @@ -1444,29 +1485,31 @@ pub fn op_prev_await( Ok(InsnFunctionStepResult::Step) } -pub fn op_next_await( +pub fn op_prev( program: &Program, state: &mut ProgramState, insn: &Insn, pager: &Rc, mv_store: Option<&Rc>, ) -> Result { - let Insn::NextAwait { + let Insn::Prev { cursor_id, - pc_if_next, + pc_if_prev, } = insn else { unreachable!("unexpected Insn {:?}", insn) }; - assert!(pc_if_next.is_offset()); + assert!(pc_if_prev.is_offset()); let is_empty = { - let mut cursor = must_be_btree_cursor!(*cursor_id, program.cursor_ref, state, "NextAwait"); + let mut cursor = must_be_btree_cursor!(*cursor_id, program.cursor_ref, state, "Prev"); let cursor = cursor.as_btree_mut(); - cursor.wait_for_completion()?; + cursor.set_null_flag(false); + return_if_io!(cursor.prev()); + cursor.is_empty() }; if !is_empty { - state.pc = pc_if_next.to_offset_int(); + state.pc = pc_if_prev.to_offset_int(); } else { state.pc += 1; } @@ -1502,7 +1545,7 @@ pub fn op_halt( ))); } } - match program.halt(pager.clone(), state, mv_store.clone())? 
{
+    match program.halt(pager.clone(), state, mv_store)? {
         StepResult::Done => Ok(InsnFunctionStepResult::Done),
         StepResult::IO => Ok(InsnFunctionStepResult::IO),
         StepResult::Row => Ok(InsnFunctionStepResult::Row),
@@ -1521,22 +1564,19 @@ pub fn op_transaction(
     let Insn::Transaction { write } = insn else {
         unreachable!("unexpected Insn {:?}", insn)
     };
+    let connection = program.connection.upgrade().unwrap();
+    if *write && connection._db.open_flags.contains(OpenFlags::ReadOnly) {
+        return Err(LimboError::ReadOnly);
+    }
     if let Some(mv_store) = &mv_store {
         if state.mv_tx_id.is_none() {
             let tx_id = mv_store.begin_tx();
-            program
-                .connection
-                .upgrade()
-                .unwrap()
-                .mv_transactions
-                .borrow_mut()
-                .push(tx_id);
+            connection.mv_transactions.borrow_mut().push(tx_id);
             state.mv_tx_id = Some(tx_id);
         }
     } else {
-        let connection = program.connection.upgrade().unwrap();
-        let current_state = connection.transaction_state.borrow().clone();
-        let (new_transaction_state, updated) = match (&current_state, write) {
+        let current_state = connection.transaction_state.get();
+        let (new_transaction_state, updated) = match (current_state, write) {
            (TransactionState::Write, true) => (TransactionState::Write, false),
            (TransactionState::Write, false) => (TransactionState::Write, false),
            (TransactionState::Read, true) => (TransactionState::Write, true),
@@ -1581,7 +1621,7 @@ pub fn op_auto_commit(
     };
     let conn = program.connection.upgrade().unwrap();
     if matches!(state.halt_state, Some(HaltState::Checkpointing)) {
-        return match program.halt(pager.clone(), state, mv_store.clone())? {
+        return match program.halt(pager.clone(), state, mv_store)? {
             super::StepResult::Done => Ok(InsnFunctionStepResult::Done),
             super::StepResult::IO => Ok(InsnFunctionStepResult::IO),
             super::StepResult::Row => Ok(InsnFunctionStepResult::Row),
@@ -1590,7 +1630,7 @@
         };
     }
-    if *auto_commit != *conn.auto_commit.borrow() {
+    if *auto_commit != conn.auto_commit.get() {
         if *rollback {
             todo!("Rollback is not implemented");
         } else {
@@ -1609,7 +1649,7 @@ pub fn op_auto_commit(
                 "cannot commit - no transaction is active".to_string(),
             ));
         }
-        return match program.halt(pager.clone(), state, mv_store.clone())? {
+        return match program.halt(pager.clone(), state, mv_store)? {
             super::StepResult::Done => Ok(InsnFunctionStepResult::Done),
             super::StepResult::IO => Ok(InsnFunctionStepResult::IO),
             super::StepResult::Row => Ok(InsnFunctionStepResult::Row),
@@ -1768,17 +1808,20 @@ pub fn op_row_id(
     let rowid = {
         let mut index_cursor = state.get_cursor(index_cursor_id);
         let index_cursor = index_cursor.as_btree_mut();
-        let rowid = index_cursor.rowid()?;
-        rowid
+        let record = index_cursor.record();
+        let record = record.as_ref().unwrap();
+        let rowid = record.get_values().last().unwrap();
+        match rowid {
+            RefValue::Integer(rowid) => *rowid as u64,
+            _ => unreachable!(),
+        }
     };
     let mut table_cursor = state.get_cursor(table_cursor_id);
     let table_cursor = table_cursor.as_btree_mut();
-    let deferred_seek =
-        match table_cursor.seek(SeekKey::TableRowId(rowid.unwrap()), SeekOp::EQ)? {
-            CursorResult::Ok(_) => None,
-            CursorResult::IO => Some((index_cursor_id, table_cursor_id)),
-        };
-    deferred_seek
+    match table_cursor.seek(SeekKey::TableRowId(rowid), SeekOp::EQ)?
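
The `(current_state, write)` match in `op_transaction` is an upgrade-only state machine: a transaction can be promoted from read to write but is never demoted while the program runs. A compilable sketch of that table; the enum is redefined locally, and the `None` rows are an assumption, since the hunk above only shows the Read/Write arms:

```rust
#[derive(Clone, Copy, Debug, PartialEq)]
enum TransactionState {
    None,
    Read,
    Write,
}

// Returns (new state, whether a begin/upgrade actually happened),
// mirroring the (new_transaction_state, updated) pair above.
fn next_state(current: TransactionState, write: bool) -> (TransactionState, bool) {
    use TransactionState::*;
    match (current, write) {
        (Write, _) => (Write, false),  // already at the strongest level
        (Read, true) => (Write, true), // upgrade read -> write
        (Read, false) => (Read, false),
        (None, true) => (Write, true), // assumed: begin a write transaction
        (None, false) => (Read, true), // assumed: begin a read transaction
    }
}

fn main() {
    assert_eq!(
        next_state(TransactionState::Read, true),
        (TransactionState::Write, true)
    );
    assert_eq!(
        next_state(TransactionState::Write, false),
        (TransactionState::Write, false)
    );
}
```
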
{ + CursorResult::Ok(_) => None, + CursorResult::IO => Some((index_cursor_id, table_cursor_id)), + } }; if let Some(deferred_seek) = deferred_seek { state.deferred_seek = Some(deferred_seek); @@ -1812,6 +1855,28 @@ pub fn op_row_id( Ok(InsnFunctionStepResult::Step) } +pub fn op_idx_row_id( + program: &Program, + state: &mut ProgramState, + insn: &Insn, + pager: &Rc, + mv_store: Option<&Rc>, +) -> Result { + let Insn::IdxRowId { cursor_id, dest } = insn else { + unreachable!("unexpected Insn {:?}", insn) + }; + let mut cursors = state.cursors.borrow_mut(); + let cursor = cursors.get_mut(*cursor_id).unwrap().as_mut().unwrap(); + let cursor = cursor.as_btree_mut(); + let rowid = cursor.rowid()?; + state.registers[*dest] = match rowid { + Some(rowid) => Register::OwnedValue(OwnedValue::Integer(rowid as i64)), + None => Register::OwnedValue(OwnedValue::Null), + }; + state.pc += 1; + Ok(InsnFunctionStepResult::Step) +} + pub fn op_seek_rowid( program: &Program, state: &mut ProgramState, @@ -1876,97 +1941,69 @@ pub fn op_deferred_seek( Ok(InsnFunctionStepResult::Step) } -pub fn op_seek_ge( +pub fn op_seek( program: &Program, state: &mut ProgramState, insn: &Insn, pager: &Rc, mv_store: Option<&Rc>, ) -> Result { - let Insn::SeekGE { + let (Insn::SeekGE { cursor_id, start_reg, num_regs, target_pc, is_index, - } = insn - else { - unreachable!("unexpected Insn {:?}", insn) - }; - assert!(target_pc.is_offset()); - if *is_index { - let found = { - let mut cursor = state.get_cursor(*cursor_id); - let cursor = cursor.as_btree_mut(); - let record_from_regs = make_record(&state.registers, start_reg, num_regs); - let found = - return_if_io!(cursor.seek(SeekKey::IndexKey(&record_from_regs), SeekOp::GE)); - found - }; - if !found { - state.pc = target_pc.to_offset_int(); - } else { - state.pc += 1; - } - } else { - let pc = { - let mut cursor = state.get_cursor(*cursor_id); - let cursor = cursor.as_btree_mut(); - let rowid = match state.registers[*start_reg].get_owned_value() { - OwnedValue::Null => { - // All integer values are greater than null so we just rewind the cursor - return_if_io!(cursor.rewind()); - None - } - OwnedValue::Integer(rowid) => Some(*rowid as u64), - _ => { - return Err(LimboError::InternalError( - "SeekGE: the value in the register is not an integer".into(), - )); - } - }; - match rowid { - Some(rowid) => { - let found = return_if_io!(cursor.seek(SeekKey::TableRowId(rowid), SeekOp::GE)); - if !found { - target_pc.to_offset_int() - } else { - state.pc + 1 - } - } - None => state.pc + 1, - } - }; - state.pc = pc; } - Ok(InsnFunctionStepResult::Step) -} - -pub fn op_seek_gt( - program: &Program, - state: &mut ProgramState, - insn: &Insn, - pager: &Rc, - mv_store: Option<&Rc>, -) -> Result { - let Insn::SeekGT { + | Insn::SeekGT { cursor_id, start_reg, num_regs, target_pc, is_index, - } = insn + } + | Insn::SeekLE { + cursor_id, + start_reg, + num_regs, + target_pc, + is_index, + } + | Insn::SeekLT { + cursor_id, + start_reg, + num_regs, + target_pc, + is_index, + }) = insn else { unreachable!("unexpected Insn {:?}", insn) }; - assert!(target_pc.is_offset()); + assert!( + target_pc.is_offset(), + "target_pc should be an offset, is: {:?}", + target_pc + ); + let op = match insn { + Insn::SeekGE { .. } => SeekOp::GE, + Insn::SeekGT { .. } => SeekOp::GT, + Insn::SeekLE { .. } => SeekOp::LE, + Insn::SeekLT { .. 
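
Folding SeekGE/SeekGT/SeekLE/SeekLT into one `op_seek` works because the four opcodes carry identical operands and differ only in the comparison operator passed to `cursor.seek`. A self-contained sketch of that dispatch shape (field-free enums for brevity):

```rust
#[derive(Clone, Copy, Debug, PartialEq)]
enum SeekOp {
    GE,
    GT,
    LE,
    LT,
}

#[allow(dead_code)]
enum Insn {
    SeekGE,
    SeekGT,
    SeekLE,
    SeekLT,
}

// One handler, one table mapping instruction -> comparison operator;
// register decoding and the index/rowid paths are shared, as above.
fn seek_op_for(insn: &Insn) -> SeekOp {
    match insn {
        Insn::SeekGE => SeekOp::GE,
        Insn::SeekGT => SeekOp::GT,
        Insn::SeekLE => SeekOp::LE,
        Insn::SeekLT => SeekOp::LT,
    }
}

fn main() {
    assert_eq!(seek_op_for(&Insn::SeekLT), SeekOp::LT);
}
```
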
} => SeekOp::LT, + _ => unreachable!("unexpected Insn {:?}", insn), + }; + let op_name = match op { + SeekOp::GE => "SeekGE", + SeekOp::GT => "SeekGT", + SeekOp::LE => "SeekLE", + SeekOp::LT => "SeekLT", + _ => unreachable!("unexpected SeekOp {:?}", op), + }; if *is_index { let found = { let mut cursor = state.get_cursor(*cursor_id); let cursor = cursor.as_btree_mut(); let record_from_regs = make_record(&state.registers, start_reg, num_regs); - let found = - return_if_io!(cursor.seek(SeekKey::IndexKey(&record_from_regs), SeekOp::GT)); + let found = return_if_io!(cursor.seek(SeekKey::IndexKey(&record_from_regs), op)); found }; if !found { @@ -1986,14 +2023,15 @@ pub fn op_seek_gt( } OwnedValue::Integer(rowid) => Some(*rowid as u64), _ => { - return Err(LimboError::InternalError( - "SeekGT: the value in the register is not an integer".into(), - )); + return Err(LimboError::InternalError(format!( + "{}: the value in the register is not an integer", + op_name + ))); } }; let found = match rowid { Some(rowid) => { - let found = return_if_io!(cursor.seek(SeekKey::TableRowId(rowid), SeekOp::GT)); + let found = return_if_io!(cursor.seek(SeekKey::TableRowId(rowid), op)); if !found { target_pc.to_offset_int() } else { @@ -2032,9 +2070,10 @@ pub fn op_idx_ge( let record_from_regs = make_record(&state.registers, start_reg, num_regs); let pc = if let Some(ref idx_record) = *cursor.record() { // Compare against the same number of values - let ord = idx_record.get_values()[..record_from_regs.len()] - .partial_cmp(&record_from_regs.get_values()[..]) - .unwrap(); + let idx_values = idx_record.get_values(); + let idx_values = &idx_values[..record_from_regs.len()]; + let record_values = record_from_regs.get_values(); + let ord = compare_immutable(&idx_values, &record_values, cursor.index_key_sort_order); if ord.is_ge() { target_pc.to_offset_int() } else { @@ -2049,6 +2088,24 @@ pub fn op_idx_ge( Ok(InsnFunctionStepResult::Step) } +pub fn op_seek_end( + program: &Program, + state: &mut ProgramState, + insn: &Insn, + pager: &Rc, + mv_store: Option<&Rc>, +) -> Result { + if let Insn::SeekEnd { cursor_id } = *insn { + let mut cursor = state.get_cursor(cursor_id); + let cursor = cursor.as_btree_mut(); + return_if_io!(cursor.seek_end()); + } else { + unreachable!("unexpected Insn {:?}", insn) + } + state.pc += 1; + Ok(InsnFunctionStepResult::Step) +} + pub fn op_idx_le( program: &Program, state: &mut ProgramState, @@ -2072,9 +2129,10 @@ pub fn op_idx_le( let record_from_regs = make_record(&state.registers, start_reg, num_regs); let pc = if let Some(ref idx_record) = *cursor.record() { // Compare against the same number of values - let ord = idx_record.get_values()[..record_from_regs.len()] - .partial_cmp(&record_from_regs.get_values()[..]) - .unwrap(); + let idx_values = idx_record.get_values(); + let idx_values = &idx_values[..record_from_regs.len()]; + let record_values = record_from_regs.get_values(); + let ord = compare_immutable(&idx_values, &record_values, cursor.index_key_sort_order); if ord.is_le() { target_pc.to_offset_int() } else { @@ -2112,9 +2170,10 @@ pub fn op_idx_gt( let record_from_regs = make_record(&state.registers, start_reg, num_regs); let pc = if let Some(ref idx_record) = *cursor.record() { // Compare against the same number of values - let ord = idx_record.get_values()[..record_from_regs.len()] - .partial_cmp(&record_from_regs.get_values()[..]) - .unwrap(); + let idx_values = idx_record.get_values(); + let idx_values = &idx_values[..record_from_regs.len()]; + let record_values = 
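
Replacing `partial_cmp` with `compare_immutable` in the IdxGT/IdxGE/IdxLE/IdxLT handlers matters for DESC index columns: a raw value comparison ignores per-column sort order, so the branch decision would be inverted on descending keys. A minimal sketch of a sort-order-aware comparator, simplified to integer keys (the real routine compares RefValues):

```rust
use std::cmp::Ordering;

#[derive(Clone, Copy)]
enum SortOrder {
    Asc,
    Desc,
}

// Compare index keys column by column, flipping the result for DESC
// columns, which is what the switch to compare_immutable buys above.
fn compare_keys(lhs: &[i64], rhs: &[i64], order: &[SortOrder]) -> Ordering {
    for ((l, r), ord) in lhs.iter().zip(rhs).zip(order) {
        let cmp = match ord {
            SortOrder::Asc => l.cmp(r),
            SortOrder::Desc => r.cmp(l), // reversed for descending columns
        };
        if cmp != Ordering::Equal {
            return cmp;
        }
    }
    Ordering::Equal
}

fn main() {
    // On a DESC column, 5 sorts before 3, so a plain l.cmp(r) would be wrong.
    assert_eq!(compare_keys(&[5], &[3], &[SortOrder::Desc]), Ordering::Less);
}
```
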
record_from_regs.get_values(); + let ord = compare_immutable(&idx_values, &record_values, cursor.index_key_sort_order); if ord.is_gt() { target_pc.to_offset_int() } else { @@ -2152,9 +2211,10 @@ pub fn op_idx_lt( let record_from_regs = make_record(&state.registers, start_reg, num_regs); let pc = if let Some(ref idx_record) = *cursor.record() { // Compare against the same number of values - let ord = idx_record.get_values()[..record_from_regs.len()] - .partial_cmp(&record_from_regs.get_values()[..]) - .unwrap(); + let idx_values = idx_record.get_values(); + let idx_values = &idx_values[..record_from_regs.len()]; + let record_values = record_from_regs.get_values(); + let ord = compare_immutable(&idx_values, &record_values, cursor.index_key_sort_order); if ord.is_lt() { target_pc.to_offset_int() } else { @@ -2635,14 +2695,6 @@ pub fn op_sorter_open( else { unreachable!("unexpected Insn {:?}", insn) }; - let order = order - .get_values() - .iter() - .map(|v| match v { - OwnedValue::Integer(i) => *i == 0, - _ => unreachable!(), - }) - .collect(); let cursor = Sorter::new(order); let mut cursors = state.cursors.borrow_mut(); cursors @@ -3329,6 +3381,21 @@ pub fn op_function( let result = exec_time(values); state.registers[*dest] = Register::OwnedValue(result); } + ScalarFunc::TimeDiff => { + if arg_count != 2 { + state.registers[*dest] = Register::OwnedValue(OwnedValue::Null); + } else { + let start = state.registers[*start_reg].get_owned_value().clone(); + let end = state.registers[*start_reg + 1].get_owned_value().clone(); + + let result = crate::functions::datetime::exec_timediff(&[ + Register::OwnedValue(start), + Register::OwnedValue(end), + ]); + + state.registers[*dest] = Register::OwnedValue(result); + } + } ScalarFunc::TotalChanges => { let res = &program.connection.upgrade().unwrap().total_changes; let total_changes = res.get(); @@ -3340,26 +3407,8 @@ pub fn op_function( state.registers[*dest] = Register::OwnedValue(result); } ScalarFunc::JulianDay => { - if *start_reg == 0 { - let julianday: String = exec_julianday(&OwnedValue::build_text("now"))?; - state.registers[*dest] = - Register::OwnedValue(OwnedValue::build_text(&julianday)); - } else { - let datetime_value = &state.registers[*start_reg]; - let julianday = exec_julianday(datetime_value.get_owned_value()); - match julianday { - Ok(time) => { - state.registers[*dest] = - Register::OwnedValue(OwnedValue::build_text(&time)) - } - Err(e) => { - return Err(LimboError::ParseError(format!( - "Error encountered while parsing datetime value: {}", - e - ))); - } - } - } + let result = exec_julianday(&state.registers[*start_reg..*start_reg + arg_count]); + state.registers[*dest] = Register::OwnedValue(result); } ScalarFunc::UnixEpoch => { if *start_reg == 0 { @@ -3423,6 +3472,19 @@ pub fn op_function( let result = exec_printf(&state.registers[*start_reg..*start_reg + arg_count])?; state.registers[*dest] = Register::OwnedValue(result); } + ScalarFunc::Likely => { + let value = &state.registers[*start_reg].borrow_mut(); + let result = exec_likely(value.get_owned_value()); + state.registers[*dest] = Register::OwnedValue(result); + } + ScalarFunc::Likelihood => { + assert_eq!(arg_count, 2); + let value = &state.registers[*start_reg]; + let probability = &state.registers[*start_reg + 1]; + let result = + exec_likelihood(value.get_owned_value(), probability.get_owned_value()); + state.registers[*dest] = Register::OwnedValue(result); + } }, crate::function::Func::Vector(vector_func) => match vector_func { VectorFunc::Vector => { @@ -3625,14 
+3687,14 @@ pub fn op_yield( Ok(InsnFunctionStepResult::Step) } -pub fn op_insert_async( +pub fn op_insert( program: &Program, state: &mut ProgramState, insn: &Insn, pager: &Rc, mv_store: Option<&Rc>, ) -> Result { - let Insn::InsertAsync { + let Insn::Insert { cursor, key_reg, record_reg, @@ -3648,30 +3710,14 @@ pub fn op_insert_async( Register::Record(r) => r, _ => unreachable!("Not a record! Cannot insert a non record value."), }; - let key = &state.registers[*key_reg]; + let key = match &state.registers[*key_reg].get_owned_value() { + OwnedValue::Integer(i) => *i, + _ => unreachable!("expected integer key"), + }; // NOTE(pere): Sending moved_before == true is okay because we moved before but // if we were to set to false after starting a balance procedure, it might // leave undefined state. - return_if_io!(cursor.insert(key.get_owned_value(), record, true)); - } - state.pc += 1; - Ok(InsnFunctionStepResult::Step) -} - -pub fn op_insert_await( - program: &Program, - state: &mut ProgramState, - insn: &Insn, - pager: &Rc, - mv_store: Option<&Rc>, -) -> Result { - let Insn::InsertAwait { cursor_id } = insn else { - unreachable!("unexpected Insn {:?}", insn) - }; - { - let mut cursor = state.get_cursor(*cursor_id); - let cursor = cursor.as_btree_mut(); - cursor.wait_for_completion()?; + return_if_io!(cursor.insert(&BTreeKey::new_table_rowid(key as u64, Some(record)), true)); // Only update last_insert_rowid for regular table inserts, not schema modifications if cursor.root_page() != 1 { if let Some(rowid) = cursor.rowid()? { @@ -3687,46 +3733,155 @@ pub fn op_insert_await( Ok(InsnFunctionStepResult::Step) } -pub fn op_delete_async( +pub fn op_delete( program: &Program, state: &mut ProgramState, insn: &Insn, pager: &Rc, mv_store: Option<&Rc>, ) -> Result { - let Insn::DeleteAsync { cursor_id } = insn else { + let Insn::Delete { cursor_id } = insn else { unreachable!("unexpected Insn {:?}", insn) }; { let mut cursor = state.get_cursor(*cursor_id); let cursor = cursor.as_btree_mut(); + tracing::debug!( + "op_delete(record={:?}, rowid={:?})", + cursor.record(), + cursor.rowid()? 
+ ); return_if_io!(cursor.delete()); } - state.pc += 1; - Ok(InsnFunctionStepResult::Step) -} - -pub fn op_delete_await( - program: &Program, - state: &mut ProgramState, - insn: &Insn, - pager: &Rc, - mv_store: Option<&Rc>, -) -> Result { - let Insn::DeleteAwait { cursor_id } = insn else { - unreachable!("unexpected Insn {:?}", insn) - }; - { - let mut cursor = state.get_cursor(*cursor_id); - let cursor = cursor.as_btree_mut(); - cursor.wait_for_completion()?; - } let prev_changes = program.n_change.get(); program.n_change.set(prev_changes + 1); state.pc += 1; Ok(InsnFunctionStepResult::Step) } +pub enum OpIdxDeleteState { + Seeking(ImmutableRecord), // First seek row to delete + Deleting, +} +pub fn op_idx_delete( + program: &Program, + state: &mut ProgramState, + insn: &Insn, + pager: &Rc, + mv_store: Option<&Rc>, +) -> Result { + let Insn::IdxDelete { + cursor_id, + start_reg, + num_regs, + } = insn + else { + unreachable!("unexpected Insn {:?}", insn) + }; + loop { + match &state.op_idx_delete_state { + Some(OpIdxDeleteState::Seeking(record)) => { + { + let mut cursor = state.get_cursor(*cursor_id); + let cursor = cursor.as_btree_mut(); + return_if_io!(cursor.seek(SeekKey::IndexKey(&record), SeekOp::EQ)); + tracing::debug!( + "op_idx_delete(seek={}, record={} rowid={:?})", + &record, + cursor.record().as_ref().unwrap(), + cursor.rowid() + ); + if cursor.rowid()?.is_none() { + // If P5 is not zero, then raise an SQLITE_CORRUPT_INDEX error if no matching + // index entry is found. This happens when running an UPDATE or DELETE statement and the + // index entry to be updated or deleted is not found. For some uses of IdxDelete + // (example: the EXCEPT operator) it does not matter that no matching entry is found. + // For those cases, P5 is zero. Also, do not raise this (self-correcting and non-critical) error if in writable_schema mode. + return Err(LimboError::Corrupt(format!( + "IdxDelete: no matching index entry found for record {:?}", + record + ))); + } + } + state.op_idx_delete_state = Some(OpIdxDeleteState::Deleting); + } + Some(OpIdxDeleteState::Deleting) => { + { + let mut cursor = state.get_cursor(*cursor_id); + let cursor = cursor.as_btree_mut(); + return_if_io!(cursor.delete()); + } + let n_change = program.n_change.get(); + program.n_change.set(n_change + 1); + state.pc += 1; + return Ok(InsnFunctionStepResult::Step); + } + None => { + let record = make_record(&state.registers, start_reg, num_regs); + state.op_idx_delete_state = Some(OpIdxDeleteState::Seeking(record)); + } + } + } +} + +pub fn op_idx_insert( + program: &Program, + state: &mut ProgramState, + insn: &Insn, + pager: &Rc, + mv_store: Option<&Rc>, +) -> Result { + if let Insn::IdxInsert { + cursor_id, + record_reg, + flags, + .. + } = *insn + { + let (_, cursor_type) = program.cursor_ref.get(cursor_id).unwrap(); + let CursorType::BTreeIndex(index_meta) = cursor_type else { + panic!("IdxInsert: not a BTree index cursor"); + }; + { + let mut cursor = state.get_cursor(cursor_id); + let cursor = cursor.as_btree_mut(); + let record = match &state.registers[record_reg] { + Register::Record(ref r) => r, + _ => return Err(LimboError::InternalError("expected record".into())), + }; + // To make this reentrant in case of `moved_before` = false, we need to check if the previous cursor.insert started + // a write/balancing operation. If it did, it means we already moved to the place we wanted. 
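
`op_idx_delete` threads an explicit `OpIdxDeleteState` through `ProgramState` so the opcode can be suspended and re-entered on IO without repeating work: first seek the exact index key, then delete, with each phase independently resumable. A compact, self-contained sketch of that two-phase pattern (all names here are hypothetical):

```rust
enum Phase {
    Seeking,
    Deleting,
}

enum Io<T> {
    Ready(T),
    Pending,
}

// Drive a two-phase operation; on Pending the caller re-invokes with the
// saved phase, just as op_idx_delete stashes its state between steps.
fn step(
    phase: &mut Option<Phase>,
    seek: impl Fn() -> Io<()>,
    delete: impl Fn() -> Io<()>,
) -> Io<()> {
    loop {
        match phase {
            None => *phase = Some(Phase::Seeking),
            Some(Phase::Seeking) => match seek() {
                Io::Pending => return Io::Pending,
                Io::Ready(()) => *phase = Some(Phase::Deleting),
            },
            Some(Phase::Deleting) => return delete(),
        }
    }
}

fn main() {
    let mut phase = None;
    // First call: the seek hits pending IO; the phase survives for the retry.
    assert!(matches!(step(&mut phase, || Io::Pending, || Io::Ready(())), Io::Pending));
    // Retry: the seek completes and the delete runs without seeking again.
    assert!(matches!(step(&mut phase, || Io::Ready(()), || Io::Ready(())), Io::Ready(())));
}
```
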
+        let moved_before = if cursor.is_write_in_progress() {
+            true
+        } else {
+            if index_meta.unique {
+                // check for uniqueness violation
+                match cursor.key_exists_in_index(record)? {
+                    CursorResult::Ok(true) => {
+                        return Err(LimboError::Constraint(
+                            "UNIQUE constraint failed: duplicate key".into(),
+                        ))
+                    }
+                    CursorResult::IO => return Ok(InsnFunctionStepResult::IO),
+                    CursorResult::Ok(false) => {}
+                };
+                false
+            } else {
+                flags.has(IdxInsertFlags::USE_SEEK)
+            }
+        };
+
+            // Start the row insertion. This may trigger a balance procedure that moves us across
+            // pages, so we must not seek again if that happens: returning on IO without advancing
+            // to the next opcode could trigger a move to a child page after a root balance,
+            // which would leave the current page as the root page.
+            return_if_io!(cursor.insert(&BTreeKey::new_index_key(record), moved_before));
+        }
+        // TODO: flag optimizations, update n_change if OPFLAG_NCHANGE
+        state.pc += 1;
+    }
+    Ok(InsnFunctionStepResult::Step)
+}
+
 pub fn op_new_rowid(
     program: &Program,
     state: &mut ProgramState,
@@ -3804,6 +3959,60 @@ pub fn op_soft_null(
     Ok(InsnFunctionStepResult::Step)
 }
 
+pub fn op_no_conflict(
+    program: &Program,
+    state: &mut ProgramState,
+    insn: &Insn,
+    pager: &Rc<Pager>,
+    mv_store: Option<&Rc<MvStore>>,
+) -> Result<InsnFunctionStepResult> {
+    let Insn::NoConflict {
+        cursor_id,
+        target_pc,
+        record_reg,
+        num_regs,
+    } = insn
+    else {
+        unreachable!("unexpected Insn {:?}", insn)
+    };
+    let mut cursor_ref = state.get_cursor(*cursor_id);
+    let cursor = cursor_ref.as_btree_mut();
+
+    let record = if *num_regs == 0 {
+        let record = match &state.registers[*record_reg] {
+            Register::Record(r) => r,
+            _ => {
+                return Err(LimboError::InternalError(
+                    "NoConflict: expected a record in the register".into(),
+                ));
+            }
+        };
+        record
+    } else {
+        &make_record(&state.registers, record_reg, num_regs)
+    };
+    // If there is at least one NULL in the index record, there cannot be a conflict, so we can jump immediately.
+    let contains_nulls = record
+        .get_values()
+        .iter()
+        .any(|val| matches!(val, RefValue::Null));
+
+    if contains_nulls {
+        drop(cursor_ref);
+        state.pc = target_pc.to_offset_int();
+        return Ok(InsnFunctionStepResult::Step);
+    }
+
+    let conflict = return_if_io!(cursor.seek(SeekKey::IndexKey(record), SeekOp::EQ));
+    drop(cursor_ref);
+    if !conflict {
+        state.pc = target_pc.to_offset_int();
+    } else {
+        state.pc += 1;
+    }
+    Ok(InsnFunctionStepResult::Step)
+}
+
 pub fn op_not_exists(
     program: &Program,
     state: &mut ProgramState,
@@ -3879,26 +4088,41 @@ pub fn op_offset_limit(
 // this cursor may be reused for the next insert
 // Update: TableMoveto is used to traverse on NotExists; on insert, depending on flags, a non-seek path traverses again.
 // If not, there is likely room for optimization here.
-pub fn op_open_write_async(
+pub fn op_open_write(
     program: &Program,
     state: &mut ProgramState,
     insn: &Insn,
     pager: &Rc<Pager>,
     mv_store: Option<&Rc<MvStore>>,
 ) -> Result<InsnFunctionStepResult> {
-    let Insn::OpenWriteAsync {
+    let Insn::OpenWrite {
         cursor_id,
         root_page,
+        ..
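
For unique indexes, `op_idx_insert` above checks `key_exists_in_index` before inserting and surfaces a constraint error on a duplicate rather than letting the btree reject the key later. A minimal check-then-insert sketch over a sorted set (no IO suspension, which a `BTreeSet` never needs):

```rust
use std::collections::BTreeSet;

// Check-then-insert for a UNIQUE index, mirroring the control flow above.
fn idx_insert_unique(index: &mut BTreeSet<Vec<u8>>, key: Vec<u8>) -> Result<(), String> {
    if index.contains(&key) {
        return Err("UNIQUE constraint failed: duplicate key".to_string());
    }
    index.insert(key);
    Ok(())
}

fn main() {
    let mut index = BTreeSet::new();
    assert!(idx_insert_unique(&mut index, b"k1".to_vec()).is_ok());
    assert!(idx_insert_unique(&mut index, b"k1".to_vec()).is_err()); // duplicate
}
```
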
} = insn else { unreachable!("unexpected Insn {:?}", insn) }; + let root_page = match root_page { + RegisterOrLiteral::Literal(lit) => *lit as u64, + RegisterOrLiteral::Register(reg) => match &state.registers[*reg].get_owned_value() { + OwnedValue::Integer(val) => *val as u64, + _ => { + return Err(LimboError::InternalError( + "OpenWrite: the value in root_page is not an integer".into(), + )); + } + }, + }; let (_, cursor_type) = program.cursor_ref.get(*cursor_id).unwrap(); let mut cursors = state.cursors.borrow_mut(); - let is_index = cursor_type.is_index(); + let maybe_index = match cursor_type { + CursorType::BTreeIndex(index) => Some(index), + _ => None, + }; let mv_cursor = match state.mv_tx_id { Some(tx_id) => { - let table_id = *root_page as u64; + let table_id = root_page; let mv_store = mv_store.unwrap().clone(); let mv_cursor = Rc::new(RefCell::new( MvCursor::new(mv_store.clone(), tx_id, table_id).unwrap(), @@ -3907,13 +4131,15 @@ pub fn op_open_write_async( } None => None, }; - let cursor = BTreeCursor::new(mv_cursor, pager.clone(), *root_page); - if is_index { + if let Some(index) = maybe_index { + let cursor = + BTreeCursor::new_index(mv_cursor, pager.clone(), root_page as usize, index.as_ref()); cursors .get_mut(*cursor_id) .unwrap() .replace(Cursor::new_btree(cursor)); } else { + let cursor = BTreeCursor::new(mv_cursor, pager.clone(), root_page as usize); cursors .get_mut(*cursor_id) .unwrap() @@ -3923,20 +4149,6 @@ pub fn op_open_write_async( Ok(InsnFunctionStepResult::Step) } -pub fn op_open_write_await( - program: &Program, - state: &mut ProgramState, - insn: &Insn, - pager: &Rc, - mv_store: Option<&Rc>, -) -> Result { - let Insn::OpenWriteAwait {} = insn else { - unreachable!("unexpected Insn {:?}", insn) - }; - state.pc += 1; - Ok(InsnFunctionStepResult::Step) -} - pub fn op_copy( program: &Program, state: &mut ProgramState, @@ -3973,7 +4185,7 @@ pub fn op_create_btree( // TODO: implement temp databases todo!("temp databases not implemented yet"); } - let root_page = pager.btree_create(*flags); + let root_page = pager.btree_create(flags); state.registers[*root] = Register::OwnedValue(OwnedValue::Integer(root_page as i64)); state.pc += 1; Ok(InsnFunctionStepResult::Step) @@ -4082,13 +4294,8 @@ pub fn op_page_count( // TODO: implement temp databases todo!("temp databases not implemented yet"); } - // SQLite returns "0" on an empty database, and 2 on the first insertion, - // so we'll mimic that behavior. 
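
`OpenWrite` now accepts its root page as either an inline literal or a register, which is needed when the page number is only known at runtime (for example, freshly produced by CreateBtree). A small sketch of that resolution step, with a simplified register file of plain integers:

```rust
enum RegisterOrLiteral {
    Register(usize),
    Literal(u64),
}

// Resolve the root page operand against the register file, as
// op_open_write does above; a missing register is an internal error.
fn resolve_root_page(op: &RegisterOrLiteral, registers: &[i64]) -> Result<u64, String> {
    match op {
        RegisterOrLiteral::Literal(lit) => Ok(*lit),
        RegisterOrLiteral::Register(reg) => registers
            .get(*reg)
            .map(|v| *v as u64)
            .ok_or_else(|| "OpenWrite: register out of range".to_string()),
    }
}

fn main() {
    let regs = vec![0, 7];
    assert_eq!(resolve_root_page(&RegisterOrLiteral::Register(1), &regs).unwrap(), 7);
    assert_eq!(resolve_root_page(&RegisterOrLiteral::Literal(2), &regs).unwrap(), 2);
}
```
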
- let mut pages = pager.db_header.lock().database_size.into(); - if pages == 1 { - pages = 0; - } - state.registers[*dest] = Register::OwnedValue(OwnedValue::Integer(pages)); + let count = pager.db_header.lock().database_size.into(); + state.registers[*dest] = Register::OwnedValue(OwnedValue::Integer(count)); state.pc += 1; Ok(InsnFunctionStepResult::Step) } @@ -4110,18 +4317,20 @@ pub fn op_parse_schema( let conn = program.connection.upgrade(); let conn = conn.as_ref().unwrap(); let stmt = conn.prepare(format!( - "SELECT * FROM sqlite_schema WHERE {}", + "SELECT * FROM sqlite_schema WHERE {}", where_clause ))?; let mut schema = conn.schema.write(); // TODO: This function below is synchronous, make it async - parse_schema_rows( - Some(stmt), - &mut schema, - conn.pager.io.clone(), - &conn.syms.borrow(), - state.mv_tx_id, - )?; + { + parse_schema_rows( + Some(stmt), + &mut schema, + conn.pager.io.clone(), + &conn.syms.borrow(), + state.mv_tx_id, + )?; + } state.pc += 1; Ok(InsnFunctionStepResult::Step) } @@ -4142,6 +4351,7 @@ pub fn op_read_cookie( } let cookie_value = match cookie { Cookie::UserVersion => pager.db_header.lock().user_version.into(), + Cookie::SchemaVersion => pager.db_header.lock().schema_cookie.into(), cookie => todo!("{cookie:?} is not yet implement for ReadCookie"), }; state.registers[*dest] = Register::OwnedValue(OwnedValue::Integer(cookie_value)); @@ -4195,12 +4405,7 @@ pub fn op_variable( let Insn::Variable { index, dest } = insn else { unreachable!("unexpected Insn {:?}", insn) }; - state.registers[*dest] = Register::OwnedValue( - state - .get_parameter(*index) - .ok_or(LimboError::Unbound(*index))? - .clone(), - ); + state.registers[*dest] = Register::OwnedValue(state.get_parameter(*index)); state.pc += 1; Ok(InsnFunctionStepResult::Step) } @@ -4309,6 +4514,205 @@ pub fn op_noop( Ok(InsnFunctionStepResult::Step) } +pub fn op_open_ephemeral( + program: &Program, + state: &mut ProgramState, + insn: &Insn, + pager: &Rc, + mv_store: Option<&Rc>, +) -> Result { + let (cursor_id, is_table) = match insn { + Insn::OpenEphemeral { + cursor_id, + is_table, + } => (*cursor_id, *is_table), + Insn::OpenAutoindex { cursor_id } => (*cursor_id, false), + _ => unreachable!("unexpected Insn {:?}", insn), + }; + + let conn = program.connection.upgrade().unwrap(); + let io = conn.pager.io.get_memory_io(); + + let file = io.open_file("", OpenFlags::Create, true)?; + maybe_init_database_file(&file, &(io.clone() as Arc))?; + let db_file = Arc::new(FileMemoryStorage::new(file)); + + let db_header = Pager::begin_open(db_file.clone())?; + let buffer_pool = Rc::new(BufferPool::new(db_header.lock().get_page_size() as usize)); + let page_cache = Arc::new(RwLock::new(DumbLruPageCache::new(10))); + + let pager = Rc::new(Pager::finish_open( + db_header, + db_file, + None, + io, + page_cache, + buffer_pool, + )?); + + let flag = if is_table { + &CreateBTreeFlags::new_table() + } else { + &CreateBTreeFlags::new_index() + }; + + let root_page = pager.btree_create(flag); + + let (_, cursor_type) = program.cursor_ref.get(cursor_id).unwrap(); + let mv_cursor = match state.mv_tx_id { + Some(tx_id) => { + let table_id = root_page as u64; + let mv_store = mv_store.unwrap().clone(); + let mv_cursor = Rc::new(RefCell::new( + MvCursor::new(mv_store.clone(), tx_id, table_id).unwrap(), + )); + Some(mv_cursor) + } + None => None, + }; + let mut cursor = BTreeCursor::new(mv_cursor, pager, root_page as usize); + cursor.rewind()?; // Will never return io + + let mut cursors: std::cell::RefMut<'_, Vec>> = 
state.cursors.borrow_mut();
+
+    // Table content is erased if the cursor already exists
+    match cursor_type {
+        CursorType::BTreeTable(_) => {
+            cursors
+                .get_mut(cursor_id)
+                .unwrap()
+                .replace(Cursor::new_btree(cursor));
+        }
+        CursorType::BTreeIndex(_) => {
+            cursors
+                .get_mut(cursor_id)
+                .unwrap()
+                .replace(Cursor::new_btree(cursor));
+        }
+        CursorType::Pseudo(_) => {
+            panic!("OpenEphemeral on pseudo cursor");
+        }
+        CursorType::Sorter => {
+            panic!("OpenEphemeral on sorter cursor");
+        }
+        CursorType::VirtualTable(_) => {
+            panic!("OpenEphemeral on virtual table cursor, use Insn::VOpen instead");
+        }
+    }
+
+    state.pc += 1;
+    Ok(InsnFunctionStepResult::Step)
+}
+
+/// Execute the [Insn::Once] instruction.
+///
+/// This instruction is used to execute a block of code only once.
+/// If the instruction is executed again, it will jump to the target program counter.
+pub fn op_once(
+    program: &Program,
+    state: &mut ProgramState,
+    insn: &Insn,
+    pager: &Rc<Pager>,
+    mv_store: Option<&Rc<MvStore>>,
+) -> Result<InsnFunctionStepResult> {
+    let Insn::Once {
+        target_pc_when_reentered,
+    } = insn
+    else {
+        unreachable!("unexpected Insn: {:?}", insn)
+    };
+    assert!(target_pc_when_reentered.is_offset());
+    let offset = state.pc;
+    if state.once.iter().any(|o| *o == offset) {
+        state.pc = target_pc_when_reentered.to_offset_int();
+        return Ok(InsnFunctionStepResult::Step);
+    }
+    state.once.push(offset);
+    state.pc += 1;
+    Ok(InsnFunctionStepResult::Step)
+}
+
+pub fn op_not_found(
+    program: &Program,
+    state: &mut ProgramState,
+    insn: &Insn,
+    pager: &Rc<Pager>,
+    mv_store: Option<&Rc<MvStore>>,
+) -> Result<InsnFunctionStepResult> {
+    let Insn::NotFound {
+        cursor_id,
+        target_pc,
+        record_reg,
+        num_regs,
+    } = insn
+    else {
+        unreachable!("unexpected Insn {:?}", insn)
+    };
+
+    let found = {
+        let mut cursor = state.get_cursor(*cursor_id);
+        let cursor = cursor.as_btree_mut();
+
+        if *num_regs == 0 {
+            let record = match &state.registers[*record_reg] {
+                Register::Record(r) => r,
+                _ => {
+                    return Err(LimboError::InternalError(
+                        "NotFound: expected a record in the register".into(),
+                    ));
+                }
+            };
+
+            return_if_io!(cursor.seek(SeekKey::IndexKey(&record), SeekOp::EQ))
+        } else {
+            let record = make_record(&state.registers, record_reg, num_regs);
+            return_if_io!(cursor.seek(SeekKey::IndexKey(&record), SeekOp::EQ))
+        }
+    };
+
+    if found {
+        state.pc += 1;
+    } else {
+        state.pc = target_pc.to_offset_int();
+    }
+
+    Ok(InsnFunctionStepResult::Step)
+}
+
+pub fn op_affinity(
+    program: &Program,
+    state: &mut ProgramState,
+    insn: &Insn,
+    pager: &Rc<Pager>,
+    mv_store: Option<&Rc<MvStore>>,
+) -> Result<InsnFunctionStepResult> {
+    let Insn::Affinity {
+        start_reg,
+        count,
+        affinities,
+    } = insn
+    else {
+        unreachable!("unexpected Insn {:?}", insn)
+    };
+
+    if affinities.len() != count.get() {
+        return Err(LimboError::InternalError(
+            "Affinity: the length of affinities does not match the count".into(),
+        ));
+    }
+
+    for (i, affinity_char) in affinities.chars().enumerate().take(count.get()) {
+        let reg_index = *start_reg + i;
+
+        let affinity = Affinity::from_char(affinity_char)?;
+
+        apply_affinity_char(&mut state.registers[reg_index], affinity);
+    }
+
+    state.pc += 1;
+    Ok(InsnFunctionStepResult::Step)
+}
+
 fn exec_lower(reg: &OwnedValue) -> Option<OwnedValue> {
     match reg {
         OwnedValue::Text(t) => Some(OwnedValue::build_text(&t.as_str().to_lowercase())),
@@ -4900,6 +5304,77 @@ fn exec_if(reg: &OwnedValue, jump_if_null: bool, not: bool) -> bool {
     }
 }
 
+fn apply_affinity_char(target: &mut Register, affinity: Affinity) -> bool {
+    if let Register::OwnedValue(value) = target {
+        if matches!(value, OwnedValue::Blob(_)) {
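
`Once` executes its fall-through block a single time per statement by recording the opcode's own program counter; any later visit jumps straight to `target_pc_when_reentered`. A self-contained sketch of that bookkeeping:

```rust
// Track which Once opcodes have fired, keyed by their program counter,
// the same bookkeeping op_once performs with state.once above.
struct OnceState {
    fired: Vec<u32>,
}

impl OnceState {
    fn new() -> Self {
        OnceState { fired: Vec::new() }
    }

    // Returns true the first time a given Once opcode is reached.
    fn should_run(&mut self, pc: u32) -> bool {
        if self.fired.contains(&pc) {
            return false; // jump to target_pc_when_reentered
        }
        self.fired.push(pc);
        true
    }
}

fn main() {
    let mut once = OnceState::new();
    assert!(once.should_run(10));
    assert!(!once.should_run(10)); // second visit jumps past the block
    assert!(once.should_run(25)); // a different Once opcode fires independently
}
```
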
+            return true;
+        }
+        match affinity {
+            Affinity::Blob => return true,
+            Affinity::Text => {
+                if matches!(value, OwnedValue::Text(_) | OwnedValue::Null) {
+                    return true;
+                }
+                let text = value.to_string();
+                *value = OwnedValue::Text(text.into());
+                return true;
+            }
+            Affinity::Integer | Affinity::Numeric => {
+                if matches!(value, OwnedValue::Integer(_)) {
+                    return true;
+                }
+                if !matches!(value, OwnedValue::Text(_) | OwnedValue::Float(_)) {
+                    return true;
+                }
+
+                if let OwnedValue::Float(fl) = *value {
+                    if let Ok(int) = cast_real_to_integer(fl).map(OwnedValue::Integer) {
+                        *value = int;
+                        return true;
+                    }
+                    return false;
+                }
+
+                let text = value.to_text().unwrap();
+                let Ok(num) = checked_cast_text_to_numeric(&text) else {
+                    return false;
+                };
+
+                *value = match num {
+                    OwnedValue::Float(fl) => {
+                        // Store the integer when the float converts losslessly; keep the float otherwise.
+                        cast_real_to_integer(fl)
+                            .map(OwnedValue::Integer)
+                            .unwrap_or(OwnedValue::Float(fl))
+                    }
+                    OwnedValue::Integer(_) if text.starts_with("0x") => {
+                        return false;
+                    }
+                    other => other,
+                };
+            }
+
+            Affinity::Real => {
+                if let OwnedValue::Integer(i) = value {
+                    *value = OwnedValue::Float(*i as f64);
+                    return true;
+                } else if let OwnedValue::Text(t) = value {
+                    if t.as_str().starts_with("0x") {
+                        return false;
+                    }
+                    if let Ok(num) = checked_cast_text_to_numeric(t.as_str()) {
+                        *value = num;
+                        return true;
+                    } else {
+                        return false;
+                    }
+                }
+            }
+        };
+    }
+    return true;
+}
+
 fn exec_cast(value: &OwnedValue, datatype: &str) -> OwnedValue {
     if matches!(value, OwnedValue::Null) {
         return OwnedValue::Null;
@@ -5113,15 +5588,605 @@ fn exec_math_log(arg: &OwnedValue, base: Option<&OwnedValue>) -> OwnedValue {
     OwnedValue::Float(result)
 }
 
+fn exec_likely(reg: &OwnedValue) -> OwnedValue {
+    reg.clone()
+}
+
+fn exec_likelihood(reg: &OwnedValue, _probability: &OwnedValue) -> OwnedValue {
+    reg.clone()
+}
+
+pub fn exec_add(lhs: &OwnedValue, rhs: &OwnedValue) -> OwnedValue {
+    (Numeric::from(lhs) + Numeric::from(rhs)).into()
+}
+
+pub fn exec_subtract(lhs: &OwnedValue, rhs: &OwnedValue) -> OwnedValue {
+    (Numeric::from(lhs) - Numeric::from(rhs)).into()
+}
+
+pub fn exec_multiply(lhs: &OwnedValue, rhs: &OwnedValue) -> OwnedValue {
+    (Numeric::from(lhs) * Numeric::from(rhs)).into()
+}
+
+pub fn exec_divide(lhs: &OwnedValue, rhs: &OwnedValue) -> OwnedValue {
+    (Numeric::from(lhs) / Numeric::from(rhs)).into()
+}
+
+pub fn exec_bit_and(lhs: &OwnedValue, rhs: &OwnedValue) -> OwnedValue {
+    (NullableInteger::from(lhs) & NullableInteger::from(rhs)).into()
+}
+
+pub fn exec_bit_or(lhs: &OwnedValue, rhs: &OwnedValue) -> OwnedValue {
+    (NullableInteger::from(lhs) | NullableInteger::from(rhs)).into()
+}
+
+pub fn exec_remainder(lhs: &OwnedValue, rhs: &OwnedValue) -> OwnedValue {
+    let convert_to_float = matches!(Numeric::from(lhs), Numeric::Float(_))
+        || matches!(Numeric::from(rhs), Numeric::Float(_));
+
+    match NullableInteger::from(lhs) % NullableInteger::from(rhs) {
+        NullableInteger::Null => OwnedValue::Null,
+        NullableInteger::Integer(v) => {
+            if convert_to_float {
+                OwnedValue::Float(v as f64)
+            } else {
+                OwnedValue::Integer(v)
+            }
+        }
+    }
+}
+
+pub fn exec_bit_not(reg: &OwnedValue) -> OwnedValue {
+    (!NullableInteger::from(reg)).into()
+}
+
+pub fn exec_shift_left(lhs: &OwnedValue, rhs: &OwnedValue) -> OwnedValue {
+    (NullableInteger::from(lhs) << NullableInteger::from(rhs)).into()
+}
+
+pub fn exec_shift_right(lhs: &OwnedValue, rhs: &OwnedValue) -> OwnedValue {
+    (NullableInteger::from(lhs) >> NullableInteger::from(rhs)).into()
+}
+
+pub fn exec_boolean_not(reg: &OwnedValue) -> OwnedValue {
+    match
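
The NUMERIC/INTEGER branch of `apply_affinity_char` only demotes a real to an integer when the conversion is lossless, which is what `cast_real_to_integer` guards. A tiny sketch of that guard (hypothetical helper name, ignoring extreme-magnitude edge cases):

```rust
// Convert a float to an integer only when the round-trip is exact.
fn real_to_integer_lossless(fl: f64) -> Option<i64> {
    let as_int = fl as i64; // saturating cast
    if as_int as f64 == fl {
        Some(as_int)
    } else {
        None
    }
}

fn main() {
    assert_eq!(real_to_integer_lossless(3.0), Some(3)); // stored as INTEGER
    assert_eq!(real_to_integer_lossless(3.5), None); // stays REAL
}
```
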
Numeric::from(reg).try_into_bool() { + None => OwnedValue::Null, + Some(v) => OwnedValue::Integer(!v as i64), + } +} +pub fn exec_concat(lhs: &OwnedValue, rhs: &OwnedValue) -> OwnedValue { + match (lhs, rhs) { + (OwnedValue::Text(lhs_text), OwnedValue::Text(rhs_text)) => { + OwnedValue::build_text(&(lhs_text.as_str().to_string() + rhs_text.as_str())) + } + (OwnedValue::Text(lhs_text), OwnedValue::Integer(rhs_int)) => { + OwnedValue::build_text(&(lhs_text.as_str().to_string() + &rhs_int.to_string())) + } + (OwnedValue::Text(lhs_text), OwnedValue::Float(rhs_float)) => { + OwnedValue::build_text(&(lhs_text.as_str().to_string() + &rhs_float.to_string())) + } + (OwnedValue::Integer(lhs_int), OwnedValue::Text(rhs_text)) => { + OwnedValue::build_text(&(lhs_int.to_string() + rhs_text.as_str())) + } + (OwnedValue::Integer(lhs_int), OwnedValue::Integer(rhs_int)) => { + OwnedValue::build_text(&(lhs_int.to_string() + &rhs_int.to_string())) + } + (OwnedValue::Integer(lhs_int), OwnedValue::Float(rhs_float)) => { + OwnedValue::build_text(&(lhs_int.to_string() + &rhs_float.to_string())) + } + (OwnedValue::Float(lhs_float), OwnedValue::Text(rhs_text)) => { + OwnedValue::build_text(&(lhs_float.to_string() + rhs_text.as_str())) + } + (OwnedValue::Float(lhs_float), OwnedValue::Integer(rhs_int)) => { + OwnedValue::build_text(&(lhs_float.to_string() + &rhs_int.to_string())) + } + (OwnedValue::Float(lhs_float), OwnedValue::Float(rhs_float)) => { + OwnedValue::build_text(&(lhs_float.to_string() + &rhs_float.to_string())) + } + (OwnedValue::Null, _) | (_, OwnedValue::Null) => OwnedValue::Null, + (OwnedValue::Blob(_), _) | (_, OwnedValue::Blob(_)) => { + todo!("TODO: Handle Blob conversion to String") + } + } +} + +pub fn exec_and(lhs: &OwnedValue, rhs: &OwnedValue) -> OwnedValue { + match ( + Numeric::from(lhs).try_into_bool(), + Numeric::from(rhs).try_into_bool(), + ) { + (Some(false), _) | (_, Some(false)) => OwnedValue::Integer(0), + (None, _) | (_, None) => OwnedValue::Null, + _ => OwnedValue::Integer(1), + } +} + +pub fn exec_or(lhs: &OwnedValue, rhs: &OwnedValue) -> OwnedValue { + match ( + Numeric::from(lhs).try_into_bool(), + Numeric::from(rhs).try_into_bool(), + ) { + (Some(true), _) | (_, Some(true)) => OwnedValue::Integer(1), + (None, _) | (_, None) => OwnedValue::Null, + _ => OwnedValue::Integer(0), + } +} + #[cfg(test)] mod tests { - use crate::vdbe::{execute::exec_replace, Bitfield, Register}; + use crate::types::{OwnedValue, Text}; + + use super::{exec_add, exec_or}; + + #[test] + fn test_exec_add() { + let inputs = vec![ + (OwnedValue::Integer(3), OwnedValue::Integer(1)), + (OwnedValue::Float(3.0), OwnedValue::Float(1.0)), + (OwnedValue::Float(3.0), OwnedValue::Integer(1)), + (OwnedValue::Integer(3), OwnedValue::Float(1.0)), + (OwnedValue::Null, OwnedValue::Null), + (OwnedValue::Null, OwnedValue::Integer(1)), + (OwnedValue::Null, OwnedValue::Float(1.0)), + (OwnedValue::Null, OwnedValue::Text(Text::from_str("2"))), + (OwnedValue::Integer(1), OwnedValue::Null), + (OwnedValue::Float(1.0), OwnedValue::Null), + (OwnedValue::Text(Text::from_str("1")), OwnedValue::Null), + ( + OwnedValue::Text(Text::from_str("1")), + OwnedValue::Text(Text::from_str("3")), + ), + ( + OwnedValue::Text(Text::from_str("1.0")), + OwnedValue::Text(Text::from_str("3.0")), + ), + ( + OwnedValue::Text(Text::from_str("1.0")), + OwnedValue::Float(3.0), + ), + ( + OwnedValue::Text(Text::from_str("1.0")), + OwnedValue::Integer(3), + ), + ( + OwnedValue::Float(1.0), + OwnedValue::Text(Text::from_str("3.0")), + ), + ( + 
OwnedValue::Integer(1), + OwnedValue::Text(Text::from_str("3")), + ), + ]; + + let outputs = [ + OwnedValue::Integer(4), + OwnedValue::Float(4.0), + OwnedValue::Float(4.0), + OwnedValue::Float(4.0), + OwnedValue::Null, + OwnedValue::Null, + OwnedValue::Null, + OwnedValue::Null, + OwnedValue::Null, + OwnedValue::Null, + OwnedValue::Null, + OwnedValue::Integer(4), + OwnedValue::Float(4.0), + OwnedValue::Float(4.0), + OwnedValue::Float(4.0), + OwnedValue::Float(4.0), + OwnedValue::Float(4.0), + ]; + + assert_eq!( + inputs.len(), + outputs.len(), + "Inputs and Outputs should have same size" + ); + for (i, (lhs, rhs)) in inputs.iter().enumerate() { + assert_eq!( + exec_add(lhs, rhs), + outputs[i], + "Wrong ADD for lhs: {}, rhs: {}", + lhs, + rhs + ); + } + } + + use super::exec_subtract; + + #[test] + fn test_exec_subtract() { + let inputs = vec![ + (OwnedValue::Integer(3), OwnedValue::Integer(1)), + (OwnedValue::Float(3.0), OwnedValue::Float(1.0)), + (OwnedValue::Float(3.0), OwnedValue::Integer(1)), + (OwnedValue::Integer(3), OwnedValue::Float(1.0)), + (OwnedValue::Null, OwnedValue::Null), + (OwnedValue::Null, OwnedValue::Integer(1)), + (OwnedValue::Null, OwnedValue::Float(1.0)), + (OwnedValue::Null, OwnedValue::Text(Text::from_str("1"))), + (OwnedValue::Integer(1), OwnedValue::Null), + (OwnedValue::Float(1.0), OwnedValue::Null), + (OwnedValue::Text(Text::from_str("4")), OwnedValue::Null), + ( + OwnedValue::Text(Text::from_str("1")), + OwnedValue::Text(Text::from_str("3")), + ), + ( + OwnedValue::Text(Text::from_str("1.0")), + OwnedValue::Text(Text::from_str("3.0")), + ), + ( + OwnedValue::Text(Text::from_str("1.0")), + OwnedValue::Float(3.0), + ), + ( + OwnedValue::Text(Text::from_str("1.0")), + OwnedValue::Integer(3), + ), + ( + OwnedValue::Float(1.0), + OwnedValue::Text(Text::from_str("3.0")), + ), + ( + OwnedValue::Integer(1), + OwnedValue::Text(Text::from_str("3")), + ), + ]; + + let outputs = [ + OwnedValue::Integer(2), + OwnedValue::Float(2.0), + OwnedValue::Float(2.0), + OwnedValue::Float(2.0), + OwnedValue::Null, + OwnedValue::Null, + OwnedValue::Null, + OwnedValue::Null, + OwnedValue::Null, + OwnedValue::Null, + OwnedValue::Null, + OwnedValue::Integer(-2), + OwnedValue::Float(-2.0), + OwnedValue::Float(-2.0), + OwnedValue::Float(-2.0), + OwnedValue::Float(-2.0), + OwnedValue::Float(-2.0), + ]; + + assert_eq!( + inputs.len(), + outputs.len(), + "Inputs and Outputs should have same size" + ); + for (i, (lhs, rhs)) in inputs.iter().enumerate() { + assert_eq!( + exec_subtract(lhs, rhs), + outputs[i], + "Wrong subtract for lhs: {}, rhs: {}", + lhs, + rhs + ); + } + } + use super::exec_multiply; + + #[test] + fn test_exec_multiply() { + let inputs = vec![ + (OwnedValue::Integer(3), OwnedValue::Integer(2)), + (OwnedValue::Float(3.0), OwnedValue::Float(2.0)), + (OwnedValue::Float(3.0), OwnedValue::Integer(2)), + (OwnedValue::Integer(3), OwnedValue::Float(2.0)), + (OwnedValue::Null, OwnedValue::Null), + (OwnedValue::Null, OwnedValue::Integer(1)), + (OwnedValue::Null, OwnedValue::Float(1.0)), + (OwnedValue::Null, OwnedValue::Text(Text::from_str("1"))), + (OwnedValue::Integer(1), OwnedValue::Null), + (OwnedValue::Float(1.0), OwnedValue::Null), + (OwnedValue::Text(Text::from_str("4")), OwnedValue::Null), + ( + OwnedValue::Text(Text::from_str("2")), + OwnedValue::Text(Text::from_str("3")), + ), + ( + OwnedValue::Text(Text::from_str("2.0")), + OwnedValue::Text(Text::from_str("3.0")), + ), + ( + OwnedValue::Text(Text::from_str("2.0")), + OwnedValue::Float(3.0), + ), + ( + 
OwnedValue::Text(Text::from_str("2.0")), + OwnedValue::Integer(3), + ), + ( + OwnedValue::Float(2.0), + OwnedValue::Text(Text::from_str("3.0")), + ), + ( + OwnedValue::Integer(2), + OwnedValue::Text(Text::from_str("3.0")), + ), + ]; + + let outputs = [ + OwnedValue::Integer(6), + OwnedValue::Float(6.0), + OwnedValue::Float(6.0), + OwnedValue::Float(6.0), + OwnedValue::Null, + OwnedValue::Null, + OwnedValue::Null, + OwnedValue::Null, + OwnedValue::Null, + OwnedValue::Null, + OwnedValue::Null, + OwnedValue::Integer(6), + OwnedValue::Float(6.0), + OwnedValue::Float(6.0), + OwnedValue::Float(6.0), + OwnedValue::Float(6.0), + OwnedValue::Float(6.0), + ]; + + assert_eq!( + inputs.len(), + outputs.len(), + "Inputs and Outputs should have same size" + ); + for (i, (lhs, rhs)) in inputs.iter().enumerate() { + assert_eq!( + exec_multiply(lhs, rhs), + outputs[i], + "Wrong multiply for lhs: {}, rhs: {}", + lhs, + rhs + ); + } + } + use super::exec_divide; + + #[test] + fn test_exec_divide() { + let inputs = vec![ + (OwnedValue::Integer(1), OwnedValue::Integer(0)), + (OwnedValue::Float(1.0), OwnedValue::Float(0.0)), + (OwnedValue::Integer(i64::MIN), OwnedValue::Integer(-1)), + (OwnedValue::Float(6.0), OwnedValue::Float(2.0)), + (OwnedValue::Float(6.0), OwnedValue::Integer(2)), + (OwnedValue::Integer(6), OwnedValue::Integer(2)), + (OwnedValue::Null, OwnedValue::Integer(2)), + (OwnedValue::Integer(2), OwnedValue::Null), + (OwnedValue::Null, OwnedValue::Null), + ( + OwnedValue::Text(Text::from_str("6")), + OwnedValue::Text(Text::from_str("2")), + ), + ( + OwnedValue::Text(Text::from_str("6")), + OwnedValue::Integer(2), + ), + ]; + + let outputs = [ + OwnedValue::Null, + OwnedValue::Null, + OwnedValue::Float(9.223372036854776e18), + OwnedValue::Float(3.0), + OwnedValue::Float(3.0), + OwnedValue::Float(3.0), + OwnedValue::Null, + OwnedValue::Null, + OwnedValue::Null, + OwnedValue::Float(3.0), + OwnedValue::Float(3.0), + ]; + + assert_eq!( + inputs.len(), + outputs.len(), + "Inputs and Outputs should have same size" + ); + for (i, (lhs, rhs)) in inputs.iter().enumerate() { + assert_eq!( + exec_divide(lhs, rhs), + outputs[i], + "Wrong divide for lhs: {}, rhs: {}", + lhs, + rhs + ); + } + } + + use super::exec_remainder; + #[test] + fn test_exec_remainder() { + let inputs = vec![ + (OwnedValue::Null, OwnedValue::Null), + (OwnedValue::Null, OwnedValue::Float(1.0)), + (OwnedValue::Null, OwnedValue::Integer(1)), + (OwnedValue::Null, OwnedValue::Text(Text::from_str("1"))), + (OwnedValue::Float(1.0), OwnedValue::Null), + (OwnedValue::Integer(1), OwnedValue::Null), + (OwnedValue::Integer(12), OwnedValue::Integer(0)), + (OwnedValue::Float(12.0), OwnedValue::Float(0.0)), + (OwnedValue::Float(12.0), OwnedValue::Integer(0)), + (OwnedValue::Integer(12), OwnedValue::Float(0.0)), + (OwnedValue::Integer(i64::MIN), OwnedValue::Integer(-1)), + (OwnedValue::Integer(12), OwnedValue::Integer(3)), + (OwnedValue::Float(12.0), OwnedValue::Float(3.0)), + (OwnedValue::Float(12.0), OwnedValue::Integer(3)), + (OwnedValue::Integer(12), OwnedValue::Float(3.0)), + (OwnedValue::Integer(12), OwnedValue::Integer(-3)), + (OwnedValue::Float(12.0), OwnedValue::Float(-3.0)), + (OwnedValue::Float(12.0), OwnedValue::Integer(-3)), + (OwnedValue::Integer(12), OwnedValue::Float(-3.0)), + ( + OwnedValue::Text(Text::from_str("12.0")), + OwnedValue::Text(Text::from_str("3.0")), + ), + ( + OwnedValue::Text(Text::from_str("12.0")), + OwnedValue::Float(3.0), + ), + ( + OwnedValue::Float(12.0), + OwnedValue::Text(Text::from_str("3.0")), + ), + ]; + let 
outputs = vec![ + OwnedValue::Null, + OwnedValue::Null, + OwnedValue::Null, + OwnedValue::Null, + OwnedValue::Null, + OwnedValue::Null, + OwnedValue::Null, + OwnedValue::Null, + OwnedValue::Null, + OwnedValue::Null, + OwnedValue::Float(0.0), + OwnedValue::Integer(0), + OwnedValue::Float(0.0), + OwnedValue::Float(0.0), + OwnedValue::Float(0.0), + OwnedValue::Integer(0), + OwnedValue::Float(0.0), + OwnedValue::Float(0.0), + OwnedValue::Float(0.0), + OwnedValue::Float(0.0), + OwnedValue::Float(0.0), + OwnedValue::Float(0.0), + ]; + + assert_eq!( + inputs.len(), + outputs.len(), + "Inputs and Outputs should have same size" + ); + + for (i, (lhs, rhs)) in inputs.iter().enumerate() { + assert_eq!( + exec_remainder(lhs, rhs), + outputs[i], + "Wrong remainder for lhs: {}, rhs: {}", + lhs, + rhs + ); + } + } + + use super::exec_and; + + #[test] + fn test_exec_and() { + let inputs = vec![ + (OwnedValue::Integer(0), OwnedValue::Null), + (OwnedValue::Null, OwnedValue::Integer(1)), + (OwnedValue::Null, OwnedValue::Null), + (OwnedValue::Float(0.0), OwnedValue::Null), + (OwnedValue::Integer(1), OwnedValue::Float(2.2)), + ( + OwnedValue::Integer(0), + OwnedValue::Text(Text::from_str("string")), + ), + ( + OwnedValue::Integer(0), + OwnedValue::Text(Text::from_str("1")), + ), + ( + OwnedValue::Integer(1), + OwnedValue::Text(Text::from_str("1")), + ), + ]; + let outputs = [ + OwnedValue::Integer(0), + OwnedValue::Null, + OwnedValue::Null, + OwnedValue::Integer(0), + OwnedValue::Integer(1), + OwnedValue::Integer(0), + OwnedValue::Integer(0), + OwnedValue::Integer(1), + ]; + + assert_eq!( + inputs.len(), + outputs.len(), + "Inputs and Outputs should have same size" + ); + for (i, (lhs, rhs)) in inputs.iter().enumerate() { + assert_eq!( + exec_and(lhs, rhs), + outputs[i], + "Wrong AND for lhs: {}, rhs: {}", + lhs, + rhs + ); + } + } + + #[test] + fn test_exec_or() { + let inputs = vec![ + (OwnedValue::Integer(0), OwnedValue::Null), + (OwnedValue::Null, OwnedValue::Integer(1)), + (OwnedValue::Null, OwnedValue::Null), + (OwnedValue::Float(0.0), OwnedValue::Null), + (OwnedValue::Integer(1), OwnedValue::Float(2.2)), + (OwnedValue::Float(0.0), OwnedValue::Integer(0)), + ( + OwnedValue::Integer(0), + OwnedValue::Text(Text::from_str("string")), + ), + ( + OwnedValue::Integer(0), + OwnedValue::Text(Text::from_str("1")), + ), + (OwnedValue::Integer(0), OwnedValue::Text(Text::from_str(""))), + ]; + let outputs = [ + OwnedValue::Null, + OwnedValue::Integer(1), + OwnedValue::Null, + OwnedValue::Null, + OwnedValue::Integer(1), + OwnedValue::Integer(0), + OwnedValue::Integer(0), + OwnedValue::Integer(1), + OwnedValue::Integer(0), + ]; + + assert_eq!( + inputs.len(), + outputs.len(), + "Inputs and Outputs should have same size" + ); + for (i, (lhs, rhs)) in inputs.iter().enumerate() { + assert_eq!( + exec_or(lhs, rhs), + outputs[i], + "Wrong OR for lhs: {}, rhs: {}", + lhs, + rhs + ); + } + } + + use crate::vdbe::{ + execute::{exec_likelihood, exec_likely, exec_replace}, + Bitfield, Register, + }; use super::{ exec_abs, exec_char, exec_hex, exec_if, exec_instr, exec_length, exec_like, exec_lower, exec_ltrim, exec_max, exec_min, exec_nullif, exec_quote, exec_random, exec_randomblob, exec_round, exec_rtrim, exec_sign, exec_soundex, exec_substring, exec_trim, exec_typeof, - exec_unhex, exec_unicode, exec_upper, exec_zeroblob, execute_sqlite_version, OwnedValue, + exec_unhex, exec_unicode, exec_upper, exec_zeroblob, execute_sqlite_version, }; use std::collections::HashMap; @@ -6007,6 +7072,62 @@ mod tests { ); } + #[test] + fn 
test_likely() { + let input = OwnedValue::build_text("limbo"); + let expected = OwnedValue::build_text("limbo"); + assert_eq!(exec_likely(&input), expected); + + let input = OwnedValue::Integer(100); + let expected = OwnedValue::Integer(100); + assert_eq!(exec_likely(&input), expected); + + let input = OwnedValue::Float(12.34); + let expected = OwnedValue::Float(12.34); + assert_eq!(exec_likely(&input), expected); + + let input = OwnedValue::Null; + let expected = OwnedValue::Null; + assert_eq!(exec_likely(&input), expected); + + let input = OwnedValue::Blob(vec![1, 2, 3, 4]); + let expected = OwnedValue::Blob(vec![1, 2, 3, 4]); + assert_eq!(exec_likely(&input), expected); + } + + #[test] + fn test_likelihood() { + let value = OwnedValue::build_text("limbo"); + let prob = OwnedValue::Float(0.5); + assert_eq!(exec_likelihood(&value, &prob), value); + + let value = OwnedValue::build_text("database"); + let prob = OwnedValue::Float(0.9375); + assert_eq!(exec_likelihood(&value, &prob), value); + + let value = OwnedValue::Integer(100); + let prob = OwnedValue::Float(1.0); + assert_eq!(exec_likelihood(&value, &prob), value); + + let value = OwnedValue::Float(12.34); + let prob = OwnedValue::Float(0.5); + assert_eq!(exec_likelihood(&value, &prob), value); + + let value = OwnedValue::Null; + let prob = OwnedValue::Float(0.5); + assert_eq!(exec_likelihood(&value, &prob), value); + + let value = OwnedValue::Blob(vec![1, 2, 3, 4]); + let prob = OwnedValue::Float(0.5); + assert_eq!(exec_likelihood(&value, &prob), value); + + let prob = OwnedValue::build_text("0.5"); + assert_eq!(exec_likelihood(&value, &prob), value); + + let prob = OwnedValue::Null; + assert_eq!(exec_likelihood(&value, &prob), value); + } + #[test] fn test_bitfield() { let mut bitfield = Bitfield::<4>::new(); diff --git a/core/vdbe/explain.rs b/core/vdbe/explain.rs index 67333c334..3df247cd0 100644 --- a/core/vdbe/explain.rs +++ b/core/vdbe/explain.rs @@ -1,4 +1,6 @@ -use crate::vdbe::builder::CursorType; +use limbo_sqlite3_parser::ast::SortOrder; + +use crate::vdbe::{builder::CursorType, insn::RegisterOrLiteral}; use super::{Insn, InsnReference, OwnedValue, Program}; use crate::function::{Func, ScalarFunc}; @@ -336,11 +338,11 @@ pub fn insn_to_str( 0, format!("if !r[{}] goto {}", reg, target_pc.to_debug_int()), ), - Insn::OpenReadAsync { + Insn::OpenRead { cursor_id, root_page, } => ( - "OpenReadAsync", + "OpenRead", *cursor_id as i32, *root_page as i32, 0, @@ -355,17 +357,8 @@ pub fn insn_to_str( root_page ), ), - Insn::OpenReadAwait => ( - "OpenReadAwait", - 0, - 0, - 0, - OwnedValue::build_text(""), - 0, - "".to_string(), - ), - Insn::VOpenAsync { cursor_id } => ( - "VOpenAsync", + Insn::VOpen { cursor_id } => ( + "VOpen", *cursor_id as i32, 0, 0, @@ -373,15 +366,6 @@ pub fn insn_to_str( 0, "".to_string(), ), - Insn::VOpenAwait => ( - "VOpenAwait", - 0, - 0, - 0, - OwnedValue::build_text(""), - 0, - "".to_string(), - ), Insn::VCreate { table_name, module_name, @@ -449,6 +433,15 @@ pub fn insn_to_str( 0, "".to_string(), ), + Insn::VDestroy { db, table_name } => ( + "VDestroy", + *db as i32, + 0, + 0, + OwnedValue::build_text(table_name), + 0, + "".to_string(), + ), Insn::OpenPseudo { cursor_id, content_reg, @@ -462,27 +455,18 @@ pub fn insn_to_str( 0, format!("{} columns in r[{}]", num_fields, content_reg), ), - Insn::RewindAsync { cursor_id } => ( - "RewindAsync", - *cursor_id as i32, - 0, - 0, - OwnedValue::build_text(""), - 0, - "".to_string(), - ), - Insn::RewindAwait { + Insn::Rewind { cursor_id, pc_if_empty, } => ( - 
"RewindAwait", + "Rewind", *cursor_id as i32, pc_if_empty.to_debug_int(), 0, OwnedValue::build_text(""), 0, format!( - "Rewind table {}", + "Rewind {}", program.cursor_ref[*cursor_id] .0 .as_ref() @@ -528,6 +512,20 @@ pub fn insn_to_str( ), ) } + Insn::TypeCheck { + start_reg, + count, + check_generated, + .. + } => ( + "TypeCheck", + *start_reg as i32, + *count as i32, + *check_generated as i32, + OwnedValue::build_text(""), + 0, + String::from(""), + ), Insn::MakeRecord { start_reg, count, @@ -559,20 +557,11 @@ pub fn insn_to_str( format!("output=r[{}..{}]", start_reg, start_reg + count - 1) }, ), - Insn::NextAsync { cursor_id } => ( - "NextAsync", - *cursor_id as i32, - 0, - 0, - OwnedValue::build_text(""), - 0, - "".to_string(), - ), - Insn::NextAwait { + Insn::Next { cursor_id, pc_if_next, } => ( - "NextAwait", + "Next", *cursor_id as i32, pc_if_next.to_debug_int(), 0, @@ -582,13 +571,13 @@ pub fn insn_to_str( ), Insn::Halt { err_code, - description: _, + description, } => ( "Halt", *err_code as i32, 0, 0, - OwnedValue::build_text(""), + OwnedValue::build_text(&description), 0, "".to_string(), ), @@ -697,6 +686,22 @@ pub fn insn_to_str( .unwrap_or(&format!("cursor {}", cursor_id)) ), ), + Insn::IdxRowId { cursor_id, dest } => ( + "IdxRowId", + *cursor_id as i32, + *dest as i32, + 0, + OwnedValue::build_text(""), + 0, + format!( + "r[{}]={}.rowid", + dest, + &program.cursor_ref[*cursor_id] + .0 + .as_ref() + .unwrap_or(&format!("cursor {}", cursor_id)) + ), + ), Insn::SeekRowid { cursor_id, src_reg, @@ -734,87 +739,105 @@ pub fn insn_to_str( is_index: _, cursor_id, start_reg, - num_regs: _, + num_regs, + target_pc, + } + | Insn::SeekGE { + is_index: _, + cursor_id, + start_reg, + num_regs, + target_pc, + } + | Insn::SeekLE { + is_index: _, + cursor_id, + start_reg, + num_regs, + target_pc, + } + | Insn::SeekLT { + is_index: _, + cursor_id, + start_reg, + num_regs, target_pc, } => ( - "SeekGT", + match insn { + Insn::SeekGT { .. } => "SeekGT", + Insn::SeekGE { .. } => "SeekGE", + Insn::SeekLE { .. } => "SeekLE", + Insn::SeekLT { .. } => "SeekLT", + _ => unreachable!(), + }, *cursor_id as i32, target_pc.to_debug_int(), *start_reg as i32, OwnedValue::build_text(""), 0, + format!("key=[{}..{}]", start_reg, start_reg + num_regs - 1), + ), + Insn::SeekEnd { cursor_id } => ( + "SeekEnd", + *cursor_id as i32, + 0, + 0, + OwnedValue::build_text(""), + 0, "".to_string(), ), - Insn::SeekGE { - is_index: _, + Insn::IdxInsert { cursor_id, - start_reg, - num_regs: _, - target_pc, + record_reg, + unpacked_start, + flags, + .. 
} => ( - "SeekGE", + "IdxInsert", *cursor_id as i32, - target_pc.to_debug_int(), - *start_reg as i32, + *record_reg as i32, + unpacked_start.unwrap_or(0) as i32, OwnedValue::build_text(""), - 0, - "".to_string(), + flags.0 as u16, + format!("key=r[{}]", record_reg), ), Insn::IdxGT { cursor_id, start_reg, - num_regs: _, + num_regs, target_pc, - } => ( - "IdxGT", - *cursor_id as i32, - target_pc.to_debug_int(), - *start_reg as i32, - OwnedValue::build_text(""), - 0, - "".to_string(), - ), - Insn::IdxGE { + } + | Insn::IdxGE { cursor_id, start_reg, - num_regs: _, + num_regs, target_pc, - } => ( - "IdxGE", - *cursor_id as i32, - target_pc.to_debug_int(), - *start_reg as i32, - OwnedValue::build_text(""), - 0, - "".to_string(), - ), - Insn::IdxLT { + } + | Insn::IdxLE { cursor_id, start_reg, - num_regs: _, + num_regs, target_pc, - } => ( - "IdxLT", - *cursor_id as i32, - target_pc.to_debug_int(), - *start_reg as i32, - OwnedValue::build_text(""), - 0, - "".to_string(), - ), - Insn::IdxLE { + } + | Insn::IdxLT { cursor_id, start_reg, - num_regs: _, + num_regs, target_pc, } => ( - "IdxLE", + match insn { + Insn::IdxGT { .. } => "IdxGT", + Insn::IdxGE { .. } => "IdxGE", + Insn::IdxLE { .. } => "IdxLE", + Insn::IdxLT { .. } => "IdxLT", + _ => unreachable!(), + }, *cursor_id as i32, target_pc.to_debug_int(), *start_reg as i32, OwnedValue::build_text(""), 0, - "".to_string(), + format!("key=[{}..{}]", start_reg, start_reg + num_regs - 1), ), Insn::DecrJumpZero { reg, target_pc } => ( "DecrJumpZero", @@ -855,17 +878,10 @@ pub fn insn_to_str( } => { let _p4 = String::new(); let to_print: Vec = order - .get_values() .iter() .map(|v| match v { - OwnedValue::Integer(i) => { - if *i == 0 { - "B".to_string() - } else { - "-B".to_string() - } - } - _ => unreachable!(), + SortOrder::Asc => "B".to_string(), + SortOrder::Desc => "-B".to_string(), }) .collect(); ( @@ -993,13 +1009,13 @@ pub fn insn_to_str( 0, "".to_string(), ), - Insn::InsertAsync { + Insn::Insert { cursor, key_reg, record_reg, flag, } => ( - "InsertAsync", + "Insert", *cursor as i32, *record_reg as i32, *key_reg as i32, @@ -1007,8 +1023,8 @@ pub fn insn_to_str( *flag as u16, "".to_string(), ), - Insn::InsertAwait { cursor_id } => ( - "InsertAwait", + Insn::Delete { cursor_id } => ( + "Delete", *cursor_id as i32, 0, 0, @@ -1016,20 +1032,15 @@ pub fn insn_to_str( 0, "".to_string(), ), - Insn::DeleteAsync { cursor_id } => ( - "DeleteAsync", + Insn::IdxDelete { + cursor_id, + start_reg, + num_regs, + } => ( + "IdxDelete", *cursor_id as i32, - 0, - 0, - OwnedValue::build_text(""), - 0, - "".to_string(), - ), - Insn::DeleteAwait { cursor_id } => ( - "DeleteAwait", - *cursor_id as i32, - 0, - 0, + *start_reg as i32, + *num_regs as i32, OwnedValue::build_text(""), 0, "".to_string(), @@ -1065,6 +1076,20 @@ pub fn insn_to_str( 0, "".to_string(), ), + Insn::NoConflict { + cursor_id, + target_pc, + record_reg, + num_regs, + } => ( + "NoConflict", + *cursor_id as i32, + target_pc.to_debug_int(), + *record_reg as i32, + OwnedValue::build_text(&format!("{num_regs}")), + 0, + format!("key=r[{}]", record_reg), + ), Insn::NotExists { cursor, rowid_reg, @@ -1094,22 +1119,17 @@ pub fn insn_to_str( limit_reg, combined_reg, limit_reg, offset_reg, combined_reg ), ), - Insn::OpenWriteAsync { + Insn::OpenWrite { cursor_id, root_page, + .. 
} => ( - "OpenWriteAsync", + "OpenWrite", *cursor_id as i32, - *root_page as i32, - 0, - OwnedValue::build_text(""), - 0, - "".to_string(), - ), - Insn::OpenWriteAwait {} => ( - "OpenWriteAwait", - 0, - 0, + match root_page { + RegisterOrLiteral::Literal(i) => *i as _, + RegisterOrLiteral::Register(i) => *i as _, + }, 0, OwnedValue::build_text(""), 0, @@ -1132,10 +1152,10 @@ pub fn insn_to_str( "CreateBtree", *db as i32, *root as i32, - *flags as i32, + flags.get_flags() as i32, OwnedValue::build_text(""), 0, - format!("r[{}]=root iDb={} flags={}", root, db, flags), + format!("r[{}]=root iDb={} flags={}", root, db, flags.get_flags()), ), Insn::Destroy { root, @@ -1176,10 +1196,13 @@ pub fn insn_to_str( 0, "".to_string(), ), - Insn::LastAsync { .. } => ( - "LastAsync", - 0, - 0, + Insn::Last { + cursor_id, + pc_if_empty, + } => ( + "Last", + *cursor_id as i32, + pc_if_empty.to_debug_int(), 0, OwnedValue::build_text(""), 0, @@ -1203,28 +1226,13 @@ pub fn insn_to_str( 0, where_clause.clone(), ), - Insn::LastAwait { .. } => ( - "LastAwait", - 0, - 0, - 0, - OwnedValue::build_text(""), - 0, - "".to_string(), - ), - Insn::PrevAsync { .. } => ( - "PrevAsync", - 0, - 0, - 0, - OwnedValue::build_text(""), - 0, - "".to_string(), - ), - Insn::PrevAwait { .. } => ( - "PrevAwait", - 0, - 0, + Insn::Prev { + cursor_id, + pc_if_prev, + } => ( + "Prev", + *cursor_id as i32, + pc_if_prev.to_debug_int(), 0, OwnedValue::build_text(""), 0, @@ -1344,6 +1352,93 @@ pub fn insn_to_str( 0, format!("auto_commit={}, rollback={}", auto_commit, rollback), ), + Insn::OpenEphemeral { + cursor_id, + is_table, + } => ( + "OpenEphemeral", + *cursor_id as i32, + *is_table as i32, + 0, + OwnedValue::build_text(""), + 0, + format!( + "cursor={} is_table={}", + cursor_id, + if *is_table { "true" } else { "false" } + ), + ), + Insn::OpenAutoindex { cursor_id } => ( + "OpenAutoindex", + *cursor_id as i32, + 0, + 0, + OwnedValue::build_text(""), + 0, + format!("cursor={}", cursor_id), + ), + Insn::Once { + target_pc_when_reentered, + } => ( + "Once", + target_pc_when_reentered.to_debug_int(), + 0, + 0, + OwnedValue::build_text(""), + 0, + format!("goto {}", target_pc_when_reentered.to_debug_int()), + ), + Insn::BeginSubrtn { dest, dest_end } => ( + "BeginSubrtn", + *dest as i32, + dest_end.map_or(0, |end| end as i32), + 0, + OwnedValue::build_text(""), + 0, + dest_end.map_or(format!("r[{}]=NULL", dest), |end| { + format!("r[{}..{}]=NULL", dest, end) + }), + ), + Insn::NotFound { + cursor_id, + target_pc, + record_reg, + .. 
+ } => ( + "NotFound", + *cursor_id as i32, + target_pc.to_debug_int(), + *record_reg as i32, + OwnedValue::build_text(""), + 0, + format!( + "if (r[{}] != NULL) goto {}", + record_reg, + target_pc.to_debug_int() + ), + ), + Insn::Affinity { + start_reg, + count, + affinities, + } => ( + "Affinity", + *start_reg as i32, + count.get() as i32, + 0, + OwnedValue::build_text(""), + 0, + format!( + "r[{}..{}] = {}", + start_reg, + start_reg + count.get(), + affinities + .chars() + .map(|a| a.to_string()) + .collect::<Vec<String>>() + .join(", ") + ), + ), }; format!( "{:<4} {:<17} {:<4} {:<4} {:<4} {:<13} {:<2} {}", diff --git a/core/vdbe/insn.rs b/core/vdbe/insn.rs index ab9013de4..34a9f680e 100644 --- a/core/vdbe/insn.rs +++ b/core/vdbe/insn.rs @@ -1,11 +1,15 @@ -use std::num::NonZero; - -use super::{ - cast_text_to_numeric, execute, AggFunc, BranchOffset, CursorID, FuncCtx, InsnFunction, PageIdx, +use std::{ + num::{NonZero, NonZeroUsize}, + rc::Rc, +}; + +use super::{execute, AggFunc, BranchOffset, CursorID, FuncCtx, InsnFunction, PageIdx}; +use crate::{ + schema::BTreeTable, + storage::{pager::CreateBTreeFlags, wal::CheckpointMode}, }; -use crate::storage::wal::CheckpointMode; -use crate::types::{OwnedValue, Record}; use limbo_macros::Description; +use limbo_sqlite3_parser::ast::SortOrder; /// Flags provided to comparison instructions (e.g. Eq, Ne) which determine behavior related to NULL values. #[derive(Clone, Copy, Debug, Default)] @@ -38,104 +42,160 @@ impl CmpInsFlags { } } +#[derive(Clone, Copy, Debug, Default)] +pub struct IdxInsertFlags(pub u8); +impl IdxInsertFlags { + pub const APPEND: u8 = 0x01; // Hint: insert likely at the end + pub const NCHANGE: u8 = 0x02; // Increment the change counter + pub const USE_SEEK: u8 = 0x04; // Skip seek if last one was same key + pub fn new() -> Self { + IdxInsertFlags(0) + } + pub fn has(&self, flag: u8) -> bool { + (self.0 & flag) != 0 + } + pub fn append(mut self, append: bool) -> Self { + if append { + self.0 |= IdxInsertFlags::APPEND; + } else { + self.0 &= !IdxInsertFlags::APPEND; + } + self + } + pub fn use_seek(mut self, seek: bool) -> Self { + if seek { + self.0 |= IdxInsertFlags::USE_SEEK; + } else { + self.0 &= !IdxInsertFlags::USE_SEEK; + } + self + } + pub fn nchange(mut self, change: bool) -> Self { + if change { + self.0 |= IdxInsertFlags::NCHANGE; + } else { + self.0 &= !IdxInsertFlags::NCHANGE; + } + self + } +} + +#[derive(Clone, Copy, Debug)] +pub enum RegisterOrLiteral<T> { + Register(usize), + Literal(T), +} + +impl From<PageIdx> for RegisterOrLiteral<PageIdx> { + fn from(value: PageIdx) -> Self { + RegisterOrLiteral::Literal(value) + } +} + #[derive(Description, Debug)] pub enum Insn { - // Initialize the program state and jump to the given PC. + /// Initialize the program state and jump to the given PC. Init { target_pc: BranchOffset, }, - // Write a NULL into register dest. If dest_end is Some, then also write NULL into register dest_end and every register in between dest and dest_end. If dest_end is not set, then only register dest is set to NULL. + /// Write a NULL into register dest. If dest_end is Some, then also write NULL into register dest_end and every register in between dest and dest_end. If dest_end is not set, then only register dest is set to NULL. Null { dest: usize, dest_end: Option<usize>, }, - // Move the cursor P1 to a null row. Any Column operations that occur while the cursor is on the null row will always write a NULL. + /// Mark the beginning of a subroutine that can be entered in-line.
This opcode is identical to Null; + /// it has a different name only to make the byte code easier to read and verify. + BeginSubrtn { + dest: usize, + dest_end: Option<usize>, + }, + /// Move the cursor P1 to a null row. Any Column operations that occur while the cursor is on the null row will always write a NULL. NullRow { cursor_id: CursorID, }, - // Add two registers and store the result in a third register. + /// Add two registers and store the result in a third register. Add { lhs: usize, rhs: usize, dest: usize, }, - // Subtract rhs from lhs and store in dest + /// Subtract rhs from lhs and store in dest Subtract { lhs: usize, rhs: usize, dest: usize, }, - // Multiply two registers and store the result in a third register. + /// Multiply two registers and store the result in a third register. Multiply { lhs: usize, rhs: usize, dest: usize, }, - // Divide lhs by rhs and store the result in a third register. + /// Divide lhs by rhs and store the result in a third register. Divide { lhs: usize, rhs: usize, dest: usize, }, - // Compare two vectors of registers in reg(P1)..reg(P1+P3-1) (call this vector "A") and in reg(P2)..reg(P2+P3-1) ("B"). Save the result of the comparison for use by the next Jump instruct. + /// Compare two vectors of registers in reg(P1)..reg(P1+P3-1) (call this vector "A") and in reg(P2)..reg(P2+P3-1) ("B"). Save the result of the comparison for use by the next Jump instruction. Compare { start_reg_a: usize, start_reg_b: usize, count: usize, }, - // Place the result of rhs bitwise AND lhs in third register. + /// Place the result of rhs bitwise AND lhs in third register. BitAnd { lhs: usize, rhs: usize, dest: usize, }, - // Place the result of rhs bitwise OR lhs in third register. + /// Place the result of rhs bitwise OR lhs in third register. BitOr { lhs: usize, rhs: usize, dest: usize, }, - // Place the result of bitwise NOT register P1 in dest register. + /// Place the result of bitwise NOT register P1 in dest register. BitNot { reg: usize, dest: usize, }, - // Checkpoint the database (applying wal file content to database file). + /// Checkpoint the database (applying wal file content to database file). Checkpoint { database: usize, // checkpoint database P1 checkpoint_mode: CheckpointMode, // P2 checkpoint mode dest: usize, // P3 checkpoint result }, - // Divide lhs by rhs and place the remainder in dest register. + /// Divide lhs by rhs and place the remainder in dest register. Remainder { lhs: usize, rhs: usize, dest: usize, }, - // Jump to the instruction at address P1, P2, or P3 depending on whether in the most recent Compare instruction the P1 vector was less than, equal to, or greater than the P2 vector, respectively. + /// Jump to the instruction at address P1, P2, or P3 depending on whether in the most recent Compare instruction the P1 vector was less than, equal to, or greater than the P2 vector, respectively. Jump { target_pc_lt: BranchOffset, target_pc_eq: BranchOffset, target_pc_gt: BranchOffset, }, - // Move the P3 values in register P1..P1+P3-1 over into registers P2..P2+P3-1. Registers P1..P1+P3-1 are left holding a NULL. It is an error for register ranges P1..P1+P3-1 and P2..P2+P3-1 to overlap. It is an error for P3 to be less than 1. + /// Move the P3 values in register P1..P1+P3-1 over into registers P2..P2+P3-1. Registers P1..P1+P3-1 are left holding a NULL. It is an error for register ranges P1..P1+P3-1 and P2..P2+P3-1 to overlap. It is an error for P3 to be less than 1.
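// Editor's aside: a minimal, self-contained sketch of how the builder-style
// IdxInsertFlags introduced in this diff combine. The struct and constants are
// copied from the patch above; the `main` usage is illustrative only and is
// not part of the patch itself.
#[derive(Clone, Copy, Debug, Default)]
pub struct IdxInsertFlags(pub u8);
impl IdxInsertFlags {
    pub const APPEND: u8 = 0x01;
    pub const NCHANGE: u8 = 0x02;
    pub fn new() -> Self { IdxInsertFlags(0) }
    pub fn has(&self, flag: u8) -> bool { (self.0 & flag) != 0 }
    pub fn append(mut self, on: bool) -> Self {
        // Chainable setter: set or clear the APPEND hint bit.
        if on { self.0 |= Self::APPEND; } else { self.0 &= !Self::APPEND; }
        self
    }
}
fn main() {
    // The compiler would request an append-style index insert like this.
    let flags = IdxInsertFlags::new().append(true);
    assert!(flags.has(IdxInsertFlags::APPEND));
    assert!(!flags.has(IdxInsertFlags::NCHANGE));
    // insn_to_str emits the raw bits as P5: `flags.0 as u16`.
    assert_eq!(flags.0 as u16, 0x01);
}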
Move { source_reg: usize, dest_reg: usize, count: usize, }, - // If the given register is a positive integer, decrement it by decrement_by and jump to the given PC. + /// If the given register is a positive integer, decrement it by decrement_by and jump to the given PC. IfPos { reg: usize, target_pc: BranchOffset, decrement_by: usize, }, - // If the given register is not NULL, jump to the given PC. + /// If the given register is not NULL, jump to the given PC. NotNull { reg: usize, target_pc: BranchOffset, }, - // Compare two registers and jump to the given PC if they are equal. + /// Compare two registers and jump to the given PC if they are equal. Eq { lhs: usize, rhs: usize, @@ -149,7 +209,7 @@ pub enum Insn { /// This flag indicates that if either is null we should still jump. flags: CmpInsFlags, }, - // Compare two registers and jump to the given PC if they are not equal. + /// Compare two registers and jump to the given PC if they are not equal. Ne { lhs: usize, rhs: usize, @@ -159,7 +219,7 @@ pub enum Insn { /// jump_if_null jumps if either of the operands is null. Used for "jump when false" logic. flags: CmpInsFlags, }, - // Compare two registers and jump to the given PC if the left-hand side is less than the right-hand side. + /// Compare two registers and jump to the given PC if the left-hand side is less than the right-hand side. Lt { lhs: usize, rhs: usize, @@ -175,7 +235,7 @@ pub enum Insn { /// jump_if_null: Jump if either of the operands is null. Used for "jump when false" logic. flags: CmpInsFlags, }, - // Compare two registers and jump to the given PC if the left-hand side is greater than the right-hand side. + /// Compare two registers and jump to the given PC if the left-hand side is greater than the right-hand side. Gt { lhs: usize, rhs: usize, @@ -183,7 +243,7 @@ pub enum Insn { /// jump_if_null: Jump if either of the operands is null. Used for "jump when false" logic. flags: CmpInsFlags, }, - // Compare two registers and jump to the given PC if the left-hand side is greater than or equal to the right-hand side. + /// Compare two registers and jump to the given PC if the left-hand side is greater than or equal to the right-hand side. Ge { lhs: usize, rhs: usize, @@ -205,23 +265,17 @@ pub enum Insn { /// P3. If r\[reg\] is null, jump iff r\[jump_if_null\] != 0 jump_if_null: bool, }, - // Open a cursor for reading. - OpenReadAsync { + /// Open a cursor for reading. + OpenRead { cursor_id: CursorID, root_page: PageIdx, }, - // Await for the completion of open cursor. - OpenReadAwait, - /// Open a cursor for a virtual table. - VOpenAsync { + VOpen { cursor_id: CursorID, }, - /// Await for the completion of open cursor for a virtual table. - VOpenAwait, - /// Create a new virtual table. VCreate { module_name: usize, // P1: Name of the module that contains the virtual table implementation @@ -235,6 +289,8 @@ pub enum Insn { pc_if_empty: BranchOffset, arg_count: usize, args_reg: usize, + idx_str: Option<String>, + idx_num: usize, }, /// Read a column from the current row of the virtual table cursor. @@ -260,40 +316,48 @@ pub enum Insn { pc_if_next: BranchOffset, }, - // Open a cursor for a pseudo-table that contains a single row. + /// P4 is the name of a virtual table in database P1. Call the xDestroy method of that table. + VDestroy { + /// Name of a virtual table being destroyed + table_name: String, + /// The database within which this virtual table needs to be destroyed (P1). + db: usize, + }, + + /// Open a cursor for a pseudo-table that contains a single row.
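// Editor's aside: the jump_if_null flag on Eq/Ne/Lt/Le/Gt/Ge encodes SQL's
// three-valued logic. A hedged sketch of the semantics only; `cmp_jumps` is an
// illustrative stand-in, not Limbo's actual comparison code.
fn cmp_jumps(lhs: Option<i64>, rhs: Option<i64>, jump_if_null: bool) -> bool {
    match (lhs, rhs) {
        (Some(a), Some(b)) => a == b, // ordinary equality comparison
        _ => jump_if_null,            // at least one NULL: jump only if flagged
    }
}
fn main() {
    assert!(cmp_jumps(Some(1), Some(1), false));  // 1 == 1 takes the jump
    assert!(!cmp_jumps(None, Some(1), false));    // NULL falls through by default
    assert!(cmp_jumps(None, Some(1), true));      // "jump when false" logic also jumps on NULL
}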
OpenPseudo { cursor_id: CursorID, content_reg: usize, num_fields: usize, }, - // Rewind the cursor to the beginning of the B-Tree. - RewindAsync { - cursor_id: CursorID, - }, - - // Await for the completion of cursor rewind. - RewindAwait { + /// Rewind the cursor to the beginning of the B-Tree. + Rewind { cursor_id: CursorID, pc_if_empty: BranchOffset, }, - LastAsync { - cursor_id: CursorID, - }, - - LastAwait { + Last { cursor_id: CursorID, pc_if_empty: BranchOffset, }, - // Read a column from the current row of the cursor. + /// Read a column from the current row of the cursor. Column { cursor_id: CursorID, column: usize, dest: usize, }, + TypeCheck { + start_reg: usize, // P1 + count: usize, // P2 + /// GENERATED ALWAYS AS ... STATIC columns are only checked if P3 is zero. + /// When P3 is non-zero, no type checking occurs for static generated columns. + check_generated: bool, // P3 + table_reference: Rc<BTreeTable>, // P4 + }, + // Make a record and write it to destination register. MakeRecord { start_reg: usize, // P1 @@ -301,78 +365,69 @@ pub enum Insn { dest_reg: usize, // P3 }, - // Emit a row of results. + /// Emit a row of results. ResultRow { start_reg: usize, // P1 count: usize, // P2 }, - // Advance the cursor to the next row. - NextAsync { - cursor_id: CursorID, - }, - - // Await for the completion of cursor advance. - NextAwait { + /// Advance the cursor to the next row. + Next { cursor_id: CursorID, pc_if_next: BranchOffset, }, - PrevAsync { + Prev { cursor_id: CursorID, + pc_if_prev: BranchOffset, }, - PrevAwait { - cursor_id: CursorID, - pc_if_next: BranchOffset, - }, - - // Halt the program. + /// Halt the program. Halt { err_code: usize, description: String, }, - // Start a transaction. + /// Start a transaction. Transaction { write: bool, }, - // Set database auto-commit mode and potentially rollback. + /// Set database auto-commit mode and potentially rollback. AutoCommit { auto_commit: bool, rollback: bool, }, - // Branch to the given PC. + /// Branch to the given PC. Goto { target_pc: BranchOffset, }, - // Stores the current program counter into register 'return_reg' then jumps to address target_pc. + /// Stores the current program counter into register 'return_reg' then jumps to address target_pc. Gosub { target_pc: BranchOffset, return_reg: usize, }, - // Returns to the program counter stored in register 'return_reg'. + /// Returns to the program counter stored in register 'return_reg'. Return { return_reg: usize, }, - // Write an integer value into a register. + /// Write an integer value into a register. Integer { value: i64, dest: usize, }, - // Write a float value into a register + /// Write a float value into a register Real { value: f64, dest: usize, }, - // If register holds an integer, transform it to a float + /// If register holds an integer, transform it to a float RealAffinity { register: usize, }, @@ -383,36 +438,80 @@ pub enum Insn { dest: usize, }, - // Write a blob value into a register. + /// Write a blob value into a register. Blob { value: Vec<u8>, dest: usize, }, - // Read the rowid of the current row. + /// Read the rowid of the current row. RowId { cursor_id: CursorID, dest: usize, }, + /// Read the rowid of the current row from an index cursor. + IdxRowId { + cursor_id: CursorID, + dest: usize, + }, - // Seek to a rowid in the cursor. If not found, jump to the given PC. Otherwise, continue to the next instruction. + /// Seek to a rowid in the cursor. If not found, jump to the given PC. Otherwise, continue to the next instruction.
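// Editor's aside: this patch collapses every Async/Await opcode pair
// (RewindAsync + RewindAwait, NextAsync + NextAwait, ...) into one opcode.
// A hedged sketch of the idea under that assumption: a single handler either
// completes or reports pending IO and is simply re-executed. `StepResult` and
// `op_next` here are illustrative stand-ins, not Limbo's actual types.
enum StepResult { Done, IO }
fn op_next(io_ready: &mut bool) -> StepResult {
    if !*io_ready {
        *io_ready = true;      // pretend the page read completes on the retry
        return StepResult::IO; // caller leaves pc unchanged and retries this insn
    }
    StepResult::Done           // cursor advanced; the interpreter moves on
}
fn main() {
    let mut ready = false;
    // The VM loop retries the same instruction until it stops returning IO,
    // so no separate "Await" opcode is needed.
    while let StepResult::IO = op_next(&mut ready) {}
}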
SeekRowid { cursor_id: CursorID, src_reg: usize, target_pc: BranchOffset, }, + SeekEnd { + cursor_id: CursorID, + }, - // P1 is an open index cursor and P3 is a cursor on the corresponding table. This opcode does a deferred seek of the P3 table cursor to the row that corresponds to the current row of P1. - // This is a deferred seek. Nothing actually happens until the cursor is used to read a record. That way, if no reads occur, no unnecessary I/O happens. + /// P1 is an open index cursor and P3 is a cursor on the corresponding table. This opcode does a deferred seek of the P3 table cursor to the row that corresponds to the current row of P1. + /// This is a deferred seek. Nothing actually happens until the cursor is used to read a record. That way, if no reads occur, no unnecessary I/O happens. DeferredSeek { index_cursor_id: CursorID, table_cursor_id: CursorID, }, + /// If cursor_id refers to an SQL table (B-Tree that uses integer keys), use the value in start_reg as the key. + /// If cursor_id refers to an SQL index, then start_reg is the first in an array of num_regs registers that are used as an unpacked index key. + /// Seek to the first index entry that is greater than or equal to the given key. If not found, jump to the given PC. Otherwise, continue to the next instruction. + SeekGE { + is_index: bool, + cursor_id: CursorID, + start_reg: usize, + num_regs: usize, + target_pc: BranchOffset, + }, + + /// If cursor_id refers to an SQL table (B-Tree that uses integer keys), use the value in start_reg as the key. + /// If cursor_id refers to an SQL index, then start_reg is the first in an array of num_regs registers that are used as an unpacked index key. + /// Seek to the first index entry that is greater than the given key. If not found, jump to the given PC. Otherwise, continue to the next instruction. + SeekGT { + is_index: bool, + cursor_id: CursorID, + start_reg: usize, + num_regs: usize, + target_pc: BranchOffset, + }, + + /// cursor_id is a cursor pointing to a B-Tree index; this op writes the value obtained from MakeRecord into the index. + /// P3 + P4 are for the original column values that make up that key in unpacked (pre-serialized) form. + /// If P5 has the OPFLAG_APPEND bit set, that is a hint to the b-tree layer that this insert is likely to be an append. + /// If the OPFLAG_NCHANGE bit is set, then the change counter is incremented by this instruction. If the OPFLAG_NCHANGE bit is clear, then the change counter is unchanged. + IdxInsert { + cursor_id: CursorID, + record_reg: usize, // P2 the register containing the record to insert + unpacked_start: Option<usize>, // P3 the index of the first register for the unpacked key + unpacked_count: Option<u16>, // P4 # of unpacked values in the key in P2 + flags: IdxInsertFlags, // TODO: optimization + }, + + /// The P4 register values beginning with P3 form an unpacked index key that omits the PRIMARY KEY. Compare this key value against the index that P1 is currently pointing to, ignoring the PRIMARY KEY or ROWID fields at the end. + /// If the P1 index entry is greater than or equal to the key value then jump to P2. Otherwise fall through to the next instruction. // If cursor_id refers to an SQL table (B-Tree that uses integer keys), use the value in start_reg as the key. // If cursor_id refers to an SQL index, then start_reg is the first in an array of num_regs registers that are used as an unpacked index key. - // Seek to the first index entry that is greater than or equal to the given key. If not found, jump to the given PC.
Otherwise, continue to the next instruction. - SeekGE { + // Seek to the first index entry that is less than or equal to the given key. If not found, jump to the given PC. Otherwise, continue to the next instruction. + SeekLE { is_index: bool, cursor_id: CursorID, start_reg: usize, @@ -422,8 +521,8 @@ pub enum Insn { // If cursor_id refers to an SQL table (B-Tree that uses integer keys), use the value in start_reg as the key. // If cursor_id refers to an SQL index, then start_reg is the first in an array of num_regs registers that are used as an unpacked index key. - // Seek to the first index entry that is greater than the given key. If not found, jump to the given PC. Otherwise, continue to the next instruction. - SeekGT { + // Seek to the first index entry that is less than the given key. If not found, jump to the given PC. Otherwise, continue to the next instruction. + SeekLT { is_index: bool, cursor_id: CursorID, start_reg: usize, @@ -440,8 +539,8 @@ pub enum Insn { target_pc: BranchOffset, }, - // The P4 register values beginning with P3 form an unpacked index key that omits the PRIMARY KEY. Compare this key value against the index that P1 is currently pointing to, ignoring the PRIMARY KEY or ROWID fields at the end. - // If the P1 index entry is greater than the key value then jump to P2. Otherwise fall through to the next instruction. + /// The P4 register values beginning with P3 form an unpacked index key that omits the PRIMARY KEY. Compare this key value against the index that P1 is currently pointing to, ignoring the PRIMARY KEY or ROWID fields at the end. + /// If the P1 index entry is greater than the key value then jump to P2. Otherwise fall through to the next instruction. IdxGT { cursor_id: CursorID, start_reg: usize, @@ -449,8 +548,8 @@ pub enum Insn { target_pc: BranchOffset, }, - // The P4 register values beginning with P3 form an unpacked index key that omits the PRIMARY KEY. Compare this key value against the index that P1 is currently pointing to, ignoring the PRIMARY KEY or ROWID fields at the end. - // If the P1 index entry is lesser or equal than the key value then jump to P2. Otherwise fall through to the next instruction. + /// The P4 register values beginning with P3 form an unpacked index key that omits the PRIMARY KEY. Compare this key value against the index that P1 is currently pointing to, ignoring the PRIMARY KEY or ROWID fields at the end. + /// If the P1 index entry is less than or equal to the key value then jump to P2. Otherwise fall through to the next instruction. IdxLE { cursor_id: CursorID, start_reg: usize, @@ -458,8 +557,8 @@ pub enum Insn { target_pc: BranchOffset, }, - // The P4 register values beginning with P3 form an unpacked index key that omits the PRIMARY KEY. Compare this key value against the index that P1 is currently pointing to, ignoring the PRIMARY KEY or ROWID fields at the end. - // If the P1 index entry is lesser than the key value then jump to P2. Otherwise fall through to the next instruction. + /// The P4 register values beginning with P3 form an unpacked index key that omits the PRIMARY KEY. Compare this key value against the index that P1 is currently pointing to, ignoring the PRIMARY KEY or ROWID fields at the end. + /// If the P1 index entry is less than the key value then jump to P2. Otherwise fall through to the next instruction. IdxLT { cursor_id: CursorID, start_reg: usize, @@ -467,7 +566,7 @@ pub enum Insn { target_pc: BranchOffset, }, - // Decrement the given register and jump to the given PC if the result is zero.
+ /// Decrement the given register and jump to the given PC if the result is zero. DecrJumpZero { reg: usize, target_pc: BranchOffset, @@ -485,39 +584,39 @@ pub enum Insn { func: AggFunc, }, - // Open a sorter. + /// Open a sorter. SorterOpen { - cursor_id: CursorID, // P1 - columns: usize, // P2 - order: Record, // P4. 0 if ASC and 1 if DESC + cursor_id: CursorID, // P1 + columns: usize, // P2 + order: Vec<SortOrder>, // P4. }, - // Insert a row into the sorter. + /// Insert a row into the sorter. SorterInsert { cursor_id: CursorID, record_reg: usize, }, - // Sort the rows in the sorter. + /// Sort the rows in the sorter. SorterSort { cursor_id: CursorID, pc_if_empty: BranchOffset, }, - // Retrieve the next row from the sorter. + /// Retrieve the next row from the sorter. SorterData { cursor_id: CursorID, // P1 dest_reg: usize, // P2 pseudo_cursor: usize, // P3 }, - // Advance to the next row in the sorter. + /// Advance to the next row in the sorter. SorterNext { cursor_id: CursorID, pc_if_next: BranchOffset, }, - // Function + /// Function Function { constant_mask: i32, // P1 start_reg: usize, // P2, start of argument registers @@ -540,22 +639,20 @@ pub enum Insn { end_offset: BranchOffset, }, - InsertAsync { + Insert { cursor: CursorID, key_reg: usize, // Must be int. record_reg: usize, // Blob of record data. flag: usize, // Flags used by insert, for now not used. }, - InsertAwait { - cursor_id: usize, - }, - - DeleteAsync { + Delete { cursor_id: CursorID, }, - DeleteAwait { + IdxDelete { + start_reg: usize, + num_regs: usize, cursor_id: CursorID, }, @@ -573,6 +670,18 @@ pub enum Insn { reg: usize, }, + /// If P4==0 then register P3 holds a blob constructed by [MakeRecord](https://sqlite.org/opcode.html#MakeRecord). If P4>0 then register P3 is the first of P4 registers that form an unpacked record.\ + /// + /// Cursor P1 is on an index btree. If the record identified by P3 and P4 contains any NULL value, jump immediately to P2. If all terms of the record are not-NULL then a check is done to determine if any row in the P1 index btree has a matching key prefix. If there are no matches, jump immediately to P2. If there is a match, fall through and leave the P1 cursor pointing to the matching row.\ + /// + /// This opcode is similar to [NotFound](https://sqlite.org/opcode.html#NotFound) with the exception that the branch is always taken if any part of the search key input is NULL. + NoConflict { + cursor_id: CursorID, // P1 index cursor + target_pc: BranchOffset, // P2 jump target + record_reg: usize, + num_regs: usize, + }, + NotExists { cursor: CursorID, rowid_reg: usize, @@ -585,13 +694,11 @@ pub enum Insn { offset_reg: usize, }, - OpenWriteAsync { + OpenWrite { cursor_id: CursorID, - root_page: PageIdx, + root_page: RegisterOrLiteral<PageIdx>, }, - OpenWriteAwait {}, - Copy { src_reg: usize, dst_reg: usize, @@ -605,7 +712,7 @@ pub enum Insn { /// The root page of the new b-tree (P2). root: usize, /// Flags (P3). - flags: usize, + flags: CreateBTreeFlags, }, /// Deletes an entire database table or index whose root page in the database file is given by P1. @@ -618,7 +725,7 @@ pub enum Insn { is_temp: usize, }, - // Drop a table + /// Drop a table DropTable { /// The database within which this b-tree needs to be dropped (P1). db: usize, @@ -648,14 +755,14 @@ pub enum Insn { where_clause: String, }, - // Place the result of lhs >> rhs in dest register. + /// Place the result of lhs >> rhs in dest register.
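// Editor's aside: OpenWrite's root_page is now RegisterOrLiteral<PageIdx>, so the
// root page can come either from the program text or from a register (for example
// one just written by CreateBtree). A minimal sketch of the resolution step; the
// `resolve` helper and the `PageIdx` alias here are illustrative assumptions, not
// the patch's API.
#[derive(Clone, Copy, Debug)]
enum RegisterOrLiteral<T> {
    Register(usize),
    Literal(T),
}
type PageIdx = usize; // assumed alias for this sketch
fn resolve(root: RegisterOrLiteral<PageIdx>, regs: &[i64]) -> PageIdx {
    match root {
        RegisterOrLiteral::Literal(p) => p,                   // known when compiled
        RegisterOrLiteral::Register(r) => regs[r] as PageIdx, // filled in at runtime
    }
}
fn main() {
    let regs = vec![0, 42]; // r[1] holds a root page produced by CreateBtree
    assert_eq!(resolve(RegisterOrLiteral::Literal(7), &regs), 7);
    assert_eq!(resolve(RegisterOrLiteral::Register(1), &regs), 42);
}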
ShiftRight { lhs: usize, rhs: usize, dest: usize, }, - // Place the result of lhs << rhs in dest register. + /// Place the result of lhs << rhs in dest register. ShiftLeft { lhs: usize, rhs: usize, @@ -697,6 +804,7 @@ pub enum Insn { rhs: usize, dest: usize, }, + /// Do nothing. Continue downward to the next opcode. Noop, /// Write the current number of pages in database P1 to memory cell P2. PageCount { @@ -709,6 +817,159 @@ pub enum Insn { dest: usize, cookie: Cookie, }, + /// Open a new cursor P1 to a transient table. + OpenEphemeral { + cursor_id: usize, + is_table: bool, + }, + /// Works the same as OpenEphemeral; the name just distinguishes its use. Used for transient indexes in joins. + OpenAutoindex { + cursor_id: usize, + }, + /// Fall through to the next instruction on the first invocation; otherwise jump to target_pc. + Once { + target_pc_when_reentered: BranchOffset, + }, + /// Search the index cursor for an entry for which the given key is a prefix. If such an entry + /// exists, this is a no-op; otherwise jump to target_pc. + /// Example => + /// For an index key (1,2,3): + /// NotFound((1,2,3)) => No-op + /// NotFound((1,2)) => No-op + /// NotFound((2,2,1)) => Jump + NotFound { + cursor_id: CursorID, + target_pc: BranchOffset, + record_reg: usize, + num_regs: usize, + }, + /// Apply affinities to a range of registers. The affinities string must have exactly count characters. + Affinity { + start_reg: usize, + count: NonZeroUsize, + affinities: String, + }, +} + +impl Insn { + pub fn to_function(&self) -> InsnFunction { + match self { + Insn::Init { .. } => execute::op_init, + Insn::Null { .. } => execute::op_null, + Insn::BeginSubrtn { .. } => execute::op_null, + Insn::NullRow { .. } => execute::op_null_row, + Insn::Add { .. } => execute::op_add, + Insn::Subtract { .. } => execute::op_subtract, + Insn::Multiply { .. } => execute::op_multiply, + Insn::Divide { .. } => execute::op_divide, + Insn::Compare { .. } => execute::op_compare, + Insn::BitAnd { .. } => execute::op_bit_and, + Insn::BitOr { .. } => execute::op_bit_or, + Insn::BitNot { .. } => execute::op_bit_not, + Insn::Checkpoint { .. } => execute::op_checkpoint, + Insn::Remainder { .. } => execute::op_remainder, + Insn::Jump { .. } => execute::op_jump, + Insn::Move { .. } => execute::op_move, + Insn::IfPos { .. } => execute::op_if_pos, + Insn::NotNull { .. } => execute::op_not_null, + Insn::Eq { .. } => execute::op_eq, + Insn::Ne { .. } => execute::op_ne, + Insn::Lt { .. } => execute::op_lt, + Insn::Le { .. } => execute::op_le, + Insn::Gt { .. } => execute::op_gt, + Insn::Ge { .. } => execute::op_ge, + Insn::If { .. } => execute::op_if, + Insn::IfNot { .. } => execute::op_if_not, + Insn::OpenRead { .. } => execute::op_open_read, + Insn::VOpen { .. } => execute::op_vopen, + Insn::VCreate { .. } => execute::op_vcreate, + Insn::VFilter { .. } => execute::op_vfilter, + Insn::VColumn { .. } => execute::op_vcolumn, + Insn::VUpdate { .. } => execute::op_vupdate, + Insn::VNext { .. } => execute::op_vnext, + Insn::VDestroy { .. } => execute::op_vdestroy, + + Insn::OpenPseudo { .. } => execute::op_open_pseudo, + Insn::Rewind { .. } => execute::op_rewind, + Insn::Last { .. } => execute::op_last, + Insn::Column { .. } => execute::op_column, + Insn::TypeCheck { .. } => execute::op_type_check, + Insn::MakeRecord { .. } => execute::op_make_record, + Insn::ResultRow { .. } => execute::op_result_row, + Insn::Next { .. } => execute::op_next, + Insn::Prev { .. } => execute::op_prev, + Insn::Halt { .. } => execute::op_halt, + Insn::Transaction { ..
} => execute::op_transaction, + Insn::AutoCommit { .. } => execute::op_auto_commit, + Insn::Goto { .. } => execute::op_goto, + Insn::Gosub { .. } => execute::op_gosub, + Insn::Return { .. } => execute::op_return, + Insn::Integer { .. } => execute::op_integer, + Insn::Real { .. } => execute::op_real, + Insn::RealAffinity { .. } => execute::op_real_affinity, + Insn::String8 { .. } => execute::op_string8, + Insn::Blob { .. } => execute::op_blob, + Insn::RowId { .. } => execute::op_row_id, + Insn::IdxRowId { .. } => execute::op_idx_row_id, + Insn::SeekRowid { .. } => execute::op_seek_rowid, + Insn::DeferredSeek { .. } => execute::op_deferred_seek, + Insn::SeekGE { .. } => execute::op_seek, + Insn::SeekGT { .. } => execute::op_seek, + Insn::SeekLE { .. } => execute::op_seek, + Insn::SeekLT { .. } => execute::op_seek, + Insn::SeekEnd { .. } => execute::op_seek_end, + Insn::IdxGE { .. } => execute::op_idx_ge, + Insn::IdxGT { .. } => execute::op_idx_gt, + Insn::IdxLE { .. } => execute::op_idx_le, + Insn::IdxLT { .. } => execute::op_idx_lt, + Insn::DecrJumpZero { .. } => execute::op_decr_jump_zero, + Insn::AggStep { .. } => execute::op_agg_step, + Insn::AggFinal { .. } => execute::op_agg_final, + Insn::SorterOpen { .. } => execute::op_sorter_open, + Insn::SorterInsert { .. } => execute::op_sorter_insert, + Insn::SorterSort { .. } => execute::op_sorter_sort, + Insn::SorterData { .. } => execute::op_sorter_data, + Insn::SorterNext { .. } => execute::op_sorter_next, + Insn::Function { .. } => execute::op_function, + Insn::InitCoroutine { .. } => execute::op_init_coroutine, + Insn::EndCoroutine { .. } => execute::op_end_coroutine, + Insn::Yield { .. } => execute::op_yield, + Insn::Insert { .. } => execute::op_insert, + Insn::IdxInsert { .. } => execute::op_idx_insert, + Insn::Delete { .. } => execute::op_delete, + Insn::NewRowid { .. } => execute::op_new_rowid, + Insn::MustBeInt { .. } => execute::op_must_be_int, + Insn::SoftNull { .. } => execute::op_soft_null, + Insn::NoConflict { .. } => execute::op_no_conflict, + Insn::NotExists { .. } => execute::op_not_exists, + Insn::OffsetLimit { .. } => execute::op_offset_limit, + Insn::OpenWrite { .. } => execute::op_open_write, + Insn::Copy { .. } => execute::op_copy, + Insn::CreateBtree { .. } => execute::op_create_btree, + Insn::Destroy { .. } => execute::op_destroy, + + Insn::DropTable { .. } => execute::op_drop_table, + Insn::Close { .. } => execute::op_close, + Insn::IsNull { .. } => execute::op_is_null, + Insn::ParseSchema { .. } => execute::op_parse_schema, + Insn::ShiftRight { .. } => execute::op_shift_right, + Insn::ShiftLeft { .. } => execute::op_shift_left, + Insn::Variable { .. } => execute::op_variable, + Insn::ZeroOrNull { .. } => execute::op_zero_or_null, + Insn::Not { .. } => execute::op_not, + Insn::Concat { .. } => execute::op_concat, + Insn::And { .. } => execute::op_and, + Insn::Or { .. } => execute::op_or, + Insn::Noop => execute::op_noop, + Insn::PageCount { .. } => execute::op_page_count, + Insn::ReadCookie { .. } => execute::op_read_cookie, + Insn::OpenEphemeral { .. } | Insn::OpenAutoindex { .. } => execute::op_open_ephemeral, + Insn::Once { .. } => execute::op_once, + Insn::NotFound { .. } => execute::op_not_found, + Insn::Affinity { .. } => execute::op_affinity, + Insn::IdxDelete { .. } => execute::op_idx_delete, + } + } } // TODO: Add remaining cookies. @@ -727,1041 +988,3 @@ pub enum Cookie { /// The "user version" as read and set by the user_version pragma. 
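// Editor's aside: to_function() resolves each opcode to a plain function pointer
// once, so the interpreter loop becomes a lookup-and-call instead of one giant
// per-step match. A hedged miniature of the same pattern with made-up opcodes
// and handlers; none of these names are Limbo's.
#[derive(Debug)]
enum Insn { Add { lhs: i64, rhs: i64 }, Noop }
type InsnFunction = fn(&Insn) -> i64;
fn op_add(insn: &Insn) -> i64 {
    match insn { Insn::Add { lhs, rhs } => lhs + rhs, _ => unreachable!() }
}
fn op_noop(_: &Insn) -> i64 { 0 }
impl Insn {
    fn to_function(&self) -> InsnFunction {
        match self { Insn::Add { .. } => op_add, Insn::Noop => op_noop }
    }
}
fn main() {
    let program = [Insn::Add { lhs: 2, rhs: 3 }, Insn::Noop];
    // Resolve handlers up front, then run them, as the VM loop would.
    let results: Vec<i64> = program.iter().map(|i| (i.to_function())(i)).collect();
    assert_eq!(results, vec![5, 0]);
}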
UserVersion = 6, } - -pub fn exec_add(lhs: &OwnedValue, rhs: &OwnedValue) -> OwnedValue { - let result = match (lhs, rhs) { - (OwnedValue::Integer(lhs), OwnedValue::Integer(rhs)) => { - let result = lhs.overflowing_add(*rhs); - if result.1 { - OwnedValue::Float(*lhs as f64 + *rhs as f64) - } else { - OwnedValue::Integer(result.0) - } - } - (OwnedValue::Float(lhs), OwnedValue::Float(rhs)) => OwnedValue::Float(lhs + rhs), - (OwnedValue::Float(f), OwnedValue::Integer(i)) - | (OwnedValue::Integer(i), OwnedValue::Float(f)) => OwnedValue::Float(*f + *i as f64), - (OwnedValue::Null, _) | (_, OwnedValue::Null) => OwnedValue::Null, - (OwnedValue::Text(lhs), OwnedValue::Text(rhs)) => exec_add( - &cast_text_to_numeric(lhs.as_str()), - &cast_text_to_numeric(rhs.as_str()), - ), - (OwnedValue::Text(text), other) | (other, OwnedValue::Text(text)) => { - exec_add(&cast_text_to_numeric(text.as_str()), other) - } - _ => todo!(), - }; - match result { - OwnedValue::Float(f) if f.is_nan() => OwnedValue::Null, - _ => result, - } -} - -pub fn exec_subtract(lhs: &OwnedValue, rhs: &OwnedValue) -> OwnedValue { - let result = match (lhs, rhs) { - (OwnedValue::Integer(lhs), OwnedValue::Integer(rhs)) => { - let result = lhs.overflowing_sub(*rhs); - if result.1 { - OwnedValue::Float(*lhs as f64 - *rhs as f64) - } else { - OwnedValue::Integer(result.0) - } - } - (OwnedValue::Float(lhs), OwnedValue::Float(rhs)) => OwnedValue::Float(lhs - rhs), - (OwnedValue::Float(lhs), OwnedValue::Integer(rhs)) => OwnedValue::Float(lhs - *rhs as f64), - (OwnedValue::Integer(lhs), OwnedValue::Float(rhs)) => OwnedValue::Float(*lhs as f64 - rhs), - (OwnedValue::Null, _) | (_, OwnedValue::Null) => OwnedValue::Null, - (OwnedValue::Text(lhs), OwnedValue::Text(rhs)) => exec_subtract( - &cast_text_to_numeric(lhs.as_str()), - &cast_text_to_numeric(rhs.as_str()), - ), - (OwnedValue::Text(text), other) => { - exec_subtract(&cast_text_to_numeric(text.as_str()), other) - } - (other, OwnedValue::Text(text)) => { - exec_subtract(other, &cast_text_to_numeric(text.as_str())) - } - _ => todo!(), - }; - match result { - OwnedValue::Float(f) if f.is_nan() => OwnedValue::Null, - _ => result, - } -} - -pub fn exec_multiply(lhs: &OwnedValue, rhs: &OwnedValue) -> OwnedValue { - let result = match (lhs, rhs) { - (OwnedValue::Integer(lhs), OwnedValue::Integer(rhs)) => { - let result = lhs.overflowing_mul(*rhs); - if result.1 { - OwnedValue::Float(*lhs as f64 * *rhs as f64) - } else { - OwnedValue::Integer(result.0) - } - } - (OwnedValue::Float(lhs), OwnedValue::Float(rhs)) => OwnedValue::Float(lhs * rhs), - (OwnedValue::Integer(i), OwnedValue::Float(f)) - | (OwnedValue::Float(f), OwnedValue::Integer(i)) => OwnedValue::Float(*i as f64 * { *f }), - (OwnedValue::Null, _) | (_, OwnedValue::Null) => OwnedValue::Null, - (OwnedValue::Text(lhs), OwnedValue::Text(rhs)) => exec_multiply( - &cast_text_to_numeric(lhs.as_str()), - &cast_text_to_numeric(rhs.as_str()), - ), - (OwnedValue::Text(text), other) | (other, OwnedValue::Text(text)) => { - exec_multiply(&cast_text_to_numeric(text.as_str()), other) - } - - _ => todo!(), - }; - match result { - OwnedValue::Float(f) if f.is_nan() => OwnedValue::Null, - _ => result, - } -} - -pub fn exec_divide(lhs: &OwnedValue, rhs: &OwnedValue) -> OwnedValue { - let result = match (lhs, rhs) { - (_, OwnedValue::Integer(0)) | (_, OwnedValue::Float(0.0)) => OwnedValue::Null, - (OwnedValue::Integer(lhs), OwnedValue::Integer(rhs)) => { - let result = lhs.overflowing_div(*rhs); - if result.1 { - OwnedValue::Float(*lhs as f64 / *rhs as f64) - 
} else { - OwnedValue::Integer(result.0) - } - } - (OwnedValue::Float(lhs), OwnedValue::Float(rhs)) => OwnedValue::Float(lhs / rhs), - (OwnedValue::Float(lhs), OwnedValue::Integer(rhs)) => OwnedValue::Float(lhs / *rhs as f64), - (OwnedValue::Integer(lhs), OwnedValue::Float(rhs)) => OwnedValue::Float(*lhs as f64 / rhs), - (OwnedValue::Null, _) | (_, OwnedValue::Null) => OwnedValue::Null, - (OwnedValue::Text(lhs), OwnedValue::Text(rhs)) => exec_divide( - &cast_text_to_numeric(lhs.as_str()), - &cast_text_to_numeric(rhs.as_str()), - ), - (OwnedValue::Text(text), other) => exec_divide(&cast_text_to_numeric(text.as_str()), other), - (other, OwnedValue::Text(text)) => exec_divide(other, &cast_text_to_numeric(text.as_str())), - _ => todo!(), - }; - match result { - OwnedValue::Float(f) if f.is_nan() => OwnedValue::Null, - _ => result, - } -} - -pub fn exec_bit_and(lhs: &OwnedValue, rhs: &OwnedValue) -> OwnedValue { - match (lhs, rhs) { - (OwnedValue::Null, _) | (_, OwnedValue::Null) => OwnedValue::Null, - (_, OwnedValue::Integer(0)) - | (OwnedValue::Integer(0), _) - | (_, OwnedValue::Float(0.0)) - | (OwnedValue::Float(0.0), _) => OwnedValue::Integer(0), - (OwnedValue::Integer(lh), OwnedValue::Integer(rh)) => OwnedValue::Integer(lh & rh), - (OwnedValue::Float(lh), OwnedValue::Float(rh)) => { - OwnedValue::Integer(*lh as i64 & *rh as i64) - } - (OwnedValue::Float(lh), OwnedValue::Integer(rh)) => OwnedValue::Integer(*lh as i64 & rh), - (OwnedValue::Integer(lh), OwnedValue::Float(rh)) => OwnedValue::Integer(lh & *rh as i64), - (OwnedValue::Text(lhs), OwnedValue::Text(rhs)) => exec_bit_and( - &cast_text_to_numeric(lhs.as_str()), - &cast_text_to_numeric(rhs.as_str()), - ), - (OwnedValue::Text(text), other) | (other, OwnedValue::Text(text)) => { - exec_bit_and(&cast_text_to_numeric(text.as_str()), other) - } - _ => todo!(), - } -} - -pub fn exec_bit_or(lhs: &OwnedValue, rhs: &OwnedValue) -> OwnedValue { - match (lhs, rhs) { - (OwnedValue::Null, _) | (_, OwnedValue::Null) => OwnedValue::Null, - (OwnedValue::Integer(lh), OwnedValue::Integer(rh)) => OwnedValue::Integer(lh | rh), - (OwnedValue::Float(lh), OwnedValue::Integer(rh)) => OwnedValue::Integer(*lh as i64 | rh), - (OwnedValue::Integer(lh), OwnedValue::Float(rh)) => OwnedValue::Integer(lh | *rh as i64), - (OwnedValue::Float(lh), OwnedValue::Float(rh)) => { - OwnedValue::Integer(*lh as i64 | *rh as i64) - } - (OwnedValue::Text(lhs), OwnedValue::Text(rhs)) => exec_bit_or( - &cast_text_to_numeric(lhs.as_str()), - &cast_text_to_numeric(rhs.as_str()), - ), - (OwnedValue::Text(text), other) | (other, OwnedValue::Text(text)) => { - exec_bit_or(&cast_text_to_numeric(text.as_str()), other) - } - _ => todo!(), - } -} - -pub fn exec_remainder(lhs: &OwnedValue, rhs: &OwnedValue) -> OwnedValue { - match (lhs, rhs) { - (OwnedValue::Null, _) - | (_, OwnedValue::Null) - | (_, OwnedValue::Integer(0)) - | (_, OwnedValue::Float(0.0)) => OwnedValue::Null, - (OwnedValue::Integer(lhs), OwnedValue::Integer(rhs)) => { - if rhs == &0 { - OwnedValue::Null - } else { - OwnedValue::Integer(lhs % rhs) - } - } - (OwnedValue::Float(lhs), OwnedValue::Float(rhs)) => { - let rhs_int = *rhs as i64; - if rhs_int == 0 { - OwnedValue::Null - } else { - OwnedValue::Float(((*lhs as i64) % rhs_int) as f64) - } - } - (OwnedValue::Float(lhs), OwnedValue::Integer(rhs)) => { - if rhs == &0 { - OwnedValue::Null - } else { - OwnedValue::Float(((*lhs as i64) % rhs) as f64) - } - } - (OwnedValue::Integer(lhs), OwnedValue::Float(rhs)) => { - let rhs_int = *rhs as i64; - if rhs_int == 0 { - 
OwnedValue::Null - } else { - OwnedValue::Float((lhs % rhs_int) as f64) - } - } - (OwnedValue::Text(lhs), OwnedValue::Text(rhs)) => exec_remainder( - &cast_text_to_numeric(lhs.as_str()), - &cast_text_to_numeric(rhs.as_str()), - ), - (OwnedValue::Text(text), other) | (other, OwnedValue::Text(text)) => { - exec_remainder(&cast_text_to_numeric(text.as_str()), other) - } - other => todo!("remainder not implemented for: {:?} {:?}", lhs, other), - } -} - -pub fn exec_bit_not(reg: &OwnedValue) -> OwnedValue { - match reg { - OwnedValue::Null => OwnedValue::Null, - OwnedValue::Integer(i) => OwnedValue::Integer(!i), - OwnedValue::Float(f) => OwnedValue::Integer(!(*f as i64)), - OwnedValue::Text(text) => exec_bit_not(&cast_text_to_numeric(text.as_str())), - _ => todo!(), - } -} - -pub fn exec_shift_left(lhs: &OwnedValue, rhs: &OwnedValue) -> OwnedValue { - match (lhs, rhs) { - (OwnedValue::Null, _) | (_, OwnedValue::Null) => OwnedValue::Null, - (OwnedValue::Integer(lh), OwnedValue::Integer(rh)) => { - OwnedValue::Integer(compute_shl(*lh, *rh)) - } - (OwnedValue::Float(lh), OwnedValue::Integer(rh)) => { - OwnedValue::Integer(compute_shl(*lh as i64, *rh)) - } - (OwnedValue::Integer(lh), OwnedValue::Float(rh)) => { - OwnedValue::Integer(compute_shl(*lh, *rh as i64)) - } - (OwnedValue::Float(lh), OwnedValue::Float(rh)) => { - OwnedValue::Integer(compute_shl(*lh as i64, *rh as i64)) - } - (OwnedValue::Text(lhs), OwnedValue::Text(rhs)) => exec_shift_left( - &cast_text_to_numeric(lhs.as_str()), - &cast_text_to_numeric(rhs.as_str()), - ), - (OwnedValue::Text(text), other) => { - exec_shift_left(&cast_text_to_numeric(text.as_str()), other) - } - (other, OwnedValue::Text(text)) => { - exec_shift_left(other, &cast_text_to_numeric(text.as_str())) - } - _ => todo!(), - } -} - -fn compute_shl(lhs: i64, rhs: i64) -> i64 { - if rhs == 0 { - lhs - } else if rhs > 0 { - // for positive shifts, if it's too large return 0 - if rhs >= 64 { - 0 - } else { - lhs << rhs - } - } else { - // for negative shifts, check if it's i64::MIN to avoid overflow on negation - if rhs == i64::MIN || rhs <= -64 { - if lhs < 0 { - -1 - } else { - 0 - } - } else { - lhs >> (-rhs) - } - } -} - -pub fn exec_shift_right(lhs: &OwnedValue, rhs: &OwnedValue) -> OwnedValue { - match (lhs, rhs) { - (OwnedValue::Null, _) | (_, OwnedValue::Null) => OwnedValue::Null, - (OwnedValue::Integer(lh), OwnedValue::Integer(rh)) => { - OwnedValue::Integer(compute_shr(*lh, *rh)) - } - (OwnedValue::Float(lh), OwnedValue::Integer(rh)) => { - OwnedValue::Integer(compute_shr(*lh as i64, *rh)) - } - (OwnedValue::Integer(lh), OwnedValue::Float(rh)) => { - OwnedValue::Integer(compute_shr(*lh, *rh as i64)) - } - (OwnedValue::Float(lh), OwnedValue::Float(rh)) => { - OwnedValue::Integer(compute_shr(*lh as i64, *rh as i64)) - } - (OwnedValue::Text(lhs), OwnedValue::Text(rhs)) => exec_shift_right( - &cast_text_to_numeric(lhs.as_str()), - &cast_text_to_numeric(rhs.as_str()), - ), - (OwnedValue::Text(text), other) => { - exec_shift_right(&cast_text_to_numeric(text.as_str()), other) - } - (other, OwnedValue::Text(text)) => { - exec_shift_right(other, &cast_text_to_numeric(text.as_str())) - } - _ => todo!(), - } -} - -// compute binary shift to the right if rhs >= 0 and binary shift to the left - if rhs < 0 -// note, that binary shift to the right is sign-extended -fn compute_shr(lhs: i64, rhs: i64) -> i64 { - if rhs == 0 { - lhs - } else if rhs > 0 { - // for positive right shifts - if rhs >= 64 { - if lhs < 0 { - -1 - } else { - 0 - } - } else { - lhs >> rhs - } - } else { - 
// for negative right shifts, check if it's i64::MIN to avoid overflow - if rhs == i64::MIN || -rhs >= 64 { - 0 - } else { - lhs << (-rhs) - } - } -} - -pub fn exec_boolean_not(reg: &OwnedValue) -> OwnedValue { - match reg { - OwnedValue::Null => OwnedValue::Null, - OwnedValue::Integer(i) => OwnedValue::Integer((*i == 0) as i64), - OwnedValue::Float(f) => OwnedValue::Integer((*f == 0.0) as i64), - OwnedValue::Text(text) => exec_boolean_not(&cast_text_to_numeric(text.as_str())), - _ => todo!(), - } -} -pub fn exec_concat(lhs: &OwnedValue, rhs: &OwnedValue) -> OwnedValue { - match (lhs, rhs) { - (OwnedValue::Text(lhs_text), OwnedValue::Text(rhs_text)) => { - OwnedValue::build_text(&(lhs_text.as_str().to_string() + rhs_text.as_str())) - } - (OwnedValue::Text(lhs_text), OwnedValue::Integer(rhs_int)) => { - OwnedValue::build_text(&(lhs_text.as_str().to_string() + &rhs_int.to_string())) - } - (OwnedValue::Text(lhs_text), OwnedValue::Float(rhs_float)) => { - OwnedValue::build_text(&(lhs_text.as_str().to_string() + &rhs_float.to_string())) - } - (OwnedValue::Integer(lhs_int), OwnedValue::Text(rhs_text)) => { - OwnedValue::build_text(&(lhs_int.to_string() + rhs_text.as_str())) - } - (OwnedValue::Integer(lhs_int), OwnedValue::Integer(rhs_int)) => { - OwnedValue::build_text(&(lhs_int.to_string() + &rhs_int.to_string())) - } - (OwnedValue::Integer(lhs_int), OwnedValue::Float(rhs_float)) => { - OwnedValue::build_text(&(lhs_int.to_string() + &rhs_float.to_string())) - } - (OwnedValue::Float(lhs_float), OwnedValue::Text(rhs_text)) => { - OwnedValue::build_text(&(lhs_float.to_string() + rhs_text.as_str())) - } - (OwnedValue::Float(lhs_float), OwnedValue::Integer(rhs_int)) => { - OwnedValue::build_text(&(lhs_float.to_string() + &rhs_int.to_string())) - } - (OwnedValue::Float(lhs_float), OwnedValue::Float(rhs_float)) => { - OwnedValue::build_text(&(lhs_float.to_string() + &rhs_float.to_string())) - } - (OwnedValue::Null, _) | (_, OwnedValue::Null) => OwnedValue::Null, - (OwnedValue::Blob(_), _) | (_, OwnedValue::Blob(_)) => { - todo!("TODO: Handle Blob conversion to String") - } - } -} - -pub fn exec_and(lhs: &OwnedValue, rhs: &OwnedValue) -> OwnedValue { - match (lhs, rhs) { - (_, OwnedValue::Integer(0)) - | (OwnedValue::Integer(0), _) - | (_, OwnedValue::Float(0.0)) - | (OwnedValue::Float(0.0), _) => OwnedValue::Integer(0), - (OwnedValue::Null, _) | (_, OwnedValue::Null) => OwnedValue::Null, - (OwnedValue::Text(lhs), OwnedValue::Text(rhs)) => exec_and( - &cast_text_to_numeric(lhs.as_str()), - &cast_text_to_numeric(rhs.as_str()), - ), - (OwnedValue::Text(text), other) | (other, OwnedValue::Text(text)) => { - exec_and(&cast_text_to_numeric(text.as_str()), other) - } - _ => OwnedValue::Integer(1), - } -} - -pub fn exec_or(lhs: &OwnedValue, rhs: &OwnedValue) -> OwnedValue { - match (lhs, rhs) { - (OwnedValue::Null, OwnedValue::Null) - | (OwnedValue::Null, OwnedValue::Float(0.0)) - | (OwnedValue::Float(0.0), OwnedValue::Null) - | (OwnedValue::Null, OwnedValue::Integer(0)) - | (OwnedValue::Integer(0), OwnedValue::Null) => OwnedValue::Null, - (OwnedValue::Float(0.0), OwnedValue::Integer(0)) - | (OwnedValue::Integer(0), OwnedValue::Float(0.0)) - | (OwnedValue::Float(0.0), OwnedValue::Float(0.0)) - | (OwnedValue::Integer(0), OwnedValue::Integer(0)) => OwnedValue::Integer(0), - (OwnedValue::Text(lhs), OwnedValue::Text(rhs)) => exec_or( - &cast_text_to_numeric(lhs.as_str()), - &cast_text_to_numeric(rhs.as_str()), - ), - (OwnedValue::Text(text), other) | (other, OwnedValue::Text(text)) => { - 
exec_or(&cast_text_to_numeric(text.as_str()), other) - } - _ => OwnedValue::Integer(1), - } -} - -impl Insn { - pub fn to_function(&self) -> InsnFunction { - match self { - Insn::Init { .. } => execute::op_init, - - Insn::Null { .. } => execute::op_null, - - Insn::NullRow { .. } => execute::op_null_row, - - Insn::Add { .. } => execute::op_add, - - Insn::Subtract { .. } => execute::op_subtract, - - Insn::Multiply { .. } => execute::op_multiply, - - Insn::Divide { .. } => execute::op_divide, - - Insn::Compare { .. } => execute::op_compare, - Insn::BitAnd { .. } => execute::op_bit_and, - - Insn::BitOr { .. } => execute::op_bit_or, - - Insn::BitNot { .. } => execute::op_bit_not, - - Insn::Checkpoint { .. } => execute::op_checkpoint, - Insn::Remainder { .. } => execute::op_remainder, - - Insn::Jump { .. } => execute::op_jump, - Insn::Move { .. } => execute::op_move, - Insn::IfPos { .. } => execute::op_if_pos, - Insn::NotNull { .. } => execute::op_not_null, - - Insn::Eq { .. } => execute::op_eq, - Insn::Ne { .. } => execute::op_ne, - Insn::Lt { .. } => execute::op_lt, - Insn::Le { .. } => execute::op_le, - Insn::Gt { .. } => execute::op_gt, - Insn::Ge { .. } => execute::op_ge, - Insn::If { .. } => execute::op_if, - Insn::IfNot { .. } => execute::op_if_not, - Insn::OpenReadAsync { .. } => execute::op_open_read_async, - Insn::OpenReadAwait => execute::op_open_read_await, - - Insn::VOpenAsync { .. } => execute::op_vopen_async, - - Insn::VOpenAwait => execute::op_vopen_await, - - Insn::VCreate { .. } => execute::op_vcreate, - Insn::VFilter { .. } => execute::op_vfilter, - Insn::VColumn { .. } => execute::op_vcolumn, - Insn::VUpdate { .. } => execute::op_vupdate, - Insn::VNext { .. } => execute::op_vnext, - Insn::OpenPseudo { .. } => execute::op_open_pseudo, - Insn::RewindAsync { .. } => execute::op_rewind_async, - - Insn::RewindAwait { .. } => execute::op_rewind_await, - Insn::LastAsync { .. } => execute::op_last_async, - - Insn::LastAwait { .. } => execute::op_last_await, - Insn::Column { .. } => execute::op_column, - Insn::MakeRecord { .. } => execute::op_make_record, - Insn::ResultRow { .. } => execute::op_result_row, - - Insn::NextAsync { .. } => execute::op_next_async, - - Insn::NextAwait { .. } => execute::op_next_await, - Insn::PrevAsync { .. } => execute::op_prev_async, - - Insn::PrevAwait { .. } => execute::op_prev_await, - Insn::Halt { .. } => execute::op_halt, - Insn::Transaction { .. } => execute::op_transaction, - - Insn::AutoCommit { .. } => execute::op_auto_commit, - Insn::Goto { .. } => execute::op_goto, - - Insn::Gosub { .. } => execute::op_gosub, - Insn::Return { .. } => execute::op_return, - - Insn::Integer { .. } => execute::op_integer, - - Insn::Real { .. } => execute::op_real, - - Insn::RealAffinity { .. } => execute::op_real_affinity, - - Insn::String8 { .. } => execute::op_string8, - - Insn::Blob { .. } => execute::op_blob, - - Insn::RowId { .. } => execute::op_row_id, - - Insn::SeekRowid { .. } => execute::op_seek_rowid, - Insn::DeferredSeek { .. } => execute::op_deferred_seek, - Insn::SeekGE { .. } => execute::op_seek_ge, - Insn::SeekGT { .. } => execute::op_seek_gt, - Insn::IdxGE { .. } => execute::op_idx_ge, - Insn::IdxGT { .. } => execute::op_idx_gt, - Insn::IdxLE { .. } => execute::op_idx_le, - Insn::IdxLT { .. } => execute::op_idx_lt, - Insn::DecrJumpZero { .. } => execute::op_decr_jump_zero, - - Insn::AggStep { .. } => execute::op_agg_step, - Insn::AggFinal { .. } => execute::op_agg_final, - - Insn::SorterOpen { .. 
} => execute::op_sorter_open, - Insn::SorterInsert { .. } => execute::op_sorter_insert, - Insn::SorterSort { .. } => execute::op_sorter_sort, - Insn::SorterData { .. } => execute::op_sorter_data, - Insn::SorterNext { .. } => execute::op_sorter_next, - Insn::Function { .. } => execute::op_function, - Insn::InitCoroutine { .. } => execute::op_init_coroutine, - Insn::EndCoroutine { .. } => execute::op_end_coroutine, - - Insn::Yield { .. } => execute::op_yield, - Insn::InsertAsync { .. } => execute::op_insert_async, - Insn::InsertAwait { .. } => execute::op_insert_await, - - Insn::DeleteAsync { .. } => execute::op_delete_async, - - Insn::DeleteAwait { .. } => execute::op_delete_await, - - Insn::NewRowid { .. } => execute::op_new_rowid, - Insn::MustBeInt { .. } => execute::op_must_be_int, - - Insn::SoftNull { .. } => execute::op_soft_null, - - Insn::NotExists { .. } => execute::op_not_exists, - Insn::OffsetLimit { .. } => execute::op_offset_limit, - Insn::OpenWriteAsync { .. } => execute::op_open_write_async, - Insn::OpenWriteAwait { .. } => execute::op_open_write_await, - - Insn::Copy { .. } => execute::op_copy, - Insn::CreateBtree { .. } => execute::op_create_btree, - - Insn::Destroy { .. } => execute::op_destroy, - Insn::DropTable { .. } => execute::op_drop_table, - Insn::Close { .. } => execute::op_close, - - Insn::IsNull { .. } => execute::op_is_null, - - Insn::ParseSchema { .. } => execute::op_parse_schema, - - Insn::ShiftRight { .. } => execute::op_shift_right, - - Insn::ShiftLeft { .. } => execute::op_shift_left, - - Insn::Variable { .. } => execute::op_variable, - - Insn::ZeroOrNull { .. } => execute::op_zero_or_null, - - Insn::Not { .. } => execute::op_not, - - Insn::Concat { .. } => execute::op_concat, - - Insn::And { .. } => execute::op_and, - - Insn::Or { .. } => execute::op_or, - - Insn::Noop => execute::op_noop, - Insn::PageCount { .. } => execute::op_page_count, - - Insn::ReadCookie { .. 
} => execute::op_read_cookie, - } - } -} - -#[cfg(test)] -mod tests { - use crate::{ - types::{OwnedValue, Text}, - vdbe::insn::exec_or, - }; - - use super::exec_add; - - #[test] - fn test_exec_add() { - let inputs = vec![ - (OwnedValue::Integer(3), OwnedValue::Integer(1)), - (OwnedValue::Float(3.0), OwnedValue::Float(1.0)), - (OwnedValue::Float(3.0), OwnedValue::Integer(1)), - (OwnedValue::Integer(3), OwnedValue::Float(1.0)), - (OwnedValue::Null, OwnedValue::Null), - (OwnedValue::Null, OwnedValue::Integer(1)), - (OwnedValue::Null, OwnedValue::Float(1.0)), - (OwnedValue::Null, OwnedValue::Text(Text::from_str("2"))), - (OwnedValue::Integer(1), OwnedValue::Null), - (OwnedValue::Float(1.0), OwnedValue::Null), - (OwnedValue::Text(Text::from_str("1")), OwnedValue::Null), - ( - OwnedValue::Text(Text::from_str("1")), - OwnedValue::Text(Text::from_str("3")), - ), - ( - OwnedValue::Text(Text::from_str("1.0")), - OwnedValue::Text(Text::from_str("3.0")), - ), - ( - OwnedValue::Text(Text::from_str("1.0")), - OwnedValue::Float(3.0), - ), - ( - OwnedValue::Text(Text::from_str("1.0")), - OwnedValue::Integer(3), - ), - ( - OwnedValue::Float(1.0), - OwnedValue::Text(Text::from_str("3.0")), - ), - ( - OwnedValue::Integer(1), - OwnedValue::Text(Text::from_str("3")), - ), - ]; - - let outputs = [ - OwnedValue::Integer(4), - OwnedValue::Float(4.0), - OwnedValue::Float(4.0), - OwnedValue::Float(4.0), - OwnedValue::Null, - OwnedValue::Null, - OwnedValue::Null, - OwnedValue::Null, - OwnedValue::Null, - OwnedValue::Null, - OwnedValue::Null, - OwnedValue::Integer(4), - OwnedValue::Float(4.0), - OwnedValue::Float(4.0), - OwnedValue::Float(4.0), - OwnedValue::Float(4.0), - OwnedValue::Float(4.0), - ]; - - assert_eq!( - inputs.len(), - outputs.len(), - "Inputs and Outputs should have same size" - ); - for (i, (lhs, rhs)) in inputs.iter().enumerate() { - assert_eq!( - exec_add(lhs, rhs), - outputs[i], - "Wrong ADD for lhs: {}, rhs: {}", - lhs, - rhs - ); - } - } - - use super::exec_subtract; - - #[test] - fn test_exec_subtract() { - let inputs = vec![ - (OwnedValue::Integer(3), OwnedValue::Integer(1)), - (OwnedValue::Float(3.0), OwnedValue::Float(1.0)), - (OwnedValue::Float(3.0), OwnedValue::Integer(1)), - (OwnedValue::Integer(3), OwnedValue::Float(1.0)), - (OwnedValue::Null, OwnedValue::Null), - (OwnedValue::Null, OwnedValue::Integer(1)), - (OwnedValue::Null, OwnedValue::Float(1.0)), - (OwnedValue::Null, OwnedValue::Text(Text::from_str("1"))), - (OwnedValue::Integer(1), OwnedValue::Null), - (OwnedValue::Float(1.0), OwnedValue::Null), - (OwnedValue::Text(Text::from_str("4")), OwnedValue::Null), - ( - OwnedValue::Text(Text::from_str("1")), - OwnedValue::Text(Text::from_str("3")), - ), - ( - OwnedValue::Text(Text::from_str("1.0")), - OwnedValue::Text(Text::from_str("3.0")), - ), - ( - OwnedValue::Text(Text::from_str("1.0")), - OwnedValue::Float(3.0), - ), - ( - OwnedValue::Text(Text::from_str("1.0")), - OwnedValue::Integer(3), - ), - ( - OwnedValue::Float(1.0), - OwnedValue::Text(Text::from_str("3.0")), - ), - ( - OwnedValue::Integer(1), - OwnedValue::Text(Text::from_str("3")), - ), - ]; - - let outputs = [ - OwnedValue::Integer(2), - OwnedValue::Float(2.0), - OwnedValue::Float(2.0), - OwnedValue::Float(2.0), - OwnedValue::Null, - OwnedValue::Null, - OwnedValue::Null, - OwnedValue::Null, - OwnedValue::Null, - OwnedValue::Null, - OwnedValue::Null, - OwnedValue::Integer(-2), - OwnedValue::Float(-2.0), - OwnedValue::Float(-2.0), - OwnedValue::Float(-2.0), - OwnedValue::Float(-2.0), - OwnedValue::Float(-2.0), - ]; - - 
assert_eq!( - inputs.len(), - outputs.len(), - "Inputs and Outputs should have same size" - ); - for (i, (lhs, rhs)) in inputs.iter().enumerate() { - assert_eq!( - exec_subtract(lhs, rhs), - outputs[i], - "Wrong subtract for lhs: {}, rhs: {}", - lhs, - rhs - ); - } - } - use super::exec_multiply; - - #[test] - fn test_exec_multiply() { - let inputs = vec![ - (OwnedValue::Integer(3), OwnedValue::Integer(2)), - (OwnedValue::Float(3.0), OwnedValue::Float(2.0)), - (OwnedValue::Float(3.0), OwnedValue::Integer(2)), - (OwnedValue::Integer(3), OwnedValue::Float(2.0)), - (OwnedValue::Null, OwnedValue::Null), - (OwnedValue::Null, OwnedValue::Integer(1)), - (OwnedValue::Null, OwnedValue::Float(1.0)), - (OwnedValue::Null, OwnedValue::Text(Text::from_str("1"))), - (OwnedValue::Integer(1), OwnedValue::Null), - (OwnedValue::Float(1.0), OwnedValue::Null), - (OwnedValue::Text(Text::from_str("4")), OwnedValue::Null), - ( - OwnedValue::Text(Text::from_str("2")), - OwnedValue::Text(Text::from_str("3")), - ), - ( - OwnedValue::Text(Text::from_str("2.0")), - OwnedValue::Text(Text::from_str("3.0")), - ), - ( - OwnedValue::Text(Text::from_str("2.0")), - OwnedValue::Float(3.0), - ), - ( - OwnedValue::Text(Text::from_str("2.0")), - OwnedValue::Integer(3), - ), - ( - OwnedValue::Float(2.0), - OwnedValue::Text(Text::from_str("3.0")), - ), - ( - OwnedValue::Integer(2), - OwnedValue::Text(Text::from_str("3.0")), - ), - ]; - - let outputs = [ - OwnedValue::Integer(6), - OwnedValue::Float(6.0), - OwnedValue::Float(6.0), - OwnedValue::Float(6.0), - OwnedValue::Null, - OwnedValue::Null, - OwnedValue::Null, - OwnedValue::Null, - OwnedValue::Null, - OwnedValue::Null, - OwnedValue::Null, - OwnedValue::Integer(6), - OwnedValue::Float(6.0), - OwnedValue::Float(6.0), - OwnedValue::Float(6.0), - OwnedValue::Float(6.0), - OwnedValue::Float(6.0), - ]; - - assert_eq!( - inputs.len(), - outputs.len(), - "Inputs and Outputs should have same size" - ); - for (i, (lhs, rhs)) in inputs.iter().enumerate() { - assert_eq!( - exec_multiply(lhs, rhs), - outputs[i], - "Wrong multiply for lhs: {}, rhs: {}", - lhs, - rhs - ); - } - } - use super::exec_divide; - - #[test] - fn test_exec_divide() { - let inputs = vec![ - (OwnedValue::Integer(1), OwnedValue::Integer(0)), - (OwnedValue::Float(1.0), OwnedValue::Float(0.0)), - (OwnedValue::Integer(i64::MIN), OwnedValue::Integer(-1)), - (OwnedValue::Float(6.0), OwnedValue::Float(2.0)), - (OwnedValue::Float(6.0), OwnedValue::Integer(2)), - (OwnedValue::Integer(6), OwnedValue::Integer(2)), - (OwnedValue::Null, OwnedValue::Integer(2)), - (OwnedValue::Integer(2), OwnedValue::Null), - (OwnedValue::Null, OwnedValue::Null), - ( - OwnedValue::Text(Text::from_str("6")), - OwnedValue::Text(Text::from_str("2")), - ), - ( - OwnedValue::Text(Text::from_str("6")), - OwnedValue::Integer(2), - ), - ]; - - let outputs = [ - OwnedValue::Null, - OwnedValue::Null, - OwnedValue::Float(9.223372036854776e18), - OwnedValue::Float(3.0), - OwnedValue::Float(3.0), - OwnedValue::Float(3.0), - OwnedValue::Null, - OwnedValue::Null, - OwnedValue::Null, - OwnedValue::Float(3.0), - OwnedValue::Float(3.0), - ]; - - assert_eq!( - inputs.len(), - outputs.len(), - "Inputs and Outputs should have same size" - ); - for (i, (lhs, rhs)) in inputs.iter().enumerate() { - assert_eq!( - exec_divide(lhs, rhs), - outputs[i], - "Wrong divide for lhs: {}, rhs: {}", - lhs, - rhs - ); - } - } - - use super::exec_remainder; - #[test] - fn test_exec_remainder() { - let inputs = vec![ - (OwnedValue::Null, OwnedValue::Null), - (OwnedValue::Null, 
OwnedValue::Float(1.0)), - (OwnedValue::Null, OwnedValue::Integer(1)), - (OwnedValue::Null, OwnedValue::Text(Text::from_str("1"))), - (OwnedValue::Float(1.0), OwnedValue::Null), - (OwnedValue::Integer(1), OwnedValue::Null), - (OwnedValue::Integer(12), OwnedValue::Integer(0)), - (OwnedValue::Float(12.0), OwnedValue::Float(0.0)), - (OwnedValue::Float(12.0), OwnedValue::Integer(0)), - (OwnedValue::Integer(12), OwnedValue::Float(0.0)), - (OwnedValue::Integer(12), OwnedValue::Integer(3)), - (OwnedValue::Float(12.0), OwnedValue::Float(3.0)), - (OwnedValue::Float(12.0), OwnedValue::Integer(3)), - (OwnedValue::Integer(12), OwnedValue::Float(3.0)), - ( - OwnedValue::Text(Text::from_str("12.0")), - OwnedValue::Text(Text::from_str("3.0")), - ), - ( - OwnedValue::Text(Text::from_str("12.0")), - OwnedValue::Float(3.0), - ), - ( - OwnedValue::Float(12.0), - OwnedValue::Text(Text::from_str("12.0")), - ), - ]; - let outputs = vec![ - OwnedValue::Null, - OwnedValue::Null, - OwnedValue::Null, - OwnedValue::Null, - OwnedValue::Null, - OwnedValue::Null, - OwnedValue::Null, - OwnedValue::Null, - OwnedValue::Null, - OwnedValue::Null, - OwnedValue::Integer(0), - OwnedValue::Float(0.0), - OwnedValue::Float(0.0), - OwnedValue::Float(0.0), - OwnedValue::Float(0.0), - OwnedValue::Float(0.0), - OwnedValue::Float(0.0), - ]; - - assert_eq!( - inputs.len(), - outputs.len(), - "Inputs and Outputs should have same size" - ); - - for (i, (lhs, rhs)) in inputs.iter().enumerate() { - assert_eq!( - exec_remainder(lhs, rhs), - outputs[i], - "Wrong remainder for lhs: {}, rhs: {}", - lhs, - rhs - ); - } - } - - use super::exec_and; - - #[test] - fn test_exec_and() { - let inputs = vec![ - (OwnedValue::Integer(0), OwnedValue::Null), - (OwnedValue::Null, OwnedValue::Integer(1)), - (OwnedValue::Null, OwnedValue::Null), - (OwnedValue::Float(0.0), OwnedValue::Null), - (OwnedValue::Integer(1), OwnedValue::Float(2.2)), - ( - OwnedValue::Integer(0), - OwnedValue::Text(Text::from_str("string")), - ), - ( - OwnedValue::Integer(0), - OwnedValue::Text(Text::from_str("1")), - ), - ( - OwnedValue::Integer(1), - OwnedValue::Text(Text::from_str("1")), - ), - ]; - let outputs = [ - OwnedValue::Integer(0), - OwnedValue::Null, - OwnedValue::Null, - OwnedValue::Integer(0), - OwnedValue::Integer(1), - OwnedValue::Integer(0), - OwnedValue::Integer(0), - OwnedValue::Integer(1), - ]; - - assert_eq!( - inputs.len(), - outputs.len(), - "Inputs and Outputs should have same size" - ); - for (i, (lhs, rhs)) in inputs.iter().enumerate() { - assert_eq!( - exec_and(lhs, rhs), - outputs[i], - "Wrong AND for lhs: {}, rhs: {}", - lhs, - rhs - ); - } - } - - #[test] - fn test_exec_or() { - let inputs = vec![ - (OwnedValue::Integer(0), OwnedValue::Null), - (OwnedValue::Null, OwnedValue::Integer(1)), - (OwnedValue::Null, OwnedValue::Null), - (OwnedValue::Float(0.0), OwnedValue::Null), - (OwnedValue::Integer(1), OwnedValue::Float(2.2)), - (OwnedValue::Float(0.0), OwnedValue::Integer(0)), - ( - OwnedValue::Integer(0), - OwnedValue::Text(Text::from_str("string")), - ), - ( - OwnedValue::Integer(0), - OwnedValue::Text(Text::from_str("1")), - ), - (OwnedValue::Integer(0), OwnedValue::Text(Text::from_str(""))), - ]; - let outputs = [ - OwnedValue::Null, - OwnedValue::Integer(1), - OwnedValue::Null, - OwnedValue::Null, - OwnedValue::Integer(1), - OwnedValue::Integer(0), - OwnedValue::Integer(0), - OwnedValue::Integer(1), - OwnedValue::Integer(0), - ]; - - assert_eq!( - inputs.len(), - outputs.len(), - "Inputs and Outputs should have same size" - ); - for (i, (lhs, rhs)) in 
inputs.iter().enumerate() { - assert_eq!( - exec_or(lhs, rhs), - outputs[i], - "Wrong OR for lhs: {}, rhs: {}", - lhs, - rhs - ); - } - } -} diff --git a/core/vdbe/mod.rs b/core/vdbe/mod.rs index 673b836a4..2adf438b6 100644 --- a/core/vdbe/mod.rs +++ b/core/vdbe/mod.rs @@ -24,37 +24,61 @@ pub mod insn; pub mod likeop; pub mod sorter; -use crate::error::LimboError; -use crate::fast_lock::SpinLock; -use crate::function::{AggFunc, FuncCtx}; - -use crate::storage::sqlite3_ondisk::DatabaseHeader; -use crate::storage::{btree::BTreeCursor, pager::Pager}; -use crate::translate::plan::{ResultSetColumn, TableReference}; -use crate::types::{ - AggContext, Cursor, CursorResult, ImmutableRecord, OwnedValue, SeekKey, SeekOp, +use crate::{ + error::LimboError, + fast_lock::SpinLock, + function::{AggFunc, FuncCtx}, + storage::sqlite3_ondisk::SmallVec, +}; + +use crate::{ + storage::{btree::BTreeCursor, pager::Pager, sqlite3_ondisk::DatabaseHeader}, + translate::plan::{ResultSetColumn, TableReference}, + types::{AggContext, Cursor, CursorResult, ImmutableRecord, OwnedValue, SeekKey, SeekOp}, + vdbe::{builder::CursorType, insn::Insn}, }; -use crate::util::cast_text_to_numeric; -use crate::vdbe::builder::CursorType; -use crate::vdbe::insn::Insn; use crate::CheckpointStatus; #[cfg(feature = "json")] use crate::json::JsonCacheCell; use crate::{Connection, MvStore, Result, TransactionState}; -use execute::{InsnFunction, InsnFunctionStepResult}; +use execute::{InsnFunction, InsnFunctionStepResult, OpIdxDeleteState}; -use rand::distributions::{Distribution, Uniform}; -use rand::Rng; +use rand::{ + distributions::{Distribution, Uniform}, + Rng, +}; use regex::Regex; -use std::cell::{Cell, RefCell}; -use std::collections::HashMap; -use std::ffi::c_void; -use std::num::NonZero; -use std::ops::Deref; -use std::rc::{Rc, Weak}; -use std::sync::Arc; +use std::{ + cell::{Cell, RefCell}, + collections::HashMap, + ffi::c_void, + num::NonZero, + ops::Deref, + rc::{Rc, Weak}, + sync::Arc, +}; + +/// We use labels to indicate that we want to jump to whatever the instruction offset +/// will be at runtime, because the offset cannot always be determined when the jump +/// instruction is created. +/// +/// In some cases, we want to jump to EXACTLY a specific instruction. +/// - Example: a condition is not met, so we want to jump to wherever Halt is. +/// In other cases, we don't care what the exact instruction is, but we know that we +/// want to jump to whatever comes AFTER a certain instruction. +/// - Example: a Next instruction will want to jump to "whatever the start of the loop is", +/// but it doesn't care what instruction that is. +/// +/// The reason this distinction is important is that we might reorder instructions that are +/// constant at compile time, and when we do that, we need to change the offsets of any impacted +/// jump instructions, so the instruction that comes immediately after "next Insn" might have changed during the reordering. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub enum JumpTarget { + ExactlyThisInsn, + AfterThisInsn, +} #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] /// Represents a target for a jump instruction. @@ -91,15 +115,6 @@ impl BranchOffset { } } - /// Returns the label value. Panics if the branch offset is an offset or placeholder. 
- pub fn to_label_value(&self) -> u32 { - match self { - BranchOffset::Label(v) => *v, - BranchOffset::Offset(_) => unreachable!("Offset cannot be converted to label value"), - BranchOffset::Placeholder => unreachable!("Unresolved placeholder"), - } - } - /// Returns the branch offset as a signed integer. /// Used in explain output, where we don't want to panic in case we have an unresolved /// label or placeholder. @@ -117,6 +132,10 @@ impl BranchOffset { pub fn add>(self, n: N) -> BranchOffset { BranchOffset::Offset(self.to_offset_int() + n.into()) } + + pub fn sub>(self, n: N) -> BranchOffset { + BranchOffset::Offset(self.to_offset_int() - n.into()) + } } pub type CursorID = usize; @@ -229,6 +248,8 @@ pub struct ProgramState { last_compare: Option, deferred_seek: Option<(CursorID, CursorID)>, ended_coroutine: Bitfield<4>, // flag to indicate that a coroutine has ended (key is the yield register. currently we assume that the yield register is always between 0-255, YOLO) + /// Indicate whether an [Insn::Once] instruction at a given program counter position has already been executed, well, once. + once: SmallVec, regex_cache: RegexCache, pub(crate) mv_tx_id: Option, interrupted: bool, @@ -236,6 +257,7 @@ pub struct ProgramState { halt_state: Option, #[cfg(feature = "json")] json_cache: JsonCacheCell, + op_idx_delete_state: Option, } impl ProgramState { @@ -251,6 +273,7 @@ impl ProgramState { last_compare: None, deferred_seek: None, ended_coroutine: Bitfield::new(), + once: SmallVec::::new(), regex_cache: RegexCache::new(), mv_tx_id: None, interrupted: false, @@ -258,6 +281,7 @@ impl ProgramState { halt_state: None, #[cfg(feature = "json")] json_cache: JsonCacheCell::new(), + op_idx_delete_state: None, } } @@ -281,8 +305,11 @@ impl ProgramState { self.parameters.insert(index, value); } - pub fn get_parameter(&self, index: NonZero) -> Option<&OwnedValue> { - self.parameters.get(&index) + pub fn get_parameter(&self, index: NonZero) -> OwnedValue { + self.parameters + .get(&index) + .cloned() + .unwrap_or(OwnedValue::Null) } pub fn reset(&mut self) { @@ -342,7 +369,7 @@ pub struct Program { pub insns: Vec<(Insn, InsnFunction)>, pub cursor_ref: Vec<(Option, CursorType)>, pub database_header: Arc>, - pub comments: Option>, + pub comments: Option>, pub parameters: crate::parameters::Parameters, pub connection: Weak, pub n_change: Cell, @@ -386,7 +413,7 @@ impl Program { ) -> Result { if let Some(mv_store) = mv_store { let conn = self.connection.upgrade().unwrap(); - let auto_commit = *conn.auto_commit.borrow(); + let auto_commit = conn.auto_commit.get(); if auto_commit { let mut mv_transactions = conn.mv_transactions.borrow_mut(); for tx_id in mv_transactions.iter() { @@ -394,13 +421,13 @@ impl Program { } mv_transactions.clear(); } - return Ok(StepResult::Done); + Ok(StepResult::Done) } else { let connection = self .connection .upgrade() .expect("only weak ref to connection?"); - let auto_commit = *connection.auto_commit.borrow(); + let auto_commit = connection.auto_commit.get(); tracing::trace!("Halt auto_commit {}", auto_commit); assert!( program_state.halt_state.is_none() @@ -408,30 +435,28 @@ impl Program { ); if program_state.halt_state.is_some() { self.step_end_write_txn(&pager, &mut program_state.halt_state, connection.deref()) - } else { - if auto_commit { - let current_state = connection.transaction_state.borrow().clone(); - match current_state { - TransactionState::Write => self.step_end_write_txn( - &pager, - &mut program_state.halt_state, - connection.deref(), - ), - 
TransactionState::Read => { - connection.transaction_state.replace(TransactionState::None); - pager.end_read_tx()?; - Ok(StepResult::Done) - } - TransactionState::None => Ok(StepResult::Done), + } else if auto_commit { + let current_state = connection.transaction_state.get(); + match current_state { + TransactionState::Write => self.step_end_write_txn( + &pager, + &mut program_state.halt_state, + connection.deref(), + ), + TransactionState::Read => { + connection.transaction_state.replace(TransactionState::None); + pager.end_read_tx()?; + Ok(StepResult::Done) } - } else { - if self.change_cnt_on { - if let Some(conn) = self.connection.upgrade() { - conn.set_changes(self.n_change.get()); - } - } - Ok(StepResult::Done) + TransactionState::None => Ok(StepResult::Done), } + } else { + if self.change_cnt_on { + if let Some(conn) = self.connection.upgrade() { + conn.set_changes(self.n_change.get()); + } + } + Ok(StepResult::Done) } } } @@ -534,10 +559,11 @@ fn trace_insn(program: &Program, addr: InsnReference, insn: &Insn) { addr, insn, String::new(), - program - .comments - .as_ref() - .and_then(|comments| comments.get(&{ addr }).copied()) + program.comments.as_ref().and_then(|comments| comments + .iter() + .find(|(offset, _)| *offset == addr) + .map(|(_, comment)| comment) + .copied()) ) ); } @@ -548,10 +574,13 @@ fn print_insn(program: &Program, addr: InsnReference, insn: &Insn, indent: Strin addr, insn, indent, - program - .comments - .as_ref() - .and_then(|comments| comments.get(&{ addr }).copied()), + program.comments.as_ref().and_then(|comments| { + comments + .iter() + .find(|(offset, _)| *offset == addr) + .map(|(_, comment)| comment) + .copied() + }), ); w.push_str(&s); } @@ -559,11 +588,14 @@ fn print_insn(program: &Program, addr: InsnReference, insn: &Insn, indent: Strin fn get_indent_count(indent_count: usize, curr_insn: &Insn, prev_insn: Option<&Insn>) -> usize { let indent_count = if let Some(insn) = prev_insn { match insn { - Insn::RewindAwait { .. } - | Insn::LastAwait { .. } + Insn::Rewind { .. } + | Insn::Last { .. } | Insn::SorterSort { .. } | Insn::SeekGE { .. } - | Insn::SeekGT { .. } => indent_count + 1, + | Insn::SeekGT { .. } + | Insn::SeekLE { .. } + | Insn::SeekLT { .. } => indent_count + 1, + _ => indent_count, } } else { @@ -571,9 +603,7 @@ fn get_indent_count(indent_count: usize, curr_insn: &Insn, prev_insn: Option<&In }; match curr_insn { - Insn::NextAsync { .. } | Insn::SorterNext { .. } | Insn::PrevAsync { .. } => { - indent_count - 1 - } + Insn::Next { .. } | Insn::SorterNext { .. } | Insn::Prev { .. 
} => indent_count - 1, _ => indent_count, } } @@ -593,6 +623,15 @@ impl<'a> FromValueRow<'a> for i64 { } } +impl<'a> FromValueRow<'a> for f64 { + fn from_value(value: &'a OwnedValue) -> Result<Self> { + match value { + OwnedValue::Float(f) => Ok(*f), + _ => Err(LimboError::ConversionError("Expected float value".into())), + } + } +} + impl<'a> FromValueRow<'a> for String { fn from_value(value: &'a OwnedValue) -> Result<Self> { match value { @@ -629,11 +668,10 @@ impl Row { pub fn get_value<'a>(&'a self, idx: usize) -> &'a OwnedValue { let value = unsafe { self.values.add(idx).as_ref().unwrap() }; - let value = match value { + match value { Register::OwnedValue(owned_value) => owned_value, _ => unreachable!("a row should be formed of values only"), - }; - value + } } pub fn get_values(&self) -> impl Iterator { diff --git a/core/vdbe/sorter.rs b/core/vdbe/sorter.rs index 584a29271..d758f91f5 100644 --- a/core/vdbe/sorter.rs +++ b/core/vdbe/sorter.rs @@ -1,18 +1,21 @@ -use crate::types::ImmutableRecord; -use std::cmp::Ordering; +use limbo_sqlite3_parser::ast::SortOrder; + +use crate::types::{compare_immutable, ImmutableRecord, IndexKeySortOrder}; pub struct Sorter { records: Vec<ImmutableRecord>, current: Option<ImmutableRecord>, - order: Vec<bool>, + order: IndexKeySortOrder, + key_len: usize, } impl Sorter { - pub fn new(order: Vec<bool>) -> Self { + pub fn new(order: &[SortOrder]) -> Self { Self { records: Vec::new(), current: None, - order, + key_len: order.len(), + order: IndexKeySortOrder::from_list(order), } } pub fn is_empty(&self) -> bool { @@ -26,24 +29,11 @@ impl Sorter { // We do the sorting here since this is what is called by the SorterSort instruction pub fn sort(&mut self) { self.records.sort_by(|a, b| { - let cmp_by_idx = |idx: usize, ascending: bool| { - let a = &a.get_value(idx); - let b = &b.get_value(idx); - if ascending { - a.cmp(b) - } else { - b.cmp(a) - } - }; - - let mut cmp_ret = Ordering::Equal; - for (idx, &is_asc) in self.order.iter().enumerate() { - cmp_ret = cmp_by_idx(idx, is_asc); - if cmp_ret != Ordering::Equal { - break; - } - } - cmp_ret + compare_immutable( + &a.values[..self.key_len], + &b.values[..self.key_len], + self.order, + ) }); self.records.reverse(); self.next() diff --git a/db.sqlite b/db.sqlite new file mode 100644 index 000000000..a7582c3c9 Binary files /dev/null and b/db.sqlite differ diff --git a/dist-workspace.toml b/dist-workspace.toml new file mode 100644 index 000000000..bdeb71f97 --- /dev/null +++ b/dist-workspace.toml @@ -0,0 +1,28 @@ +[workspace] +members = ["cargo:."] + +# Config for 'dist' +[dist] +# The preferred dist version to use in CI (Cargo.toml SemVer syntax) +cargo-dist-version = "0.28.3" +# CI backends to support +ci = "github" +# The installers to generate for each app +installers = ["shell", "powershell"] +# Target platforms to build apps for (Rust target-triple syntax) +targets = [ + "aarch64-apple-darwin", + "x86_64-apple-darwin", + "x86_64-unknown-linux-gnu", + "x86_64-pc-windows-msvc", +] +# Which actions to run on pull requests
pr-run-mode = "plan" +# Path that installers should place binaries in +install-path = "~/.limbo" +# Whether to install an updater program +install-updater = true +# Whether to consider the binaries in a package for distribution (defaults true) +dist = false +# Whether to enable GitHub Attestations +github-attestations = true diff --git a/docs/testing.md b/docs/testing.md new file mode 100644 index 000000000..399cc53fe --- /dev/null +++ b/docs/testing.md @@ -0,0 +1,140 @@ +# Testing in Limbo + +Limbo supports a comprehensive testing system to ensure correctness, performance, and compatibility with SQLite.
+
+## 1. Compatibility Tests
+
+The `make test` target is the main entry point.
+
+Most compatibility tests live in the testing/ directory and are written in SQLite’s TCL test format. These tests ensure that Limbo matches SQLite’s behavior exactly. The database used during these tests is located at testing/testing.db, which includes the following schema:
+
+```sql
+CREATE TABLE users (
+    id INTEGER PRIMARY KEY,
+    first_name TEXT,
+    last_name TEXT,
+    email TEXT,
+    phone_number TEXT,
+    address TEXT,
+    city TEXT,
+    state TEXT,
+    zipcode TEXT,
+    age INTEGER
+);
+CREATE TABLE products (
+    id INTEGER PRIMARY KEY,
+    name TEXT,
+    price REAL
+);
+CREATE INDEX age_idx ON users (age);
+```
+
+You can freely write queries against these tables during compatibility testing.
+
+### Shell and Python-based Tests
+
+For cases where output or behavior differs intentionally from SQLite (e.g. due to new features or limitations), tests should be placed in the testing/cli_tests/ directory and written in Python.
+
+These tests use the TestLimboShell class:
+
+```python
+from cli_tests.common import TestLimboShell
+
+def test_uuid():
+    limbo = TestLimboShell()
+    limbo.run_test_fn("SELECT uuid4_str();", lambda res: len(res) == 36)
+    limbo.quit()
+```
+
+You can use run_test, run_test_fn, or debug_print to interact with the shell and validate results. The constructor takes an optional `sql` argument with the statements you want to initialize the shell with. You can also enable blob testing or override the executable and flags.
+
+Use these Python-based tests for validating:
+
+- Output formatting
+- Shell commands and .dot interactions
+- Limbo-specific extensions in `testing/cli_tests/extensions.py`
+- Any known divergence from SQLite behavior
+
+### Logging and Tracing
+
+If you wish to trace internal events during test execution, you can set the RUST_LOG environment variable before running the test. For example:
+
+```bash
+RUST_LOG=none,limbo_core=trace make test
+```
+
+This will enable trace-level logs for the limbo_core crate and disable logs elsewhere. All internal traces are written to the `testing/test.log` file.
+
+**Note:** trace logs can be very verbose; it's not uncommon for a single test run to generate megabytes of logs.
+
+## Deterministic Simulation Testing (DST)
+
+The Limbo simulator uses randomized deterministic simulations to test Limbo's database behavior.
+
+Each simulation begins with a random configuration:
+
+- the database workload distribution (percentages of reads, writes, deletes, ...),
+- database parameters (page size),
+- the number of readers or writers, etc.
+
+Based on these parameters, we randomly generate **interaction plans**. Interaction plans consist of statements, queries, and assertions that will be executed in order. The building blocks of interaction plans are:
+
+- Randomly generated SQL queries satisfying the workload distribution,
+- Properties, which contain multiple matching queries with assertions indicating the expected result.
+
+An example of a property is the following:
+
+```sql
+-- begin testing 'Select-Select-Optimizer'
+-- ASSUME table marvelous_ideal exists;
+SELECT ((devoted_ahmed = -9142609771.541502 AND loving_wicker = -1246708244.164486)) FROM marvelous_ideal WHERE TRUE;
+SELECT * FROM marvelous_ideal WHERE (devoted_ahmed = -9142609771.541502 AND loving_wicker = -1246708244.164486);
+-- ASSERT select queries should return the same amount of results;
+-- end testing 'Select-Select-Optimizer'
+```
+
+The simulator starts from an initially empty database, adding random interactions based on the workload distribution. It can mix in random queries unrelated to the properties, without breaking the property invariants, to reach more diverse states while still respecting the configured workload distribution. The simulator then executes the interaction plans in a loop and checks the assertions.
+
+## Usage
+
+To run the simulator, you can use the following command:
+
+```bash
+RUST_LOG=limbo_sim=debug cargo run --bin limbo_sim
+```
+
+The simulator CLI has a few configuration options that you can explore via the `--help` flag.
+
+```txt
+The Limbo deterministic simulator
+
+Usage: limbo_sim [OPTIONS]
+
+Options:
+  -s, --seed            set seed for reproducible runs
+  -d, --doublecheck     enable doublechecking, run the simulator with the plan twice and check output equality
+  -n, --maximum-size    change the maximum size of the randomly generated sequence of interactions [default: 5000]
+  -k, --minimum-size    change the minimum size of the randomly generated sequence of interactions [default: 1000]
+  -t, --maximum-time    change the maximum time of the simulation (in seconds) [default: 3600]
+  -l, --load            load plan from the bug base
+  -w, --watch           enable watch mode that reruns the simulation on file changes
+      --differential    run differential testing between sqlite and Limbo
+  -h, --help            Print help
+  -V, --version         Print version
+```
+
+## Fuzzing
+
+TODO!
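+
+In the meantime, the fuzz targets that already exist under `fuzz/` can be run directly. A minimal sketch, assuming `cargo-fuzz` and a nightly toolchain are installed:
+
+```bash
+# one-time setup: install the cargo-fuzz harness
+cargo install cargo-fuzz
+# run the expression fuzz target (see fuzz/Cargo.toml for the full list of targets)
+cargo +nightly fuzz run expression
+```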
+ + + diff --git a/extensions/completion/src/lib.rs b/extensions/completion/src/lib.rs index 09b09c479..53358c23c 100644 --- a/extensions/completion/src/lib.rs +++ b/extensions/completion/src/lib.rs @@ -91,8 +91,8 @@ impl VTabModule for CompletionVTab { cursor.eof() } - fn filter(cursor: &mut Self::VCursor, args: &[Value]) -> ResultCode { - if args.len() == 0 || args.len() > 2 { + fn filter(cursor: &mut Self::VCursor, args: &[Value], _: Option<(&str, i32)>) -> ResultCode { + if args.is_empty() || args.len() > 2 { return ResultCode::InvalidArgs; } cursor.reset(); diff --git a/extensions/core/src/lib.rs b/extensions/core/src/lib.rs index e73b2b894..99729de6c 100644 --- a/extensions/core/src/lib.rs +++ b/extensions/core/src/lib.rs @@ -15,7 +15,10 @@ pub use types::{ResultCode, Value, ValueType}; #[cfg(feature = "vfs")] pub use vfs_modules::{RegisterVfsFn, VfsExtension, VfsFile, VfsFileImpl, VfsImpl, VfsInterface}; use vtabs::RegisterModuleFn; -pub use vtabs::{VTabCursor, VTabKind, VTabModule, VTabModuleImpl}; +pub use vtabs::{ + ConstraintInfo, ConstraintOp, ConstraintUsage, ExtIndexInfo, IndexInfo, OrderByInfo, + VTabCursor, VTabKind, VTabModule, VTabModuleImpl, +}; pub type ExtResult = std::result::Result; diff --git a/extensions/core/src/vtabs.rs b/extensions/core/src/vtabs.rs index b6e70b8bb..5d86457f7 100644 --- a/extensions/core/src/vtabs.rs +++ b/extensions/core/src/vtabs.rs @@ -21,6 +21,8 @@ pub struct VTabModuleImpl { pub eof: VtabFnEof, pub update: VtabFnUpdate, pub rowid: VtabRowIDFn, + pub destroy: VtabFnDestroy, + pub best_idx: BestIdxFn, } #[cfg(feature = "core_only")] @@ -42,8 +44,13 @@ pub type VtabFnCreateSchema = unsafe extern "C" fn(args: *const Value, argc: i32 pub type VtabFnOpen = unsafe extern "C" fn(*const c_void) -> *const c_void; -pub type VtabFnFilter = - unsafe extern "C" fn(cursor: *const c_void, argc: i32, argv: *const Value) -> ResultCode; +pub type VtabFnFilter = unsafe extern "C" fn( + cursor: *const c_void, + argc: i32, + argv: *const Value, + idx_str: *const c_char, + idx_num: i32, +) -> ResultCode; pub type VtabFnColumn = unsafe extern "C" fn(cursor: *const c_void, idx: u32) -> Value; @@ -60,6 +67,14 @@ pub type VtabFnUpdate = unsafe extern "C" fn( p_out_rowid: *mut i64, ) -> ResultCode; +pub type VtabFnDestroy = unsafe extern "C" fn(vtab: *const c_void) -> ResultCode; +pub type BestIdxFn = unsafe extern "C" fn( + constraints: *const ConstraintInfo, + constraint_len: i32, + order_by: *const OrderByInfo, + order_by_len: i32, +) -> ExtIndexInfo; + #[repr(C)] #[derive(Clone, Copy, Debug, PartialEq)] pub enum VTabKind { @@ -75,7 +90,11 @@ pub trait VTabModule: 'static { fn create_schema(args: &[Value]) -> String; fn open(&self) -> Result; - fn filter(cursor: &mut Self::VCursor, args: &[Value]) -> ResultCode; + fn filter( + cursor: &mut Self::VCursor, + args: &[Value], + idx_info: Option<(&str, i32)>, + ) -> ResultCode; fn column(cursor: &Self::VCursor, idx: u32) -> Result; fn next(cursor: &mut Self::VCursor) -> ResultCode; fn eof(cursor: &Self::VCursor) -> bool; @@ -88,6 +107,25 @@ pub trait VTabModule: 'static { fn delete(&mut self, _rowid: i64) -> Result<(), Self::Error> { Ok(()) } + fn destroy(&mut self) -> Result<(), Self::Error> { + Ok(()) + } + fn best_index(_constraints: &[ConstraintInfo], _order_by: &[OrderByInfo]) -> IndexInfo { + IndexInfo { + idx_num: 0, + idx_str: None, + order_by_consumed: false, + estimated_cost: 1_000_000.0, + estimated_rows: u32::MAX, + constraint_usages: _constraints + .iter() + .map(|_| ConstraintUsage { + argv_index: Some(0), 
+ omit: false, + }) + .collect(), + } + } } pub trait VTabCursor: Sized { @@ -97,3 +135,172 @@ pub trait VTabCursor: Sized { fn eof(&self) -> bool; fn next(&mut self) -> ResultCode; } + +#[repr(u8)] +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +pub enum ConstraintOp { + Eq = 2, + Lt = 4, + Le = 8, + Gt = 16, + Ge = 32, + Match = 64, + Like = 65, + Glob = 66, + Regexp = 67, + Ne = 68, + IsNot = 69, + IsNotNull = 70, + IsNull = 71, + Is = 72, + In = 73, +} + +#[repr(C)] +#[derive(Copy, Clone)] +/// Describes an ORDER BY clause in a query involving a virtual table. +/// Passed along with the constraints to xBestIndex. +pub struct OrderByInfo { + /// The index of the column referenced in the ORDER BY clause. + pub column_index: u32, + /// Whether or not the clause is in descending order. + pub desc: bool, +} + +/// The internal (core) representation of an 'index' on a virtual table. +/// Returned from xBestIndex and then processed and passed to VFilter. +#[derive(Debug, Clone)] +pub struct IndexInfo { + /// The index number, used to identify the index internally by the VTab + pub idx_num: i32, + /// Optional index name. These are passed to VFilter in a tuple (idx_num, idx_str) + pub idx_str: Option<String>, + /// Whether the index is used for order by + pub order_by_consumed: bool, + /// TODO: for eventual cost based query planning + pub estimated_cost: f64, + /// Estimated number of rows that the query will return + pub estimated_rows: u32, + /// List of constraints that can be used to optimize the query. + pub constraint_usages: Vec<ConstraintUsage>, +} +impl Default for IndexInfo { + fn default() -> Self { + Self { + idx_num: 0, + idx_str: None, + order_by_consumed: false, + estimated_cost: 1_000_000.0, + estimated_rows: u32::MAX, + constraint_usages: Vec::new(), + } + } +} + +impl IndexInfo { + /// + /// Converts IndexInfo to an FFI-safe `ExtIndexInfo`. + /// This method transfers ownership of `constraint_usages` and `idx_str`, + /// which must later be reclaimed using `from_ffi` to prevent leaks. + pub fn to_ffi(self) -> ExtIndexInfo { + let len = self.constraint_usages.len(); + let ptr = Box::into_raw(self.constraint_usages.into_boxed_slice()) as *mut ConstraintUsage; + let idx_str_len = self.idx_str.as_ref().map(|s| s.len()).unwrap_or(0); + let c_idx_str = self + .idx_str + .map(|s| std::ffi::CString::new(s).unwrap().into_raw()) + .unwrap_or(std::ptr::null_mut()); + ExtIndexInfo { + idx_num: self.idx_num, + estimated_cost: self.estimated_cost, + estimated_rows: self.estimated_rows, + order_by_consumed: self.order_by_consumed, + constraint_usages_ptr: ptr, + constraint_usage_len: len, + idx_str: c_idx_str as *mut _, + idx_str_len, + } + } + + /// Reclaims ownership of `constraint_usages` and `idx_str` from an FFI-safe `ExtIndexInfo`. + /// # Safety + /// This method is unsafe because it can cause memory leaks if not used correctly. + /// to_ffi and from_ffi are meant to send index info across FFI bounds and then immediately reclaim it.
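+ /// + /// A sketch of the intended round-trip (illustrative only; not part of the original API docs): + /// ```ignore + /// let ext = info.to_ffi(); // ownership of `idx_str` and `constraint_usages` crosses the FFI boundary + /// let info = unsafe { IndexInfo::from_ffi(ext) }; // reclaimed immediately on the other side + /// ```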
+ pub unsafe fn from_ffi(ffi: ExtIndexInfo) -> Self { + let constraint_usages = unsafe { + Box::from_raw(std::slice::from_raw_parts_mut( + ffi.constraint_usages_ptr, + ffi.constraint_usage_len, + )) + .to_vec() + }; + let idx_str = if ffi.idx_str.is_null() { + None + } else { + Some(unsafe { + std::ffi::CString::from_raw(ffi.idx_str as *mut _) + .to_string_lossy() + .into_owned() + }) + }; + Self { + idx_num: ffi.idx_num, + idx_str, + order_by_consumed: ffi.order_by_consumed, + estimated_cost: ffi.estimated_cost, + estimated_rows: ffi.estimated_rows, + constraint_usages, + } + } +} + +#[repr(C)] +#[derive(Clone, Debug)] +/// FFI representation of IndexInfo. +pub struct ExtIndexInfo { + pub idx_num: i32, + pub idx_str: *const u8, + pub idx_str_len: usize, + pub order_by_consumed: bool, + pub estimated_cost: f64, + pub estimated_rows: u32, + pub constraint_usages_ptr: *mut ConstraintUsage, + pub constraint_usage_len: usize, +} + +/// Returned from xBestIndex to describe how the virtual table +/// can use the constraints in the WHERE clause of a query. +#[derive(Debug, Clone, Copy)] +pub struct ConstraintUsage { + /// 1-based index of the argument passed + pub argv_index: Option, + /// If true, core can omit this constraint in the vdbe layer. + pub omit: bool, +} + +#[derive(Clone, Copy, Debug)] +#[repr(C)] +/// The primary argument to xBestIndex, which describes a constraint +/// in a query involving a virtual table. +pub struct ConstraintInfo { + /// The index of the column referenced in the WHERE clause. + pub column_index: u32, + /// The operator used in the clause. + pub op: ConstraintOp, + /// Whether or not the constraint is guaranteed to be enforced. + pub usable: bool, + /// Packed integer with the index of the constraint in the planner, + /// and the side of the binary expr that the relevant column is on.
+ pub plan_info: u32, +} + +impl ConstraintInfo { + #[inline(always)] + pub fn pack_plan_info(pred_idx: u32, is_right_side: bool) -> u32 { + ((pred_idx) << 1) | (is_right_side as u32) + } + #[inline(always)] + pub fn unpack_plan_info(&self) -> (usize, bool) { + ((self.plan_info >> 1) as usize, (self.plan_info & 1) != 0) + } +} diff --git a/extensions/series/src/lib.rs b/extensions/series/src/lib.rs index 43028eed5..21d3a89fa 100644 --- a/extensions/series/src/lib.rs +++ b/extensions/series/src/lib.rs @@ -45,7 +45,7 @@ impl VTabModule for GenerateSeriesVTab { }) } - fn filter(cursor: &mut Self::VCursor, args: &[Value]) -> ResultCode { + fn filter(cursor: &mut Self::VCursor, args: &[Value], _: Option<(&str, i32)>) -> ResultCode { // args are the start, stop, and step if args.is_empty() || args.len() > 3 { return ResultCode::InvalidArgs; @@ -240,7 +240,7 @@ mod tests { ]; // Initialize cursor through filter - match GenerateSeriesVTab::filter(&mut cursor, &args) { + match GenerateSeriesVTab::filter(&mut cursor, &args, None) { ResultCode::OK => (), ResultCode::EOF => return Ok(vec![]), err => return Err(err), @@ -293,7 +293,7 @@ mod tests { let expected_len = series_expected_length(&series); assert_eq!( values.len(), - expected_len as usize, + expected_len, "Series length mismatch for start={}, stop={}, step={}: expected {}, got {}, values: {:?}", start, stop, @@ -546,7 +546,7 @@ mod tests { let start = series.start; let stop = series.stop; let step = series.step; - let tbl = GenerateSeriesVTab::default(); + let tbl = GenerateSeriesVTab {}; let mut cursor = tbl.open().unwrap(); let args = vec![ @@ -556,7 +556,7 @@ mod tests { ]; // Initialize cursor through filter - GenerateSeriesVTab::filter(&mut cursor, &args); + GenerateSeriesVTab::filter(&mut cursor, &args, None); let mut rowids = vec![]; while !GenerateSeriesVTab::eof(&cursor) { diff --git a/extensions/tests/src/lib.rs b/extensions/tests/src/lib.rs index 92e4f874f..5c6495595 100644 --- a/extensions/tests/src/lib.rs +++ b/extensions/tests/src/lib.rs @@ -1,7 +1,7 @@ use lazy_static::lazy_static; use limbo_ext::{ - register_extension, scalar, ExtResult, ResultCode, VTabCursor, VTabKind, VTabModule, - VTabModuleDerive, Value, + register_extension, scalar, ConstraintInfo, ConstraintOp, ConstraintUsage, ExtResult, + IndexInfo, OrderByInfo, ResultCode, VTabCursor, VTabKind, VTabModule, VTabModuleDerive, Value, }; #[cfg(not(target_family = "wasm"))] use limbo_ext::{VfsDerive, VfsExtension, VfsFile}; @@ -40,27 +40,99 @@ impl VTabModule for KVStoreVTab { } fn open(&self) -> Result { + let _ = env_logger::try_init(); Ok(KVStoreCursor { rows: Vec::new(), index: None, }) } - fn filter(cursor: &mut Self::VCursor, _args: &[Value]) -> ResultCode { - let store = GLOBAL_STORE.lock().unwrap(); - cursor.rows = store - .iter() - .map(|(&rowid, (k, v))| (rowid, k.clone(), v.clone())) - .collect(); - cursor.rows.sort_by_key(|(rowid, _, _)| *rowid); - - if cursor.rows.is_empty() { - cursor.index = None; - return ResultCode::EOF; - } else { - cursor.index = Some(0); + fn best_index(constraints: &[ConstraintInfo], _order_by: &[OrderByInfo]) -> IndexInfo { + // Look for: key = ? 
+ for constraint in constraints.iter() { + if constraint.usable + && constraint.op == ConstraintOp::Eq + && constraint.column_index == 0 + { + // this extension wouldn't support order by but for testing purposes, + // we will consume it if we find an ASC order by clause on the value column + let mut consumed = false; + if let Some(order) = _order_by.first() { + if order.column_index == 1 && !order.desc { + consumed = true; + } + } + log::debug!("xBestIndex: constraint found for 'key = ?'"); + return IndexInfo { + idx_num: 1, + idx_str: Some("key_eq".to_string()), + order_by_consumed: consumed, + estimated_cost: 10.0, + estimated_rows: 4, + constraint_usages: vec![ConstraintUsage { + omit: true, + argv_index: Some(1), + }], + }; + } + } + + // fallback: full scan + log::debug!("No usable constraints found, using full scan"); + IndexInfo { + idx_num: -1, + idx_str: None, + order_by_consumed: false, + estimated_cost: 1000.0, + ..Default::default() + } + } + + fn filter( + cursor: &mut Self::VCursor, + args: &[Value], + idx_str: Option<(&str, i32)>, + ) -> ResultCode { + match idx_str { + Some(("key_eq", 1)) => { + let key = args + .first() + .and_then(|v| v.to_text()) + .map(|s| s.to_string()); + log::debug!("idx_str found: key_eq\n value: {:?}", key); + if let Some(key) = key { + let rowid = hash_key(&key); + let store = GLOBAL_STORE.lock().unwrap(); + if let Some((k, v)) = store.get(&rowid) { + cursor.rows.push((rowid, k.clone(), v.clone())); + cursor.index = Some(0); + } else { + cursor.rows.clear(); + cursor.index = None; + return ResultCode::EOF; + } + return ResultCode::OK; + } + cursor.rows.clear(); + cursor.index = None; + ResultCode::OK + } + _ => { + let store = GLOBAL_STORE.lock().unwrap(); + cursor.rows = store + .iter() + .map(|(&rowid, (k, v))| (rowid, k.clone(), v.clone())) + .collect(); + cursor.rows.sort_by_key(|(rowid, _, _)| *rowid); + if cursor.rows.is_empty() { + cursor.index = None; + ResultCode::EOF + } else { + cursor.index = Some(0); + ResultCode::OK + } + } } - ResultCode::OK } fn insert(&mut self, values: &[Value]) -> Result { @@ -96,6 +168,7 @@ impl VTabModule for KVStoreVTab { let _ = self.insert(values)?; Ok(()) } + fn eof(cursor: &Self::VCursor) -> bool { cursor.index.is_some_and(|s| s >= cursor.rows.len()) || cursor.index.is_none() } @@ -112,13 +185,21 @@ impl VTabModule for KVStoreVTab { if cursor.index.is_some_and(|c| c >= cursor.rows.len()) { return Err("cursor out of range".into()); } - let (_, ref key, ref val) = cursor.rows[cursor.index.unwrap_or(0)]; - match idx { - 0 => Ok(Value::from_text(key.clone())), // key - 1 => Ok(Value::from_text(val.clone())), // value - _ => Err("Invalid column".into()), + if let Some((_, ref key, ref val)) = cursor.rows.get(cursor.index.unwrap_or(0)) { + match idx { + 0 => Ok(Value::from_text(key.clone())), // key + 1 => Ok(Value::from_text(val.clone())), // value + _ => Err("Invalid column".into()), + } + } else { + Err("Invalid Column".into()) } } + + fn destroy(&mut self) -> Result<(), Self::Error> { + println!("VDestroy called"); + Ok(()) + } } fn hash_key(key: &str) -> i64 { diff --git a/fuzz/Cargo.lock b/fuzz/Cargo.lock index f32c94005..091feceb7 100644 --- a/fuzz/Cargo.lock +++ b/fuzz/Cargo.lock @@ -47,15 +47,6 @@ version = "2.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5c8214115b7bf84099f1309324e63141d4c5d7cc26862f97a0a857dbefe165bd" -[[package]] -name = "block-buffer" -version = "0.10.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71" -dependencies = [ - "generic-array", -] - [[package]] name = "built" version = "0.7.7" @@ -72,12 +63,6 @@ version = "3.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1628fb46dfa0b37568d12e5edd512553eccf6a22a78e8bde00bb4aed84d5bdbf" -[[package]] -name = "byteorder" -version = "1.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" - [[package]] name = "cc" version = "1.2.16" @@ -130,15 +115,6 @@ version = "0.8.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" -[[package]] -name = "cpufeatures" -version = "0.2.17" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "59ed5838eebb26a2bb2e58f6d5b5316989ae9d08bab10e0e6d103e656d1b0280" -dependencies = [ - "libc", -] - [[package]] name = "crossbeam-epoch" version = "0.9.18" @@ -164,16 +140,6 @@ version = "0.8.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" -[[package]] -name = "crypto-common" -version = "0.1.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3" -dependencies = [ - "generic-array", - "typenum", -] - [[package]] name = "derive_arbitrary" version = "1.4.1" @@ -185,16 +151,6 @@ dependencies = [ "syn", ] -[[package]] -name = "digest" -version = "0.10.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" -dependencies = [ - "block-buffer", - "crypto-common", -] - [[package]] name = "displaydoc" version = "0.2.5" @@ -234,12 +190,6 @@ version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7360491ce676a36bf9bb3c56c1aa791658183a54d2744120f27285738d90465a" -[[package]] -name = "fast-float2" -version = "0.2.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f8eb564c5c7423d25c886fb561d1e4ee69f72354d16918afa32c08811f6b6a55" - [[package]] name = "foldhash" version = "0.1.4" @@ -255,16 +205,6 @@ dependencies = [ "percent-encoding", ] -[[package]] -name = "generic-array" -version = "0.14.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" -dependencies = [ - "typenum", - "version_check", -] - [[package]] name = "getrandom" version = "0.2.15" @@ -507,15 +447,8 @@ checksum = "3954d50fe15b02142bf25d3b8bdadb634ec3948f103d04ffe3031bc8fe9d7058" dependencies = [ "equivalent", "hashbrown", - "serde", ] -[[package]] -name = "itoa" -version = "1.0.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" - [[package]] name = "jobserver" version = "0.1.32" @@ -535,27 +468,11 @@ dependencies = [ "wasm-bindgen", ] -[[package]] -name = "jsonb" -version = "0.4.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "acd7dc2490b13d09367f5dc4bf202a5d70958dd5b9b2758e2708ee062752a824" -dependencies = [ - "byteorder", - "fast-float2", - "itoa", - "nom", - "ordered-float", - "rand", - "ryu", - "serde_json", -] - [[package]] name = "julian_day_converter" -version = "0.4.4" +version = "0.4.5" 
source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1aa5652b85ab018289638c6b924db618da9edd2ddfff7fa0ec38a8b51a9192d3" +checksum = "f2987f71b89b85c812c8484cbf0c5d7912589e77bfdc66fd3e52f760e7859f16" dependencies = [ "chrono", ] @@ -598,6 +515,12 @@ dependencies = [ "windows-targets", ] +[[package]] +name = "libm" +version = "0.2.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8355be11b20d696c8f18f6cc018c4e372165b1fa8126cef092399c9951984ffa" + [[package]] name = "libmimalloc-sys" version = "0.1.39" @@ -643,7 +566,7 @@ dependencies = [ [[package]] name = "limbo_core" -version = "0.0.18-pre.3" +version = "0.0.19" dependencies = [ "built", "cfg_block", @@ -652,10 +575,9 @@ dependencies = [ "fallible-iterator", "getrandom 0.2.15", "hex", - "indexmap", - "jsonb", "julian_day_converter", "libloading", + "libm", "limbo_ext", "limbo_macros", "limbo_sqlite3_parser", @@ -664,15 +586,12 @@ dependencies = [ "miette", "mimalloc", "parking_lot", - "pest", - "pest_derive", "polling", "rand", "regex", "regex-syntax", "rustix", "ryu", - "serde", "strum", "thiserror 1.0.69", "tracing", @@ -680,7 +599,7 @@ dependencies = [ [[package]] name = "limbo_ext" -version = "0.0.18-pre.3" +version = "0.0.19" dependencies = [ "chrono", "getrandom 0.3.1", @@ -689,7 +608,7 @@ dependencies = [ [[package]] name = "limbo_macros" -version = "0.0.18-pre.3" +version = "0.0.19" dependencies = [ "proc-macro2", "quote", @@ -698,7 +617,7 @@ dependencies = [ [[package]] name = "limbo_sqlite3_parser" -version = "0.0.18-pre.3" +version = "0.0.19" dependencies = [ "bitflags", "cc", @@ -717,7 +636,7 @@ dependencies = [ [[package]] name = "limbo_time" -version = "0.0.18-pre.3" +version = "0.0.19" dependencies = [ "chrono", "limbo_ext", @@ -729,7 +648,7 @@ dependencies = [ [[package]] name = "limbo_uuid" -version = "0.0.18-pre.3" +version = "0.0.19" dependencies = [ "limbo_ext", "mimalloc", @@ -802,22 +721,6 @@ dependencies = [ "libmimalloc-sys", ] -[[package]] -name = "minimal-lexical" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" - -[[package]] -name = "nom" -version = "7.1.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a" -dependencies = [ - "memchr", - "minimal-lexical", -] - [[package]] name = "num-traits" version = "0.2.19" @@ -833,15 +736,6 @@ version = "1.21.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cde51589ab56b20a6f686b2c68f7a0bd6add753d697abf720d63f8db3ab7b1ad" -[[package]] -name = "ordered-float" -version = "4.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7bb71e1b3fa6ca1c61f383464aaf2bb0e2f8e772a1f01d486832464de363b951" -dependencies = [ - "num-traits", -] - [[package]] name = "parking_lot" version = "0.12.3" @@ -871,51 +765,6 @@ version = "2.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" -[[package]] -name = "pest" -version = "2.7.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8b7cafe60d6cf8e62e1b9b2ea516a089c008945bb5a275416789e7db0bc199dc" -dependencies = [ - "memchr", - "thiserror 2.0.12", - "ucd-trie", -] - -[[package]] -name = "pest_derive" -version = "2.7.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"816518421cfc6887a0d62bf441b6ffb4536fcc926395a69e1a85852d4363f57e" -dependencies = [ - "pest", - "pest_generator", -] - -[[package]] -name = "pest_generator" -version = "2.7.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7d1396fd3a870fc7838768d171b4616d5c91f6cc25e377b673d714567d99377b" -dependencies = [ - "pest", - "pest_meta", - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "pest_meta" -version = "2.7.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e1e58089ea25d717bfd31fb534e4f3afcc2cc569c70de3e239778991ea3b7dea" -dependencies = [ - "once_cell", - "pest", - "sha2", -] - [[package]] name = "phf" version = "0.11.3" @@ -1142,30 +991,6 @@ dependencies = [ "syn", ] -[[package]] -name = "serde_json" -version = "1.0.140" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "20068b6e96dc6c9bd23e01df8827e6c7e1f2fddd43c21810382803c136b99373" -dependencies = [ - "indexmap", - "itoa", - "memchr", - "ryu", - "serde", -] - -[[package]] -name = "sha2" -version = "0.10.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "793db75ad2bcafc3ffa7c68b215fee268f537982cd901d132f89c6343f3a3dc8" -dependencies = [ - "cfg-if", - "cpufeatures", - "digest", -] - [[package]] name = "shlex" version = "1.3.0" @@ -1315,18 +1140,6 @@ dependencies = [ "once_cell", ] -[[package]] -name = "typenum" -version = "1.18.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1dccffe3ce07af9386bfd29e80c0ab1a8205a2fc34e4bcd40364df902cfa8f3f" - -[[package]] -name = "ucd-trie" -version = "0.1.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2896d95c02a80c6d6a5d6e953d479f5ddf2dfdb6a244441010e373ac0fb88971" - [[package]] name = "uncased" version = "0.9.10" diff --git a/fuzz/Cargo.toml b/fuzz/Cargo.toml index ac411077e..69d6f438f 100644 --- a/fuzz/Cargo.toml +++ b/fuzz/Cargo.toml @@ -11,7 +11,7 @@ cargo-fuzz = true [dependencies] libfuzzer-sys = "0.4" arbitrary = { version = "1.4.1", features = ["derive"] } -limbo_core = { path = "../core" } +limbo_core = { path = "../core", features = ["fuzz"] } rusqlite = { version = "0.34.0", features = ["bundled"] } # Prevent this from interfering with workspaces @@ -21,3 +21,7 @@ members = ["."] [[bin]] name = "expression" path = "fuzz_targets/expression.rs" + +[[bin]] +name = "cast_real" +path = "fuzz_targets/cast_real.rs" diff --git a/fuzz/fuzz_targets/cast_real.rs b/fuzz/fuzz_targets/cast_real.rs new file mode 100644 index 000000000..4ef4ab2ba --- /dev/null +++ b/fuzz/fuzz_targets/cast_real.rs @@ -0,0 +1,26 @@ +#![no_main] +use libfuzzer_sys::{fuzz_target, Corpus}; +use limbo_core::numeric::StrToF64; +use std::error::Error; + +fn do_fuzz(text: String) -> Result> { + let expected = { + let conn = rusqlite::Connection::open_in_memory()?; + conn.query_row(&format!("SELECT cast(? as real)"), (&text,), |row| { + row.get::<_, f64>(0) + })? + }; + + let actual = limbo_core::numeric::str_to_f64(&text) + .map(|v| { + let (StrToF64::Fractional(non_nan) | StrToF64::Decimal(non_nan)) = v; + f64::from(non_nan) + }) + .unwrap_or(0.0); + + assert_eq!(expected, actual); + + Ok(Corpus::Keep) +} + +fuzz_target!(|blob: String| -> Corpus { do_fuzz(blob).unwrap_or(Corpus::Keep) }); diff --git a/fuzz/fuzz_targets/expression.rs b/fuzz/fuzz_targets/expression.rs index 44338c634..703d64263 100644 --- a/fuzz/fuzz_targets/expression.rs +++ b/fuzz/fuzz_targets/expression.rs @@ -31,13 +31,15 @@ macro_rules! str_enum { str_enum! 
{ enum Binary { - Equal => "=", - Is => "IS", - NotEqual => "<>", - GreaterThan => ">", - GreaterThanOrEqual => ">=", - LessThan => "<", - LessThanOrEqual => "<=", + // TODO: Not compatible yet + // Equal => "=", + // Is => "IS", + // Concat => "||", + // NotEqual => "<>", + // GreaterThan => ">", + // GreaterThanOrEqual => ">=", + // LessThan => "<", + // LessThanOrEqual => "<=", RightShift => ">>", LeftShift => "<<", BitwiseAnd => "&", @@ -49,13 +51,13 @@ str_enum! { Multiply => "*", Divide => "/", Mod => "%", - Concat => "||", } } str_enum! { enum Unary { - Not => "~", + Not => "NOT", + BitwiseNot => "~", Negative => "-", Positive => "+", } } @@ -167,7 +169,7 @@ fn do_fuzz(expr: Expr) -> Result<Corpus, Box<dyn Error>> { let sql = format!("SELECT {}", expr.query); // FIX: `limbo_core::translate::expr::translate_expr` causes a overflow if this is any higher. - if expr.depth > 153 { + if expr.depth > 140 { return Ok(Corpus::Reject); } @@ -195,7 +197,7 @@ fn do_fuzz(expr: Expr) -> Result<Corpus, Box<dyn Error>> { StepResult::IO => io.run_once()?, StepResult::Row => { let row = stmt.row().unwrap(); - assert_eq!(row.count(), 1, "expr: {:?}", expr); + assert_eq!(row.len(), 1, "expr: {:?}", expr); break 'value row.get_value(0).clone(); } _ => unreachable!(), } @@ -206,12 +208,8 @@ fn do_fuzz(expr: Expr) -> Result<Corpus, Box<dyn Error>> { assert_eq!( OwnedValue::from(expected.clone()), found.clone(), - "with expression {:?} {}", + "with expression {:?}", expr, - match (expected, found) { - (Value::Real(a), OwnedValue::Float(b)) => format!("float diff: {:?}", (a - b).abs()), - _ => "".to_string(), - } ); Ok(Corpus::Keep) diff --git a/macros/src/lib.rs b/macros/src/lib.rs index 3de2797cb..d47101589 100644 --- a/macros/src/lib.rs +++ b/macros/src/lib.rs @@ -404,7 +404,11 @@ pub fn derive_agg_func(input: TokenStream) -> TokenStream { /// /// Delete the row with the provided rowid /// fn delete(&mut self, rowid: i64) -> Result<(), Self::Error> { /// Ok(()) -/// } +/// } +/// /// Destroy the virtual table. Any cleanup logic for when the table is deleted goes here +/// fn destroy(&mut self) -> Result<(), Self::Error> { +/// Ok(()) +/// } /// /// #[derive(Debug)] /// struct CsvCursor { @@ -450,6 +454,8 @@ pub fn derive_vtab_module(input: TokenStream) -> TokenStream { let eof_fn_name = format_ident!("eof_{}", struct_name); let update_fn_name = format_ident!("update_{}", struct_name); let rowid_fn_name = format_ident!("rowid_{}", struct_name); + let destroy_fn_name = format_ident!("destroy_{}", struct_name); + let best_idx_fn_name = format_ident!("best_idx_{}", struct_name); let expanded = quote!
{ impl #struct_name { @@ -485,13 +491,20 @@ pub fn derive_vtab_module(input: TokenStream) -> TokenStream { cursor: *const ::std::ffi::c_void, argc: i32, argv: *const ::limbo_ext::Value, + idx_str: *const ::std::ffi::c_char, + idx_num: i32, ) -> ::limbo_ext::ResultCode { if cursor.is_null() { return ::limbo_ext::ResultCode::Error; } let cursor = unsafe { &mut *(cursor as *mut <#struct_name as ::limbo_ext::VTabModule>::VCursor) }; let args = ::std::slice::from_raw_parts(argv, argc as usize); - <#struct_name as ::limbo_ext::VTabModule>::filter(cursor, args) + let idx_str = if idx_str.is_null() { + None + } else { + Some((unsafe { ::std::ffi::CStr::from_ptr(idx_str).to_str().unwrap() }, idx_num)) + }; + <#struct_name as ::limbo_ext::VTabModule>::filter(cursor, args, idx_str) } #[no_mangle] @@ -592,6 +605,34 @@ pub fn derive_vtab_module(input: TokenStream) -> TokenStream { <<#struct_name as ::limbo_ext::VTabModule>::VCursor as ::limbo_ext::VTabCursor>::rowid(cursor) } + #[no_mangle] + unsafe extern "C" fn #destroy_fn_name( + vtab: *const ::std::ffi::c_void, + ) -> ::limbo_ext::ResultCode { + if vtab.is_null() { + return ::limbo_ext::ResultCode::Error; + } + + let vtab = &mut *(vtab as *mut #struct_name); + if <#struct_name as VTabModule>::destroy(vtab).is_err() { + return ::limbo_ext::ResultCode::Error; + } + + return ::limbo_ext::ResultCode::OK; + } + + #[no_mangle] + pub unsafe extern "C" fn #best_idx_fn_name( + constraints: *const ::limbo_ext::ConstraintInfo, + n_constraints: i32, + order_by: *const ::limbo_ext::OrderByInfo, + n_order_by: i32, + ) -> ::limbo_ext::ExtIndexInfo { + let constraints = if n_constraints > 0 { std::slice::from_raw_parts(constraints, n_constraints as usize) } else { &[] }; + let order_by = if n_order_by > 0 { std::slice::from_raw_parts(order_by, n_order_by as usize) } else { &[] }; + <#struct_name as ::limbo_ext::VTabModule>::best_index(constraints, order_by).to_ffi() + } + #[no_mangle] pub unsafe extern "C" fn #register_fn_name( api: *const ::limbo_ext::ExtensionApi @@ -614,6 +655,8 @@ pub fn derive_vtab_module(input: TokenStream) -> TokenStream { eof: Self::#eof_fn_name, update: Self::#update_fn_name, rowid: Self::#rowid_fn_name, + destroy: Self::#destroy_fn_name, + best_idx: Self::#best_idx_fn_name, }; (api.register_vtab_module)(api.ctx, name_c, module, <#struct_name as ::limbo_ext::VTabModule>::VTAB_KIND) } diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 000000000..b652f9f25 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,17 @@ +[project] +dependencies = [ + "rich>=14.0.0", +] +name = "limbo" +readme = "README.md" +requires-python = ">=3.13" +version = "0.1.0" + +[tool.uv] +package = false + +[tool.uv.sources] +limbo_test = { workspace = true } + +[tool.uv.workspace] +members = ["testing", "scripts"] diff --git a/scripts/antithesis/publish-docker.sh b/scripts/antithesis/publish-docker.sh index 74de3981b..ee59f3367 100755 --- a/scripts/antithesis/publish-docker.sh +++ b/scripts/antithesis/publish-docker.sh @@ -18,6 +18,6 @@ fi DOCKER_IMAGE=$DOCKER_REPO_URL/$IMAGE_NAME:$DOCKER_IMAGE_VERSION -docker build -f $DOCKERFILE -t $DOCKER_IMAGE $DOCKER_BUILD_ARGS $DOCKER_DIR +docker build --platform linux/amd64 -f $DOCKERFILE -t $DOCKER_IMAGE $DOCKER_BUILD_ARGS $DOCKER_DIR docker push $DOCKER_IMAGE diff --git a/scripts/limbo-sqlite3 b/scripts/limbo-sqlite3 index 8e9f0389a..d448a2d6a 100755 --- a/scripts/limbo-sqlite3 +++ b/scripts/limbo-sqlite3 @@ -1,3 +1,8 @@ #!/bin/bash -target/debug/limbo -m list "$@" +# if RUST_LOG is non-empty, enable tracing 
output +if [ -n "$RUST_LOG" ]; then + target/debug/limbo -m list -t testing/test.log "$@" +else + target/debug/limbo -m list "$@" +fi diff --git a/scripts/pyproject.toml b/scripts/pyproject.toml new file mode 100644 index 000000000..a9d988d2e --- /dev/null +++ b/scripts/pyproject.toml @@ -0,0 +1,9 @@ +[project] +name = "scripts" +version = "0.1.0" +description = "Add your description here" +readme = "README.md" +requires-python = ">=3.13" +dependencies = [ + "pygithub>=2.6.1", +] diff --git a/simulator/Cargo.toml b/simulator/Cargo.toml index 991b72fc5..991f10866 100644 --- a/simulator/Cargo.toml +++ b/simulator/Cargo.toml @@ -19,7 +19,6 @@ limbo_core = { path = "../core" } rand = "0.8.5" rand_chacha = "0.3.1" log = "0.4.20" -tempfile = "3.0.7" env_logger = "0.10.1" regex = "1.11.1" regex-syntax = { version = "0.8.5", default-features = false, features = [ @@ -31,3 +30,5 @@ serde = { version = "1.0", features = ["derive"] } serde_json = { version = "1.0" } notify = "8.0.0" rusqlite = { version = "0.34", features = ["bundled"] } +dirs = "6.0.0" +chrono = { version = "0.4.40", features = ["serde"] } diff --git a/simulator/README.md b/simulator/README.md index 4e9081bd7..87d61479d 100644 --- a/simulator/README.md +++ b/simulator/README.md @@ -15,20 +15,18 @@ Based on these parameters, we randomly generate **interaction plans**. Interacti An example of a property is the following: -```json -{ - "name": "Read your own writes", - "queries": [ - "INSERT INTO t1 (id) VALUES (1)", - "SELECT * FROM t1 WHERE id = 1" - ], - "assertions": [ - "result.rows.length == 1", - "result.rows[0].id == 1" - ] -} +```sql +-- begin testing 'Select-Select-Optimizer' +-- ASSUME table marvelous_ideal exists; +SELECT ((devoted_ahmed = -9142609771.541502 AND loving_wicker = -1246708244.164486)) FROM marvelous_ideal WHERE TRUE; +SELECT * FROM marvelous_ideal WHERE (devoted_ahmed = -9142609771.541502 AND loving_wicker = -1246708244.164486); +-- ASSERT select queries should return the same amount of results; +-- end testing 'Select-Select-Optimizer' ``` +The simulator starts from an initially empty database, adding random interactions based on the workload distribution. It can +add random queries unrelated to the properties without breaking the property invariants to reach more diverse states and respect the configured workload distribution. + The simulator executes the interaction plans in a loop, and checks the assertions. It can add random queries unrelated to the properties without breaking the property invariants to reach more diverse states and respect the configured workload distribution. @@ -44,36 +42,72 @@ The simulator code is broken into 4 main parts: To run the simulator, you can use the following command: ```bash -cargo run +RUST_LOG=limbo_sim=debug cargo run --bin limbo_sim ``` -This prompt (in the future) will invoke a clap command line interface to configure the simulator. For now, the simulator runs with the default configurations changing the `main.rs` file. If you want to see the logs, you can change the `RUST_LOG` environment variable. +The simulator CLI has a few configuration options that you can explore via `--help` flag. 
-```bash -RUST_LOG=info cargo run --bin limbo_sim +```txt +The Limbo deterministic simulator + +Usage: limbo_sim [OPTIONS] + +Options: + -s, --seed <SEED> set seed for reproducible runs + -d, --doublecheck enable doublechecking, run the simulator with the plan twice and check output equality + -n, --maximum-size <MAXIMUM_SIZE> change the maximum size of the randomly generated sequence of interactions [default: 5000] + -k, --minimum-size <MINIMUM_SIZE> change the minimum size of the randomly generated sequence of interactions [default: 1000] + -t, --maximum-time <MAXIMUM_TIME> change the maximum time of the simulation (in seconds) [default: 3600] + -l, --load <LOAD> load plan from the bug base + -w, --watch enable watch mode that reruns the simulation on file changes + --differential run differential testing between sqlite and Limbo + -h, --help Print help + -V, --version Print version ``` ## Adding new properties -Todo +The properties are defined in `simulator/generation/property.rs` in the `Property` enum. Each property is documented with +inline doc comments; an example is given below: -## Adding new generation functions +```rust +/// Insert-Select is a property in which the inserted row +/// must be in the resulting rows of a select query that has a +/// where clause that matches the inserted row. +/// The execution of the property is as follows +/// INSERT INTO <t> VALUES (...) +/// I_0 +/// I_1 +/// ... +/// I_n +/// SELECT * FROM <t> WHERE <predicate> +/// The interactions in the middle have the following constraints: +/// - There will be no errors in the middle interactions. +/// - The inserted row will not be deleted. +/// - The inserted row will not be updated. +/// - The table `t` will not be renamed, dropped, or altered. +InsertValuesSelect { + /// The insert query + insert: Insert, + /// Selected row index + row_index: usize, + /// Additional interactions in the middle of the property + queries: Vec<Query>, + /// The select query + select: Select, +}, +``` -Todo - -## Adding new models - -Todo - -## Coverage with Limbo - -Todo +If you would like to add a new property, you can add a new variant to the `Property` enum, and the corresponding +generation function in `simulator/generation/property.rs`. The generation function should return a `Property` instance, and +it should generate the necessary queries and assertions for the property. ## Automatic Compatibility Testing with SQLite -Todo +You can use the `--differential` flag to run the simulator in differential testing mode. This mode will run the same interaction plan on both Limbo and SQLite, and compare the results. It will also check for any panics or errors in either database. 
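+For example, to pin a differential run to a fixed seed for reproducibility (an illustrative invocation using the flags documented above; `1234` is an arbitrary seed value):
+
+```bash
+RUST_LOG=limbo_sim=debug cargo run --bin limbo_sim -- --differential --seed 1234
+```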
## Resources + - [(reading) TigerBeetle Deterministic Simulation Testing](https://docs.tigerbeetle.com/about/vopr/) - [(reading) sled simulation guide (jepsen-proof engineering)](https://sled.rs/simulation.html) - [(video) "Testing Distributed Systems w/ Deterministic Simulation" by Will Wilson](https://www.youtube.com/watch?v=4fFDFbi3toc) diff --git a/simulator/generation/plan.rs b/simulator/generation/plan.rs index ecad92344..da2dd3c78 100644 --- a/simulator/generation/plan.rs +++ b/simulator/generation/plan.rs @@ -11,7 +11,7 @@ use crate::{ }, table::Value, }, - runner::env::{SimConnection, SimulatorEnvTrait}, + runner::env::SimConnection, SimulatorEnv, }; @@ -38,7 +38,7 @@ impl InteractionPlan { let interactions = interactions.lines().collect::>(); let plan: InteractionPlan = serde_json::from_str( - std::fs::read_to_string(plan_path.with_extension("plan.json")) + std::fs::read_to_string(plan_path.with_extension("json")) .unwrap() .as_str(), ) @@ -71,7 +71,6 @@ impl InteractionPlan { let _ = plan[j].split_off(k); break; } - if interactions[i].contains(plan[j][k].to_string().as_str()) { i += 1; k += 1; @@ -86,7 +85,7 @@ impl InteractionPlan { j += 1; } } - + let _ = plan.split_off(j); plan } } @@ -239,7 +238,7 @@ impl Display for Interaction { } } -type AssertionFunc = dyn Fn(&Vec, &dyn SimulatorEnvTrait) -> Result; +type AssertionFunc = dyn Fn(&Vec, &SimulatorEnv) -> Result; enum AssertionAST { Pick(), @@ -524,7 +523,7 @@ impl Interaction { pub(crate) fn execute_assertion( &self, stack: &Vec, - env: &impl SimulatorEnvTrait, + env: &SimulatorEnv, ) -> Result<()> { match self { Self::Query(_) => { @@ -555,7 +554,7 @@ impl Interaction { pub(crate) fn execute_assumption( &self, stack: &Vec, - env: &dyn SimulatorEnvTrait, + env: &SimulatorEnv, ) -> Result<()> { match self { Self::Query(_) => { @@ -597,15 +596,12 @@ impl Interaction { Self::Fault(fault) => { match fault { Fault::Disconnect => { - match env.connections[conn_index] { - SimConnection::Connected(ref mut conn) => { - conn.close()?; - } - SimConnection::Disconnected => { - return Err(limbo_core::LimboError::InternalError( - "Tried to disconnect a disconnected connection".to_string(), - )); - } + if env.connections[conn_index].is_connected() { + env.connections[conn_index].disconnect(); + } else { + return Err(limbo_core::LimboError::InternalError( + "connection already disconnected".into(), + )); } env.connections[conn_index] = SimConnection::Disconnected; } diff --git a/simulator/generation/property.rs b/simulator/generation/property.rs index cbcd2c479..a876a833d 100644 --- a/simulator/generation/property.rs +++ b/simulator/generation/property.rs @@ -9,7 +9,7 @@ use crate::{ }, table::Value, }, - runner::env::{SimulatorEnv, SimulatorEnvTrait}, + runner::env::SimulatorEnv, }; use super::{ @@ -170,8 +170,8 @@ impl Property { message: format!("table {} exists", insert.table()), func: Box::new({ let table_name = table.clone(); - move |_: &Vec, env: &dyn SimulatorEnvTrait| { - Ok(env.tables().iter().any(|t| t.name == table_name)) + move |_: &Vec, env: &SimulatorEnv| { + Ok(env.tables.iter().any(|t| t.name == table_name)) } }), }); @@ -182,7 +182,7 @@ impl Property { row.iter().map(|v| v.to_string()).collect::>(), insert.table(), ), - func: Box::new(move |stack: &Vec, _: &dyn SimulatorEnvTrait| { + func: Box::new(move |stack: &Vec, _: &SimulatorEnv| { let rows = stack.last().unwrap(); match rows { Ok(rows) => Ok(rows.iter().any(|r| r == &row)), @@ -206,8 +206,8 @@ impl Property { let assumption = Interaction::Assumption(Assertion { 
message: "Double-Create-Failure should not be called on an existing table" .to_string(), - func: Box::new(move |_: &Vec, env: &dyn SimulatorEnvTrait| { - Ok(!env.tables().iter().any(|t| t.name == table_name)) + func: Box::new(move |_: &Vec, env: &SimulatorEnv| { + Ok(!env.tables.iter().any(|t| t.name == table_name)) }), }); @@ -220,11 +220,11 @@ impl Property { message: "creating two tables with the name should result in a failure for the second query" .to_string(), - func: Box::new(move |stack: &Vec, _: &dyn SimulatorEnvTrait| { + func: Box::new(move |stack: &Vec, _: &SimulatorEnv| { let last = stack.last().unwrap(); match last { Ok(_) => Ok(false), - Err(e) => Ok(e.to_string().contains(&format!("Table {table_name} already exists"))), + Err(e) => Ok(e.to_string().to_lowercase().contains(&format!("table {table_name} already exists"))), } }), }); @@ -245,8 +245,8 @@ impl Property { message: format!("table {} exists", table_name), func: Box::new({ let table_name = table_name.clone(); - move |_: &Vec, env: &dyn SimulatorEnvTrait| { - Ok(env.tables().iter().any(|t| t.name == table_name)) + move |_: &Vec, env: &SimulatorEnv| { + Ok(env.tables.iter().any(|t| t.name == table_name)) } }), }); @@ -257,7 +257,7 @@ impl Property { let assertion = Interaction::Assertion(Assertion { message: "select query should respect the limit clause".to_string(), - func: Box::new(move |stack: &Vec, _: &dyn SimulatorEnvTrait| { + func: Box::new(move |stack: &Vec, _: &SimulatorEnv| { let last = stack.last().unwrap(); match last { Ok(rows) => Ok(limit >= rows.len()), @@ -281,8 +281,8 @@ impl Property { message: format!("table {} exists", table), func: Box::new({ let table = table.clone(); - move |_: &Vec, env: &dyn SimulatorEnvTrait| { - Ok(env.tables().iter().any(|t| t.name == table)) + move |_: &Vec, env: &SimulatorEnv| { + Ok(env.tables.iter().any(|t| t.name == table)) } }), }); @@ -292,7 +292,7 @@ impl Property { "select '{}' should return no values for table '{}'", predicate, table, ), - func: Box::new(move |stack: &Vec, _: &dyn SimulatorEnvTrait| { + func: Box::new(move |stack: &Vec, _: &SimulatorEnv| { let rows = stack.last().unwrap(); match rows { Ok(rows) => Ok(rows.is_empty()), @@ -332,8 +332,8 @@ impl Property { message: format!("table {} exists", table), func: Box::new({ let table = table.clone(); - move |_: &Vec, env: &dyn SimulatorEnvTrait| { - Ok(env.tables().iter().any(|t| t.name == table)) + move |_: &Vec, env: &SimulatorEnv| { + Ok(env.tables.iter().any(|t| t.name == table)) } }), }); @@ -345,7 +345,7 @@ impl Property { "select query should result in an error for table '{}'", table ), - func: Box::new(move |stack: &Vec, _: &dyn SimulatorEnvTrait| { + func: Box::new(move |stack: &Vec, _: &SimulatorEnv| { let last = stack.last().unwrap(); match last { Ok(_) => Ok(false), @@ -377,8 +377,8 @@ impl Property { message: format!("table {} exists", table), func: Box::new({ let table = table.clone(); - move |_: &Vec, env: &dyn SimulatorEnvTrait| { - Ok(env.tables().iter().any(|t| t.name == table)) + move |_: &Vec, env: &SimulatorEnv| { + Ok(env.tables.iter().any(|t| t.name == table)) } }), }); @@ -401,13 +401,13 @@ impl Property { let assertion = Interaction::Assertion(Assertion { message: "select queries should return the same amount of results".to_string(), - func: Box::new(move |stack: &Vec, _: &dyn SimulatorEnvTrait| { + func: Box::new(move |stack: &Vec, _: &SimulatorEnv| { let select_star = stack.last().unwrap(); let select_predicate = stack.get(stack.len() - 2).unwrap(); match (select_predicate, 
select_star) { (Ok(rows1), Ok(rows2)) => { // If rows1 results have more than 1 column, there is a problem - if rows1.iter().find(|vs| vs.len() > 1).is_some() { + if rows1.iter().any(|vs| vs.len() > 1) { return Err(LimboError::InternalError( "Select query without the star should return only one column".to_string(), )); diff --git a/simulator/main.rs b/simulator/main.rs index d28c2b017..34fdac17b 100644 --- a/simulator/main.rs +++ b/simulator/main.rs @@ -2,11 +2,11 @@ use clap::Parser; use generation::plan::{Interaction, InteractionPlan, InteractionPlanState}; use generation::ArbitraryFrom; -use limbo_core::Database; use notify::event::{DataChange, ModifyKind}; use notify::{EventKind, RecursiveMode, Watcher}; use rand::prelude::*; -use runner::cli::SimulatorCLI; +use runner::bugbase::{Bug, BugBase, LoadedBug}; +use runner::cli::{SimulatorCLI, SimulatorCommand}; use runner::env::SimulatorEnv; use runner::execution::{execute_plans, Execution, ExecutionHistory, ExecutionResult}; use runner::{differential, watch}; @@ -15,101 +15,159 @@ use std::backtrace::Backtrace; use std::io::Write; use std::path::{Path, PathBuf}; use std::sync::{mpsc, Arc, Mutex}; -use tempfile::TempDir; mod generation; mod model; mod runner; mod shrink; struct Paths { + base: PathBuf, db: PathBuf, plan: PathBuf, shrunk_plan: PathBuf, history: PathBuf, doublecheck_db: PathBuf, shrunk_db: PathBuf, + diff_db: PathBuf, } impl Paths { - fn new(output_dir: &Path, shrink: bool, doublecheck: bool) -> Self { - let paths = Paths { - db: PathBuf::from(output_dir).join("simulator.db"), - plan: PathBuf::from(output_dir).join("simulator.plan"), - shrunk_plan: PathBuf::from(output_dir).join("simulator_shrunk.plan"), - history: PathBuf::from(output_dir).join("simulator.history"), - doublecheck_db: PathBuf::from(output_dir).join("simulator_double.db"), - shrunk_db: PathBuf::from(output_dir).join("simulator_shrunk.db"), - }; - - // Print the seed, the locations of the database and the plan file - log::info!("database path: {:?}", paths.db); - if doublecheck { - log::info!("doublecheck database path: {:?}", paths.doublecheck_db); - } else if shrink { - log::info!("shrunk database path: {:?}", paths.shrunk_db); + fn new(output_dir: &Path) -> Self { + Paths { + base: output_dir.to_path_buf(), + db: PathBuf::from(output_dir).join("test.db"), + plan: PathBuf::from(output_dir).join("plan.sql"), + shrunk_plan: PathBuf::from(output_dir).join("shrunk.sql"), + history: PathBuf::from(output_dir).join("history.txt"), + doublecheck_db: PathBuf::from(output_dir).join("double.db"), + shrunk_db: PathBuf::from(output_dir).join("shrunk.db"), + diff_db: PathBuf::from(output_dir).join("diff.db"), } - log::info!("simulator plan path: {:?}", paths.plan); - log::info!( - "simulator plan serialized path: {:?}", - paths.plan.with_extension("plan.json") - ); - if shrink { - log::info!("shrunk plan path: {:?}", paths.shrunk_plan); - } - log::info!("simulator history path: {:?}", paths.history); - - paths } } fn main() -> Result<(), String> { init_logger(); - - let cli_opts = SimulatorCLI::parse(); + let mut cli_opts = SimulatorCLI::parse(); cli_opts.validate()?; - let seed = cli_opts.seed.unwrap_or_else(|| thread_rng().next_u64()); + match cli_opts.subcommand { + Some(SimulatorCommand::List) => { + let mut bugbase = BugBase::load().map_err(|e| format!("{:?}", e))?; + bugbase.list_bugs() + } + Some(SimulatorCommand::Loop { n, short_circuit }) => { + banner(); + for i in 0..n { + println!("iteration {}", i); + let result = testing_main(&cli_opts); + if result.is_err() 
&& short_circuit { + println!("short circuiting after {} iterations", i); + return result; + } else if result.is_err() { + println!("iteration {} failed", i); + } else { + println!("iteration {} succeeded", i); + } + } + Ok(()) + } + Some(SimulatorCommand::Test { filter }) => { + let mut bugbase = BugBase::load().map_err(|e| format!("{:?}", e))?; + let bugs = bugbase.load_bugs()?; + let mut bugs = bugs + .into_iter() + .flat_map(|bug| { + let runs = bug + .runs + .into_iter() + .filter_map(|run| run.error.clone().map(|_| run)) + .filter(|run| run.error.as_ref().unwrap().contains(&filter)) + .map(|run| run.cli_options) + .collect::>(); - let output_dir = match &cli_opts.output_dir { - Some(dir) => Path::new(dir).to_path_buf(), - None => TempDir::new().map_err(|e| format!("{:?}", e))?.into_path(), - }; + runs.into_iter() + .map(|mut cli_opts| { + cli_opts.seed = Some(bug.seed); + cli_opts.load = None; + cli_opts + }) + .collect::>() + }) + .collect::>(); - banner(); - let paths = Paths::new(&output_dir, cli_opts.shrink, cli_opts.doublecheck); + bugs.sort(); + bugs.dedup_by(|a, b| a == b); - log::info!("seed: {}", seed); + println!( + "found {} previously triggered configurations with {}", + bugs.len(), + filter + ); + + let results = bugs + .into_iter() + .map(|cli_opts| testing_main(&cli_opts)) + .collect::>(); + + let (successes, failures): (Vec<_>, Vec<_>) = + results.into_iter().partition(|result| result.is_ok()); + println!("the results of the change are:"); + println!("\t{} successful runs", successes.len()); + println!("\t{} failed runs", failures.len()); + Ok(()) + } + None => { + banner(); + testing_main(&cli_opts) + } + } +} + +fn testing_main(cli_opts: &SimulatorCLI) -> Result<(), String> { + let mut bugbase = BugBase::load().map_err(|e| format!("{:?}", e))?; let last_execution = Arc::new(Mutex::new(Execution::new(0, 0, 0))); - let (env, plans) = setup_simulation(seed, &cli_opts, &paths.db, &paths.plan); + let (seed, env, plans) = setup_simulation(&mut bugbase, cli_opts, |p| &p.plan, |p| &p.db); + + let paths = bugbase.paths(seed); + + // Create the output directory if it doesn't exist + if !paths.base.exists() { + std::fs::create_dir_all(&paths.base).map_err(|e| format!("{:?}", e))?; + } if cli_opts.watch { - watch_mode(seed, &cli_opts, &paths, last_execution.clone()).unwrap(); - } else if cli_opts.differential { - differential_testing(env, plans, last_execution.clone()) - } else { - run_simulator(&cli_opts, &paths, env, plans, last_execution.clone()); + watch_mode(seed, cli_opts, &paths, last_execution.clone()).unwrap(); + return Ok(()); } + let result = if cli_opts.differential { + differential_testing( + seed, + &mut bugbase, + cli_opts, + &paths, + plans, + last_execution.clone(), + ) + } else { + run_simulator( + seed, + &mut bugbase, + cli_opts, + &paths, + env, + plans, + last_execution.clone(), + ) + }; + // Print the seed, the locations of the database and the plan file at the end again for easily accessing them. 
- println!("database path: {:?}", paths.db); - if cli_opts.doublecheck { - println!("doublecheck database path: {:?}", paths.doublecheck_db); - } else if cli_opts.shrink { - println!("shrunk database path: {:?}", paths.shrunk_db); - } - println!("simulator plan path: {:?}", paths.plan); - println!( - "simulator plan serialized path: {:?}", - paths.plan.with_extension("plan.json") - ); - if cli_opts.shrink { - println!("shrunk plan path: {:?}", paths.shrunk_plan); - } - println!("simulator history path: {:?}", paths.history); println!("seed: {}", seed); + println!("path: {}", paths.base.display()); - Ok(()) + result } fn watch_mode( @@ -140,14 +198,13 @@ fn watch_mode( std::panic::catch_unwind(|| { let plan: Vec> = InteractionPlan::compute_via_diff(&paths.plan); - let mut env = SimulatorEnv::new(seed, cli_opts, &paths.db); plan.iter().for_each(|is| { is.iter().for_each(|i| { i.shadow(&mut env); }); }); - let env = Arc::new(Mutex::new(env.clone())); + let env = Arc::new(Mutex::new(env.clone_without_connections())); watch::run_simulation(env, &mut [plan], last_execution.clone()) }), last_execution.clone(), @@ -160,7 +217,6 @@ fn watch_mode( SandboxedResult::Panicked { error, .. } | SandboxedResult::FoundBug { error, .. } => { log::error!("simulation failed: '{}'", error); - println!("simulation failed: '{}'", error); } } } @@ -173,12 +229,14 @@ fn watch_mode( } fn run_simulator( + seed: u64, + bugbase: &mut BugBase, cli_opts: &SimulatorCLI, paths: &Paths, env: SimulatorEnv, plans: Vec, last_execution: Arc>, -) { +) -> Result<(), String> { std::panic::set_hook(Box::new(move |info| { log::error!("panic occurred"); @@ -204,13 +262,23 @@ fn run_simulator( ); if cli_opts.doublecheck { - doublecheck(env.clone(), paths, &plans, last_execution.clone(), result); + doublecheck( + seed, + bugbase, + cli_opts, + paths, + &plans, + last_execution.clone(), + result, + ) } else { // No doublecheck, run shrinking if panicking or found a bug. 
match &result { SandboxedResult::Correct => { log::info!("simulation succeeded"); println!("simulation succeeded"); + bugbase.mark_successful_run(seed, cli_opts).unwrap(); + Ok(()) } SandboxedResult::Panicked { error, @@ -238,61 +306,74 @@ fn run_simulator( } log::error!("simulation failed: '{}'", error); - println!("simulation failed: '{}'", error); + log::info!("Starting to shrink"); - if cli_opts.shrink { - log::info!("Starting to shrink"); + let shrunk_plans = plans + .iter() + .map(|plan| { + let shrunk = plan.shrink_interaction_plan(last_execution); + log::info!("{}", shrunk.stats()); + shrunk + }) + .collect::>(); - let shrunk_plans = plans - .iter() - .map(|plan| { - let shrunk = plan.shrink_interaction_plan(last_execution); - log::info!("{}", shrunk.stats()); - shrunk - }) - .collect::>(); + // Write the shrunk plan to a file + let mut f = std::fs::File::create(&paths.shrunk_plan).unwrap(); + f.write_all(shrunk_plans[0].to_string().as_bytes()).unwrap(); - // Write the shrunk plan to a file - let mut f = std::fs::File::create(&paths.shrunk_plan).unwrap(); - f.write_all(shrunk_plans[0].to_string().as_bytes()).unwrap(); + let last_execution = Arc::new(Mutex::new(*last_execution)); + let env = SimulatorEnv::new(seed, cli_opts, &paths.shrunk_db); - let last_execution = Arc::new(Mutex::new(*last_execution)); - - let shrunk = SandboxedResult::from( - std::panic::catch_unwind(|| { - run_simulation( - env.clone(), - &mut shrunk_plans.clone(), - last_execution.clone(), - ) - }), - last_execution, - ); - - match (&shrunk, &result) { - ( - SandboxedResult::Panicked { error: e1, .. }, - SandboxedResult::Panicked { error: e2, .. }, + let env = Arc::new(Mutex::new(env)); + let shrunk = SandboxedResult::from( + std::panic::catch_unwind(|| { + run_simulation( + env.clone(), + &mut shrunk_plans.clone(), + last_execution.clone(), ) - | ( - SandboxedResult::FoundBug { error: e1, .. }, - SandboxedResult::FoundBug { error: e2, .. }, - ) => { - if e1 != e2 { - log::error!( - "shrinking failed, the error was not properly reproduced" - ); - } else { - log::info!("shrinking succeeded"); - } - } - (_, SandboxedResult::Correct) => { - unreachable!("shrinking should never be called on a correct simulation") - } - _ => { + }), + last_execution, + ); + + match (&shrunk, &result) { + ( + SandboxedResult::Panicked { error: e1, .. }, + SandboxedResult::Panicked { error: e2, .. }, + ) + | ( + SandboxedResult::FoundBug { error: e1, .. }, + SandboxedResult::FoundBug { error: e2, .. 
}, + ) => { + if e1 != e2 { log::error!("shrinking failed, the error was not properly reproduced"); + bugbase + .add_bug(seed, plans[0].clone(), Some(error.clone()), cli_opts) + .unwrap(); + Err(format!("failed with error: '{}'", error)) + } else { + log::info!( + "shrinking succeeded, reduced the plan from {} to {}", + plans[0].plan.len(), + shrunk_plans[0].plan.len() + ); + // Save the shrunk database + bugbase + .add_bug(seed, shrunk_plans[0].clone(), Some(e1.clone()), cli_opts) + .unwrap(); + Err(format!("failed with error: '{}'", e1)) } } + (_, SandboxedResult::Correct) => { + unreachable!("shrinking should never be called on a correct simulation") + } + _ => { + log::error!("shrinking failed, the error was not properly reproduced"); + bugbase + .add_bug(seed, plans[0].clone(), Some(error.clone()), cli_opts) + .unwrap(); + Err(format!("failed with error: '{}'", error)) + } } } } @@ -300,21 +381,16 @@ fn run_simulator( } fn doublecheck( - env: Arc>, + seed: u64, + bugbase: &mut BugBase, + cli_opts: &SimulatorCLI, paths: &Paths, plans: &[InteractionPlan], last_execution: Arc>, result: SandboxedResult, -) { - { - let mut env_ = env.lock().unwrap(); - env_.db = Database::open_file( - env_.io.clone(), - paths.doublecheck_db.to_str().unwrap(), - false, - ) - .unwrap(); - } +) -> Result<(), String> { + let env = SimulatorEnv::new(seed, cli_opts, &paths.doublecheck_db); + let env = Arc::new(Mutex::new(env)); // Run the simulation again let result2 = SandboxedResult::from( @@ -324,32 +400,24 @@ fn doublecheck( last_execution.clone(), ); - match (result, result2) { + let doublecheck_result = match (result, result2) { (SandboxedResult::Correct, SandboxedResult::Panicked { .. }) => { - log::error!("doublecheck failed! first run succeeded, but second run panicked."); + Err("first run succeeded, but second run panicked.".to_string()) } (SandboxedResult::FoundBug { .. }, SandboxedResult::Panicked { .. }) => { - log::error!( - "doublecheck failed! first run failed an assertion, but second run panicked." - ); + Err("first run failed an assertion, but second run panicked.".to_string()) } (SandboxedResult::Panicked { .. }, SandboxedResult::Correct) => { - log::error!("doublecheck failed! first run panicked, but second run succeeded."); + Err("first run panicked, but second run succeeded.".to_string()) } (SandboxedResult::Panicked { .. }, SandboxedResult::FoundBug { .. }) => { - log::error!( - "doublecheck failed! first run panicked, but second run failed an assertion." - ); + Err("first run panicked, but second run failed an assertion.".to_string()) } (SandboxedResult::Correct, SandboxedResult::FoundBug { .. }) => { - log::error!( - "doublecheck failed! first run succeeded, but second run failed an assertion." - ); + Err("first run succeeded, but second run failed an assertion.".to_string()) } (SandboxedResult::FoundBug { .. }, SandboxedResult::Correct) => { - log::error!( - "doublecheck failed! first run failed an assertion, but second run succeeded." - ); + Err("first run failed an assertion, but second run succeeded.".to_string()) } (SandboxedResult::Correct, SandboxedResult::Correct) | (SandboxedResult::FoundBug { .. }, SandboxedResult::FoundBug { .. }) @@ -358,34 +426,76 @@ fn doublecheck( let db_bytes = std::fs::read(&paths.db).unwrap(); let doublecheck_db_bytes = std::fs::read(&paths.doublecheck_db).unwrap(); if db_bytes != doublecheck_db_bytes { - log::error!("doublecheck failed! database files are different."); + Err( + "database files are different, check binary diffs for more details." 
+ .to_string(), + ) } else { - log::info!("doublecheck succeeded! database files are the same."); + Ok(()) } } + }; + + match doublecheck_result { + Ok(_) => { + log::info!("doublecheck succeeded"); + println!("doublecheck succeeded"); + bugbase.mark_successful_run(seed, cli_opts)?; + Ok(()) + } + Err(e) => { + log::error!("doublecheck failed: '{}'", e); + bugbase + .add_bug(seed, plans[0].clone(), Some(e.clone()), cli_opts) + .unwrap(); + Err(format!("doublecheck failed: '{}'", e)) + } } } fn differential_testing( - env: SimulatorEnv, + seed: u64, + bugbase: &mut BugBase, + cli_opts: &SimulatorCLI, + paths: &Paths, plans: Vec, last_execution: Arc>, -) { - let env = Arc::new(Mutex::new(env)); +) -> Result<(), String> { + let env = Arc::new(Mutex::new(SimulatorEnv::new(seed, cli_opts, &paths.db))); + let rusqlite_env = Arc::new(Mutex::new(SimulatorEnv::new( + seed, + cli_opts, + &paths.diff_db, + ))); + let result = SandboxedResult::from( std::panic::catch_unwind(|| { let plan = plans[0].clone(); - differential::run_simulation(env, &mut [plan], last_execution.clone()) + differential::run_simulation( + env, + rusqlite_env, + &|| rusqlite::Connection::open(paths.diff_db.clone()).unwrap(), + &mut [plan], + last_execution.clone(), + ) }), last_execution.clone(), ); - if let SandboxedResult::Correct = result { - log::info!("simulation succeeded"); - println!("simulation succeeded"); - } else { - log::error!("simulation failed"); - println!("simulation failed"); + match result { + SandboxedResult::Correct => { + log::info!("simulation succeeded, output of Limbo conforms to SQLite"); + println!("simulation succeeded, output of Limbo conforms to SQLite"); + bugbase.mark_successful_run(seed, cli_opts).unwrap(); + Ok(()) + } + SandboxedResult::Panicked { error, .. } | SandboxedResult::FoundBug { error, .. } => { + log::error!("simulation failed: '{}'", error); + bugbase + .add_bug(seed, plans[0].clone(), Some(error.clone()), cli_opts) + .unwrap(); + Err(format!("simulation failed: '{}'", error)) + } } } @@ -443,54 +553,73 @@ impl SandboxedResult { } fn setup_simulation( - mut seed: u64, + bugbase: &mut BugBase, cli_opts: &SimulatorCLI, - db_path: &Path, - plan_path: &Path, -) -> (SimulatorEnv, Vec) { - if let Some(load) = &cli_opts.load { - let seed_path = PathBuf::from(load).with_extension("seed"); - let seed_str = std::fs::read_to_string(&seed_path).unwrap(); - seed = seed_str.parse().unwrap(); - } + plan_path: fn(&Paths) -> &Path, + db_path: fn(&Paths) -> &Path, +) -> (u64, SimulatorEnv, Vec) { + if let Some(seed) = &cli_opts.load { + let seed = seed.parse::().expect("seed should be a number"); + let bug = bugbase + .get_bug(seed) + .unwrap_or_else(|| panic!("bug '{}' not found in bug base", seed)); - let mut env = SimulatorEnv::new(seed, cli_opts, db_path); + let paths = bugbase.paths(seed); + if !paths.base.exists() { + std::fs::create_dir_all(&paths.base).unwrap(); + } + let env = SimulatorEnv::new(bug.seed(), cli_opts, db_path(&paths)); - // todo: the loading works correctly because of a hacky decision - // Right now, the plan generation is the only point we use the rng, so the environment doesn't - // even need it. In the future, especially with multi-connections and multi-threading, we might - // use the RNG for more things such as scheduling, so this assumption will fail. When that happens, - // we'll need to reachitect this logic by saving and loading RNG state. 
- let plans = if let Some(load) = &cli_opts.load { - log::info!("Loading database interaction plan..."); - let plan = std::fs::read_to_string(load).unwrap(); - let plan: InteractionPlan = serde_json::from_str(&plan).unwrap(); - vec![plan] + let plan = match bug { + Bug::Loaded(LoadedBug { plan, .. }) => plan.clone(), + Bug::Unloaded { seed } => { + let seed = *seed; + bugbase + .load_bug(seed) + .unwrap_or_else(|_| panic!("could not load bug '{}' in bug base", seed)) + .plan + .clone() + } + }; + + std::fs::write(plan_path(&paths), plan.to_string()).unwrap(); + std::fs::write( + plan_path(&paths).with_extension("json"), + serde_json::to_string_pretty(&plan).unwrap(), + ) + .unwrap(); + let plans = vec![plan]; + (seed, env, plans) } else { + let seed = cli_opts.seed.unwrap_or_else(|| { + let mut rng = rand::thread_rng(); + rng.next_u64() + }); + + let paths = bugbase.paths(seed); + if !paths.base.exists() { + std::fs::create_dir_all(&paths.base).unwrap(); + } + let mut env = SimulatorEnv::new(seed, cli_opts, &paths.db); + log::info!("Generating database interaction plan..."); - (1..=env.opts.max_connections) + + let plans = (1..=env.opts.max_connections) .map(|_| InteractionPlan::arbitrary_from(&mut env.rng.clone(), &mut env)) - .collect::>() - }; + .collect::>(); - // todo: for now, we only use 1 connection, so it's safe to use the first plan. - let plan = plans[0].clone(); - - let mut f = std::fs::File::create(plan_path).unwrap(); - // todo: create a detailed plan file with all the plans. for now, we only use 1 connection, so it's safe to use the first plan. - f.write_all(plan.to_string().as_bytes()).unwrap(); - - let serialized_plan_path = plan_path.with_extension("plan.json"); - let mut f = std::fs::File::create(&serialized_plan_path).unwrap(); - f.write_all(serde_json::to_string(&plan).unwrap().as_bytes()) + // todo: for now, we only use 1 connection, so it's safe to use the first plan. + let plan = &plans[0]; + log::info!("{}", plan.stats()); + std::fs::write(plan_path(&paths), plan.to_string()).unwrap(); + std::fs::write( + plan_path(&paths).with_extension("json"), + serde_json::to_string_pretty(&plan).unwrap(), + ) .unwrap(); - let seed_path = plan_path.with_extension("seed"); - let mut f = std::fs::File::create(&seed_path).unwrap(); - f.write_all(seed.to_string().as_bytes()).unwrap(); - - log::info!("{}", plan.stats()); - (env, plans) + (seed, env, plans) + } } fn run_simulation( diff --git a/simulator/runner/bugbase.rs b/simulator/runner/bugbase.rs new file mode 100644 index 000000000..c59744046 --- /dev/null +++ b/simulator/runner/bugbase.rs @@ -0,0 +1,423 @@ +use std::{ + collections::HashMap, + io::{self, Write}, + path::PathBuf, + process::Command, + time::SystemTime, +}; + +use chrono::{DateTime, Utc}; +use serde::{Deserialize, Serialize}; + +use crate::{InteractionPlan, Paths}; + +use super::cli::SimulatorCLI; + +/// A bug is a run that has been identified as buggy. +#[derive(Clone)] +pub(crate) enum Bug { + Unloaded { seed: u64 }, + Loaded(LoadedBug), +} + +#[derive(Clone)] +pub struct LoadedBug { + /// The seed of the bug. + pub seed: u64, + /// The plan of the bug. + pub plan: InteractionPlan, + /// The runs of the bug. + pub runs: Vec, +} + +#[derive(Clone, Serialize, Deserialize)] +pub(crate) struct BugRun { + /// Commit hash of the current version of Limbo. + pub(crate) hash: String, + /// Timestamp of the run. + #[serde(with = "chrono::serde::ts_seconds")] + pub(crate) timestamp: DateTime, + /// Error message of the run. 
+ pub(crate) error: Option, + /// Options + pub(crate) cli_options: SimulatorCLI, +} + +impl Bug { + /// Check if the bug is loaded. + pub(crate) fn is_loaded(&self) -> bool { + match self { + Bug::Unloaded { .. } => false, + Bug::Loaded { .. } => true, + } + } + + /// Get the seed of the bug. + pub(crate) fn seed(&self) -> u64 { + match self { + Bug::Unloaded { seed } => *seed, + Bug::Loaded(LoadedBug { seed, .. }) => *seed, + } + } +} + +/// Bug Base is a local database of buggy runs. +pub(crate) struct BugBase { + /// Path to the bug base directory. + path: PathBuf, + /// The list of buggy runs, uniquely identified by their seed + bugs: HashMap, +} + +impl BugBase { + /// Create a new bug base. + fn new(path: PathBuf) -> Result { + let mut bugs = HashMap::new(); + // list all the bugs in the path as directories + if let Ok(entries) = std::fs::read_dir(&path) { + for entry in entries.flatten() { + if entry.file_type().is_ok_and(|ft| ft.is_dir()) { + let seed = entry + .file_name() + .to_string_lossy() + .to_string() + .parse::() + .or(Err(format!( + "failed to parse seed from directory name {}", + entry.file_name().to_string_lossy() + )))?; + bugs.insert(seed, Bug::Unloaded { seed }); + } + } + } + + Ok(Self { path, bugs }) + } + + /// Load the bug base from one of the potential paths. + pub(crate) fn load() -> Result { + let potential_paths = vec![ + // limbo project directory + BugBase::get_limbo_project_dir()?, + // home directory + dirs::home_dir().ok_or("should be able to get home directory".to_string())?, + // current directory + std::env::current_dir() + .or(Err("should be able to get current directory".to_string()))?, + ]; + + for path in &potential_paths { + let path = path.join(".bugbase"); + if path.exists() { + return BugBase::new(path); + } + } + + for path in potential_paths { + let path = path.join(".bugbase"); + if std::fs::create_dir_all(&path).is_ok() { + log::info!("bug base created at {}", path.display()); + return BugBase::new(path); + } + } + + Err("failed to create bug base".to_string()) + } + + /// Load the bug base from one of the potential paths. + pub(crate) fn interactive_load() -> Result { + let potential_paths = vec![ + // limbo project directory + BugBase::get_limbo_project_dir()?, + // home directory + dirs::home_dir().ok_or("should be able to get home directory".to_string())?, + // current directory + std::env::current_dir() + .or(Err("should be able to get current directory".to_string()))?, + ]; + + for path in potential_paths { + let path = path.join(".bugbase"); + if path.exists() { + return BugBase::new(path); + } + } + + println!("select bug base location:"); + println!("1. limbo project directory"); + println!("2. home directory"); + println!("3. 
current directory"); + print!("> "); + io::stdout().flush().unwrap(); + let mut choice = String::new(); + io::stdin() + .read_line(&mut choice) + .expect("failed to read line"); + + let choice = choice + .trim() + .parse::() + .or(Err(format!("invalid choice {choice}")))?; + let path = match choice { + 1 => BugBase::get_limbo_project_dir()?.join(".bugbase"), + 2 => { + let home = std::env::var("HOME").or(Err("failed to get home directory"))?; + PathBuf::from(home).join(".bugbase") + } + 3 => PathBuf::from(".bugbase"), + _ => return Err(format!("invalid choice {choice}")), + }; + + if path.exists() { + unreachable!("bug base already exists at {}", path.display()); + } else { + std::fs::create_dir_all(&path).or(Err("failed to create bug base"))?; + log::info!("bug base created at {}", path.display()); + BugBase::new(path) + } + } + + /// Add a new bug to the bug base. + pub(crate) fn add_bug( + &mut self, + seed: u64, + plan: InteractionPlan, + error: Option, + cli_options: &SimulatorCLI, + ) -> Result<(), String> { + log::debug!("adding bug with seed {}", seed); + let bug = self.get_bug(seed); + + if bug.is_some() { + let mut bug = self.load_bug(seed)?; + bug.plan = plan.clone(); + bug.runs.push(BugRun { + hash: Self::get_current_commit_hash()?, + timestamp: SystemTime::now().into(), + error, + cli_options: cli_options.clone(), + }); + self.bugs.insert(seed, Bug::Loaded(bug.clone())); + } else { + let bug = LoadedBug { + seed, + plan: plan.clone(), + runs: vec![BugRun { + hash: Self::get_current_commit_hash()?, + timestamp: SystemTime::now().into(), + error, + cli_options: cli_options.clone(), + }], + }; + self.bugs.insert(seed, Bug::Loaded(bug.clone())); + } + // Save the bug to the bug base. + self.save_bug(seed) + } + + /// Get a bug from the bug base. + pub(crate) fn get_bug(&self, seed: u64) -> Option<&Bug> { + self.bugs.get(&seed) + } + + /// Save a bug to the bug base. + fn save_bug(&self, seed: u64) -> Result<(), String> { + let bug = self.get_bug(seed); + + match bug { + None | Some(Bug::Unloaded { .. }) => { + unreachable!("save should only be called within add_bug"); + } + Some(Bug::Loaded(bug)) => { + let bug_path = self.path.join(seed.to_string()); + std::fs::create_dir_all(&bug_path) + .or(Err("should be able to create bug directory".to_string()))?; + + let seed_path = bug_path.join("seed.txt"); + std::fs::write(&seed_path, seed.to_string()) + .or(Err("should be able to write seed file".to_string()))?; + + let plan_path = bug_path.join("plan.json"); + std::fs::write( + &plan_path, + serde_json::to_string_pretty(&bug.plan) + .or(Err("should be able to serialize plan".to_string()))?, + ) + .or(Err("should be able to write plan file".to_string()))?; + + let readable_plan_path = bug_path.join("plan.sql"); + std::fs::write(&readable_plan_path, bug.plan.to_string()) + .or(Err("should be able to write readable plan file".to_string()))?; + + let runs_path = bug_path.join("runs.json"); + std::fs::write( + &runs_path, + serde_json::to_string_pretty(&bug.runs) + .or(Err("should be able to serialize runs".to_string()))?, + ) + .or(Err("should be able to write runs file".to_string()))?; + } + } + + Ok(()) + } + + pub(crate) fn load_bug(&mut self, seed: u64) -> Result { + let seed_match = self.bugs.get(&seed); + + match seed_match { + None => Err(format!("No bugs found for seed {}", seed)), + Some(Bug::Unloaded { .. 
}) => { + let plan = + std::fs::read_to_string(self.path.join(seed.to_string()).join("plan.json")) + .or(Err(format!( + "should be able to read plan file at {}", + self.path.join(seed.to_string()).join("plan.json").display() + )))?; + let plan: InteractionPlan = serde_json::from_str(&plan) + .or(Err("should be able to deserialize plan".to_string()))?; + + let runs = + std::fs::read_to_string(self.path.join(seed.to_string()).join("runs.json")) + .or(Err("should be able to read runs file".to_string()))?; + let runs: Vec = serde_json::from_str(&runs) + .or(Err("should be able to deserialize runs".to_string()))?; + + let bug = LoadedBug { + seed, + plan: plan.clone(), + runs, + }; + + self.bugs.insert(seed, Bug::Loaded(bug.clone())); + log::debug!("Loaded bug with seed {}", seed); + Ok(bug) + } + Some(Bug::Loaded(bug)) => { + log::warn!( + "Bug with seed {} is already loaded, returning the existing plan", + seed + ); + Ok(bug.clone()) + } + } + } + + pub(crate) fn mark_successful_run( + &mut self, + seed: u64, + cli_options: &SimulatorCLI, + ) -> Result<(), String> { + let bug = self.get_bug(seed); + match bug { + None => { + log::debug!("removing bug base entry for {}", seed); + std::fs::remove_dir_all(self.path.join(seed.to_string())) + .or(Err("should be able to remove bug directory".to_string()))?; + } + Some(_) => { + let mut bug = self.load_bug(seed)?; + bug.runs.push(BugRun { + hash: Self::get_current_commit_hash()?, + timestamp: SystemTime::now().into(), + error: None, + cli_options: cli_options.clone(), + }); + self.bugs.insert(seed, Bug::Loaded(bug.clone())); + // Save the bug to the bug base. + self.save_bug(seed) + .or(Err("should be able to save bug".to_string()))?; + log::debug!("Updated bug with seed {}", seed); + } + } + + Ok(()) + } + + pub(crate) fn load_bugs(&mut self) -> Result, String> { + let seeds = self.bugs.keys().map(|seed| *seed).collect::>(); + + seeds + .iter() + .map(|seed| self.load_bug(*seed)) + .collect::, _>>() + } + + pub(crate) fn list_bugs(&mut self) -> Result<(), String> { + let bugs = self.load_bugs()?; + for bug in bugs { + println!("seed: {}", bug.seed); + println!("plan: {}", bug.plan.stats()); + println!("runs:"); + println!(" ------------------"); + for run in &bug.runs { + println!(" - hash: {}", run.hash); + println!(" timestamp: {}", run.timestamp); + println!( + " type: {}", + if run.cli_options.differential { + "differential" + } else if run.cli_options.doublecheck { + "doublecheck" + } else { + "default" + } + ); + if let Some(error) = &run.error { + println!(" error: {}", error); + } + } + println!(" ------------------"); + } + + Ok(()) + } +} + +impl BugBase { + /// Get the path to the bug base directory. + pub(crate) fn path(&self) -> &PathBuf { + &self.path + } + + /// Get the path to the database file for a given seed. + pub(crate) fn db_path(&self, seed: u64) -> PathBuf { + self.path.join(format!("{}/test.db", seed)) + } + + /// Get paths to all the files for a given seed. + pub(crate) fn paths(&self, seed: u64) -> Paths { + let base = self.path.join(format!("{}/", seed)); + Paths::new(&base) + } +} + +impl BugBase { + pub(crate) fn get_current_commit_hash() -> Result { + let output = Command::new("git") + .args(["rev-parse", "HEAD"]) + .output() + .or(Err("should be able to get the commit hash".to_string()))?; + let commit_hash = String::from_utf8(output.stdout) + .or(Err("commit hash should be valid utf8".to_string()))? 
+ .trim() + .to_string(); + Ok(commit_hash) + } + + pub(crate) fn get_limbo_project_dir() -> Result { + Ok(PathBuf::from( + String::from_utf8( + Command::new("git") + .args(["rev-parse", "--git-dir"]) + .output() + .or(Err("should be able to get the git path".to_string()))? + .stdout, + ) + .or(Err("commit hash should be valid utf8".to_string()))? + .trim() + .strip_suffix(".git") + .ok_or("should be able to strip .git suffix".to_string())?, + )) + } +} diff --git a/simulator/runner/cli.rs b/simulator/runner/cli.rs index a18c47212..c62c023bb 100644 --- a/simulator/runner/cli.rs +++ b/simulator/runner/cli.rs @@ -1,13 +1,12 @@ use clap::{command, Parser}; +use serde::{Deserialize, Serialize}; -#[derive(Parser)] +#[derive(Parser, Debug, Clone, Serialize, Deserialize, PartialEq, PartialOrd, Eq, Ord)] #[command(name = "limbo-simulator")] #[command(author, version, about, long_about = None)] pub struct SimulatorCLI { #[clap(short, long, help = "set seed for reproducible runs", default_value = None)] pub seed: Option, - #[clap(short, long, help = "set custom output directory for produced files", default_value = None)] - pub output_dir: Option, #[clap( short, long, @@ -20,14 +19,14 @@ pub struct SimulatorCLI { help = "change the maximum size of the randomly generated sequence of interactions", default_value_t = 5000 )] - pub maximum_size: usize, + pub maximum_tests: usize, #[clap( short = 'k', long, help = "change the minimum size of the randomly generated sequence of interactions", default_value_t = 1000 )] - pub minimum_size: usize, + pub minimum_tests: usize, #[clap( short = 't', long, @@ -35,13 +34,7 @@ pub struct SimulatorCLI { default_value_t = 60 * 60 // default to 1 hour )] pub maximum_time: usize, - #[clap( - short = 'm', - long, - help = "minimize(shrink) the failing counterexample" - )] - pub shrink: bool, - #[clap(short = 'l', long, help = "load plan from a file")] + #[clap(short = 'l', long, help = "load plan from the bug base")] pub load: Option, #[clap( short = 'w', @@ -51,29 +44,63 @@ pub struct SimulatorCLI { pub watch: bool, #[clap(long, help = "run differential testing between sqlite and Limbo")] pub differential: bool, + #[clap(subcommand)] + pub subcommand: Option, +} + +#[derive(Parser, Debug, Clone, Serialize, Deserialize, PartialEq, PartialOrd, Eq, Ord)] +pub enum SimulatorCommand { + #[clap(about = "run the simulator in a loop")] + Loop { + #[clap( + short = 'n', + long, + help = "number of iterations to run the simulator", + default_value_t = 5 + )] + n: usize, + #[clap( + short = 's', + long, + help = "short circuit the simulator, stop on the first failure", + default_value_t = false + )] + short_circuit: bool, + }, + #[clap(about = "list all the bugs in the base")] + List, + #[clap(about = "run the simulator against a specific bug")] + Test { + #[clap( + short = 'b', + long, + help = "run the simulator with previous buggy runs for the specific filter" + )] + filter: String, + }, } impl SimulatorCLI { - pub fn validate(&self) -> Result<(), String> { - if self.minimum_size < 1 { + pub fn validate(&mut self) -> Result<(), String> { + if self.minimum_tests < 1 { return Err("minimum size must be at least 1".to_string()); } - if self.maximum_size < 1 { + if self.maximum_tests < 1 { return Err("maximum size must be at least 1".to_string()); } - // todo: fix an issue here where if minimum size is not defined, it prevents setting low maximum sizes. 
- if self.minimum_size > self.maximum_size { - return Err("Minimum size cannot be greater than maximum size".to_string()); + + if self.minimum_tests > self.maximum_tests { + log::warn!( + "minimum size '{}' is greater than '{}' maximum size, setting both to '{}'", + self.minimum_tests, + self.maximum_tests, + self.maximum_tests + ); + self.minimum_tests = self.maximum_tests - 1; } - // Make sure incompatible options are not set - if self.shrink && self.doublecheck { - return Err("Cannot use shrink and doublecheck at the same time".to_string()); - } - - if let Some(plan_path) = &self.load { - std::fs::File::open(plan_path) - .map_err(|_| format!("Plan file '{}' could not be opened", plan_path))?; + if self.seed.is_some() && self.load.is_some() { + return Err("Cannot set seed and load plan at the same time".to_string()); } Ok(()) diff --git a/simulator/runner/differential.rs b/simulator/runner/differential.rs index 1242c5307..0db6b8ecd 100644 --- a/simulator/runner/differential.rs +++ b/simulator/runner/differential.rs @@ -5,54 +5,20 @@ use crate::{ pick_index, plan::{Interaction, InteractionPlanState, ResultSet}, }, - model::{ - query::Query, - table::{Table, Value}, - }, + model::{query::Query, table::Value}, runner::execution::ExecutionContinuation, InteractionPlan, }; use super::{ - env::{ConnectionTrait, SimConnection, SimulatorEnv, SimulatorEnvTrait}, + env::{SimConnection, SimulatorEnv}, execution::{execute_interaction, Execution, ExecutionHistory, ExecutionResult}, }; -pub(crate) struct SimulatorEnvRusqlite { - pub(crate) tables: Vec, - pub(crate) connections: Vec, -} - -pub(crate) enum RusqliteConnection { - Connected(rusqlite::Connection), - Disconnected, -} - -impl ConnectionTrait for RusqliteConnection { - fn is_connected(&self) -> bool { - match self { - RusqliteConnection::Connected(_) => true, - RusqliteConnection::Disconnected => false, - } - } - - fn disconnect(&mut self) { - *self = RusqliteConnection::Disconnected; - } -} - -impl SimulatorEnvTrait for SimulatorEnvRusqlite { - fn tables(&self) -> &Vec
<Table> { - &self.tables - } - - fn tables_mut(&mut self) -> &mut Vec<Table>
{ - &mut self.tables - } -} - pub(crate) fn run_simulation( env: Arc>, + rusqlite_env: Arc>, + rusqlite_conn: &dyn Fn() -> rusqlite::Connection, plans: &mut [InteractionPlan], last_execution: Arc>, ) -> ExecutionResult { @@ -66,14 +32,7 @@ pub(crate) fn run_simulation( secondary_pointer: 0, }) .collect::>(); - let env = env.lock().unwrap(); - let rusqlite_env = SimulatorEnvRusqlite { - tables: env.tables.clone(), - connections: (0..env.connections.len()) - .map(|_| RusqliteConnection::Connected(rusqlite::Connection::open_in_memory().unwrap())) - .collect::>(), - }; let mut rusqlite_states = plans .iter() .map(|_| InteractionPlanState { @@ -84,16 +43,15 @@ pub(crate) fn run_simulation( .collect::>(); let result = execute_plans( - Arc::new(Mutex::new(env.clone())), + env, rusqlite_env, + rusqlite_conn, plans, &mut states, &mut rusqlite_states, last_execution, ); - env.io.print_stats(); - log::info!("Simulation completed"); result @@ -148,7 +106,8 @@ fn execute_query_rusqlite( pub(crate) fn execute_plans( env: Arc>, - mut rusqlite_env: SimulatorEnvRusqlite, + rusqlite_env: Arc>, + rusqlite_conn: &dyn Fn() -> rusqlite::Connection, plans: &mut [InteractionPlan], states: &mut [InteractionPlanState], rusqlite_states: &mut [InteractionPlanState], @@ -158,6 +117,8 @@ pub(crate) fn execute_plans( let now = std::time::Instant::now(); let mut env = env.lock().unwrap(); + let mut rusqlite_env = rusqlite_env.lock().unwrap(); + for _tick in 0..env.opts.ticks { // Pick the connection to interact with let connection_index = pick_index(env.connections.len(), &mut env.rng); @@ -176,6 +137,7 @@ pub(crate) fn execute_plans( match execute_plan( &mut env, &mut rusqlite_env, + rusqlite_conn, connection_index, plans, states, @@ -202,13 +164,15 @@ pub(crate) fn execute_plans( fn execute_plan( env: &mut SimulatorEnv, - rusqlite_env: &mut SimulatorEnvRusqlite, + rusqlite_env: &mut SimulatorEnv, + rusqlite_conn: &dyn Fn() -> rusqlite::Connection, connection_index: usize, plans: &mut [InteractionPlan], states: &mut [InteractionPlanState], rusqlite_states: &mut [InteractionPlanState], ) -> limbo_core::Result<()> { let connection = &env.connections[connection_index]; + let rusqlite_connection = &rusqlite_env.connections[connection_index]; let plan = &mut plans[connection_index]; let state = &mut states[connection_index]; let rusqlite_state = &mut rusqlite_states[connection_index]; @@ -218,83 +182,141 @@ fn execute_plan( let interaction = &plan.plan[state.interaction_pointer].interactions()[state.secondary_pointer]; - if let SimConnection::Disconnected = connection { - log::debug!("connecting {}", connection_index); - env.connections[connection_index] = SimConnection::Connected(env.db.connect().unwrap()); - } else { - let limbo_result = - execute_interaction(env, connection_index, interaction, &mut state.stack); - let ruqlite_result = execute_interaction_rusqlite( - rusqlite_env, - connection_index, - interaction, - &mut rusqlite_state.stack, - ); + match (connection, rusqlite_connection) { + (SimConnection::Disconnected, SimConnection::Disconnected) => { + log::debug!("connecting {}", connection_index); + env.connections[connection_index] = + SimConnection::LimboConnection(env.db.connect().unwrap()); + rusqlite_env.connections[connection_index] = + SimConnection::SQLiteConnection(rusqlite_conn()); + } + (SimConnection::LimboConnection(_), SimConnection::SQLiteConnection(_)) => { + let limbo_result = + execute_interaction(env, connection_index, interaction, &mut state.stack); + let ruqlite_result = 
execute_interaction_rusqlite( + rusqlite_env, + connection_index, + interaction, + &mut rusqlite_state.stack, + ); + match (limbo_result, ruqlite_result) { + (Ok(next_execution), Ok(next_execution_rusqlite)) => { + if next_execution != next_execution_rusqlite { + log::error!("limbo and rusqlite results do not match"); + return Err(limbo_core::LimboError::InternalError( + "limbo and rusqlite results do not match".into(), + )); + } - match (limbo_result, ruqlite_result) { - (Ok(next_execution), Ok(next_execution_rusqlite)) => { - if next_execution != next_execution_rusqlite { - log::error!("limbo and rusqlite results do not match"); - return Err(limbo_core::LimboError::InternalError( - "limbo and rusqlite results do not match".into(), - )); - } - log::debug!("connection {} processed", connection_index); - // Move to the next interaction or property - match next_execution { - ExecutionContinuation::NextInteraction => { - if state.secondary_pointer + 1 - >= plan.plan[state.interaction_pointer].interactions().len() - { - // If we have reached the end of the interactions for this property, move to the next property - state.interaction_pointer += 1; - state.secondary_pointer = 0; - } else { - // Otherwise, move to the next interaction - state.secondary_pointer += 1; + let limbo_values = state.stack.last(); + let rusqlite_values = rusqlite_state.stack.last(); + match (limbo_values, rusqlite_values) { + (Some(limbo_values), Some(rusqlite_values)) => { + match (limbo_values, rusqlite_values) { + (Ok(limbo_values), Ok(rusqlite_values)) => { + if limbo_values != rusqlite_values { + log::error!("limbo and rusqlite results do not match"); + return Err(limbo_core::LimboError::InternalError( + "limbo and rusqlite results do not match".into(), + )); + } + } + (Err(limbo_err), Err(rusqlite_err)) => { + log::warn!( + "limbo and rusqlite both fail, requires manual check" + ); + log::warn!("limbo error {}", limbo_err); + log::warn!("rusqlite error {}", rusqlite_err); + } + (Ok(limbo_result), Err(rusqlite_err)) => { + log::error!("limbo and rusqlite results do not match"); + log::error!("limbo values {:?}", limbo_result); + log::error!("rusqlite error {}", rusqlite_err); + return Err(limbo_core::LimboError::InternalError( + "limbo and rusqlite results do not match".into(), + )); + } + (Err(limbo_err), Ok(_)) => { + log::error!("limbo and rusqlite results do not match"); + log::error!("limbo error {}", limbo_err); + return Err(limbo_core::LimboError::InternalError( + "limbo and rusqlite results do not match".into(), + )); + } + } + } + (None, None) => {} + _ => { + log::error!("limbo and rusqlite results do not match"); + return Err(limbo_core::LimboError::InternalError( + "limbo and rusqlite results do not match".into(), + )); } } - ExecutionContinuation::NextProperty => { - // Skip to the next property - state.interaction_pointer += 1; - state.secondary_pointer = 0; + + // Move to the next interaction or property + match next_execution { + ExecutionContinuation::NextInteraction => { + if state.secondary_pointer + 1 + >= plan.plan[state.interaction_pointer].interactions().len() + { + // If we have reached the end of the interactions for this property, move to the next property + state.interaction_pointer += 1; + state.secondary_pointer = 0; + } else { + // Otherwise, move to the next interaction + state.secondary_pointer += 1; + } + } + ExecutionContinuation::NextProperty => { + // Skip to the next property + state.interaction_pointer += 1; + state.secondary_pointer = 0; + } } } - } - (Err(err), Ok(_)) => { - 
log::error!("limbo and rusqlite results do not match"); - log::error!("limbo error {}", err); - return Err(err); - } - (Ok(_), Err(err)) => { - log::error!("limbo and rusqlite results do not match"); - log::error!("rusqlite error {}", err); - return Err(err); - } - (Err(err), Err(err_rusqlite)) => { - log::error!("limbo and rusqlite both fail, requires manual check"); - log::error!("limbo error {}", err); - log::error!("rusqlite error {}", err_rusqlite); - return Err(err); + (Err(err), Ok(_)) => { + log::error!("limbo and rusqlite results do not match"); + log::error!("limbo error {}", err); + return Err(err); + } + (Ok(val), Err(err)) => { + log::error!("limbo and rusqlite results do not match"); + log::error!("limbo {:?}", val); + log::error!("rusqlite error {}", err); + return Err(err); + } + (Err(err), Err(err_rusqlite)) => { + log::error!("limbo and rusqlite both fail, requires manual check"); + log::error!("limbo error {}", err); + log::error!("rusqlite error {}", err_rusqlite); + return Err(err); + } } } + _ => unreachable!("{} vs {}", connection, rusqlite_connection), } Ok(()) } fn execute_interaction_rusqlite( - env: &mut SimulatorEnvRusqlite, + env: &mut SimulatorEnv, connection_index: usize, interaction: &Interaction, stack: &mut Vec, ) -> limbo_core::Result { - log::info!("executing in rusqlite: {}", interaction); + log::trace!( + "execute_interaction_rusqlite(connection_index={}, interaction={})", + connection_index, + interaction + ); match interaction { Interaction::Query(query) => { let conn = match &mut env.connections[connection_index] { - RusqliteConnection::Connected(conn) => conn, - RusqliteConnection::Disconnected => unreachable!(), + SimConnection::SQLiteConnection(conn) => conn, + SimConnection::LimboConnection(_) => unreachable!(), + SimConnection::Disconnected => unreachable!(), }; log::debug!("{}", interaction); @@ -318,7 +340,7 @@ fn execute_interaction_rusqlite( } } Interaction::Fault(_) => { - log::debug!("faults are not supported in differential testing mode"); + interaction.execute_fault(env, connection_index)?; } } diff --git a/simulator/runner/env.rs b/simulator/runner/env.rs index a9409ad7e..8a7a6533a 100644 --- a/simulator/runner/env.rs +++ b/simulator/runner/env.rs @@ -1,8 +1,10 @@ +use std::fmt::Display; +use std::mem; use std::path::Path; use std::rc::Rc; use std::sync::Arc; -use limbo_core::{Connection, Database}; +use limbo_core::Database; use rand::{Rng, SeedableRng}; use rand_chacha::ChaCha8Rng; @@ -12,12 +14,6 @@ use crate::runner::io::SimulatorIO; use super::cli::SimulatorCLI; -pub trait SimulatorEnvTrait { - fn tables(&self) -> &Vec
<Table>;
-    fn tables_mut(&mut self) -> &mut Vec<Table>;
-}
-
-#[derive(Clone)]
 pub(crate) struct SimulatorEnv {
     pub(crate) opts: SimulatorOpts,
     pub(crate) tables: Vec<Table>,
@@ -27,13 +23,18 @@ pub(crate) struct SimulatorEnv {
     pub(crate) rng: ChaCha8Rng,
 }
-impl SimulatorEnvTrait for SimulatorEnv {
-    fn tables(&self) -> &Vec<Table> {
-        &self.tables
-    }
-
-    fn tables_mut(&mut self) -> &mut Vec<Table>
{ - &mut self.tables +impl SimulatorEnv { + pub(crate) fn clone_without_connections(&self) -> Self { + SimulatorEnv { + opts: self.opts.clone(), + tables: self.tables.clone(), + connections: (0..self.connections.len()) + .map(|_| SimConnection::Disconnected) + .collect(), + io: self.io.clone(), + db: self.db.clone(), + rng: self.rng.clone(), + } } } @@ -66,7 +67,7 @@ impl SimulatorEnv { }; let opts = SimulatorOpts { - ticks: rng.gen_range(cli_opts.minimum_size..=cli_opts.maximum_size), + ticks: rng.gen_range(cli_opts.minimum_tests..=cli_opts.maximum_tests), max_connections: 1, // TODO: for now let's use one connection as we didn't implement // correct transactions processing max_tables: rng.gen_range(0..128), @@ -76,7 +77,7 @@ impl SimulatorEnv { delete_percent, drop_percent, page_size: 4096, // TODO: randomize this too - max_interactions: rng.gen_range(cli_opts.minimum_size..=cli_opts.maximum_size), + max_interactions: rng.gen_range(cli_opts.minimum_tests..=cli_opts.maximum_tests), max_time_simulation: cli_opts.maximum_time, }; @@ -87,6 +88,11 @@ impl SimulatorEnv { std::fs::remove_file(db_path).unwrap(); } + let wal_path = db_path.with_extension("db-wal"); + if wal_path.exists() { + std::fs::remove_file(wal_path).unwrap(); + } + let db = match Database::open_file(io.clone(), db_path.to_str().unwrap(), false) { Ok(db) => db, Err(e) => { @@ -94,7 +100,9 @@ impl SimulatorEnv { } }; - let connections = vec![SimConnection::Disconnected; opts.max_connections]; + let connections = (0..opts.max_connections) + .map(|_| SimConnection::Disconnected) + .collect::>(); SimulatorEnv { opts, @@ -107,27 +115,55 @@ impl SimulatorEnv { } } -pub trait ConnectionTrait { +pub trait ConnectionTrait +where + Self: std::marker::Sized + Clone, +{ fn is_connected(&self) -> bool; fn disconnect(&mut self); } -#[derive(Clone)] pub(crate) enum SimConnection { - Connected(Rc), + LimboConnection(Rc), + SQLiteConnection(rusqlite::Connection), Disconnected, } -impl ConnectionTrait for SimConnection { - fn is_connected(&self) -> bool { +impl SimConnection { + pub(crate) fn is_connected(&self) -> bool { match self { - SimConnection::Connected(_) => true, + SimConnection::LimboConnection(_) | SimConnection::SQLiteConnection(_) => true, SimConnection::Disconnected => false, } } + pub(crate) fn disconnect(&mut self) { + let conn = mem::replace(self, SimConnection::Disconnected); - fn disconnect(&mut self) { - *self = SimConnection::Disconnected; + match conn { + SimConnection::LimboConnection(conn) => { + conn.close().unwrap(); + } + SimConnection::SQLiteConnection(conn) => { + conn.close().unwrap(); + } + SimConnection::Disconnected => {} + } + } +} + +impl Display for SimConnection { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + SimConnection::LimboConnection(_) => { + write!(f, "LimboConnection") + } + SimConnection::SQLiteConnection(_) => { + write!(f, "SQLiteConnection") + } + SimConnection::Disconnected => { + write!(f, "Disconnected") + } + } } } diff --git a/simulator/runner/execution.rs b/simulator/runner/execution.rs index 8ae4b0cf6..7b8204604 100644 --- a/simulator/runner/execution.rs +++ b/simulator/runner/execution.rs @@ -68,7 +68,12 @@ pub(crate) fn execute_plans( // Pick the connection to interact with let connection_index = pick_index(env.connections.len(), &mut env.rng); let state = &mut states[connection_index]; - + std::thread::sleep(std::time::Duration::from_millis( + std::env::var("TICK_SLEEP") + .unwrap_or("0".into()) + .parse() + .unwrap_or(0), + )); 
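// The sleep just above throttles the simulator loop through the TICK_SLEEP
// environment variable (milliseconds per tick). A minimal standalone sketch of
// the same pattern follows; it assumes only the TICK_SLEEP name from this
// patch, and the explicit zero guard is illustrative rather than part of the
// change:
fn tick_sleep() {
    // Read TICK_SLEEP, treating an unset or unparsable value as 0 (no sleep).
    let ms: u64 = std::env::var("TICK_SLEEP")
        .ok()
        .and_then(|v| v.parse().ok())
        .unwrap_or(0);
    if ms > 0 {
        // Slow each simulator tick down so a run can be followed live,
        // e.g. TICK_SLEEP=250 for a 250ms pause per tick.
        std::thread::sleep(std::time::Duration::from_millis(ms));
    }
}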
history.history.push(Execution::new( connection_index, state.interaction_pointer, @@ -117,10 +122,13 @@ fn execute_plan( if let SimConnection::Disconnected = connection { log::debug!("connecting {}", connection_index); - env.connections[connection_index] = SimConnection::Connected(env.db.connect().unwrap()); + env.connections[connection_index] = + SimConnection::LimboConnection(env.db.connect().unwrap()); } else { + log::debug!("connection {} already connected", connection_index); match execute_interaction(env, connection_index, interaction, &mut state.stack) { Ok(next_execution) => { + interaction.shadow(env); log::debug!("connection {} processed", connection_index); // Move to the next interaction or property match next_execution { @@ -157,7 +165,7 @@ fn execute_plan( /// `execute_interaction` uses this type in conjunction with a result, where /// the `Err` case indicates a full-stop due to a bug, and the `Ok` case /// indicates the next step in the plan. -#[derive(PartialEq)] +#[derive(PartialEq, Debug)] pub(crate) enum ExecutionContinuation { /// Default continuation, execute the next interaction. NextInteraction, @@ -179,7 +187,8 @@ pub(crate) fn execute_interaction( match interaction { Interaction::Query(_) => { let conn = match &mut env.connections[connection_index] { - SimConnection::Connected(conn) => conn, + SimConnection::LimboConnection(conn) => conn, + SimConnection::SQLiteConnection(_) => unreachable!(), SimConnection::Disconnected => unreachable!(), }; diff --git a/simulator/runner/io.rs b/simulator/runner/io.rs index 48340d170..c775b3f9e 100644 --- a/simulator/runner/io.rs +++ b/simulator/runner/io.rs @@ -1,6 +1,6 @@ use std::{cell::RefCell, sync::Arc}; -use limbo_core::{OpenFlags, PlatformIO, Result, IO}; +use limbo_core::{Clock, Instant, OpenFlags, PlatformIO, Result, IO}; use rand::{RngCore, SeedableRng}; use rand_chacha::ChaCha8Rng; @@ -52,6 +52,15 @@ impl SimulatorIO { } } +impl Clock for SimulatorIO { + fn now(&self) -> Instant { + Instant { + secs: 1704067200, // 2024-01-01 00:00:00 UTC + micros: 0, + } + } +} + impl IO for SimulatorIO { fn open_file( &self, @@ -89,7 +98,7 @@ impl IO for SimulatorIO { self.rng.borrow_mut().next_u64() as i64 } - fn get_current_time(&self) -> String { - "2024-01-01 00:00:00".to_string() + fn get_memory_io(&self) -> Arc { + todo!() } } diff --git a/simulator/runner/mod.rs b/simulator/runner/mod.rs index 36a6fbb0a..792c4bddd 100644 --- a/simulator/runner/mod.rs +++ b/simulator/runner/mod.rs @@ -1,3 +1,4 @@ +pub mod bugbase; pub mod cli; pub mod differential; pub mod env; diff --git a/simulator/runner/watch.rs b/simulator/runner/watch.rs index cd0e645b8..cb7648fc3 100644 --- a/simulator/runner/watch.rs +++ b/simulator/runner/watch.rs @@ -98,7 +98,8 @@ fn execute_plan( if let SimConnection::Disconnected = connection { log::debug!("connecting {}", connection_index); - env.connections[connection_index] = SimConnection::Connected(env.db.connect().unwrap()); + env.connections[connection_index] = + SimConnection::LimboConnection(env.db.connect().unwrap()); } else { match execute_interaction(env, connection_index, interaction, &mut state.stack) { Ok(next_execution) => { diff --git a/stress/Cargo.toml b/stress/Cargo.toml index 59c4e8256..84e7dc6a4 100644 --- a/stress/Cargo.toml +++ b/stress/Cargo.toml @@ -18,5 +18,9 @@ path = "main.rs" antithesis_sdk = "0.2.5" clap = { version = "4.5", features = ["derive"] } limbo = { path = "../bindings/rust" } -serde_json = "1.0.139" tokio = { version = "1.29.1", features = ["full"] } 
+anarchist-readable-name-generator-lib = "0.1.0" +hex = "0.4" +tracing = "0.1.41" +tracing-appender = "0.2.3" +tracing-subscriber = { version = "0.3.19", features = ["env-filter"] } diff --git a/stress/docker-compose.yaml b/stress/docker-compose.yaml index 13b1149a8..38c77ec2f 100644 --- a/stress/docker-compose.yaml +++ b/stress/docker-compose.yaml @@ -1,4 +1,6 @@ services: - workload: + limbo: image: us-central1-docker.pkg.dev/molten-verve-216720/turso-repository/limbo-workload:antithesis-latest - command: [ "/bin/limbo_stress" ] + environment: + SANDBOX: "composed" + command: ["sleep", "infinity"] diff --git a/stress/docker-entrypoint.sh b/stress/docker-entrypoint.sh index a09822694..1aa226912 100644 --- a/stress/docker-entrypoint.sh +++ b/stress/docker-entrypoint.sh @@ -1,5 +1,7 @@ #!/bin/bash +echo '{"antithesis_setup": { "status": "complete", "details": null }}' > $ANTITHESIS_OUTPUT_DIR/sdk.jsonl + set -Eeuo pipefail exec "$@" diff --git a/stress/main.rs b/stress/main.rs index c62714e63..743ff5722 100644 --- a/stress/main.rs +++ b/stress/main.rs @@ -1,44 +1,460 @@ mod opts; +use anarchist_readable_name_generator_lib::readable_name_custom; +use antithesis_sdk::random::{get_random, AntithesisRng}; use antithesis_sdk::*; use clap::Parser; -use limbo::{Builder, Value}; +use core::panic; +use hex; +use limbo::Builder; use opts::Opts; -use serde_json::json; +use std::collections::HashSet; +use std::fs::File; +use std::io::{Read, Write}; use std::sync::Arc; +use tracing_appender::non_blocking::WorkerGuard; +use tracing_subscriber::layer::SubscriberExt; +use tracing_subscriber::util::SubscriberInitExt; +use tracing_subscriber::EnvFilter; + +pub struct Plan { + pub ddl_statements: Vec, + pub queries_per_thread: Vec>, + pub nr_iterations: usize, + pub nr_threads: usize, +} + +/// Represents a column in a SQLite table +#[derive(Debug, Clone)] +pub struct Column { + pub name: String, + pub data_type: DataType, + pub constraints: Vec, +} + +/// Represents SQLite data types +#[derive(Debug, Clone)] +pub enum DataType { + Integer, + Real, + Text, + Blob, + Numeric, +} + +/// Represents column constraints +#[derive(Debug, Clone, PartialEq)] +pub enum Constraint { + PrimaryKey, + NotNull, + Unique, +} + +/// Represents a table in a SQLite schema +#[derive(Debug, Clone)] +pub struct Table { + pub name: String, + pub columns: Vec, +} + +/// Represents a complete SQLite schema +#[derive(Debug, Clone)] +pub struct ArbitrarySchema { + pub tables: Vec
<Table>
, +} + +// Helper functions for generating random data +fn generate_random_identifier() -> String { + readable_name_custom("_", AntithesisRng).replace('-', "_") +} + +fn generate_random_data_type() -> DataType { + match get_random() % 5 { + 0 => DataType::Integer, + 1 => DataType::Real, + 2 => DataType::Text, + 3 => DataType::Blob, + _ => DataType::Numeric, + } +} + +fn generate_random_constraint() -> Constraint { + match get_random() % 2 { + 0 => Constraint::NotNull, + _ => Constraint::Unique, + } +} + +fn generate_random_column() -> Column { + let name = generate_random_identifier(); + let data_type = generate_random_data_type(); + + let constraint_count = (get_random() % 3) as usize; + let mut constraints = Vec::with_capacity(constraint_count); + + for _ in 0..constraint_count { + constraints.push(generate_random_constraint()); + } + + Column { + name, + data_type, + constraints, + } +} + +fn generate_random_table() -> Table { + let name = generate_random_identifier(); + let column_count = (get_random() % 10 + 1) as usize; + let mut columns = Vec::with_capacity(column_count); + let mut column_names = HashSet::new(); + + // First, generate all columns without primary keys + for _ in 0..column_count { + let mut column = generate_random_column(); + + // Ensure column names are unique within the table + while column_names.contains(&column.name) { + column.name = generate_random_identifier(); + } + + column_names.insert(column.name.clone()); + columns.push(column); + } + + // Then, randomly select one column to be the primary key + let pk_index = (get_random() % column_count as u64) as usize; + columns[pk_index].constraints.push(Constraint::PrimaryKey); + + Table { name, columns } +} + +pub fn gen_schema() -> ArbitrarySchema { + let table_count = (get_random() % 10 + 1) as usize; + let mut tables = Vec::with_capacity(table_count); + let mut table_names = HashSet::new(); + + for _ in 0..table_count { + let mut table = generate_random_table(); + + // Ensure table names are unique + while table_names.contains(&table.name) { + table.name = generate_random_identifier(); + } + + table_names.insert(table.name.clone()); + tables.push(table); + } + + ArbitrarySchema { tables } +} + +impl ArbitrarySchema { + /// Convert the schema to a vector of SQL DDL statements + pub fn to_sql(&self) -> Vec { + self.tables + .iter() + .map(|table| { + let columns = table + .columns + .iter() + .map(|col| { + let mut col_def = + format!(" {} {}", col.name, data_type_to_sql(&col.data_type)); + for constraint in &col.constraints { + col_def.push(' '); + col_def.push_str(&constraint_to_sql(constraint)); + } + col_def + }) + .collect::>() + .join(","); + + format!("CREATE TABLE {} ({});", table.name, columns) + }) + .collect() + } +} + +fn data_type_to_sql(data_type: &DataType) -> &'static str { + match data_type { + DataType::Integer => "INTEGER", + DataType::Real => "REAL", + DataType::Text => "TEXT", + DataType::Blob => "BLOB", + DataType::Numeric => "NUMERIC", + } +} + +fn constraint_to_sql(constraint: &Constraint) -> String { + match constraint { + Constraint::PrimaryKey => "PRIMARY KEY".to_string(), + Constraint::NotNull => "NOT NULL".to_string(), + Constraint::Unique => "UNIQUE".to_string(), + } +} + +/// Generate a random value for a given data type +fn generate_random_value(data_type: &DataType) -> String { + match data_type { + DataType::Integer => (get_random() % 1000).to_string(), + DataType::Real => format!("{:.2}", (get_random() % 1000) as f64 / 100.0), + DataType::Text => format!("'{}'", 
generate_random_identifier()), + DataType::Blob => format!("x'{}'", hex::encode(generate_random_identifier())), + DataType::Numeric => (get_random() % 1000).to_string(), + } +} + +/// Generate a random INSERT statement for a table +fn generate_insert(table: &Table) -> String { + let columns = table + .columns + .iter() + .map(|col| col.name.clone()) + .collect::>() + .join(", "); + + let values = table + .columns + .iter() + .map(|col| generate_random_value(&col.data_type)) + .collect::>() + .join(", "); + + format!( + "INSERT INTO {} ({}) VALUES ({});", + table.name, columns, values + ) +} + +/// Generate a random UPDATE statement for a table +fn generate_update(table: &Table) -> String { + // Find the primary key column + let pk_column = table + .columns + .iter() + .find(|col| col.constraints.contains(&Constraint::PrimaryKey)) + .expect("Table should have a primary key"); + + // Get all non-primary key columns + let non_pk_columns: Vec<_> = table + .columns + .iter() + .filter(|col| col.name != pk_column.name) + .collect(); + + // If we have no non-PK columns, just update the primary key itself + let set_clause = if non_pk_columns.is_empty() { + format!( + "{} = {}", + pk_column.name, + generate_random_value(&pk_column.data_type) + ) + } else { + non_pk_columns + .iter() + .map(|col| format!("{} = {}", col.name, generate_random_value(&col.data_type))) + .collect::>() + .join(", ") + }; + + let where_clause = format!( + "{} = {}", + pk_column.name, + generate_random_value(&pk_column.data_type) + ); + + format!( + "UPDATE {} SET {} WHERE {};", + table.name, set_clause, where_clause + ) +} + +/// Generate a random DELETE statement for a table +fn generate_delete(table: &Table) -> String { + // Find the primary key column + let pk_column = table + .columns + .iter() + .find(|col| col.constraints.contains(&Constraint::PrimaryKey)) + .expect("Table should have a primary key"); + + let where_clause = format!( + "{} = {}", + pk_column.name, + generate_random_value(&pk_column.data_type) + ); + + format!("DELETE FROM {} WHERE {};", table.name, where_clause) +} + +/// Generate a random SQL statement for a schema +fn generate_random_statement(schema: &ArbitrarySchema) -> String { + let table = &schema.tables[get_random() as usize % schema.tables.len()]; + match get_random() % 3 { + 0 => generate_insert(table), + 1 => generate_update(table), + _ => generate_delete(table), + } +} + +fn generate_plan(opts: &Opts) -> Result> { + let schema = gen_schema(); + // Write DDL statements to log file + let mut log_file = File::create(&opts.log_file)?; + let ddl_statements = schema.to_sql(); + let mut plan = Plan { + ddl_statements: vec![], + queries_per_thread: vec![], + nr_iterations: opts.nr_iterations, + nr_threads: opts.nr_threads, + }; + writeln!(log_file, "{}", opts.nr_threads)?; + writeln!(log_file, "{}", opts.nr_iterations)?; + writeln!(log_file, "{}", ddl_statements.len())?; + for stmt in &ddl_statements { + writeln!(log_file, "{}", stmt)?; + } + plan.ddl_statements = ddl_statements; + for _ in 0..opts.nr_threads { + let mut queries = vec![]; + for _ in 0..opts.nr_iterations { + let sql = generate_random_statement(&schema); + // writeln!(log_file, "{}", sql)?; + queries.push(sql); + } + plan.queries_per_thread.push(queries); + } + Ok(plan) +} + +fn read_plan_from_log_file(opts: &Opts) -> Result> { + let mut file = File::open(&opts.log_file)?; + let mut buf = String::new(); + let mut plan = Plan { + ddl_statements: vec![], + queries_per_thread: vec![], + nr_iterations: 0, + nr_threads: 0, + }; + 
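// The plan log parsed below is a plain line-oriented format; the values here
// are illustrative, only the layout is taken from generate_plan above:
//
//     2                                <- nr_threads
//     3                                <- nr_iterations per thread
//     1                                <- number of DDL statements
//     CREATE TABLE t (a INTEGER);      <- DDL statements, one per line
//     INSERT INTO t (a) VALUES (1);    <- then nr_threads * nr_iterations
//     ...                                 queries, grouped by thread
//
// Note that generate_plan currently leaves its per-query writeln commented
// out, so a freshly generated log contains only the header and DDL lines and
// this reader would fail on the first missing query line.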
file.read_to_string(&mut buf).unwrap(); + let mut lines = buf.lines(); + plan.nr_threads = lines.next().expect("missing threads").parse().unwrap(); + plan.nr_iterations = lines + .next() + .expect("missing nr_iterations") + .parse() + .unwrap(); + let nr_ddl = lines + .next() + .expect("number of ddl statements") + .parse() + .unwrap(); + for _ in 0..nr_ddl { + plan.ddl_statements + .push(lines.next().expect("expected ddl statement").to_string()); + } + for _ in 0..plan.nr_threads { + let mut queries = vec![]; + for _ in 0..plan.nr_iterations { + queries.push( + lines + .next() + .expect("missing query for thread {}") + .to_string(), + ); + } + plan.queries_per_thread.push(queries); + } + Ok(plan) +} + +pub fn init_tracing() -> Result { + let (non_blocking, guard) = tracing_appender::non_blocking(std::io::stderr()); + if let Err(e) = tracing_subscriber::registry() + .with( + tracing_subscriber::fmt::layer() + .with_writer(non_blocking) + .with_ansi(false) + .with_line_number(true) + .with_thread_ids(true), + ) + .with(EnvFilter::from_default_env()) + .try_init() + { + println!("Unable to setup tracing appender: {:?}", e); + } + Ok(guard) +} #[tokio::main] -async fn main() { - let (num_nodes, main_id) = (1, "n-001"); - let startup_data = json!({ - "num_nodes": num_nodes, - "main_node_id": main_id, - }); - lifecycle::setup_complete(&startup_data); +async fn main() -> Result<(), Box> { + let _g = init_tracing()?; antithesis_init(); - let opts = Opts::parse(); - let mut handles = Vec::new(); + let mut opts = Opts::parse(); + + let plan = if opts.load_log { + read_plan_from_log_file(&mut opts)? + } else { + generate_plan(&opts)? + }; + + let mut handles = Vec::with_capacity(opts.nr_threads); + let plan = Arc::new(plan); + + for thread in 0..opts.nr_threads { + let db = Arc::new(Builder::new_local(&opts.db_file).build().await?); + let plan = plan.clone(); + let conn = db.connect()?; + + // Apply each DDL statement individually + for stmt in &plan.ddl_statements { + println!("executing ddl {}", stmt); + if let Err(e) = conn.execute(stmt, ()).await { + match e { + limbo::Error::SqlExecutionFailure(e) => { + if e.contains("Corrupt database") { + panic!("Error creating table: {}", e); + } else { + println!("Error creating table: {}", e); + } + } + _ => panic!("Error creating table: {}", e), + } + } + } - for _ in 0..opts.nr_threads { - // TODO: share the database between threads - let db = Arc::new(Builder::new_local(":memory:").build().await.unwrap()); let nr_iterations = opts.nr_iterations; let db = db.clone(); - let handle = tokio::spawn(async move { - let conn = db.connect().unwrap(); - for _ in 0..nr_iterations { - let mut rows = conn.query("select 1", ()).await.unwrap(); - let row = rows.next().await.unwrap().unwrap(); - let value = row.get_value(0).unwrap(); - assert_always!(matches!(value, Value::Integer(1)), "value is incorrect"); + let handle = tokio::spawn(async move { + let conn = db.connect()?; + for query_index in 0..nr_iterations { + let sql = &plan.queries_per_thread[thread][query_index]; + println!("executing: {}", sql); + if let Err(e) = conn.execute(&sql, ()).await { + match e { + limbo::Error::SqlExecutionFailure(e) => { + if e.contains("Corrupt database") { + panic!("Error executing query: {}", e); + } else { + println!("Error executing query: {}", e); + } + } + _ => panic!("Error executing query: {}", e), + } + } } + Ok::<_, Box>(()) }); handles.push(handle); } + for handle in handles { - handle.await.unwrap(); + handle.await??; } - println!("Done."); + println!("Done. 
SQL statements written to {}", opts.log_file); + println!("Database file: {}", opts.db_file); + Ok(()) } diff --git a/stress/opts.rs b/stress/opts.rs index 392d79448..a8cbb5b2a 100644 --- a/stress/opts.rs +++ b/stress/opts.rs @@ -4,13 +4,43 @@ use clap::{command, Parser}; #[command(name = "limbo_stress")] #[command(author, version, about, long_about = None)] pub struct Opts { + /// Number of threads to run #[clap(short = 't', long, help = "the number of threads", default_value_t = 8)] pub nr_threads: usize, + + /// Number of iterations per thread #[clap( short = 'i', long, help = "the number of iterations", - default_value_t = 1000 + default_value_t = 100000 )] pub nr_iterations: usize, + + /// Log file for SQL statements + #[clap( + short = 'l', + long, + help = "log file for SQL statements", + default_value = "limbostress.log" + )] + pub log_file: String, + + /// Load log file instead of creating a new one + #[clap( + short = 'L', + long = "load-log", + help = "load log file instead of creating a new one", + default_value_t = false + )] + pub load_log: bool, + + /// Database file + #[clap( + short = 'd', + long, + help = "database file", + default_value = "limbostress.db" + )] + pub db_file: String, } diff --git a/testing/README.md b/testing/README.md new file mode 100644 index 000000000..ef4d07cde --- /dev/null +++ b/testing/README.md @@ -0,0 +1 @@ +# Limbo Testing \ No newline at end of file diff --git a/testing/agg-functions.test b/testing/agg-functions.test index 52cf2865c..f1a85dde5 100755 --- a/testing/agg-functions.test +++ b/testing/agg-functions.test @@ -99,6 +99,27 @@ do_execsql_test select-agg-binary-unary-positive { SELECT min(age) + +max(age) FROM users; } {101} +do_execsql_test select-non-agg-cols-should-be-not-null { + SELECT id, first_name, sum(age) FROM users LIMIT 1; +} {1|Jamie|503960} + +do_execsql_test select-with-group-by-and-agg-1 { + SELECT id, first_name, avg(age) FROM users group by last_name limit 1; +} {274|Debra|66.25} + +do_execsql_test select-with-group-by-and-agg-2 { + select first_name, last_name from users where state = 'AL' group by last_name limit 10; +} {Jay|Acosta +Daniel|Adams +Aaron|Baker +Sharon|Becker +Kim|Berg +Donald|Bishop +Brian|Bradford +Jesus|Bradley +John|Brown +Hunter|Burke} + do_execsql_test select-agg-json-array { SELECT json_group_array(name) FROM products; } {["hat","cap","shirt","sweater","sweatshirt","shorts","jeans","sneakers","boots","coat","accessories"]} diff --git a/testing/all.test b/testing/all.test index 857224ef6..3bdc1f98b 100755 --- a/testing/all.test +++ b/testing/all.test @@ -28,3 +28,5 @@ source $testdir/scalar-functions-printf.test source $testdir/transactions.test source $testdir/update.test source $testdir/drop_table.test +source $testdir/default_value.test +source $testdir/boolean.test diff --git a/testing/boolean.test b/testing/boolean.test new file mode 100755 index 000000000..51d6633e4 --- /dev/null +++ b/testing/boolean.test @@ -0,0 +1,56 @@ +#!/usr/bin/env tclsh + +set testdir [file dirname $argv0] +source $testdir/tester.tcl + +foreach {testname lhs ans} { + int-1 1 0 + int-2 2 0 + int-3 0 1 + float-1 1.0 0 + float-2 2.0 0 + float-3 0.0 1 + text 'a' 1 + text-int-1 '0' 1 + text-int-2 '1' 0 + text-float-1 '1.0' 0 + text-float-2 '0.0' 1 + text-float-edge '12-23.0' 0 + null NULL {} + empty-blob x'' 1 + cast-blob "CAST ('af' AS BLOB)" 1 + blob x'0000' 1 + blob-2 x'0001' 1 +} { + do_execsql_test boolean-not-$testname "SELECT not $lhs" $::ans +} + +foreach {testname lhs rhs ans} { + + blob-blob x'' x'' 0 + 1-blob 1 
x'' 0 + 0-blob 0 x'' 0 + 0-1 0 1 0 + 1-1 1 1 1 + int-int 20 1000 1 + int-float 20 1.0 1 + int-0.0 20 0.0 0 + 0.0-0.0 0.0 0.0 0 + text 'a' 1 0 + text-int-1 '0' 1 0 + text-int-2 '1' 0 0 + text-float-1 '1.0' 0 0 + text-float-2 '0.0' 1 0 + text-float-3 '1.0' 1 1 + text-float-edge '12-23.0' 0 0 + null-null NULL NULL "" + 1-null 1 NULL "" + 1.0-null 1.0 NULL "" + blob-null x'' NULL 0 + blob2-null x'0001' NULL 0 + 0-null 0 NULL 0 + 0.0-null 0.0 NULL 0 + '0.0'-null '0.0' NULL 0 +} { + do_execsql_test boolean-and-$testname "SELECT $lhs AND $rhs" $::ans +} \ No newline at end of file diff --git a/testing/cli_tests/cli_test_cases.py b/testing/cli_tests/cli_test_cases.py index 120d20070..ba5e9a38f 100755 --- a/testing/cli_tests/cli_test_cases.py +++ b/testing/cli_tests/cli_test_cases.py @@ -1,8 +1,9 @@ #!/usr/bin/env python3 -from test_limbo_cli import TestLimboShell +from cli_tests.test_limbo_cli import TestLimboShell from pathlib import Path import time import os +from cli_tests import console def test_basic_queries(): @@ -264,8 +265,44 @@ def test_update_with_limit(): limbo.quit() -if __name__ == "__main__": - print("Running all Limbo CLI tests...") + +def test_update_with_limit_and_offset(): + limbo = TestLimboShell( + "CREATE TABLE t (a,b,c); insert into t values (1,2,3), (4,5,6), (7,8,9), (1,2,3),(4,5,6), (7,8,9);" + ) + limbo.run_test("update-limit-offset", "UPDATE t SET a = 10 LIMIT 1 OFFSET 3;", "") + limbo.run_test( + "update-limit-offset-result", "SELECT COUNT(*) from t WHERE a = 10;", "1" + ) + limbo.run_test("update-limit-result", "SELECT a from t LIMIT 4;", "1\n4\n7\n10") + limbo.run_test( + "update-limit-offset-zero", "UPDATE t SET a = 100 LIMIT 0 OFFSET 0;", "" + ) + limbo.run_test( + "update-limit-zero-result", "SELECT COUNT(*) from t WHERE a = 100;", "0" + ) + limbo.run_test("update-limit-all", "UPDATE t SET a = 100 LIMIT -1 OFFSET 1;", "") + limbo.run_test("update-limit-result", "SELECT COUNT(*) from t WHERE a = 100;", "5") + limbo.run_test( + "udpate-limit-where", "UPDATE t SET a = 333 WHERE b = 5 LIMIT 1 OFFSET 2;", "" + ) + limbo.run_test( + "update-limit-where-result", "SELECT COUNT(*) from t WHERE a = 333;", "0" + ) + limbo.quit() + +def test_insert_default_values(): + limbo = TestLimboShell( + "CREATE TABLE t (a integer default(42),b integer default (43),c integer default(44));" + ) + for _ in range(1, 10): + limbo.execute_dot("INSERT INTO t DEFAULT VALUES;") + limbo.run_test("insert-default-values", "SELECT * FROM t;", "42|43|44\n" * 9) + limbo.quit() + + +def main(): + console.info("Running all Limbo CLI tests...") test_basic_queries() test_schema_operations() test_file_operations() @@ -282,4 +319,9 @@ if __name__ == "__main__": test_import_csv_skip() test_table_patterns() test_update_with_limit() - print("All tests have passed") + test_update_with_limit_and_offset() + console.info("All tests have passed") + + +if __name__ == "__main__": + main() diff --git a/testing/cli_tests/console.py b/testing/cli_tests/console.py new file mode 100644 index 000000000..2f295a90d --- /dev/null +++ b/testing/cli_tests/console.py @@ -0,0 +1,122 @@ +from typing import Any, Optional, Union +from rich.console import Console, JustifyMethod +from rich.theme import Theme +from rich.style import Style + + +custom_theme = Theme( + { + "info": "bold blue", + "error": "bold red", + "debug": "bold blue", + "test": "bold green", + } +) +console = Console(theme=custom_theme, force_terminal=True) + + +def info( + *objects: Any, + sep: str = " ", + end: str = "\n", + style: Optional[Union[str, Style]] = 
None, + justify: Optional[JustifyMethod] = None, + emoji: Optional[bool] = None, + markup: Optional[bool] = None, + highlight: Optional[bool] = None, + log_locals: bool = False, + _stack_offset: int = 1, +): + console.log( + "[info]INFO[/info]", + *objects, + sep=sep, + end=end, + style=style, + justify=justify, + emoji=emoji, + markup=markup, + highlight=highlight, + log_locals=log_locals, + _stack_offset=_stack_offset + 1, + ) + + +def error( + *objects: Any, + sep: str = " ", + end: str = "\n", + style: Optional[Union[str, Style]] = None, + justify: Optional[JustifyMethod] = None, + emoji: Optional[bool] = None, + markup: Optional[bool] = None, + highlight: Optional[bool] = None, + log_locals: bool = False, + _stack_offset: int = 1, +): + console.log( + "[error]ERROR[/error]", + *objects, + sep=sep, + end=end, + style=style, + justify=justify, + emoji=emoji, + markup=markup, + highlight=highlight, + log_locals=log_locals, + _stack_offset=_stack_offset + 1, + ) + + +def debug( + *objects: Any, + sep: str = " ", + end: str = "\n", + style: Optional[Union[str, Style]] = None, + justify: Optional[JustifyMethod] = None, + emoji: Optional[bool] = None, + markup: Optional[bool] = None, + highlight: Optional[bool] = None, + log_locals: bool = False, + _stack_offset: int = 1, +): + console.log( + "[debug]DEBUG[/debug]", + *objects, + sep=sep, + end=end, + style=style, + justify=justify, + emoji=emoji, + markup=markup, + highlight=highlight, + log_locals=log_locals, + _stack_offset=_stack_offset + 1, + ) + +def test( + *objects: Any, + sep: str = " ", + end: str = "\n", + style: Optional[Union[str, Style]] = None, + justify: Optional[JustifyMethod] = None, + emoji: Optional[bool] = None, + markup: Optional[bool] = None, + highlight: Optional[bool] = None, + log_locals: bool = False, + _stack_offset: int = 1, +): + console.log( + "[test]TEST[/test]", + *objects, + sep=sep, + end=end, + style=style, + justify=justify, + emoji=emoji, + markup=markup, + highlight=highlight, + log_locals=log_locals, + _stack_offset=_stack_offset + 1, + ) \ No newline at end of file diff --git a/testing/cli_tests/constraint.py b/testing/cli_tests/constraint.py new file mode 100644 index 000000000..65758745b --- /dev/null +++ b/testing/cli_tests/constraint.py @@ -0,0 +1,371 @@ +#!/usr/bin/env python3 + +# Eventually extract these tests to be in the fuzzing integration tests +import os +from faker import Faker +from faker.providers.lorem.en_US import Provider as P +from cli_tests.test_limbo_cli import TestLimboShell +from pydantic import BaseModel +from cli_tests import console +from enum import Enum +import random +import sqlite3 + +sqlite_flags = os.getenv("SQLITE_FLAGS", "-q").split(" ") + + +keywords = [ + "ABORT", + "ACTION", + "ADD", + "AFTER", + "ALL", + "ALTER", + "ALWAYS", + "ANALYZE", + "AND", + "AS", + "ASC", + "ATTACH", + "AUTOINCREMENT", + "BEFORE", + "BEGIN", + "BETWEEN", + "BY", + "CASCADE", + "CASE", + "CAST", + "CHECK", + "COLLATE", + "COLUMN", + "COMMIT", + "CONFLICT", + "CONSTRAINT", + "CREATE", + "CROSS", + "CURRENT", + "CURRENT_DATE", + "CURRENT_TIME", + "CURRENT_TIMESTAMP", + "DATABASE", + "DEFAULT", + "DEFERRABLE", + "DEFERRED", + "DELETE", + "DESC", + "DETACH", + "DISTINCT", + "DO", + "DROP", + "EACH", + "ELSE", + "END", + "ESCAPE", + "EXCEPT", + "EXCLUDE", + "EXCLUSIVE", + "EXISTS", + "EXPLAIN", + "FAIL", + "FILTER", + "FIRST", + "FOLLOWING", + "FOR", + "FOREIGN", + "FROM", + "FULL", + "GENERATED", + "GLOB", + "GROUP", + "GROUPS", + "HAVING", + "IF", + "IGNORE", + "IMMEDIATE", + "IN", + "INDEX", 
+ "INDEXED", + "INITIALLY", + "INNER", + "INSERT", + "INSTEAD", + "INTERSECT", + "INTO", + "IS", + "ISNULL", + "JOIN", + "KEY", + "LAST", + "LEFT", + "LIKE", + "LIMIT", + "MATCH", + "MATERIALIZED", + "NATURAL", + "NO", + "NOT", + "NOTHING", + "NOTNULL", + "NULL", + "NULLS", + "OF", + "OFFSET", + "ON", + "OR", + "ORDER", + "OTHERS", + "OUTER", + "OVER", + "PARTITION", + "PLAN", + "PRAGMA", + "PRECEDING", + "PRIMARY", + "QUERY", + "RAISE", + "RANGE", + "RECURSIVE", + "REFERENCES", + "REGEXP", + "REINDEX", + "RELEASE", + "RENAME", + "REPLACE", + "RESTRICT", + "RETURNING", + "RIGHT", + "ROLLBACK", + "ROW", + "ROWS", + "SAVEPOINT", + "SELECT", + "SET", + "TABLE", + "TEMP", + "TEMPORARY", + "THEN", + "TIES", + "TO", + "TRANSACTION", + "TRIGGER", + "UNBOUNDED", + "UNION", + "UNIQUE", + "UPDATE", + "USING", + "VACUUM", + "VALUES", + "VIEW", + "VIRTUAL", + "WHEN", + "WHERE", + "WINDOW", + "WITH", + "WITHOUT", +] +P.word_list = tuple(word for word in P.word_list if word.upper() not in keywords) +del P +fake: Faker = Faker(locale="en_US").unique +Faker.seed(0) + + +class ColumnType(Enum): + blob = "blob" + integer = "integer" + real = "real" + text = "text" + + def generate(self, faker: Faker) -> str: + match self.value: + case "blob": + blob = sqlite3.Binary(faker.binary(length=4)).hex() + return f"x'{blob}'" + case "integer": + return str(faker.pyint()) + case "real": + return str(faker.pyfloat()) + case "text": + return f"'{faker.text(max_nb_chars=20)}'" + + def __str__(self) -> str: + return self.value.upper() + + +class Column(BaseModel): + name: str + col_type: ColumnType + primary_key: bool + + def generate(faker: Faker) -> "Column": + name = faker.word().replace(" ", "_") + return Column( + name=name, + col_type=Faker().enum(ColumnType), + primary_key=False, + ) + + def __str__(self) -> str: + return f"{self.name} {str(self.col_type)}" + + +class Table(BaseModel): + columns: list[Column] + name: str + + def create_table(self) -> str: + accum = f"CREATE TABLE {self.name} " + col_strings = [str(col) for col in self.columns] + + pk_columns = [col.name for col in self.columns if col.primary_key] + primary_key_stmt = "PRIMARY KEY (" + ", ".join(pk_columns) + ")" + col_strings.append(primary_key_stmt) + + accum = accum + "(" + ", ".join(col_strings) + ");" + + return accum + + def generate_insert(self) -> str: + vals = [col.col_type.generate(fake) for col in self.columns] + vals = ", ".join(vals) + + return f"INSERT INTO {self.name} VALUES ({vals});" + + +class ConstraintTest(BaseModel): + table: Table + db_path: str = "testing/constraint.db" + insert_stmts: list[str] + insert_errors: list[str] + + def run( + self, + limbo: TestLimboShell, + ): + big_stmt = [self.table.create_table()] + for insert_stmt in self.insert_stmts: + big_stmt.append(insert_stmt) + + limbo.run_test("Inserting values into table", "\n".join(big_stmt), "") + + for insert_stmt in self.insert_errors: + limbo.run_test_fn( + insert_stmt, + lambda val: "Runtime error: UNIQUE constraint failed" in val, + ) + limbo.run_test( + "Nothing was inserted after error", + f"SELECT count(*) from {self.table.name};", + str(len(self.insert_stmts)), + ) + + +def validate_with_expected(result: str, expected: str): + return (expected in result, expected) + + +def generate_test(col_amount: int, primary_keys: int) -> ConstraintTest: + assert col_amount >= primary_keys, "Cannot have more primary keys than columns" + cols: list[Column] = [] + for _ in range(col_amount): + cols.append(Column.generate(fake)) + + pk_cols = random.sample( + 
population=cols, + k=primary_keys, + ) + + for col in pk_cols: + for c in cols: + if col.name == c.name: + c.primary_key = True + + table = Table(columns=cols, name=fake.word()) + insert_stmts = [table.generate_insert() for _ in range(col_amount)] + return ConstraintTest( + table=table, insert_stmts=insert_stmts, insert_errors=insert_stmts + ) + + +def custom_test_1() -> ConstraintTest: + cols = [ + Column(name="id", col_type="integer", primary_key=True), + Column(name="username", col_type="text", primary_key=True), + ] + table = Table(columns=cols, name="users") + insert_stmts = [ + "INSERT INTO users VALUES (1, 'alice');", + "INSERT INTO users VALUES (2, 'bob');", + ] + return ConstraintTest( + table=table, insert_stmts=insert_stmts, insert_errors=insert_stmts + ) + + +def custom_test_2(limbo: TestLimboShell): + create = "CREATE TABLE users (id INT PRIMARY KEY, username TEXT);" + first_insert = "INSERT INTO users VALUES (1, 'alice');" + limbo.run_test("Create unique INT index", create + first_insert, "") + fail_insert = "INSERT INTO users VALUES (1, 'bob');" + limbo.run_test_fn( + fail_insert, + lambda val: "Runtime error: UNIQUE constraint failed" in val, + ) + + +def all_tests() -> list[ConstraintTest]: + tests: list[ConstraintTest] = [] + max_cols = 10 + + curr_fake = Faker() + for _ in range(25): + num_cols = curr_fake.pyint(1, max_cols) + test = generate_test(num_cols, curr_fake.pyint(1, num_cols)) + tests.append(test) + + tests.append(custom_test_1()) + return tests + + +def cleanup(db_fullpath: str): + wal_path = f"{db_fullpath}-wal" + shm_path = f"{db_fullpath}-shm" + paths = [db_fullpath, wal_path, shm_path] + for path in paths: + if os.path.exists(path): + os.remove(path) + + +def main(): + tests = all_tests() + for test in tests: + console.info(test.table) + db_path = test.db_path + try: + # Use with syntax to automatically close shell on error + with TestLimboShell("") as limbo: + limbo.execute_dot(f".open {db_path}") + test.run(limbo) + + except Exception as e: + console.error(f"Test FAILED: {e}") + console.debug(test.table.create_table(), test.insert_stmts) + cleanup(db_path) + exit(1) + # delete db after every compat test so we we have fresh db for next test + cleanup(db_path) + + db_path = "testing/constraint.db" + try: + with TestLimboShell("") as limbo: + limbo.execute_dot(f".open {db_path}") + custom_test_2(limbo) + except Exception as e: + console.error(f"Test FAILED: {e}") + cleanup(db_path) + exit(1) + cleanup(db_path) + console.info("All tests passed successfully.") + + +if __name__ == "__main__": + main() diff --git a/testing/cli_tests/extensions.py b/testing/cli_tests/extensions.py index cb73aa760..02d69b13e 100755 --- a/testing/cli_tests/extensions.py +++ b/testing/cli_tests/extensions.py @@ -1,6 +1,7 @@ #!/usr/bin/env python3 import os -from test_limbo_cli import TestLimboShell +from cli_tests.test_limbo_cli import TestLimboShell +from cli_tests import console sqlite_exec = "./scripts/limbo-sqlite3" sqlite_flags = os.getenv("SQLITE_FLAGS", "-q").split(" ") @@ -81,7 +82,7 @@ def test_regexp(): lambda res: "Parse error: no such function" in res, ) limbo.run_test_fn(f".load {extension_path}", null) - print(f"Extension {extension_path} loaded successfully.") + console.info(f"Extension {extension_path} loaded successfully.") limbo.run_test_fn("SELECT regexp('a.c', 'abc');", true) limbo.run_test_fn("SELECT regexp('a.c', 'ac');", false) limbo.run_test_fn("SELECT regexp('[0-9]+', 'the year is 2021');", true) @@ -339,16 +340,18 @@ def test_series(): def test_kv(): 
ext_path = "target/debug/liblimbo_ext_tests" limbo = TestLimboShell() + # first, create a normal table to ensure no issues + limbo.execute_dot("CREATE TABLE other (a,b,c);") + limbo.execute_dot("INSERT INTO other values (23,32,23);") limbo.run_test_fn( "create virtual table t using kv_store;", - lambda res: "Virtual table module not found: kv_store" in res, + lambda res: "Parse error: no such module: kv_store" in res, ) limbo.execute_dot(f".load {ext_path}") - limbo.run_test_fn( + limbo.execute_dot( "create virtual table t using kv_store;", - null, - "can create kv_store vtable", ) + limbo.run_test_fn(".schema", lambda res: "CREATE VIRTUAL TABLE t" in res) limbo.run_test_fn( "insert into t values ('hello', 'world');", null, @@ -395,10 +398,35 @@ def test_kv(): limbo.run_test_fn( "select count(*) from t;", lambda res: "100" == res, "can insert 100 rows" ) + limbo.run_test_fn("update t set value = 'updated' where key = 'key33';", null) + limbo.run_test_fn( + "select * from t where key = 'key33';", + lambda res: res == "key33|updated", + "can update single row", + ) + limbo.run_test_fn( + "select COUNT(*) from t where value = 'updated';", + lambda res: res == "1", + "only updated a single row", + ) + limbo.run_test_fn("update t set value = 'updated2';", null) + limbo.run_test_fn( + "select COUNT(*) from t where value = 'updated2';", + lambda res: res == "100", + "can update all rows", + ) limbo.run_test_fn("delete from t limit 96;", null, "can delete 96 rows") limbo.run_test_fn( "select count(*) from t;", lambda res: "4" == res, "four rows remain" ) + limbo.run_test_fn( + "update t set key = '100' where 1;", null, "where clause evaluates properly" + ) + limbo.run_test_fn( + "select * from t where key = '100';", + lambda res: res == "100|updated2", + "there is only 1 key remaining after setting all keys to same value", + ) limbo.quit() @@ -494,13 +522,33 @@ def test_vfs(): lambda res: res == "50", "Tested large write to testfs", ) - print("Tested large write to testfs") - # Pere: I commented this out because it added an extra row that made the test test_sqlite_vfs_compat fail - # it didn't segfault from my side so maybe this is necessary? 
- # # open regular db file to ensure we don't segfault when vfs file is dropped - # limbo.execute_dot(".open testing/vfs.db") - # limbo.execute_dot("create table test (id integer primary key, value float);") - # limbo.execute_dot("insert into test (value) values (1.0);") + console.info("Tested large write to testfs") + limbo.quit() + + +def test_drop_virtual_table(): + ext_path = "target/debug/liblimbo_ext_tests" + limbo = TestLimboShell() + limbo.execute_dot(f".load {ext_path}") + limbo.debug_print( + "create virtual table t using kv_store;", + ) + limbo.run_test_fn(".schema", lambda res: "CREATE VIRTUAL TABLE t" in res) + limbo.run_test_fn( + "insert into t values ('hello', 'world');", + null, + "can insert into kv_store vtable", + ) + limbo.run_test_fn( + "DROP TABLE t;", + lambda res: "VDestroy called" in res, + "can drop kv_store vtable", + ) + limbo.run_test_fn( + "DROP TABLE t;", + lambda res: "× Parse error: No such table: t" == res, + "should error when drop kv_store vtable", + ) limbo.quit() @@ -540,20 +588,25 @@ def cleanup(): os.remove("testing/vfs.db-wal") -if __name__ == "__main__": +def main(): try: test_regexp() test_uuid() test_aggregates() test_crypto() test_series() - test_kv() test_ipaddr() test_vfs() test_sqlite_vfs_compat() + test_kv() + test_drop_virtual_table() except Exception as e: - print(f"Test FAILED: {e}") + console.error(f"Test FAILED: {e}") cleanup() exit(1) cleanup() - print("All tests passed successfully.") + console.info("All tests passed successfully.") + + +if __name__ == "__main__": + main() diff --git a/testing/cli_tests/memory.py b/testing/cli_tests/memory.py new file mode 100755 index 000000000..a329ba027 --- /dev/null +++ b/testing/cli_tests/memory.py @@ -0,0 +1,110 @@ +#!/usr/bin/env python3 +import os +from cli_tests.test_limbo_cli import TestLimboShell +from cli_tests import console + +sqlite_flags = os.getenv("SQLITE_FLAGS", "-q").split(" ") + + +def validate_with_expected(result: str, expected: str): + return (expected in result, expected) + + +def stub_memory_test( + limbo: TestLimboShell, + name: str, + blob_size: int = 1024**2, + vals: int = 100, + blobs: bool = True, +): + # zero_blob_size = 1024 **2 + zero_blob = "0" * blob_size * 2 + # vals = 100 + big_stmt = ["CREATE TABLE temp (t1 BLOB, t2 INTEGER);"] + big_stmt = big_stmt + [ + f"INSERT INTO temp (t1) VALUES (zeroblob({blob_size}));" + if i % 2 == 0 and blobs + else f"INSERT INTO temp (t2) VALUES ({i});" + for i in range(vals * 2) + ] + expected = [] + for i in range(vals * 2): + if i % 2 == 0 and blobs: + big_stmt.append(f"SELECT hex(t1) FROM temp LIMIT 1 OFFSET {i};") + expected.append(zero_blob) + else: + big_stmt.append(f"SELECT t2 FROM temp LIMIT 1 OFFSET {i};") + expected.append(f"{i}") + + big_stmt.append("SELECT count(*) FROM temp;") + expected.append(str(vals * 2)) + + big_stmt = "".join(big_stmt) + expected = "\n".join(expected) + + limbo.run_test_fn(big_stmt, lambda res: validate_with_expected(res, expected), name) + + +# TODO no delete tests for now because of limbo outputs some debug information on delete +def memory_tests() -> list[dict]: + tests = [] + + for vals in range(0, 1000, 100): + tests.append( + { + "name": f"small-insert-integer-vals-{vals}", + "vals": vals, + "blobs": False, + } + ) + + tests.append( + { + "name": f"small-insert-blob-interleaved-blob-size-{1024}", + "vals": 10, + "blob_size": 1024, + } + ) + tests.append( + { + "name": f"big-insert-blob-interleaved-blob-size-{1024}", + "vals": 100, + "blob_size": 1024, + } + ) + + for blob_size in range(0, 
(1024 * 1024) + 1, 1024 * 4**4): + if blob_size == 0: + continue + tests.append( + { + "name": f"small-insert-blob-interleaved-blob-size-{blob_size}", + "vals": 10, + "blob_size": blob_size, + } + ) + tests.append( + { + "name": f"big-insert-blob-interleaved-blob-size-{blob_size}", + "vals": 100, + "blob_size": blob_size, + } + ) + return tests + + +def main(): + tests = memory_tests() + # TODO see how to parallelize this loop with different subprocesses + for test in tests: + try: + with TestLimboShell("") as limbo: + stub_memory_test(limbo, **test) + except Exception as e: + console.error(f"Test FAILED: {e}") + exit(1) + console.info("All tests passed successfully.") + + +if __name__ == "__main__": + main() diff --git a/testing/cli_tests/test_limbo_cli.py b/testing/cli_tests/test_limbo_cli.py index 10e87869d..626d7defe 100755 --- a/testing/cli_tests/test_limbo_cli.py +++ b/testing/cli_tests/test_limbo_cli.py @@ -5,6 +5,7 @@ from time import sleep import subprocess from pathlib import Path from typing import Callable, List, Optional +from cli_tests import console PIPE_BUF = 4096 @@ -50,7 +51,8 @@ class LimboShell: return "" self._write_to_pipe(f"SELECT '{end_marker}';") output = "" - while True: + done = False + while not done: ready, _, errors = select.select( [self.pipe.stdout, self.pipe.stderr], [], @@ -58,7 +60,7 @@ class LimboShell: ) ready_or_errors = set(ready + errors) if self.pipe.stderr in ready_or_errors: - self._handle_error() + done = self._handle_error() if self.pipe.stdout in ready_or_errors: fragment = self.pipe.stdout.read(PIPE_BUF).decode() output += fragment @@ -71,7 +73,7 @@ class LimboShell: self.pipe.stdin.write((command + "\n").encode()) self.pipe.stdin.flush() - def _handle_error(self) -> None: + def _handle_error(self) -> bool: while True: ready, _, errors = select.select( [self.pipe.stderr], [], [self.pipe.stderr], 0 @@ -79,7 +81,7 @@ class LimboShell: if not (ready + errors): break error_output = self.pipe.stderr.read(PIPE_BUF).decode() - print(error_output, end="") + console.error(error_output, end="", _stack_offset=2) raise RuntimeError("Error encountered in Limbo shell.") @staticmethod @@ -111,7 +113,6 @@ class TestLimboShell: if init_commands is None: # Default initialization init_commands = """ -.open :memory: CREATE TABLE users (id INTEGER PRIMARY KEY, first_name TEXT, last_name TEXT, age INTEGER); CREATE TABLE products (id INTEGER PRIMARY KEY, name TEXT, price INTEGER); INSERT INTO users VALUES (1, 'Alice', 'Smith', 30), (2, 'Bob', 'Johnson', 25), @@ -131,7 +132,7 @@ INSERT INTO t VALUES (zeroblob(1024 - 1), zeroblob(1024 - 2), zeroblob(1024 - 3) self.shell.quit() def run_test(self, name: str, sql: str, expected: str) -> None: - print(f"Running test: {name}") + console.test(f"Running test: {name}", _stack_offset=2) actual = self.shell.execute(sql) assert actual == expected, ( f"Test failed: {name}\n" @@ -141,17 +142,26 @@ INSERT INTO t VALUES (zeroblob(1024 - 1), zeroblob(1024 - 2), zeroblob(1024 - 3) ) def debug_print(self, sql: str): - print(f"debugging: {sql}") + console.debug(f"debugging: {sql}", _stack_offset=2) actual = self.shell.execute(sql) - print(f"OUTPUT:\n{repr(actual)}") + console.debug(f"OUTPUT:\n{repr(actual)}", _stack_offset=2) def run_test_fn( self, sql: str, validate: Callable[[str], bool], desc: str = "" ) -> None: - actual = self.shell.execute(sql) + # Print the test that is executing before executing the sql command + # Printing later confuses the user of the code what test has actually failed if desc: - print(f"Testing: {desc}") + 
console.test(f"Testing: {desc}", _stack_offset=2) + actual = self.shell.execute(sql) assert validate(actual), f"Test failed\nSQL: {sql}\nActual:\n{repr(actual)}" def execute_dot(self, dot_command: str) -> None: self.shell._write_to_pipe(dot_command) + + # Enables the use of `with` syntax + def __enter__(self): + return self + + def __exit__(self, exception_type, exception_value, exception_traceback): + self.quit() diff --git a/testing/cli_tests/update.py b/testing/cli_tests/update.py new file mode 100644 index 000000000..1d0d23b63 --- /dev/null +++ b/testing/cli_tests/update.py @@ -0,0 +1,138 @@ +#!/usr/bin/env python3 +import os +from cli_tests.test_limbo_cli import TestLimboShell +from pydantic import BaseModel +from cli_tests import console + + +sqlite_flags = os.getenv("SQLITE_FLAGS", "-q").split(" ") + + +class UpdateTest(BaseModel): + name: str + db_schema: str = "CREATE TABLE test (key INTEGER, t1 BLOB, t2 INTEGER, t3 TEXT);" + blob_size: int = 1024 + vals: int = 1000 + updates: int = 1 + db_path: str = "testing/update.db" + + def init_db(self): + with TestLimboShell( + init_commands="", + exec_name="sqlite3", + flags=f"{self.db_path}", + ) as sqlite: + sqlite.execute_dot(f".open {self.db_path}") + zero_blob = "0" * self.blob_size * 2 + t2_val = "1" + t3_val = "2" + stmt = [self.db_schema] + stmt = stmt + [ + f"INSERT INTO test (key, t1, t2, t3) VALUES ({i} ,zeroblob({self.blob_size}), {t2_val}, {t3_val});" + for i in range(self.vals) + ] + stmt.append("SELECT count(*) FROM test;") + + sqlite.run_test( + "Init Update Db in Sqlite", + "".join(stmt), + f"{self.vals}", + ) + + stmt = [ + f"SELECT hex(t1), t2, t3 FROM test LIMIT 1 OFFSET {i};" + for i in range(self.vals) + ] + + expected = [f"{zero_blob}|{t2_val}|{t3_val}" for _ in range(self.vals)] + sqlite.run_test( + "Check Values correctly inserted in Sqlite", + "".join(stmt), + "\n".join(expected), + ) + + def run(self, limbo: TestLimboShell): + limbo.execute_dot(f".open {self.db_path}") + # TODO blobs are hard. Forget about blob updates for now + # one_blob = ("0" * ((self.blob_size * 2) - 1)) + "1" + # TODO For now update just on one row. 
To expand the tests in the future + # use self.updates and do more than 1 update + t2_update_val = "123" + stmt = f"UPDATE test SET t2 = {t2_update_val} WHERE key = {0};" + limbo.run_test(self.name, stmt, "") + + def test_compat(self): + console.info("Testing in SQLite\n") + + with TestLimboShell( + init_commands="", + exec_name="sqlite3", + flags=f"{self.db_path}", + ) as sqlite: + sqlite.execute_dot(f".open {self.db_path}") + zero_blob = "0" * self.blob_size * 2 + + t2_val = "1" + t2_update_val = "123" + t3_val = "2" + stmt = [] + stmt.append("SELECT count(*) FROM test;") + + sqlite.run_test( + "Check all rows present in Sqlite", + "".join(stmt), + f"{self.vals}", + ) + + stmt = [ + f"SELECT hex(t1), t2, t3 FROM test LIMIT 1 OFFSET {i};" + for i in range(self.vals) + ] + + expected = [ + f"{zero_blob}|{t2_val}|{t3_val}" + if i != 0 + else f"{zero_blob}|{t2_update_val}|{t3_val}" + for i in range(self.vals) + ] + sqlite.run_test( + "Check Values correctly updated in Sqlite", + "".join(stmt), + "\n".join(expected), + ) + console.info() + + +def cleanup(db_fullpath: str): + wal_path = f"{db_fullpath}-wal" + shm_path = f"{db_fullpath}-shm" + paths = [db_fullpath, wal_path, shm_path] + for path in paths: + if os.path.exists(path): + os.remove(path) + + +def main(): + test = UpdateTest(name="Update 1 column", vals=1) + console.info(test) + + db_path = test.db_path + try: + test.init_db() + # Use with syntax to automatically close shell on error + with TestLimboShell("") as limbo: + test.run(limbo) + + test.test_compat() + + except Exception as e: + console.error(f"Test FAILED: {e}") + cleanup(db_path) + exit(1) + # delete db after every compat test so we we have fresh db for next test + cleanup(db_path) + console.info("All tests passed successfully.") + + +if __name__ == "__main__": + main() diff --git a/testing/cli_tests/vfs_bench.py b/testing/cli_tests/vfs_bench.py new file mode 100644 index 000000000..ae5a969d0 --- /dev/null +++ b/testing/cli_tests/vfs_bench.py @@ -0,0 +1,116 @@ +#!/usr/bin/env python3 + +# vfs benchmarking/comparison +import os +from pathlib import Path +import subprocess +import statistics +import argparse +from time import perf_counter, sleep +from typing import Dict + +from cli_tests.test_limbo_cli import TestLimboShell +from cli_tests.console import info, error, test + +LIMBO_BIN = Path("./target/release/limbo") +DB_FILE = Path("testing/temp.db") +vfs_list = ["syscall", "io_uring"] + + +def append_time(times, start, perf_counter): + times.append(perf_counter() - start) + return True + + +def bench_one(vfs: str, sql: str, iterations: int) -> list[float]: + """ + Launch a single Limbo process with the requested VFS, run `sql` + `iterations` times, return a list of elapsed wall‑clock times. 
+ """ + shell = TestLimboShell( + exec_name=str(LIMBO_BIN), + flags=f"-q -m list --vfs {vfs} {DB_FILE}", + init_commands="", + ) + + times: list[float] = [] + + for i in range(1, iterations + 1): + start = perf_counter() + _ = shell.run_test_fn( + sql, lambda x: x is not None and append_time(times, start, perf_counter) + ) + test(f" {vfs} | run {i:>3}: {times[-1]:.6f}s") + + shell.quit() + return times + + +def setup_temp_db() -> None: + cmd = ["sqlite3", "testing/testing.db", ".clone testing/temp.db"] + proc = subprocess.run(cmd, check=True) + proc.check_returncode() + sleep(0.3) # make sure it's finished + + +def cleanup_temp_db() -> None: + if DB_FILE.exists(): + DB_FILE.unlink() + os.remove("testing/temp.db-wal") + + +def main() -> None: + parser = argparse.ArgumentParser( + description="Benchmark a SQL statement against all Limbo VFS back‑ends." + ) + parser.add_argument("sql", help="SQL statement to execute (quote it)") + parser.add_argument("iterations", type=int, help="number of repetitions") + args = parser.parse_args() + setup_temp_db() + + sql, iterations = args.sql, args.iterations + if iterations <= 0: + error("iterations must be a positive integer") + parser.error("Invalid Arguments") + + info(f"SQL : {sql}") + info(f"Iterations : {iterations}") + info(f"Database : {DB_FILE.resolve()}") + info("-" * 60) + averages: Dict[str, float] = {} + + for vfs in vfs_list: + test(f"\n### VFS: {vfs} ###") + times = bench_one(vfs, sql, iterations) + info(f"All times ({vfs}):", " ".join(f"{t:.6f}" for t in times)) + avg = statistics.mean(times) + averages[vfs] = avg + + info("\n" + "-" * 60) + info("Average runtime per VFS") + info("-" * 60) + + for vfs in vfs_list: + info(f"vfs: {vfs} : {averages[vfs]:.6f} s") + info("-" * 60) + + baseline = "syscall" + baseline_avg = averages[baseline] + + name_pad = max(len(v) for v in vfs_list) + for vfs in vfs_list: + avg = averages[vfs] + if vfs == baseline: + info(f"{vfs:<{name_pad}} : {avg:.6f} (baseline)") + else: + pct = (avg - baseline_avg) / baseline_avg * 100.0 + faster_slower = "slower" if pct > 0 else "faster" + info( + f"{vfs:<{name_pad}} : {avg:.6f} ({abs(pct):.1f}% {faster_slower} than {baseline})" + ) + info("-" * 60) + cleanup_temp_db() + + +if __name__ == "__main__": + main() diff --git a/testing/cli_tests/write.py b/testing/cli_tests/write.py new file mode 100755 index 000000000..e3f7fd04c --- /dev/null +++ b/testing/cli_tests/write.py @@ -0,0 +1,157 @@ +#!/usr/bin/env python3 +import os +from cli_tests.test_limbo_cli import TestLimboShell +from pydantic import BaseModel +from cli_tests import console + + +sqlite_flags = os.getenv("SQLITE_FLAGS", "-q").split(" ") + + +class InsertTest(BaseModel): + name: str + db_schema: str = "CREATE TABLE test (t1 BLOB, t2 INTEGER);" + blob_size: int = 1024**2 + vals: int = 100 + has_blob: bool = True + db_path: str = "testing/writes.db" + + def run(self, limbo: TestLimboShell): + zero_blob = "0" * self.blob_size * 2 + big_stmt = [self.db_schema] + big_stmt = big_stmt + [ + f"INSERT INTO test (t1) VALUES (zeroblob({self.blob_size}));" + if i % 2 == 0 and self.has_blob + else f"INSERT INTO test (t2) VALUES ({i});" + for i in range(self.vals * 2) + ] + expected = [] + for i in range(self.vals * 2): + if i % 2 == 0 and self.has_blob: + big_stmt.append(f"SELECT hex(t1) FROM test LIMIT 1 OFFSET {i};") + expected.append(zero_blob) + else: + big_stmt.append(f"SELECT t2 FROM test LIMIT 1 OFFSET {i};") + expected.append(f"{i}") + + big_stmt.append("SELECT count(*) FROM test;") + expected.append(str(self.vals 
* 2)) + + big_stmt = "".join(big_stmt) + expected = "\n".join(expected) + + limbo.run_test_fn( + big_stmt, lambda res: validate_with_expected(res, expected), self.name + ) + + def test_compat(self): + console.info("Testing in SQLite\n") + + with TestLimboShell( + init_commands="", + exec_name="sqlite3", + flags=f"{self.db_path}", + ) as sqlite: + sqlite.run_test_fn( + ".show", + lambda res: f"filename: {self.db_path}" in res, + "Opened db file created with Limbo in sqlite3", + ) + sqlite.run_test_fn( + ".schema", + lambda res: self.db_schema in res, + "Tables created by previous Limbo test exist in db file", + ) + sqlite.run_test_fn( + "SELECT count(*) FROM test;", + lambda res: res == str(self.vals * 2), + "Counting total rows inserted", + ) + console.info() + + +def validate_with_expected(result: str, expected: str): + return (expected in result, expected) + + +# TODO no delete tests for now +def blob_tests() -> list[InsertTest]: + tests: list[InsertTest] = [] + + for vals in range(0, 1000, 100): + tests.append( + InsertTest( + name=f"small-insert-integer-vals-{vals}", + vals=vals, + has_blob=False, + ) + ) + + tests.append( + InsertTest( + name=f"small-insert-blob-interleaved-blob-size-{1024}", + vals=10, + blob_size=1024, + ) + ) + tests.append( + InsertTest( + name=f"big-insert-blob-interleaved-blob-size-{1024}", + vals=100, + blob_size=1024, + ) + ) + + for blob_size in range(0, (1024 * 1024) + 1, 1024 * 4**4): + if blob_size == 0: + continue + tests.append( + InsertTest( + name=f"small-insert-blob-interleaved-blob-size-{blob_size}", + vals=10, + blob_size=blob_size, + ) + ) + tests.append( + InsertTest( + name=f"big-insert-blob-interleaved-blob-size-{blob_size}", + vals=100, + blob_size=blob_size, + ) + ) + return tests + + +def cleanup(db_fullpath: str): + wal_path = f"{db_fullpath}-wal" + shm_path = f"{db_fullpath}-shm" + paths = [db_fullpath, wal_path, shm_path] + for path in paths: + if os.path.exists(path): + os.remove(path) + + +def main(): + tests = blob_tests() + for test in tests: + console.info(test) + db_path = test.db_path + try: + # Use with syntax to automatically close shell on error + with TestLimboShell("") as limbo: + limbo.execute_dot(f".open {db_path}") + test.run(limbo) + + test.test_compat() + + except Exception as e: + console.error(f"Test FAILED: {e}") + cleanup(db_path) + exit(1) + # delete db after every compat test so we we have fresh db for next test + cleanup(db_path) + console.info("All tests passed successfully.") + + +if __name__ == "__main__": + main() diff --git a/testing/default_value.test b/testing/default_value.test new file mode 100644 index 000000000..32a39144b --- /dev/null +++ b/testing/default_value.test @@ -0,0 +1,43 @@ +#!/usr/bin/env tclsh + +set testdir [file dirname $argv0] +source $testdir/tester.tcl + +do_execsql_test_on_specific_db {:memory:} default-value-text { + CREATE TABLE t1(x INTEGER PRIMARY KEY, y TEXT DEFAULT 'default_value'); + INSERT INTO t1 (x) VALUES (1); + SELECT y FROM t1 WHERE x = 1; +} {default_value} + +do_execsql_test_on_specific_db {:memory:} default-value-integer { + CREATE TABLE t2(x INTEGER PRIMARY KEY, y INTEGER DEFAULT 42); + INSERT INTO t2 (x) VALUES (1); + SELECT y FROM t2 WHERE x = 1; +} {42} + +do_execsql_test_on_specific_db {:memory:} default-value-real { + CREATE TABLE t3(x INTEGER PRIMARY KEY, y REAL DEFAULT 3.14); + INSERT INTO t3 (x) VALUES (1); + SELECT y FROM t3 WHERE x = 1; +} {3.14} + +do_execsql_test_on_specific_db {:memory:} default-value-null { + CREATE TABLE t5(x INTEGER PRIMARY KEY, y TEXT 
DEFAULT NULL); + INSERT INTO t5 (x) VALUES (1); + SELECT y FROM t5 WHERE x = 1; +} {} + +do_execsql_test_on_specific_db {:memory:} default-value-boolean { + CREATE TABLE t6(x INTEGER PRIMARY KEY, y BOOLEAN DEFAULT 1); + INSERT INTO t6 (x) VALUES (1); + SELECT y FROM t6 WHERE x = 1; +} {1} + +do_execsql_test_on_specific_db {:memory:} default-value-function { + CREATE TABLE t7(x INTEGER PRIMARY KEY, y INTEGER DEFAULT (ABS(-5))); + INSERT INTO t7 (x) VALUES (1); + SELECT y FROM t7 WHERE x = 1; +} {5} + + + diff --git a/testing/drop_table.test b/testing/drop_table.test index c6daf04d4..e1c48ec0c 100755 --- a/testing/drop_table.test +++ b/testing/drop_table.test @@ -26,23 +26,23 @@ do_execsql_test_on_specific_db {:memory:} drop-table-if-exists-2 { } {success} # Test dropping table with index -#do_execsql_test_on_specific_db {:memory:} drop-table-with-index-1 { -# CREATE TABLE t3(x INTEGER PRIMARY KEY, y TEXT); -# CREATE INDEX idx_t3_y ON t3(y); -# INSERT INTO t3 VALUES(1, 'one'); -# DROP TABLE t3; -# SELECT count(*) FROM sqlite_schema WHERE tbl_name='t3'; -#} {0} +do_execsql_test_on_specific_db {:memory:} drop-table-with-index-1 { + CREATE TABLE t3(x INTEGER PRIMARY KEY, y TEXT); + CREATE INDEX idx_t3_y ON t3(y); + INSERT INTO t3 VALUES(1, 'one'); + DROP TABLE t3; + SELECT count(*) FROM sqlite_schema WHERE tbl_name='t3'; +} {0} # Test dropping table cleans up related schema entries -#do_execsql_test_on_specific_db {:memory:} drop-table-schema-cleanup-1 { -# CREATE TABLE t4(x INTEGER PRIMARY KEY, y TEXT); -# CREATE INDEX idx1_t4 ON t4(x); -# CREATE INDEX idx2_t4 ON t4(y); -# INSERT INTO t4 VALUES(1, 'one'); -# DROP TABLE t4; -# SELECT count(*) FROM sqlite_schema WHERE tbl_name='t4'; -#} {0} +do_execsql_test_on_specific_db {:memory:} drop-table-schema-cleanup-1 { + CREATE TABLE t4(x INTEGER PRIMARY KEY, y TEXT); + CREATE INDEX idx1_t4 ON t4(x); + CREATE INDEX idx2_t4 ON t4(y); + INSERT INTO t4 VALUES(1, 'one'); + DROP TABLE t4; + SELECT count(*) FROM sqlite_schema WHERE tbl_name='t4'; +} {0} # Test dropping table after multiple inserts and deletes do_execsql_test_on_specific_db {:memory:} drop-table-after-ops-1 { diff --git a/testing/groupby.test b/testing/groupby.test index 9fd6e51bf..9fce2e83e 100644 --- a/testing/groupby.test +++ b/testing/groupby.test @@ -185,3 +185,10 @@ William|111} do_execsql_test group_by_column_number { select u.first_name, count(1) from users u group by 1 limit 1; } {Aaron|41} + +# There was a regression where we incorrectly removed SOME order by terms and left others in place, which is invalid and results in wrong rows being returned. 
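+# (In the query below, either both "id" and "last_name desc" survive into the sort,
+# or both are proven redundant; dropping only one of them yields an invalid plan.)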
+do_execsql_test groupby_orderby_removal_regression_test {
+    select id, last_name, count(1) from users GROUP BY 1,2 order by id, last_name desc limit 3;
+} {1|Foster|1
+2|Salazar|1
+3|Perry|1}
diff --git a/testing/insert.test b/testing/insert.test
index 5a37fd692..6c14ee249 100755
--- a/testing/insert.test
+++ b/testing/insert.test
@@ -15,4 +15,149 @@ do_execsql_test_on_specific_db {:memory:} must-be-int-insert {
 } {1
 2
 3
-4}
\ No newline at end of file
+4}
+
+do_execsql_test_on_specific_db {:memory:} strict-basic-creation {
+    CREATE TABLE test1(id INTEGER, name TEXT, price REAL) STRICT;
+    INSERT INTO test1 VALUES(1, 'item1', 10.5);
+    SELECT * FROM test1;
+} {1|item1|10.5}
+
+do_execsql_test_in_memory_any_error strict-require-datatype {
+    CREATE TABLE test2(id INTEGER, name) STRICT;
+}
+
+do_execsql_test_in_memory_any_error strict-valid-datatypes {
+    CREATE TABLE test2(id INTEGER, value DATETIME) STRICT;
+}
+
+do_execsql_test_in_memory_any_error strict-type-enforcement {
+    CREATE TABLE test3(id INTEGER, name TEXT, price REAL) STRICT;
+    INSERT INTO test3 VALUES(1, 'item1', 'not-a-number');
+}
+
+do_execsql_test_on_specific_db {:memory:} strict-type-coercion {
+    CREATE TABLE test4(id INTEGER, name TEXT, price REAL) STRICT;
+    INSERT INTO test4 VALUES(1, 'item1', '10.5');
+    SELECT typeof(price), price FROM test4;
+} {real|10.5}
+
+do_execsql_test_on_specific_db {:memory:} strict-any-flexibility {
+    CREATE TABLE test5(id INTEGER, data ANY) STRICT;
+    INSERT INTO test5 VALUES(1, 100);
+    INSERT INTO test5 VALUES(2, 'text');
+    INSERT INTO test5 VALUES(3, 3.14);
+    SELECT id, typeof(data) FROM test5 ORDER BY id;
+} {1|integer
+2|text
+3|real}
+
+do_execsql_test_on_specific_db {:memory:} strict-any-preservation {
+    CREATE TABLE test6(id INTEGER, code ANY) STRICT;
+    INSERT INTO test6 VALUES(1, '000123');
+    SELECT typeof(code), code FROM test6;
+} {text|000123}
+
+do_execsql_test_in_memory_any_error strict-int-vs-integer-pk {
+    CREATE TABLE test8(id INT PRIMARY KEY, name TEXT) STRICT;
+    INSERT INTO test8 VALUES(NULL, 'test');
+}
+
+do_execsql_test_on_specific_db {:memory:} strict-integer-pk-behavior {
+    CREATE TABLE test9(id INTEGER PRIMARY KEY, name TEXT) STRICT;
+    INSERT INTO test9 VALUES(NULL, 'test');
+    SELECT id, name FROM test9;
+} {1|test}
+
+
+do_execsql_test_on_specific_db {:memory:} strict-mixed-inserts {
+    CREATE TABLE test11(
+        id INTEGER PRIMARY KEY,
+        name TEXT,
+        price REAL,
+        quantity INT,
+        tags ANY
+    ) STRICT;
+
+    INSERT INTO test11 VALUES(1, 'item1', 10.5, 5, 'tag1');
+    INSERT INTO test11 VALUES(2, 'item2', 20.75, 10, 42);
+
+    SELECT id, name, price, quantity, typeof(tags) FROM test11 ORDER BY id;
+} {1|item1|10.5|5|text
+2|item2|20.75|10|integer}
+
+do_execsql_test_on_specific_db {:memory:} strict-update-basic {
+    CREATE TABLE test1(id INTEGER, name TEXT, price REAL) STRICT;
+    INSERT INTO test1 VALUES(1, 'item1', 10.5);
+    UPDATE test1 SET price = 15.75 WHERE id = 1;
+    SELECT * FROM test1;
+} {1|item1|15.75}
+
+do_execsql_test_in_memory_any_error strict-update-type-enforcement {
+    CREATE TABLE test2(id INTEGER, name TEXT, price REAL) STRICT;
+    INSERT INTO test2 VALUES(1, 'item1', 10.5);
+    UPDATE test2 SET price = 'not-a-number' WHERE id = 1;
+}
+
+do_execsql_test_on_specific_db {:memory:} strict-update-type-coercion {
+    CREATE TABLE test3(id INTEGER, name TEXT, price REAL) STRICT;
+    INSERT INTO test3 VALUES(1, 'item1', 10.5);
+    UPDATE test3 SET price = '15.75' WHERE id = 1;
+    SELECT id, typeof(price), price FROM test3;
+} {1|real|15.75}
+
+do_execsql_test_on_specific_db {:memory:} 
strict-update-any-flexibility { + CREATE TABLE test4(id INTEGER, data ANY) STRICT; + INSERT INTO test4 VALUES(1, 100); + UPDATE test4 SET data = 'text' WHERE id = 1; + INSERT INTO test4 VALUES(2, 'original'); + UPDATE test4 SET data = 3.14 WHERE id = 2; + SELECT id, typeof(data), data FROM test4 ORDER BY id; +} {1|text|text +2|real|3.14} + +do_execsql_test_on_specific_db {:memory:} strict-update-any-preservation { + CREATE TABLE test5(id INTEGER, code ANY) STRICT; + INSERT INTO test5 VALUES(1, 'text'); + UPDATE test5 SET code = '000123' WHERE id = 1; + SELECT typeof(code), code FROM test5; +} {text|000123} + +do_execsql_test_in_memory_any_error strict-update-not-null-constraint { + CREATE TABLE test7(id INTEGER, name TEXT NOT NULL) STRICT; + INSERT INTO test7 VALUES(1, 'name'); + UPDATE test7 SET name = NULL WHERE id = 1; +} + +# Uncomment following test case when unique constraint is added +#do_execsql_test_any_error strict-update-pk-constraint { +# CREATE TABLE test8(id INTEGER PRIMARY KEY, name TEXT) STRICT; +# INSERT INTO test8 VALUES(1, 'name1'); +# INSERT INTO test8 VALUES(2, 'name2'); +# UPDATE test8 SET id = 2 WHERE id = 1; +#} + +do_execsql_test_on_specific_db {:memory:} strict-update-multiple-columns { + CREATE TABLE test9(id INTEGER, name TEXT, price REAL, quantity INT) STRICT; + INSERT INTO test9 VALUES(1, 'item1', 10.5, 5); + UPDATE test9 SET name = 'updated', price = 20.75, quantity = 10 WHERE id = 1; + SELECT * FROM test9; +} {1|updated|20.75|10} + +do_execsql_test_on_specific_db {:memory:} strict-update-where-clause { + CREATE TABLE test10(id INTEGER, category TEXT, price REAL) STRICT; + INSERT INTO test10 VALUES(1, 'A', 10); + INSERT INTO test10 VALUES(2, 'A', 20); + INSERT INTO test10 VALUES(3, 'B', 30); + UPDATE test10 SET price = price * 2 WHERE category = 'A'; + SELECT id, price FROM test10 ORDER BY id; +} {1|20.0 +2|40.0 +3|30.0} + +do_execsql_test_on_specific_db {:memory:} strict-update-expression { + CREATE TABLE test11(id INTEGER, name TEXT, price REAL, discount REAL) STRICT; + INSERT INTO test11 VALUES(1, 'item1', 100, 0.1); + UPDATE test11 SET price = price - (price * discount); + SELECT id, price FROM test11; +} {1|90.0} diff --git a/testing/join.test b/testing/join.test index 64b3dcbd3..1f5eb0f1f 100755 --- a/testing/join.test +++ b/testing/join.test @@ -272,4 +272,14 @@ do_execsql_test natural-join-and-using-join { select u.id, u2.id, p.id from users u natural join products p join users u2 using (first_name) limit 3; } {"1|1|1 1|1204|1 -1|1261|1"} \ No newline at end of file +1|1261|1"} + +# regression test for a backwards iteration left join case, +# where the null flag of the right table was not cleared after a previous unmatched row. 
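+# In the expected rows below, user 12 matches no product, so its product_name is NULL;
+# that stale NULL flag must be cleared before user 11 (which matches 'accessories')
+# is produced by the next backwards step.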
+do_execsql_test left-join-backwards-iteration {
+    select users.id, users.first_name as user_name, products.name as product_name
+    from users left join products on users.id = products.id
+    where users.id < 13 order by users.id desc limit 3;
+} {12|Alan|
+11|Travis|accessories
+10|Daniel|coat}
\ No newline at end of file
diff --git a/testing/math.test b/testing/math.test
index d1747f976..bc44a72ef 100755
--- a/testing/math.test
+++ b/testing/math.test
@@ -8,18 +8,31 @@ do_execsql_test fuzz-test-failure {
   SELECT mod(atanh(tanh(-1.0)), ((1.0))) / ((asinh(-1.0) / 2.0 * 1.0) + pow(0.0, 1.0) + 0.5);
 } {-16.8596516555675}
 
-do_execsql_test add-int {
+do_execsql_test add-int-1 {
   SELECT 10 + 1
 } {11}
 
+do_execsql_test add-int-2 {
+  SELECT 0xA + 0xFF
+} {265}
+
+do_execsql_test add-int-3 {
+  SELECT 0xA + 1
+} {11}
+
+
 do_execsql_test add-float {
   SELECT 10.1 + 0.3
 } {10.4}
 
-do_execsql_test add-int-float {
+do_execsql_test add-int-float-1 {
   SELECT 10 + 0.1
 } {10.1}
 
+do_execsql_test add-int-float-2 {
+  SELECT 0xa + 0.1
+} {10.1}
+
 do_execsql_test add-agg-int-agg-int {
   SELECT sum(1) + sum(2)
 } {3}
@@ -82,6 +95,18 @@ do_execsql_test subtract-agg-float-agg-int {
   SELECT sum(3.5) - sum(1)
 } {2.5}
 
+do_execsql_test subtract-blob {
+  SELECT -x'11'
+} {0}
+
+do_execsql_test subtract-blob-empty {
+  SELECT -x''
+} {0}
+
+do_execsql_test subtract-blob-character {
+  SELECT -'hi';
+} {0}
+
 foreach {testnum lhs rhs ans} {
   1 'a' 'a' 0
   2 'a' 10 -10
@@ -599,10 +624,14 @@ do_execsql_test bitwise-not-text-float {
   SELECT ~'823.34'
 } {-824}
 
-do_execsql_test bitwise-not-text-int {
+do_execsql_test bitwise-not-text-int-1 {
   SELECT ~'1234'
 } {-1235}
 
+do_execsql_test bitwise-not-text-int-2 {
+  SELECT ~0xA
+} {-11}
+
 do_execsql_test bitwise-not-scalar-float {
   SELECT ~abs(693.9)
 } {-694}
@@ -627,23 +656,22 @@ do_execsql_test bitwise-not-zero {
   SELECT ~0
 } {-1}
 
-foreach {testname lhs ans} {
-  int-1 1 0
-  int-2 2 0
-  int-3 0 1
-  float-1 1.0 0
-  float-2 2.0 0
-  float-3 0.0 1
-  text 'a' 1
-  text-int-1 '0' 1
-  text-int-2 '1' 0
-  text-float-1 '1.0' 0
-  text-float-2 '0.0' 1
-  text-float-edge '12-23.0' 0
-  null NULL {}
-} {
-  do_execsql_test boolean-not "SELECT not $lhs" $::ans
-}
+do_execsql_test bitwise-not-empty-blob {
+  SELECT ~x''
+} {-1}
+
+do_execsql_test bitwise-not-cast-blob {
+  SELECT ~ CAST ('af' AS BLOB);
+} {-1}
+
+do_execsql_test bitwise-not-blob {
+  SELECT ~ x'0000';
+} {-1}
+
+do_execsql_test bitwise-not-blob-2 {
+  SELECT ~ x'0001';
+} {-1}
+
 
 do_execsql_test pi {
   SELECT pi()
@@ -1309,54 +1337,75 @@ do_execsql_test log-int-null {
   SELECT log(5, null)
 } {}
 
-do_execsql_test mod-int-null {
+do_execsql_test remainder-int-null {
   SELECT 183 % null
 } {}
 
-do_execsql_test mod-int-0 {
+do_execsql_test remainder-int-0 {
   SELECT 183 % 0
 } {}
 
-do_execsql_test mod-int-int {
+do_execsql_test remainder-int-int {
   SELECT 183 % 10
 } { 3 }
 
-do_execsql_test mod-int-float {
+do_execsql_test remainder-int-float {
   SELECT 38 % 10.35
 } { 8.0 }
 
-do_execsql_test mod-float-int {
+do_execsql_test remainder-float-int {
  SELECT 38.43 % 13
 } { 12.0 }
 
-do_execsql_test mod-0-float {
+do_execsql_test remainder-0-float {
   SELECT 0 % 12.0
 } { 0.0 }
 
-do_execsql_test mod-float-0 {
+do_execsql_test remainder-float-0 {
   SELECT 23.14 % 0
 } {}
 
-do_execsql_test mod-float-float {
+do_execsql_test remainder-float-float {
   SELECT 23.14 % 12.0
 } { 11.0 }
 
-do_execsql_test mod-float-agg {
+do_execsql_test remainder-float-agg {
   SELECT 23.14 % sum(id) from products
 } { 23.0 }
 
-do_execsql_test mod-int-agg {
+do_execsql_test remainder-int-agg {
   SELECT 17 % sum(id) from users
 } { 17 }
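+# (The mod-* names for these % tests are being retired in favor of remainder-*,
+# presumably to avoid confusion with the mod() scalar function exercised at the
+# top of this file.)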
-do_execsql_test mod-agg-int {
+do_execsql_test remainder-agg-int {
   SELECT count(*) % 17 from users
 } { 4 }
 
-do_execsql_test mod-agg-float {
+do_execsql_test remainder-agg-float {
   SELECT count(*) % 2.43 from users
 } { 0.0 }
 
+foreach {testnum lhs rhs ans} {
+  1 'a' 'a' {}
+  2 'a' 10 0
+  3 10 'a' {}
+  4 'a' 11.0 0.0
+  5 11.0 'a' {}
+  7 '10' '3' 1
+  8 '10.0' '3' 1.0
+  9 '10.0' -3 1.0
+} {
+  do_execsql_test remainder-text-$testnum "SELECT $lhs % $rhs" $::ans
+}
+
+foreach {testnum lhs rhs ans} {
+  1 '-9223372036854775808' '-1' 0
+  2 -9223372036854775808 -1 0
+  3 -9223372036854775809 -1 0.0
+} {
+  do_execsql_test remainder-overflow-$testnum "SELECT $lhs % $rhs" $::ans
+}
+
 do_execsql_test comp-float-float {
   SELECT 0.0 = 0.0
 } { 1 }
diff --git a/testing/orderby.test b/testing/orderby.test
index f23c41bfd..b5b56cdd4 100755
--- a/testing/orderby.test
+++ b/testing/orderby.test
@@ -141,3 +141,62 @@ Collin|15}
 do_execsql_test case-insensitive-alias {
   select u.first_name as fF, count(1) > 0 as cC from users u where fF = 'Jamie' group by fF order by cC;
 } {Jamie|1}
+
+do_execsql_test age_idx_order_desc {
+  select first_name from users order by age desc limit 3;
+} {Robert
+Sydney
+Matthew}
+
+do_execsql_test rowid_or_integer_pk_desc {
+  select first_name from users order by id desc limit 3;
+} {Nicole
+Gina
+Dorothy}
+
+# The following two tests may seem dumb, but they verify that index scanning by age_idx doesn't drop any rows due to BTree bugs
+do_execsql_test orderby_asc_verify_rows {
+  select count(1) from (select * from users order by age asc)
+} {10000}
+
+do_execsql_test orderby_desc_verify_rows {
+  select count(1) from (select * from users order by age desc)
+} {10000}
+
+do_execsql_test orderby_desc_with_offset {
+  select first_name, age from users order by age desc limit 3 offset 666;
+} {Francis|94
+Matthew|94
+Theresa|94}
+
+do_execsql_test orderby_desc_with_filter {
+  select first_name, age from users where age <= 50 order by age desc limit 5;
+} {Gerald|50
+Nicole|50
+Tammy|50
+Marissa|50
+Daniel|50}
+
+do_execsql_test orderby_asc_with_filter_range {
+  select first_name, age from users where age <= 50 and age >= 49 order by age asc limit 5;
+} {William|49
+Jennifer|49
+Robert|49
+David|49
+Stephanie|49}
+
+do_execsql_test orderby_desc_with_filter_id_lt {
+  select id from users where id < 6666 order by id desc limit 5;
+} {6665
+6664
+6663
+6662
+6661}
+
+do_execsql_test orderby_desc_with_filter_id_le {
+  select id from users where id <= 6666 order by id desc limit 5;
+} {6666
+6665
+6664
+6663
+6662}
\ No newline at end of file
diff --git a/testing/pragma.test b/testing/pragma.test
index c478c032c..4d56e06ab 100755
--- a/testing/pragma.test
+++ b/testing/pragma.test
@@ -33,9 +33,10 @@ do_execsql_test pragma-table-info-invalid-table {
   PRAGMA table_info=pekka
 } {}
 
-do_execsql_test_on_specific_db ":memory:" pragma-page-count-empty {
-  PRAGMA page_count
-} {0}
+# temporarily skip this test case. 
The issue is detailed in #1407 +#do_execsql_test_on_specific_db ":memory:" pragma-page-count-empty { +# PRAGMA page_count +#} {0} do_execsql_test_on_specific_db ":memory:" pragma-page-count-table { CREATE TABLE foo(bar); diff --git a/testing/pyproject.toml b/testing/pyproject.toml new file mode 100644 index 000000000..0aed7b99b --- /dev/null +++ b/testing/pyproject.toml @@ -0,0 +1,32 @@ +[project] +description = "Limbo Python Testing Project" +name = "limbo_test" +readme = "README.md" +requires-python = ">=3.13" +version = "0.1.0" +dependencies = [ + "faker>=37.1.0", + "pydantic>=2.11.1", +] + +[project.scripts] +test-write = "cli_tests.write:main" +test-shell = "cli_tests.cli_test_cases:main" +test-extensions = "cli_tests.extensions:main" +test-update = "cli_tests.update:main" +test-memory = "cli_tests.memory:main" +bench-vfs = "cli_tests.vfs_bench:main" +test-constraint = "cli_tests.constraint:main" + +[tool.uv] +package = true + +[build-system] +build-backend = "hatchling.build" +requires = ["hatchling", "hatch-vcs"] + +[tool.hatch.build.targets.wheel] +packages = ["cli_tests"] + +[tool.hatch.metadata] +allow-direct-references = true diff --git a/testing/scalar-functions-datetime.test b/testing/scalar-functions-datetime.test index fd450dc02..33caf52c2 100755 --- a/testing/scalar-functions-datetime.test +++ b/testing/scalar-functions-datetime.test @@ -423,26 +423,33 @@ do_execsql_test julianday-time-only { SELECT julianday('15:30:45'); } {2451545.14635417} -# -# TODO: fix precision issue -# -#do_execsql_test julianday-midnight { -# SELECT julianday('2023-05-18 00:00:00'); -#} {2460082.5} +do_execsql_test julianday-midnight { + SELECT julianday('2023-05-18 00:00:00'); +} {2460082.5} -#do_execsql_test julianday-noon { -# SELECT julianday('2023-05-18 12:00:00'); -#} {2460083.0} +do_execsql_test julianday-noon { + SELECT julianday('2023-05-18 12:00:00'); +} {2460083.0} -#do_execsql_test julianday-fractional-zero { -# SELECT julianday('2023-05-18 00:00:00.000'); -#} {2460082.5} +do_execsql_test julianday-fractional-zero { + SELECT julianday('2023-05-18 00:00:00.000'); +} {2460082.5} -# same issue as above, we return .5000000 because we are using fmt precision -#do_execsql_test julianday-date-only { -# SELECT julianday('2023-05-18'); -#} {2460082.5} +do_execsql_test julianday-date-only { + SELECT julianday('2023-05-18'); +} {2460082.5} +do_execsql_test julianday-with-modifier-day { + SELECT julianday(2454832.5,'+1 day'); +} {2454833.5} + +do_execsql_test julianday-with-modifier-hour { + SELECT julianday(2454832.5,'-3 hours'); +} {2454832.375} + +do_execsql_test julianday-max-day { + SELECT julianday('9999-12-31 23:59:59'); +} {5373484.49998843} @@ -589,3 +596,74 @@ set FMT [list %S.%3f %C %y %b %B %h %a %A %D %x %v %.f %.3f %.6f %.9f %3f %6f %9 foreach i $FMT { do_execsql_test strftime-invalid-$i "SELECT strftime('$i','2025-01-23T13:14:30.567');" {} } + +do_execsql_test strftime-julianday { + SELECT strftime('%Y-%m-%d %H:%M:%fZ', 2459717.08070103); +} {"2022-05-17 13:56:12.569Z"} + + +# Tests for the TIMEDIFF function + +do_execsql_test timediff-basic-positive { + SELECT timediff('14:30:45', '12:00:00'); +} {"+0000-00-00 02:30:45.000"} + +do_execsql_test timediff-basic-negative { + SELECT timediff('12:00:00', '14:30:45'); +} {"-0000-00-00 02:30:45.000"} + +do_execsql_test timediff-with-milliseconds-positive { + SELECT timediff('12:00:01.300', '12:00:00.500'); +} {"+0000-00-00 00:00:00.800"} + +do_execsql_test timediff-same-time { + SELECT timediff('12:00:00', '12:00:00'); +} {"+0000-00-00 
00:00:00.000"} + +do_execsql_test timediff-across-dates { + SELECT timediff('2023-05-11 01:15:00', '2023-05-10 23:30:00'); +} {"+0000-00-00 01:45:00.000"} + +do_execsql_test timediff-across-dates-negative { + SELECT timediff('2023-05-10 23:30:00', '2023-05-11 01:15:00'); +} {"-0000-00-00 01:45:00.000"} + +do_execsql_test timediff-different-formats { + SELECT timediff('2023-05-10T23:30:00', '2023-05-10 14:15:00'); +} {"+0000-00-00 09:15:00.000"} + +do_execsql_test timediff-with-timezone { + SELECT timediff('2023-05-10 23:30:00+02:00', '2023-05-10 18:30:00Z'); +} {"+0000-00-00 03:00:00.000"} + +do_execsql_test timediff-large-difference { + SELECT timediff('2023-05-12 10:00:00', '2023-05-10 08:00:00'); +} {"+0000-00-02 02:00:00.000"} + +do_execsql_test timediff-with-seconds-precision { + SELECT timediff('12:30:45.123', '12:30:44.987'); +} {"+0000-00-00 00:00:00.136"} + +do_execsql_test timediff-null-first-arg { + SELECT timediff(NULL, '12:00:00'); +} {{}} + +do_execsql_test timediff-null-second-arg { + SELECT timediff('12:00:00', NULL); +} {{}} + +do_execsql_test timediff-invalid-first-arg { + SELECT timediff('not-a-time', '12:00:00'); +} {{}} + +do_execsql_test timediff-invalid-second-arg { + SELECT timediff('12:00:00', 'not-a-time'); +} {{}} + +do_execsql_test timediff-julian-day { + SELECT timediff(2460000, 2460000.5); +} {"-0000-00-00 12:00:00.000"} + +do_execsql_test timediff-different-time-formats { + SELECT timediff('23:59:59', '00:00:00'); +} {"+0000-00-00 23:59:59.000"} \ No newline at end of file diff --git a/testing/scalar-functions.test b/testing/scalar-functions.test index 01feb7c1b..807c4971d 100755 --- a/testing/scalar-functions.test +++ b/testing/scalar-functions.test @@ -195,6 +195,54 @@ do_execsql_test hex-null { select hex(null) } {} +do_execsql_test likely { + select likely('limbo') +} {limbo} + +do_execsql_test likely-int { + select likely(100) +} {100} + +do_execsql_test likely-decimal { + select likely(12.34) +} {12.34} + +do_execsql_test likely-null { + select likely(NULL) +} {} + +do_execsql_test likelihood-string { + SELECT likelihood('limbo', 0.5); +} {limbo} + +do_execsql_test likelihood-string-high-probability { + SELECT likelihood('database', 0.9375); +} {database} + +do_execsql_test likelihood-integer { + SELECT likelihood(100, 0.0625); +} {100} + +do_execsql_test likelihood-integer-probability-1 { + SELECT likelihood(42, 1.0); +} {42} + +do_execsql_test likelihood-decimal { + SELECT likelihood(12.34, 0.5); +} {12.34} + +do_execsql_test likelihood-null { + SELECT likelihood(NULL, 0.5); +} {} + +do_execsql_test likelihood-blob { + SELECT hex(likelihood(x'01020304', 0.5)); +} {01020304} + +do_execsql_test likelihood-zero-probability { + SELECT likelihood(999, 0.0); +} {999} + do_execsql_test unhex-str-ab { SELECT unhex('6162'); } {ab} diff --git a/testing/select.test b/testing/select.test index 27741aa54..6f0c6997d 100755 --- a/testing/select.test +++ b/testing/select.test @@ -11,6 +11,14 @@ do_execsql_test select-const-2 { SELECT 2 } {2} +do_execsql_test select-const-3 { + SELECT 0xDEAF +} {57007} + +do_execsql_test select-const-4 { + SELECT -0xA +} {-10} + do_execsql_test select-true { SELECT true } {1} @@ -166,6 +174,14 @@ do_execsql_test select-like-expression { select 2 % 0.5 } {} +do_execsql_test select_positive_infinite_float { + SELECT 1.7976931348623157E+308 + 1e308; -- f64::MAX + 1e308 +} {Inf} + +do_execsql_test select_negative_infinite_float { + SELECT -1.7976931348623157E+308 - 1e308 -- f64::MIN - 1e308 +} {-Inf} + do_execsql_test 
select_shl_large_negative_float { SELECT 1 << -1e19; SELECT 1 << -9223372036854775808; -- i64::MIN diff --git a/testing/tester.tcl b/testing/tester.tcl index 735c91aae..d739b2a39 100644 --- a/testing/tester.tcl +++ b/testing/tester.tcl @@ -2,6 +2,14 @@ set sqlite_exec [expr {[info exists env(SQLITE_EXEC)] ? $env(SQLITE_EXEC) : "sql set test_dbs [list "testing/testing.db" "testing/testing_norowidalias.db"] set test_small_dbs [list "testing/testing_small.db" ] +proc error_put {sql} { + puts [format "\033\[1;31mTest FAILED:\033\[0m %s" $sql ] +} + +proc test_put {msg db test_name} { + puts [format "\033\[1;34m(%s)\033\[0m %s $msg: \033\[1;32m%s\033\[0m" $db [string repeat " " [expr {40 - [string length $db]}]] $test_name] +} + proc evaluate_sql {sqlite_exec db_name sql} { set command [list $sqlite_exec $db_name $sql] set output [exec {*}$command] @@ -11,7 +19,7 @@ proc evaluate_sql {sqlite_exec db_name sql} { proc run_test {sqlite_exec db_name sql expected_output} { set actual_output [evaluate_sql $sqlite_exec $db_name $sql] if {$actual_output ne $expected_output} { - puts "Test FAILED: '$sql'" + error_put $sql puts "returned '$actual_output'" puts "expected '$expected_output'" exit 1 @@ -20,7 +28,7 @@ proc run_test {sqlite_exec db_name sql expected_output} { proc do_execsql_test {test_name sql_statements expected_outputs} { foreach db $::test_dbs { - puts [format "(%s) %s Running test: %s" $db [string repeat " " [expr {40 - [string length $db]}]] $test_name] + test_put "Running test" $db $test_name set combined_sql [string trim $sql_statements] set combined_expected_output [join $expected_outputs "\n"] run_test $::sqlite_exec $db $combined_sql $combined_expected_output @@ -29,7 +37,7 @@ proc do_execsql_test {test_name sql_statements expected_outputs} { proc do_execsql_test_small {test_name sql_statements expected_outputs} { foreach db $::test_small_dbs { - puts [format "(%s) %s Running test: %s" $db [string repeat " " [expr {40 - [string length $db]}]] $test_name] + test_put "Running test" $db $test_name set combined_sql [string trim $sql_statements] set combined_expected_output [join $expected_outputs "\n"] run_test $::sqlite_exec $db $combined_sql $combined_expected_output @@ -39,13 +47,13 @@ proc do_execsql_test_small {test_name sql_statements expected_outputs} { proc do_execsql_test_regex {test_name sql_statements expected_regex} { foreach db $::test_dbs { - puts [format "(%s) %s Running test: %s" $db [string repeat " " [expr {40 - [string length $db]}]] $test_name] + test_put "Running test" $db $test_name set combined_sql [string trim $sql_statements] set actual_output [evaluate_sql $::sqlite_exec $db $combined_sql] # Validate the actual output against the regular expression if {![regexp $expected_regex $actual_output]} { - puts "Test FAILED: '$sql_statements'" + error_put $sql_statements puts "returned '$actual_output'" puts "expected to match regex '$expected_regex'" exit 1 @@ -55,7 +63,7 @@ proc do_execsql_test_regex {test_name sql_statements expected_regex} { proc do_execsql_test_on_specific_db {db_name test_name sql_statements expected_outputs} { - puts [format "(%s) %s Running test: %s" $db_name [string repeat " " [expr {40 - [string length $db_name]}]] $test_name] + test_put "Running test" $db_name $test_name set combined_sql [string trim $sql_statements] set combined_expected_output [join $expected_outputs "\n"] run_test $::sqlite_exec $db_name $combined_sql $combined_expected_output @@ -69,14 +77,14 @@ proc within_tolerance {actual expected tolerance} { # FIXME: When Limbo's 
floating point presentation matches to SQLite, this could/should be removed proc do_execsql_test_tolerance {test_name sql_statements expected_outputs tolerance} { foreach db $::test_dbs { - puts [format "(%s) %s Running test: %s" $db [string repeat " " [expr {40 - [string length $db]}]] $test_name] + test_put "Running test" $db $test_name set combined_sql [string trim $sql_statements] set actual_output [evaluate_sql $::sqlite_exec $db $combined_sql] set actual_values [split $actual_output "\n"] set expected_values [split $expected_outputs "\n"] if {[llength $actual_values] != [llength $expected_values]} { - puts "Test FAILED: '$sql_statements'" + error_put $sql_statements puts "returned '$actual_output'" puts "expected '$expected_outputs'" exit 1 @@ -89,7 +97,7 @@ proc do_execsql_test_tolerance {test_name sql_statements expected_outputs tolera if {![within_tolerance $actual $expected $tolerance]} { set lower_bound [expr {$expected - $tolerance}] set upper_bound [expr {$expected + $tolerance}] - puts "Test FAILED: '$sql_statements'" + error_put $sql_statements puts "returned '$actual'" puts "expected a value within the range \[$lower_bound, $upper_bound\]" exit 1 @@ -97,3 +105,124 @@ proc do_execsql_test_tolerance {test_name sql_statements expected_outputs tolera } } } +# This procedure passes the test if the output contains error messages +proc run_test_expecting_any_error {sqlite_exec db_name sql} { + # Execute the SQL command and capture output + set command [list $sqlite_exec $db_name $sql] + + # Use catch to handle both successful and error cases + catch {exec {*}$command} result options + + # Check if the output contains error indicators (×, error, syntax error, etc.) + if {[regexp {(error|ERROR|Error|×|syntax error|failed)} $result]} { + # Error found in output - test passed + puts "\033\[1;32mTest PASSED:\033\[0m Got expected error" + return 1 + } + + # No error indicators in output + error_put $sql + puts "Expected an error but command output didn't indicate any error: '$result'" + exit 1 +} + +# This procedure passes if error matches a specific pattern +proc run_test_expecting_error {sqlite_exec db_name sql expected_error_pattern} { + # Execute the SQL command and capture output + set command [list $sqlite_exec $db_name $sql] + + # Capture output whether command succeeds or fails + catch {exec {*}$command} result options + + # Check if the output contains error indicators first + if {![regexp {(error|ERROR|Error|×|syntax error|failed)} $result]} { + error_put $sql + puts "Expected an error matching '$expected_error_pattern'" + puts "But command output didn't indicate any error: '$result'" + exit 1 + } + + # Now check if the error message matches the expected pattern + if {![regexp $expected_error_pattern $result]} { + error_put $sql + puts "Error occurred but didn't match expected pattern." 
+ puts "Output was: '$result'" + puts "Expected pattern: '$expected_error_pattern'" + exit 1 + } + + # If we get here, the test passed - got expected error matching pattern + return 1 +} + +# This version accepts exact error text, ignoring formatting +proc run_test_expecting_error_content {sqlite_exec db_name sql expected_error_text} { + # Execute the SQL command and capture output + set command [list $sqlite_exec $db_name $sql] + + # Capture output whether command succeeds or fails + catch {exec {*}$command} result options + + # Check if the output contains error indicators first + if {![regexp {(error|ERROR|Error|×|syntax error|failed)} $result]} { + error_put $sql + puts "Expected an error with text: '$expected_error_text'" + puts "But command output didn't indicate any error: '$result'" + exit 1 + } + + # Normalize both the actual and expected error messages + # Remove all whitespace, newlines, and special characters for comparison + set normalized_actual [regsub -all {[[:space:]]|[[:punct:]]} $result ""] + set normalized_expected [regsub -all {[[:space:]]|[[:punct:]]} $expected_error_text ""] + + # Convert to lowercase for case-insensitive comparison + set normalized_actual [string tolower $normalized_actual] + set normalized_expected [string tolower $normalized_expected] + + # Check if the normalized strings contain the same text + if {[string first $normalized_expected $normalized_actual] == -1} { + error_put $sql + puts "Error occurred but content didn't match." + puts "Output was: '$result'" + puts "Expected text: '$expected_error_text'" + exit 1 + } + + # If we get here, the test passed - got error with expected content + return 1 +} + +proc do_execsql_test_error {test_name sql_statements expected_error_pattern} { + foreach db $::test_dbs { + test_put "Running error test" $db $test_name + set combined_sql [string trim $sql_statements] + run_test_expecting_error $::sqlite_exec $db $combined_sql $expected_error_pattern + } +} + +proc do_execsql_test_error_content {test_name sql_statements expected_error_text} { + foreach db $::test_dbs { + test_put "Running error content test" $db $test_name + set combined_sql [string trim $sql_statements] + run_test_expecting_error_content $::sqlite_exec $db $combined_sql $expected_error_text + } +} + +proc do_execsql_test_any_error {test_name sql_statements} { + foreach db $::test_dbs { + test_put "Running any-error test" $db $test_name + set combined_sql [string trim $sql_statements] + run_test_expecting_any_error $::sqlite_exec $db $combined_sql + } +} + +proc do_execsql_test_in_memory_any_error {test_name sql_statements} { + test_put "Running any-error test" in-memory $test_name + + # Use ":memory:" special filename for in-memory database + set db_name ":memory:" + + set combined_sql [string trim $sql_statements] + run_test_expecting_any_error $::sqlite_exec $db_name $combined_sql +} diff --git a/testing/where.test b/testing/where.test index a5bdc91e8..958d7825f 100755 --- a/testing/where.test +++ b/testing/where.test @@ -572,3 +572,13 @@ do_execsql_test where-constant-condition-no-tables { do_execsql_test where-constant-condition-no-tables-2 { select 1 where 1 IS NOT NULL; } {1} + +# We had a bug where NULL was incorrectly used as a seek key, returning all rows (because NULL < everything in index keys) +do_execsql_test where-null-comparison-index-seek-regression-test { + select age from users where age > NULL; +} {} + +# We had a bug where Limbo tried to use an index when there was a WHERE term like 't.x = t.x' +do_execsql_test 
where-self-referential-regression {
+    select count(1) from users where id = id;
+} {10000}
diff --git a/tests/integration/common.rs b/tests/integration/common.rs
index a034b36ae..1ef890db9 100644
--- a/tests/integration/common.rs
+++ b/tests/integration/common.rs
@@ -1,6 +1,7 @@
 use limbo_core::{CheckpointStatus, Connection, Database, IO};
 use rand::{rng, RngCore};
-use std::path::PathBuf;
+use rusqlite::params;
+use std::path::{Path, PathBuf};
 use std::rc::Rc;
 use std::sync::Arc;
 use tempfile::TempDir;
@@ -28,6 +29,14 @@ impl TempDatabase {
         Self { path, io }
     }
 
+    pub fn new_with_existent(db_path: &Path) -> Self {
+        let io: Arc<dyn IO> = Arc::new(limbo_core::PlatformIO::new().unwrap());
+        Self {
+            path: db_path.to_path_buf(),
+            io,
+        }
+    }
+
     pub fn new_with_rusqlite(table_sql: &str) -> Self {
         let mut path = TempDir::new().unwrap().into_path();
         path.push("test.db");
@@ -44,8 +53,21 @@ impl TempDatabase {
     }
 
     pub fn connect_limbo(&self) -> Rc<limbo_core::Connection> {
+        Self::connect_limbo_with_flags(&self, limbo_core::OpenFlags::default())
+    }
+
+    pub fn connect_limbo_with_flags(
+        &self,
+        flags: limbo_core::OpenFlags,
+    ) -> Rc<limbo_core::Connection> {
         log::debug!("conneting to limbo");
-        let db = Database::open_file(self.io.clone(), self.path.to_str().unwrap(), false).unwrap();
+        let db = Database::open_file_with_flags(
+            self.io.clone(),
+            self.path.to_str().unwrap(),
+            flags,
+            false,
+        )
+        .unwrap();
         let conn = db.connect().unwrap();
         log::debug!("connected to limbo");
@@ -104,9 +126,97 @@ pub fn maybe_setup_tracing() {
         .with(EnvFilter::from_default_env())
         .try_init();
 }
+
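+// Runs `query` through rusqlite and collects every row; the column count is
+// probed by reading successive indexes until InvalidColumnIndex is returned.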
+pub(crate) fn sqlite_exec_rows(
+    conn: &rusqlite::Connection,
+    query: &str,
+) -> Vec<Vec<rusqlite::types::Value>> {
+    let mut stmt = conn.prepare(&query).unwrap();
+    let mut rows = stmt.query(params![]).unwrap();
+    let mut results = Vec::new();
+    while let Some(row) = rows.next().unwrap() {
+        let mut result = Vec::new();
+        for i in 0.. {
+            let column: rusqlite::types::Value = match row.get(i) {
+                Ok(column) => column,
+                Err(rusqlite::Error::InvalidColumnIndex(_)) => break,
+                Err(err) => panic!("unexpected rusqlite error: {}", err),
+            };
+            result.push(column);
+        }
+        results.push(result)
+    }
+
+    results
+}
+
+pub(crate) fn limbo_exec_rows(
+    db: &TempDatabase,
+    conn: &Rc<limbo_core::Connection>,
+    query: &str,
+) -> Vec<Vec<rusqlite::types::Value>> {
+    let mut stmt = conn.prepare(query).unwrap();
+    let mut rows = Vec::new();
+    'outer: loop {
+        let row = loop {
+            let result = stmt.step().unwrap();
+            match result {
+                limbo_core::StepResult::Row => {
+                    let row = stmt.row().unwrap();
+                    break row;
+                }
+                limbo_core::StepResult::IO => {
+                    db.io.run_once().unwrap();
+                    continue;
+                }
+                limbo_core::StepResult::Done => break 'outer,
+                r => panic!("unexpected result {:?}: expecting single row", r),
+            }
+        };
+        let row = row
+            .get_values()
+            .map(|x| match x {
+                limbo_core::OwnedValue::Null => rusqlite::types::Value::Null,
+                limbo_core::OwnedValue::Integer(x) => rusqlite::types::Value::Integer(*x),
+                limbo_core::OwnedValue::Float(x) => rusqlite::types::Value::Real(*x),
+                limbo_core::OwnedValue::Text(x) => {
+                    rusqlite::types::Value::Text(x.as_str().to_string())
+                }
+                limbo_core::OwnedValue::Blob(x) => rusqlite::types::Value::Blob(x.to_vec()),
+            })
+            .collect();
+        rows.push(row);
+    }
+    rows
+}
+
+pub(crate) fn limbo_exec_rows_error(
+    db: &TempDatabase,
+    conn: &Rc<limbo_core::Connection>,
+    query: &str,
+) -> limbo_core::Result<()> {
+    let mut stmt = conn.prepare(query)?;
+    loop {
+        let result = stmt.step()?;
+        match result {
+            limbo_core::StepResult::IO => {
+                db.io.run_once()?;
+                continue;
+            }
+            limbo_core::StepResult::Done => return Ok(()),
+            r => panic!("unexpected result {:?}: expecting single row", r),
+        }
+    }
+}
+
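+// Usage sketch (hypothetical, mirroring how the fuzz tests call these helpers):
+//
+//     let db = TempDatabase::new_empty();
+//     let conn = db.connect_limbo();
+//     assert_eq!(
+//         limbo_exec_rows(&db, &conn, "SELECT 1"),
+//         vec![vec![rusqlite::types::Value::Integer(1)]]
+//     );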
 #[cfg(test)]
 mod tests {
-    use super::TempDatabase;
+    use std::vec;
+
+    use tempfile::TempDir;
+
+    use super::{limbo_exec_rows, limbo_exec_rows_error, TempDatabase};
+    use rusqlite::types::Value;
 
     #[test]
     fn test_statement_columns() -> anyhow::Result<()> {
@@ -120,16 +230,16 @@ mod tests {
         let columns = stmt.num_columns();
         assert_eq!(columns, 3);
 
-        assert_eq!(stmt.get_column_name(0), "foo".into());
-        assert_eq!(stmt.get_column_name(1), "bar".into());
-        assert_eq!(stmt.get_column_name(2), "baz".into());
+        assert_eq!(stmt.get_column_name(0), "foo");
+        assert_eq!(stmt.get_column_name(1), "bar");
+        assert_eq!(stmt.get_column_name(2), "baz");
 
         let stmt = conn.prepare("select foo, bar from test;")?;
         let columns = stmt.num_columns();
         assert_eq!(columns, 2);
 
-        assert_eq!(stmt.get_column_name(0), "foo".into());
-        assert_eq!(stmt.get_column_name(1), "bar".into());
+        assert_eq!(stmt.get_column_name(0), "foo");
+        assert_eq!(stmt.get_column_name(1), "bar");
 
         let stmt = conn.prepare("delete from test;")?;
         let columns = stmt.num_columns();
@@ -145,4 +255,29 @@ mod tests {
 
         Ok(())
     }
+
+    #[test]
+    fn test_limbo_open_read_only() -> anyhow::Result<()> {
+        let path = TempDir::new().unwrap().into_path().join("temp_read_only");
+        let db = TempDatabase::new_with_existent(&path);
+        {
+            let conn = db.connect_limbo();
+            let ret = limbo_exec_rows(&db, &conn, "CREATE table t(a)");
+            assert!(ret.is_empty(), "{:?}", ret);
+            limbo_exec_rows(&db, &conn, "INSERT INTO t values (1)");
+            conn.close().unwrap()
+        }
+
+        {
+            let conn = db.connect_limbo_with_flags(
+                limbo_core::OpenFlags::default() | limbo_core::OpenFlags::ReadOnly,
+            );
+            let ret = limbo_exec_rows(&db, &conn, "SELECT * from t");
+            assert_eq!(ret, vec![vec![Value::Integer(1)]]);
+
+            let err = limbo_exec_rows_error(&db, &conn, "INSERT INTO t values (1)").unwrap_err();
+            assert!(matches!(err, limbo_core::LimboError::ReadOnly), "{:?}", err);
+        }
+        Ok(())
+    }
 }
diff --git a/tests/integration/fuzz/mod.rs b/tests/integration/fuzz/mod.rs
index f776fc9a7..82d1c11ac 100644
--- a/tests/integration/fuzz/mod.rs
+++ b/tests/integration/fuzz/mod.rs
@@ -2,14 +2,14 @@ pub mod grammar_generator;
 
 #[cfg(test)]
 mod tests {
-    use std::rc::Rc;
+    use std::collections::HashSet;
 
-    use rand::SeedableRng;
+    use rand::{seq::IndexedRandom, Rng, SeedableRng};
     use rand_chacha::ChaCha8Rng;
     use rusqlite::params;
 
     use crate::{
-        common::TempDatabase,
+        common::{limbo_exec_rows, sqlite_exec_rows, TempDatabase},
         fuzz::grammar_generator::{const_str, rand_int, rand_str, GrammarGenerator},
     };
 
@@ -24,69 +24,6 @@ mod tests {
         (rng, seed)
     }
 
-    fn sqlite_exec_rows(
-        conn: &rusqlite::Connection,
-        query: &str,
-    ) -> Vec<Vec<rusqlite::types::Value>> {
-        let mut stmt = conn.prepare(&query).unwrap();
-        let mut rows = stmt.query(params![]).unwrap();
-        let mut results = Vec::new();
-        while let Some(row) = rows.next().unwrap() {
-            let mut result = Vec::new();
-            for i in 0.. {
-                let column: rusqlite::types::Value = match row.get(i) {
-                    Ok(column) => column,
-                    Err(rusqlite::Error::InvalidColumnIndex(_)) => break,
-                    Err(err) => panic!("unexpected rusqlite error: {}", err),
-                };
-                result.push(column);
-            }
-            results.push(result)
-        }
-
-        results
-    }
-
-    fn limbo_exec_rows(
-        db: &TempDatabase,
-        conn: &Rc<limbo_core::Connection>,
-        query: &str,
-    ) -> Vec<Vec<rusqlite::types::Value>> {
-        let mut stmt = conn.prepare(query).unwrap();
-        let mut rows = Vec::new();
-        'outer: loop {
-            let row = loop {
-                let result = stmt.step().unwrap();
-                match result {
-                    limbo_core::StepResult::Row => {
-                        let row = stmt.row().unwrap();
-                        break row;
-                    }
-                    limbo_core::StepResult::IO => {
-                        db.io.run_once().unwrap();
-                        continue;
-                    }
-                    limbo_core::StepResult::Done => break 'outer,
-                    r => panic!("unexpected result {:?}: expecting single row", r),
-                }
-            };
-            let row = row
-                .get_values()
-                .map(|x| match x {
-                    limbo_core::OwnedValue::Null => rusqlite::types::Value::Null,
-                    limbo_core::OwnedValue::Integer(x) => rusqlite::types::Value::Integer(*x),
-                    limbo_core::OwnedValue::Float(x) => rusqlite::types::Value::Real(*x),
-                    limbo_core::OwnedValue::Text(x) => {
-                        rusqlite::types::Value::Text(x.as_str().to_string())
-                    }
-                    limbo_core::OwnedValue::Blob(x) => rusqlite::types::Value::Blob(x.to_vec()),
-                })
-                .collect();
-            rows.push(row);
-        }
-        rows
-    }
-
     #[test]
     pub fn arithmetic_expression_fuzz_ex1() {
         let db = TempDatabase::new_empty();
@@ -107,6 +44,379 @@ mod tests {
         }
     }
 
+    #[test]
+    pub fn rowid_seek_fuzz() {
+        let db = TempDatabase::new_with_rusqlite("CREATE TABLE t(x INTEGER PRIMARY KEY)"); // INTEGER PRIMARY KEY is a rowid alias, so an index is not created
+        let sqlite_conn = rusqlite::Connection::open(db.path.clone()).unwrap();
+
+        let insert = format!(
+            "INSERT INTO t VALUES {}",
+            (1..10000)
+                .map(|x| format!("({})", x))
+                .collect::<Vec<_>>()
+                .join(", ")
+        );
+        sqlite_conn.execute(&insert, params![]).unwrap();
+        sqlite_conn.close().unwrap();
+        let sqlite_conn = rusqlite::Connection::open(db.path.clone()).unwrap();
+        let limbo_conn = db.connect_limbo();
+
+        const COMPARISONS: [&str; 4] = ["<", "<=", ">", ">="];
+        const ORDER_BY: [Option<&str>; 4] = [
+            None,
+            Some("ORDER BY x"),
+            Some("ORDER BY x DESC"),
+            Some("ORDER BY x ASC"),
+        ];
+
+        for comp in COMPARISONS.iter() {
+            for order_by in ORDER_BY.iter() {
+                for max in 0..=10000 {
+                    let query = format!(
+                        "SELECT * FROM t WHERE x {} {} {} LIMIT 3",
+                        comp,
+                        max,
+                        order_by.unwrap_or("")
+                    );
+                    log::trace!("query: {}", query);
+                    let limbo = limbo_exec_rows(&db, &limbo_conn, &query);
+                    let sqlite = sqlite_exec_rows(&sqlite_conn, &query);
+                    assert_eq!(
+                        limbo, sqlite,
+                        "query: {}, limbo: {:?}, sqlite: {:?}",
+                        query, limbo, sqlite
+                    );
+                }
+            }
+        }
+    }
+
+    #[test]
+    pub fn index_scan_fuzz() {
+        let db = TempDatabase::new_with_rusqlite("CREATE TABLE t(x PRIMARY KEY)");
+        let sqlite_conn = rusqlite::Connection::open(db.path.clone()).unwrap();
+
+        let insert = format!(
+            "INSERT INTO t VALUES {}",
+            (0..10000)
+                .map(|x| format!("({})", x))
+                .collect::<Vec<_>>()
+                .join(", ")
+        );
+        sqlite_conn.execute(&insert, params![]).unwrap();
+        sqlite_conn.close().unwrap();
+        let sqlite_conn = rusqlite::Connection::open(db.path.clone()).unwrap();
+        let limbo_conn = db.connect_limbo();
+
+        const COMPARISONS: [&str; 5] = ["=", "<", "<=", ">", ">="];
+
+        const ORDER_BY: [Option<&str>; 4] = [
+            None,
+            Some("ORDER BY x"),
+            Some("ORDER BY x DESC"),
+            Some("ORDER BY x ASC"),
+        ];
+
+        for comp in COMPARISONS.iter() {
+            for order_by in ORDER_BY.iter() {
+                for max in 0..=10000 {
+                    let query = format!(
+                        "SELECT * FROM t WHERE x {} {} {} LIMIT 3",
+                        comp,
+                        max,
+                        order_by.unwrap_or(""),
+                    );
+                    let limbo = limbo_exec_rows(&db, &limbo_conn, &query);
+                    let sqlite = sqlite_exec_rows(&sqlite_conn, &query);
+                    assert_eq!(
+                        limbo, sqlite,
+                        "query: {}, limbo: {:?}, sqlite: {:?}",
+                        query, limbo, sqlite
+                    );
+                }
+            }
+        }
+    }
+
+    #[test]
+    /// A test for verifying that index seek+scan works correctly for compound keys
+    /// on indexes with various column orderings.
+    pub fn index_scan_compound_key_fuzz() {
+        let (mut rng, seed) = if std::env::var("SEED").is_ok() {
+            let seed = std::env::var("SEED").unwrap().parse::<u64>().unwrap();
+            (ChaCha8Rng::seed_from_u64(seed), seed)
+        } else {
+            rng_from_time()
+        };
+        let table_defs: [&str; 8] = [
+            "CREATE TABLE t(x, y, z, nonindexed_col, PRIMARY KEY (x, y, z))",
+            "CREATE TABLE t(x, y, z, nonindexed_col, PRIMARY KEY (x desc, y, z))",
+            "CREATE TABLE t(x, y, z, nonindexed_col, PRIMARY KEY (x, y desc, z))",
+            "CREATE TABLE t(x, y, z, nonindexed_col, PRIMARY KEY (x, y, z desc))",
+            "CREATE TABLE t(x, y, z, nonindexed_col, PRIMARY KEY (x desc, y desc, z))",
+            "CREATE TABLE t(x, y, z, nonindexed_col, PRIMARY KEY (x desc, y, z desc))",
+            "CREATE TABLE t(x, y, z, nonindexed_col, PRIMARY KEY (x, y desc, z desc))",
+            "CREATE TABLE t(x, y, z, nonindexed_col, PRIMARY KEY (x desc, y desc, z desc))",
+        ];
+        // Create all different 3-column primary key permutations
+        let dbs = [
+            TempDatabase::new_with_rusqlite(table_defs[0]),
+            TempDatabase::new_with_rusqlite(table_defs[1]),
+            TempDatabase::new_with_rusqlite(table_defs[2]),
+            TempDatabase::new_with_rusqlite(table_defs[3]),
+            TempDatabase::new_with_rusqlite(table_defs[4]),
+            TempDatabase::new_with_rusqlite(table_defs[5]),
+            TempDatabase::new_with_rusqlite(table_defs[6]),
+            TempDatabase::new_with_rusqlite(table_defs[7]),
+        ];
+        let mut pk_tuples = HashSet::new();
+        while pk_tuples.len() < 100000 {
+            pk_tuples.insert((
+                rng.random_range(0..3000),
+                rng.random_range(0..3000),
+                rng.random_range(0..3000),
+            ));
+        }
+        let mut tuples = Vec::new();
+        for pk_tuple in pk_tuples {
+            tuples.push(format!(
+                "({}, {}, {}, {})",
+                pk_tuple.0,
+                pk_tuple.1,
+                pk_tuple.2,
+                rng.random_range(0..3000)
+            ));
+        }
+        let insert = format!("INSERT INTO t VALUES {}", tuples.join(", "));
+
+        // Insert all tuples into all databases
+        let sqlite_conns = dbs
+            .iter()
+            .map(|db| rusqlite::Connection::open(db.path.clone()).unwrap())
+            .collect::<Vec<_>>();
+        for sqlite_conn in sqlite_conns.into_iter() {
+            sqlite_conn.execute(&insert, params![]).unwrap();
+            sqlite_conn.close().unwrap();
+        }
+        let sqlite_conns = dbs
+            .iter()
+            .map(|db| rusqlite::Connection::open(db.path.clone()).unwrap())
+            .collect::<Vec<_>>();
+        let limbo_conns = dbs.iter().map(|db| db.connect_limbo()).collect::<Vec<_>>();
+
+        const COMPARISONS: [&str; 5] = ["=", "<", "<=", ">", ">="];
+
+        // For verifying index scans, we only care about cases where all but potentially the last column are constrained by an equality (=),
+        // because this is the only way to utilize an index efficiently for seeking. This is called the "left-prefix rule" of indexes.
+        // Hence we generate constraint combinations in this manner; as soon as a comparison is not an equality, we stop generating more constraints for the where clause.
+        // Examples:
+        // x = 1 AND y = 2 AND z > 3
+        // x = 1 AND y > 2
+        // x > 1
+        let col_comp_first = COMPARISONS
+            .iter()
+            .cloned()
+            .map(|x| (Some(x), None, None))
+            .collect::<Vec<_>>();
+        let col_comp_second = COMPARISONS
+            .iter()
+            .cloned()
+            .map(|x| (Some("="), Some(x), None))
+            .collect::<Vec<_>>();
+        let col_comp_third = COMPARISONS
+            .iter()
+            .cloned()
+            .map(|x| (Some("="), Some("="), Some(x)))
+            .collect::<Vec<_>>();
+
+        let all_comps = [col_comp_first, col_comp_second, col_comp_third].concat();
+
+        const ORDER_BY: [Option<&str>; 3] = [None, Some("DESC"), Some("ASC")];
+
+        const ITERATIONS: usize = 10000;
+        for i in 0..ITERATIONS {
+            if i % (ITERATIONS / 100) == 0 {
+                println!(
+                    "index_scan_compound_key_fuzz: iteration {}/{}",
+                    i + 1,
+                    ITERATIONS
+                );
+            }
+            // let's choose random columns from the table
+            let col_choices = ["x", "y", "z", "nonindexed_col"];
+            let col_choices_weights = [10.0, 10.0, 10.0, 3.0];
+            let num_cols_in_select = rng.random_range(1..=4);
+            let mut select_cols = col_choices
+                .choose_multiple_weighted(&mut rng, num_cols_in_select, |s| {
+                    let idx = col_choices.iter().position(|c| c == s).unwrap();
+                    col_choices_weights[idx]
+                })
+                .unwrap()
+                .collect::<Vec<_>>()
+                .iter()
+                .map(|x| x.to_string())
+                .collect::<Vec<_>>();
+
+            // sort select cols by index of col_choices
+            select_cols.sort_by_cached_key(|x| col_choices.iter().position(|c| c == x).unwrap());
+
+            let (comp1, comp2, comp3) = all_comps[rng.random_range(0..all_comps.len())];
+            // Similarly as for the constraints, generate order by permutations so that the only columns involved in the index seek are potentially part of the ORDER BY.
+            let (order_by1, order_by2, order_by3) = {
+                if comp1.is_some() && comp2.is_some() && comp3.is_some() {
+                    (
+                        ORDER_BY[rng.random_range(0..ORDER_BY.len())],
+                        ORDER_BY[rng.random_range(0..ORDER_BY.len())],
+                        ORDER_BY[rng.random_range(0..ORDER_BY.len())],
+                    )
+                } else if comp1.is_some() && comp2.is_some() {
+                    (
+                        ORDER_BY[rng.random_range(0..ORDER_BY.len())],
+                        ORDER_BY[rng.random_range(0..ORDER_BY.len())],
+                        None,
+                    )
+                } else {
+                    (ORDER_BY[rng.random_range(0..ORDER_BY.len())], None, None)
+                }
+            };
+
+            // Generate random values for the WHERE clause constraints. Only involve primary key columns.
+            let (col_val_first, col_val_second, col_val_third) = {
+                if comp1.is_some() && comp2.is_some() && comp3.is_some() {
+                    (
+                        Some(rng.random_range(0..=3000)),
+                        Some(rng.random_range(0..=3000)),
+                        Some(rng.random_range(0..=3000)),
+                    )
+                } else if comp1.is_some() && comp2.is_some() {
+                    (
+                        Some(rng.random_range(0..=3000)),
+                        Some(rng.random_range(0..=3000)),
+                        None,
+                    )
+                } else {
+                    (Some(rng.random_range(0..=3000)), None, None)
+                }
+            };
+
+            // Use a small limit to make the test complete faster
+            let limit = 5;
+
+            // Generate WHERE clause string
+            let where_clause_components = vec![
+                comp1.map(|x| format!("x {} {}", x, col_val_first.unwrap())),
+                comp2.map(|x| format!("y {} {}", x, col_val_second.unwrap())),
+                comp3.map(|x| format!("z {} {}", x, col_val_third.unwrap())),
+            ]
+            .into_iter()
+            .filter_map(|x| x)
+            .collect::<Vec<_>>();
+            let where_clause = if where_clause_components.is_empty() {
+                "".to_string()
+            } else {
+                format!("WHERE {}", where_clause_components.join(" AND "))
+            };
+
+            // Generate ORDER BY string
+            let order_by_components = vec![
+                order_by1.map(|x| format!("x {}", x)),
+                order_by2.map(|x| format!("y {}", x)),
+                order_by3.map(|x| format!("z {}", x)),
+            ]
+            .into_iter()
+            .filter_map(|x| x)
+            .collect::<Vec<_>>();
+            let order_by = if order_by_components.is_empty() {
+                "".to_string()
+            } else {
+                format!("ORDER BY {}", order_by_components.join(", "))
+            };
+
+            // Generate final query string
+            let query = format!(
+                "SELECT {} FROM t {} {} LIMIT {}",
+                select_cols.join(", "),
+                where_clause,
+                order_by,
+                limit
+            );
+            log::debug!("query: {}", query);
+
+            // Execute the query on all databases and compare the results
+            for (i, sqlite_conn) in sqlite_conns.iter().enumerate() {
+                let limbo = limbo_exec_rows(&dbs[i], &limbo_conns[i], &query);
+                let sqlite = sqlite_exec_rows(&sqlite_conn, &query);
+                if limbo != sqlite {
+                    // if the order by contains exclusively components that are constrained by an equality (=),
+                    // sqlite sometimes doesn't bother with ASC/DESC because it doesn't semantically matter
+                    // so we need to check that limbo and sqlite return the same results when the ordering is reversed.
+                    // because we are generally using LIMIT (to make the test complete faster), we need to rerun the query
+                    // without limit and then check that the results are the same if reversed.
+                    let order_by_only_equalities = !order_by_components.is_empty()
+                        && order_by_components.iter().all(|o: &String| {
+                            if o.starts_with("x ") {
+                                comp1.map_or(false, |c| c == "=")
+                            } else if o.starts_with("y ") {
+                                comp2.map_or(false, |c| c == "=")
+                            } else {
+                                comp3.map_or(false, |c| c == "=")
+                            }
+                        });
+
+                    let query_no_limit =
+                        format!("SELECT * FROM t {} {} {}", where_clause, order_by, "");
+                    let limbo_no_limit = limbo_exec_rows(&dbs[i], &limbo_conns[i], &query_no_limit);
+                    let sqlite_no_limit = sqlite_exec_rows(&sqlite_conn, &query_no_limit);
+                    let limbo_rev = limbo_no_limit.iter().cloned().rev().collect::<Vec<_>>();
+                    if limbo_rev == sqlite_no_limit && order_by_only_equalities {
+                        continue;
+                    }
+
+                    // finally, if the order by columns specified contain duplicates, sqlite might've returned the rows in an arbitrary different order.
+                    // e.g. SELECT x,y,z FROM t ORDER BY x,y -- if there are duplicates on (x,y), the ordering returned might be different for limbo and sqlite.
+                    // let's check this case and forgive ourselves if the ordering is different for this reason (but no other reason!)
+                    let order_by_cols = select_cols
+                        .iter()
+                        .enumerate()
+                        .filter(|(i, _)| {
+                            order_by_components
+                                .iter()
+                                .any(|o| o.starts_with(col_choices[*i]))
+                        })
+                        .map(|(i, _)| i)
+                        .collect::<Vec<_>>();
+                    let duplicate_on_order_by_exists = {
+                        let mut exists = false;
+                        'outer: for (i, row) in limbo_no_limit.iter().enumerate() {
+                            for (j, other_row) in limbo_no_limit.iter().enumerate() {
+                                if i != j
+                                    && order_by_cols.iter().all(|&col| row[col] == other_row[col])
+                                {
+                                    exists = true;
+                                    break 'outer;
+                                }
+                            }
+                        }
+                        exists
+                    };
+                    if duplicate_on_order_by_exists {
+                        let len_equal = limbo_no_limit.len() == sqlite_no_limit.len();
+                        let all_contained =
+                            len_equal && limbo_no_limit.iter().all(|x| sqlite_no_limit.contains(x));
+                        if all_contained {
+                            continue;
+                        }
+                    }
+
+                    panic!(
+                        "DIFFERENT RESULTS! limbo: {:?}, sqlite: {:?}, seed: {}, query: {}, table def: {}",
+                        limbo, sqlite, seed, query, table_defs[i]
+                    );
+                }
+            }
+        }
+    }
+
     #[test]
     pub fn arithmetic_expression_fuzz() {
         let _ = env_logger::try_init();
@@ -872,19 +1182,38 @@ mod tests {
         let limbo_conn = db.connect_limbo();
         let sqlite_conn = rusqlite::Connection::open_in_memory().unwrap();
         for table in tables.iter() {
-            let query = format!("CREATE TABLE {} ({})", table.name, table.columns.join(", "));
+            let columns_with_first_column_as_pk = {
+                let mut columns = vec![];
+                columns.push(format!("{} PRIMARY KEY", table.columns[0]));
+                columns.extend(table.columns[1..].iter().map(|c| c.to_string()));
+                columns.join(", ")
+            };
+            let query = format!(
+                "CREATE TABLE {} ({})",
+                table.name, columns_with_first_column_as_pk
+            );
+            let limbo = limbo_exec_rows(&db, &limbo_conn, &query);
+            let sqlite = sqlite_exec_rows(&sqlite_conn, &query);
+
             assert_eq!(
-                limbo_exec_rows(&db, &limbo_conn, &query),
-                sqlite_exec_rows(&sqlite_conn, &query)
+                limbo, sqlite,
+                "query: {}, limbo: {:?}, sqlite: {:?}",
+                query, limbo, sqlite
             );
         }
 
         let (mut rng, seed) = rng_from_time();
         log::info!("seed: {}", seed);
 
-        for _ in 0..100 {
-            let (x, y, z) = (
-                g.generate(&mut rng, builders.number, 1),
+        let mut i = 0;
+        let mut primary_key_set = HashSet::with_capacity(100);
+        while i < 100 {
+            let x = g.generate(&mut rng, builders.number, 1);
+            if primary_key_set.contains(&x) {
+                continue;
+            }
+            primary_key_set.insert(x.clone());
+            let (y, z) = (
                 g.generate(&mut rng, builders.number, 1),
                 g.generate(&mut rng, builders.number, 1),
             );
@@ -896,7 +1225,13 @@ mod tests {
                 "seed: {}",
                 seed,
             );
+            i += 1;
         }
+        // verify the same number of rows in both tables
+        let query = format!("SELECT COUNT(*) FROM t");
+        let limbo = limbo_exec_rows(&db, &limbo_conn, &query);
+        let sqlite = sqlite_exec_rows(&sqlite_conn, &query);
+        assert_eq!(limbo, sqlite, "seed: {}", seed);
 
         let sql = g
             .create()
@@ -910,11 +1245,25 @@ mod tests {
             log::info!("query: {}", query);
             let limbo = limbo_exec_rows(&db, &limbo_conn, &query);
             let sqlite = sqlite_exec_rows(&sqlite_conn, &query);
-            assert_eq!(
-                limbo, sqlite,
-                "query: {}, limbo: {:?}, sqlite: {:?} seed: {}",
-                query, limbo, sqlite, seed
-            );
+
+            if limbo.len() != sqlite.len() {
+                panic!("MISMATCHING ROW COUNT (limbo: {}, sqlite: {}) for query: {}\n\n limbo: {:?}\n\n sqlite: {:?}", limbo.len(), sqlite.len(), query, limbo, sqlite);
+            }
+            // find first row where limbo and sqlite differ
+            let diff_rows = limbo
+                .iter()
+                .zip(sqlite.iter())
+                .filter(|(l, s)| l != s)
+                .collect::<Vec<_>>();
+            if !diff_rows.is_empty() {
+                // due to different choices in index usage (usually in these cases sqlite is smart enough to use an index and we aren't),
+                // sqlite might return rows in a different order
+                // check if all limbo rows are present in sqlite
+                let all_present = limbo.iter().all(|l| sqlite.iter().any(|s| l == s));
+                if !all_present {
+                    panic!("MISMATCHING ROWS (limbo: {}, sqlite: {}) for query: {}\n\n limbo: {:?}\n\n sqlite: {:?}\n\n differences: {:?}", limbo.len(), sqlite.len(), query, limbo, sqlite, diff_rows);
+                }
+            }
         }
     }
 }
diff --git a/tests/integration/query_processing/test_write_path.rs b/tests/integration/query_processing/test_write_path.rs
index e948ed5d1..407d1e366 100644
--- a/tests/integration/query_processing/test_write_path.rs
+++ b/tests/integration/query_processing/test_write_path.rs
@@ -1,6 +1,6 @@
 use crate::common::{self, maybe_setup_tracing};
 use crate::common::{compare_string, do_flush, TempDatabase};
-use limbo_core::{Connection, OwnedValue, StepResult};
+use limbo_core::{Connection, OwnedValue, Row, StepResult};
 use log::debug;
 use std::rc::Rc;
 
@@ -153,52 +153,19 @@ fn test_sequential_write() -> anyhow::Result<()> {
             println!("progress {:.1}%", progress);
         }
         let insert_query = format!("INSERT INTO test VALUES ({})", i);
-        match conn.query(insert_query) {
-            Ok(Some(ref mut rows)) => loop {
-                match rows.step()? {
-                    StepResult::IO => {
-                        tmp_db.io.run_once()?;
-                    }
-                    StepResult::Done => break,
-                    _ => unreachable!(),
-                }
-            },
-            Ok(None) => {}
-            Err(err) => {
-                eprintln!("{}", err);
-            }
-        };
+        run_query(&tmp_db, &conn, &insert_query)?;
 
         let mut current_read_index = 0;
-        match conn.query(list_query) {
-            Ok(Some(ref mut rows)) => loop {
-                match rows.step()? {
-                    StepResult::Row => {
-                        let row = rows.row().unwrap();
-                        let first_value = row.get::<&OwnedValue>(0).expect("missing id");
-                        let id = match first_value {
-                            limbo_core::OwnedValue::Integer(i) => *i as i32,
-                            limbo_core::OwnedValue::Float(f) => *f as i32,
-                            _ => unreachable!(),
-                        };
-                        assert_eq!(current_read_index, id);
-                        current_read_index += 1;
-                    }
-                    StepResult::IO => {
-                        tmp_db.io.run_once()?;
-                    }
-                    StepResult::Interrupt => break,
-                    StepResult::Done => break,
-                    StepResult::Busy => {
-                        panic!("Database is busy");
-                    }
-                }
-            },
-            Ok(None) => {}
-            Err(err) => {
-                eprintln!("{}", err);
-            }
-        }
+        run_query_on_row(&tmp_db, &conn, &list_query, |row: &Row| {
+            let first_value = row.get::<&OwnedValue>(0).expect("missing id");
+            let id = match first_value {
+                limbo_core::OwnedValue::Integer(i) => *i as i32,
+                limbo_core::OwnedValue::Float(f) => *f as i32,
+                _ => unreachable!(),
+            };
+            assert_eq!(current_read_index, id);
+            current_read_index += 1;
+        })?;
         common::do_flush(&conn, &tmp_db)?;
     }
     Ok(())
@@ -215,55 +182,22 @@ fn test_regression_multi_row_insert() -> anyhow::Result<()> {
     let insert_query = "INSERT INTO test VALUES (-2), (-3), (-1)";
     let list_query = "SELECT * FROM test";
 
-    match conn.query(insert_query) {
-        Ok(Some(ref mut rows)) => loop {
-            match rows.step()? {
-                StepResult::IO => {
-                    tmp_db.io.run_once()?;
-                }
-                StepResult::Done => break,
-                _ => unreachable!(),
-            }
-        },
-        Ok(None) => {}
-        Err(err) => {
-            eprintln!("{}", err);
-        }
-    };
+    run_query(&tmp_db, &conn, insert_query)?;
 
     common::do_flush(&conn, &tmp_db)?;
     let mut current_read_index = 1;
     let expected_ids = vec![-3, -2, -1];
     let mut actual_ids = Vec::new();
-    match conn.query(list_query) {
-        Ok(Some(ref mut rows)) => loop {
-            match rows.step()? {
{ - StepResult::Row => { - let row = rows.row().unwrap(); - let first_value = row.get::<&OwnedValue>(0).expect("missing id"); - let id = match first_value { - OwnedValue::Float(f) => *f as i32, - _ => panic!("expected float"), - }; - actual_ids.push(id); - current_read_index += 1; - } - StepResult::IO => { - tmp_db.io.run_once()?; - } - StepResult::Interrupt => break, - StepResult::Done => break, - StepResult::Busy => { - panic!("Database is busy"); - } - } - }, - Ok(None) => {} - Err(err) => { - eprintln!("{}", err); - } - } + run_query_on_row(&tmp_db, &conn, list_query, |row: &Row| { + let first_value = row.get::<&OwnedValue>(0).expect("missing id"); + let id = match first_value { + OwnedValue::Float(f) => *f as i32, + _ => panic!("expected float"), + }; + actual_ids.push(id); + current_read_index += 1; + })?; assert_eq!(current_read_index, 4); // Verify we read all rows // sort ids @@ -331,49 +265,18 @@ fn test_wal_checkpoint() -> anyhow::Result<()> { let insert_query = format!("INSERT INTO test VALUES ({})", i); do_flush(&conn, &tmp_db)?; conn.checkpoint()?; - match conn.query(insert_query) { - Ok(Some(ref mut rows)) => loop { - match rows.step()? { - StepResult::IO => { - tmp_db.io.run_once()?; - } - StepResult::Done => break, - _ => unreachable!(), - } - }, - Ok(None) => {} - Err(err) => { - eprintln!("{}", err); - } - }; + run_query(&tmp_db, &conn, &insert_query)?; } do_flush(&conn, &tmp_db)?; conn.clear_page_cache()?; let list_query = "SELECT * FROM test LIMIT 1"; let mut current_index = 0; - match conn.query(list_query) { - Ok(Some(ref mut rows)) => loop { - match rows.step()? { - StepResult::Row => { - let row = rows.row().unwrap(); - let id = row.get::(0).unwrap(); - assert_eq!(current_index, id as usize); - current_index += 1; - } - StepResult::IO => { - tmp_db.io.run_once()?; - } - StepResult::Interrupt => break, - StepResult::Done => break, - StepResult::Busy => unreachable!(), - } - }, - Ok(None) => {} - Err(err) => { - eprintln!("{}", err); - } - } + run_query_on_row(&tmp_db, &conn, list_query, |row: &Row| { + let id = row.get::(0).unwrap(); + assert_eq!(current_index, id as usize); + current_index += 1; + })?; do_flush(&conn, &tmp_db)?; Ok(()) } @@ -387,21 +290,7 @@ fn test_wal_restart() -> anyhow::Result<()> { fn insert(i: usize, conn: &Rc, tmp_db: &TempDatabase) -> anyhow::Result<()> { debug!("inserting {}", i); let insert_query = format!("INSERT INTO test VALUES ({})", i); - match conn.query(insert_query) { - Ok(Some(ref mut rows)) => loop { - match rows.step()? { - StepResult::IO => { - tmp_db.io.run_once()?; - } - StepResult::Done => break, - _ => unreachable!(), - } - }, - Ok(None) => {} - Err(err) => { - eprintln!("{}", err); - } - }; + run_query(tmp_db, conn, &insert_query)?; debug!("inserted {}", i); tmp_db.io.run_once()?; Ok(()) @@ -410,26 +299,13 @@ fn test_wal_restart() -> anyhow::Result<()> { fn count(conn: &Rc, tmp_db: &TempDatabase) -> anyhow::Result { debug!("counting"); let list_query = "SELECT count(x) FROM test"; - loop { - if let Some(ref mut rows) = conn.query(list_query)? { - loop { - match rows.step()? 
{ - StepResult::Row => { - let row = rows.row().unwrap(); - let count = row.get::(0).unwrap(); - debug!("counted {}", count); - return Ok(count as usize); - } - StepResult::IO => { - tmp_db.io.run_once()?; - } - StepResult::Interrupt => break, - StepResult::Done => break, - StepResult::Busy => panic!("Database is busy"), - } - } - } - } + let mut count = None; + run_query_on_row(tmp_db, conn, list_query, |row: &Row| { + assert!(count.is_none()); + count = Some(row.get::(0).unwrap() as usize); + debug!("counted {:?}", count); + })?; + Ok(count.unwrap()) } { @@ -461,3 +337,137 @@ fn test_insert_after_big_blob() -> anyhow::Result<()> { Ok(()) } + +#[test_log::test] +#[ignore = "this takes too long :)"] +fn test_write_delete_with_index() -> anyhow::Result<()> { + let _ = env_logger::try_init(); + + maybe_setup_tracing(); + + let tmp_db = TempDatabase::new_with_rusqlite("CREATE TABLE test (x PRIMARY KEY);"); + let conn = tmp_db.connect_limbo(); + + let list_query = "SELECT * FROM test"; + let max_iterations = 1000; + for i in 0..max_iterations { + println!("inserting {} ", i); + let insert_query = format!("INSERT INTO test VALUES ({})", i); + run_query(&tmp_db, &conn, &insert_query)?; + } + for i in 0..max_iterations { + println!("deleting {} ", i); + let delete_query = format!("delete from test where x={}", i); + run_query(&tmp_db, &conn, &delete_query)?; + println!("listing after deleting {} ", i); + let mut current_read_index = i + 1; + run_query_on_row(&tmp_db, &conn, list_query, |row: &Row| { + let first_value = row.get::<&OwnedValue>(0).expect("missing id"); + let id = match first_value { + limbo_core::OwnedValue::Integer(i) => *i as i32, + limbo_core::OwnedValue::Float(f) => *f as i32, + _ => unreachable!(), + }; + assert_eq!(current_read_index, id); + current_read_index += 1; + })?; + for i in i + 1..max_iterations { + // now test with seek + run_query_on_row( + &tmp_db, + &conn, + &format!("select * from test where x = {}", i), + |row| { + let first_value = row.get::<&OwnedValue>(0).expect("missing id"); + let id = match first_value { + limbo_core::OwnedValue::Integer(i) => *i as i32, + limbo_core::OwnedValue::Float(f) => *f as i32, + _ => unreachable!(), + }; + assert_eq!(i, id); + }, + )?; + } + } + + Ok(()) +} + +#[test] +fn test_update_with_index() -> anyhow::Result<()> { + let _ = env_logger::try_init(); + + maybe_setup_tracing(); + + let tmp_db = TempDatabase::new_with_rusqlite("CREATE TABLE test (x REAL PRIMARY KEY, y TEXT);"); + let conn = tmp_db.connect_limbo(); + + run_query(&tmp_db, &conn, "INSERT INTO test VALUES (1.0, 'foo')")?; + run_query(&tmp_db, &conn, "INSERT INTO test VALUES (2.0, 'bar')")?; + + run_query_on_row(&tmp_db, &conn, "SELECT * from test WHERE x=10.0", |row| { + assert_eq!(row.get::(0).unwrap(), 1.0); + })?; + run_query(&tmp_db, &conn, "UPDATE test SET x=10.0 WHERE x=1.0")?; + run_query_on_row(&tmp_db, &conn, "SELECT * from test WHERE x=10.0", |row| { + assert_eq!(row.get::(0).unwrap(), 10.0); + })?; + + let mut count_1 = 0; + let mut count_10 = 0; + run_query_on_row(&tmp_db, &conn, "SELECT * from test", |row| { + let v = row.get::(0).unwrap(); + if v == 1.0 { + count_1 += 1; + } else if v == 10.0 { + count_10 += 1; + } + })?; + assert_eq!(count_1, 0, "1.0 shouldn't be inside table"); + assert_eq!(count_10, 1, "10.0 should have existed"); + + Ok(()) +} + +fn run_query(tmp_db: &TempDatabase, conn: &Rc, query: &str) -> anyhow::Result<()> { + run_query_core(tmp_db, conn, query, None::) +} + +fn run_query_on_row( + tmp_db: &TempDatabase, + conn: &Rc, + query: 
+    on_row: impl FnMut(&Row),
+) -> anyhow::Result<()> {
+    run_query_core(tmp_db, conn, query, Some(on_row))
+}
+
+fn run_query_core(
+    tmp_db: &TempDatabase,
+    conn: &Rc<Connection>,
+    query: &str,
+    mut on_row: Option<impl FnMut(&Row)>,
+) -> anyhow::Result<()> {
+    match conn.query(query) {
+        Ok(Some(ref mut rows)) => loop {
+            match rows.step()? {
+                StepResult::IO => {
+                    tmp_db.io.run_once()?;
+                }
+                StepResult::Done => break,
+                StepResult::Row => {
+                    if let Some(on_row) = on_row.as_mut() {
+                        let row = rows.row().unwrap();
+                        on_row(row)
+                    }
+                }
+                _ => unreachable!(),
+            }
+        },
+        Ok(None) => {}
+        Err(err) => {
+            eprintln!("{}", err);
+        }
+    };
+    Ok(())
+}
diff --git a/uv.lock b/uv.lock
new file mode 100644
index 000000000..7c6a5bc43
--- /dev/null
+++ b/uv.lock
@@ -0,0 +1,407 @@
+version = 1
+requires-python = ">=3.13"
+
+[manifest]
+members = [
+    "limbo",
+    "limbo-test",
+    "scripts",
+]
+
+[[package]]
+name = "annotated-types"
+version = "0.7.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/ee/67/531ea369ba64dcff5ec9c3402f9f51bf748cec26dde048a2f973a4eea7f5/annotated_types-0.7.0.tar.gz", hash = "sha256:aff07c09a53a08bc8cfccb9c85b05f1aa9a2a6f23728d790723543408344ce89", size = 16081 }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/78/b6/6307fbef88d9b5ee7421e68d78a9f162e0da4900bc5f5793f6d3d0e34fb8/annotated_types-0.7.0-py3-none-any.whl", hash = "sha256:1f02e8b43a8fbbc3f3e0d4f0f4bfc8131bcb4eebe8849b8e5c773f3a1c582a53", size = 13643 },
+]
+
+[[package]]
+name = "certifi"
+version = "2025.1.31"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/1c/ab/c9f1e32b7b1bf505bf26f0ef697775960db7932abeb7b516de930ba2705f/certifi-2025.1.31.tar.gz", hash = "sha256:3d5da6925056f6f18f119200434a4780a94263f10d1c21d032a6f6b2baa20651", size = 167577 }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/38/fc/bce832fd4fd99766c04d1ee0eead6b0ec6486fb100ae5e74c1d91292b982/certifi-2025.1.31-py3-none-any.whl", hash = "sha256:ca78db4565a652026a4db2bcdf68f2fb589ea80d0be70e03929ed730746b84fe", size = 166393 },
+]
+
+[[package]]
+name = "cffi"
+version = "1.17.1"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "pycparser" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/fc/97/c783634659c2920c3fc70419e3af40972dbaf758daa229a7d6ea6135c90d/cffi-1.17.1.tar.gz", hash = "sha256:1c39c6016c32bc48dd54561950ebd6836e1670f2ae46128f67cf49e789c52824", size = 516621 }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/8d/f8/dd6c246b148639254dad4d6803eb6a54e8c85c6e11ec9df2cffa87571dbe/cffi-1.17.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:f3a2b4222ce6b60e2e8b337bb9596923045681d71e5a082783484d845390938e", size = 182989 },
+    { url = "https://files.pythonhosted.org/packages/8b/f1/672d303ddf17c24fc83afd712316fda78dc6fce1cd53011b839483e1ecc8/cffi-1.17.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:0984a4925a435b1da406122d4d7968dd861c1385afe3b45ba82b750f229811e2", size = 178802 },
+    { url = "https://files.pythonhosted.org/packages/0e/2d/eab2e858a91fdff70533cab61dcff4a1f55ec60425832ddfdc9cd36bc8af/cffi-1.17.1-cp313-cp313-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d01b12eeeb4427d3110de311e1774046ad344f5b1a7403101878976ecd7a10f3", size = 454792 },
+    { url =
"https://files.pythonhosted.org/packages/75/b2/fbaec7c4455c604e29388d55599b99ebcc250a60050610fadde58932b7ee/cffi-1.17.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:706510fe141c86a69c8ddc029c7910003a17353970cff3b904ff0686a5927683", size = 478893 }, + { url = "https://files.pythonhosted.org/packages/4f/b7/6e4a2162178bf1935c336d4da8a9352cccab4d3a5d7914065490f08c0690/cffi-1.17.1-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:de55b766c7aa2e2a3092c51e0483d700341182f08e67c63630d5b6f200bb28e5", size = 485810 }, + { url = "https://files.pythonhosted.org/packages/c7/8a/1d0e4a9c26e54746dc08c2c6c037889124d4f59dffd853a659fa545f1b40/cffi-1.17.1-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c59d6e989d07460165cc5ad3c61f9fd8f1b4796eacbd81cee78957842b834af4", size = 471200 }, + { url = "https://files.pythonhosted.org/packages/26/9f/1aab65a6c0db35f43c4d1b4f580e8df53914310afc10ae0397d29d697af4/cffi-1.17.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dd398dbc6773384a17fe0d3e7eeb8d1a21c2200473ee6806bb5e6a8e62bb73dd", size = 479447 }, + { url = "https://files.pythonhosted.org/packages/5f/e4/fb8b3dd8dc0e98edf1135ff067ae070bb32ef9d509d6cb0f538cd6f7483f/cffi-1.17.1-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:3edc8d958eb099c634dace3c7e16560ae474aa3803a5df240542b305d14e14ed", size = 484358 }, + { url = "https://files.pythonhosted.org/packages/f1/47/d7145bf2dc04684935d57d67dff9d6d795b2ba2796806bb109864be3a151/cffi-1.17.1-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:72e72408cad3d5419375fc87d289076ee319835bdfa2caad331e377589aebba9", size = 488469 }, + { url = "https://files.pythonhosted.org/packages/bf/ee/f94057fa6426481d663b88637a9a10e859e492c73d0384514a17d78ee205/cffi-1.17.1-cp313-cp313-win32.whl", hash = "sha256:e03eab0a8677fa80d646b5ddece1cbeaf556c313dcfac435ba11f107ba117b5d", size = 172475 }, + { url = "https://files.pythonhosted.org/packages/7c/fc/6a8cb64e5f0324877d503c854da15d76c1e50eb722e320b15345c4d0c6de/cffi-1.17.1-cp313-cp313-win_amd64.whl", hash = "sha256:f6a16c31041f09ead72d69f583767292f750d24913dadacf5756b966aacb3f1a", size = 182009 }, +] + +[[package]] +name = "charset-normalizer" +version = "3.4.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/16/b0/572805e227f01586461c80e0fd25d65a2115599cc9dad142fee4b747c357/charset_normalizer-3.4.1.tar.gz", hash = "sha256:44251f18cd68a75b56585dd00dae26183e102cd5e0f9f1466e6df5da2ed64ea3", size = 123188 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/38/94/ce8e6f63d18049672c76d07d119304e1e2d7c6098f0841b51c666e9f44a0/charset_normalizer-3.4.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:aabfa34badd18f1da5ec1bc2715cadc8dca465868a4e73a0173466b688f29dda", size = 195698 }, + { url = "https://files.pythonhosted.org/packages/24/2e/dfdd9770664aae179a96561cc6952ff08f9a8cd09a908f259a9dfa063568/charset_normalizer-3.4.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:22e14b5d70560b8dd51ec22863f370d1e595ac3d024cb8ad7d308b4cd95f8313", size = 140162 }, + { url = "https://files.pythonhosted.org/packages/24/4e/f646b9093cff8fc86f2d60af2de4dc17c759de9d554f130b140ea4738ca6/charset_normalizer-3.4.1-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8436c508b408b82d87dc5f62496973a1805cd46727c34440b0d29d8a2f50a6c9", size = 150263 }, + { url = 
"https://files.pythonhosted.org/packages/5e/67/2937f8d548c3ef6e2f9aab0f6e21001056f692d43282b165e7c56023e6dd/charset_normalizer-3.4.1-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2d074908e1aecee37a7635990b2c6d504cd4766c7bc9fc86d63f9c09af3fa11b", size = 142966 }, + { url = "https://files.pythonhosted.org/packages/52/ed/b7f4f07de100bdb95c1756d3a4d17b90c1a3c53715c1a476f8738058e0fa/charset_normalizer-3.4.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:955f8851919303c92343d2f66165294848d57e9bba6cf6e3625485a70a038d11", size = 144992 }, + { url = "https://files.pythonhosted.org/packages/96/2c/d49710a6dbcd3776265f4c923bb73ebe83933dfbaa841c5da850fe0fd20b/charset_normalizer-3.4.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:44ecbf16649486d4aebafeaa7ec4c9fed8b88101f4dd612dcaf65d5e815f837f", size = 147162 }, + { url = "https://files.pythonhosted.org/packages/b4/41/35ff1f9a6bd380303dea55e44c4933b4cc3c4850988927d4082ada230273/charset_normalizer-3.4.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:0924e81d3d5e70f8126529951dac65c1010cdf117bb75eb02dd12339b57749dd", size = 140972 }, + { url = "https://files.pythonhosted.org/packages/fb/43/c6a0b685fe6910d08ba971f62cd9c3e862a85770395ba5d9cad4fede33ab/charset_normalizer-3.4.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:2967f74ad52c3b98de4c3b32e1a44e32975e008a9cd2a8cc8966d6a5218c5cb2", size = 149095 }, + { url = "https://files.pythonhosted.org/packages/4c/ff/a9a504662452e2d2878512115638966e75633519ec11f25fca3d2049a94a/charset_normalizer-3.4.1-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:c75cb2a3e389853835e84a2d8fb2b81a10645b503eca9bcb98df6b5a43eb8886", size = 152668 }, + { url = "https://files.pythonhosted.org/packages/6c/71/189996b6d9a4b932564701628af5cee6716733e9165af1d5e1b285c530ed/charset_normalizer-3.4.1-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:09b26ae6b1abf0d27570633b2b078a2a20419c99d66fb2823173d73f188ce601", size = 150073 }, + { url = "https://files.pythonhosted.org/packages/e4/93/946a86ce20790e11312c87c75ba68d5f6ad2208cfb52b2d6a2c32840d922/charset_normalizer-3.4.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:fa88b843d6e211393a37219e6a1c1df99d35e8fd90446f1118f4216e307e48cd", size = 145732 }, + { url = "https://files.pythonhosted.org/packages/cd/e5/131d2fb1b0dddafc37be4f3a2fa79aa4c037368be9423061dccadfd90091/charset_normalizer-3.4.1-cp313-cp313-win32.whl", hash = "sha256:eb8178fe3dba6450a3e024e95ac49ed3400e506fd4e9e5c32d30adda88cbd407", size = 95391 }, + { url = "https://files.pythonhosted.org/packages/27/f2/4f9a69cc7712b9b5ad8fdb87039fd89abba997ad5cbe690d1835d40405b0/charset_normalizer-3.4.1-cp313-cp313-win_amd64.whl", hash = "sha256:b1ac5992a838106edb89654e0aebfc24f5848ae2547d22c2c3f66454daa11971", size = 102702 }, + { url = "https://files.pythonhosted.org/packages/0e/f6/65ecc6878a89bb1c23a086ea335ad4bf21a588990c3f535a227b9eea9108/charset_normalizer-3.4.1-py3-none-any.whl", hash = "sha256:d98b1668f06378c6dbefec3b92299716b931cd4e6061f3c875a71ced1780ab85", size = 49767 }, +] + +[[package]] +name = "cryptography" +version = "44.0.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "cffi", marker = "platform_python_implementation != 'PyPy'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/cd/25/4ce80c78963834b8a9fd1cc1266be5ed8d1840785c0f2e1b73b8d128d505/cryptography-44.0.2.tar.gz", hash = 
"sha256:c63454aa261a0cf0c5b4718349629793e9e634993538db841165b3df74f37ec0", size = 710807 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/92/ef/83e632cfa801b221570c5f58c0369db6fa6cef7d9ff859feab1aae1a8a0f/cryptography-44.0.2-cp37-abi3-macosx_10_9_universal2.whl", hash = "sha256:efcfe97d1b3c79e486554efddeb8f6f53a4cdd4cf6086642784fa31fc384e1d7", size = 6676361 }, + { url = "https://files.pythonhosted.org/packages/30/ec/7ea7c1e4c8fc8329506b46c6c4a52e2f20318425d48e0fe597977c71dbce/cryptography-44.0.2-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:29ecec49f3ba3f3849362854b7253a9f59799e3763b0c9d0826259a88efa02f1", size = 3952350 }, + { url = "https://files.pythonhosted.org/packages/27/61/72e3afdb3c5ac510330feba4fc1faa0fe62e070592d6ad00c40bb69165e5/cryptography-44.0.2-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc821e161ae88bfe8088d11bb39caf2916562e0a2dc7b6d56714a48b784ef0bb", size = 4166572 }, + { url = "https://files.pythonhosted.org/packages/26/e4/ba680f0b35ed4a07d87f9e98f3ebccb05091f3bf6b5a478b943253b3bbd5/cryptography-44.0.2-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:3c00b6b757b32ce0f62c574b78b939afab9eecaf597c4d624caca4f9e71e7843", size = 3958124 }, + { url = "https://files.pythonhosted.org/packages/9c/e8/44ae3e68c8b6d1cbc59040288056df2ad7f7f03bbcaca6b503c737ab8e73/cryptography-44.0.2-cp37-abi3-manylinux_2_28_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:7bdcd82189759aba3816d1f729ce42ffded1ac304c151d0a8e89b9996ab863d5", size = 3678122 }, + { url = "https://files.pythonhosted.org/packages/27/7b/664ea5e0d1eab511a10e480baf1c5d3e681c7d91718f60e149cec09edf01/cryptography-44.0.2-cp37-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:4973da6ca3db4405c54cd0b26d328be54c7747e89e284fcff166132eb7bccc9c", size = 4191831 }, + { url = "https://files.pythonhosted.org/packages/2a/07/79554a9c40eb11345e1861f46f845fa71c9e25bf66d132e123d9feb8e7f9/cryptography-44.0.2-cp37-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:4e389622b6927d8133f314949a9812972711a111d577a5d1f4bee5e58736b80a", size = 3960583 }, + { url = "https://files.pythonhosted.org/packages/bb/6d/858e356a49a4f0b591bd6789d821427de18432212e137290b6d8a817e9bf/cryptography-44.0.2-cp37-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:f514ef4cd14bb6fb484b4a60203e912cfcb64f2ab139e88c2274511514bf7308", size = 4191753 }, + { url = "https://files.pythonhosted.org/packages/b2/80/62df41ba4916067fa6b125aa8c14d7e9181773f0d5d0bd4dcef580d8b7c6/cryptography-44.0.2-cp37-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:1bc312dfb7a6e5d66082c87c34c8a62176e684b6fe3d90fcfe1568de675e6688", size = 4079550 }, + { url = "https://files.pythonhosted.org/packages/f3/cd/2558cc08f7b1bb40683f99ff4327f8dcfc7de3affc669e9065e14824511b/cryptography-44.0.2-cp37-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:3b721b8b4d948b218c88cb8c45a01793483821e709afe5f622861fc6182b20a7", size = 4298367 }, + { url = "https://files.pythonhosted.org/packages/71/59/94ccc74788945bc3bd4cf355d19867e8057ff5fdbcac781b1ff95b700fb1/cryptography-44.0.2-cp37-abi3-win32.whl", hash = "sha256:51e4de3af4ec3899d6d178a8c005226491c27c4ba84101bfb59c901e10ca9f79", size = 2772843 }, + { url = "https://files.pythonhosted.org/packages/ca/2c/0d0bbaf61ba05acb32f0841853cfa33ebb7a9ab3d9ed8bb004bd39f2da6a/cryptography-44.0.2-cp37-abi3-win_amd64.whl", hash = "sha256:c505d61b6176aaf982c5717ce04e87da5abc9a36a5b39ac03905c4aafe8de7aa", size = 3209057 }, + { url = 
"https://files.pythonhosted.org/packages/9e/be/7a26142e6d0f7683d8a382dd963745e65db895a79a280a30525ec92be890/cryptography-44.0.2-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:8e0ddd63e6bf1161800592c71ac794d3fb8001f2caebe0966e77c5234fa9efc3", size = 6677789 }, + { url = "https://files.pythonhosted.org/packages/06/88/638865be7198a84a7713950b1db7343391c6066a20e614f8fa286eb178ed/cryptography-44.0.2-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:81276f0ea79a208d961c433a947029e1a15948966658cf6710bbabb60fcc2639", size = 3951919 }, + { url = "https://files.pythonhosted.org/packages/d7/fc/99fe639bcdf58561dfad1faa8a7369d1dc13f20acd78371bb97a01613585/cryptography-44.0.2-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9a1e657c0f4ea2a23304ee3f964db058c9e9e635cc7019c4aa21c330755ef6fd", size = 4167812 }, + { url = "https://files.pythonhosted.org/packages/53/7b/aafe60210ec93d5d7f552592a28192e51d3c6b6be449e7fd0a91399b5d07/cryptography-44.0.2-cp39-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:6210c05941994290f3f7f175a4a57dbbb2afd9273657614c506d5976db061181", size = 3958571 }, + { url = "https://files.pythonhosted.org/packages/16/32/051f7ce79ad5a6ef5e26a92b37f172ee2d6e1cce09931646eef8de1e9827/cryptography-44.0.2-cp39-abi3-manylinux_2_28_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:d1c3572526997b36f245a96a2b1713bf79ce99b271bbcf084beb6b9b075f29ea", size = 3679832 }, + { url = "https://files.pythonhosted.org/packages/78/2b/999b2a1e1ba2206f2d3bca267d68f350beb2b048a41ea827e08ce7260098/cryptography-44.0.2-cp39-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:b042d2a275c8cee83a4b7ae30c45a15e6a4baa65a179a0ec2d78ebb90e4f6699", size = 4193719 }, + { url = "https://files.pythonhosted.org/packages/72/97/430e56e39a1356e8e8f10f723211a0e256e11895ef1a135f30d7d40f2540/cryptography-44.0.2-cp39-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:d03806036b4f89e3b13b6218fefea8d5312e450935b1a2d55f0524e2ed7c59d9", size = 3960852 }, + { url = "https://files.pythonhosted.org/packages/89/33/c1cf182c152e1d262cac56850939530c05ca6c8d149aa0dcee490b417e99/cryptography-44.0.2-cp39-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:c7362add18b416b69d58c910caa217f980c5ef39b23a38a0880dfd87bdf8cd23", size = 4193906 }, + { url = "https://files.pythonhosted.org/packages/e1/99/87cf26d4f125380dc674233971069bc28d19b07f7755b29861570e513650/cryptography-44.0.2-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:8cadc6e3b5a1f144a039ea08a0bdb03a2a92e19c46be3285123d32029f40a922", size = 4081572 }, + { url = "https://files.pythonhosted.org/packages/b3/9f/6a3e0391957cc0c5f84aef9fbdd763035f2b52e998a53f99345e3ac69312/cryptography-44.0.2-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:6f101b1f780f7fc613d040ca4bdf835c6ef3b00e9bd7125a4255ec574c7916e4", size = 4298631 }, + { url = "https://files.pythonhosted.org/packages/e2/a5/5bc097adb4b6d22a24dea53c51f37e480aaec3465285c253098642696423/cryptography-44.0.2-cp39-abi3-win32.whl", hash = "sha256:3dc62975e31617badc19a906481deacdeb80b4bb454394b4098e3f2525a488c5", size = 2773792 }, + { url = "https://files.pythonhosted.org/packages/33/cf/1f7649b8b9a3543e042d3f348e398a061923ac05b507f3f4d95f11938aa9/cryptography-44.0.2-cp39-abi3-win_amd64.whl", hash = "sha256:5f6f90b72d8ccadb9c6e311c775c8305381db88374c65fa1a68250aa8a9cb3a6", size = 3210957 }, +] + +[[package]] +name = "deprecated" +version = "1.2.18" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "wrapt" }, +] +sdist = { url = 
"https://files.pythonhosted.org/packages/98/97/06afe62762c9a8a86af0cfb7bfdab22a43ad17138b07af5b1a58442690a2/deprecated-1.2.18.tar.gz", hash = "sha256:422b6f6d859da6f2ef57857761bfb392480502a64c3028ca9bbe86085d72115d", size = 2928744 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/6e/c6/ac0b6c1e2d138f1002bcf799d330bd6d85084fece321e662a14223794041/Deprecated-1.2.18-py2.py3-none-any.whl", hash = "sha256:bd5011788200372a32418f888e326a09ff80d0214bd961147cfed01b5c018eec", size = 9998 }, +] + +[[package]] +name = "faker" +version = "37.1.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "tzdata" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/ba/a6/b77f42021308ec8b134502343da882c0905d725a4d661c7adeaf7acaf515/faker-37.1.0.tar.gz", hash = "sha256:ad9dc66a3b84888b837ca729e85299a96b58fdaef0323ed0baace93c9614af06", size = 1875707 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d7/a1/8936bc8e79af80ca38288dd93ed44ed1f9d63beb25447a4c59e746e01f8d/faker-37.1.0-py3-none-any.whl", hash = "sha256:dc2f730be71cb770e9c715b13374d80dbcee879675121ab51f9683d262ae9a1c", size = 1918783 }, +] + +[[package]] +name = "idna" +version = "3.10" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/f1/70/7703c29685631f5a7590aa73f1f1d3fa9a380e654b86af429e0934a32f7d/idna-3.10.tar.gz", hash = "sha256:12f65c9b470abda6dc35cf8e63cc574b1c52b11df2c86030af0ac09b01b13ea9", size = 190490 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/76/c6/c88e154df9c4e1a2a66ccf0005a88dfb2650c1dffb6f5ce603dfbd452ce3/idna-3.10-py3-none-any.whl", hash = "sha256:946d195a0d259cbba61165e88e65941f16e9b36ea6ddb97f00452bae8b1287d3", size = 70442 }, +] + +[[package]] +name = "limbo" +version = "0.1.0" +source = { virtual = "." 
} +dependencies = [ + { name = "rich" }, +] + +[package.metadata] +requires-dist = [{ name = "rich", specifier = ">=14.0.0" }] + +[[package]] +name = "limbo-test" +version = "0.1.0" +source = { editable = "testing" } +dependencies = [ + { name = "faker" }, + { name = "pydantic" }, +] + +[package.metadata] +requires-dist = [ + { name = "faker", specifier = ">=37.1.0" }, + { name = "pydantic", specifier = ">=2.11.1" }, +] + +[[package]] +name = "markdown-it-py" +version = "3.0.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "mdurl" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/38/71/3b932df36c1a044d397a1f92d1cf91ee0a503d91e470cbd670aa66b07ed0/markdown-it-py-3.0.0.tar.gz", hash = "sha256:e3f60a94fa066dc52ec76661e37c851cb232d92f9886b15cb560aaada2df8feb", size = 74596 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/42/d7/1ec15b46af6af88f19b8e5ffea08fa375d433c998b8a7639e76935c14f1f/markdown_it_py-3.0.0-py3-none-any.whl", hash = "sha256:355216845c60bd96232cd8d8c40e8f9765cc86f46880e43a8fd22dc1a1a8cab1", size = 87528 }, +] + +[[package]] +name = "mdurl" +version = "0.1.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/d6/54/cfe61301667036ec958cb99bd3efefba235e65cdeb9c84d24a8293ba1d90/mdurl-0.1.2.tar.gz", hash = "sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba", size = 8729 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b3/38/89ba8ad64ae25be8de66a6d463314cf1eb366222074cfda9ee839c56a4b4/mdurl-0.1.2-py3-none-any.whl", hash = "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8", size = 9979 }, +] + +[[package]] +name = "pycparser" +version = "2.22" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/1d/b2/31537cf4b1ca988837256c910a668b553fceb8f069bedc4b1c826024b52c/pycparser-2.22.tar.gz", hash = "sha256:491c8be9c040f5390f5bf44a5b07752bd07f56edf992381b05c701439eec10f6", size = 172736 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/13/a3/a812df4e2dd5696d1f351d58b8fe16a405b234ad2886a0dab9183fb78109/pycparser-2.22-py3-none-any.whl", hash = "sha256:c3702b6d3dd8c7abc1afa565d7e63d53a1d0bd86cdc24edd75470f4de499cfcc", size = 117552 }, +] + +[[package]] +name = "pydantic" +version = "2.11.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "annotated-types" }, + { name = "pydantic-core" }, + { name = "typing-extensions" }, + { name = "typing-inspection" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/93/a3/698b87a4d4d303d7c5f62ea5fbf7a79cab236ccfbd0a17847b7f77f8163e/pydantic-2.11.1.tar.gz", hash = "sha256:442557d2910e75c991c39f4b4ab18963d57b9b55122c8b2a9cd176d8c29ce968", size = 782817 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/cc/12/f9221a949f2419e2e23847303c002476c26fbcfd62dc7f3d25d0bec5ca99/pydantic-2.11.1-py3-none-any.whl", hash = "sha256:5b6c415eee9f8123a14d859be0c84363fec6b1feb6b688d6435801230b56e0b8", size = 442648 }, +] + +[[package]] +name = "pydantic-core" +version = "2.33.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/b9/05/91ce14dfd5a3a99555fce436318cc0fd1f08c4daa32b3248ad63669ea8b4/pydantic_core-2.33.0.tar.gz", hash = "sha256:40eb8af662ba409c3cbf4a8150ad32ae73514cd7cb1f1a2113af39763dd616b3", size = 434080 } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/79/20/de2ad03ce8f5b3accf2196ea9b44f31b0cd16ac6e8cfc6b21976ed45ec35/pydantic_core-2.33.0-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:f00e8b59e1fc8f09d05594aa7d2b726f1b277ca6155fc84c0396db1b373c4555", size = 2032214 }, + { url = "https://files.pythonhosted.org/packages/f9/af/6817dfda9aac4958d8b516cbb94af507eb171c997ea66453d4d162ae8948/pydantic_core-2.33.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:1a73be93ecef45786d7d95b0c5e9b294faf35629d03d5b145b09b81258c7cd6d", size = 1852338 }, + { url = "https://files.pythonhosted.org/packages/44/f3/49193a312d9c49314f2b953fb55740b7c530710977cabe7183b8ef111b7f/pydantic_core-2.33.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ff48a55be9da6930254565ff5238d71d5e9cd8c5487a191cb85df3bdb8c77365", size = 1896913 }, + { url = "https://files.pythonhosted.org/packages/06/e0/c746677825b2e29a2fa02122a8991c83cdd5b4c5f638f0664d4e35edd4b2/pydantic_core-2.33.0-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:26a4ea04195638dcd8c53dadb545d70badba51735b1594810e9768c2c0b4a5da", size = 1986046 }, + { url = "https://files.pythonhosted.org/packages/11/ec/44914e7ff78cef16afb5e5273d480c136725acd73d894affdbe2a1bbaad5/pydantic_core-2.33.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:41d698dcbe12b60661f0632b543dbb119e6ba088103b364ff65e951610cb7ce0", size = 2128097 }, + { url = "https://files.pythonhosted.org/packages/fe/f5/c6247d424d01f605ed2e3802f338691cae17137cee6484dce9f1ac0b872b/pydantic_core-2.33.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ae62032ef513fe6281ef0009e30838a01057b832dc265da32c10469622613885", size = 2681062 }, + { url = "https://files.pythonhosted.org/packages/f0/85/114a2113b126fdd7cf9a9443b1b1fe1b572e5bd259d50ba9d5d3e1927fa9/pydantic_core-2.33.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f225f3a3995dbbc26affc191d0443c6c4aa71b83358fd4c2b7d63e2f6f0336f9", size = 2007487 }, + { url = "https://files.pythonhosted.org/packages/e6/40/3c05ed28d225c7a9acd2b34c5c8010c279683a870219b97e9f164a5a8af0/pydantic_core-2.33.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:5bdd36b362f419c78d09630cbaebc64913f66f62bda6d42d5fbb08da8cc4f181", size = 2121382 }, + { url = "https://files.pythonhosted.org/packages/8a/22/e70c086f41eebd323e6baa92cc906c3f38ddce7486007eb2bdb3b11c8f64/pydantic_core-2.33.0-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:2a0147c0bef783fd9abc9f016d66edb6cac466dc54a17ec5f5ada08ff65caf5d", size = 2072473 }, + { url = "https://files.pythonhosted.org/packages/3e/84/d1614dedd8fe5114f6a0e348bcd1535f97d76c038d6102f271433cd1361d/pydantic_core-2.33.0-cp313-cp313-musllinux_1_1_armv7l.whl", hash = "sha256:c860773a0f205926172c6644c394e02c25421dc9a456deff16f64c0e299487d3", size = 2249468 }, + { url = "https://files.pythonhosted.org/packages/b0/c0/787061eef44135e00fddb4b56b387a06c303bfd3884a6df9bea5cb730230/pydantic_core-2.33.0-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:138d31e3f90087f42aa6286fb640f3c7a8eb7bdae829418265e7e7474bd2574b", size = 2254716 }, + { url = "https://files.pythonhosted.org/packages/ae/e2/27262eb04963201e89f9c280f1e10c493a7a37bc877e023f31aa72d2f911/pydantic_core-2.33.0-cp313-cp313-win32.whl", hash = "sha256:d20cbb9d3e95114325780f3cfe990f3ecae24de7a2d75f978783878cce2ad585", size = 1916450 }, + { url = 
"https://files.pythonhosted.org/packages/13/8d/25ff96f1e89b19e0b70b3cd607c9ea7ca27e1dcb810a9cd4255ed6abf869/pydantic_core-2.33.0-cp313-cp313-win_amd64.whl", hash = "sha256:ca1103d70306489e3d006b0f79db8ca5dd3c977f6f13b2c59ff745249431a606", size = 1956092 }, + { url = "https://files.pythonhosted.org/packages/1b/64/66a2efeff657b04323ffcd7b898cb0354d36dae3a561049e092134a83e9c/pydantic_core-2.33.0-cp313-cp313-win_arm64.whl", hash = "sha256:6291797cad239285275558e0a27872da735b05c75d5237bbade8736f80e4c225", size = 1908367 }, + { url = "https://files.pythonhosted.org/packages/52/54/295e38769133363d7ec4a5863a4d579f331728c71a6644ff1024ee529315/pydantic_core-2.33.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:7b79af799630af263eca9ec87db519426d8c9b3be35016eddad1832bac812d87", size = 1813331 }, + { url = "https://files.pythonhosted.org/packages/4c/9c/0c8ea02db8d682aa1ef48938abae833c1d69bdfa6e5ec13b21734b01ae70/pydantic_core-2.33.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:eabf946a4739b5237f4f56d77fa6668263bc466d06a8036c055587c130a46f7b", size = 1986653 }, + { url = "https://files.pythonhosted.org/packages/8e/4f/3fb47d6cbc08c7e00f92300e64ba655428c05c56b8ab6723bd290bae6458/pydantic_core-2.33.0-cp313-cp313t-win_amd64.whl", hash = "sha256:8a1d581e8cdbb857b0e0e81df98603376c1a5c34dc5e54039dcc00f043df81e7", size = 1931234 }, +] + +[[package]] +name = "pygithub" +version = "2.6.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "deprecated" }, + { name = "pyjwt", extra = ["crypto"] }, + { name = "pynacl" }, + { name = "requests" }, + { name = "typing-extensions" }, + { name = "urllib3" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/c0/88/e08ab18dc74b2916f48703ed1a797d57cb64eca0e23b0a9254e13cfe3911/pygithub-2.6.1.tar.gz", hash = "sha256:b5c035392991cca63959e9453286b41b54d83bf2de2daa7d7ff7e4312cebf3bf", size = 3659473 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ac/fc/a444cd19ccc8c4946a512f3827ed0b3565c88488719d800d54a75d541c0b/PyGithub-2.6.1-py3-none-any.whl", hash = "sha256:6f2fa6d076ccae475f9fc392cc6cdbd54db985d4f69b8833a28397de75ed6ca3", size = 410451 }, +] + +[[package]] +name = "pygments" +version = "2.19.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/7c/2d/c3338d48ea6cc0feb8446d8e6937e1408088a72a39937982cc6111d17f84/pygments-2.19.1.tar.gz", hash = "sha256:61c16d2a8576dc0649d9f39e089b5f02bcd27fba10d8fb4dcc28173f7a45151f", size = 4968581 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/8a/0b/9fcc47d19c48b59121088dd6da2488a49d5f72dacf8262e2790a1d2c7d15/pygments-2.19.1-py3-none-any.whl", hash = "sha256:9ea1544ad55cecf4b8242fab6dd35a93bbce657034b0611ee383099054ab6d8c", size = 1225293 }, +] + +[[package]] +name = "pyjwt" +version = "2.10.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/e7/46/bd74733ff231675599650d3e47f361794b22ef3e3770998dda30d3b63726/pyjwt-2.10.1.tar.gz", hash = "sha256:3cc5772eb20009233caf06e9d8a0577824723b44e6648ee0a2aedb6cf9381953", size = 87785 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/61/ad/689f02752eeec26aed679477e80e632ef1b682313be70793d798c1d5fc8f/PyJWT-2.10.1-py3-none-any.whl", hash = "sha256:dcdd193e30abefd5debf142f9adfcdd2b58004e644f25406ffaebd50bd98dacb", size = 22997 }, +] + +[package.optional-dependencies] +crypto = [ + { name = "cryptography" }, +] + +[[package]] +name = "pynacl" +version = "1.5.0" +source 
= { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "cffi" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/a7/22/27582568be639dfe22ddb3902225f91f2f17ceff88ce80e4db396c8986da/PyNaCl-1.5.0.tar.gz", hash = "sha256:8ac7448f09ab85811607bdd21ec2464495ac8b7c66d146bf545b0f08fb9220ba", size = 3392854 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ce/75/0b8ede18506041c0bf23ac4d8e2971b4161cd6ce630b177d0a08eb0d8857/PyNaCl-1.5.0-cp36-abi3-macosx_10_10_universal2.whl", hash = "sha256:401002a4aaa07c9414132aaed7f6836ff98f59277a234704ff66878c2ee4a0d1", size = 349920 }, + { url = "https://files.pythonhosted.org/packages/59/bb/fddf10acd09637327a97ef89d2a9d621328850a72f1fdc8c08bdf72e385f/PyNaCl-1.5.0-cp36-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = "sha256:52cb72a79269189d4e0dc537556f4740f7f0a9ec41c1322598799b0bdad4ef92", size = 601722 }, + { url = "https://files.pythonhosted.org/packages/5d/70/87a065c37cca41a75f2ce113a5a2c2aa7533be648b184ade58971b5f7ccc/PyNaCl-1.5.0-cp36-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a36d4a9dda1f19ce6e03c9a784a2921a4b726b02e1c736600ca9c22029474394", size = 680087 }, + { url = "https://files.pythonhosted.org/packages/ee/87/f1bb6a595f14a327e8285b9eb54d41fef76c585a0edef0a45f6fc95de125/PyNaCl-1.5.0-cp36-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:0c84947a22519e013607c9be43706dd42513f9e6ae5d39d3613ca1e142fba44d", size = 856678 }, + { url = "https://files.pythonhosted.org/packages/66/28/ca86676b69bf9f90e710571b67450508484388bfce09acf8a46f0b8c785f/PyNaCl-1.5.0-cp36-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:06b8f6fa7f5de8d5d2f7573fe8c863c051225a27b61e6860fd047b1775807858", size = 1133660 }, + { url = "https://files.pythonhosted.org/packages/3d/85/c262db650e86812585e2bc59e497a8f59948a005325a11bbbc9ecd3fe26b/PyNaCl-1.5.0-cp36-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:a422368fc821589c228f4c49438a368831cb5bbc0eab5ebe1d7fac9dded6567b", size = 663824 }, + { url = "https://files.pythonhosted.org/packages/fd/1a/cc308a884bd299b651f1633acb978e8596c71c33ca85e9dc9fa33a5399b9/PyNaCl-1.5.0-cp36-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:61f642bf2378713e2c2e1de73444a3778e5f0a38be6fee0fe532fe30060282ff", size = 1117912 }, + { url = "https://files.pythonhosted.org/packages/25/2d/b7df6ddb0c2a33afdb358f8af6ea3b8c4d1196ca45497dd37a56f0c122be/PyNaCl-1.5.0-cp36-abi3-win32.whl", hash = "sha256:e46dae94e34b085175f8abb3b0aaa7da40767865ac82c928eeb9e57e1ea8a543", size = 204624 }, + { url = "https://files.pythonhosted.org/packages/5e/22/d3db169895faaf3e2eda892f005f433a62db2decbcfbc2f61e6517adfa87/PyNaCl-1.5.0-cp36-abi3-win_amd64.whl", hash = "sha256:20f42270d27e1b6a29f54032090b972d97f0a1b0948cc52392041ef7831fee93", size = 212141 }, +] + +[[package]] +name = "requests" +version = "2.32.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "certifi" }, + { name = "charset-normalizer" }, + { name = "idna" }, + { name = "urllib3" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/63/70/2bf7780ad2d390a8d301ad0b550f1581eadbd9a20f896afe06353c2a2913/requests-2.32.3.tar.gz", hash = "sha256:55365417734eb18255590a9ff9eb97e9e1da868d4ccd6402399eaf68af20a760", size = 131218 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f9/9b/335f9764261e915ed497fcdeb11df5dfd6f7bf257d4a6a2a686d80da4d54/requests-2.32.3-py3-none-any.whl", hash = 
"sha256:70761cfe03c773ceb22aa2f671b4757976145175cdfca038c02654d061d6dcc6", size = 64928 }, +] + +[[package]] +name = "rich" +version = "14.0.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "markdown-it-py" }, + { name = "pygments" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/a1/53/830aa4c3066a8ab0ae9a9955976fb770fe9c6102117c8ec4ab3ea62d89e8/rich-14.0.0.tar.gz", hash = "sha256:82f1bc23a6a21ebca4ae0c45af9bdbc492ed20231dcb63f297d6d1021a9d5725", size = 224078 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/0d/9b/63f4c7ebc259242c89b3acafdb37b41d1185c07ff0011164674e9076b491/rich-14.0.0-py3-none-any.whl", hash = "sha256:1c9491e1951aac09caffd42f448ee3d04e58923ffe14993f6e83068dc395d7e0", size = 243229 }, +] + +[[package]] +name = "scripts" +version = "0.1.0" +source = { virtual = "scripts" } +dependencies = [ + { name = "pygithub" }, +] + +[package.metadata] +requires-dist = [{ name = "pygithub", specifier = ">=2.6.1" }] + +[[package]] +name = "typing-extensions" +version = "4.13.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/0e/3e/b00a62db91a83fff600de219b6ea9908e6918664899a2d85db222f4fbf19/typing_extensions-4.13.0.tar.gz", hash = "sha256:0a4ac55a5820789d87e297727d229866c9650f6521b64206413c4fbada24d95b", size = 106520 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e0/86/39b65d676ec5732de17b7e3c476e45bb80ec64eb50737a8dce1a4178aba1/typing_extensions-4.13.0-py3-none-any.whl", hash = "sha256:c8dd92cc0d6425a97c18fbb9d1954e5ff92c1ca881a309c45f06ebc0b79058e5", size = 45683 }, +] + +[[package]] +name = "typing-inspection" +version = "0.4.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/82/5c/e6082df02e215b846b4b8c0b887a64d7d08ffaba30605502639d44c06b82/typing_inspection-0.4.0.tar.gz", hash = "sha256:9765c87de36671694a67904bf2c96e395be9c6439bb6c87b5142569dcdd65122", size = 76222 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/31/08/aa4fdfb71f7de5176385bd9e90852eaf6b5d622735020ad600f2bab54385/typing_inspection-0.4.0-py3-none-any.whl", hash = "sha256:50e72559fcd2a6367a19f7a7e610e6afcb9fac940c650290eed893d61386832f", size = 14125 }, +] + +[[package]] +name = "tzdata" +version = "2025.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/95/32/1a225d6164441be760d75c2c42e2780dc0873fe382da3e98a2e1e48361e5/tzdata-2025.2.tar.gz", hash = "sha256:b60a638fcc0daffadf82fe0f57e53d06bdec2f36c4df66280ae79bce6bd6f2b9", size = 196380 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/5c/23/c7abc0ca0a1526a0774eca151daeb8de62ec457e77262b66b359c3c7679e/tzdata-2025.2-py2.py3-none-any.whl", hash = "sha256:1a403fada01ff9221ca8044d701868fa132215d84beb92242d9acd2147f667a8", size = 347839 }, +] + +[[package]] +name = "urllib3" +version = "2.4.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/8a/78/16493d9c386d8e60e442a35feac5e00f0913c0f4b7c217c11e8ec2ff53e0/urllib3-2.4.0.tar.gz", hash = "sha256:414bc6535b787febd7567804cc015fee39daab8ad86268f1310a9250697de466", size = 390672 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/6b/11/cc635220681e93a0183390e26485430ca2c7b5f9d33b15c74c2861cb8091/urllib3-2.4.0-py3-none-any.whl", hash = "sha256:4e16665048960a0900c702d4a66415956a584919c03361cac9f1df5c5dd7e813", 
size = 128680 }, +] + +[[package]] +name = "wrapt" +version = "1.17.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/c3/fc/e91cc220803d7bc4db93fb02facd8461c37364151b8494762cc88b0fbcef/wrapt-1.17.2.tar.gz", hash = "sha256:41388e9d4d1522446fe79d3213196bd9e3b301a336965b9e27ca2788ebd122f3", size = 55531 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ce/b9/0ffd557a92f3b11d4c5d5e0c5e4ad057bd9eb8586615cdaf901409920b14/wrapt-1.17.2-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:6ed6ffac43aecfe6d86ec5b74b06a5be33d5bb9243d055141e8cabb12aa08125", size = 53800 }, + { url = "https://files.pythonhosted.org/packages/c0/ef/8be90a0b7e73c32e550c73cfb2fa09db62234227ece47b0e80a05073b375/wrapt-1.17.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:35621ae4c00e056adb0009f8e86e28eb4a41a4bfa8f9bfa9fca7d343fe94f998", size = 38824 }, + { url = "https://files.pythonhosted.org/packages/36/89/0aae34c10fe524cce30fe5fc433210376bce94cf74d05b0d68344c8ba46e/wrapt-1.17.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:a604bf7a053f8362d27eb9fefd2097f82600b856d5abe996d623babd067b1ab5", size = 38920 }, + { url = "https://files.pythonhosted.org/packages/3b/24/11c4510de906d77e0cfb5197f1b1445d4fec42c9a39ea853d482698ac681/wrapt-1.17.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5cbabee4f083b6b4cd282f5b817a867cf0b1028c54d445b7ec7cfe6505057cf8", size = 88690 }, + { url = "https://files.pythonhosted.org/packages/71/d7/cfcf842291267bf455b3e266c0c29dcb675b5540ee8b50ba1699abf3af45/wrapt-1.17.2-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:49703ce2ddc220df165bd2962f8e03b84c89fee2d65e1c24a7defff6f988f4d6", size = 80861 }, + { url = "https://files.pythonhosted.org/packages/d5/66/5d973e9f3e7370fd686fb47a9af3319418ed925c27d72ce16b791231576d/wrapt-1.17.2-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8112e52c5822fc4253f3901b676c55ddf288614dc7011634e2719718eaa187dc", size = 89174 }, + { url = "https://files.pythonhosted.org/packages/a7/d3/8e17bb70f6ae25dabc1aaf990f86824e4fd98ee9cadf197054e068500d27/wrapt-1.17.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:9fee687dce376205d9a494e9c121e27183b2a3df18037f89d69bd7b35bcf59e2", size = 86721 }, + { url = "https://files.pythonhosted.org/packages/6f/54/f170dfb278fe1c30d0ff864513cff526d624ab8de3254b20abb9cffedc24/wrapt-1.17.2-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:18983c537e04d11cf027fbb60a1e8dfd5190e2b60cc27bc0808e653e7b218d1b", size = 79763 }, + { url = "https://files.pythonhosted.org/packages/4a/98/de07243751f1c4a9b15c76019250210dd3486ce098c3d80d5f729cba029c/wrapt-1.17.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:703919b1633412ab54bcf920ab388735832fdcb9f9a00ae49387f0fe67dad504", size = 87585 }, + { url = "https://files.pythonhosted.org/packages/f9/f0/13925f4bd6548013038cdeb11ee2cbd4e37c30f8bfd5db9e5a2a370d6e20/wrapt-1.17.2-cp313-cp313-win32.whl", hash = "sha256:abbb9e76177c35d4e8568e58650aa6926040d6a9f6f03435b7a522bf1c487f9a", size = 36676 }, + { url = "https://files.pythonhosted.org/packages/bf/ae/743f16ef8c2e3628df3ddfd652b7d4c555d12c84b53f3d8218498f4ade9b/wrapt-1.17.2-cp313-cp313-win_amd64.whl", hash = "sha256:69606d7bb691b50a4240ce6b22ebb319c1cfb164e5f6569835058196e0f3a845", size = 38871 }, + { url = 
"https://files.pythonhosted.org/packages/3d/bc/30f903f891a82d402ffb5fda27ec1d621cc97cb74c16fea0b6141f1d4e87/wrapt-1.17.2-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:4a721d3c943dae44f8e243b380cb645a709ba5bd35d3ad27bc2ed947e9c68192", size = 56312 }, + { url = "https://files.pythonhosted.org/packages/8a/04/c97273eb491b5f1c918857cd26f314b74fc9b29224521f5b83f872253725/wrapt-1.17.2-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:766d8bbefcb9e00c3ac3b000d9acc51f1b399513f44d77dfe0eb026ad7c9a19b", size = 40062 }, + { url = "https://files.pythonhosted.org/packages/4e/ca/3b7afa1eae3a9e7fefe499db9b96813f41828b9fdb016ee836c4c379dadb/wrapt-1.17.2-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:e496a8ce2c256da1eb98bd15803a79bee00fc351f5dfb9ea82594a3f058309e0", size = 40155 }, + { url = "https://files.pythonhosted.org/packages/89/be/7c1baed43290775cb9030c774bc53c860db140397047cc49aedaf0a15477/wrapt-1.17.2-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:40d615e4fe22f4ad3528448c193b218e077656ca9ccb22ce2cb20db730f8d306", size = 113471 }, + { url = "https://files.pythonhosted.org/packages/32/98/4ed894cf012b6d6aae5f5cc974006bdeb92f0241775addad3f8cd6ab71c8/wrapt-1.17.2-cp313-cp313t-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a5aaeff38654462bc4b09023918b7f21790efb807f54c000a39d41d69cf552cb", size = 101208 }, + { url = "https://files.pythonhosted.org/packages/ea/fd/0c30f2301ca94e655e5e057012e83284ce8c545df7661a78d8bfca2fac7a/wrapt-1.17.2-cp313-cp313t-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9a7d15bbd2bc99e92e39f49a04653062ee6085c0e18b3b7512a4f2fe91f2d681", size = 109339 }, + { url = "https://files.pythonhosted.org/packages/75/56/05d000de894c4cfcb84bcd6b1df6214297b8089a7bd324c21a4765e49b14/wrapt-1.17.2-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:e3890b508a23299083e065f435a492b5435eba6e304a7114d2f919d400888cc6", size = 110232 }, + { url = "https://files.pythonhosted.org/packages/53/f8/c3f6b2cf9b9277fb0813418e1503e68414cd036b3b099c823379c9575e6d/wrapt-1.17.2-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:8c8b293cd65ad716d13d8dd3624e42e5a19cc2a2f1acc74b30c2c13f15cb61a6", size = 100476 }, + { url = "https://files.pythonhosted.org/packages/a7/b1/0bb11e29aa5139d90b770ebbfa167267b1fc548d2302c30c8f7572851738/wrapt-1.17.2-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:4c82b8785d98cdd9fed4cac84d765d234ed3251bd6afe34cb7ac523cb93e8b4f", size = 106377 }, + { url = "https://files.pythonhosted.org/packages/6a/e1/0122853035b40b3f333bbb25f1939fc1045e21dd518f7f0922b60c156f7c/wrapt-1.17.2-cp313-cp313t-win32.whl", hash = "sha256:13e6afb7fe71fe7485a4550a8844cc9ffbe263c0f1a1eea569bc7091d4898555", size = 37986 }, + { url = "https://files.pythonhosted.org/packages/09/5e/1655cf481e079c1f22d0cabdd4e51733679932718dc23bf2db175f329b76/wrapt-1.17.2-cp313-cp313t-win_amd64.whl", hash = "sha256:eaf675418ed6b3b31c7a989fd007fa7c3be66ce14e5c3b27336383604c9da85c", size = 40750 }, + { url = "https://files.pythonhosted.org/packages/2d/82/f56956041adef78f849db6b289b282e72b55ab8045a75abad81898c28d19/wrapt-1.17.2-py3-none-any.whl", hash = "sha256:b18f2d1533a71f069c7f82d524a52599053d4c7166e9dd374ae2136b7f40f7c8", size = 23594 }, +] diff --git a/vendored/sqlite3-parser/sqlparser_bench/Cargo.toml b/vendored/sqlite3-parser/sqlparser_bench/Cargo.toml index 1ec87cbe4..0bb6e16c5 100644 --- a/vendored/sqlite3-parser/sqlparser_bench/Cargo.toml +++ 
b/vendored/sqlite3-parser/sqlparser_bench/Cargo.toml
@@ -5,7 +5,7 @@ authors = ["Dandandan "]
 edition = "2018"
 
 [dependencies]
-sqlite3-parser = { path = "..", default-features = false, features = [
+limbo_sqlite3_parser = { path = "..", default-features = false, features = [
     "YYNOERRORRECOVERY",
     "NDEBUG",
 ] }
diff --git a/vendored/sqlite3-parser/src/lexer/scan.rs b/vendored/sqlite3-parser/src/lexer/scan.rs
index e0d22cbd5..6c0085b29 100644
--- a/vendored/sqlite3-parser/src/lexer/scan.rs
+++ b/vendored/sqlite3-parser/src/lexer/scan.rs
@@ -9,7 +9,7 @@ use std::io;
 /// Error with position
 pub trait ScanError: Error + From<io::Error> + Sized {
     /// Update the position where the error occurs
-    fn position(&mut self, line: u64, column: usize);
+    fn position(&mut self, line: u64, column: usize, offset: usize);
 }
 
 /// The `(&[u8], TokenType)` is the token.
@@ -126,7 +126,7 @@ impl Scanner {
         let data = &input[self.offset..];
         match self.splitter.split(data) {
             Err(mut e) => {
-                e.position(self.line, self.column);
+                e.position(self.line, self.column, self.offset);
                 return Err(e);
             }
             Ok((None, 0)) => {
diff --git a/vendored/sqlite3-parser/src/lexer/sql/error.rs b/vendored/sqlite3-parser/src/lexer/sql/error.rs
index d3e3ac345..b85dad504 100644
--- a/vendored/sqlite3-parser/src/lexer/sql/error.rs
+++ b/vendored/sqlite3-parser/src/lexer/sql/error.rs
@@ -38,9 +38,12 @@ pub enum Error {
         #[label("here")] Option<SourceSpan>,
     ),
     /// Invalid number format
+    #[diagnostic(help("Invalid digit in `{3}`"))]
     BadNumber(
         Option<(u64, usize)>,
         #[label("here")] Option<SourceSpan>,
+        Option<usize>,
+        String, // Holds the offending number as a string
     ),
     /// Invalid or missing sign after `!`
     ExpectedEqualsSign(
         Option<(u64, usize)>,
         #[label("here")] Option<SourceSpan>,
@@ -56,6 +59,8 @@
     MalformedHexInteger(
         Option<(u64, usize)>,
         #[label("here")] Option<SourceSpan>,
+        Option<usize>,
+        #[help] Option<&'static str>,
     ),
     /// Grammar error
     ParserError(
@@ -82,12 +87,12 @@ impl fmt::Display for Error {
                 write!(f, "non-terminated block comment at {:?}", pos.unwrap())
             }
             Self::BadVariableName(pos, _) => write!(f, "bad variable name at {:?}", pos.unwrap()),
-            Self::BadNumber(pos, _) => write!(f, "bad number at {:?}", pos.unwrap()),
+            Self::BadNumber(pos, _, _, _) => write!(f, "bad number at {:?}", pos.unwrap()),
             Self::ExpectedEqualsSign(pos, _) => write!(f, "expected = sign at {:?}", pos.unwrap()),
             Self::MalformedBlobLiteral(pos, _) => {
                 write!(f, "malformed blob literal at {:?}", pos.unwrap())
             }
-            Self::MalformedHexInteger(pos, _) => {
+            Self::MalformedHexInteger(pos, _, _, _) => {
                 write!(f, "malformed hex integer at {:?}", pos.unwrap())
             }
             Self::ParserError(ref msg, Some(pos), _) => write!(f, "{msg} at {pos:?}"),
@@ -111,18 +116,43 @@ impl From<io::Error> for Error {
 }
 
 impl ScanError for Error {
-    fn position(&mut self, line: u64, column: usize) {
+    fn position(&mut self, line: u64, column: usize, offset: usize) {
         match *self {
             Self::Io(_) => {}
-            Self::UnrecognizedToken(ref mut pos, _) => *pos = Some((line, column)),
-            Self::UnterminatedLiteral(ref mut pos, _) => *pos = Some((line, column)),
-            Self::UnterminatedBracket(ref mut pos, _) => *pos = Some((line, column)),
-            Self::UnterminatedBlockComment(ref mut pos, _) => *pos = Some((line, column)),
-            Self::BadVariableName(ref mut pos, _) => *pos = Some((line, column)),
-            Self::BadNumber(ref mut pos, _) => *pos = Some((line, column)),
-            Self::ExpectedEqualsSign(ref mut pos, _) => *pos = Some((line, column)),
-            Self::MalformedBlobLiteral(ref mut pos, _) => *pos = Some((line, column)),
-            Self::MalformedHexInteger(ref mut pos, _) => *pos = Some((line, column)),
+            Self::UnrecognizedToken(ref mut pos, ref mut src) => {
+                *pos = Some((line, column));
+                *src = Some((offset).into());
+            }
+            Self::UnterminatedLiteral(ref mut pos, ref mut src) => {
+                *pos = Some((line, column));
+                *src = Some((offset).into());
+            }
+            Self::UnterminatedBracket(ref mut pos, ref mut src) => {
+                *pos = Some((line, column));
+                *src = Some((offset).into());
+            }
+            Self::UnterminatedBlockComment(ref mut pos, ref mut src) => {
+                *pos = Some((line, column));
+                *src = Some((offset).into());
+            }
+            Self::BadVariableName(ref mut pos, ref mut src) => {
+                *pos = Some((line, column));
+                *src = Some((offset).into());
+            }
+            Self::ExpectedEqualsSign(ref mut pos, ref mut src) => {
+                *pos = Some((line, column));
+                *src = Some((offset).into());
+            }
+            Self::MalformedBlobLiteral(ref mut pos, ref mut src) => {
+                *pos = Some((line, column));
+                *src = Some((offset).into());
+            }
+            // Exact same handling here
+            Self::MalformedHexInteger(ref mut pos, ref mut src, len, _)
+            | Self::BadNumber(ref mut pos, ref mut src, len, _) => {
+                *pos = Some((line, column));
+                *src = Some((offset, len.unwrap_or(0)).into());
+            }
             Self::ParserError(_, ref mut pos, _) => *pos = Some((line, column)),
         }
     }
diff --git a/vendored/sqlite3-parser/src/lexer/sql/mod.rs b/vendored/sqlite3-parser/src/lexer/sql/mod.rs
index fba3d72d1..84f59eaeb 100644
--- a/vendored/sqlite3-parser/src/lexer/sql/mod.rs
+++ b/vendored/sqlite3-parser/src/lexer/sql/mod.rs
@@ -172,7 +172,7 @@ macro_rules! try_with_position {
             Ok(val) => val,
             Err(err) => {
                 let mut err = Error::from(err);
-                err.position($scanner.line(), $scanner.column());
+                err.position($scanner.line(), $scanner.column(), $scanner.offset() - 1);
                 return Err(err);
             }
         }
@@ -596,7 +596,9 @@ fn number(data: &[u8]) -> Result<(Option<Token<'_>>, usize), Error> {
         } else if b == b'e' || b == b'E' {
             return exponential_part(data, i);
         } else if is_identifier_start(b) {
-            return Err(Error::BadNumber(None, None));
+            return Err(Error::BadNumber(None, None, Some(i + 1), unsafe {
+                String::from_utf8_unchecked(data[..i + 1].to_vec())
+            }));
         }
         Ok((Some((&data[..i], TK_INTEGER)), i))
     } else {
@@ -610,13 +612,28 @@ fn hex_integer(data: &[u8]) -> Result<(Option<Token<'_>>, usize), Error> {
     if let Some((i, b)) = find_end_of_number(data, 2, u8::is_ascii_hexdigit)? {
         // Must not be empty (0x is invalid)
         if i == 2 || is_identifier_start(b) {
-            return Err(Error::MalformedHexInteger(None, None));
+            let (len, help) = if i == 2 && !is_identifier_start(b) {
+                (i, "Did you forget to add digits after '0x' or '0X'?")
+            } else {
+                (i + 1, "There are some invalid digits after '0x' or '0X'")
+            };
+            return Err(Error::MalformedHexInteger(
+                None,
+                None,
+                Some(len),  // Length of the malformed hex
+                Some(help), // Help Message
+            ));
         }
         Ok((Some((&data[..i], TK_INTEGER)), i))
     } else {
         // Must not be empty (0x is invalid)
         if data.len() == 2 {
-            return Err(Error::MalformedHexInteger(None, None));
+            return Err(Error::MalformedHexInteger(
+                None,
+                None,
+                Some(2), // Length of the malformed hex
+                Some("Did you forget to add digits after '0x' or '0X'?"), // Help Message
+            ));
         }
         Ok((Some((data, TK_INTEGER)), data.len()))
     }
@@ -628,7 +645,9 @@ fn fractional_part(data: &[u8], i: usize) -> Result<(Option<Token<'_>>, usize),
         if b == b'e' || b == b'E' {
             return exponential_part(data, i);
         } else if is_identifier_start(b) {
-            return Err(Error::BadNumber(None, None));
+            return Err(Error::BadNumber(None, None, Some(i + 1), unsafe {
+                String::from_utf8_unchecked(data[..i + 1].to_vec())
+            }));
         }
         Ok((Some((&data[..i], TK_FLOAT)), i))
     } else {
@@ -643,17 +662,24 @@ fn exponential_part(data: &[u8], i: usize) -> Result<(Option<Token<'_>>, usize),
         let i = if *b == b'+' || *b == b'-' { i + 1 } else { i };
         if let Some((j, b)) = find_end_of_number(data, i + 1, u8::is_ascii_digit)? {
             if j == i + 1 || is_identifier_start(b) {
-                return Err(Error::BadNumber(None, None));
+                let len = if is_identifier_start(b) { j + 1 } else { j };
+                return Err(Error::BadNumber(None, None, Some(len), unsafe {
+                    String::from_utf8_unchecked(data[..len].to_vec())
+                }));
             }
             Ok((Some((&data[..j], TK_FLOAT)), j))
         } else {
             if data.len() == i + 1 {
-                return Err(Error::BadNumber(None, None));
+                return Err(Error::BadNumber(None, None, Some(i + 1), unsafe {
+                    String::from_utf8_unchecked(data[..i + 1].to_vec())
+                }));
             }
             Ok((Some((data, TK_FLOAT)), data.len()))
         }
     } else {
-        Err(Error::BadNumber(None, None))
+        Err(Error::BadNumber(None, None, Some(data.len()), unsafe {
+            String::from_utf8_unchecked(data.to_vec())
+        }))
     }
 }
@@ -670,7 +696,9 @@ fn find_end_of_number(
         {
             continue;
         }
-        return Err(Error::BadNumber(None, None));
+        return Err(Error::BadNumber(None, None, Some(j), unsafe {
+            String::from_utf8_unchecked(data[..j].to_vec())
+        }));
     } else {
         return Ok(Some((j, b)));
     }
@@ -724,7 +752,7 @@ mod tests {
         let mut s = Scanner::new(tokenizer);
         expect_token(&mut s, input, b"SELECT", TokenType::TK_SELECT)?;
         let err = s.scan(input).unwrap_err();
-        assert!(matches!(err, Error::BadNumber(_, _)));
+        assert!(matches!(err, Error::BadNumber(_, _, _, _)));
         Ok(())
     }
diff --git a/vendored/sqlite3-parser/src/parser/ast/mod.rs b/vendored/sqlite3-parser/src/parser/ast/mod.rs
index 1aac9c2c4..f2275e952 100644
--- a/vendored/sqlite3-parser/src/parser/ast/mod.rs
+++ b/vendored/sqlite3-parser/src/parser/ast/mod.rs
@@ -1622,6 +1622,10 @@ pub enum PragmaName {
     LegacyFileFormat,
     /// Return the total number of pages in the database file.
     PageCount,
+    /// Return the page size of the database in bytes.
+    PageSize,
+    /// Returns the schema version of the database file.
+    SchemaVersion,
     /// returns information about the columns of a table
     TableInfo,
     /// Returns the user version of the database file.