diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index daf032087..ca92fbd34 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -1,10 +1,12 @@
-# Copyright 2022-2024, axodotdev
+# This file was autogenerated by dist: https://github.com/astral-sh/cargo-dist
+#
+# Copyright 2025 Astral Software Inc.
 # SPDX-License-Identifier: MIT or Apache-2.0
 #
 # CI that:
 #
 # * checks for a Git Tag that looks like a release
-# * builds artifacts with cargo-dist (archives, installers, hashes)
+# * builds artifacts with dist (archives, installers, hashes)
 # * uploads those artifacts to temporary workflow zip
 # * on success, uploads the artifacts to a GitHub Release
 #
@@ -24,10 +26,10 @@ permissions:
 # must be a Cargo-style SemVer Version (must have at least major.minor.patch).
 #
 # If PACKAGE_NAME is specified, then the announcement will be for that
-# package (erroring out if it doesn't have the given version or isn't cargo-dist-able).
+# package (erroring out if it doesn't have the given version or isn't dist-able).
 #
 # If PACKAGE_NAME isn't specified, then the announcement will be for all
-# (cargo-dist-able) packages in the workspace with that version (this mode is
+# (dist-able) packages in the workspace with that version (this mode is
 # intended for workspaces with only one dist-able package, or with all dist-able
 # packages versioned/released in lockstep).
 #
@@ -45,9 +47,9 @@ on:
     - '**[0-9]+.[0-9]+.[0-9]+*'

 jobs:
-  # Run 'cargo dist plan' (or host) to determine what tasks we need to do
+  # Run 'dist plan' (or host) to determine what tasks we need to do
   plan:
-    runs-on: "ubuntu-20.04"
+    runs-on: "ubuntu-22.04"
    outputs:
      val: ${{ steps.plan.outputs.manifest }}
      tag: ${{ !github.event.pull_request && github.ref_name || '' }}
@@ -59,16 +61,16 @@ jobs:
      - uses: actions/checkout@v4
        with:
          submodules: recursive
-      - name: Install cargo-dist
+      - name: Install dist
        # we specify bash to get pipefail; it guards against the `curl` command
        # failing. otherwise `sh` won't catch that `curl` returned non-0
        shell: bash
-        run: "curl --proto '=https' --tlsv1.2 -LsSf https://github.com/axodotdev/cargo-dist/releases/download/v0.21.0/cargo-dist-installer.sh | sh"
-      - name: Cache cargo-dist
+        run: "curl --proto '=https' --tlsv1.2 -LsSf https://github.com/astral-sh/cargo-dist/releases/download/v0.28.3/cargo-dist-installer.sh | sh"
+      - name: Cache dist
        uses: actions/upload-artifact@v4
        with:
          name: cargo-dist-cache
-          path: ~/.cargo/bin/cargo-dist
+          path: ~/.cargo/bin/dist
      # sure would be cool if github gave us proper conditionals...
      # so here's a doubly-nested ternary-via-truthiness to try to provide the best possible
      # functionality based on whether this is a pull_request, and whether it's from a fork.
@@ -76,8 +78,8 @@ jobs:
      # but also really annoying to build CI around when it needs secrets to work right.)
      - id: plan
        run: |
-          cargo dist ${{ (!github.event.pull_request && format('host --steps=create --tag={0}', github.ref_name)) || 'plan' }} --output-format=json > plan-dist-manifest.json
-          echo "cargo dist ran successfully"
+          dist ${{ (!github.event.pull_request && format('host --steps=create --tag={0}', github.ref_name)) || 'plan' }} --output-format=json > plan-dist-manifest.json
+          echo "dist ran successfully"
          cat plan-dist-manifest.json
          echo "manifest=$(jq -c "." plan-dist-manifest.json)" >> "$GITHUB_OUTPUT"
      - name: "Upload dist-manifest.json"
@@ -95,18 +97,19 @@ jobs:
    if: ${{ fromJson(needs.plan.outputs.val).ci.github.artifacts_matrix.include != null && (needs.plan.outputs.publishing == 'true' || fromJson(needs.plan.outputs.val).ci.github.pr_run_mode == 'upload') }}
    strategy:
      fail-fast: false
-      # Target platforms/runners are computed by cargo-dist in create-release.
+      # Target platforms/runners are computed by dist in create-release.
      # Each member of the matrix has the following arguments:
      #
      # - runner: the github runner
-      # - dist-args: cli flags to pass to cargo dist
-      # - install-dist: expression to run to install cargo-dist on the runner
+      # - dist-args: cli flags to pass to dist
+      # - install-dist: expression to run to install dist on the runner
      #
      # Typically there will be:
      # - 1 "global" task that builds universal installers
      # - N "local" tasks that build each platform's binaries and platform-specific installers
      matrix: ${{ fromJson(needs.plan.outputs.val).ci.github.artifacts_matrix }}
    runs-on: ${{ matrix.runner }}
+    container: ${{ matrix.container && matrix.container.image || null }}
    env:
      GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
      BUILD_MANIFEST_NAME: target/distrib/${{ join(matrix.targets, '-') }}-dist-manifest.json
@@ -117,8 +120,15 @@ jobs:
      - uses: actions/checkout@v4
        with:
          submodules: recursive
-      - name: Install cargo-dist
-        run: ${{ matrix.install_dist }}
+      - name: Install Rust non-interactively if not already installed
+        if: ${{ matrix.container }}
+        run: |
+          if ! command -v cargo > /dev/null 2>&1; then
+            curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
+            echo "$HOME/.cargo/bin" >> $GITHUB_PATH
+          fi
+      - name: Install dist
+        run: ${{ matrix.install_dist.run }}
      # Get the dist-manifest
      - name: Fetch local artifacts
        uses: actions/download-artifact@v4
@@ -132,10 +142,10 @@ jobs:
      - name: Build artifacts
        run: |
          # Actually do builds and make zips and whatnot
-          cargo dist build ${{ needs.plan.outputs.tag-flag }} --print=linkage --output-format=json ${{ matrix.dist_args }} > dist-manifest.json
-          echo "cargo dist ran successfully"
+          dist build ${{ needs.plan.outputs.tag-flag }} --print=linkage --output-format=json ${{ matrix.dist_args }} > dist-manifest.json
+          echo "dist ran successfully"
      - name: Attest
-        uses: actions/attest-build-provenance@v1
+        uses: actions/attest-build-provenance@v2
        with:
          subject-path: "target/distrib/*${{ join(matrix.targets, ', ') }}*"
      - id: cargo-dist
@@ -147,7 +157,7 @@ jobs:
        run: |
          # Parse out what we just built and upload it to scratch storage
          echo "paths<<EOF" >> "$GITHUB_OUTPUT"
-          jq --raw-output ".upload_files[]" dist-manifest.json >> "$GITHUB_OUTPUT"
+          dist print-upload-files-from-manifest --manifest dist-manifest.json >> "$GITHUB_OUTPUT"
          echo "EOF" >> "$GITHUB_OUTPUT"

          cp dist-manifest.json "$BUILD_MANIFEST_NAME"
@@ -164,7 +174,7 @@ jobs:
    needs:
      - plan
      - build-local-artifacts
-    runs-on: "ubuntu-20.04"
+    runs-on: "ubuntu-22.04"
    env:
      GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
      BUILD_MANIFEST_NAME: target/distrib/global-dist-manifest.json
@@ -172,12 +182,12 @@ jobs:
      - uses: actions/checkout@v4
        with:
          submodules: recursive
-      - name: Install cached cargo-dist
+      - name: Install cached dist
        uses: actions/download-artifact@v4
        with:
          name: cargo-dist-cache
          path: ~/.cargo/bin/
-      - run: chmod +x ~/.cargo/bin/cargo-dist
+      - run: chmod +x ~/.cargo/bin/dist
      # Get all the local artifacts for the global tasks to use (for e.g. checksums)
      - name: Fetch local artifacts
        uses: actions/download-artifact@v4
@@ -188,8 +198,8 @@ jobs:
      - id: cargo-dist
        shell: bash
        run: |
-          cargo dist build ${{ needs.plan.outputs.tag-flag }} --output-format=json "--artifacts=global" > dist-manifest.json
-          echo "cargo dist ran successfully"
+          dist build ${{ needs.plan.outputs.tag-flag }} --output-format=json "--artifacts=global" > dist-manifest.json
+          echo "dist ran successfully"

          # Parse out what we just built and upload it to scratch storage
          echo "paths<<EOF" >> "$GITHUB_OUTPUT"
@@ -214,19 +224,19 @@ jobs:
    if: ${{ always() && needs.plan.outputs.publishing == 'true' && (needs.build-global-artifacts.result == 'skipped' || needs.build-global-artifacts.result == 'success') && (needs.build-local-artifacts.result == 'skipped' || needs.build-local-artifacts.result == 'success') }}
    env:
      GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-    runs-on: "ubuntu-20.04"
+    runs-on: "ubuntu-22.04"
    outputs:
      val: ${{ steps.host.outputs.manifest }}
    steps:
      - uses: actions/checkout@v4
        with:
          submodules: recursive
-      - name: Install cached cargo-dist
+      - name: Install cached dist
        uses: actions/download-artifact@v4
        with:
          name: cargo-dist-cache
          path: ~/.cargo/bin/
-      - run: chmod +x ~/.cargo/bin/cargo-dist
+      - run: chmod +x ~/.cargo/bin/dist
      # Fetch artifacts from scratch-storage
      - name: Fetch artifacts
        uses: actions/download-artifact@v4
@@ -237,7 +247,7 @@ jobs:
      - id: host
        shell: bash
        run: |
-          cargo dist host ${{ needs.plan.outputs.tag-flag }} --steps=upload --steps=release --output-format=json > dist-manifest.json
+          dist host ${{ needs.plan.outputs.tag-flag }} --steps=upload --steps=release --output-format=json > dist-manifest.json
          echo "artifacts uploaded and released successfully"
          cat dist-manifest.json
          echo "manifest=$(jq -c "." dist-manifest.json)" >> "$GITHUB_OUTPUT"
@@ -278,7 +288,7 @@ jobs:
    # still allowing individual publish jobs to skip themselves (for prereleases).
    # "host" however must run to completion, no skipping allowed!
    if: ${{ always() && needs.host.result == 'success' }}
-    runs-on: "ubuntu-20.04"
+    runs-on: "ubuntu-22.04"
    env:
      GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
    steps:
diff --git a/COMPAT.md b/COMPAT.md
index 7fff65b87..b0cc85d83 100644
--- a/COMPAT.md
+++ b/COMPAT.md
@@ -56,7 +56,7 @@ Limbo aims to be fully compatible with SQLite, with opt-in features not supporte
 | ATTACH DATABASE | No | |
 | BEGIN TRANSACTION | Partial | Transaction names are not supported. |
 | COMMIT TRANSACTION | Partial | Transaction names are not supported. |
-| CREATE INDEX | No | |
+| CREATE INDEX | Yes | |
 | CREATE TABLE | Partial | |
 | CREATE TRIGGER | No | |
 | CREATE VIEW | No | |
@@ -461,6 +461,8 @@ Modifiers:
 | IdxDelete | No | |
 | IdxGE | Yes | |
 | IdxInsert | No | |
+| IdxInsertAsync | Yes | |
+| IdxInsertAwait | Yes | |
 | IdxLE | Yes | |
 | IdxLT | Yes | |
 | IdxRowid | No | |
@@ -548,6 +550,7 @@ Modifiers:
 | SeekLe | No | |
 | SeekLt | No | |
 | SeekRowid | Yes | |
+| SeekEnd | Yes | |
 | Sequence | No | |
 | SetCookie | No | |
 | ShiftLeft | Yes | |
diff --git a/Cargo.toml b/Cargo.toml
index edbae0cec..ac7490880 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -50,32 +50,6 @@ limbo_sqlite3_parser = { path = "vendored/sqlite3-parser", version = "0.0.19-pre
 limbo_time = { path = "extensions/time", version = "0.0.19-pre.4" }
 limbo_uuid = { path = "extensions/uuid", version = "0.0.19-pre.4" }

-# Config for 'cargo dist'
-[workspace.metadata.dist]
-# The preferred cargo-dist version to use in CI (Cargo.toml SemVer syntax)
-cargo-dist-version = "0.21.0"
-# CI backends to support
-ci = "github"
-# The installers to generate for each app
-installers = ["shell", "powershell"]
-# Target platforms to build apps for (Rust target-triple syntax)
-targets = [
-    "aarch64-apple-darwin",
-    "x86_64-apple-darwin",
-    "x86_64-unknown-linux-gnu",
-    "x86_64-pc-windows-msvc",
-]
-# Which actions to run on pull requests
-pr-run-mode = "plan"
-# Path that installers should place binaries in
-install-path = "~/.limbo"
-# Whether to install an updater program
-install-updater = true
-# Whether to consider the binaries in a package for distribution (defaults true)
-dist = false
-# Whether to enable GitHub Attestations
-github-attestations = true
-
 [profile.release]
 debug = "line-tables-only"
 codegen-units = 1
diff --git a/bindings/rust/src/lib.rs b/bindings/rust/src/lib.rs
index fe653fdb5..60a7ffd77 100644
--- a/bindings/rust/src/lib.rs
+++ b/bindings/rust/src/lib.rs
@@ -6,6 +6,7 @@ pub use value::Value;
 pub use params::params_from_iter;

 use crate::params::*;
+use std::fmt::Debug;
 use std::num::NonZero;
 use std::rc::Rc;
 use std::sync::{Arc, Mutex};
@@ -55,6 +56,7 @@ impl Builder {
     }
 }

+#[derive(Clone)]
 pub struct Database {
     inner: Arc<limbo_core::Database>,
 }
@@ -62,6 +64,12 @@ pub struct Database {
 unsafe impl Send for Database {}
 unsafe impl Sync for Database {}

+impl Debug for Database {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        f.debug_struct("Database").finish()
+    }
+}
+
 impl Database {
     pub fn connect(&self) -> Result<Connection> {
         let conn = self.inner.connect()?;
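Editor's note: the bindings change above derives `Clone` for `Database` (a cheap `Arc` refcount bump) but writes `Debug` by hand, because the wrapped core handle exposes no `Debug` of its own. A standalone sketch of the same pattern; `CoreHandle` is an illustrative stand-in, not a type from this PR:

```rust
use std::fmt::Debug;
use std::sync::Arc;

// Stand-in for a foreign handle that does not implement Debug.
struct CoreHandle;

// Cloning only bumps the Arc refcount; Debug is written by hand because
// #[derive(Debug)] would require CoreHandle: Debug.
#[derive(Clone)]
struct Database {
    inner: Arc<CoreHandle>,
}

impl Debug for Database {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Mirror the diff: print the type name, skip the opaque field.
        f.debug_struct("Database").finish()
    }
}

fn main() {
    let db = Database { inner: Arc::new(CoreHandle) };
    let db2 = db.clone(); // shares the same inner handle
    println!("{:?} {:?}", db, db2);
    assert_eq!(Arc::strong_count(&db.inner), 2);
}
```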
diff --git a/core/schema.rs b/core/schema.rs
index fbec7627f..dda37d15b 100644
--- a/core/schema.rs
+++ b/core/schema.rs
@@ -2,7 +2,7 @@ use crate::VirtualTable;
 use crate::{util::normalize_ident, Result};
 use core::fmt;
 use fallible_iterator::FallibleIterator;
-use limbo_sqlite3_parser::ast::{Expr, Literal, TableOptions};
+use limbo_sqlite3_parser::ast::{Expr, Literal, SortOrder, TableOptions};
 use limbo_sqlite3_parser::{
     ast::{Cmd, CreateTableBody, QualifiedName, ResultColumn, Stmt},
     lexer::sql::Parser,
@@ -30,6 +30,13 @@ impl Schema {
         Self { tables, indexes }
     }

+    pub fn is_unique_idx_name(&self, name: &str) -> bool {
+        !self
+            .indexes
+            .iter()
+            .any(|idx| idx.1.iter().any(|i| i.name == name))
+    }
+
     pub fn add_btree_table(&mut self, table: Rc<BTreeTable>) {
         let name = normalize_ident(&table.name);
         self.tables.insert(name, Table::BTree(table).into());
@@ -209,7 +216,7 @@ impl BTreeTable {
     }
 }

-#[derive(Debug)]
+#[derive(Debug, Default)]
 pub struct PseudoTable {
     pub columns: Vec<Column>,
 }
@@ -245,12 +252,6 @@ impl PseudoTable {
     }
 }

-impl Default for PseudoTable {
-    fn default() -> Self {
-        Self::new()
-    }
-}
-
 fn create_table(
     tbl_name: QualifiedName,
     body: CreateTableBody,
@@ -616,13 +617,7 @@ pub struct Index {
 #[derive(Debug, Clone)]
 pub struct IndexColumn {
     pub name: String,
-    pub order: Order,
-}
-
-#[derive(Debug, Clone, PartialEq)]
-pub enum Order {
-    Ascending,
-    Descending,
+    pub order: SortOrder,
 }

 impl Index {
@@ -642,11 +637,7 @@ impl Index {
             .into_iter()
             .map(|col| IndexColumn {
                 name: normalize_ident(&col.expr.to_string()),
-                order: match col.order {
-                    Some(limbo_sqlite3_parser::ast::SortOrder::Asc) => Order::Ascending,
-                    Some(limbo_sqlite3_parser::ast::SortOrder::Desc) => Order::Descending,
-                    None => Order::Ascending,
-                },
+                order: col.order.unwrap_or(SortOrder::Asc),
             })
             .collect();
         Ok(Index {
@@ -685,7 +676,7 @@ impl Index {
                 }
                 Ok(IndexColumn {
                     name: normalize_ident(col_name),
-                    order: Order::Ascending, // Primary key indexes are always ascending
+                    order: SortOrder::Asc, // Primary key indexes are always ascending
                 })
             })
            .collect::<Result<Vec<_>>>()?;
@@ -1012,7 +1003,7 @@ mod tests {
         assert!(index.unique);
         assert_eq!(index.columns.len(), 1);
         assert_eq!(index.columns[0].name, "a");
-        assert!(matches!(index.columns[0].order, Order::Ascending));
+        assert!(matches!(index.columns[0].order, SortOrder::Asc));
         Ok(())
     }

@@ -1029,8 +1020,8 @@ mod tests {
         assert_eq!(index.columns.len(), 2);
         assert_eq!(index.columns[0].name, "a");
         assert_eq!(index.columns[1].name, "b");
-        assert!(matches!(index.columns[0].order, Order::Ascending));
-        assert!(matches!(index.columns[1].order, Order::Ascending));
+        assert!(matches!(index.columns[0].order, SortOrder::Asc));
+        assert!(matches!(index.columns[1].order, SortOrder::Asc));
         Ok(())
     }
diff --git a/core/storage/btree.rs b/core/storage/btree.rs
index 558a6eee3..789f9cb39 100644
--- a/core/storage/btree.rs
+++ b/core/storage/btree.rs
@@ -12,12 +12,14 @@ use crate::types::{
 use crate::{return_corrupt, LimboError, Result};

 use std::cell::{Cell, Ref, RefCell};
+use std::cmp::Ordering;
 use std::pin::Pin;
 use std::rc::Rc;

 use super::pager::PageRef;
 use super::sqlite3_ondisk::{
-    write_varint_to_vec, IndexInteriorCell, IndexLeafCell, OverflowCell, DATABASE_HEADER_SIZE,
+    read_record, write_varint_to_vec, IndexInteriorCell, IndexLeafCell, OverflowCell,
+    DATABASE_HEADER_SIZE,
 };

 /*
@@ -169,6 +171,57 @@ enum ReadPayloadOverflow {
     },
 }

+#[derive(Clone, Debug)]
+pub enum BTreeKey<'a> {
+    TableRowId((u64, Option<&'a ImmutableRecord>)),
+    IndexKey(&'a ImmutableRecord),
+}
+
+impl BTreeKey<'_> {
+    /// Create a new table rowid key from a rowid and an optional immutable record.
+    /// The record is optional because it may not be available when the key is created.
+    pub fn new_table_rowid(rowid: u64, record: Option<&ImmutableRecord>) -> BTreeKey<'_> {
+        BTreeKey::TableRowId((rowid, record))
+    }
+
+    /// Create a new index key from an immutable record.
+    pub fn new_index_key(record: &ImmutableRecord) -> BTreeKey<'_> {
+        BTreeKey::IndexKey(record)
+    }
+
+    /// Get the record, if present. An index key always has one; a table rowid key may not.
+    fn get_record(&self) -> Option<&'_ ImmutableRecord> {
+        match self {
+            BTreeKey::TableRowId((_, record)) => *record,
+            BTreeKey::IndexKey(record) => Some(record),
+        }
+    }
+
+    /// Get the rowid, if present. An index key never has one.
+    fn maybe_rowid(&self) -> Option<u64> {
+        match self {
+            BTreeKey::TableRowId((rowid, _)) => Some(*rowid),
+            BTreeKey::IndexKey(_) => None,
+        }
+    }
+
+    /// Assert that the key is an integer rowid and return it.
+    fn to_rowid(&self) -> u64 {
+        match self {
+            BTreeKey::TableRowId((rowid, _)) => *rowid,
+            BTreeKey::IndexKey(_) => panic!("BTreeKey::to_rowid called on IndexKey"),
+        }
+    }
+
+    /// Assert that the key is an index key and return its values.
+    fn to_index_key_values(&self) -> &'_ Vec<RefValue> {
+        match self {
+            BTreeKey::TableRowId(_) => panic!("BTreeKey::to_index_key called on TableRowId"),
+            BTreeKey::IndexKey(key) => key.get_values(),
+        }
+    }
+}
+
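Editor's note: `BTreeKey` folds the cursor's two addressing modes (integer rowid for table btrees, serialized record for index btrees) into one type so the insert paths take a single argument. A simplified, self-contained sketch of the shape, with `String` standing in for `ImmutableRecord`:

```rust
// Simplified model of the BTreeKey split: table btrees are keyed by rowid,
// index btrees by the serialized record itself.
#[derive(Clone, Debug)]
enum BTreeKey<'a> {
    TableRowId((u64, Option<&'a String>)),
    IndexKey(&'a String),
}

impl BTreeKey<'_> {
    /// The payload to write, if any. Index keys always carry one;
    /// a table key may be rowid-only (e.g. for an existence probe).
    fn get_record(&self) -> Option<&String> {
        match self {
            BTreeKey::TableRowId((_, record)) => *record,
            BTreeKey::IndexKey(record) => Some(record),
        }
    }

    /// Rowid if this is a table key; index keys have none.
    fn maybe_rowid(&self) -> Option<u64> {
        match self {
            BTreeKey::TableRowId((rowid, _)) => Some(*rowid),
            BTreeKey::IndexKey(_) => None,
        }
    }
}

fn main() {
    let payload = String::from("serialized row");
    let table_key = BTreeKey::TableRowId((42, Some(&payload)));
    let index_key = BTreeKey::IndexKey(&payload);
    assert_eq!(table_key.maybe_rowid(), Some(42));
    assert_eq!(index_key.maybe_rowid(), None);
    assert!(index_key.get_record().is_some());
}
```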
 #[derive(Clone)]
 struct BalanceInfo {
     /// Old pages being balanced.
@@ -599,8 +652,8 @@ impl BTreeCursor {
                     BTreeCell::TableLeafCell(TableLeafCell {
                         _rowid,
                         _payload,
-                        first_overflow_page,
                         payload_size,
+                        first_overflow_page,
                     }) => {
                         assert!(predicate.is_none());
                         if let Some(next_page) = first_overflow_page {
@@ -814,10 +867,8 @@ impl BTreeCursor {
                     };
                     let record = self.get_immutable_record();
                     let record = record.as_ref().unwrap();
-                    let order = compare_immutable(
-                        &record.get_values().as_slice()[..record.len() - 1],
-                        &index_key.get_values().as_slice()[..],
-                    );
+                    let without_rowid = &record.get_values().as_slice()[..record.len() - 1];
+                    let order = without_rowid.cmp(index_key.get_values());
                     let found = match op {
                         SeekOp::GT => order.is_gt(),
                         SeekOp::GE => order.is_ge(),
@@ -1049,11 +1100,11 @@ impl BTreeCursor {

     /// Insert a record into the btree.
     /// If the insert operation overflows the page, it will be split and the btree will be balanced.
-    fn insert_into_page(
-        &mut self,
-        key: &OwnedValue,
-        record: &ImmutableRecord,
-    ) -> Result<CursorResult<()>> {
+    fn insert_into_page(&mut self, bkey: &BTreeKey) -> Result<CursorResult<()>> {
+        let record = bkey
+            .get_record()
+            .expect("expected record present on insert");
+
         if let CursorState::None = &self.state {
             self.state = CursorState::Write(WriteInfo::new());
         }
@@ -1069,10 +1120,6 @@ impl BTreeCursor {
                 WriteState::Start => {
                     let page = self.stack.top();
                     return_if_locked_maybe_load!(self.pager, page);
-                    let int_key = match key {
-                        OwnedValue::Integer(i) => *i as u64,
-                        _ => unreachable!("btree tables are indexed by integers!"),
-                    };

                     // get page and find cell
                     let (cell_idx, page_type) = {
@@ -1082,23 +1129,27 @@ impl BTreeCursor {
                         self.pager.add_dirty(page.get().id);

                         let page = page.get().contents.as_mut().unwrap();
-                        assert!(matches!(page.page_type(), PageType::TableLeaf));
+                        assert!(matches!(
+                            page.page_type(),
+                            PageType::TableLeaf | PageType::IndexLeaf
+                        ));

                         // find cell
-                        (self.find_cell(page, int_key), page.page_type())
+                        (self.find_cell(page, bkey), page.page_type())
                     };
                     tracing::debug!("insert_into_page(cell_idx={})", cell_idx);

                     // if the cell index is less than the total cells, check: if it's an existing
                     // rowid, we are going to update / overwrite the cell
                     if cell_idx < page.get_contents().cell_count() {
-                        if let BTreeCell::TableLeafCell(tbl_leaf) = page.get_contents().cell_get(
+                        match page.get_contents().cell_get(
                             cell_idx,
                             payload_overflow_threshold_max(page_type, self.usable_space() as u16),
                             payload_overflow_threshold_min(page_type, self.usable_space() as u16),
                             self.usable_space(),
                         )? {
-                            if tbl_leaf._rowid == int_key {
+                            BTreeCell::TableLeafCell(tbl_leaf) => {
+                                if tbl_leaf._rowid == bkey.to_rowid() {
                                     tracing::debug!("insert_into_page: found exact match with cell_idx={cell_idx}, overwriting");
                                     self.overwrite_cell(page.clone(), cell_idx, record)?;
                                     self.state
@@ -1108,12 +1159,37 @@ impl BTreeCursor {
                                     continue;
                                 }
                             }
+                            BTreeCell::IndexLeafCell(idx_leaf) => {
+                                read_record(
+                                    idx_leaf.payload,
+                                    self.get_immutable_record_or_create().as_mut().unwrap(),
+                                )
+                                .expect("failed to read record");
+                                if compare_immutable(
+                                    record.get_values(),
+                                    self.get_immutable_record()
+                                        .as_ref()
+                                        .unwrap()
+                                        .get_values(),
+                                ) == Ordering::Equal
+                                {
+                                    tracing::debug!("insert_into_page: found exact match with cell_idx={cell_idx}, overwriting");
+                                    self.overwrite_cell(page.clone(), cell_idx, record)?;
+                                    self.state
+                                        .mut_write_info()
+                                        .expect("expected write info")
+                                        .state = WriteState::Finish;
+                                    continue;
+                                }
+                            }
+                            other => panic!(
+                                "unexpected cell type, expected TableLeaf or IndexLeaf, found: {:?}",
+                                other
+                            ),
+                        }
                     }

                     // insert cell
                     let mut cell_payload: Vec<u8> = Vec::with_capacity(record.len() + 4);
                     fill_cell_payload(
                         page_type,
-                        Some(int_key),
+                        bkey.maybe_rowid(),
                         &mut cell_payload,
                         record,
                         self.usable_space() as u16,
@@ -1912,8 +1988,7 @@ impl BTreeCursor {
     }

     /// Find the index of the cell in the page that contains the given rowid.
-    /// BTree tables only.
-    fn find_cell(&self, page: &PageContent, int_key: u64) -> usize {
+    fn find_cell(&self, page: &PageContent, key: &BTreeKey) -> usize {
         let mut cell_idx = 0;
         let cell_count = page.cell_count();
         while cell_idx < cell_count {
@@ -1927,22 +2002,68 @@ impl BTreeCursor {
                 .unwrap()
             {
                 BTreeCell::TableLeafCell(cell) => {
-                    if int_key <= cell._rowid {
+                    if key.to_rowid() <= cell._rowid {
                         break;
                     }
                 }
                 BTreeCell::TableInteriorCell(cell) => {
-                    if int_key <= cell._rowid {
+                    if key.to_rowid() <= cell._rowid {
                         break;
                     }
                 }
-                _ => todo!(),
+                BTreeCell::IndexInteriorCell(IndexInteriorCell { payload, .. })
+                | BTreeCell::IndexLeafCell(IndexLeafCell { payload, .. }) => {
+                    // TODO: implement efficient comparison of records
+                    // e.g. https://github.com/sqlite/sqlite/blob/master/src/vdbeaux.c#L4719
+                    read_record(
+                        payload,
+                        self.get_immutable_record_or_create().as_mut().unwrap(),
+                    )
+                    .expect("failed to read record");
+                    let order = compare_immutable(
+                        key.to_index_key_values(),
+                        self.get_immutable_record().as_ref().unwrap().get_values(),
+                    );
+                    match order {
+                        Ordering::Less | Ordering::Equal => {
+                            break;
+                        }
+                        Ordering::Greater => {}
+                    }
+                }
             }
             cell_idx += 1;
         }
         cell_idx
     }

+    pub fn seek_end(&mut self) -> Result<CursorResult<()>> {
+        assert!(self.mv_cursor.is_none()); // TODO: confirm MVCC cursors never take this path
+        self.move_to_root();
+        loop {
+            let mem_page = self.stack.top();
+            let page_id = mem_page.get().id;
+            let page = self.pager.read_page(page_id)?;
+            return_if_locked!(page);
+
+            let contents = page.get().contents.as_ref().unwrap();
+            if contents.is_leaf() {
+                // set cursor just past the last cell to append
+                self.stack.set_cell_index(contents.cell_count() as i32);
+                return Ok(CursorResult::Ok(()));
+            }
+
+            match contents.rightmost_pointer() {
+                Some(right_most_pointer) => {
+                    self.stack.set_cell_index(contents.cell_count() as i32 + 1); // past all cells; not a valid cell index on an interior page
+                    let child = self.pager.read_page(right_most_pointer as usize)?;
+                    self.stack.push(child);
+                }
+                None => unreachable!("interior page must have rightmost pointer"),
+            }
+        }
+    }
+
     pub fn seek_to_last(&mut self) -> Result<CursorResult<()>> {
         return_if_io!(self.move_to_rightmost());
         let rowid = return_if_io!(self.get_next_record(None));
@@ -2032,28 +2153,36 @@ impl BTreeCursor {

     pub fn insert(
         &mut self,
-        key: &OwnedValue,
-        record: &ImmutableRecord,
+        key: &BTreeKey,
         moved_before: bool, /* Indicate whether it's necessary to traverse to find the leaf page */
     ) -> Result<CursorResult<()>> {
-        let int_key = match key {
-            OwnedValue::Integer(i) => i,
-            _ => unreachable!("btree tables are indexed by integers!"),
-        };
         match &self.mv_cursor {
-            Some(mv_cursor) => {
-                let row_id =
-                    crate::mvcc::database::RowID::new(self.table_id() as u64, *int_key as u64);
-                let record_buf = record.get_payload().to_vec();
-                let row = crate::mvcc::database::Row::new(row_id, record_buf);
-                mv_cursor.borrow_mut().insert(row).unwrap();
-            }
+            Some(mv_cursor) => match key.maybe_rowid() {
+                Some(rowid) => {
+                    let row_id = crate::mvcc::database::RowID::new(self.table_id() as u64, rowid);
+                    let record_buf = key.get_record().unwrap().get_payload().to_vec();
+                    let row = crate::mvcc::database::Row::new(row_id, record_buf);
+                    mv_cursor.borrow_mut().insert(row).unwrap();
+                }
+                None => todo!("Support mvcc inserts with index btrees"),
+            },
             None => {
                 if !moved_before {
-                    return_if_io!(self.move_to(SeekKey::TableRowId(*int_key as u64), SeekOp::EQ));
+                    match key {
+                        BTreeKey::IndexKey(_) => {
+                            return_if_io!(self
+                                .move_to(SeekKey::IndexKey(key.get_record().unwrap()), SeekOp::GE))
+                        }
+                        BTreeKey::TableRowId(_) => return_if_io!(
+                            self.move_to(SeekKey::TableRowId(key.to_rowid()), SeekOp::EQ)
+                        ),
+                    }
+                }
+                return_if_io!(self.insert_into_page(key));
+                if key.maybe_rowid().is_some() {
+                    let int_key = key.to_rowid();
+                    self.rowid.replace(Some(int_key));
                 }
-                return_if_io!(self.insert_into_page(key, record));
-                self.rowid.replace(Some(*int_key as u64));
             }
         };
         Ok(CursorResult::Ok(()))
@@ -2372,6 +2501,33 @@ impl BTreeCursor {
         self.null_flag
     }

+    /// Search for a key in an index B-Tree. Index records for UNIQUE indexes end
+    /// with a rowid, so the comparison must ignore that trailing rowid and compare
+    /// only the key-column prefix.
+    pub fn key_exists_in_index(&mut self, key: &ImmutableRecord) -> Result<CursorResult<bool>> {
+        return_if_io!(self.do_seek(SeekKey::IndexKey(key), SeekOp::GE));
+
+        let record_opt = self.record();
+        match record_opt.as_ref() {
+            Some(record) => {
+                // Existing record found: compare the key prefix
+                let existing_key = &record.get_values()[..record.count().saturating_sub(1)];
+                let inserted_key_vals = &key.get_values();
+                if existing_key
+                    .iter()
+                    .zip(inserted_key_vals.iter())
+                    .all(|(a, b)| a == b)
+                {
+                    return Ok(CursorResult::Ok(true)); // duplicate
+                }
+            }
+            None => {
+                // Cursor not pointing at a record: tree is empty or we are past the last entry
+                return Ok(CursorResult::Ok(false));
+            }
+        }
+
+        Ok(CursorResult::Ok(false)) // not a duplicate
+    }
+
     pub fn exists(&mut self, key: &OwnedValue) -> Result<CursorResult<bool>> {
         assert!(self.mv_cursor.is_none());
         let int_key = match key {
@@ -2390,7 +2546,7 @@ impl BTreeCursor {
             OwnedValue::Integer(i) => *i as u64,
             _ => unreachable!("btree tables are indexed by integers!"),
         };
-        let cell_idx = self.find_cell(contents, int_key);
+        let cell_idx = self.find_cell(contents, &BTreeKey::new_table_rowid(int_key, None));
         if cell_idx >= contents.cell_count() {
             Ok(CursorResult::Ok(false))
         } else {
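Editor's note: the uniqueness probe above is worth spelling out. Index records are stored as (key columns..., rowid), so two entries collide exactly when their key prefixes match, regardless of rowid. A minimal sketch of that prefix comparison, with `i64` standing in for the crate's value type:

```rust
// Index records are stored as (key columns..., rowid). A UNIQUE check must
// ignore the trailing rowid: entries collide when the key prefixes match
// even though the rowids differ.
fn is_duplicate(existing: &[i64], inserted_key: &[i64]) -> bool {
    // Drop the rowid suffix from the stored record, then compare
    // position-by-position against the candidate key.
    let existing_key = &existing[..existing.len().saturating_sub(1)];
    existing_key
        .iter()
        .zip(inserted_key.iter())
        .all(|(a, b)| a == b)
}

fn main() {
    // Stored entry: key (7, 9) with rowid 1.
    let stored = [7, 9, 1];
    assert!(is_duplicate(&stored, &[7, 9]));  // same key, new rowid -> violation
    assert!(!is_duplicate(&stored, &[7, 8])); // different key -> ok
}
```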
@@ -4040,25 +4196,28 @@ mod tests {
         for (key, size) in sequence.iter() {
             run_until_done(
                 || {
-                    let key = SeekKey::TableRowId(*key as u64);
+                    let key = SeekKey::TableRowId(*key);
                     cursor.move_to(key, SeekOp::EQ)
                 },
                 pager.deref(),
             )
             .unwrap();
-            let key = OwnedValue::Integer(*key);
             let value = ImmutableRecord::from_registers(&[Register::OwnedValue(
                 OwnedValue::Blob(vec![0; *size]),
             )]);
             tracing::info!("insert key:{}", key);
-            run_until_done(|| cursor.insert(&key, &value, true), pager.deref()).unwrap();
+            run_until_done(
+                || cursor.insert(&BTreeKey::new_table_rowid(*key, Some(&value)), true),
+                pager.deref(),
+            )
+            .unwrap();
             tracing::info!(
                 "=========== btree ===========\n{}\n\n",
                 format_btree(pager.clone(), root_page, 0)
             );
         }
         for (key, _) in sequence.iter() {
-            let seek_key = SeekKey::TableRowId(*key as u64);
+            let seek_key = SeekKey::TableRowId(*key);
             assert!(
                 matches!(
                     cursor.seek(seek_key, SeekOp::EQ).unwrap(),
@@ -4126,12 +4285,14 @@ mod tests {
             pager.deref(),
         )
         .unwrap();
-
-        let key = OwnedValue::Integer(key);
         let value = ImmutableRecord::from_registers(&[Register::OwnedValue(
             OwnedValue::Blob(vec![0; size]),
         )]);
-        run_until_done(|| cursor.insert(&key, &value, true), pager.deref()).unwrap();
+        run_until_done(
+            || cursor.insert(&BTreeKey::new_table_rowid(key as u64, Some(&value)), true),
+            pager.deref(),
+        )
+        .unwrap();
         if matches!(validate_btree(pager.clone(), root_page), (_, false)) {
             panic!("invalid btree");
         }
@@ -5005,7 +5166,6 @@ mod tests {
         for i in 0..10000 {
             let mut cursor = BTreeCursor::new(None, pager.clone(), root_page);
             tracing::info!("INSERT INTO t VALUES ({});", i,);
-            let key = OwnedValue::Integer(i);
             let value =
                 ImmutableRecord::from_registers(&[Register::OwnedValue(OwnedValue::Integer(i))]);
             tracing::trace!("before insert {}", i);
@@ -5017,7 +5177,11 @@ mod tests {
                 pager.deref(),
             )
             .unwrap();
-            run_until_done(|| cursor.insert(&key, &value, true), pager.deref()).unwrap();
+            run_until_done(
+                || cursor.insert(&BTreeKey::new_table_rowid(i as u64, Some(&value)), true),
+                pager.deref(),
+            )
+            .unwrap();
             keys.push(i);
         }
         if matches!(validate_btree(pager.clone(), root_page), (_, false)) {
@@ -5081,7 +5245,6 @@ mod tests {
         // Insert 10,000 records into the BTree.
         for i in 1..=10000 {
             let mut cursor = BTreeCursor::new(None, pager.clone(), root_page);
-            let key = OwnedValue::Integer(i);
             let value = ImmutableRecord::from_registers(&[Register::OwnedValue(OwnedValue::Text(
                 Text::new("hello world"),
             ))]);
@@ -5095,7 +5258,11 @@ mod tests {
             )
             .unwrap();

-            run_until_done(|| cursor.insert(&key, &value, true), pager.deref()).unwrap();
+            run_until_done(
+                || cursor.insert(&BTreeKey::new_table_rowid(i as u64, Some(&value)), true),
+                pager.deref(),
+            )
+            .unwrap();
         }

         match validate_btree(pager.clone(), root_page) {
@@ -5154,7 +5321,6 @@ mod tests {
         for i in 0..iterations {
             let mut cursor = BTreeCursor::new(None, pager.clone(), root_page);
             tracing::info!("INSERT INTO t VALUES ({});", i,);
-            let key = OwnedValue::Integer(i as i64);
             let value =
                 ImmutableRecord::from_registers(&[Register::OwnedValue(OwnedValue::Text(Text {
                     value: huge_texts[i].as_bytes().to_vec(),
@@ -5173,7 +5339,11 @@ mod tests {
                 pager.deref(),
             )
             .unwrap();
-            run_until_done(|| cursor.insert(&key, &value, true), pager.deref()).unwrap();
+            run_until_done(
+                || cursor.insert(&BTreeKey::new_table_rowid(i as u64, Some(&value)), true),
+                pager.deref(),
+            )
+            .unwrap();
             tracing::debug!(
                 "=========== btree after ===========\n{}\n\n",
                 format_btree(pager.clone(), root_page, 0)
diff --git a/core/translate/index.rs b/core/translate/index.rs
new file mode 100644
index 000000000..32d7cd2e9
--- /dev/null
+++ b/core/translate/index.rs
@@ -0,0 +1,319 @@
+use std::sync::Arc;
+
+use crate::{
+    schema::{BTreeTable, Column, Index, IndexColumn, PseudoTable, Schema},
+    types::Record,
+    util::normalize_ident,
+    vdbe::{
+        builder::{CursorType, ProgramBuilder, QueryMode},
+        insn::{IdxInsertFlags, Insn, RegisterOrLiteral},
+    },
+    OwnedValue,
+};
+use limbo_sqlite3_parser::ast::{self, Expr, Id, SortOrder, SortedColumn};
+
+use super::schema::{emit_schema_entry, SchemaEntryType, SQLITE_TABLEID};
+
+pub fn translate_create_index(
+    mode: QueryMode,
+    unique_if_not_exists: (bool, bool),
+    idx_name: &str,
+    tbl_name: &str,
+    columns: &[SortedColumn],
+    schema: &Schema,
+) -> crate::Result<ProgramBuilder> {
+    let idx_name = normalize_ident(idx_name);
+    let tbl_name = normalize_ident(tbl_name);
+    let mut program = ProgramBuilder::new(crate::vdbe::builder::ProgramBuilderOpts {
+        query_mode: mode,
+        num_cursors: 5,
+        approx_num_insns: 40,
+        approx_num_labels: 5,
+    });
+
+    // Check if the index is being created on a valid btree table and
+    // the name is globally unique in the schema.
+    if !schema.is_unique_idx_name(&idx_name) {
+        crate::bail_parse_error!("Error: index with name '{idx_name}' already exists.");
+    }
+    let Some(tbl) = schema.tables.get(&tbl_name) else {
+        crate::bail_parse_error!("Error: table '{tbl_name}' does not exist.");
+    };
+    let Some(tbl) = tbl.btree() else {
+        crate::bail_parse_error!("Error: table '{tbl_name}' is not a b-tree table.");
+    };
+    let columns = resolve_sorted_columns(&tbl, columns)?;
+
+    // Prologue:
+    let init_label = program.emit_init();
+    let start_offset = program.offset();
+
+    let idx = Arc::new(Index {
+        name: idx_name.clone(),
+        table_name: tbl.name.clone(),
+        root_page: 0, // not known until the btree is created; filled in after we re-parse the schema table
+        columns: columns
+            .iter()
+            .map(|c| IndexColumn {
+                name: c.0 .1.name.as_ref().unwrap().clone(),
+                order: c.1,
+            })
+            .collect(),
+        unique: unique_if_not_exists.0,
+    });
+
+    // Allocate the necessary cursors:
+    //
+    // 1. sqlite_schema_cursor_id - sqlite_schema table
+    // 2. btree_cursor_id - new index btree
+    // 3. table_cursor_id - table we are creating the index on
+    // 4. sorter_cursor_id - sorter
+    // 5. pseudo_cursor_id - pseudo table to store the sorted index values
+    let sqlite_table = schema.get_btree_table(SQLITE_TABLEID).unwrap();
+    let sqlite_schema_cursor_id = program.alloc_cursor_id(
+        Some(SQLITE_TABLEID.to_owned()),
+        CursorType::BTreeTable(sqlite_table.clone()),
+    );
+    let btree_cursor_id = program.alloc_cursor_id(
+        Some(idx_name.to_owned()),
+        CursorType::BTreeIndex(idx.clone()),
+    );
+    let table_cursor_id = program.alloc_cursor_id(
+        Some(tbl_name.to_owned()),
+        CursorType::BTreeTable(tbl.clone()),
+    );
+    let sorter_cursor_id = program.alloc_cursor_id(None, CursorType::Sorter);
+    let pseudo_table = PseudoTable::new_with_columns(tbl.columns.clone());
+    let pseudo_cursor_id = program.alloc_cursor_id(None, CursorType::Pseudo(pseudo_table.into()));
+
+    // Create a new B-Tree and store the root page index in a register
+    let root_page_reg = program.alloc_register();
+    program.emit_insn(Insn::CreateBtree {
+        db: 0,
+        root: root_page_reg,
+        flags: 2, // index leaf
+    });
+
+    // open the sqlite schema table for writing and create a new entry for the index
+    program.emit_insn(Insn::OpenWriteAsync {
+        cursor_id: sqlite_schema_cursor_id,
+        root_page: RegisterOrLiteral::Literal(sqlite_table.root_page),
+    });
+    program.emit_insn(Insn::OpenWriteAwait {});
+    let sql = create_idx_stmt_to_sql(&tbl_name, &idx_name, unique_if_not_exists, &columns);
+    emit_schema_entry(
+        &mut program,
+        sqlite_schema_cursor_id,
+        SchemaEntryType::Index,
+        &idx_name,
+        &tbl_name,
+        root_page_reg,
+        Some(sql),
+    );
+
+    // determine the order of the columns in the index for the sorter
+    let order = idx
+        .columns
+        .iter()
+        .map(|c| {
+            OwnedValue::Integer(match c.order {
+                SortOrder::Asc => 0,
+                SortOrder::Desc => 1,
+            })
+        })
+        .collect();
+    // open the sorter and the pseudo table
+    program.emit_insn(Insn::SorterOpen {
+        cursor_id: sorter_cursor_id,
+        columns: columns.len(),
+        order: Record::new(order),
+    });
+    let content_reg = program.alloc_register();
+    program.emit_insn(Insn::OpenPseudo {
+        cursor_id: pseudo_cursor_id,
+        content_reg,
+        num_fields: columns.len() + 1,
+    });
+
+    // open the table we are creating the index on for reading
+    program.emit_insn(Insn::OpenReadAsync {
+        cursor_id: table_cursor_id,
+        root_page: tbl.root_page,
+    });
+    program.emit_insn(Insn::OpenReadAwait {});
+
+    program.emit_insn(Insn::RewindAsync {
+        cursor_id: table_cursor_id,
+    });
+    let loop_start_label = program.allocate_label();
+    let loop_end_label = program.allocate_label();
+    program.emit_insn(Insn::RewindAwait {
+        cursor_id: table_cursor_id,
+        pc_if_empty: loop_end_label,
+    });
+
+    program.resolve_label(loop_start_label, program.offset());
+
+    // Loop start:
+    // Collect index values into start_reg..rowid_reg,
+    // emit MakeRecord (index key + rowid) into record_reg,
+    // then insert the record into the sorter.
+    let start_reg = program.alloc_registers(columns.len() + 1);
+    for (i, (col, _)) in columns.iter().enumerate() {
+        program.emit_insn(Insn::Column {
+            cursor_id: table_cursor_id,
+            column: col.0,
+            dest: start_reg + i,
+        });
+    }
+    let rowid_reg = start_reg + columns.len();
+    program.emit_insn(Insn::RowId {
+        cursor_id: table_cursor_id,
+        dest: rowid_reg,
+    });
+    let record_reg = program.alloc_register();
+    program.emit_insn(Insn::MakeRecord {
+        start_reg,
+        count: columns.len() + 1,
+        dest_reg: record_reg,
+    });
+    program.emit_insn(Insn::SorterInsert {
+        cursor_id: sorter_cursor_id,
+        record_reg,
+    });
+
+    program.emit_insn(Insn::NextAsync {
+        cursor_id: table_cursor_id,
+    });
+    program.emit_insn(Insn::NextAwait {
+        cursor_id: table_cursor_id,
+        pc_if_next: loop_start_label,
+    });
+    program.resolve_label(loop_end_label, program.offset());
+
+    // Open the index btree we created for writing to insert the
+    // newly sorted index records.
+    program.emit_insn(Insn::OpenWriteAsync {
+        cursor_id: btree_cursor_id,
+        root_page: RegisterOrLiteral::Register(root_page_reg),
+    });
+    program.emit_insn(Insn::OpenWriteAwait {});
+
+    let sorted_loop_start = program.allocate_label();
+    let sorted_loop_end = program.allocate_label();
+
+    // Sort the index records in the sorter
+    program.emit_insn(Insn::SorterSort {
+        cursor_id: sorter_cursor_id,
+        pc_if_empty: sorted_loop_end,
+    });
+    program.resolve_label(sorted_loop_start, program.offset());
+    let sorted_record_reg = program.alloc_register();
+    program.emit_insn(Insn::SorterData {
+        pseudo_cursor: pseudo_cursor_id,
+        cursor_id: sorter_cursor_id,
+        dest_reg: sorted_record_reg,
+    });
+
+    // seek to the end of the index btree to position the cursor for appending
+    program.emit_insn(Insn::SeekEnd {
+        cursor_id: btree_cursor_id,
+    });
+    // insert new index record
+    program.emit_insn(Insn::IdxInsertAsync {
+        cursor_id: btree_cursor_id,
+        record_reg: sorted_record_reg,
+        unpacked_start: None, // TODO: optimize with these to avoid decoding record twice
+        unpacked_count: None,
+        flags: IdxInsertFlags::new().use_seek(false),
+    });
+    program.emit_insn(Insn::IdxInsertAwait {
+        cursor_id: btree_cursor_id,
+    });
+    program.emit_insn(Insn::SorterNext {
+        cursor_id: sorter_cursor_id,
+        pc_if_next: sorted_loop_start,
+    });
+    program.resolve_label(sorted_loop_end, program.offset());
+
+    // End of the outer loop
+    //
+    // Keep the schema table open to emit ParseSchema; close the other cursors.
+    program.close_cursors(&[sorter_cursor_id, table_cursor_id, btree_cursor_id]);
+
+    // TODO: SetCookie for schema change
+    //
+    // Parse the schema table to get the index root page and add new index to Schema
+    let parse_schema_where_clause = format!("name = '{}' AND type = 'index'", idx_name);
+    program.emit_insn(Insn::ParseSchema {
+        db: sqlite_schema_cursor_id,
+        where_clause: parse_schema_where_clause,
+    });
+    // Close the final sqlite_schema cursor
+    program.emit_insn(Insn::Close {
+        cursor_id: sqlite_schema_cursor_id,
+    });
+
+    // Epilogue:
+    program.emit_halt();
+    program.resolve_label(init_label, program.offset());
+    program.emit_transaction(true);
+    program.emit_constant_insns();
+    program.emit_goto(start_offset);
+
+    Ok(program)
+}
+
+fn resolve_sorted_columns<'a>(
+    table: &'a BTreeTable,
+    cols: &[SortedColumn],
+) -> crate::Result<Vec<((usize, &'a Column), SortOrder)>> {
+    let mut resolved = Vec::with_capacity(cols.len());
+    for sc in cols {
+        let ident = normalize_ident(match &sc.expr {
+            Expr::Id(Id(col_name)) | Expr::Name(ast::Name(col_name)) => col_name,
+            _ => crate::bail_parse_error!("Error: cannot use expressions in CREATE INDEX"),
+        });
+        let Some(col) = table.get_column(&ident) else {
+            crate::bail_parse_error!(
+                "Error: column '{ident}' does not exist in table '{}'",
+                table.name
+            );
+        };
+        resolved.push((col, sc.order.unwrap_or(SortOrder::Asc)));
+    }
+    Ok(resolved)
+}
+
+fn create_idx_stmt_to_sql(
+    tbl_name: &str,
+    idx_name: &str,
+    unique_if_not_exists: (bool, bool),
+    cols: &[((usize, &Column), SortOrder)],
+) -> String {
+    let mut sql = String::with_capacity(128);
+    sql.push_str("CREATE ");
+    if unique_if_not_exists.0 {
+        sql.push_str("UNIQUE ");
+    }
+    sql.push_str("INDEX ");
+    if unique_if_not_exists.1 {
+        sql.push_str("IF NOT EXISTS ");
+    }
+    sql.push_str(idx_name);
+    sql.push_str(" ON ");
+    sql.push_str(tbl_name);
+    sql.push_str(" (");
+    for (i, (col, order)) in cols.iter().enumerate() {
+        if i > 0 {
+            sql.push_str(", ");
+        }
+        sql.push_str(col.1.name.as_ref().unwrap());
+        if *order == SortOrder::Desc {
+            sql.push_str(" DESC");
+        }
+    }
+    sql.push(')');
+    sql
+}
diff --git a/core/translate/insert.rs b/core/translate/insert.rs
index 5fda098e6..7713b9355 100644
--- a/core/translate/insert.rs
+++ b/core/translate/insert.rs
@@ -9,6 +9,7 @@ use crate::error::SQLITE_CONSTRAINT_PRIMARYKEY;
 use crate::schema::Table;
 use crate::util::normalize_ident;
 use crate::vdbe::builder::{ProgramBuilderOpts, QueryMode};
+use crate::vdbe::insn::RegisterOrLiteral;
 use crate::vdbe::BranchOffset;
 use crate::{
     schema::{Column, Schema},
@@ -84,11 +85,11 @@ pub fn translate_insert(
     );
     let root_page = btree_table.root_page;
     let values = match body {
-        InsertBody::Select(select, None) => match &select.body.select.deref() {
+        InsertBody::Select(select, _) => match &select.body.select.deref() {
             OneSelect::Values(values) => values,
             _ => todo!(),
         },
-        _ => todo!(),
+        InsertBody::DefaultValues => &vec![vec![]],
     };
     let column_mappings = resolve_columns_for_insert(&table, columns, values)?;
@@ -152,7 +153,7 @@ pub fn translate_insert(

         program.emit_insn(Insn::OpenWriteAsync {
             cursor_id,
-            root_page,
+            root_page: RegisterOrLiteral::Literal(root_page),
         });
         program.emit_insn(Insn::OpenWriteAwait {});

@@ -168,7 +169,7 @@ pub fn translate_insert(
         // Single row - populate registers directly
         program.emit_insn(Insn::OpenWriteAsync {
             cursor_id,
-            root_page,
+            root_page: RegisterOrLiteral::Literal(root_page),
         });
         program.emit_insn(Insn::OpenWriteAwait {});
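Editor's note: the emitted CREATE INDEX program is a classic two-phase bulk build: scan the table into a sorter, then drain the sorter in key order so every IdxInsertAsync lands after a SeekEnd as a pure append, building the btree left-to-right with no random seeks. A rough model of that flow under those assumptions (plain `Vec`s stand in for the sorter and the index btree):

```rust
// Phase 1 mirrors SorterInsert: collect (key, rowid) pairs from a table scan.
// Phase 2 mirrors SorterSort + SeekEnd + IdxInsert: drain in order, append only.
fn build_index(table: &[(u64, &str)]) -> Vec<(String, u64)> {
    let mut sorter: Vec<(String, u64)> = table
        .iter()
        .map(|(rowid, key_col)| (key_col.to_string(), *rowid))
        .collect();
    sorter.sort(); // SorterSort equivalent

    // Pure appends: the cursor stays parked past the last cell.
    let mut index = Vec::with_capacity(sorter.len());
    for entry in sorter {
        index.push(entry);
    }
    index
}

fn main() {
    let table = [(1, "pear"), (2, "apple"), (3, "mango")];
    let index = build_index(&table);
    assert_eq!(index[0], ("apple".to_string(), 2));
}
```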
diff --git a/core/translate/main_loop.rs b/core/translate/main_loop.rs
index 0dca99b20..fd1a8ac9c 100644
--- a/core/translate/main_loop.rs
+++ b/core/translate/main_loop.rs
@@ -102,7 +102,7 @@ pub fn init_loop(
                 let root_page = btree.root_page;
                 program.emit_insn(Insn::OpenWriteAsync {
                     cursor_id,
-                    root_page,
+                    root_page: root_page.into(),
                 });
                 program.emit_insn(Insn::OpenWriteAwait {});
             }
@@ -110,7 +110,7 @@ pub fn init_loop(
                 let root_page = btree.root_page;
                 program.emit_insn(Insn::OpenWriteAsync {
                     cursor_id,
-                    root_page,
+                    root_page: root_page.into(),
                 });
                 program.emit_insn(Insn::OpenWriteAwait {});
             }
@@ -144,14 +144,14 @@ pub fn init_loop(
             OperationMode::DELETE => {
                 program.emit_insn(Insn::OpenWriteAsync {
                     cursor_id: table_cursor_id,
-                    root_page: table.table.get_root_page(),
+                    root_page: table.table.get_root_page().into(),
                 });
                 program.emit_insn(Insn::OpenWriteAwait {});
             }
             OperationMode::UPDATE => {
                 program.emit_insn(Insn::OpenWriteAsync {
                     cursor_id: table_cursor_id,
-                    root_page: table.table.get_root_page(),
+                    root_page: table.table.get_root_page().into(),
                 });
                 program.emit_insn(Insn::OpenWriteAwait {});
             }
@@ -177,14 +177,14 @@ pub fn init_loop(
             OperationMode::DELETE => {
                 program.emit_insn(Insn::OpenWriteAsync {
                     cursor_id: index_cursor_id,
-                    root_page: index.root_page,
+                    root_page: index.root_page.into(),
                 });
                 program.emit_insn(Insn::OpenWriteAwait {});
             }
             OperationMode::UPDATE => {
                 program.emit_insn(Insn::OpenWriteAsync {
                     cursor_id: index_cursor_id,
-                    root_page: index.root_page,
+                    root_page: index.root_page.into(),
                 });
                 program.emit_insn(Insn::OpenWriteAwait {});
             }
diff --git a/core/translate/mod.rs b/core/translate/mod.rs
index 739ae5f03..cf93b34ba 100644
--- a/core/translate/mod.rs
+++ b/core/translate/mod.rs
@@ -12,6 +12,7 @@ pub(crate) mod delete;
 pub(crate) mod emitter;
 pub(crate) mod expr;
 pub(crate) mod group_by;
+pub(crate) mod index;
 pub(crate) mod insert;
 pub(crate) mod main_loop;
 pub(crate) mod optimizer;
@@ -34,6 +35,7 @@ use crate::translate::delete::translate_delete;
 use crate::vdbe::builder::{ProgramBuilder, ProgramBuilderOpts, QueryMode};
 use crate::vdbe::Program;
 use crate::{bail_parse_error, Connection, Result, SymbolTable};
+use index::translate_create_index;
 use insert::translate_insert;
 use limbo_sqlite3_parser::ast::{self, Delete, Insert};
 use schema::{translate_create_table, translate_create_virtual_table, translate_drop_table};
@@ -61,7 +63,24 @@ pub fn translate(
         ast::Stmt::Attach { .. } => bail_parse_error!("ATTACH not supported yet"),
         ast::Stmt::Begin(tx_type, tx_name) => translate_tx_begin(tx_type, tx_name)?,
         ast::Stmt::Commit(tx_name) => translate_tx_commit(tx_name)?,
-        ast::Stmt::CreateIndex { .. } => bail_parse_error!("CREATE INDEX not supported yet"),
+        ast::Stmt::CreateIndex {
+            unique,
+            if_not_exists,
+            idx_name,
+            tbl_name,
+            columns,
+            ..
+        } => {
+            change_cnt_on = true;
+            translate_create_index(
+                query_mode,
+                (unique, if_not_exists),
+                &idx_name.name.0,
+                &tbl_name.0,
+                &columns,
+                schema,
+            )?
+        }
         ast::Stmt::CreateTable {
             temporary,
             if_not_exists,
diff --git a/core/translate/schema.rs b/core/translate/schema.rs
index 29cf29644..3d5aa79db 100644
--- a/core/translate/schema.rs
+++ b/core/translate/schema.rs
@@ -103,7 +103,7 @@ pub fn translate_create_table(
     );
     program.emit_insn(Insn::OpenWriteAsync {
         cursor_id: sqlite_schema_cursor_id,
-        root_page: 1,
+        root_page: 1usize.into(),
     });
     program.emit_insn(Insn::OpenWriteAwait {});

@@ -155,8 +155,8 @@ pub fn translate_create_table(
     Ok(program)
 }

-#[derive(Debug)]
-enum SchemaEntryType {
+#[derive(Debug, Clone, Copy)]
+pub enum SchemaEntryType {
     Table,
     Index,
 }
@@ -169,9 +169,9 @@ impl SchemaEntryType {
         }
     }
 }
-const SQLITE_TABLEID: &str = "sqlite_schema";
+pub const SQLITE_TABLEID: &str = "sqlite_schema";

-fn emit_schema_entry(
+pub fn emit_schema_entry(
     program: &mut ProgramBuilder,
     sqlite_schema_cursor_id: usize,
     entry_type: SchemaEntryType,
@@ -500,7 +500,7 @@ pub fn translate_create_virtual_table(
     );
     program.emit_insn(Insn::OpenWriteAsync {
         cursor_id: sqlite_schema_cursor_id,
-        root_page: 1,
+        root_page: 1usize.into(),
     });
     program.emit_insn(Insn::OpenWriteAwait {});

@@ -572,14 +572,14 @@ pub fn translate_drop_table(
     let row_id_reg = program.alloc_register(); // r5

     let table_name = "sqlite_schema";
-    let schema_table = schema.get_btree_table(&table_name).unwrap();
+    let schema_table = schema.get_btree_table(table_name).unwrap();
     let sqlite_schema_cursor_id = program.alloc_cursor_id(
         Some(table_name.to_string()),
         CursorType::BTreeTable(schema_table.clone()),
     );
     program.emit_insn(Insn::OpenWriteAsync {
         cursor_id: sqlite_schema_cursor_id,
-        root_page: 1,
+        root_page: 1usize.into(),
     });
     program.emit_insn(Insn::OpenWriteAwait {});
diff --git a/core/types.rs b/core/types.rs
index 9e7869b76..1556ee100 100644
--- a/core/types.rs
+++ b/core/types.rs
@@ -732,6 +732,10 @@ impl ImmutableRecord {
         &self.values[idx]
     }

+    pub fn get_value_opt(&self, idx: usize) -> Option<&RefValue> {
+        self.values.get(idx)
+    }
+
     pub fn len(&self) -> usize {
         self.values.len()
     }
diff --git a/core/vdbe/builder.rs b/core/vdbe/builder.rs
index 058fc8aab..19a71a68d 100644
--- a/core/vdbe/builder.rs
+++ b/core/vdbe/builder.rs
@@ -131,6 +131,12 @@ impl ProgramBuilder {
         self.insns.push((insn, function));
     }

+    pub fn close_cursors(&mut self, cursors: &[CursorID]) {
+        for cursor in cursors {
+            self.emit_insn(Insn::Close { cursor_id: *cursor });
+        }
+    }
+
     pub fn emit_string8(&mut self, value: String, dest: usize) {
         self.emit_insn(Insn::String8 { value, dest });
     }
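Editor's note: `get_value_opt` above is what lets the VDBE treat an out-of-range column read as NULL instead of panicking (see the `op_column` change in the next file). The behavior, sketched with `RefValue` reduced to a two-variant enum:

```rust
// slice::get turns an out-of-range column read into a NULL instead of a
// panic, which matters when a record has fewer columns than the cursor asks
// for. RefValue is modeled as a tiny enum here.
#[derive(Clone, Debug, PartialEq)]
enum RefValue {
    Null,
    Integer(i64),
}

fn column_or_null(record: &[RefValue], column: usize) -> RefValue {
    match record.get(column) {
        Some(val) => val.clone(),
        None => RefValue::Null,
    }
}

fn main() {
    let record = vec![RefValue::Integer(1), RefValue::Integer(2)];
    assert_eq!(column_or_null(&record, 1), RefValue::Integer(2));
    assert_eq!(column_or_null(&record, 5), RefValue::Null); // past the end
}
```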
diff --git a/core/vdbe/execute.rs b/core/vdbe/execute.rs
index 3c511a0db..d2c636bae 100644
--- a/core/vdbe/execute.rs
+++ b/core/vdbe/execute.rs
@@ -11,7 +11,7 @@ use std::{borrow::BorrowMut, rc::Rc};
 use crate::pseudo::PseudoCursor;
 use crate::result::LimboResult;
 use crate::schema::{affinity, Affinity};
-use crate::storage::btree::BTreeCursor;
+use crate::storage::btree::{BTreeCursor, BTreeKey};
 use crate::storage::wal::CheckpointResult;
 use crate::types::{
     AggContext, Cursor, CursorResult, ExternalAggState, OwnedValue, SeekKey, SeekOp,
@@ -21,7 +21,7 @@ use crate::util::{
     checked_cast_text_to_numeric, parse_schema_rows, RoundToPrecision,
 };
 use crate::vdbe::builder::CursorType;
-use crate::vdbe::insn::Insn;
+use crate::vdbe::insn::{IdxInsertFlags, Insn};
 use crate::vector::{vector32, vector64, vector_distance_cos, vector_extract};
 use crate::{info, MvCursor, RefValue, Row, StepResult, TransactionState};

@@ -29,7 +29,7 @@ use crate::{info, MvCursor, Row, StepResult, TransactionState};
 use super::insn::{
     exec_add, exec_and, exec_bit_and, exec_bit_not, exec_bit_or, exec_boolean_not, exec_concat,
     exec_divide, exec_multiply, exec_or, exec_remainder, exec_shift_left, exec_shift_right,
-    exec_subtract, Cookie,
+    exec_subtract, Cookie, RegisterOrLiteral,
 };
 use super::HaltState;
 use rand::thread_rng;
@@ -1278,7 +1278,10 @@ pub fn op_column(
             if cursor.get_null_flag() {
                 RefValue::Null
             } else {
-                record.get_value(*column).clone()
+                match record.get_value_opt(*column) {
+                    Some(val) => val.clone(),
+                    None => RefValue::Null,
+                }
             }
         } else {
             RefValue::Null
@@ -1305,10 +1308,14 @@ pub fn op_column(
         let record = {
             let mut cursor = state.get_cursor(*cursor_id);
             let cursor = cursor.as_sorter_mut();
-            cursor.record().map(|r| r.clone())
+            cursor.record().cloned()
         };
         if let Some(record) = record {
-            state.registers[*dest] = Register::OwnedValue(record.get_value(*column).to_owned());
+            state.registers[*dest] =
+                Register::OwnedValue(match record.get_value_opt(*column) {
+                    Some(val) => val.to_owned(),
+                    None => OwnedValue::Null,
+                });
         } else {
             state.registers[*dest] = Register::OwnedValue(OwnedValue::Null);
         }
@@ -2049,6 +2056,24 @@ pub fn op_idx_ge(
     Ok(InsnFunctionStepResult::Step)
 }

+pub fn op_seek_end(
+    program: &Program,
+    state: &mut ProgramState,
+    insn: &Insn,
+    pager: &Rc<Pager>,
+    mv_store: Option<&Rc<MvStore>>,
+) -> Result<InsnFunctionStepResult> {
+    if let Insn::SeekEnd { cursor_id } = *insn {
+        let mut cursor = state.get_cursor(cursor_id);
+        let cursor = cursor.as_btree_mut();
+        return_if_io!(cursor.seek_end());
+    } else {
+        unreachable!("unexpected Insn {:?}", insn)
+    }
+    state.pc += 1;
+    Ok(InsnFunctionStepResult::Step)
+}
+
 pub fn op_idx_le(
     program: &Program,
     state: &mut ProgramState,
@@ -3648,11 +3673,14 @@ pub fn op_insert_async(
             Register::Record(r) => r,
             _ => unreachable!("Not a record! Cannot insert a non record value."),
         };
-        let key = &state.registers[*key_reg];
+        let key = match &state.registers[*key_reg].get_owned_value() {
+            OwnedValue::Integer(i) => *i,
+            _ => unreachable!("expected integer key"),
+        };
         // NOTE(pere): Sending moved_before == true is okay because we moved before but
         // if we were to set to false after starting a balance procedure, it might
         // leave undefined state.
-        return_if_io!(cursor.insert(key.get_owned_value(), record, true));
+        return_if_io!(cursor.insert(&BTreeKey::new_table_rowid(key as u64, Some(record)), true));
     }
     state.pc += 1;
     Ok(InsnFunctionStepResult::Step)
@@ -3706,6 +3734,73 @@ pub fn op_delete_async(
     Ok(InsnFunctionStepResult::Step)
 }

+pub fn op_idx_insert_async(
+    program: &Program,
+    state: &mut ProgramState,
+    insn: &Insn,
+    pager: &Rc<Pager>,
+    mv_store: Option<&Rc<MvStore>>,
+) -> Result<InsnFunctionStepResult> {
+    if let Insn::IdxInsertAsync {
+        cursor_id,
+        record_reg,
+        flags,
+        ..
+    } = *insn
+    {
+        let (_, cursor_type) = program.cursor_ref.get(cursor_id).unwrap();
+        let CursorType::BTreeIndex(index_meta) = cursor_type else {
+            panic!("IdxInsert: not a BTree index cursor");
+        };
+        {
+            let mut cursor = state.get_cursor(cursor_id);
+            let cursor = cursor.as_btree_mut();
+            let record = match &state.registers[record_reg] {
+                Register::Record(ref r) => r,
+                _ => return Err(LimboError::InternalError("expected record".into())),
+            };
+            let moved_before = if index_meta.unique {
+                // check for uniqueness violation
+                match cursor.key_exists_in_index(record)? {
+                    CursorResult::Ok(true) => {
+                        return Err(LimboError::Constraint(
+                            "UNIQUE constraint failed: duplicate key".into(),
+                        ))
+                    }
+                    CursorResult::IO => return Ok(InsnFunctionStepResult::IO),
+                    CursorResult::Ok(false) => {}
+                };
+                false
+            } else {
+                flags.has(IdxInsertFlags::USE_SEEK)
+            };
+            // insert record as key
+            return_if_io!(cursor.insert(&BTreeKey::new_index_key(record), moved_before));
+        }
+        state.pc += 1;
+    }
+    Ok(InsnFunctionStepResult::Step)
+}
+
+pub fn op_idx_insert_await(
+    program: &Program,
+    state: &mut ProgramState,
+    insn: &Insn,
+    pager: &Rc<Pager>,
+    mv_store: Option<&Rc<MvStore>>,
+) -> Result<InsnFunctionStepResult> {
+    if let Insn::IdxInsertAwait { cursor_id } = *insn {
+        {
+            let mut cursor = state.get_cursor(cursor_id);
+            let cursor = cursor.as_btree_mut();
+            cursor.wait_for_completion()?;
+        }
+        // TODO: flag optimizations, update n_change if OPFLAG_NCHANGE
+        state.pc += 1;
+    }
+    Ok(InsnFunctionStepResult::Step)
+}
+
 pub fn op_delete_await(
     program: &Program,
     state: &mut ProgramState,
@@ -3889,16 +3984,28 @@ pub fn op_open_write_async(
     let Insn::OpenWriteAsync {
         cursor_id,
         root_page,
+        ..
     } = insn
     else {
         unreachable!("unexpected Insn {:?}", insn)
     };
+    let root_page = match root_page {
+        RegisterOrLiteral::Literal(lit) => *lit as u64,
+        RegisterOrLiteral::Register(reg) => match &state.registers[*reg].get_owned_value() {
+            OwnedValue::Integer(val) => *val as u64,
+            _ => {
+                return Err(LimboError::InternalError(
+                    "OpenWriteAsync: the value in root_page is not an integer".into(),
+                ));
+            }
+        },
+    };
     let (_, cursor_type) = program.cursor_ref.get(*cursor_id).unwrap();
     let mut cursors = state.cursors.borrow_mut();
     let is_index = cursor_type.is_index();
     let mv_cursor = match state.mv_tx_id {
         Some(tx_id) => {
-            let table_id = *root_page as u64;
+            let table_id = root_page;
             let mv_store = mv_store.unwrap().clone();
             let mv_cursor = Rc::new(RefCell::new(
                 MvCursor::new(mv_store.clone(), tx_id, table_id).unwrap(),
@@ -3907,7 +4014,7 @@ pub fn op_open_write_async(
         }
         None => None,
     };
-    let cursor = BTreeCursor::new(mv_cursor, pager.clone(), *root_page);
+    let cursor = BTreeCursor::new(mv_cursor, pager.clone(), root_page as usize);
     if is_index {
         cursors
             .get_mut(*cursor_id)
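Editor's note: the `op_open_write_async` change above exists because CREATE INDEX only learns the new btree's root page at runtime (CreateBtree writes it into a register), so the operand must be either a compile-time literal or a register to read. A standalone model of that resolution step; the names below mirror the diff but the register file is a plain `Vec`:

```rust
// Resolve a root page that is either known at compile time (Literal) or
// produced at runtime by CreateBtree (Register).
#[derive(Clone, Copy, Debug)]
enum RegisterOrLiteral<T> {
    Register(usize),
    Literal(T),
}

fn resolve_root_page(op: RegisterOrLiteral<usize>, registers: &[i64]) -> Result<u64, String> {
    match op {
        RegisterOrLiteral::Literal(lit) => Ok(lit as u64),
        RegisterOrLiteral::Register(reg) => match registers.get(reg) {
            Some(val) if *val >= 0 => Ok(*val as u64),
            _ => Err("root_page register does not hold a valid page number".into()),
        },
    }
}

fn main() {
    let registers = vec![0, 7]; // register 1 was filled by CreateBtree
    assert_eq!(resolve_root_page(RegisterOrLiteral::Literal(1), &registers), Ok(1));
    assert_eq!(resolve_root_page(RegisterOrLiteral::Register(1), &registers), Ok(7));
}
```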
} => ( "OpenWriteAsync", *cursor_id as i32, - *root_page as i32, + match root_page { + RegisterOrLiteral::Literal(i) => *i as _, + RegisterOrLiteral::Register(i) => *i as _, + }, 0, OwnedValue::build_text(""), 0, diff --git a/core/vdbe/insn.rs b/core/vdbe/insn.rs index f45e7ce35..8d3a9afca 100644 --- a/core/vdbe/insn.rs +++ b/core/vdbe/insn.rs @@ -38,6 +38,56 @@ impl CmpInsFlags { } } +#[derive(Clone, Copy, Debug, Default)] +pub struct IdxInsertFlags(pub u8); +impl IdxInsertFlags { + pub const APPEND: u8 = 0x01; // Hint: insert likely at the end + pub const NCHANGE: u8 = 0x02; // Increment the change counter + pub const USE_SEEK: u8 = 0x04; // Skip seek if last one was same key + pub fn new() -> Self { + IdxInsertFlags(0) + } + pub fn has(&self, flag: u8) -> bool { + (self.0 & flag) != 0 + } + pub fn append(mut self, append: bool) -> Self { + if append { + self.0 |= IdxInsertFlags::APPEND; + } else { + self.0 &= !IdxInsertFlags::APPEND; + } + self + } + pub fn use_seek(mut self, seek: bool) -> Self { + if seek { + self.0 |= IdxInsertFlags::USE_SEEK; + } else { + self.0 &= !IdxInsertFlags::USE_SEEK; + } + self + } + pub fn nchange(mut self, change: bool) -> Self { + if change { + self.0 |= IdxInsertFlags::NCHANGE; + } else { + self.0 &= !IdxInsertFlags::NCHANGE; + } + self + } +} + +#[derive(Clone, Copy, Debug)] +pub enum RegisterOrLiteral { + Register(usize), + Literal(T), +} + +impl From for RegisterOrLiteral { + fn from(value: PageIdx) -> Self { + RegisterOrLiteral::Literal(value) + } +} + #[derive(Description, Debug)] pub enum Insn { /// Initialize the program state and jump to the given PC. @@ -401,6 +451,9 @@ pub enum Insn { src_reg: usize, target_pc: BranchOffset, }, + SeekEnd { + cursor_id: CursorID, + }, /// P1 is an open index cursor and P3 is a cursor on the corresponding table. This opcode does a deferred seek of the P3 table cursor to the row that corresponds to the current row of P1. /// This is a deferred seek. Nothing actually happens until the cursor is used to read a record. That way, if no reads occur, no unnecessary I/O happens. @@ -431,6 +484,21 @@ pub enum Insn { target_pc: BranchOffset, }, + /// cursor_id is a cursor pointing to a B-Tree index that uses integer keys, this op writes the value obtained from MakeRecord into the index. + /// P3 + P4 are for the original column values that make up that key in unpacked (pre-serialized) form. + /// If P5 has the OPFLAG_APPEND bit set, that is a hint to the b-tree layer that this insert is likely to be an append. + /// OPFLAG_NCHANGE bit set, then the change counter is incremented by this instruction. If the OPFLAG_NCHANGE bit is clear, then the change counter is unchanged + IdxInsertAsync { + cursor_id: CursorID, + record_reg: usize, // P2 the register containing the record to insert + unpacked_start: Option, // P3 the index of the first register for the unpacked key + unpacked_count: Option, // P4 # of unpacked values in the key in P2 + flags: IdxInsertFlags, // TODO: optimization + }, + IdxInsertAwait { + cursor_id: CursorID, + }, + /// The P4 register values beginning with P3 form an unpacked index key that omits the PRIMARY KEY. Compare this key value against the index that P1 is currently pointing to, ignoring the PRIMARY KEY or ROWID fields at the end. /// If the P1 index entry is greater or equal than the key value then jump to P2. Otherwise fall through to the next instruction. 
IdxGE { @@ -587,7 +655,7 @@ pub enum Insn { OpenWriteAsync { cursor_id: CursorID, - root_page: PageIdx, + root_page: RegisterOrLiteral, }, OpenWriteAwait {}, @@ -1237,6 +1305,7 @@ impl Insn { Insn::DeferredSeek { .. } => execute::op_deferred_seek, Insn::SeekGE { .. } => execute::op_seek_ge, Insn::SeekGT { .. } => execute::op_seek_gt, + Insn::SeekEnd { .. } => execute::op_seek_end, Insn::IdxGE { .. } => execute::op_idx_ge, Insn::IdxGT { .. } => execute::op_idx_gt, Insn::IdxLE { .. } => execute::op_idx_le, @@ -1258,7 +1327,8 @@ impl Insn { Insn::Yield { .. } => execute::op_yield, Insn::InsertAsync { .. } => execute::op_insert_async, Insn::InsertAwait { .. } => execute::op_insert_await, - + Insn::IdxInsertAsync { .. } => execute::op_idx_insert_async, + Insn::IdxInsertAwait { .. } => execute::op_idx_insert_await, Insn::DeleteAsync { .. } => execute::op_delete_async, Insn::DeleteAwait { .. } => execute::op_delete_await, diff --git a/core/vdbe/mod.rs b/core/vdbe/mod.rs index 673b836a4..8794b208a 100644 --- a/core/vdbe/mod.rs +++ b/core/vdbe/mod.rs @@ -394,7 +394,7 @@ impl Program { } mv_transactions.clear(); } - return Ok(StepResult::Done); + Ok(StepResult::Done) } else { let connection = self .connection @@ -408,30 +408,28 @@ impl Program { ); if program_state.halt_state.is_some() { self.step_end_write_txn(&pager, &mut program_state.halt_state, connection.deref()) - } else { - if auto_commit { - let current_state = connection.transaction_state.borrow().clone(); - match current_state { - TransactionState::Write => self.step_end_write_txn( - &pager, - &mut program_state.halt_state, - connection.deref(), - ), - TransactionState::Read => { - connection.transaction_state.replace(TransactionState::None); - pager.end_read_tx()?; - Ok(StepResult::Done) - } - TransactionState::None => Ok(StepResult::Done), + } else if auto_commit { + let current_state = connection.transaction_state.borrow().clone(); + match current_state { + TransactionState::Write => self.step_end_write_txn( + &pager, + &mut program_state.halt_state, + connection.deref(), + ), + TransactionState::Read => { + connection.transaction_state.replace(TransactionState::None); + pager.end_read_tx()?; + Ok(StepResult::Done) } - } else { - if self.change_cnt_on { - if let Some(conn) = self.connection.upgrade() { - conn.set_changes(self.n_change.get()); - } - } - Ok(StepResult::Done) + TransactionState::None => Ok(StepResult::Done), } + } else { + if self.change_cnt_on { + if let Some(conn) = self.connection.upgrade() { + conn.set_changes(self.n_change.get()); + } + } + Ok(StepResult::Done) } } } diff --git a/dist-workspace.toml b/dist-workspace.toml new file mode 100644 index 000000000..bdeb71f97 --- /dev/null +++ b/dist-workspace.toml @@ -0,0 +1,28 @@ +[workspace] +members = ["cargo:."] + +# Config for 'dist' +[dist] +# The preferred dist version to use in CI (Cargo.toml SemVer syntax) +cargo-dist-version = "0.28.3" +# CI backends to support +ci = "github" +# The installers to generate for each app +installers = ["shell", "powershell"] +# Target platforms to build apps for (Rust target-triple syntax) +targets = [ + "aarch64-apple-darwin", + "x86_64-apple-darwin", + "x86_64-unknown-linux-gnu", + "x86_64-pc-windows-msvc", +] +# Which actions to run on pull requests +pr-run-mode = "plan" +# Path that installers should place binaries in +install-path = "~/.limbo" +# Whether to install an updater program +install-updater = true +# Whether to consider the binaries in a package for distribution (defaults true) +dist = false +# Whether to enable GitHub 
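Editor's note: `IdxInsertFlags` above is a `u8` bitset with builder-style setters, mirroring SQLite's OPFLAG_* hints. The pattern, reproduced standalone with a usage line matching what `translate_create_index` emits:

```rust
// A u8 bitset with chainable setters, as in the diff.
#[derive(Clone, Copy, Debug, Default)]
struct IdxInsertFlags(u8);

impl IdxInsertFlags {
    const APPEND: u8 = 0x01;   // hint: insert is likely at the end
    const NCHANGE: u8 = 0x02;  // bump the change counter
    const USE_SEEK: u8 = 0x04; // skip the seek if the last one matched

    fn has(&self, flag: u8) -> bool {
        (self.0 & flag) != 0
    }
    fn use_seek(mut self, on: bool) -> Self {
        if on { self.0 |= Self::USE_SEEK } else { self.0 &= !Self::USE_SEEK }
        self
    }
}

fn main() {
    // As emitted during the bulk index build: no seek-skipping.
    let flags = IdxInsertFlags::default().use_seek(false);
    assert!(!flags.has(IdxInsertFlags::USE_SEEK));
    assert!(flags.use_seek(true).has(IdxInsertFlags::USE_SEEK));
}
```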
diff --git a/dist-workspace.toml b/dist-workspace.toml
new file mode 100644
index 000000000..bdeb71f97
--- /dev/null
+++ b/dist-workspace.toml
@@ -0,0 +1,28 @@
+[workspace]
+members = ["cargo:."]
+
+# Config for 'dist'
+[dist]
+# The preferred dist version to use in CI (Cargo.toml SemVer syntax)
+cargo-dist-version = "0.28.3"
+# CI backends to support
+ci = "github"
+# The installers to generate for each app
+installers = ["shell", "powershell"]
+# Target platforms to build apps for (Rust target-triple syntax)
+targets = [
+    "aarch64-apple-darwin",
+    "x86_64-apple-darwin",
+    "x86_64-unknown-linux-gnu",
+    "x86_64-pc-windows-msvc",
+]
+# Which actions to run on pull requests
+pr-run-mode = "plan"
+# Path that installers should place binaries in
+install-path = "~/.limbo"
+# Whether to install an updater program
+install-updater = true
+# Whether to consider the binaries in a package for distribution (defaults true)
+dist = false
+# Whether to enable GitHub Attestations
+github-attestations = true
diff --git a/testing/cli_tests/cli_test_cases.py b/testing/cli_tests/cli_test_cases.py
index 2e46b3f03..17035b0bf 100755
--- a/testing/cli_tests/cli_test_cases.py
+++ b/testing/cli_tests/cli_test_cases.py
@@ -264,6 +264,7 @@ def test_update_with_limit():

     limbo.quit()

+
 def test_update_with_limit_and_offset():
     limbo = TestLimboShell(
         "CREATE TABLE t (a,b,c); insert into t values (1,2,3), (4,5,6), (7,8,9), (1,2,3),(4,5,6), (7,8,9);"
@@ -288,6 +289,15 @@ def test_update_with_limit_and_offset():
         "update-limit-where-result", "SELECT COUNT(*) from t WHERE a = 333;", "0"
     )
     limbo.quit()
+
+
+def test_insert_default_values():
+    limbo = TestLimboShell(
+        "CREATE TABLE t (a integer default(42),b integer default (43),c integer default(44));"
+    )
+    for _ in range(1, 10):
+        limbo.execute_dot("INSERT INTO t DEFAULT VALUES;")
+    limbo.run_test("insert-default-values", "SELECT * FROM t;", "42|43|44\n" * 9)
+    limbo.quit()


 if __name__ == "__main__":
diff --git a/testing/cli_tests/extensions.py b/testing/cli_tests/extensions.py
index d898908f9..bab8cb74f 100755
--- a/testing/cli_tests/extensions.py
+++ b/testing/cli_tests/extensions.py
@@ -342,6 +342,7 @@ def test_kv():
     # first, create a normal table to ensure no issues
     limbo.execute_dot("CREATE TABLE other (a,b,c);")
     limbo.execute_dot("INSERT INTO other values (23,32,23);")
+    limbo = TestLimboShell()
     limbo.run_test_fn(
         "create virtual table t using kv_store;",
         lambda res: "Virtual table module not found: kv_store" in res,
@@ -350,6 +351,7 @@ def test_kv():
     limbo.debug_print(
         "create virtual table t using kv_store;",
     )
+    limbo.run_test_fn(".schema", lambda res: "CREATE VIRTUAL TABLE t" in res)
     limbo.run_test_fn(
         "insert into t values ('hello', 'world');",
         null,
@@ -496,12 +498,6 @@ def test_vfs():
         "Tested large write to testfs",
     )
     print("Tested large write to testfs")
-    # Pere: I commented this out because it added an extra row that made the test test_sqlite_vfs_compat fail
-    # it didn't segfault from my side so maybe this is necessary?
-    # # open regular db file to ensure we don't segfault when vfs file is dropped
-    # limbo.execute_dot(".open testing/vfs.db")
-    # limbo.execute_dot("create table test (id integer primary key, value float);")
-    # limbo.execute_dot("insert into test (value) values (1.0);")
     limbo.quit()


@@ -548,10 +544,10 @@ if __name__ == "__main__":
         test_aggregates()
         test_crypto()
         test_series()
-        test_kv()
         test_ipaddr()
         test_vfs()
         test_sqlite_vfs_compat()
+        test_kv()
     except Exception as e:
         print(f"Test FAILED: {e}")
         cleanup()
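Editor's note: the Python shell tests above pin down the user-visible behavior. For quick reference, the SQL surface this PR adds, sketched in Rust; `run` is a placeholder for any way of feeding SQL to limbo, not an API from this repository:

```rust
// SQL-level view of what this PR enables, as exercised by the shell tests.
fn run(sql: &str) {
    println!("sql> {sql}");
}

fn main() {
    run("CREATE TABLE t (a INTEGER DEFAULT (42), b INTEGER DEFAULT (43));");
    run("INSERT INTO t DEFAULT VALUES;");         // new: InsertBody::DefaultValues
    run("CREATE INDEX t_a ON t (a);");            // new: translate_create_index
    run("CREATE UNIQUE INDEX t_b ON t (b DESC);");
    // Inserting a second identical key would now fail with
    // "UNIQUE constraint failed: duplicate key" via key_exists_in_index.
}
```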