Add sorter utility functions and opcodes

This adds basic in-memory sorting utility functions, similar to SQLite's
src/vdbesort.c. We need to improve this later with external sorting
to support large data sets.

This also adds sorting functionality to the VDBE. Note that none of this
is wired to SQL translation yet so it's unused for now.
This commit is contained in:
Pekka Enberg
2024-07-07 13:43:49 +03:00
parent dac8f4dcba
commit 30ec86a81e
9 changed files with 525 additions and 95 deletions

187
Cargo.lock generated
View File

@@ -195,9 +195,9 @@ dependencies = [
[[package]]
name = "cc"
version = "1.0.101"
version = "1.0.104"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ac367972e516d45567c7eafc73d24e1c193dcf200a8d94e9db7b3d38b349572d"
checksum = "74b6a57f98764a267ff415d50a25e6e166f3831a5071af4995296ea97d210490"
[[package]]
name = "cfg-if"
@@ -255,9 +255,9 @@ dependencies = [
[[package]]
name = "clap"
version = "4.5.7"
version = "4.5.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5db83dced34638ad474f39f250d7fea9598bdd239eaced1bdf45d597da0f433f"
checksum = "84b3edb18336f4df585bc9aa31dd99c036dfa5dc5e9a2939a722a188f3a8970d"
dependencies = [
"clap_builder",
"clap_derive",
@@ -265,9 +265,9 @@ dependencies = [
[[package]]
name = "clap_builder"
version = "4.5.7"
version = "4.5.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f7e204572485eb3fbf28f871612191521df159bc3e15a9f5064c66dba3a8c05f"
checksum = "c1c09dd5ada6c6c78075d6fd0da3f90d8080651e2d6cc8eb2f1aaa4034ced708"
dependencies = [
"anstream",
"anstyle",
@@ -277,14 +277,14 @@ dependencies = [
[[package]]
name = "clap_derive"
version = "4.5.5"
version = "4.5.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c780290ccf4fb26629baa7a1081e68ced113f1d3ec302fa5948f1c381ebf06c6"
checksum = "2bac35c6dafb060fd4d275d9a4ffae97917c13a6327903a8be2153cd964f7085"
dependencies = [
"heck 0.5.0",
"proc-macro2",
"quote",
"syn 2.0.68",
"syn 2.0.69",
]
[[package]]
@@ -304,9 +304,9 @@ checksum = "4b82cf0babdbd58558212896d1a4272303a57bdb245c2bf1147185fb45640e70"
[[package]]
name = "cli-table"
version = "0.4.7"
version = "0.4.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "adfbb116d9e2c4be7011360d0c0bee565712c11e969c9609b25b619366dc379d"
checksum = "b53f9241f288a7b12c56565f04aaeaeeab6b8923d42d99255d4ca428b4d97f89"
dependencies = [
"cli-table-derive",
"csv",
@@ -316,9 +316,9 @@ dependencies = [
[[package]]
name = "cli-table-derive"
version = "0.4.5"
version = "0.4.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2af3bfb9da627b0a6c467624fb7963921433774ed435493b5c08a3053e829ad4"
checksum = "3e83a93253aaae7c74eb7428ce4faa6e219ba94886908048888701819f82fb94"
dependencies = [
"proc-macro2",
"quote",
@@ -342,6 +342,26 @@ version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0b6a852b24ab71dffc585bcb46eaf7959d175cb865a7152e35b348d1b2960422"
[[package]]
name = "const-random"
version = "0.1.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "87e00182fe74b066627d63b85fd550ac2998d4b0bd86bfed477a0ae4c7c71359"
dependencies = [
"const-random-macro",
]
[[package]]
name = "const-random-macro"
version = "0.1.16"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f9d839f2a20b0aee515dc581a6172f2321f96cab76c1a38a4c584a194955390e"
dependencies = [
"getrandom",
"once_cell",
"tiny-keccak",
]
[[package]]
name = "cpp_demangle"
version = "0.4.3"
@@ -360,7 +380,7 @@ dependencies = [
"anes",
"cast",
"ciborium",
"clap 4.5.7",
"clap 4.5.8",
"criterion-plot",
"futures",
"is-terminal",
@@ -470,6 +490,15 @@ dependencies = [
"windows-sys 0.48.0",
]
[[package]]
name = "dlv-list"
version = "0.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "442039f5147480ba31067cb00ada1adae6892028e40e45fc5de7b7df6dcc1b5f"
dependencies = [
"const-random",
]
[[package]]
name = "either"
version = "1.13.0"
@@ -624,7 +653,7 @@ checksum = "87750cf4b7a4c0625b1529e4c543c2182106e4dedc60a2a6455e00d212c489ac"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.68",
"syn 2.0.69",
]
[[package]]
@@ -899,7 +928,7 @@ name = "limbo"
version = "0.0.0"
dependencies = [
"anyhow",
"clap 4.5.7",
"clap 4.5.8",
"cli-table",
"dirs",
"env_logger",
@@ -927,6 +956,7 @@ dependencies = [
"io-uring",
"log",
"mimalloc",
"ordered-multimap",
"pprof",
"rstest",
"rusqlite",
@@ -970,9 +1000,9 @@ dependencies = [
[[package]]
name = "log"
version = "0.4.21"
version = "0.4.22"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "90ed8c1e510134f979dbc4f070f87d4313098b704861a105fe34231c70a3901c"
checksum = "a7a70ba024b9dc04c27ea2f0c0548feb474ec5c54bba33a7f72f873a39d07b24"
[[package]]
name = "memchr"
@@ -1048,9 +1078,9 @@ dependencies = [
[[package]]
name = "object"
version = "0.36.0"
version = "0.36.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "576dfe1fc8f9df304abb159d767a29d0476f7750fbf8aa7ad07816004a207434"
checksum = "081b846d1d56ddfc18fdf1a922e4f6e07a11768ea1b92dec44e42b72712ccfce"
dependencies = [
"memchr",
]
@@ -1063,9 +1093,9 @@ checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92"
[[package]]
name = "oorandom"
version = "11.1.3"
version = "11.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0ab1bc2a289d34bd04a330323ac98a1b4bc82c9d9fcb1e66b63caa84da26b575"
checksum = "b410bbe7e14ab526a0e86877eb47c6996a2bd7746f027ba551028c925390e4e9"
[[package]]
name = "option-ext"
@@ -1073,6 +1103,16 @@ version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "04744f49eae99ab78e0d5c0b603ab218f515ea8cfe5a456d7629ad883a3b6e7d"
[[package]]
name = "ordered-multimap"
version = "0.7.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "49203cdcae0030493bad186b28da2fa25645fa276a51b6fec8010d281e02ef79"
dependencies = [
"dlv-list",
"hashbrown 0.14.5",
]
[[package]]
name = "os_str_bytes"
version = "6.6.1"
@@ -1099,7 +1139,7 @@ dependencies = [
"libc",
"redox_syscall",
"smallvec",
"windows-targets 0.52.5",
"windows-targets 0.52.6",
]
[[package]]
@@ -1359,9 +1399,9 @@ checksum = "ba39f3699c378cd8970968dcbff9c43159ea4cfbd88d43c00b22f2ef10a435d2"
[[package]]
name = "rgb"
version = "0.8.37"
version = "0.8.40"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "05aaa8004b64fd573fc9d002f4e632d51ad4f026c2b5ba95fcb6c2f32c2c47d8"
checksum = "a7439be6844e40133eda024efd85bf07f59d0dd2f59b10c00dd6cfb92cc5c741"
dependencies = [
"bytemuck",
]
@@ -1391,7 +1431,7 @@ dependencies = [
"regex",
"relative-path",
"rustc_version",
"syn 2.0.68",
"syn 2.0.69",
"unicode-ident",
]
@@ -1489,29 +1529,29 @@ checksum = "61697e0a1c7e512e84a621326239844a24d8207b4669b41bc18b32ea5cbf988b"
[[package]]
name = "serde"
version = "1.0.203"
version = "1.0.204"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7253ab4de971e72fb7be983802300c30b5a7f0c2e56fab8abfc6a214307c0094"
checksum = "bc76f558e0cbb2a839d37354c575f1dc3fdc6546b5be373ba43d95f231bf7c12"
dependencies = [
"serde_derive",
]
[[package]]
name = "serde_derive"
version = "1.0.203"
version = "1.0.204"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "500cbc0ebeb6f46627f50f3f5811ccf6bf00643be300b4c3eabc0ef55dc5b5ba"
checksum = "e0cd7e117be63d3c3678776753929474f3b04a43a080c744d6b0ae2a8c28e222"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.68",
"syn 2.0.69",
]
[[package]]
name = "serde_json"
version = "1.0.118"
version = "1.0.120"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d947f6b3163d8857ea16c4fa0dd4840d52f3041039a85decd46867eb1abef2e4"
checksum = "4e0d21c9a8cae1235ad58a00c11cb40d4b1e5c784f1ef2c537876ed6ffd8b7c5"
dependencies = [
"itoa",
"ryu",
@@ -1630,9 +1670,9 @@ dependencies = [
[[package]]
name = "syn"
version = "2.0.68"
version = "2.0.69"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "901fa70d88b9d6c98022e23b4136f9f3e54e4662c3bc1bd1d84a42a9a0f0c1e9"
checksum = "201fcda3845c23e8212cd466bfebf0bd20694490fc0356ae8e428e0824a915a6"
dependencies = [
"proc-macro2",
"quote",
@@ -1683,7 +1723,16 @@ checksum = "46c3384250002a6d5af4d114f2845d37b57521033f30d5c3f46c4d70e1197533"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.68",
"syn 2.0.69",
]
[[package]]
name = "tiny-keccak"
version = "2.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2c9d3793400a45f954c52e73d068316d76b6f4e36977e3fcebb13a2721e80237"
dependencies = [
"crunchy",
]
[[package]]
@@ -1793,7 +1842,7 @@ dependencies = [
"once_cell",
"proc-macro2",
"quote",
"syn 2.0.68",
"syn 2.0.69",
"wasm-bindgen-shared",
]
@@ -1815,7 +1864,7 @@ checksum = "e94f17b526d0a461a191c78ea52bbce64071ed5c04c9ffe424dcb38f74171bb7"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.68",
"syn 2.0.69",
"wasm-bindgen-backend",
"wasm-bindgen-shared",
]
@@ -1882,7 +1931,7 @@ version = "0.52.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d"
dependencies = [
"windows-targets 0.52.5",
"windows-targets 0.52.6",
]
[[package]]
@@ -1902,18 +1951,18 @@ dependencies = [
[[package]]
name = "windows-targets"
version = "0.52.5"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6f0713a46559409d202e70e28227288446bf7841d3211583a4b53e3f6d96e7eb"
checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973"
dependencies = [
"windows_aarch64_gnullvm 0.52.5",
"windows_aarch64_msvc 0.52.5",
"windows_i686_gnu 0.52.5",
"windows_aarch64_gnullvm 0.52.6",
"windows_aarch64_msvc 0.52.6",
"windows_i686_gnu 0.52.6",
"windows_i686_gnullvm",
"windows_i686_msvc 0.52.5",
"windows_x86_64_gnu 0.52.5",
"windows_x86_64_gnullvm 0.52.5",
"windows_x86_64_msvc 0.52.5",
"windows_i686_msvc 0.52.6",
"windows_x86_64_gnu 0.52.6",
"windows_x86_64_gnullvm 0.52.6",
"windows_x86_64_msvc 0.52.6",
]
[[package]]
@@ -1924,9 +1973,9 @@ checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8"
[[package]]
name = "windows_aarch64_gnullvm"
version = "0.52.5"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7088eed71e8b8dda258ecc8bac5fb1153c5cffaf2578fc8ff5d61e23578d3263"
checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3"
[[package]]
name = "windows_aarch64_msvc"
@@ -1936,9 +1985,9 @@ checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc"
[[package]]
name = "windows_aarch64_msvc"
version = "0.52.5"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9985fd1504e250c615ca5f281c3f7a6da76213ebd5ccc9561496568a2752afb6"
checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469"
[[package]]
name = "windows_i686_gnu"
@@ -1948,15 +1997,15 @@ checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e"
[[package]]
name = "windows_i686_gnu"
version = "0.52.5"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "88ba073cf16d5372720ec942a8ccbf61626074c6d4dd2e745299726ce8b89670"
checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b"
[[package]]
name = "windows_i686_gnullvm"
version = "0.52.5"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "87f4261229030a858f36b459e748ae97545d6f1ec60e5e0d6a3d32e0dc232ee9"
checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66"
[[package]]
name = "windows_i686_msvc"
@@ -1966,9 +2015,9 @@ checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406"
[[package]]
name = "windows_i686_msvc"
version = "0.52.5"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "db3c2bf3d13d5b658be73463284eaf12830ac9a26a90c717b7f771dfe97487bf"
checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66"
[[package]]
name = "windows_x86_64_gnu"
@@ -1978,9 +2027,9 @@ checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e"
[[package]]
name = "windows_x86_64_gnu"
version = "0.52.5"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4e4246f76bdeff09eb48875a0fd3e2af6aada79d409d33011886d3e1581517d9"
checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78"
[[package]]
name = "windows_x86_64_gnullvm"
@@ -1990,9 +2039,9 @@ checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc"
[[package]]
name = "windows_x86_64_gnullvm"
version = "0.52.5"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "852298e482cd67c356ddd9570386e2862b5673c85bd5f88df9ab6802b334c596"
checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d"
[[package]]
name = "windows_x86_64_msvc"
@@ -2002,26 +2051,26 @@ checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538"
[[package]]
name = "windows_x86_64_msvc"
version = "0.52.5"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bec47e5bfd1bff0eeaf6d8b485cc1074891a197ab4225d504cb7a1ab88b02bf0"
checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec"
[[package]]
name = "zerocopy"
version = "0.7.34"
version = "0.7.35"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ae87e3fcd617500e5d106f0380cf7b77f3c6092aae37191433159dda23cfb087"
checksum = "1b9b4fd18abc82b8136838da5d50bae7bdea537c574d8dc1a34ed098d6c166f0"
dependencies = [
"zerocopy-derive",
]
[[package]]
name = "zerocopy-derive"
version = "0.7.34"
version = "0.7.35"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "15e934569e47891f7d9411f1a451d947a60e000ab3bd24fbb970f000387d1b3b"
checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.68",
"syn 2.0.69",
]

View File

@@ -28,6 +28,7 @@ anyhow = "1.0.75"
cfg_block = "0.1.1"
fallible-iterator = "0.3.0"
log = "0.4.20"
ordered-multimap = "0.7.1"
sieve-cache = "0.1.4"
sqlite3-parser = "0.11.0"

View File

@@ -147,4 +147,8 @@ impl Cursor for BTreeCursor {
/// Returns a shared `RefCell` borrow of the record stored on this cursor.
/// The inner `Option` is `None` when no record is currently held.
fn record(&self) -> Result<Ref<Option<OwnedRecord>>> {
Ok(self.record.borrow())
}
/// Inserting through a B-tree cursor is not implemented yet: this always
/// panics via `unimplemented!()` and ignores `_record`.
fn insert(&mut self, _record: &OwnedRecord) -> Result<()> {
unimplemented!()
}
}

View File

@@ -4,6 +4,7 @@ mod function;
mod io;
mod pager;
mod schema;
mod sorter;
mod sqlite3_ondisk;
mod storage;
mod translate;
@@ -70,8 +71,8 @@ impl Database {
}
let root_page: i64 = row.get::<i64>(3)?;
let sql: String = row.get::<String>(4)?;
let table = schema::Table::from_sql(&sql, root_page as usize)?;
schema.add_table(table);
let table = schema::BTreeTable::from_sql(&sql, root_page as usize)?;
schema.add_table(Rc::new(table));
}
RowResult::IO => {
// TODO: How do we ensure that the I/O we submitted to

View File

@@ -3,42 +3,78 @@ use core::fmt;
use fallible_iterator::FallibleIterator;
use log::trace;
use sqlite3_parser::{
ast::{Cmd, CreateTableBody, QualifiedName, Stmt},
ast::{Cmd, CreateTableBody, QualifiedName, ResultColumn, Stmt},
lexer::sql::Parser,
};
use std::collections::HashMap;
use std::rc::Rc;
use crate::util::normalize_ident;
pub struct Schema {
pub tables: HashMap<String, Table>,
pub tables: HashMap<String, Rc<BTreeTable>>,
}
impl Schema {
pub fn new() -> Self {
let mut tables: HashMap<String, Table> = HashMap::new();
tables.insert("sqlite_schema".to_string(), sqlite_schema_table());
let mut tables: HashMap<String, Rc<BTreeTable>> = HashMap::new();
tables.insert("sqlite_schema".to_string(), Rc::new(sqlite_schema_table()));
Self { tables }
}
pub fn add_table(&mut self, table: Table) {
pub fn add_table(&mut self, table: Rc<BTreeTable>) {
let name = normalize_ident(&table.name);
self.tables.insert(name, table);
}
pub fn get_table(&self, name: &str) -> Option<&Table> {
pub fn get_table(&self, name: &str) -> Option<Rc<BTreeTable>> {
let name = normalize_ident(name);
self.tables.get(&name)
self.tables.get(&name).cloned()
}
}
pub struct Table {
/// A table as seen by the translator: either a B-tree table or a pseudo-table.
/// Both variants hold their payload behind an `Rc`.
pub enum Table {
// Table described by a `BTreeTable` (has a root page, a name and columns).
BTree(Rc<BTreeTable>),
// Table described by a `PseudoTable` (columns only).
Pseudo(Rc<PseudoTable>),
}
impl Table {
    /// Returns `true` only for the `Pseudo` variant.
    pub fn is_pseudo(&self) -> bool {
        matches!(self, Table::Pseudo(_))
    }

    /// Reports whether `col` is a rowid alias.
    ///
    /// B-tree tables delegate to `BTreeTable::column_is_rowid_alias`;
    /// pseudo-tables always answer `false`.
    pub fn column_is_rowid_alias(&self, col: &Column) -> bool {
        match self {
            Table::Pseudo(_) => false,
            Table::BTree(btree) => btree.column_is_rowid_alias(col),
        }
    }

    /// Looks up a column by name, forwarding to the wrapped table's own
    /// `get_column`. Returns the column's index together with the column.
    pub fn get_column(&self, name: &str) -> Option<(usize, &Column)> {
        match self {
            Table::Pseudo(pseudo) => pseudo.get_column(name),
            Table::BTree(btree) => btree.get_column(name),
        }
    }

    /// Borrows the column list of the wrapped table.
    pub fn columns(&self) -> &Vec<Column> {
        match self {
            Table::Pseudo(pseudo) => &pseudo.columns,
            Table::BTree(btree) => &btree.columns,
        }
    }
}
/// Schema description of a table that has a root page.
pub struct BTreeTable {
// Root page number; the SELECT translation passes it to `OpenReadAsync`.
pub root_page: usize,
// Table name (`create_table` stores the normalized identifier here).
pub name: String,
// Column definitions of the table.
pub columns: Vec<Column>,
}
impl Table {
impl BTreeTable {
pub fn column_is_rowid_alias(&self, col: &Column) -> bool {
let composite_primary_key = self.columns.iter().filter(|col| col.primary_key).count() > 1;
col.primary_key && col.ty == Type::Integer && !composite_primary_key
@@ -54,7 +90,7 @@ impl Table {
None
}
pub fn from_sql(sql: &str, root_page: usize) -> Result<Table> {
pub fn from_sql(sql: &str, root_page: usize) -> Result<BTreeTable> {
let mut parser = Parser::new(sql.as_bytes());
let cmd = parser.next()?;
match cmd {
@@ -94,7 +130,38 @@ impl Table {
}
}
fn create_table(tbl_name: QualifiedName, body: CreateTableBody, root_page: usize) -> Result<Table> {
/// A table description that consists of a column list only; unlike
/// `BTreeTable` it carries no root page and no name.
pub struct PseudoTable {
// Columns in the order they were added.
pub columns: Vec<Column>,
}
impl PseudoTable {
    /// Creates a pseudo-table with no columns.
    pub fn new() -> Self {
        Self {
            columns: Vec::new(),
        }
    }

    /// Appends a column with the given name, type and primary-key flag.
    ///
    /// The name is stored exactly as passed in.
    pub fn add_column(&mut self, name: &str, ty: Type, primary_key: bool) {
        let column = Column {
            name: name.to_string(),
            ty,
            primary_key,
        };
        self.columns.push(column);
    }

    /// Finds the first column whose stored name equals the normalized form of
    /// `name`, returning its index together with the column.
    pub fn get_column(&self, name: &str) -> Option<(usize, &Column)> {
        let wanted = normalize_ident(name);
        self.columns
            .iter()
            .enumerate()
            .find(|(_, column)| column.name == wanted)
    }
}
fn create_table(
tbl_name: QualifiedName,
body: CreateTableBody,
root_page: usize,
) -> Result<BTreeTable> {
let table_name = normalize_ident(&tbl_name.name.0);
trace!("Creating table {}", table_name);
let mut cols = vec![];
@@ -140,13 +207,33 @@ fn create_table(tbl_name: QualifiedName, body: CreateTableBody, root_page: usize
}
CreateTableBody::AsSelect(_) => todo!(),
};
Ok(Table {
Ok(BTreeTable {
root_page,
name: table_name,
columns: cols,
})
}
/// Builds a pseudo-table for the given result columns.
///
/// No result-column kind is supported yet: every variant ends in a `todo!()`
/// panic, so the function only returns (with an empty table) when `columns`
/// is empty.
pub fn build_pseudo_table(columns: &[ResultColumn]) -> PseudoTable {
    let pseudo = PseudoTable::new();
    for result_column in columns {
        match result_column {
            ResultColumn::Expr(expr, _alias) => todo!("unsupported expression {:?}", expr),
            ResultColumn::Star | ResultColumn::TableStar(_) => todo!(),
        }
    }
    pseudo
}
pub struct Column {
pub name: String,
pub ty: Type,
@@ -177,8 +264,8 @@ impl fmt::Display for Type {
}
}
pub fn sqlite_schema_table() -> Table {
Table {
pub fn sqlite_schema_table() -> BTreeTable {
BTreeTable {
root_page: 1,
name: "sqlite_schema".to_string(),
columns: vec![

78
core/sorter.rs Normal file
View File

@@ -0,0 +1,78 @@
use crate::types::{Cursor, CursorResult, OwnedRecord, OwnedValue};
use anyhow::Result;
use log::trace;
use ordered_multimap::ListOrderedMultimap;
use std::cell::{Ref, RefCell};
use std::collections::{BTreeMap, VecDeque};
pub struct Sorter {
records: ListOrderedMultimap<String, OwnedRecord>,
current: RefCell<Option<OwnedRecord>>,
}
impl Sorter {
pub fn new() -> Self {
Self {
records: ListOrderedMultimap::new(),
current: RefCell::new(None),
}
}
pub fn insert(&mut self, key: String, record: OwnedRecord) {
self.records.insert(key, record);
}
}
impl Cursor for Sorter {
fn is_empty(&self) -> bool {
self.current.borrow().is_none()
}
fn rewind(&mut self) -> Result<CursorResult<()>> {
let current = self.records.pop_front();
match current {
Some((_, record)) => {
*self.current.borrow_mut() = Some(record);
}
None => {
*self.current.borrow_mut() = None;
}
};
Ok(CursorResult::Ok(()))
}
fn next(&mut self) -> Result<CursorResult<()>> {
let current = self.records.pop_front();
match current {
Some((_, record)) => {
*self.current.borrow_mut() = Some(record);
}
None => {
*self.current.borrow_mut() = None;
}
};
Ok(CursorResult::Ok(()))
}
fn wait_for_completion(&mut self) -> Result<()> {
Ok(())
}
fn rowid(&self) -> Result<Ref<Option<u64>>> {
todo!();
}
fn record(&self) -> Result<Ref<Option<OwnedRecord>>> {
Ok(self.current.borrow())
}
fn insert(&mut self, record: &OwnedRecord) -> Result<()> {
let key = match record.values[0] {
OwnedValue::Integer(i) => i.to_string(),
OwnedValue::Text(ref s) => s.to_string(),
_ => todo!(),
};
trace!("Inserting record with key: {}", key);
self.insert(key, record.clone());
Ok(())
}
}

View File

@@ -3,17 +3,17 @@ use std::rc::Rc;
use crate::function::AggFunc;
use crate::pager::Pager;
use crate::schema::{Schema, Table, Type};
use crate::schema::{Schema, Table};
use crate::sqlite3_ondisk::{DatabaseHeader, MIN_PAGE_CACHE_SIZE};
use crate::util::normalize_ident;
use crate::vdbe::{Insn, Program, ProgramBuilder};
use anyhow::Result;
use sqlite3_parser::ast;
struct Select<'a> {
struct Select {
columns: Vec<ast::ResultColumn>,
column_info: Vec<ColumnInfo>,
from: Option<&'a Table>,
from: Option<Table>,
limit: Option<ast::Limit>,
exist_aggregation: bool,
}
@@ -74,12 +74,13 @@ fn build_select(schema: &Schema, select: ast::Select) -> Result<Select> {
Some(table) => table,
None => anyhow::bail!("Parse error: no such table: {}", table_name),
};
let table = Table::BTree(table);
let column_info = analyze_columns(&columns, Some(&table));
let exist_aggregation = column_info.iter().any(|info| info.func.is_some());
Ok(Select {
columns,
column_info,
from: Some(&table),
from: Some(table),
limit: select.limit.clone(),
exist_aggregation,
})
@@ -129,10 +130,13 @@ fn translate_select(select: Select) -> Result<Program> {
None
}
});
let limit_insn = match (parsed_limit, select.from) {
let limit_insn = match (parsed_limit, &select.from) {
(Some(0), _) => Some(program.emit_placeholder()),
(_, Some(table)) => {
let root_page = table.root_page;
let root_page = match table {
Table::BTree(table) => table.root_page,
Table::Pseudo(_) => todo!(),
};
program.emit_insn(Insn::OpenReadAsync {
cursor_id,
root_page,
@@ -241,7 +245,7 @@ fn translate_columns(
let mut target = register_start;
for (col, info) in select.columns.iter().zip(select.column_info.iter()) {
translate_column(program, cursor_id, select.from, col, info, target);
translate_column(program, cursor_id, select.from.as_ref(), col, info, target);
target += info.columns_to_allocate;
}
(register_start, register_end)
@@ -266,7 +270,7 @@ fn translate_column(
}
sqlite3_parser::ast::ResultColumn::Star => {
let table = table.unwrap();
for (i, col) in table.columns.iter().enumerate() {
for (i, col) in table.columns().iter().enumerate() {
if table.column_is_rowid_alias(col) {
program.emit_insn(Insn::RowId {
cursor_id: cursor_id.unwrap(),
@@ -294,7 +298,7 @@ fn analyze_columns(
let mut info = ColumnInfo::new();
info.columns_to_allocate = 1;
if let sqlite3_parser::ast::ResultColumn::Star = column {
info.columns_to_allocate = table.unwrap().columns.len();
info.columns_to_allocate = table.unwrap().columns().len();
} else {
analyze_column(column, &mut info);
}

View File

@@ -32,6 +32,7 @@ pub enum OwnedValue {
Text(Rc<String>),
Blob(Rc<Vec<u8>>),
Agg(Box<AggContext>), // TODO(pere): make this without Box. Currently this might cause cache miss but let's leave it for future analysis
Record(OwnedRecord),
}
impl Display for OwnedValue {
@@ -46,6 +47,7 @@ impl Display for OwnedValue {
AggContext::Avg(acc, _count) => write!(f, "{}", acc),
AggContext::Sum(acc) => write!(f, "{}", acc),
},
OwnedValue::Record(r) => write!(f, "{:?}", r),
}
}
}
@@ -159,6 +161,7 @@ pub fn to_value(value: &OwnedValue) -> Value<'_> {
AggContext::Avg(acc, _count) => to_value(acc), // we assume aggfinal was called
AggContext::Sum(acc) => to_value(acc),
},
OwnedValue::Record(_) => todo!(),
}
}
@@ -197,6 +200,7 @@ impl<'a> Record<'a> {
}
}
/// A record that owns its values: a list of `OwnedValue`s held in a `Vec`.
#[derive(Debug, Clone, PartialEq)]
pub struct OwnedRecord {
// Values of the record, in order.
pub values: Vec<OwnedValue>,
}
@@ -219,4 +223,5 @@ pub trait Cursor {
fn wait_for_completion(&mut self) -> Result<()>;
fn rowid(&self) -> Result<Ref<Option<u64>>>;
fn record(&self) -> Result<Ref<Option<OwnedRecord>>>;
fn insert(&mut self, record: &OwnedRecord) -> Result<()>;
}

View File

@@ -1,7 +1,7 @@
use crate::btree::BTreeCursor;
use crate::function::AggFunc;
use crate::pager::Pager;
use crate::types::{AggContext, Cursor, CursorResult, OwnedValue, Record};
use crate::types::{AggContext, Cursor, CursorResult, OwnedRecord, OwnedValue, Record};
use anyhow::Result;
use std::borrow::BorrowMut;
@@ -30,6 +30,13 @@ pub enum Insn {
// Await the completion of the open cursor operation.
OpenReadAwait,
// Open a cursor for a pseudo-table that contains a single row.
OpenPseudo {
cursor_id: CursorID,
content_reg: usize,
num_fields: usize,
},
// Rewind the cursor to the beginning of the B-Tree.
RewindAsync {
cursor_id: CursorID,
@@ -48,6 +55,13 @@ pub enum Insn {
dest: usize,
},
// Make a record and write it to destination register.
MakeRecord {
start_reg: usize, // P1
count: usize, // P2
dest_reg: usize, // P3
},
// Emit a row of results.
ResultRow {
start_reg: usize, // P1
@@ -116,6 +130,34 @@ pub enum Insn {
register: usize,
func: AggFunc,
},
// Open a sorter.
SorterOpen {
cursor_id: CursorID,
},
// Insert a row into the sorter.
SorterInsert {
cursor_id: CursorID,
record_reg: usize,
},
// Sort the rows in the sorter.
SorterSort {
cursor_id: CursorID,
},
// Copy the sorter's current row into the destination register.
SorterData {
cursor_id: CursorID, // P1
dest_reg: usize, // P2
},
// Advance to the next row in the sorter.
SorterNext {
cursor_id: CursorID,
pc_if_next: BranchOffset,
},
}
pub struct ProgramBuilder {
@@ -253,6 +295,16 @@ impl Program {
Insn::OpenReadAwait => {
state.pc += 1;
}
Insn::OpenPseudo {
cursor_id,
content_reg,
num_fields,
} => {
let _ = cursor_id;
let _ = content_reg;
let _ = num_fields;
todo!();
}
Insn::RewindAsync { cursor_id } => {
let cursor = cursors.get_mut(cursor_id).unwrap();
match cursor.rewind()? {
@@ -289,6 +341,15 @@ impl Program {
}
state.pc += 1;
}
Insn::MakeRecord {
start_reg,
count,
dest_reg,
} => {
let record = make_owned_record(&state.registers, start_reg, count);
state.registers[*dest_reg] = OwnedValue::Record(record);
state.pc += 1;
}
Insn::ResultRow { start_reg, count } => {
let record = make_record(&state.registers, start_reg, count);
state.pc += 1;
@@ -423,6 +484,58 @@ impl Program {
};
state.pc += 1;
}
Insn::SorterOpen { cursor_id } => {
let cursor = Box::new(crate::sorter::Sorter::new());
cursors.insert(*cursor_id, cursor);
state.pc += 1;
}
Insn::SorterData {
cursor_id,
dest_reg,
} => {
let cursor = cursors.get_mut(cursor_id).unwrap();
if let Some(ref record) = *cursor.record()? {
state.registers[*dest_reg] = OwnedValue::Record(record.clone());
} else {
todo!();
}
state.pc += 1;
}
Insn::SorterInsert {
cursor_id,
record_reg,
} => {
let cursor = cursors.get_mut(cursor_id).unwrap();
let record = match &state.registers[*record_reg] {
OwnedValue::Record(record) => record,
_ => unreachable!("SorterInsert on non-record register"),
};
cursor.insert(record)?;
state.pc += 1;
}
Insn::SorterSort { cursor_id } => {
let cursor = cursors.get_mut(cursor_id).unwrap();
cursor.rewind()?;
state.pc += 1;
}
Insn::SorterNext {
cursor_id,
pc_if_next,
} => {
let cursor = cursors.get_mut(cursor_id).unwrap();
match cursor.next()? {
CursorResult::Ok(_) => {}
CursorResult::IO => {
// If there is I/O, the instruction is restarted.
return Ok(StepResult::IO);
}
}
if !cursor.is_empty() {
state.pc = *pc_if_next;
} else {
state.pc += 1;
}
}
}
}
}
@@ -436,6 +549,14 @@ fn make_record<'a>(registers: &'a [OwnedValue], start_reg: &usize, count: &usize
Record::new(values)
}
/// Clones `count` consecutive registers, starting at `start_reg`, into a new
/// `OwnedRecord`.
///
/// Panics if the register range runs past the end of `registers`.
fn make_owned_record(registers: &[OwnedValue], start_reg: &usize, count: &usize) -> OwnedRecord {
    let first = *start_reg;
    let end = first + *count;
    let values: Vec<OwnedValue> = (first..end).map(|reg| registers[reg].clone()).collect();
    OwnedRecord::new(values)
}
fn trace_insn(addr: BranchOffset, insn: &Insn) {
if !log::log_enabled!(log::Level::Trace) {
return;
@@ -481,6 +602,19 @@ fn insn_to_str(addr: BranchOffset, insn: &Insn) -> String {
0,
"".to_string(),
),
Insn::OpenPseudo {
cursor_id,
content_reg,
num_fields,
} => (
"OpenPseudo",
*cursor_id as i32,
*content_reg as i32,
*num_fields as i32,
OwnedValue::Text(Rc::new("".to_string())),
0,
format!("{} columns in r[{}]", num_fields, content_reg),
),
Insn::RewindAsync { cursor_id } => (
"RewindAsync",
*cursor_id as i32,
@@ -515,6 +649,19 @@ fn insn_to_str(addr: BranchOffset, insn: &Insn) -> String {
0,
format!("r[{}]= cursor {} column {}", dest, cursor_id, column),
),
Insn::MakeRecord {
start_reg,
count,
dest_reg,
} => (
"MakeRecord",
*start_reg as i32,
*count as i32,
*dest_reg as i32,
OwnedValue::Text(Rc::new("".to_string())),
0,
format!("r[{}..{}] -> r[{}]", start_reg, start_reg + count, dest_reg),
),
Insn::ResultRow { start_reg, count } => (
"ResultRow",
*start_reg as i32,
@@ -635,6 +782,60 @@ fn insn_to_str(addr: BranchOffset, insn: &Insn) -> String {
0,
format!("accum=r[{}]", *register),
),
Insn::SorterOpen { cursor_id } => (
"SorterOpen",
*cursor_id as i32,
0,
0,
OwnedValue::Text(Rc::new("".to_string())),
0,
format!("cursor={}", cursor_id),
),
Insn::SorterData {
cursor_id,
dest_reg,
} => (
"SorterData",
*cursor_id as i32,
*dest_reg as i32,
0,
OwnedValue::Text(Rc::new("".to_string())),
0,
format!("r[{}]=data", dest_reg),
),
Insn::SorterInsert {
cursor_id,
record_reg,
} => (
"SorterInsert",
*cursor_id as i32,
*record_reg as i32,
0,
OwnedValue::Text(Rc::new("".to_string())),
0,
format!("key=r[{}]", record_reg),
),
Insn::SorterSort { cursor_id } => (
"SorterSort",
*cursor_id as i32,
0,
0,
OwnedValue::Text(Rc::new("".to_string())),
0,
"".to_string(),
),
Insn::SorterNext {
cursor_id,
pc_if_next,
} => (
"SorterNext",
*cursor_id as i32,
*pc_if_next as i32,
0,
OwnedValue::Text(Rc::new("".to_string())),
0,
"".to_string(),
),
};
format!(
"{:<4} {:<13} {:<4} {:<4} {:<4} {:<13} {:<2} {}",