Merge 'Handle parsing URI according to SQLite specification' from Preston Thorpe

closes #977
In order to properly get #960 merged and keep some sort of the same API
we have now, we need to support URIs/query parameters for opening new
databases.
This PR doesn't attempt to implement anything useful with this, it only
handles parsing, but it will allow #960 to properly open a new file with
a specific VFS without having to entirely re-design the `open_file`
method/API. The existing option in that PR right now is less than ideal.
e.g. All of the existing methods already accept an `IO` impl
```rust
    pub fn open_file(io: Arc<dyn IO>, path: &str) -> Result<Arc<Database>> {
// or
      pub fn open(
        io: Arc<dyn IO>,
        page_io: Rc<dyn DatabaseStorage>,
        wal: Rc<RefCell<dyn Wal>>,
        shared_wal: Arc<RwLock<WalFileShared>>,
        buffer_pool: Rc<BufferPool>,
    ) -> Result<Arc<Database>> {

```
Right now, most of the parsed query parameters are not options we
support yet, but I figured it's better to handle parsing them now and
using them later on when we support them.
Also, if this looks way overly complicated for what it does... that's
because the cross platform edge-cases are a super pain in the ass.

Closes #1039
This commit is contained in:
Pekka Enberg
2025-02-24 11:17:39 +02:00
2 changed files with 519 additions and 1 deletions

View File

@@ -14,6 +14,7 @@ mod schema;
mod storage;
mod translate;
mod types;
#[allow(dead_code)]
mod util;
mod vdbe;
mod vector;

View File

@@ -3,7 +3,7 @@ use std::{rc::Rc, sync::Arc};
use crate::{
schema::{self, Column, Schema, Type},
Result, Statement, StepResult, IO,
LimboError, OpenFlags, Result, Statement, StepResult, IO,
};
// https://sqlite.org/lang_keywords.html
@@ -380,6 +380,228 @@ pub fn columns_from_create_table_body(body: ast::CreateTableBody) -> Result<Vec<
.collect::<Vec<_>>())
}
#[derive(Debug, Default, PartialEq)]
pub struct OpenOptions<'a> {
/// The authority component of the URI. may be 'localhost' or empty
pub authority: Option<&'a str>,
/// The normalized path to the database file
pub path: String,
/// The vfs query parameter causes the database connection to be opened using the VFS called NAME
pub vfs: Option<String>,
/// read-only, read-write, read-write and created if it does not exist, or pure in-memory database that never interacts with disk
pub mode: OpenMode,
/// Attempt to set the permissions of the new database file to match the existing file "filename".
pub modeof: Option<String>,
/// Specifies Cache mode shared | private
pub cache: CacheMode,
/// immutable=1|0 specifies that the database is stored on read-only media
pub immutable: bool,
}
#[derive(Clone, Default, Debug, Copy, PartialEq)]
pub enum OpenMode {
ReadOnly,
ReadWrite,
Memory,
#[default]
ReadWriteCreate,
}
#[derive(Debug, Default, Clone, Copy, PartialEq)]
pub enum CacheMode {
#[default]
Private,
Shared,
}
impl From<&str> for CacheMode {
fn from(s: &str) -> Self {
match s {
"private" => CacheMode::Private,
"shared" => CacheMode::Shared,
_ => CacheMode::Private,
}
}
}
impl OpenMode {
pub fn from_str(s: &str) -> Result<Self> {
match s.trim().to_lowercase().as_str() {
"ro" => Ok(OpenMode::ReadOnly),
"rw" => Ok(OpenMode::ReadWrite),
"memory" => Ok(OpenMode::Memory),
"rwc" => Ok(OpenMode::ReadWriteCreate),
_ => Err(LimboError::InvalidArgument(format!(
"Invalid mode: '{}'. Expected one of 'ro', 'rw', 'memory', 'rwc'",
s
))),
}
}
pub fn get_flags(&self) -> OpenFlags {
match self {
OpenMode::ReadWriteCreate => OpenFlags::Create,
_ => OpenFlags::None,
}
}
}
fn is_windows_path(path: &str) -> bool {
path.len() >= 3
&& path.chars().nth(1) == Some(':')
&& (path.chars().nth(2) == Some('/') || path.chars().nth(2) == Some('\\'))
}
/// converts windows-style paths to forward slashes, per SQLite spec.
fn normalize_windows_path(path: &str) -> String {
let mut normalized = path.replace("\\", "/");
// remove duplicate slashes (`//` → `/`)
while normalized.contains("//") {
normalized = normalized.replace("//", "/");
}
// if absolute windows path (`C:/...`), ensure it starts with `/`
if normalized.len() >= 3
&& !normalized.starts_with('/')
&& normalized.chars().nth(1) == Some(':')
&& normalized.chars().nth(2) == Some('/')
{
normalized.insert(0, '/');
}
normalized
}
/// Parses a SQLite URI, handling Windows and Unix paths separately.
pub fn parse_sqlite_uri(uri: &str) -> Result<OpenOptions> {
if !uri.starts_with("file:") {
return Ok(OpenOptions {
path: uri.to_string(),
..Default::default()
});
}
let mut opts = OpenOptions::default();
let without_scheme = &uri[5..];
let (without_fragment, _) = without_scheme
.split_once('#')
.unwrap_or((without_scheme, ""));
let (without_query, query) = without_fragment
.split_once('?')
.unwrap_or((without_fragment, ""));
parse_query_params(query, &mut opts)?;
// handle authority + path separately
if let Some(after_slashes) = without_query.strip_prefix("//") {
let (authority, path) = after_slashes.split_once('/').unwrap_or((after_slashes, ""));
// sqlite allows only `localhost` or empty authority.
if !(authority.is_empty() || authority == "localhost") {
return Err(LimboError::InvalidArgument(format!(
"Invalid authority '{}'. Only '' or 'localhost' allowed.",
authority
)));
}
opts.authority = if authority.is_empty() {
None
} else {
Some(authority)
};
if is_windows_path(path) {
opts.path = normalize_windows_path(&decode_percent(path));
} else if !path.is_empty() {
opts.path = format!("/{}", decode_percent(path));
} else {
opts.path = String::new();
}
} else {
// no authority, must be a normal absolute or relative path.
opts.path = decode_percent(without_query);
}
Ok(opts)
}
// parses query parameters and updates OpenOptions
fn parse_query_params(query: &str, opts: &mut OpenOptions) -> Result<()> {
for param in query.split('&') {
if let Some((key, value)) = param.split_once('=') {
let decoded_value = decode_percent(value);
match key {
"mode" => opts.mode = OpenMode::from_str(value)?,
"modeof" => opts.modeof = Some(decoded_value),
"cache" => opts.cache = decoded_value.as_str().into(),
"immutable" => opts.immutable = decoded_value == "1",
"vfs" => opts.vfs = Some(decoded_value),
_ => {}
}
}
}
Ok(())
}
/// Decodes percent-encoded characters
/// this function was adapted from the 'urlencoding' crate. MIT
pub fn decode_percent(uri: &str) -> String {
let from_hex_digit = |digit: u8| -> Option<u8> {
match digit {
b'0'..=b'9' => Some(digit - b'0'),
b'A'..=b'F' => Some(digit - b'A' + 10),
b'a'..=b'f' => Some(digit - b'a' + 10),
_ => None,
}
};
let offset = uri.chars().take_while(|&c| c != '%').count();
if offset >= uri.len() {
return uri.to_string();
}
let mut decoded: Vec<u8> = Vec::with_capacity(uri.len());
let (ascii, mut data) = uri.as_bytes().split_at(offset);
decoded.extend_from_slice(ascii);
loop {
let mut parts = data.splitn(2, |&c| c == b'%');
let non_escaped_part = parts.next().unwrap();
let rest = parts.next();
if rest.is_none() && decoded.is_empty() {
return String::from_utf8_lossy(data).to_string();
}
decoded.extend_from_slice(non_escaped_part);
match rest {
Some(rest) => match rest.get(0..2) {
Some([first, second]) => match from_hex_digit(*first) {
Some(first_val) => match from_hex_digit(*second) {
Some(second_val) => {
decoded.push((first_val << 4) | second_val);
data = &rest[2..];
}
None => {
decoded.extend_from_slice(&[b'%', *first]);
data = &rest[1..];
}
},
None => {
decoded.push(b'%');
data = rest;
}
},
_ => {
decoded.push(b'%');
decoded.extend_from_slice(rest);
break;
}
},
None => break,
}
}
String::from_utf8_lossy(&decoded).to_string()
}
#[cfg(test)]
pub mod tests {
use super::*;
@@ -635,4 +857,299 @@ pub mod tests {
assert!(!check_ident_equivalency("\"foo\"", "[bar]"));
assert!(!check_ident_equivalency("foo", "\"bar\""));
}
#[test]
fn test_simple_uri() {
let uri = "file:/home/user/db.sqlite";
let opts = parse_sqlite_uri(uri).unwrap();
assert_eq!(opts.path, "/home/user/db.sqlite");
assert_eq!(opts.authority, None);
}
#[test]
fn test_uri_with_authority() {
let uri = "file://localhost/home/user/db.sqlite";
let opts = parse_sqlite_uri(uri).unwrap();
assert_eq!(opts.path, "/home/user/db.sqlite");
assert_eq!(opts.authority, Some("localhost"));
}
#[test]
fn test_uri_with_invalid_authority() {
let uri = "file://example.com/home/user/db.sqlite";
let result = parse_sqlite_uri(uri);
assert!(result.is_err());
}
#[test]
fn test_uri_with_query_params() {
let uri = "file:/home/user/db.sqlite?vfs=unix&mode=ro&immutable=1";
let opts = parse_sqlite_uri(uri).unwrap();
assert_eq!(opts.path, "/home/user/db.sqlite");
assert_eq!(opts.vfs, Some("unix".to_string()));
assert_eq!(opts.mode, OpenMode::ReadOnly);
assert_eq!(opts.immutable, true);
}
#[test]
fn test_uri_with_fragment() {
let uri = "file:/home/user/db.sqlite#section1";
let opts = parse_sqlite_uri(uri).unwrap();
assert_eq!(opts.path, "/home/user/db.sqlite");
}
#[test]
fn test_uri_with_percent_encoding() {
let uri = "file:/home/user/db%20with%20spaces.sqlite?vfs=unix";
let opts = parse_sqlite_uri(uri).unwrap();
assert_eq!(opts.path, "/home/user/db with spaces.sqlite");
assert_eq!(opts.vfs, Some("unix".to_string()));
}
#[test]
fn test_uri_without_scheme() {
let uri = "/home/user/db.sqlite";
let result = parse_sqlite_uri(uri);
assert!(result.is_ok());
assert_eq!(result.unwrap().path, "/home/user/db.sqlite");
}
#[test]
fn test_uri_with_empty_query() {
let uri = "file:/home/user/db.sqlite?";
let opts = parse_sqlite_uri(uri).unwrap();
assert_eq!(opts.path, "/home/user/db.sqlite");
assert_eq!(opts.vfs, None);
}
#[test]
fn test_uri_with_partial_query() {
let uri = "file:/home/user/db.sqlite?mode=rw";
let opts = parse_sqlite_uri(uri).unwrap();
assert_eq!(opts.path, "/home/user/db.sqlite");
assert_eq!(opts.mode, OpenMode::ReadWrite);
assert_eq!(opts.vfs, None);
}
#[test]
fn test_uri_windows_style_path() {
let uri = "file:///C:/Users/test/db.sqlite";
let opts = parse_sqlite_uri(uri).unwrap();
assert_eq!(opts.path, "/C:/Users/test/db.sqlite");
}
#[test]
fn test_uri_with_only_query_params() {
let uri = "file:?mode=memory&cache=shared";
let opts = parse_sqlite_uri(uri).unwrap();
assert_eq!(opts.path, "");
assert_eq!(opts.mode, OpenMode::Memory);
assert_eq!(opts.cache, CacheMode::Shared);
}
#[test]
fn test_uri_with_only_fragment() {
let uri = "file:#fragment";
let opts = parse_sqlite_uri(uri).unwrap();
assert_eq!(opts.path, "");
}
#[test]
fn test_uri_with_invalid_scheme() {
let uri = "http:/home/user/db.sqlite";
let result = parse_sqlite_uri(uri);
assert!(result.is_ok());
assert_eq!(result.unwrap().path, "http:/home/user/db.sqlite");
}
#[test]
fn test_uri_with_multiple_query_params() {
let uri = "file:/home/user/db.sqlite?vfs=unix&mode=rw&cache=private&immutable=0";
let opts = parse_sqlite_uri(uri).unwrap();
assert_eq!(opts.path, "/home/user/db.sqlite");
assert_eq!(opts.vfs, Some("unix".to_string()));
assert_eq!(opts.mode, OpenMode::ReadWrite);
assert_eq!(opts.cache, CacheMode::Private);
assert_eq!(opts.immutable, false);
}
#[test]
fn test_uri_with_unknown_query_param() {
let uri = "file:/home/user/db.sqlite?unknown=param";
let opts = parse_sqlite_uri(uri).unwrap();
assert_eq!(opts.path, "/home/user/db.sqlite");
assert_eq!(opts.vfs, None);
}
#[test]
fn test_uri_with_multiple_equal_signs() {
let uri = "file:/home/user/db.sqlite?vfs=unix=custom";
let opts = parse_sqlite_uri(uri).unwrap();
assert_eq!(opts.path, "/home/user/db.sqlite");
assert_eq!(opts.vfs, Some("unix=custom".to_string()));
}
#[test]
fn test_uri_with_trailing_slash() {
let uri = "file:/home/user/db.sqlite/";
let opts = parse_sqlite_uri(uri).unwrap();
assert_eq!(opts.path, "/home/user/db.sqlite/");
}
#[test]
fn test_uri_with_encoded_characters_in_query() {
let uri = "file:/home/user/db.sqlite?vfs=unix%20mode";
let opts = parse_sqlite_uri(uri).unwrap();
assert_eq!(opts.path, "/home/user/db.sqlite");
assert_eq!(opts.vfs, Some("unix mode".to_string()));
}
#[test]
fn test_uri_windows_network_path() {
let uri = "file://server/share/db.sqlite";
let result = parse_sqlite_uri(uri);
assert!(result.is_err()); // non-localhost authority should fail
}
#[test]
fn test_uri_windows_drive_letter_with_slash() {
let uri = "file:///C:/database.sqlite";
let opts = parse_sqlite_uri(uri).unwrap();
assert_eq!(opts.path, "/C:/database.sqlite");
}
#[test]
fn test_localhost_with_double_slash_and_no_path() {
let uri = "file://localhost";
let opts = parse_sqlite_uri(uri).unwrap();
assert_eq!(opts.path, "");
assert_eq!(opts.authority, Some("localhost"));
}
#[test]
fn test_uri_windows_drive_letter_without_slash() {
let uri = "file:///C:/database.sqlite";
let opts = parse_sqlite_uri(uri).unwrap();
assert_eq!(opts.path, "/C:/database.sqlite");
}
#[test]
fn test_improper_mode() {
// any other mode but ro, rwc, rw, memory should fail per sqlite
let uri = "file:data.db?mode=readonly";
let res = parse_sqlite_uri(uri);
assert!(res.is_err());
// including empty
let uri = "file:/home/user/db.sqlite?vfs=&mode=";
let res = parse_sqlite_uri(uri);
assert!(res.is_err());
}
// Some examples from https://www.sqlite.org/c3ref/open.html#urifilenameexamples
#[test]
fn test_simple_file_current_dir() {
let uri = "file:data.db";
let opts = parse_sqlite_uri(uri).unwrap();
assert_eq!(opts.path, "data.db");
assert_eq!(opts.authority, None);
assert_eq!(opts.vfs, None);
assert_eq!(opts.mode, OpenMode::ReadWriteCreate);
}
#[test]
fn test_simple_file_three_slash() {
let uri = "file:///home/data/data.db";
let opts = parse_sqlite_uri(uri).unwrap();
assert_eq!(opts.path, "/home/data/data.db");
assert_eq!(opts.authority, None);
assert_eq!(opts.vfs, None);
assert_eq!(opts.mode, OpenMode::ReadWriteCreate);
}
#[test]
fn test_simple_file_two_slash_localhost() {
let uri = "file://localhost/home/fred/data.db";
let opts = parse_sqlite_uri(uri).unwrap();
assert_eq!(opts.path, "/home/fred/data.db");
assert_eq!(opts.authority, Some("localhost"));
assert_eq!(opts.vfs, None);
}
#[test]
fn test_windows_double_invalid() {
let uri = "file://C:/home/fred/data.db?mode=ro";
let opts = parse_sqlite_uri(uri);
assert!(opts.is_err());
}
#[test]
fn test_simple_file_two_slash() {
let uri = "file:///C:/Documents%20and%20Settings/fred/Desktop/data.db";
let opts = parse_sqlite_uri(uri).unwrap();
assert_eq!(opts.path, "/C:/Documents and Settings/fred/Desktop/data.db");
assert_eq!(opts.vfs, None);
}
#[test]
fn test_decode_percent_basic() {
assert_eq!(decode_percent("hello%20world"), "hello world");
assert_eq!(decode_percent("file%3Adata.db"), "file:data.db");
assert_eq!(decode_percent("path%2Fto%2Ffile"), "path/to/file");
}
#[test]
fn test_decode_percent_edge_cases() {
assert_eq!(decode_percent(""), "");
assert_eq!(decode_percent("plain_text"), "plain_text");
assert_eq!(
decode_percent("%2Fhome%2Fuser%2Fdb.sqlite"),
"/home/user/db.sqlite"
);
// multiple percent-encoded characters in sequence
assert_eq!(decode_percent("%41%42%43"), "ABC");
assert_eq!(decode_percent("%61%62%63"), "abc");
}
#[test]
fn test_decode_percent_invalid_sequences() {
// invalid percent encoding (single % without two hex digits)
assert_eq!(decode_percent("hello%"), "hello%");
// only one hex digit after %
assert_eq!(decode_percent("file%2"), "file%2");
// invalid hex digits (not 0-9, A-F, a-f)
assert_eq!(decode_percent("file%2X.db"), "file%2X.db");
// Incomplete sequence at the end, leave untouched
assert_eq!(decode_percent("path%2Fto%2"), "path/to%2");
}
#[test]
fn test_decode_percent_mixed_valid_invalid() {
assert_eq!(decode_percent("hello%20world%"), "hello world%");
assert_eq!(decode_percent("%2Fpath%2Xto%2Ffile"), "/path%2Xto/file");
assert_eq!(decode_percent("file%3Adata.db%2"), "file:data.db%2");
}
#[test]
fn test_decode_percent_special_characters() {
assert_eq!(
decode_percent("%21%40%23%24%25%5E%26%2A%28%29"),
"!@#$%^&*()"
);
assert_eq!(decode_percent("%5B%5D%7B%7D%7C%5C%3A"), "[]{}|\\:");
}
#[test]
fn test_decode_percent_unmodified_valid_text() {
// ensure already valid text remains unchanged
assert_eq!(
decode_percent("C:/Users/Example/Database.sqlite"),
"C:/Users/Example/Database.sqlite"
);
assert_eq!(
decode_percent("/home/user/db.sqlite"),
"/home/user/db.sqlite"
);
}
}