mirror of
https://github.com/aljazceru/turso.git
synced 2025-12-17 08:34:19 +01:00
Add built-in manual pages for Turso
In the hopes of doing a good job at teaching people what Turso can do, I am adding built-in manual pages. When the CLI starts, it picks a feature at random, and tells the user that the feature exists: ``` Turso v0.2.0-pre.8 Enter ".help" for usage hints. Did you know that Turso supports Change Data Capture? Type .manual cdc to learn more. This software is ALPHA, only use for development, testing, and experimentation. Connected to a transient in-memory database. Use ".open FILENAME" to reopen on a persistent database ``` There is a lot we can do to make this feature world class: - we can automatically compile examples during compile time like rust-doc, to make sure examples used in the manuals always work - we can implement scrolling and navigation - we can document a lot more features But for now, this is a start!
This commit is contained in:
223
Cargo.lock
generated
223
Cargo.lock
generated
@@ -592,7 +592,7 @@ version = "7.1.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4a65ebfec4fb190b6f90e944a817d60499ee0744e582530e2c9900a22e591d9a"
|
||||
dependencies = [
|
||||
"crossterm",
|
||||
"crossterm 0.28.1",
|
||||
"unicode-segmentation",
|
||||
"unicode-width 0.2.0",
|
||||
]
|
||||
@@ -649,6 +649,15 @@ version = "0.3.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7c74b8349d32d297c9134b8c88677813a227df8f779daa29bfc29c183fe3dca6"
|
||||
|
||||
[[package]]
|
||||
name = "convert_case"
|
||||
version = "0.7.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "bb402b8d4c85569410425650ce3eddc7d698ed96d39a73f941b08fb63082f1e7"
|
||||
dependencies = [
|
||||
"unicode-segmentation",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "convert_case"
|
||||
version = "0.8.0"
|
||||
@@ -658,6 +667,15 @@ dependencies = [
|
||||
"unicode-segmentation",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "coolor"
|
||||
version = "1.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "980c2afde4af43d6a05c5be738f9eae595cff86dce1f38f88b95058a98c027f3"
|
||||
dependencies = [
|
||||
"crossterm 0.29.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "core-foundation-sys"
|
||||
version = "0.8.7"
|
||||
@@ -751,6 +769,45 @@ dependencies = [
|
||||
"itertools 0.10.5",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "crokey"
|
||||
version = "1.3.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "51360853ebbeb3df20c76c82aecf43d387a62860f1a59ba65ab51f00eea85aad"
|
||||
dependencies = [
|
||||
"crokey-proc_macros",
|
||||
"crossterm 0.29.0",
|
||||
"once_cell",
|
||||
"serde",
|
||||
"strict",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "crokey-proc_macros"
|
||||
version = "1.3.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3bf1a727caeb5ee5e0a0826a97f205a9cf84ee964b0b48239fef5214a00ae439"
|
||||
dependencies = [
|
||||
"crossterm 0.29.0",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"strict",
|
||||
"syn 2.0.100",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "crossbeam"
|
||||
version = "0.8.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1137cd7e7fc0fb5d3c5a8678be38ec56e819125d8d7907411fe24ccb943faca8"
|
||||
dependencies = [
|
||||
"crossbeam-channel",
|
||||
"crossbeam-deque",
|
||||
"crossbeam-epoch",
|
||||
"crossbeam-queue",
|
||||
"crossbeam-utils",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "crossbeam-channel"
|
||||
version = "0.5.14"
|
||||
@@ -779,6 +836,15 @@ dependencies = [
|
||||
"crossbeam-utils",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "crossbeam-queue"
|
||||
version = "0.3.12"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0f58bbc28f91df819d0aa2a2c00cd19754769c2fad90579b3592b1c9ba7a3115"
|
||||
dependencies = [
|
||||
"crossbeam-utils",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "crossbeam-skiplist"
|
||||
version = "0.1.3"
|
||||
@@ -808,6 +874,24 @@ dependencies = [
|
||||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "crossterm"
|
||||
version = "0.29.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d8b9f2e4c67f833b660cdb0a3523065869fb35570177239812ed4c905aeff87b"
|
||||
dependencies = [
|
||||
"bitflags 2.9.4",
|
||||
"crossterm_winapi",
|
||||
"derive_more",
|
||||
"document-features",
|
||||
"mio",
|
||||
"parking_lot",
|
||||
"rustix 1.0.7",
|
||||
"signal-hook",
|
||||
"signal-hook-mio",
|
||||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "crossterm_winapi"
|
||||
version = "0.9.1"
|
||||
@@ -995,6 +1079,27 @@ dependencies = [
|
||||
"powerfmt",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "derive_more"
|
||||
version = "2.0.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "093242cf7570c207c83073cf82f79706fe7b8317e98620a47d5be7c3d8497678"
|
||||
dependencies = [
|
||||
"derive_more-impl",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "derive_more-impl"
|
||||
version = "2.0.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "bda628edc44c4bb645fbe0f758797143e4e07926f7ebf4e9bdfbd3d2ce621df3"
|
||||
dependencies = [
|
||||
"convert_case 0.7.1",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.100",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "difflib"
|
||||
version = "0.4.0"
|
||||
@@ -1070,6 +1175,15 @@ version = "0.3.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "fea41bba32d969b513997752735605054bc0dfa92b4c56bf1189f2e174be7a10"
|
||||
|
||||
[[package]]
|
||||
name = "document-features"
|
||||
version = "0.2.11"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "95249b50c6c185bee49034bcb378a49dc2b5dff0be90ff6616d31d64febab05d"
|
||||
dependencies = [
|
||||
"litrs",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "dtor"
|
||||
version = "0.0.6"
|
||||
@@ -1793,6 +1907,25 @@ dependencies = [
|
||||
"icu_properties",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "include_dir"
|
||||
version = "0.7.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "923d117408f1e49d914f1a379a309cffe4f18c05cf4e3d12e613a15fc81bd0dd"
|
||||
dependencies = [
|
||||
"include_dir_macros",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "include_dir_macros"
|
||||
version = "0.7.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7cab85a7ed0bd5f0e76d93846e0147172bed2e2d3f859bcc33a8d9699cad1a75"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "indexmap"
|
||||
version = "1.9.3"
|
||||
@@ -2022,6 +2155,29 @@ dependencies = [
|
||||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "lazy-regex"
|
||||
version = "3.4.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "60c7310b93682b36b98fa7ea4de998d3463ccbebd94d935d6b48ba5b6ffa7126"
|
||||
dependencies = [
|
||||
"lazy-regex-proc_macros",
|
||||
"once_cell",
|
||||
"regex",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "lazy-regex-proc_macros"
|
||||
version = "3.4.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4ba01db5ef81e17eb10a5e0f2109d1b3a3e29bac3070fdbd7d156bf7dbd206a1"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"regex",
|
||||
"syn 2.0.100",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "lazy_static"
|
||||
version = "1.5.0"
|
||||
@@ -2250,6 +2406,12 @@ version = "0.7.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "23fb14cb19457329c82206317a5663005a4d404783dc74f4252769b0d5f42856"
|
||||
|
||||
[[package]]
|
||||
name = "litrs"
|
||||
version = "0.4.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f5e54036fe321fd421e10d732f155734c4e4afd610dd556d9a82833ab3ee0bed"
|
||||
|
||||
[[package]]
|
||||
name = "lock_api"
|
||||
version = "0.4.13"
|
||||
@@ -2364,6 +2526,15 @@ dependencies = [
|
||||
"libmimalloc-sys",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "minimad"
|
||||
version = "0.13.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a9c5d708226d186590a7b6d4a9780e2bdda5f689e0d58cd17012a298efd745d2"
|
||||
dependencies = [
|
||||
"once_cell",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "miniz_oxide"
|
||||
version = "0.8.5"
|
||||
@@ -2411,7 +2582,7 @@ version = "3.1.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "43e61844e0c0bb81e711f2084abe7cff187b03ca21ff8b000cb59bbda61e15a9"
|
||||
dependencies = [
|
||||
"convert_case",
|
||||
"convert_case 0.8.0",
|
||||
"ctor 0.4.2",
|
||||
"napi-derive-backend",
|
||||
"proc-macro2",
|
||||
@@ -2425,7 +2596,7 @@ version = "2.0.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b7ab19e9b98efb13895f492a2e367ca50c955ac3c4723613af73fdda4011afcc"
|
||||
dependencies = [
|
||||
"convert_case",
|
||||
"convert_case 0.8.0",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"semver",
|
||||
@@ -3551,6 +3722,27 @@ version = "1.3.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64"
|
||||
|
||||
[[package]]
|
||||
name = "signal-hook"
|
||||
version = "0.3.18"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d881a16cf4426aa584979d30bd82cb33429027e42122b169753d6ef1085ed6e2"
|
||||
dependencies = [
|
||||
"libc",
|
||||
"signal-hook-registry",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "signal-hook-mio"
|
||||
version = "0.2.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "34db1a06d485c9142248b7a054f034b349b212551f3dfd19c94d45a754a217cd"
|
||||
dependencies = [
|
||||
"libc",
|
||||
"mio",
|
||||
"signal-hook",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "signal-hook-registry"
|
||||
version = "1.4.2"
|
||||
@@ -3666,6 +3858,12 @@ version = "0.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9091b6114800a5f2141aee1d1b9d6ca3592ac062dc5decb3764ec5895a47b4eb"
|
||||
|
||||
[[package]]
|
||||
name = "strict"
|
||||
version = "0.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f42444fea5b87a39db4218d9422087e66a85d0e7a0963a439b07bcdf91804006"
|
||||
|
||||
[[package]]
|
||||
name = "strsim"
|
||||
version = "0.11.1"
|
||||
@@ -3817,6 +4015,22 @@ dependencies = [
|
||||
"windows-sys 0.59.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "termimad"
|
||||
version = "0.30.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "22117210909e9dfff30a558f554c7fb3edb198ef614e7691386785fb7679677c"
|
||||
dependencies = [
|
||||
"coolor",
|
||||
"crokey",
|
||||
"crossbeam",
|
||||
"lazy-regex",
|
||||
"minimad",
|
||||
"serde",
|
||||
"thiserror 1.0.69",
|
||||
"unicode-width 0.1.14",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "terminal_size"
|
||||
version = "0.4.2"
|
||||
@@ -4155,17 +4369,20 @@ dependencies = [
|
||||
"ctrlc",
|
||||
"dirs 5.0.1",
|
||||
"env_logger 0.11.7",
|
||||
"include_dir",
|
||||
"libc",
|
||||
"limbo_completion",
|
||||
"miette",
|
||||
"mimalloc",
|
||||
"nu-ansi-term",
|
||||
"rand 0.8.5",
|
||||
"rustyline",
|
||||
"schemars 0.8.22",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"shlex",
|
||||
"syntect",
|
||||
"termimad",
|
||||
"toml",
|
||||
"toml_edit",
|
||||
"tracing",
|
||||
|
||||
@@ -46,6 +46,9 @@ serde = { workspace = true, features = ["derive"]}
|
||||
validator = {version = "0.20.0", features = ["derive"]}
|
||||
toml_edit = {version = "0.22.24", features = ["serde"]}
|
||||
serde_json = "1.0"
|
||||
termimad = "0.30"
|
||||
include_dir = "0.7"
|
||||
rand = "0.8"
|
||||
mimalloc = { workspace = true }
|
||||
|
||||
[features]
|
||||
@@ -55,3 +58,4 @@ tracing_release = ["turso_core/tracing_release"]
|
||||
|
||||
[build-dependencies]
|
||||
syntect = { git = "https://github.com/trishume/syntect.git", rev = "64644ffe064457265cbcee12a0c1baf9485ba6ee" }
|
||||
include_dir = "0.7"
|
||||
|
||||
13
cli/app.rs
13
cli/app.rs
@@ -10,6 +10,7 @@ use crate::{
|
||||
get_io, get_writer, ApplyWriter, DbLocation, NoopProgress, OutputMode, ProgressSink,
|
||||
Settings, StderrProgress,
|
||||
},
|
||||
manual,
|
||||
opcodes_dictionary::OPCODE_DESCRIPTIONS,
|
||||
HISTORY_FILE,
|
||||
};
|
||||
@@ -255,6 +256,12 @@ impl Limbo {
|
||||
if !quiet {
|
||||
self.writeln_fmt(format_args!("Turso v{}", env!("CARGO_PKG_VERSION")))?;
|
||||
self.writeln("Enter \".help\" for usage hints.")?;
|
||||
|
||||
// Add random feature hint
|
||||
if let Some(hint) = manual::get_random_feature_hint() {
|
||||
self.writeln(&hint)?;
|
||||
}
|
||||
|
||||
self.writeln(
|
||||
"This software is ALPHA, only use for development, testing, and experimentation.",
|
||||
)?;
|
||||
@@ -732,6 +739,12 @@ impl Limbo {
|
||||
let _ = self.writeln(e.to_string());
|
||||
}
|
||||
}
|
||||
Command::Manual(args) => {
|
||||
let w = self.writer.as_mut().unwrap();
|
||||
if let Err(e) = manual::display_manual(args.page.as_deref(), w) {
|
||||
let _ = self.writeln(e.to_string());
|
||||
}
|
||||
}
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
@@ -11,6 +11,7 @@ use syntect::parsing::SyntaxSet;
|
||||
fn main() {
|
||||
println!("cargo::rerun-if-changed=SQL.sublime-syntax");
|
||||
println!("cargo::rerun-if-changed=build.rs");
|
||||
println!("cargo::rerun-if-changed=manuals");
|
||||
|
||||
let out_dir = env::var_os("OUT_DIR").unwrap();
|
||||
let syntax =
|
||||
|
||||
@@ -166,6 +166,12 @@ pub struct CloneArgs {
|
||||
pub output_file: String,
|
||||
}
|
||||
|
||||
// Arguments for the `.manual` dot-command (wrapped by `Command::Manual`).
// NOTE(review): with clap's derive API, `///` doc comments are normally surfaced
// as help text, so the existing `///` line is left untouched — confirm before rewording it.
#[derive(Debug, Clone, Args)]
|
||||
pub struct ManualArgs {
|
||||
/// The manual page to display (e.g., "mcp")
|
||||
// Optional: `None` when the command is given without a page name.
pub page: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(ValueEnum, Copy, Clone, Debug, PartialEq, Eq)]
|
||||
pub enum HeadersMode {
|
||||
On,
|
||||
|
||||
@@ -3,8 +3,8 @@ pub mod import;
|
||||
|
||||
use args::{
|
||||
CwdArgs, DbConfigArgs, EchoArgs, ExitArgs, HeadersArgs, IndexesArgs, LoadExtensionArgs,
|
||||
NullValueArgs, OpcodesArgs, OpenArgs, OutputModeArgs, SchemaArgs, SetOutputArgs, StatsArgs,
|
||||
TablesArgs, TimerArgs,
|
||||
ManualArgs, NullValueArgs, OpcodesArgs, OpenArgs, OutputModeArgs, SchemaArgs, SetOutputArgs,
|
||||
StatsArgs, TablesArgs, TimerArgs,
|
||||
};
|
||||
use clap::Parser;
|
||||
use import::ImportArgs;
|
||||
@@ -94,6 +94,9 @@ pub enum Command {
|
||||
Headers(HeadersArgs),
|
||||
#[command(name = "clone", display_name = ".clone")]
|
||||
Clone(CloneArgs),
|
||||
/// Display manual pages for features
|
||||
#[command(name = "manual", display_name = ".manual", alias = "man")]
|
||||
Manual(ManualArgs),
|
||||
}
|
||||
|
||||
const _HELP_TEMPLATE: &str = "{before-help}{name}
|
||||
|
||||
@@ -361,6 +361,10 @@ pub const AFTER_HELP_MSG: &str = r#"Usage Examples:
|
||||
18. To clone the open database to another file:
|
||||
.clone output_file.db
|
||||
|
||||
19. To view manual pages for features:
|
||||
.manual mcp # View MCP server documentation
|
||||
.man # List all available manuals
|
||||
|
||||
Note:
|
||||
- All SQL commands must end with a semicolon (;).
|
||||
- Special commands start with a dot (.) and are not required to end with a semicolon."#;
|
||||
|
||||
@@ -4,6 +4,7 @@ mod commands;
|
||||
mod config;
|
||||
mod helper;
|
||||
mod input;
|
||||
mod manual;
|
||||
mod mcp_server;
|
||||
mod opcodes_dictionary;
|
||||
|
||||
|
||||
144
cli/manual.rs
Normal file
144
cli/manual.rs
Normal file
@@ -0,0 +1,144 @@
|
||||
use include_dir::{include_dir, Dir};
|
||||
use rand::seq::SliceRandom;
|
||||
use std::io::{IsTerminal, Write};
|
||||
use termimad::MadSkin;
|
||||
|
||||
static MANUAL_DIR: Dir = include_dir!("$CARGO_MANIFEST_DIR/manuals");
|
||||
|
||||
/// Get a random feature to highlight from available manuals
|
||||
pub fn get_random_feature_hint() -> Option<String> {
|
||||
let features: Vec<(&str, String)> = MANUAL_DIR
|
||||
.files()
|
||||
.filter_map(|file| {
|
||||
let path = file.path();
|
||||
let name = path.file_stem()?.to_str()?;
|
||||
|
||||
if name == "index" {
|
||||
return None;
|
||||
}
|
||||
|
||||
let content = file.contents_utf8()?;
|
||||
let display_name = extract_display_name(content).unwrap_or_else(|| name.to_string());
|
||||
Some((name, display_name))
|
||||
})
|
||||
.collect();
|
||||
|
||||
if features.is_empty() {
|
||||
return None;
|
||||
}
|
||||
|
||||
features
|
||||
.choose(&mut rand::thread_rng())
|
||||
.map(|(feature, display_name)| {
|
||||
format!("Did you know that Turso supports {display_name}? Type .manual {feature} to learn more.")
|
||||
})
|
||||
}
|
||||
|
||||
/// Extracts the `display_name` value from a manual page's frontmatter.
///
/// Frontmatter is the block that starts at a leading `---` and ends at the
/// next line that is exactly `---`. Inside it, the first `display_name:`
/// entry with a non-blank value wins; surrounding whitespace and double
/// quotes around the value are removed.
///
/// Returns `None` when `content` does not start with frontmatter, when the
/// frontmatter is never closed, or when it has no non-blank `display_name`,
/// so that the caller can fall back to another label.
fn extract_display_name(content: &str) -> Option<String> {
    if !content.starts_with("---") {
        return None;
    }

    let mut display_name: Option<String> = None;

    // Stream the lines after the opening delimiter instead of collecting the
    // whole page; only the frontmatter block is ever inspected.
    for line in content.lines().skip(1) {
        if line == "---" {
            // Closing delimiter reached: whatever was found (if anything) is final.
            return display_name;
        }

        if display_name.is_some() {
            continue;
        }

        if let Some(raw) = line.strip_prefix("display_name:") {
            // Trim before unquoting so `display_name:  "X"  ` still loses its quotes.
            let value = raw.trim().trim_matches('"');
            // A blank value would render as "Turso supports ?" in the hint.
            if !value.trim().is_empty() {
                display_name = Some(value.to_string());
            }
        }
    }

    // No closing delimiter: this was not a well-formed frontmatter block.
    None
}
|
||||
|
||||
/// Returns `content` without its leading frontmatter block.
///
/// Frontmatter is recognised when `content` starts with `---` and a later
/// line consists solely of `---`. The returned slice begins immediately
/// after that closing line. Both `\n` and `\r\n` line endings are accepted,
/// and the closing delimiter may be the last line of the input without a
/// trailing newline.
///
/// When there is no frontmatter, or it is never closed, `content` is
/// returned unchanged.
fn strip_frontmatter(content: &str) -> &str {
    const DELIMITER: &str = "---";

    if !content.starts_with(DELIMITER) {
        return content;
    }

    // Walk the input line by line while tracking the byte offset just past
    // each line, so the body can be sliced without hand-computed constants.
    let mut offset = 0;
    for (index, line) in content.split_inclusive('\n').enumerate() {
        offset += line.len();

        // Index 0 is the opening delimiter; only later lines can close the block.
        let bare = line.trim_end_matches(|c: char| c == '\r' || c == '\n');
        if index > 0 && bare == DELIMITER {
            return &content[offset..];
        }
    }

    content
}
|
||||
|
||||
pub fn display_manual(page: Option<&str>, writer: &mut dyn Write) -> anyhow::Result<()> {
|
||||
let page_name = page.unwrap_or("index");
|
||||
let file_name = format!("{page_name}.md");
|
||||
|
||||
// Try to find the manual page
|
||||
let content = if let Some(file) = MANUAL_DIR.get_file(&file_name) {
|
||||
file.contents_utf8()
|
||||
.ok_or_else(|| anyhow::anyhow!("Failed to read manual page: {}", page_name))?
|
||||
} else if page.is_none() {
|
||||
// If no page specified, list available pages
|
||||
return list_available_manuals(writer);
|
||||
} else {
|
||||
return Err(anyhow::anyhow!("Manual page not found: {}", page_name));
|
||||
};
|
||||
|
||||
// Strip frontmatter before displaying
|
||||
let content = strip_frontmatter(content);
|
||||
|
||||
// Check if we're in a terminal or piped output
|
||||
if IsTerminal::is_terminal(&std::io::stdout()) {
|
||||
// Use termimad for nice terminal rendering
|
||||
render_in_terminal(content)?;
|
||||
} else {
|
||||
// Plain output for pipes/redirects
|
||||
writeln!(writer, "{content}")?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn render_in_terminal(content: &str) -> anyhow::Result<()> {
|
||||
// Create a skin with nice styling
|
||||
let mut skin = MadSkin::default();
|
||||
|
||||
// Customize the skin for better appearance
|
||||
skin.set_headers_fg(termimad::crossterm::style::Color::Cyan);
|
||||
skin.bold.set_fg(termimad::crossterm::style::Color::Yellow);
|
||||
skin.italic
|
||||
.set_fg(termimad::crossterm::style::Color::Magenta);
|
||||
skin.inline_code
|
||||
.set_fg(termimad::crossterm::style::Color::Green);
|
||||
skin.code_block
|
||||
.set_fg(termimad::crossterm::style::Color::Green);
|
||||
|
||||
// Just print the formatted content
|
||||
skin.print_text(content);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn list_available_manuals(writer: &mut dyn Write) -> anyhow::Result<()> {
|
||||
writeln!(writer, "Available manual pages:")?;
|
||||
writeln!(writer)?;
|
||||
|
||||
let mut pages: Vec<String> = Vec::new();
|
||||
|
||||
for file in MANUAL_DIR.files() {
|
||||
if let Some(name) = file.path().file_stem() {
|
||||
if let Some(name_str) = name.to_str() {
|
||||
pages.push(name_str.to_string());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pages.sort();
|
||||
|
||||
for page in pages {
|
||||
writeln!(writer, " .manual {page} # or .man {page}")?;
|
||||
}
|
||||
|
||||
if MANUAL_DIR.files().count() == 0 {
|
||||
writeln!(writer, " (No manual pages found)")?;
|
||||
}
|
||||
|
||||
writeln!(writer)?;
|
||||
writeln!(writer, "Usage: .manual <page> or .man <page>")?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
187
cli/manuals/cdc.md
Normal file
187
cli/manuals/cdc.md
Normal file
@@ -0,0 +1,187 @@
|
||||
---
|
||||
display_name: "Change Data Capture"
|
||||
---
|
||||
|
||||
# CDC - Change Data Capture
|
||||
|
||||
## Overview
|
||||
|
||||
Change Data Capture (CDC) allows you to track and capture all data changes (inserts, updates, deletes) made to your database tables. This is useful for building reactive applications, syncing data between systems, replication, auditing, and more.
|
||||
|
||||
**Note:** This feature is currently marked as unstable, meaning its API may change in future versions. The underlying functionality itself is reliable.
|
||||
|
||||
## Enabling CDC
|
||||
|
||||
CDC is enabled per connection using the PRAGMA command:
|
||||
|
||||
```sql
|
||||
PRAGMA unstable_capture_data_changes_conn('<mode>[,<table_name>]');
|
||||
```
|
||||
|
||||
### Parameters
|
||||
|
||||
- **mode**: The capture mode (see below)
|
||||
- **table_name**: Optional custom table name for storing changes (defaults to `turso_cdc`)
|
||||
|
||||
### Capture Modes
|
||||
|
||||
- **`off`**: Disable CDC for this connection
|
||||
- **`id`**: Capture only the primary key/rowid of changed rows
|
||||
- **`before`**: Capture row state before changes (for updates/deletes)
|
||||
- **`after`**: Capture row state after changes (for inserts/updates)
|
||||
- **`full`**: Capture both before and after states, plus update details
|
||||
|
||||
## Examples
|
||||
|
||||
### Basic Usage
|
||||
|
||||
Enable CDC with ID mode (captures primary keys only):
|
||||
```sql
|
||||
PRAGMA unstable_capture_data_changes_conn('id');
|
||||
```
|
||||
|
||||
### Using Different Modes
|
||||
|
||||
Capture the state before changes:
|
||||
```sql
|
||||
PRAGMA unstable_capture_data_changes_conn('before');
|
||||
```
|
||||
|
||||
Capture the state after changes:
|
||||
```sql
|
||||
PRAGMA unstable_capture_data_changes_conn('after');
|
||||
```
|
||||
|
||||
Capture complete change information:
|
||||
```sql
|
||||
PRAGMA unstable_capture_data_changes_conn('full');
|
||||
```
|
||||
|
||||
### Custom CDC Table
|
||||
|
||||
Store changes in a custom table instead of the default `turso_cdc`:
|
||||
```sql
|
||||
PRAGMA unstable_capture_data_changes_conn('full,my_changes_table');
|
||||
```
|
||||
|
||||
### Disable CDC
|
||||
|
||||
Turn off CDC for the current connection:
|
||||
```sql
|
||||
PRAGMA unstable_capture_data_changes_conn('off');
|
||||
```
|
||||
|
||||
## CDC Table Structure
|
||||
|
||||
The CDC table (default name: `turso_cdc`) contains the following columns:
|
||||
|
||||
| Column | Type | Description |
|
||||
|--------|------|-------------|
|
||||
| `change_id` | INTEGER | Auto-incrementing unique identifier for each change |
|
||||
| `change_time` | INTEGER | Timestamp of the change (Unix epoch) |
|
||||
| `change_type` | INTEGER | Type of change: 1 (INSERT), 0 (UPDATE), -1 (DELETE) |
|
||||
| `table_name` | TEXT | Name of the table that was changed |
|
||||
| `id` | varies | Primary key/rowid of the changed row |
|
||||
| `before` | BLOB | Row data before the change (for modes: before, full) |
|
||||
| `after` | BLOB | Row data after the change (for modes: after, full) |
|
||||
| `updates` | BLOB | Details of updated columns (for mode: full) |
|
||||
|
||||
## Querying Changes
|
||||
|
||||
Once CDC is enabled, you can query the changes table like any other table:
|
||||
|
||||
```sql
|
||||
-- View all captured changes
|
||||
SELECT * FROM turso_cdc;
|
||||
|
||||
-- View only inserts
|
||||
SELECT * FROM turso_cdc WHERE change_type = 1;
|
||||
|
||||
-- View only updates
|
||||
SELECT * FROM turso_cdc WHERE change_type = 0;
|
||||
|
||||
-- View only deletes
|
||||
SELECT * FROM turso_cdc WHERE change_type = -1;
|
||||
|
||||
-- View changes for a specific table
|
||||
SELECT * FROM turso_cdc WHERE table_name = 'users';
|
||||
|
||||
-- View recent changes (last hour)
|
||||
SELECT * FROM turso_cdc
|
||||
WHERE change_time > unixepoch() - 3600;
|
||||
```
|
||||
|
||||
## Practical Example
|
||||
|
||||
```sql
|
||||
-- Create a table
|
||||
CREATE TABLE users (
|
||||
id INTEGER PRIMARY KEY,
|
||||
name TEXT,
|
||||
email TEXT
|
||||
);
|
||||
|
||||
-- Enable full CDC
|
||||
PRAGMA unstable_capture_data_changes_conn('full');
|
||||
|
||||
-- Make some changes
|
||||
INSERT INTO users VALUES (1, 'Alice', 'alice@example.com');
|
||||
INSERT INTO users VALUES (2, 'Bob', 'bob@example.com');
|
||||
UPDATE users SET email = 'alice@newdomain.com' WHERE id = 1;
|
||||
DELETE FROM users WHERE id = 2;
|
||||
|
||||
-- View the captured changes
|
||||
SELECT change_type, table_name, id
|
||||
FROM turso_cdc;
|
||||
|
||||
-- Results will show:
|
||||
-- 1 (INSERT) for Alice
|
||||
-- 1 (INSERT) for Bob
|
||||
-- 0 (UPDATE) for Alice's email change
|
||||
-- -1 (DELETE) for Bob
|
||||
```
|
||||
|
||||
## Multiple Connections
|
||||
|
||||
Each connection can have its own CDC configuration:
|
||||
|
||||
```sql
|
||||
-- Connection 1: Capture to 'audit_log' table
|
||||
PRAGMA unstable_capture_data_changes_conn('full,audit_log');
|
||||
|
||||
-- Connection 2: Capture to 'sync_queue' table
|
||||
PRAGMA unstable_capture_data_changes_conn('id,sync_queue');
|
||||
|
||||
-- Changes from Connection 1 go to 'audit_log'
|
||||
-- Changes from Connection 2 go to 'sync_queue'
|
||||
```
|
||||
|
||||
## Transactions
|
||||
|
||||
CDC respects transaction boundaries. Changes are only recorded when a transaction commits:
|
||||
|
||||
```sql
|
||||
BEGIN;
|
||||
INSERT INTO users VALUES (3, 'Charlie', 'charlie@example.com');
|
||||
UPDATE users SET name = 'Charles' WHERE id = 3;
|
||||
-- CDC table is not yet updated
|
||||
|
||||
COMMIT;
|
||||
-- Now both the INSERT and UPDATE appear in the CDC table
|
||||
```
|
||||
|
||||
If a transaction rolls back, no CDC entries are created for those changes.
|
||||
|
||||
## Schema Changes
|
||||
|
||||
CDC also tracks schema changes when using full mode:
|
||||
|
||||
```sql
|
||||
PRAGMA unstable_capture_data_changes_conn('full');
|
||||
|
||||
CREATE TABLE products (id INTEGER PRIMARY KEY, name TEXT);
|
||||
-- Recorded in CDC as change to sqlite_schema
|
||||
|
||||
DROP TABLE products;
|
||||
-- Also recorded as a schema change
|
||||
```
|
||||
96
cli/manuals/encryption.md
Normal file
96
cli/manuals/encryption.md
Normal file
@@ -0,0 +1,96 @@
|
||||
---
|
||||
display_name: "at-rest encryption"
|
||||
---
|
||||
|
||||
# Encryption - At-Rest Database Encryption
|
||||
|
||||
## Overview
|
||||
|
||||
Turso supports transparent at-rest encryption to protect your database files from unauthorized access. When enabled, all data written to disk is automatically encrypted, and decrypted when read, with no changes required to your application code.
|
||||
|
||||
## Supported Ciphers
|
||||
|
||||
Turso supports multiple encryption algorithms with different performance and security characteristics:
|
||||
|
||||
### AES-GCM Family
|
||||
- **`aes128gcm`** - AES-128 in Galois/Counter Mode (16-byte key)
|
||||
- **`aes256gcm`** - AES-256 in Galois/Counter Mode (32-byte key)
|
||||
|
||||
### AEGIS Family (High Performance)
|
||||
- **`aegis256`** - AEGIS-256 (32-byte key) - Recommended for most use cases
|
||||
- **`aegis128l`** - AEGIS-128L (16-byte key)
|
||||
- **`aegis128x2`** - AEGIS-128 with 2x parallelization (16-byte key)
|
||||
- **`aegis128x4`** - AEGIS-128 with 4x parallelization (16-byte key)
|
||||
- **`aegis256x2`** - AEGIS-256 with 2x parallelization (32-byte key)
|
||||
- **`aegis256x4`** - AEGIS-256 with 4x parallelization (32-byte key)
|
||||
|
||||
**Note:** AEGIS ciphers generally offer better performance than AES-GCM while maintaining excellent security properties. AEGIS-256 is recommended as the default choice.
|
||||
|
||||
## Generating Encryption Keys
|
||||
|
||||
Generate a secure encryption key using OpenSSL:
|
||||
|
||||
```bash
|
||||
# For 32-byte key (256-bit) - use with aes256gcm, aegis256, etc.
|
||||
openssl rand -hex 32
|
||||
|
||||
# For 16-byte key (128-bit) - use with aes128gcm, aegis128l, etc.
|
||||
openssl rand -hex 16
|
||||
```
|
||||
|
||||
Example output:
|
||||
```
|
||||
2d7a30108d3eb3e45c90a732041fe54778bdcf707c76749fab7da335d1b39c1d
|
||||
```
|
||||
|
||||
**Important:** Store your encryption key securely. If you lose the key, your encrypted data cannot be recovered.
|
||||
|
||||
## Creating an Encrypted Database
|
||||
|
||||
### Method 1: Using PRAGMAs
|
||||
|
||||
Start Turso and set encryption parameters before creating tables:
|
||||
|
||||
```bash
|
||||
tursodb database.db
|
||||
```
|
||||
|
||||
Then in the SQL shell:
|
||||
```sql
|
||||
PRAGMA cipher = 'aegis256';
|
||||
PRAGMA hexkey = '2d7a30108d3eb3e45c90a732041fe54778bdcf707c76749fab7da335d1b39c1d';
|
||||
|
||||
-- Now create your tables and insert data
|
||||
CREATE TABLE users (id INTEGER PRIMARY KEY, name TEXT);
|
||||
INSERT INTO users VALUES (1, 'Alice');
|
||||
```
|
||||
|
||||
### Method 2: Using URI Parameters
|
||||
|
||||
Specify encryption parameters directly in the database URI:
|
||||
|
||||
```bash
|
||||
tursodb "file:database.db?cipher=aegis256&hexkey=2d7a30108d3eb3e45c90a732041fe54778bdcf707c76749fab7da335d1b39c1d"
|
||||
```
|
||||
|
||||
## Opening an Encrypted Database
|
||||
|
||||
**Important:** To open an existing encrypted database, you MUST provide the cipher and key as URI parameters:
|
||||
|
||||
```bash
|
||||
tursodb "file:database.db?cipher=aegis256&hexkey=2d7a30108d3eb3e45c90a732041fe54778bdcf707c76749fab7da335d1b39c1d"
|
||||
```
|
||||
|
||||
Attempting to open an encrypted database without the correct cipher and key will fail.
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### "Database is encrypted or is not a database"
|
||||
This error occurs when:
|
||||
- Opening an encrypted database without providing cipher/key
|
||||
- Using the wrong cipher or key
|
||||
- The database file is corrupted
|
||||
|
||||
### "Invalid hex string"
|
||||
- Ensure your key is valid hexadecimal (0-9, a-f)
|
||||
- Check the key length matches your cipher (32 hex chars for 16 bytes, 64 for 32 bytes)
|
||||
53
cli/manuals/index.md
Normal file
53
cli/manuals/index.md
Normal file
@@ -0,0 +1,53 @@
|
||||
# Turso Manual Pages
|
||||
|
||||
Welcome to the Turso manual pages. These pages provide detailed documentation for various features and capabilities.
|
||||
|
||||
## Available Manuals
|
||||
|
||||
### cdc - Change Data Capture
|
||||
Track and capture all data changes made to your database tables for replication, syncing, and reactive applications.
|
||||
|
||||
```
|
||||
.manual cdc
|
||||
```
|
||||
|
||||
### encryption - At-Rest Database Encryption
|
||||
Protect your database files with transparent encryption using AES-GCM or high-performance AEGIS ciphers.
|
||||
|
||||
```
|
||||
.manual encryption
|
||||
```
|
||||
|
||||
### mcp - Model Context Protocol
|
||||
Learn about Turso's built-in MCP server that enables AI assistants and other tools to interact with your databases.
|
||||
|
||||
```
|
||||
.manual mcp
|
||||
```
|
||||
|
||||
### vector - Vector Search
|
||||
Build similarity search and semantic search applications using vector embeddings and distance functions.
|
||||
|
||||
```
|
||||
.manual vector
|
||||
```
|
||||
|
||||
## Usage
|
||||
|
||||
To view a manual page, use the `.manual` or `.man` command:
|
||||
|
||||
```
|
||||
.manual <page> # Full command
|
||||
.man <page> # Short alias
|
||||
```
|
||||
|
||||
### Examples
|
||||
|
||||
```
|
||||
.manual mcp # View the MCP server documentation
|
||||
.man mcp # Same as above, using the alias
|
||||
```
|
||||
|
||||
## Adding More Manuals
|
||||
|
||||
Additional manual pages will be added for other features as they become available.
|
||||
145
cli/manuals/mcp.md
Normal file
145
cli/manuals/mcp.md
Normal file
@@ -0,0 +1,145 @@
|
||||
---
|
||||
display_name: "a built-in MCP server"
|
||||
---
|
||||
|
||||
# MCP Server - Model Context Protocol
|
||||
|
||||
## Overview
|
||||
|
||||
Turso includes a built-in MCP (Model Context Protocol) server that allows AI assistants and other tools to interact with your databases programmatically.
|
||||
|
||||
## Starting the MCP Server
|
||||
|
||||
To start Turso in MCP server mode, use the `--mcp` flag:
|
||||
|
||||
```bash
|
||||
/path/to/tursodb --mcp
|
||||
```
|
||||
|
||||
This will start an MCP server that listens on stdio for commands. The server starts without a database connection, allowing you to select or create databases using MCP commands.
|
||||
|
||||
## Available Tools
|
||||
|
||||
The MCP server exposes the following tools:
|
||||
|
||||
### `query`
|
||||
Execute a SQL query and get results.
|
||||
|
||||
**Parameters:**
|
||||
- `sql` (string, required): The SQL query to execute
|
||||
|
||||
**Example:**
|
||||
```json
|
||||
{
|
||||
"tool": "query",
|
||||
"arguments": {
|
||||
"sql": "SELECT * FROM users WHERE age > 21"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### `execute`
|
||||
Execute a SQL statement that modifies data (INSERT, UPDATE, DELETE).
|
||||
|
||||
**Parameters:**
|
||||
- `sql` (string, required): The SQL statement to execute
|
||||
|
||||
**Example:**
|
||||
```json
|
||||
{
|
||||
"tool": "execute",
|
||||
"arguments": {
|
||||
"sql": "INSERT INTO users (name, age) VALUES ('Alice', 30)"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### `list_tables`
|
||||
List all tables in the database.
|
||||
|
||||
**Example:**
|
||||
```json
|
||||
{
|
||||
"tool": "list_tables",
|
||||
"arguments": {}
|
||||
}
|
||||
```
|
||||
|
||||
### `describe_table`
|
||||
Get the schema of a specific table.
|
||||
|
||||
**Parameters:**
|
||||
- `table` (string, required): The name of the table to describe
|
||||
|
||||
**Example:**
|
||||
```json
|
||||
{
|
||||
"tool": "describe_table",
|
||||
"arguments": {
|
||||
"table": "users"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## Integration with AI Assistants
|
||||
|
||||
### Claude Desktop
|
||||
|
||||
To use with Claude Desktop, add the following to your Claude Desktop configuration:
|
||||
|
||||
```json
|
||||
{
|
||||
"mcpServers": {
|
||||
"turso": {
|
||||
"command": "/path/to/tursodb",
|
||||
"args": ["--mcp"]
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
Note: You must use the full path to the tursodb executable as Claude Desktop may not recognize items in your PATH.
|
||||
|
||||
### Other MCP Clients
|
||||
|
||||
The Turso MCP server follows the standard MCP protocol and can be used with any MCP-compatible client.
|
||||
|
||||
## Example Session
|
||||
|
||||
Here's an example of using the MCP server:
|
||||
|
||||
1. **Start the server:**
|
||||
```bash
|
||||
/path/to/tursodb --mcp
|
||||
```
|
||||
|
||||
2. **Query data:**
|
||||
```
|
||||
> What tables are in the database?
|
||||
[Uses list_tables tool]
|
||||
|
||||
> Show me all users older than 25
|
||||
[Uses query tool with "SELECT * FROM users WHERE age > 25"]
|
||||
```
|
||||
|
||||
3. **Modify data:**
|
||||
```
|
||||
> Add a new user named Bob who is 28 years old
|
||||
[Uses execute tool with INSERT statement]
|
||||
```
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### Server doesn't start
|
||||
- Ensure the tursodb executable path is correct
|
||||
- Check that you're using the full path to the executable
|
||||
|
||||
### Commands fail
|
||||
- Verify SQL syntax is correct
|
||||
- Check that tables and columns exist
|
||||
- Ensure you have write permissions if modifying data
|
||||
|
||||
## See Also
|
||||
|
||||
- MCP Protocol Documentation: https://modelcontextprotocol.io
|
||||
- Turso Documentation: https://turso.tech/docs
|
||||
211
cli/manuals/vector.md
Normal file
211
cli/manuals/vector.md
Normal file
@@ -0,0 +1,211 @@
|
||||
---
|
||||
display_name: "vector search"
|
||||
---
|
||||
|
||||
# Vector Search
|
||||
|
||||
## Overview
|
||||
|
||||
Turso supports vector operations for building similarity search and semantic search applications. Vectors are stored as BLOBs and can be searched using distance functions to find similar items.
|
||||
|
||||
**Important:** Vector indexes are not yet supported. All vector searches currently use brute-force scanning, which means searching scales linearly with the number of rows.
|
||||
|
||||
## Vector Types
|
||||
|
||||
Turso supports two vector formats:
|
||||
|
||||
- **`vector32`** - 32-bit floating-point vectors (4 bytes per dimension)
|
||||
- **`vector64`** - 64-bit floating-point vectors (8 bytes per dimension)
|
||||
|
||||
## Creating and Storing Vectors
|
||||
|
||||
Vectors are stored in vector columns as-is and are represented on disk as BLOBs. Embeddings are interpreted and validated at runtime. For an embedding to be valid, it must be either a JSON array of float values or a binary blob created with the Turso vector functions `vector32` / `vector64`.
|
||||
|
||||
### Basic Example
|
||||
|
||||
```sql
|
||||
-- Create a table with vector embeddings
|
||||
CREATE TABLE documents (
|
||||
id INTEGER PRIMARY KEY,
|
||||
content TEXT,
|
||||
embedding BLOB -- Store vector as BLOB
|
||||
);
|
||||
|
||||
-- Insert vectors using vector32() or vector64()
|
||||
INSERT INTO documents VALUES
|
||||
(1, 'Introduction to databases', vector32('[0.1, 0.2, 0.3, 0.4]')),
|
||||
(2, 'SQL query optimization', vector32('[0.2, 0.1, 0.4, 0.3]')),
|
||||
(3, 'Vector similarity search', vector32('[0.4, 0.3, 0.2, 0.1]'));
|
||||
```
|
||||
|
||||
### Working with Higher Dimensions
|
||||
|
||||
Real embeddings typically have hundreds or thousands of dimensions:
|
||||
|
||||
```sql
|
||||
-- Example with 1536-dimensional embeddings (like OpenAI's ada-002)
|
||||
CREATE TABLE embeddings (
|
||||
id INTEGER PRIMARY KEY,
|
||||
text TEXT,
|
||||
vector BLOB
|
||||
);
|
||||
|
||||
-- Insert a 1536-dimensional vector (values elided with "..." for brevity; supply all 1536 values in real use)
|
||||
INSERT INTO embeddings VALUES
|
||||
(1, 'Sample text', vector32('[0.001, 0.002, ..., 0.1536]'));
|
||||
```
|
||||
|
||||
## Vector Functions
|
||||
|
||||
### Creation Functions
|
||||
|
||||
- **`vector32(text)`** - Create a 32-bit float vector from JSON array
|
||||
- **`vector64(text)`** - Create a 64-bit float vector from JSON array
|
||||
|
||||
### Distance Functions
|
||||
|
||||
- **`vector_distance_l2(v1, v2)`** - Euclidean (L2) distance between vectors
|
||||
- **`vector_distance_cos(v1, v2)`** - Cosine distance (1 - cosine similarity)
|
||||
|
||||
### Utility Functions
|
||||
|
||||
- **`vector_extract(blob)`** - Convert vector BLOB back to JSON text
|
||||
- **`vector_concat(v1, v2)`** - Concatenate two vectors
|
||||
- **`vector_slice(v, start, end)`** - Extract a portion of a vector
|
||||
|
||||
## Similarity Search Examples
|
||||
|
||||
### Finding Similar Documents
|
||||
|
||||
```sql
|
||||
-- Find documents similar to a query vector
|
||||
WITH query AS (
|
||||
SELECT vector32('[0.15, 0.25, 0.35, 0.45]') AS query_vector
|
||||
)
|
||||
SELECT
|
||||
id,
|
||||
content,
|
||||
vector_distance_l2(embedding, query_vector) AS distance
|
||||
FROM documents, query
|
||||
ORDER BY distance
|
||||
LIMIT 5;
|
||||
```
|
||||
|
||||
### Cosine Similarity Search
|
||||
|
||||
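Cosine similarity is often preferred for text embeddings. `vector_distance_cos` returns the cosine distance (1 - cosine similarity), so smaller values mean more similar vectors: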
|
||||
|
||||
```sql
|
||||
-- Find semantically similar documents using cosine distance
|
||||
WITH query AS (
|
||||
SELECT vector32('[0.15, 0.25, 0.35, 0.45]') AS query_vector
|
||||
)
|
||||
SELECT
|
||||
id,
|
||||
content,
|
||||
vector_distance_cos(embedding, query_vector) AS cosine_distance
|
||||
FROM documents, query
|
||||
ORDER BY cosine_distance
|
||||
LIMIT 5;
|
||||
```
|
||||
|
||||
### Threshold-Based Search
|
||||
|
||||
Find all vectors within a certain distance:
|
||||
|
||||
```sql
|
||||
-- Find all documents within distance threshold
|
||||
WITH query AS (
|
||||
SELECT vector32('[0.15, 0.25, 0.35, 0.45]') AS query_vector
|
||||
)
|
||||
SELECT
|
||||
id,
|
||||
content,
|
||||
vector_distance_l2(embedding, query_vector) AS distance
|
||||
FROM documents, query
|
||||
WHERE vector_distance_l2(embedding, query_vector) < 0.5
|
||||
ORDER BY distance;
|
||||
```
|
||||
|
||||
## Working with Vector Data
|
||||
|
||||
### Inspecting Vectors
|
||||
|
||||
```sql
|
||||
-- Extract and view vector data as JSON
|
||||
SELECT id, vector_extract(embedding) AS vector_json
|
||||
FROM documents
|
||||
LIMIT 3;
|
||||
```
|
||||
|
||||
### Vector Operations
|
||||
|
||||
```sql
|
||||
-- Concatenate two vectors
|
||||
SELECT vector_concat(
|
||||
vector32('[1.0, 2.0]'),
|
||||
vector32('[3.0, 4.0]')
|
||||
) AS concatenated;
|
||||
|
||||
-- Slice a vector (zero-based indexes: start 2 inclusive, end 4 exclusive)
|
||||
SELECT vector_slice(
|
||||
vector32('[1.0, 2.0, 3.0, 4.0, 5.0]'),
|
||||
2, 4
|
||||
) AS sliced;
|
||||
```
|
||||
|
||||
## Building a Semantic Search Application
|
||||
|
||||
Here's a complete example of a semantic search application:
|
||||
|
||||
```sql
|
||||
-- 1. Create schema
|
||||
CREATE TABLE articles (
|
||||
id INTEGER PRIMARY KEY,
|
||||
title TEXT,
|
||||
content TEXT,
|
||||
embedding BLOB
|
||||
);
|
||||
|
||||
-- 2. Insert pre-computed embeddings
|
||||
INSERT INTO articles VALUES
|
||||
(1, 'Database Fundamentals', 'An introduction to relational databases...',
|
||||
vector32('[0.12, -0.34, 0.56, ...]')),
|
||||
(2, 'Machine Learning Basics', 'Understanding neural networks and deep learning...',
|
||||
vector32('[0.23, 0.45, -0.67, ...]')),
|
||||
(3, 'Web Development Guide', 'Modern web applications with JavaScript...',
|
||||
vector32('[0.34, -0.12, 0.78, ...]'));
|
||||
|
||||
-- 3. Search for similar articles
|
||||
WITH search_embedding AS (
|
||||
-- This would come from your embedding model for the search query
|
||||
SELECT vector32('[0.15, -0.30, 0.60, ...]') AS query_vec
|
||||
)
|
||||
SELECT
|
||||
a.id,
|
||||
a.title,
|
||||
vector_distance_cos(a.embedding, s.query_vec) AS similarity_score
|
||||
FROM articles a, search_embedding s
|
||||
ORDER BY similarity_score
|
||||
LIMIT 10;
|
||||
```
|
||||
|
||||
## Performance Considerations
|
||||
|
||||
Since vector indexes are not yet implemented, keep in mind:
|
||||
|
||||
- **Linear scan**: Every search examines all rows in the table
|
||||
- **Memory usage**: Vectors consume significant space (4 bytes × dimensions for vector32)
|
||||
- **Optimization tips**:
|
||||
- Use smaller dimensions when possible
|
||||
- Pre-filter data with WHERE clauses before distance calculations
|
||||
- Consider partitioning large datasets
|
||||
- Use vector32 instead of vector64 unless high precision is needed
|
||||
|
||||
## Common Use Cases
|
||||
|
||||
- **Semantic search**: Find documents by meaning rather than keywords
|
||||
- **Recommendation systems**: Find similar items based on embeddings
|
||||
- **Duplicate detection**: Identify near-duplicate content
|
||||
- **Image similarity**: Search for similar images using visual embeddings
|
||||
- **Anomaly detection**: Find outliers in high-dimensional data
|
||||
Reference in New Issue
Block a user