Add built-in manual pages for Turso

In the hopes of doing a good job at teaching people what Turso can do,
I am adding built-in manual pages. When the CLI starts, it picks a
feature at random, and tells the user that the feature exists:

```
Turso v0.2.0-pre.8
Enter ".help" for usage hints.
Did you know that Turso supports Change Data Capture? Type .manual cdc to learn more.
This software is ALPHA, only use for development, testing, and experimentation.
Connected to a transient in-memory database.
Use ".open FILENAME" to reopen on a persistent database
```

There is a lot we can do to make this feature world class:
- we can automatically compile the examples at build time, like rustdoc
  does, to make sure examples used in the manuals always work
- we can implement scrolling and navigation
- we can document a lot more features

But for now, this is a start!
This commit is contained in:
Glauber Costa
2025-09-24 11:08:15 -03:00
parent c894dcf438
commit fbc3d0dbc3
14 changed files with 1090 additions and 5 deletions

223
Cargo.lock generated
View File

@@ -592,7 +592,7 @@ version = "7.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4a65ebfec4fb190b6f90e944a817d60499ee0744e582530e2c9900a22e591d9a"
dependencies = [
"crossterm",
"crossterm 0.28.1",
"unicode-segmentation",
"unicode-width 0.2.0",
]
@@ -649,6 +649,15 @@ version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7c74b8349d32d297c9134b8c88677813a227df8f779daa29bfc29c183fe3dca6"
[[package]]
name = "convert_case"
version = "0.7.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bb402b8d4c85569410425650ce3eddc7d698ed96d39a73f941b08fb63082f1e7"
dependencies = [
"unicode-segmentation",
]
[[package]]
name = "convert_case"
version = "0.8.0"
@@ -658,6 +667,15 @@ dependencies = [
"unicode-segmentation",
]
[[package]]
name = "coolor"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "980c2afde4af43d6a05c5be738f9eae595cff86dce1f38f88b95058a98c027f3"
dependencies = [
"crossterm 0.29.0",
]
[[package]]
name = "core-foundation-sys"
version = "0.8.7"
@@ -751,6 +769,45 @@ dependencies = [
"itertools 0.10.5",
]
[[package]]
name = "crokey"
version = "1.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "51360853ebbeb3df20c76c82aecf43d387a62860f1a59ba65ab51f00eea85aad"
dependencies = [
"crokey-proc_macros",
"crossterm 0.29.0",
"once_cell",
"serde",
"strict",
]
[[package]]
name = "crokey-proc_macros"
version = "1.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3bf1a727caeb5ee5e0a0826a97f205a9cf84ee964b0b48239fef5214a00ae439"
dependencies = [
"crossterm 0.29.0",
"proc-macro2",
"quote",
"strict",
"syn 2.0.100",
]
[[package]]
name = "crossbeam"
version = "0.8.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1137cd7e7fc0fb5d3c5a8678be38ec56e819125d8d7907411fe24ccb943faca8"
dependencies = [
"crossbeam-channel",
"crossbeam-deque",
"crossbeam-epoch",
"crossbeam-queue",
"crossbeam-utils",
]
[[package]]
name = "crossbeam-channel"
version = "0.5.14"
@@ -779,6 +836,15 @@ dependencies = [
"crossbeam-utils",
]
[[package]]
name = "crossbeam-queue"
version = "0.3.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0f58bbc28f91df819d0aa2a2c00cd19754769c2fad90579b3592b1c9ba7a3115"
dependencies = [
"crossbeam-utils",
]
[[package]]
name = "crossbeam-skiplist"
version = "0.1.3"
@@ -808,6 +874,24 @@ dependencies = [
"winapi",
]
[[package]]
name = "crossterm"
version = "0.29.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d8b9f2e4c67f833b660cdb0a3523065869fb35570177239812ed4c905aeff87b"
dependencies = [
"bitflags 2.9.4",
"crossterm_winapi",
"derive_more",
"document-features",
"mio",
"parking_lot",
"rustix 1.0.7",
"signal-hook",
"signal-hook-mio",
"winapi",
]
[[package]]
name = "crossterm_winapi"
version = "0.9.1"
@@ -995,6 +1079,27 @@ dependencies = [
"powerfmt",
]
[[package]]
name = "derive_more"
version = "2.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "093242cf7570c207c83073cf82f79706fe7b8317e98620a47d5be7c3d8497678"
dependencies = [
"derive_more-impl",
]
[[package]]
name = "derive_more-impl"
version = "2.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bda628edc44c4bb645fbe0f758797143e4e07926f7ebf4e9bdfbd3d2ce621df3"
dependencies = [
"convert_case 0.7.1",
"proc-macro2",
"quote",
"syn 2.0.100",
]
[[package]]
name = "difflib"
version = "0.4.0"
@@ -1070,6 +1175,15 @@ version = "0.3.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fea41bba32d969b513997752735605054bc0dfa92b4c56bf1189f2e174be7a10"
[[package]]
name = "document-features"
version = "0.2.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "95249b50c6c185bee49034bcb378a49dc2b5dff0be90ff6616d31d64febab05d"
dependencies = [
"litrs",
]
[[package]]
name = "dtor"
version = "0.0.6"
@@ -1793,6 +1907,25 @@ dependencies = [
"icu_properties",
]
[[package]]
name = "include_dir"
version = "0.7.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "923d117408f1e49d914f1a379a309cffe4f18c05cf4e3d12e613a15fc81bd0dd"
dependencies = [
"include_dir_macros",
]
[[package]]
name = "include_dir_macros"
version = "0.7.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7cab85a7ed0bd5f0e76d93846e0147172bed2e2d3f859bcc33a8d9699cad1a75"
dependencies = [
"proc-macro2",
"quote",
]
[[package]]
name = "indexmap"
version = "1.9.3"
@@ -2022,6 +2155,29 @@ dependencies = [
"libc",
]
[[package]]
name = "lazy-regex"
version = "3.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "60c7310b93682b36b98fa7ea4de998d3463ccbebd94d935d6b48ba5b6ffa7126"
dependencies = [
"lazy-regex-proc_macros",
"once_cell",
"regex",
]
[[package]]
name = "lazy-regex-proc_macros"
version = "3.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4ba01db5ef81e17eb10a5e0f2109d1b3a3e29bac3070fdbd7d156bf7dbd206a1"
dependencies = [
"proc-macro2",
"quote",
"regex",
"syn 2.0.100",
]
[[package]]
name = "lazy_static"
version = "1.5.0"
@@ -2250,6 +2406,12 @@ version = "0.7.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "23fb14cb19457329c82206317a5663005a4d404783dc74f4252769b0d5f42856"
[[package]]
name = "litrs"
version = "0.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f5e54036fe321fd421e10d732f155734c4e4afd610dd556d9a82833ab3ee0bed"
[[package]]
name = "lock_api"
version = "0.4.13"
@@ -2364,6 +2526,15 @@ dependencies = [
"libmimalloc-sys",
]
[[package]]
name = "minimad"
version = "0.13.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a9c5d708226d186590a7b6d4a9780e2bdda5f689e0d58cd17012a298efd745d2"
dependencies = [
"once_cell",
]
[[package]]
name = "miniz_oxide"
version = "0.8.5"
@@ -2411,7 +2582,7 @@ version = "3.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "43e61844e0c0bb81e711f2084abe7cff187b03ca21ff8b000cb59bbda61e15a9"
dependencies = [
"convert_case",
"convert_case 0.8.0",
"ctor 0.4.2",
"napi-derive-backend",
"proc-macro2",
@@ -2425,7 +2596,7 @@ version = "2.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b7ab19e9b98efb13895f492a2e367ca50c955ac3c4723613af73fdda4011afcc"
dependencies = [
"convert_case",
"convert_case 0.8.0",
"proc-macro2",
"quote",
"semver",
@@ -3551,6 +3722,27 @@ version = "1.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64"
[[package]]
name = "signal-hook"
version = "0.3.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d881a16cf4426aa584979d30bd82cb33429027e42122b169753d6ef1085ed6e2"
dependencies = [
"libc",
"signal-hook-registry",
]
[[package]]
name = "signal-hook-mio"
version = "0.2.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "34db1a06d485c9142248b7a054f034b349b212551f3dfd19c94d45a754a217cd"
dependencies = [
"libc",
"mio",
"signal-hook",
]
[[package]]
name = "signal-hook-registry"
version = "1.4.2"
@@ -3666,6 +3858,12 @@ version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9091b6114800a5f2141aee1d1b9d6ca3592ac062dc5decb3764ec5895a47b4eb"
[[package]]
name = "strict"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f42444fea5b87a39db4218d9422087e66a85d0e7a0963a439b07bcdf91804006"
[[package]]
name = "strsim"
version = "0.11.1"
@@ -3817,6 +4015,22 @@ dependencies = [
"windows-sys 0.59.0",
]
[[package]]
name = "termimad"
version = "0.30.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "22117210909e9dfff30a558f554c7fb3edb198ef614e7691386785fb7679677c"
dependencies = [
"coolor",
"crokey",
"crossbeam",
"lazy-regex",
"minimad",
"serde",
"thiserror 1.0.69",
"unicode-width 0.1.14",
]
[[package]]
name = "terminal_size"
version = "0.4.2"
@@ -4155,17 +4369,20 @@ dependencies = [
"ctrlc",
"dirs 5.0.1",
"env_logger 0.11.7",
"include_dir",
"libc",
"limbo_completion",
"miette",
"mimalloc",
"nu-ansi-term",
"rand 0.8.5",
"rustyline",
"schemars 0.8.22",
"serde",
"serde_json",
"shlex",
"syntect",
"termimad",
"toml",
"toml_edit",
"tracing",

View File

@@ -46,6 +46,9 @@ serde = { workspace = true, features = ["derive"]}
validator = {version = "0.20.0", features = ["derive"]}
toml_edit = {version = "0.22.24", features = ["serde"]}
serde_json = "1.0"
termimad = "0.30"
include_dir = "0.7"
rand = "0.8"
mimalloc = { workspace = true }
[features]
@@ -55,3 +58,4 @@ tracing_release = ["turso_core/tracing_release"]
[build-dependencies]
syntect = { git = "https://github.com/trishume/syntect.git", rev = "64644ffe064457265cbcee12a0c1baf9485ba6ee" }
include_dir = "0.7"

View File

@@ -10,6 +10,7 @@ use crate::{
get_io, get_writer, ApplyWriter, DbLocation, NoopProgress, OutputMode, ProgressSink,
Settings, StderrProgress,
},
manual,
opcodes_dictionary::OPCODE_DESCRIPTIONS,
HISTORY_FILE,
};
@@ -255,6 +256,12 @@ impl Limbo {
if !quiet {
self.writeln_fmt(format_args!("Turso v{}", env!("CARGO_PKG_VERSION")))?;
self.writeln("Enter \".help\" for usage hints.")?;
// Add random feature hint
if let Some(hint) = manual::get_random_feature_hint() {
self.writeln(&hint)?;
}
self.writeln(
"This software is ALPHA, only use for development, testing, and experimentation.",
)?;
@@ -732,6 +739,12 @@ impl Limbo {
let _ = self.writeln(e.to_string());
}
}
Command::Manual(args) => {
let w = self.writer.as_mut().unwrap();
if let Err(e) = manual::display_manual(args.page.as_deref(), w) {
let _ = self.writeln(e.to_string());
}
}
},
}
}

View File

@@ -11,6 +11,7 @@ use syntect::parsing::SyntaxSet;
fn main() {
println!("cargo::rerun-if-changed=SQL.sublime-syntax");
println!("cargo::rerun-if-changed=build.rs");
println!("cargo::rerun-if-changed=manuals");
let out_dir = env::var_os("OUT_DIR").unwrap();
let syntax =

View File

@@ -166,6 +166,12 @@ pub struct CloneArgs {
pub output_file: String,
}
// Arguments accepted by the `.manual` dot-command.
//
// `page` is optional: when it is `None`, the caller passes `None` on to the
// manual display code rather than a specific page name.
//
// NOTE(review): the `///` comment on `page` is presumably picked up by clap's
// derive macro as the argument's help text, so treat it as user-facing copy
// rather than as an internal comment — confirm before rewording it.
#[derive(Debug, Clone, Args)]
pub struct ManualArgs {
/// The manual page to display (e.g., "mcp")
pub page: Option<String>,
}
#[derive(ValueEnum, Copy, Clone, Debug, PartialEq, Eq)]
pub enum HeadersMode {
On,

View File

@@ -3,8 +3,8 @@ pub mod import;
use args::{
CwdArgs, DbConfigArgs, EchoArgs, ExitArgs, HeadersArgs, IndexesArgs, LoadExtensionArgs,
NullValueArgs, OpcodesArgs, OpenArgs, OutputModeArgs, SchemaArgs, SetOutputArgs, StatsArgs,
TablesArgs, TimerArgs,
ManualArgs, NullValueArgs, OpcodesArgs, OpenArgs, OutputModeArgs, SchemaArgs, SetOutputArgs,
StatsArgs, TablesArgs, TimerArgs,
};
use clap::Parser;
use import::ImportArgs;
@@ -94,6 +94,9 @@ pub enum Command {
Headers(HeadersArgs),
#[command(name = "clone", display_name = ".clone")]
Clone(CloneArgs),
/// Display manual pages for features
#[command(name = "manual", display_name = ".manual", alias = "man")]
Manual(ManualArgs),
}
const _HELP_TEMPLATE: &str = "{before-help}{name}

View File

@@ -361,6 +361,10 @@ pub const AFTER_HELP_MSG: &str = r#"Usage Examples:
18. To clone the open database to another file:
.clone output_file.db
19. To view manual pages for features:
.manual mcp # View MCP server documentation
.man # List all available manuals
Note:
- All SQL commands must end with a semicolon (;).
- Special commands start with a dot (.) and are not required to end with a semicolon."#;

View File

@@ -4,6 +4,7 @@ mod commands;
mod config;
mod helper;
mod input;
mod manual;
mod mcp_server;
mod opcodes_dictionary;

144
cli/manual.rs Normal file
View File

@@ -0,0 +1,144 @@
use include_dir::{include_dir, Dir};
use rand::seq::SliceRandom;
use std::io::{IsTerminal, Write};
use termimad::MadSkin;
static MANUAL_DIR: Dir = include_dir!("$CARGO_MANIFEST_DIR/manuals");
/// Get a random feature to highlight from available manuals
pub fn get_random_feature_hint() -> Option<String> {
let features: Vec<(&str, String)> = MANUAL_DIR
.files()
.filter_map(|file| {
let path = file.path();
let name = path.file_stem()?.to_str()?;
if name == "index" {
return None;
}
let content = file.contents_utf8()?;
let display_name = extract_display_name(content).unwrap_or_else(|| name.to_string());
Some((name, display_name))
})
.collect();
if features.is_empty() {
return None;
}
features
.choose(&mut rand::thread_rng())
.map(|(feature, display_name)| {
format!("Did you know that Turso supports {display_name}? Type .manual {feature} to learn more.")
})
}
/// Reads the `display_name` entry from a manual page's frontmatter.
///
/// The frontmatter is the run of lines between an opening line that starts
/// with `---` and the first later line that is exactly `---`. The first line
/// in that run beginning with `display_name: ` supplies the value, with any
/// leading/trailing `"` characters removed.
///
/// Returns `None` when the content has no frontmatter, when the frontmatter
/// is never closed, or when it contains no `display_name` entry.
fn extract_display_name(content: &str) -> Option<String> {
    if !content.starts_with("---") {
        return None;
    }

    let mut found: Option<String> = None;
    // Skip the opening delimiter line, then scan until the closing one.
    for line in content.lines().skip(1) {
        if line == "---" {
            // Only a properly terminated frontmatter block yields a value.
            return found;
        }
        if found.is_none() {
            if let Some(value) = line.strip_prefix("display_name: ") {
                found = Some(value.trim_matches('"').to_string());
            }
        }
    }

    // Reached the end of the file without seeing the closing `---`.
    None
}
/// Returns `content` with its leading frontmatter block removed.
///
/// A frontmatter block begins on the first line (which must start with `---`)
/// and ends with the first subsequent line that is exactly `---`. The returned
/// slice starts on the line immediately after that closing delimiter, so the
/// page body does not begin with a stray blank line.
///
/// Both `\n` and `\r\n` line endings are accepted, and the closing delimiter
/// may be the last line of the file without a trailing newline; this matches
/// what `extract_display_name` recognises as frontmatter.
///
/// If `content` does not start with `---`, or the block is never closed, the
/// input is returned unchanged.
fn strip_frontmatter(content: &str) -> &str {
    const DELIMITER: &str = "---";

    if !content.starts_with(DELIMITER) {
        return content;
    }

    // Byte offset of the first line after the opening delimiter line. With no
    // newline at all there is nothing after the opening line to strip.
    let mut offset = match content.find('\n') {
        Some(newline) => newline + 1,
        None => return content,
    };

    for line in content[offset..].split_inclusive('\n') {
        // Advance past this line first so that, on a match, `offset` already
        // points just beyond the closing delimiter and its line ending.
        offset += line.len();

        let text = line.strip_suffix('\n').unwrap_or(line);
        let text = text.strip_suffix('\r').unwrap_or(text);
        if text == DELIMITER {
            return &content[offset..];
        }
    }

    // Unterminated frontmatter: leave the content untouched.
    content
}
/// Shows the manual page named `page`, or the `index` page when `page` is
/// `None`.
///
/// Lookup is by `<page>.md` inside the embedded `MANUAL_DIR`:
/// - page found: its frontmatter is stripped and the remainder is displayed;
/// - no page requested and no `index.md` embedded: the list of available
///   pages is written to `writer` instead;
/// - a specific page requested but missing: an error is returned.
///
/// When stdout is a terminal the page is rendered through
/// `render_in_terminal`, which does not receive `writer`; otherwise the raw
/// Markdown is written to `writer`.
///
/// # Errors
/// Returns an error if the requested page does not exist, if its contents are
/// not valid UTF-8, or if writing/rendering fails.
pub fn display_manual(page: Option<&str>, writer: &mut dyn Write) -> anyhow::Result<()> {
    let page_name = page.unwrap_or("index");
    let file_name = format!("{page_name}.md");

    let raw = match (MANUAL_DIR.get_file(&file_name), page) {
        (Some(file), _) => file
            .contents_utf8()
            .ok_or_else(|| anyhow::anyhow!("Failed to read manual page: {}", page_name))?,
        // No explicit page and no index page: fall back to a plain listing.
        (None, None) => return list_available_manuals(writer),
        (None, Some(_)) => {
            return Err(anyhow::anyhow!("Manual page not found: {}", page_name));
        }
    };

    // The frontmatter only carries metadata; it is not shown to the user.
    let content = strip_frontmatter(raw);

    if std::io::stdout().is_terminal() {
        // Interactive session: styled Markdown rendering.
        render_in_terminal(content)?;
    } else {
        // Pipes and redirects get the unstyled Markdown source.
        writeln!(writer, "{content}")?;
    }

    Ok(())
}
/// Prints `content` as styled Markdown using a customised termimad skin.
///
/// Colours applied on top of `MadSkin::default()`: cyan headers, yellow bold,
/// magenta italic, and green for both inline code and code blocks. Output is
/// produced by `MadSkin::print_text`; this function takes no writer.
///
/// Always returns `Ok(())`; the `Result` return type lets callers use `?`.
fn render_in_terminal(content: &str) -> anyhow::Result<()> {
    use termimad::crossterm::style::Color;

    let mut skin = MadSkin::default();

    // Code, both inline and fenced, shares one colour.
    skin.code_block.set_fg(Color::Green);
    skin.inline_code.set_fg(Color::Green);

    // Emphasis and headings.
    skin.italic.set_fg(Color::Magenta);
    skin.bold.set_fg(Color::Yellow);
    skin.set_headers_fg(Color::Cyan);

    skin.print_text(content);
    Ok(())
}
/// Writes an alphabetically sorted list of the embedded manual pages to
/// `writer`, followed by a short usage line.
///
/// Page names are the file stems of the entries in `MANUAL_DIR`; entries
/// whose stem is not valid UTF-8 are left out. When the directory contains no
/// files at all, a "(No manual pages found)" placeholder is written.
///
/// # Errors
/// Propagates any I/O error raised while writing to `writer`.
fn list_available_manuals(writer: &mut dyn Write) -> anyhow::Result<()> {
    writeln!(writer, "Available manual pages:")?;
    writeln!(writer)?;

    let mut pages: Vec<String> = MANUAL_DIR
        .files()
        .filter_map(|file| file.path().file_stem()?.to_str().map(str::to_string))
        .collect();
    pages.sort();

    for page in pages {
        writeln!(writer, " .manual {page} # or .man {page}")?;
    }

    // Checked against the embedded directory itself, not the filtered list.
    if MANUAL_DIR.files().next().is_none() {
        writeln!(writer, " (No manual pages found)")?;
    }

    writeln!(writer)?;
    writeln!(writer, "Usage: .manual <page> or .man <page>")?;
    Ok(())
}

187
cli/manuals/cdc.md Normal file
View File

@@ -0,0 +1,187 @@
---
display_name: "Change Data Capture"
---
# CDC - Change Data Capture
## Overview
Change Data Capture (CDC) allows you to track and capture all data changes (inserts, updates, deletes) made to your database tables. This is useful for building reactive applications, syncing data between systems, replication, auditing, and more.
**Note:** This feature is currently marked as unstable, meaning the API is subject to change in future versions. The functionality itself is reliable for use.
## Enabling CDC
CDC is enabled per connection using the PRAGMA command:
```sql
PRAGMA unstable_capture_data_changes_conn('<mode>[,<table_name>]');
```
### Parameters
- **mode**: The capture mode (see below)
- **table_name**: Optional custom table name for storing changes (defaults to `turso_cdc`)
### Capture Modes
- **`off`**: Disable CDC for this connection
- **`id`**: Capture only the primary key/rowid of changed rows
- **`before`**: Capture row state before changes (for updates/deletes)
- **`after`**: Capture row state after changes (for inserts/updates)
- **`full`**: Capture both before and after states, plus update details
## Examples
### Basic Usage
Enable CDC with ID mode (captures primary keys only):
```sql
PRAGMA unstable_capture_data_changes_conn('id');
```
### Using Different Modes
Capture the state before changes:
```sql
PRAGMA unstable_capture_data_changes_conn('before');
```
Capture the state after changes:
```sql
PRAGMA unstable_capture_data_changes_conn('after');
```
Capture complete change information:
```sql
PRAGMA unstable_capture_data_changes_conn('full');
```
### Custom CDC Table
Store changes in a custom table instead of the default `turso_cdc`:
```sql
PRAGMA unstable_capture_data_changes_conn('full,my_changes_table');
```
### Disable CDC
Turn off CDC for the current connection:
```sql
PRAGMA unstable_capture_data_changes_conn('off');
```
## CDC Table Structure
The CDC table (default name: `turso_cdc`) contains the following columns:
| Column | Type | Description |
|--------|------|-------------|
| `change_id` | INTEGER | Auto-incrementing unique identifier for each change |
| `change_time` | INTEGER | Timestamp of the change (Unix epoch) |
| `change_type` | INTEGER | Type of change: 1 (INSERT), 0 (UPDATE), -1 (DELETE) |
| `table_name` | TEXT | Name of the table that was changed |
| `id` | varies | Primary key/rowid of the changed row |
| `before` | BLOB | Row data before the change (for modes: before, full) |
| `after` | BLOB | Row data after the change (for modes: after, full) |
| `updates` | BLOB | Details of updated columns (for mode: full) |
## Querying Changes
Once CDC is enabled, you can query the changes table like any other table:
```sql
-- View all captured changes
SELECT * FROM turso_cdc;
-- View only inserts
SELECT * FROM turso_cdc WHERE change_type = 1;
-- View only updates
SELECT * FROM turso_cdc WHERE change_type = 0;
-- View only deletes
SELECT * FROM turso_cdc WHERE change_type = -1;
-- View changes for a specific table
SELECT * FROM turso_cdc WHERE table_name = 'users';
-- View recent changes (last hour)
SELECT * FROM turso_cdc
WHERE change_time > unixepoch() - 3600;
```
## Practical Example
```sql
-- Create a table
CREATE TABLE users (
id INTEGER PRIMARY KEY,
name TEXT,
email TEXT
);
-- Enable full CDC
PRAGMA unstable_capture_data_changes_conn('full');
-- Make some changes
INSERT INTO users VALUES (1, 'Alice', 'alice@example.com');
INSERT INTO users VALUES (2, 'Bob', 'bob@example.com');
UPDATE users SET email = 'alice@newdomain.com' WHERE id = 1;
DELETE FROM users WHERE id = 2;
-- View the captured changes
SELECT change_type, table_name, id
FROM turso_cdc;
-- Results will show:
-- 1 (INSERT) for Alice
-- 1 (INSERT) for Bob
-- 0 (UPDATE) for Alice's email change
-- -1 (DELETE) for Bob
```
## Multiple Connections
Each connection can have its own CDC configuration:
```sql
-- Connection 1: Capture to 'audit_log' table
PRAGMA unstable_capture_data_changes_conn('full,audit_log');
-- Connection 2: Capture to 'sync_queue' table
PRAGMA unstable_capture_data_changes_conn('id,sync_queue');
-- Changes from Connection 1 go to 'audit_log'
-- Changes from Connection 2 go to 'sync_queue'
```
## Transactions
CDC respects transaction boundaries. Changes are only recorded when a transaction commits:
```sql
BEGIN;
INSERT INTO users VALUES (3, 'Charlie', 'charlie@example.com');
UPDATE users SET name = 'Charles' WHERE id = 3;
-- CDC table is not yet updated
COMMIT;
-- Now both the INSERT and UPDATE appear in the CDC table
```
If a transaction rolls back, no CDC entries are created for those changes.
## Schema Changes
CDC also tracks schema changes when using full mode:
```sql
PRAGMA unstable_capture_data_changes_conn('full');
CREATE TABLE products (id INTEGER PRIMARY KEY, name TEXT);
-- Recorded in CDC as change to sqlite_schema
DROP TABLE products;
-- Also recorded as a schema change
```

96
cli/manuals/encryption.md Normal file
View File

@@ -0,0 +1,96 @@
---
display_name: "encryption at-rest"
---
# Encryption - At-Rest Database Encryption
## Overview
Turso supports transparent at-rest encryption to protect your database files from unauthorized access. When enabled, all data written to disk is automatically encrypted, and decrypted when read, with no changes required to your application code.
## Supported Ciphers
Turso supports multiple encryption algorithms with different performance and security characteristics:
### AES-GCM Family
- **`aes128gcm`** - AES-128 in Galois/Counter Mode (16-byte key)
- **`aes256gcm`** - AES-256 in Galois/Counter Mode (32-byte key)
### AEGIS Family (High Performance)
- **`aegis256`** - AEGIS-256 (32-byte key) - Recommended for most use cases
- **`aegis128l`** - AEGIS-128L (16-byte key)
- **`aegis128x2`** - AEGIS-128 with 2x parallelization (16-byte key)
- **`aegis128x4`** - AEGIS-128 with 4x parallelization (16-byte key)
- **`aegis256x2`** - AEGIS-256 with 2x parallelization (32-byte key)
- **`aegis256x4`** - AEGIS-256 with 4x parallelization (32-byte key)
**Note:** AEGIS ciphers generally offer better performance than AES-GCM while maintaining excellent security properties. AEGIS-256 is recommended as the default choice.
## Generating Encryption Keys
Generate a secure encryption key using OpenSSL:
```bash
# For 32-byte key (256-bit) - use with aes256gcm, aegis256, etc.
openssl rand -hex 32
# For 16-byte key (128-bit) - use with aes128gcm, aegis128l, etc.
openssl rand -hex 16
```
Example output:
```
2d7a30108d3eb3e45c90a732041fe54778bdcf707c76749fab7da335d1b39c1d
```
**Important:** Store your encryption key securely. If you lose the key, your encrypted data cannot be recovered.
## Creating an Encrypted Database
### Method 1: Using PRAGMAs
Start Turso and set encryption parameters before creating tables:
```bash
tursodb database.db
```
Then in the SQL shell:
```sql
PRAGMA cipher = 'aegis256';
PRAGMA hexkey = '2d7a30108d3eb3e45c90a732041fe54778bdcf707c76749fab7da335d1b39c1d';
-- Now create your tables and insert data
CREATE TABLE users (id INTEGER PRIMARY KEY, name TEXT);
INSERT INTO users VALUES (1, 'Alice');
```
### Method 2: Using URI Parameters
Specify encryption parameters directly in the database URI:
```bash
tursodb "file:database.db?cipher=aegis256&hexkey=2d7a30108d3eb3e45c90a732041fe54778bdcf707c76749fab7da335d1b39c1d"
```
## Opening an Encrypted Database
**Important:** To open an existing encrypted database, you MUST provide the cipher and key as URI parameters:
```bash
tursodb "file:database.db?cipher=aegis256&hexkey=2d7a30108d3eb3e45c90a732041fe54778bdcf707c76749fab7da335d1b39c1d"
```
Attempting to open an encrypted database without the correct cipher and key will fail.
## Troubleshooting
### "Database is encrypted or is not a database"
This error occurs when:
- Opening an encrypted database without providing cipher/key
- Using the wrong cipher or key
- The database file is corrupted
### "Invalid hex string"
- Ensure your key is valid hexadecimal (0-9, a-f)
- Check the key length matches your cipher (32 hex chars for 16 bytes, 64 for 32 bytes)

53
cli/manuals/index.md Normal file
View File

@@ -0,0 +1,53 @@
# Turso Manual Pages
Welcome to the Turso manual pages. These pages provide detailed documentation for various features and capabilities.
## Available Manuals
### cdc - Change Data Capture
Track and capture all data changes made to your database tables for replication, syncing, and reactive applications.
```
.manual cdc
```
### encryption - At-Rest Database Encryption
Protect your database files with transparent encryption using AES-GCM or high-performance AEGIS ciphers.
```
.manual encryption
```
### mcp - Model Context Protocol
Learn about Turso's built-in MCP server that enables AI assistants and other tools to interact with your databases.
```
.manual mcp
```
### vector - Vector Search
Build similarity search and semantic search applications using vector embeddings and distance functions.
```
.manual vector
```
## Usage
To view a manual page, use the `.manual` or `.man` command:
```
.manual <page> # Full command
.man <page> # Short alias
```
### Examples
```
.manual mcp # View the MCP server documentation
.man mcp # Same as above, using the alias
```
## Adding More Manuals
Additional manual pages will be added for other features as they become available.

145
cli/manuals/mcp.md Normal file
View File

@@ -0,0 +1,145 @@
---
display_name: "a built-in MCP server"
---
# MCP Server - Model Context Protocol
## Overview
Turso includes a built-in MCP (Model Context Protocol) server that allows AI assistants and other tools to interact with your databases programmatically.
## Starting the MCP Server
To start Turso in MCP server mode, use the `--mcp` flag:
```bash
/path/to/tursodb --mcp
```
This will start an MCP server that listens on stdio for commands. The server starts without a database connection, allowing you to select or create databases using MCP commands.
## Available Tools
The MCP server exposes the following tools:
### `query`
Execute a SQL query and get results.
**Parameters:**
- `sql` (string, required): The SQL query to execute
**Example:**
```json
{
"tool": "query",
"arguments": {
"sql": "SELECT * FROM users WHERE age > 21"
}
}
```
### `execute`
Execute a SQL statement that modifies data (INSERT, UPDATE, DELETE).
**Parameters:**
- `sql` (string, required): The SQL statement to execute
**Example:**
```json
{
"tool": "execute",
"arguments": {
"sql": "INSERT INTO users (name, age) VALUES ('Alice', 30)"
}
}
```
### `list_tables`
List all tables in the database.
**Example:**
```json
{
"tool": "list_tables",
"arguments": {}
}
```
### `describe_table`
Get the schema of a specific table.
**Parameters:**
- `table` (string, required): The name of the table to describe
**Example:**
```json
{
"tool": "describe_table",
"arguments": {
"table": "users"
}
}
```
## Integration with AI Assistants
### Claude Desktop
To use with Claude Desktop, add the following to your Claude Desktop configuration:
```json
{
"mcpServers": {
"turso": {
"command": "/path/to/tursodb",
"args": ["--mcp"]
}
}
}
```
Note: You must use the full path to the tursodb executable as Claude Desktop may not recognize items in your PATH.
### Other MCP Clients
The Turso MCP server follows the standard MCP protocol and can be used with any MCP-compatible client.
## Example Session
Here's an example of using the MCP server:
1. **Start the server:**
```bash
/path/to/tursodb --mcp
```
2. **Query data:**
```
> What tables are in the database?
[Uses list_tables tool]
> Show me all users older than 25
[Uses query tool with "SELECT * FROM users WHERE age > 25"]
```
3. **Modify data:**
```
> Add a new user named Bob who is 28 years old
[Uses execute tool with INSERT statement]
```
## Troubleshooting
### Server doesn't start
- Ensure the tursodb executable path is correct
- Check that you're using the full path to the executable
### Commands fail
- Verify SQL syntax is correct
- Check that tables and columns exist
- Ensure you have write permissions if modifying data
## See Also
- MCP Protocol Documentation: https://modelcontextprotocol.io
- Turso Documentation: https://turso.tech/docs

211
cli/manuals/vector.md Normal file
View File

@@ -0,0 +1,211 @@
---
display_name: "vector search"
---
# Vector Search
## Overview
Turso supports vector operations for building similarity search and semantic search applications. Vectors are stored as BLOBs and can be searched using distance functions to find similar items.
**Important:** Vector indexes are not yet supported. All vector searches currently use brute-force scanning, which means searching scales linearly with the number of rows.
## Vector Types
Turso supports two vector formats:
- **`vector32`** - 32-bit floating-point vectors (4 bytes per dimension)
- **`vector64`** - 64-bit floating-point vectors (8 bytes per dimension)
## Creating and Storing Vectors
Vectors are stored in vector columns as-is and are represented on disk as BLOBs. Embeddings are interpreted and validated at runtime. For an embedding to be valid, it must be either a JSON array of float values or a binary blob created with the Turso vector functions `vector32` / `vector64`.
### Basic Example
```sql
-- Create a table with vector embeddings
CREATE TABLE documents (
id INTEGER PRIMARY KEY,
content TEXT,
embedding BLOB -- Store vector as BLOB
);
-- Insert vectors using vector32() or vector64()
INSERT INTO documents VALUES
(1, 'Introduction to databases', vector32('[0.1, 0.2, 0.3, 0.4]')),
(2, 'SQL query optimization', vector32('[0.2, 0.1, 0.4, 0.3]')),
(3, 'Vector similarity search', vector32('[0.4, 0.3, 0.2, 0.1]'));
```
### Working with Higher Dimensions
Real embeddings typically have hundreds or thousands of dimensions:
```sql
-- Example with 1536-dimensional embeddings (like OpenAI's ada-002)
CREATE TABLE embeddings (
id INTEGER PRIMARY KEY,
text TEXT,
vector BLOB
);
-- Insert a 1536-dimensional vector
INSERT INTO embeddings VALUES
(1, 'Sample text', vector32('[0.001, 0.002, ..., 0.1536]'));
```
## Vector Functions
### Creation Functions
- **`vector32(text)`** - Create a 32-bit float vector from JSON array
- **`vector64(text)`** - Create a 64-bit float vector from JSON array
### Distance Functions
- **`vector_distance_l2(v1, v2)`** - Euclidean (L2) distance between vectors
- **`vector_distance_cos(v1, v2)`** - Cosine distance (1 - cosine similarity)
### Utility Functions
- **`vector_extract(blob)`** - Convert vector BLOB back to JSON text
- **`vector_concat(v1, v2)`** - Concatenate two vectors
- **`vector_slice(v, start, end)`** - Extract a portion of a vector
## Similarity Search Examples
### Finding Similar Documents
```sql
-- Find documents similar to a query vector
WITH query AS (
SELECT vector32('[0.15, 0.25, 0.35, 0.45]') AS query_vector
)
SELECT
id,
content,
vector_distance_l2(embedding, query_vector) AS distance
FROM documents, query
ORDER BY distance
LIMIT 5;
```
### Cosine Similarity Search
Cosine similarity is often preferred for text embeddings:
```sql
-- Find semantically similar documents using cosine distance
WITH query AS (
SELECT vector32('[0.15, 0.25, 0.35, 0.45]') AS query_vector
)
SELECT
id,
content,
vector_distance_cos(embedding, query_vector) AS cosine_distance
FROM documents, query
ORDER BY cosine_distance
LIMIT 5;
```
### Threshold-Based Search
Find all vectors within a certain distance:
```sql
-- Find all documents within distance threshold
WITH query AS (
SELECT vector32('[0.15, 0.25, 0.35, 0.45]') AS query_vector
)
SELECT
id,
content,
vector_distance_l2(embedding, query_vector) AS distance
FROM documents, query
WHERE vector_distance_l2(embedding, query_vector) < 0.5
ORDER BY distance;
```
## Working with Vector Data
### Inspecting Vectors
```sql
-- Extract and view vector data as JSON
SELECT id, vector_extract(embedding) AS vector_json
FROM documents
LIMIT 3;
```
### Vector Operations
```sql
-- Concatenate two vectors
SELECT vector_concat(
vector32('[1.0, 2.0]'),
vector32('[3.0, 4.0]')
) AS concatenated;
-- Slice a vector (extract dimensions 2-4)
SELECT vector_slice(
vector32('[1.0, 2.0, 3.0, 4.0, 5.0]'),
2, 4
) AS sliced;
```
## Building a Semantic Search Application
Here's a complete example of a semantic search application:
```sql
-- 1. Create schema
CREATE TABLE articles (
id INTEGER PRIMARY KEY,
title TEXT,
content TEXT,
embedding BLOB
);
-- 2. Insert pre-computed embeddings
INSERT INTO articles VALUES
(1, 'Database Fundamentals', 'An introduction to relational databases...',
vector32('[0.12, -0.34, 0.56, ...]')),
(2, 'Machine Learning Basics', 'Understanding neural networks and deep learning...',
vector32('[0.23, 0.45, -0.67, ...]')),
(3, 'Web Development Guide', 'Modern web applications with JavaScript...',
vector32('[0.34, -0.12, 0.78, ...]'));
-- 3. Search for similar articles
WITH search_embedding AS (
-- This would come from your embedding model for the search query
SELECT vector32('[0.15, -0.30, 0.60, ...]') AS query_vec
)
SELECT
a.id,
a.title,
vector_distance_cos(a.embedding, s.query_vec) AS similarity_score
FROM articles a, search_embedding s
ORDER BY similarity_score
LIMIT 10;
```
## Performance Considerations
Since vector indexes are not yet implemented, keep in mind:
- **Linear scan**: Every search examines all rows in the table
- **Memory usage**: Vectors consume significant space (4 bytes × dimensions for vector32)
- **Optimization tips**:
- Use smaller dimensions when possible
- Pre-filter data with WHERE clauses before distance calculations
- Consider partitioning large datasets
- Use vector32 instead of vector64 unless high precision is needed
## Common Use Cases
- **Semantic search**: Find documents by meaning rather than keywords
- **Recommendation systems**: Find similar items based on embeddings
- **Duplicate detection**: Identify near-duplicate content
- **Image similarity**: Search for similar images using visual embeddings
- **Anomaly detection**: Find outliers in high-dimensional data