Merge 'Feature: Collate' from Pedro Muniz

I was implementing `ALTER TABLE .. RENAME TO`, and I noticed that `COLLATE` was necessary for it to work. This is a relatively big PR because, to properly implement `COLLATE`, I needed to add a field to a couple of instructions that are emitted frequently, and a lot of boilerplate is required when you make such a change. My main source of reference was this page from SQLite: https://sqlite.org/datatype3.html#collation. It gives a good description of the precedence of collation in certain expressions. I did write a couple of tests that I thought caught the edge cases of `COLLATE`, but honestly, I may have missed a few. I would appreciate some help later to write more tests. `Collate` basically just compares two `TEXT` values according to some comparison function. If the values are not both `TEXT`, it just falls back to the normal comparison we are already doing. `Collate` happens in four main places: the `Collate` expression modifier, the `Binary` expression, the `Column` expression, and `Order By` / `Group By`. In `Binary`, `Order By`, and `Group By` expressions, the collation sequence for the comparisons can be derived explicitly, through the `COLLATE` keyword, or implicitly, if there is a `COLLATE` definition in `CREATE TABLE`. If neither is present, it defaults to `Binary` collation. For the `Column` expression, it tries to use the collation in the `CREATE TABLE` column definition. If that is not present, it defaults to `Binary` collation. Lastly, there was some repetition in how the `Binary` expression was being translated, so I removed that part. As mentioned in `COMPAT.md`, I did not implement custom collation sequences yet, as that would have deterred me from properly implementing the built-in ones. I have some ideas about how I can extend my current implementation to support that with FFI, but I think that is best saved for a different PR. Closes #1367
2026-02-16 05:24:22 +01:00 · 2025-05-20 10:52:11 +03:00
parent 4cf9305947 52533cab40
commit 32aac8e9ef
32 changed files with 826 additions and 243 deletions
--- a/COMPAT.md
+++ b/COMPAT.md
@@ -193,7 +193,7 @@ Feature support of [sqlite expr syntax](https://www.sqlite.org/lang_expr.html).
 | ... OVER (...)            | No      | Is incorrectly ignored                   |
 | (expr)                    | Yes     |                                          |
 | CAST (expr AS type)       | Yes     |                                          |
-| COLLATE                   | No      |                                          |
+| COLLATE                   | Partial | Custom Collations not supported          |
 | (NOT) LIKE                | Yes     |                                          |
 | (NOT) GLOB                | Yes     |                                          |
 | (NOT) REGEXP              | No      |                                          |
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1841,10 +1841,12 @@ dependencies = [
 "ryu",
 "sorted-vec",
 "strum",
+ "strum_macros",
 "tempfile",
 "test-log",
 "thiserror 1.0.69",
 "tracing",
+ "uncased",
 ]

 [[package]]
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -50,6 +50,8 @@ limbo_series = { path = "extensions/series", version = "0.0.20" }
 limbo_sqlite3_parser = { path = "vendored/sqlite3-parser", version = "0.0.20" }
 limbo_time = { path = "extensions/time", version = "0.0.20" }
 limbo_uuid = { path = "extensions/uuid", version = "0.0.20" }
+strum = { version = "0.26", features = ["derive"] }
+strum_macros = "0.26"

 [profile.release]
 debug = "line-tables-only"
--- a/Makefile
+++ b/Makefile
@@ -51,7 +51,7 @@ uv-sync:
 	uv sync --all-packages
 .PHONE: uv-sync

-test: limbo uv-sync test-compat test-vector test-sqlite3 test-shell test-extensions test-memory test-write test-update test-constraint
+test: limbo uv-sync test-compat test-vector test-sqlite3 test-shell test-extensions test-memory test-write test-update test-constraint test-collate
 .PHONY: test

 test-extensions: limbo uv-sync
@@ -100,6 +100,10 @@ test-update: limbo uv-sync
 	SQLITE_EXEC=$(SQLITE_EXEC) uv run --project limbo_test test-update
 .PHONY: test-update

+test-collate: limbo uv-sync
+	SQLITE_EXEC=$(SQLITE_EXEC) uv run --project limbo_test test-collate
+.PHONY: test-collate
+
 test-constraint: limbo uv-sync
 	SQLITE_EXEC=$(SQLITE_EXEC) uv run --project limbo_test test-constraint
 .PHONY: test-constraint
--- a/core/Cargo.toml
+++ b/core/Cargo.toml
@@ -69,11 +69,13 @@ limbo_ipaddr = { workspace = true, optional = true, features = ["static"] }
 limbo_completion = { workspace = true, optional = true, features = ["static"] }
 limbo_ext_tests = { workspace = true, optional = true, features = ["static"] }
 miette = "7.6.0"
-strum = "0.26"
+strum = { workspace = true }
 parking_lot = "0.12.3"
 crossbeam-skiplist = "0.1.3"
 tracing = "0.1.41"
 ryu = "1.0.19"
+uncased = "0.9.10"
+strum_macros = {workspace = true }
 bitflags = "2.9.0"

 [build-dependencies]
--- a/core/schema.rs
+++ b/core/schema.rs
@@ -1,3 +1,4 @@
+use crate::translate::collate::CollationSeq;
 use crate::{util::normalize_ident, Result};
 use crate::{LimboError, VirtualTable};
 use core::fmt;
@@ -235,6 +236,10 @@ impl BTreeTable {
        sql.push_str(");\n");
        sql
    }
+
+    pub fn column_collations(&self) -> Vec<Option<CollationSeq>> {
+        self.columns.iter().map(|column| column.collation).collect()
+    }
 }

 #[derive(Debug, Default)]
@@ -261,6 +266,7 @@ impl PseudoTable {
            notnull: false,
            default: None,
            unique: false,
+            collation: None,
        });
    }
    pub fn get_column(&self, name: &str) -> Option<(usize, &Column)> {
@@ -418,6 +424,7 @@ fn create_table(
                let mut notnull = false;
                let mut order = SortOrder::Asc;
                let mut unique = false;
+                let mut collation = None;
                for c_def in &col_def.constraints {
                    match &c_def.constraint {
                        limbo_sqlite3_parser::ast::ColumnConstraint::PrimaryKey {
@@ -442,6 +449,10 @@ fn create_table(
                            }
                            unique = true;
                        }
+                        limbo_sqlite3_parser::ast::ColumnConstraint::Collate { collation_name } => {
+                            collation = Some(CollationSeq::new(collation_name.0.as_str())?);
+                        }
+                        // Collate
                        _ => {}
                    }
                }
@@ -464,6 +475,7 @@ fn create_table(
                    notnull,
                    default,
                    unique,
+                    collation,
                });
            }
            if options.contains(TableOptions::WITHOUT_ROWID) {
@@ -534,6 +546,7 @@ pub struct Column {
    pub notnull: bool,
    pub default: Option<Expr>,
    pub unique: bool,
+    pub collation: Option<CollationSeq>,
 }

 impl Column {
@@ -737,6 +750,7 @@ pub fn sqlite_schema_table() -> BTreeTable {
                notnull: false,
                default: None,
                unique: false,
+                collation: None,
            },
            Column {
                name: Some("name".to_string()),
@@ -747,6 +761,7 @@ pub fn sqlite_schema_table() -> BTreeTable {
                notnull: false,
                default: None,
                unique: false,
+                collation: None,
            },
            Column {
                name: Some("tbl_name".to_string()),
@@ -757,6 +772,7 @@ pub fn sqlite_schema_table() -> BTreeTable {
                notnull: false,
                default: None,
                unique: false,
+                collation: None,
            },
            Column {
                name: Some("rootpage".to_string()),
@@ -767,6 +783,7 @@ pub fn sqlite_schema_table() -> BTreeTable {
                notnull: false,
                default: None,
                unique: false,
+                collation: None,
            },
            Column {
                name: Some("sql".to_string()),
@@ -777,6 +794,7 @@ pub fn sqlite_schema_table() -> BTreeTable {
                notnull: false,
                default: None,
                unique: false,
+                collation: None,
            },
        ],
        unique_sets: None,
@@ -1406,6 +1424,7 @@ mod tests {
                notnull: false,
                default: None,
                unique: false,
+                collation: None,
            }],
            unique_sets: None,
        };
--- a/core/storage/btree.rs
+++ b/core/storage/btree.rs
@@ -7,7 +7,7 @@ use crate::{
            TableLeafCell,
        },
    },
-    translate::plan::IterationDirection,
+    translate::{collate::CollationSeq, plan::IterationDirection},
    types::IndexKeySortOrder,
    MvCursor,
 };
@@ -407,6 +407,10 @@ pub struct BTreeCursor {
    context: Option<CursorContext>,
    /// Store whether the Cursor is in a valid state. Meaning if it is pointing to a valid cell index or not
    valid_state: CursorValidState,
+    /// Collations for Index Btree constraint checks
+    /// Contains the Collation Seq for the whole Index
+    /// This Vec should be empty for Table Btree
+    collations: Vec<CollationSeq>,
 }

 impl BTreeCursor {
@@ -414,6 +418,7 @@ impl BTreeCursor {
        mv_cursor: Option<Rc<RefCell<MvCursor>>>,
        pager: Rc<Pager>,
        root_page: usize,
+        collations: Vec<CollationSeq>,
    ) -> Self {
        Self {
            mv_cursor,
@@ -435,17 +440,27 @@ impl BTreeCursor {
            count: 0,
            context: None,
            valid_state: CursorValidState::Valid,
+            collations,
        }
    }

+    pub fn new_table(
+        mv_cursor: Option<Rc<RefCell<MvCursor>>>,
+        pager: Rc<Pager>,
+        root_page: usize,
+    ) -> Self {
+        Self::new(mv_cursor, pager, root_page, Vec::new())
+    }
+
    pub fn new_index(
        mv_cursor: Option<Rc<RefCell<MvCursor>>>,
        pager: Rc<Pager>,
        root_page: usize,
        index: &Index,
+        collations: Vec<CollationSeq>,
    ) -> Self {
        let index_key_sort_order = IndexKeySortOrder::from_index(index);
-        let mut cursor = Self::new(mv_cursor, pager, root_page);
+        let mut cursor = Self::new(mv_cursor, pager, root_page, collations);
        cursor.index_key_sort_order = index_key_sort_order;
        cursor
    }
@@ -610,6 +625,7 @@ impl BTreeCursor {
                            record_slice_same_num_cols,
                            index_key.get_values(),
                            self.index_key_sort_order,
+                            &self.collations,
                        );
                        order
                    };
@@ -668,6 +684,7 @@ impl BTreeCursor {
                            record_slice_same_num_cols,
                            index_key.get_values(),
                            self.index_key_sort_order,
+                            &self.collations,
                        );
                        order
                    };
@@ -1248,6 +1265,7 @@ impl BTreeCursor {
                            record_slice_same_num_cols,
                            index_key.get_values(),
                            self.index_key_sort_order,
+                            &self.collations,
                        );
                        order
                    };
@@ -1308,6 +1326,7 @@ impl BTreeCursor {
                            record_slice_same_num_cols,
                            index_key.get_values(),
                            self.index_key_sort_order,
+                            &self.collations,
                        );
                        order
                    };
@@ -1588,6 +1607,7 @@ impl BTreeCursor {
                    record_slice_equal_number_of_cols,
                    index_key.get_values(),
                    self.index_key_sort_order,
+                    &self.collations,
                );
                // in sqlite btrees left child pages have <= keys.
                // in general, in forwards iteration we want to find the first key that matches the seek condition.
@@ -1912,6 +1932,7 @@ impl BTreeCursor {
                record_slice_equal_number_of_cols,
                key.get_values(),
                self.index_key_sort_order,
+                &self.collations,
            );
            let found = match seek_op {
                SeekOp::GT => cmp.is_gt(),
@@ -2079,6 +2100,7 @@ impl BTreeCursor {
                                    .unwrap()
                                    .get_values(),
                        self.index_key_sort_order,
+                        &self.collations,
                        ) == Ordering::Equal {

                        tracing::debug!("insert_into_page: found exact match with cell_idx={cell_idx}, overwriting");
@@ -3725,6 +3747,7 @@ impl BTreeCursor {
                        key.to_index_key_values(),
                        self.get_immutable_record().as_ref().unwrap().get_values(),
                        self.index_key_sort_order,
+                        &self.collations,
                    );
                    match order {
                        Ordering::Less | Ordering::Equal => {
@@ -4755,6 +4778,10 @@ impl BTreeCursor {
            }
        }
    }
+
+    pub fn collations(&self) -> &[CollationSeq] {
+        &self.collations
+    }
 }

 #[cfg(debug_assertions)]
@@ -5945,7 +5972,7 @@ mod tests {
    }

    fn validate_btree(pager: Rc<Pager>, page_idx: usize) -> (usize, bool) {
-        let cursor = BTreeCursor::new(None, pager.clone(), page_idx);
+        let cursor = BTreeCursor::new_table(None, pager.clone(), page_idx);
        let page = pager.read_page(page_idx).unwrap();
        let page = page.get();
        let contents = page.contents.as_ref().unwrap();
@@ -6035,7 +6062,7 @@ mod tests {
    }

    fn format_btree(pager: Rc<Pager>, page_idx: usize, depth: usize) -> String {
-        let cursor = BTreeCursor::new(None, pager.clone(), page_idx);
+        let cursor = BTreeCursor::new_table(None, pager.clone(), page_idx);
        let page = pager.read_page(page_idx).unwrap();
        let page = page.get();
        let contents = page.contents.as_ref().unwrap();
@@ -6165,7 +6192,7 @@ mod tests {
            .as_slice(),
        ] {
            let (pager, root_page) = empty_btree();
-            let mut cursor = BTreeCursor::new(None, pager.clone(), root_page);
+            let mut cursor = BTreeCursor::new_table(None, pager.clone(), root_page);
            for (key, size) in sequence.iter() {
                run_until_done(
                    || {
@@ -6232,7 +6259,7 @@ mod tests {
        tracing::info!("super seed: {}", seed);
        for _ in 0..attempts {
            let (pager, root_page) = empty_btree();
-            let mut cursor = BTreeCursor::new(None, pager.clone(), root_page);
+            let mut cursor = BTreeCursor::new_table(None, pager.clone(), root_page);
            let mut keys = SortedVec::new();
            tracing::info!("seed: {}", seed);
            for insert_id in 0..inserts {
@@ -6339,7 +6366,7 @@ mod tests {
            let (pager, _) = empty_btree();
            let index_root_page = pager.btree_create(&CreateBTreeFlags::new_index());
            let index_root_page = index_root_page as usize;
-            let mut cursor = BTreeCursor::new(None, pager.clone(), index_root_page);
+            let mut cursor = BTreeCursor::new_table(None, pager.clone(), index_root_page);
            let mut keys = SortedVec::new();
            tracing::info!("seed: {}", seed);
            for _ in 0..inserts {
@@ -6574,7 +6601,7 @@ mod tests {
    #[ignore]
    pub fn test_clear_overflow_pages() -> Result<()> {
        let (pager, db_header) = setup_test_env(5);
-        let mut cursor = BTreeCursor::new(None, pager.clone(), 1);
+        let mut cursor = BTreeCursor::new_table(None, pager.clone(), 1);

        let max_local = payload_overflow_threshold_max(PageType::TableLeaf, 4096);
        let usable_size = cursor.usable_space();
@@ -6673,7 +6700,7 @@ mod tests {
    #[test]
    pub fn test_clear_overflow_pages_no_overflow() -> Result<()> {
        let (pager, db_header) = setup_test_env(5);
-        let mut cursor = BTreeCursor::new(None, pager.clone(), 1);
+        let mut cursor = BTreeCursor::new_table(None, pager.clone(), 1);

        let small_payload = vec![b'A'; 10];

@@ -6717,7 +6744,7 @@ mod tests {
    fn test_btree_destroy() -> Result<()> {
        let initial_size = 3;
        let (pager, db_header) = setup_test_env(initial_size);
-        let mut cursor = BTreeCursor::new(None, pager.clone(), 2);
+        let mut cursor = BTreeCursor::new_table(None, pager.clone(), 2);
        assert_eq!(
            db_header.lock().database_size,
            initial_size,
@@ -7374,7 +7401,7 @@ mod tests {
        let (pager, root_page) = empty_btree();
        let mut keys = Vec::new();
        for i in 0..10000 {
-            let mut cursor = BTreeCursor::new(None, pager.clone(), root_page);
+            let mut cursor = BTreeCursor::new_table(None, pager.clone(), root_page);
            tracing::info!("INSERT INTO t VALUES ({});", i,);
            let value = ImmutableRecord::from_registers(&[Register::Value(Value::Integer(i))]);
            tracing::trace!("before insert {}", i);
@@ -7401,7 +7428,7 @@ mod tests {
            format_btree(pager.clone(), root_page, 0)
        );
        for key in keys.iter() {
-            let mut cursor = BTreeCursor::new(None, pager.clone(), root_page);
+            let mut cursor = BTreeCursor::new_table(None, pager.clone(), root_page);
            let key = Value::Integer(*key);
            let exists = run_until_done(|| cursor.exists(&key), pager.deref()).unwrap();
            assert!(exists, "key not found {}", key);
@@ -7450,7 +7477,7 @@ mod tests {

        // Insert 10,000 records in to the BTree.
        for i in 1..=10000 {
-            let mut cursor = BTreeCursor::new(None, pager.clone(), root_page);
+            let mut cursor = BTreeCursor::new_table(None, pager.clone(), root_page);
            let value = ImmutableRecord::from_registers(&[Register::Value(Value::Text(
                Text::new("hello world"),
            ))]);
@@ -7478,7 +7505,7 @@ mod tests {

        // Delete records with 500 <= key <= 3500
        for i in 500..=3500 {
-            let mut cursor = BTreeCursor::new(None, pager.clone(), root_page);
+            let mut cursor = BTreeCursor::new_table(None, pager.clone(), root_page);
            let seek_key = SeekKey::TableRowId(i as u64);

            let found = run_until_done(|| cursor.seek(seek_key.clone(), SeekOp::EQ), pager.deref())
@@ -7495,7 +7522,7 @@ mod tests {
                continue;
            }

-            let mut cursor = BTreeCursor::new(None, pager.clone(), root_page);
+            let mut cursor = BTreeCursor::new_table(None, pager.clone(), root_page);
            let key = Value::Integer(i);
            let exists = run_until_done(|| cursor.exists(&key), pager.deref()).unwrap();
            assert!(exists, "Key {} should exist but doesn't", i);
@@ -7503,7 +7530,7 @@ mod tests {

        // Verify the deleted records don't exist.
        for i in 500..=3500 {
-            let mut cursor = BTreeCursor::new(None, pager.clone(), root_page);
+            let mut cursor = BTreeCursor::new_table(None, pager.clone(), root_page);
            let key = Value::Integer(i);
            let exists = run_until_done(|| cursor.exists(&key), pager.deref()).unwrap();
            assert!(!exists, "Deleted key {} still exists", i);
@@ -7525,7 +7552,7 @@ mod tests {
        let (pager, root_page) = empty_btree();

        for i in 0..iterations {
-            let mut cursor = BTreeCursor::new(None, pager.clone(), root_page);
+            let mut cursor = BTreeCursor::new_table(None, pager.clone(), root_page);
            tracing::info!("INSERT INTO t VALUES ({});", i,);
            let value = ImmutableRecord::from_registers(&[Register::Value(Value::Text(Text {
                value: huge_texts[i].as_bytes().to_vec(),
@@ -7554,7 +7581,7 @@ mod tests {
                format_btree(pager.clone(), root_page, 0)
            );
        }
-        let mut cursor = BTreeCursor::new(None, pager.clone(), root_page);
+        let mut cursor = BTreeCursor::new_table(None, pager.clone(), root_page);
        cursor.move_to_root();
        for i in 0..iterations {
            let rowid = run_until_done(|| cursor.get_next_record(None), pager.deref()).unwrap();
@@ -7565,7 +7592,7 @@ mod tests {
    #[test]
    pub fn test_read_write_payload_with_offset() {
        let (pager, root_page) = empty_btree();
-        let mut cursor = BTreeCursor::new(None, pager.clone(), root_page);
+        let mut cursor = BTreeCursor::new(None, pager.clone(), root_page, vec![]);
        let offset = 2; // blobs data starts at offset 2
        let initial_text = "hello world";
        let initial_blob = initial_text.as_bytes().to_vec();
@@ -7641,7 +7668,7 @@ mod tests {
    #[test]
    pub fn test_read_write_payload_with_overflow_page() {
        let (pager, root_page) = empty_btree();
-        let mut cursor = BTreeCursor::new(None, pager.clone(), root_page);
+        let mut cursor = BTreeCursor::new(None, pager.clone(), root_page, vec![]);
        let mut large_blob = vec![b'A'; 40960 - 11]; // insert large blob. 40960 = 10 page long.
        let hello_world = b"hello world";
        large_blob.extend_from_slice(hello_world);
--- a/core/translate/collate.rs
+++ b/core/translate/collate.rs
@@ -0,0 +1,53 @@
+use std::{cmp::Ordering, str::FromStr as _};
+
+use tracing::Level;
+
+// TODO: in the future allow user to define collation sequences
+// Will have to meddle with ffi for this
+#[derive(
+    Debug, Clone, Copy, Eq, PartialEq, strum_macros::Display, strum_macros::EnumString, Default,
+)]
+#[strum(ascii_case_insensitive)]
+/// **Pre defined collation sequences**\
+/// Collating functions only matter when comparing string values.
+/// Numeric values are always compared numerically, and BLOBs are always compared byte-by-byte using memcmp().
+pub enum CollationSeq {
+    /// Standard String compare
+    #[default]
+    Binary,
+    /// Ascii case insensitive
+    NoCase,
+    /// Same as Binary but with trimmed whitespace
+    Rtrim,
+}
+
+impl CollationSeq {
+    pub fn new(collation: &str) -> crate::Result<Self> {
+        CollationSeq::from_str(collation).map_err(|_| {
+            crate::LimboError::ParseError(format!("no such collation sequence: {}", collation))
+        })
+    }
+
+    pub fn compare_strings(&self, lhs: &str, rhs: &str) -> Ordering {
+        tracing::event!(Level::DEBUG, collate = %self, lhs, rhs);
+        match self {
+            CollationSeq::Binary => Self::binary_cmp(lhs, rhs),
+            CollationSeq::NoCase => Self::nocase_cmp(lhs, rhs),
+            CollationSeq::Rtrim => Self::rtrim_cmp(lhs, rhs),
+        }
+    }
+
+    fn binary_cmp(lhs: &str, rhs: &str) -> Ordering {
+        lhs.cmp(rhs)
+    }
+
+    fn nocase_cmp(lhs: &str, rhs: &str) -> Ordering {
+        let nocase_lhs = uncased::UncasedStr::new(lhs);
+        let nocase_rhs = uncased::UncasedStr::new(rhs);
+        nocase_lhs.cmp(nocase_rhs)
+    }
+
+    fn rtrim_cmp(lhs: &str, rhs: &str) -> Ordering {
+        lhs.trim_end().cmp(rhs.trim_end())
+    }
+}
--- a/core/translate/emitter.rs
+++ b/core/translate/emitter.rs
@@ -275,7 +275,7 @@ pub fn emit_query<'a>(

    // Initialize cursors and other resources needed for query execution
    if let Some(ref mut order_by) = plan.order_by {
-        init_order_by(program, t_ctx, order_by)?;
+        init_order_by(program, t_ctx, order_by, &plan.table_references)?;
    }

    if let Some(ref group_by) = plan.group_by {
@@ -914,6 +914,7 @@ fn emit_update_insns(
            rhs: idx_rowid_reg,
            target_pc: constraint_check,
            flags: CmpInsFlags::default(), // TODO: not sure what type of comparison flag is needed
+            collation: program.curr_collation(),
        });

        program.emit_insn(Insn::Halt {
@@ -943,6 +944,7 @@ fn emit_update_insns(
                rhs: beg,
                target_pc: record_label,
                flags: CmpInsFlags::default(),
+                collation: program.curr_collation(),
            });

            program.emit_insn(Insn::NotExists {
--- a/core/translate/expr.rs
+++ b/core/translate/expr.rs
@@ -16,6 +16,8 @@ use crate::vdbe::{
 };
 use crate::{Result, Value};

+use super::collate::CollationSeq;
+
 #[derive(Debug, Clone, Copy)]
 pub struct ConditionMetadata {
    pub jump_if_condition_is_true: bool,
@@ -38,59 +40,6 @@ fn emit_cond_jump(program: &mut ProgramBuilder, cond_meta: ConditionMetadata, re
        });
    }
 }
-macro_rules! emit_cmp_insn {
-    (
-        $program:expr,
-        $cond:expr,
-        $op_true:ident,
-        $op_false:ident,
-        $lhs:expr,
-        $rhs:expr
-    ) => {{
-        if $cond.jump_if_condition_is_true {
-            $program.emit_insn(Insn::$op_true {
-                lhs: $lhs,
-                rhs: $rhs,
-                target_pc: $cond.jump_target_when_true,
-                flags: CmpInsFlags::default(),
-            });
-        } else {
-            $program.emit_insn(Insn::$op_false {
-                lhs: $lhs,
-                rhs: $rhs,
-                target_pc: $cond.jump_target_when_false,
-                flags: CmpInsFlags::default().jump_if_null(),
-            });
-        }
-    }};
-}
-
-macro_rules! emit_cmp_null_insn {
-    (
-        $program:expr,
-        $cond:expr,
-        $op_true:ident,
-        $op_false:ident,
-        $lhs:expr,
-        $rhs:expr
-    ) => {{
-        if $cond.jump_if_condition_is_true {
-            $program.emit_insn(Insn::$op_true {
-                lhs: $lhs,
-                rhs: $rhs,
-                target_pc: $cond.jump_target_when_true,
-                flags: CmpInsFlags::default().null_eq(),
-            });
-        } else {
-            $program.emit_insn(Insn::$op_false {
-                lhs: $lhs,
-                rhs: $rhs,
-                target_pc: $cond.jump_target_when_false,
-                flags: CmpInsFlags::default().null_eq(),
-            });
-        }
-    }};
-}

 macro_rules! expect_arguments_exact {
    (
@@ -240,51 +189,6 @@ pub fn translate_condition_expr(
                resolver,
            )?;
        }
-        ast::Expr::Binary(lhs, op, rhs)
-            if matches!(
-                op,
-                ast::Operator::Greater
-                    | ast::Operator::GreaterEquals
-                    | ast::Operator::Less
-                    | ast::Operator::LessEquals
-                    | ast::Operator::Equals
-                    | ast::Operator::NotEquals
-                    | ast::Operator::Is
-                    | ast::Operator::IsNot
-            ) =>
-        {
-            let lhs_reg = program.alloc_register();
-            let rhs_reg = program.alloc_register();
-            translate_expr(program, Some(referenced_tables), lhs, lhs_reg, resolver)?;
-            translate_expr(program, Some(referenced_tables), rhs, rhs_reg, resolver)?;
-            match op {
-                ast::Operator::Greater => {
-                    emit_cmp_insn!(program, condition_metadata, Gt, Le, lhs_reg, rhs_reg)
-                }
-                ast::Operator::GreaterEquals => {
-                    emit_cmp_insn!(program, condition_metadata, Ge, Lt, lhs_reg, rhs_reg)
-                }
-                ast::Operator::Less => {
-                    emit_cmp_insn!(program, condition_metadata, Lt, Ge, lhs_reg, rhs_reg)
-                }
-                ast::Operator::LessEquals => {
-                    emit_cmp_insn!(program, condition_metadata, Le, Gt, lhs_reg, rhs_reg)
-                }
-                ast::Operator::Equals => {
-                    emit_cmp_insn!(program, condition_metadata, Eq, Ne, lhs_reg, rhs_reg)
-                }
-                ast::Operator::NotEquals => {
-                    emit_cmp_insn!(program, condition_metadata, Ne, Eq, lhs_reg, rhs_reg)
-                }
-                ast::Operator::Is => {
-                    emit_cmp_null_insn!(program, condition_metadata, Eq, Ne, lhs_reg, rhs_reg)
-                }
-                ast::Operator::IsNot => {
-                    emit_cmp_null_insn!(program, condition_metadata, Ne, Eq, lhs_reg, rhs_reg)
-                }
-                _ => unreachable!(),
-            }
-        }
        ast::Expr::Binary(_, _, _) => {
            let result_reg = program.alloc_register();
            translate_expr(program, Some(referenced_tables), expr, result_reg, resolver)?;
@@ -370,6 +274,7 @@ pub fn translate_condition_expr(
                            rhs: rhs_reg,
                            target_pc: jump_target_when_true,
                            flags: CmpInsFlags::default(),
+                            collation: program.curr_collation(),
                        });
                    } else {
                        // If this is the last condition, we need to jump to the 'jump_target_when_false' label if there is no match.
@@ -378,6 +283,7 @@ pub fn translate_condition_expr(
                            rhs: rhs_reg,
                            target_pc: condition_metadata.jump_target_when_false,
                            flags: CmpInsFlags::default().jump_if_null(),
+                            collation: program.curr_collation(),
                        });
                    }
                }
@@ -399,6 +305,7 @@ pub fn translate_condition_expr(
                        rhs: rhs_reg,
                        target_pc: condition_metadata.jump_target_when_false,
                        flags: CmpInsFlags::default().jump_if_null(),
+                        collation: program.curr_collation(),
                    });
                }
                // If we got here, then none of the conditions were a match, so we jump to the 'jump_target_when_true' label if 'jump_if_condition_is_true'.
@@ -552,15 +459,54 @@ pub fn translate_expr(
                translate_expr(program, referenced_tables, e1, shared_reg, resolver)?;

                emit_binary_insn(program, op, shared_reg, shared_reg, target_register)?;
+                program.reset_collation();
                Ok(target_register)
            } else {
                let e1_reg = program.alloc_registers(2);
                let e2_reg = e1_reg + 1;

                translate_expr(program, referenced_tables, e1, e1_reg, resolver)?;
+                let left_collation_ctx = program.curr_collation_ctx();
+                program.reset_collation();
+
                translate_expr(program, referenced_tables, e2, e2_reg, resolver)?;
+                let right_collation_ctx = program.curr_collation_ctx();
+                program.reset_collation();
+
+                /*
+                 * The rules for determining which collating function to use for a binary comparison
+                 * operator (=, <, >, <=, >=, !=, IS, and IS NOT) are as follows:
+                 *
+                 * 1. If either operand has an explicit collating function assignment using the postfix COLLATE operator,
+                 * then the explicit collating function is used for comparison,
+                 * with precedence to the collating function of the left operand.
+                 *
+                 * 2. If either operand is a column, then the collating function of that column is used
+                 * with precedence to the left operand. For the purposes of the previous sentence,
+                 * a column name preceded by one or more unary "+" operators and/or CAST operators is still considered a column name.
+                 *
+                 * 3. Otherwise, the BINARY collating function is used for comparison.
+                 */
+                let collation_ctx = {
+                    match (left_collation_ctx, right_collation_ctx) {
+                        (Some((c_left, true)), _) => Some((c_left, true)),
+                        (_, Some((c_right, true))) => Some((c_right, true)),
+                        (Some((c_left, from_collate_left)), None) => {
+                            Some((c_left, from_collate_left))
+                        }
+                        (None, Some((c_right, from_collate_right))) => {
+                            Some((c_right, from_collate_right))
+                        }
+                        (Some((c_left, from_collate_left)), Some((_, false))) => {
+                            Some((c_left, from_collate_left))
+                        }
+                        _ => None,
+                    }
+                };
+                program.set_collation(collation_ctx);

                emit_binary_insn(program, op, e1_reg, e2_reg, target_register)?;
+                program.reset_collation();
                Ok(target_register)
            }
        }
@@ -609,6 +555,7 @@ pub fn translate_expr(
                        target_pc: next_case_label,
                        // A NULL result is considered untrue when evaluating WHEN terms.
                        flags: CmpInsFlags::default().jump_if_null(),
+                        collation: program.curr_collation(),
                    }),
                    // CASE WHEN 0 THEN 0 ELSE 1 becomes ifnot 0 branch to next clause
                    None => program.emit_insn(Insn::IfNot {
@@ -679,7 +626,14 @@ pub fn translate_expr(
            });
            Ok(target_register)
        }
-        ast::Expr::Collate(_, _) => todo!(),
+        ast::Expr::Collate(expr, collation) => {
+            // Translate the inner expression first and only then set the current collation. Setting it
+            // beforehand would not work, because the inner translation may overwrite it.
+            translate_expr(program, referenced_tables, expr, target_register, resolver)?;
+            let collation = CollationSeq::new(collation)?;
+            program.set_collation(Some((collation, true)));
+            Ok(target_register)
+        }
        ast::Expr::DoublyQualified(_, _, _) => todo!(),
        ast::Expr::Exists(_) => todo!(),
        ast::Expr::FunctionCall {
@@ -1839,6 +1793,12 @@ pub fn translate_expr(
            let table_reference = referenced_tables.as_ref().unwrap().get(*table).unwrap();
            let index = table_reference.op.index();
            let use_covering_index = table_reference.utilizes_covering_index();
+
+            let Some(table_column) = table_reference.table.get_column_at(*column) else {
+                crate::bail_parse_error!("column index out of bounds");
+            };
+            // Counterintuitive, but a column always sets a collation: the default is used if it declares none.
+            program.set_collation(Some((table_column.collation.unwrap_or_default(), false)));
            match table_reference.op {
                // If we are reading a column from a table, we find the cursor that corresponds to
                // the table and read the column from the cursor.
@@ -1889,10 +1849,7 @@ pub fn translate_expr(
                                    dest: target_register,
                                });
                            }
-                            let Some(column) = table_reference.table.get_column_at(*column) else {
-                                crate::bail_parse_error!("column index out of bounds");
-                            };
-                            maybe_apply_affinity(column.ty, target_register, program);
+                            maybe_apply_affinity(table_column.ty, target_register, program);
                            Ok(target_register)
                        }
                        Table::Virtual(_) => {
@@ -2216,6 +2173,7 @@ fn emit_binary_insn(
                    rhs,
                    target_pc: if_true_label,
                    flags: CmpInsFlags::default(),
+                    collation: program.curr_collation(),
                },
                target_register,
                if_true_label,
@@ -2232,6 +2190,7 @@ fn emit_binary_insn(
                    rhs,
                    target_pc: if_true_label,
                    flags: CmpInsFlags::default(),
+                    collation: program.curr_collation(),
                },
                target_register,
                if_true_label,
@@ -2248,6 +2207,7 @@ fn emit_binary_insn(
                    rhs,
                    target_pc: if_true_label,
                    flags: CmpInsFlags::default(),
+                    collation: program.curr_collation(),
                },
                target_register,
                if_true_label,
@@ -2264,6 +2224,7 @@ fn emit_binary_insn(
                    rhs,
                    target_pc: if_true_label,
                    flags: CmpInsFlags::default(),
+                    collation: program.curr_collation(),
                },
                target_register,
                if_true_label,
@@ -2280,6 +2241,7 @@ fn emit_binary_insn(
                    rhs,
                    target_pc: if_true_label,
                    flags: CmpInsFlags::default(),
+                    collation: program.curr_collation(),
                },
                target_register,
                if_true_label,
@@ -2296,6 +2258,7 @@ fn emit_binary_insn(
                    rhs,
                    target_pc: if_true_label,
                    flags: CmpInsFlags::default(),
+                    collation: program.curr_collation(),
                },
                target_register,
                if_true_label,
@@ -2389,6 +2352,7 @@ fn emit_binary_insn(
                    rhs,
                    target_pc: if_true_label,
                    flags: CmpInsFlags::default().null_eq(),
+                    collation: program.curr_collation(),
                },
                target_register,
                if_true_label,
@@ -2403,6 +2367,7 @@ fn emit_binary_insn(
                    rhs,
                    target_pc: if_true_label,
                    flags: CmpInsFlags::default().null_eq(),
+                    collation: program.curr_collation(),
                },
                target_register,
                if_true_label,
--- a/core/translate/group_by.rs
+++ b/core/translate/group_by.rs
@@ -5,6 +5,7 @@ use limbo_sqlite3_parser::ast;
 use crate::{
    function::AggFunc,
    schema::{Column, PseudoTable},
+    translate::collate::CollationSeq,
    util::exprs_are_equivalent,
    vdbe::{
        builder::{CursorType, ProgramBuilder},
@@ -117,10 +118,39 @@ pub fn init_group_by(
    let row_source = if let Some(sort_order) = group_by.sort_order.as_ref() {
        let sort_cursor = program.alloc_cursor_id(None, CursorType::Sorter);
        let sorter_column_count = plan.group_by_sorter_column_count();
+        // This should resolve collations the same way ORDER BY does:
+        /*
+         * Terms of the ORDER BY clause that is part of a SELECT statement may be assigned a collating sequence using the COLLATE operator,
+         * in which case the specified collating function is used for sorting.
+         * Otherwise, if the expression sorted by an ORDER BY clause is a column,
+         * then the collating sequence of the column is used to determine sort order.
+         * If the expression is not a column and has no COLLATE clause, then the BINARY collating sequence is used.
+         */
+        let collations = group_by
+            .exprs
+            .iter()
+            .map(|expr| match expr {
+                ast::Expr::Collate(_, collation_name) => {
+                    CollationSeq::new(collation_name).map(Some)
+                }
+                ast::Expr::Column { table, column, .. } => {
+                    let table_reference = plan.table_references.get(*table).unwrap();
+
+                    let Some(table_column) = table_reference.table.get_column_at(*column) else {
+                        crate::bail_parse_error!("column index out of bounds");
+                    };
+
+                    Ok(table_column.collation)
+                }
+                _ => Ok(Some(CollationSeq::default())),
+            })
+            .collect::<Result<Vec<_>>>()?;
+
        program.emit_insn(Insn::SorterOpen {
            cursor_id: sort_cursor,
            columns: sorter_column_count,
            order: sort_order.clone(),
+            collations,
        });
        let pseudo_cursor = group_by_create_pseudo_table(program, sorter_column_count);
        GroupByRowSource::Sorter {
@@ -217,6 +247,7 @@ pub fn group_by_create_pseudo_table(
            notnull: false,
            default: None,
            unique: false,
+            collation: None,
        })
        .collect::<Vec<_>>();

@@ -461,6 +492,7 @@ pub fn group_by_process_single_group(
        start_reg_a: registers.reg_group_exprs_cmp,
        start_reg_b: groups_start_reg,
        count: group_by.exprs.len(),
+        collation: program.curr_collation(),
    });

    program.add_comment(
--- a/core/translate/index.rs
+++ b/core/translate/index.rs
@@ -120,6 +120,7 @@ pub fn translate_create_index(
        cursor_id: sorter_cursor_id,
        columns: columns.len(),
        order,
+        collations: tbl.column_collations(),
    });
    let content_reg = program.alloc_register();
    program.emit_insn(Insn::OpenPseudo {
@@ -405,6 +406,7 @@ pub fn translate_drop_index(
        rhs: dest_reg,
        target_pc: next_label,
        flags: CmpInsFlags::default(),
+        collation: program.curr_collation(),
    });

    // read type of table
@@ -420,6 +422,7 @@ pub fn translate_drop_index(
        rhs: dest_reg,
        target_pc: next_label,
        flags: CmpInsFlags::default(),
+        collation: program.curr_collation(),
    });

    program.emit_insn(Insn::RowId {
--- a/core/translate/main_loop.rs
+++ b/core/translate/main_loop.rs
@@ -1151,24 +1151,28 @@ fn emit_seek_termination(
            rhs: start_reg,
            target_pc: loop_end,
            flags: CmpInsFlags::default(),
+            collation: program.curr_collation(),
        }),
        (false, SeekOp::GT) => program.emit_insn(Insn::Gt {
            lhs: rowid_reg.unwrap(),
            rhs: start_reg,
            target_pc: loop_end,
            flags: CmpInsFlags::default(),
+            collation: program.curr_collation(),
        }),
        (false, SeekOp::LE) => program.emit_insn(Insn::Le {
            lhs: rowid_reg.unwrap(),
            rhs: start_reg,
            target_pc: loop_end,
            flags: CmpInsFlags::default(),
+            collation: program.curr_collation(),
        }),
        (false, SeekOp::LT) => program.emit_insn(Insn::Lt {
            lhs: rowid_reg.unwrap(),
            rhs: start_reg,
            target_pc: loop_end,
            flags: CmpInsFlags::default(),
+            collation: program.curr_collation(),
        }),
        (_, SeekOp::EQ) => {
            panic!("An index termination condition is never EQ")
--- a/core/translate/mod.rs
+++ b/core/translate/mod.rs
@@ -8,6 +8,7 @@
 //! will read rows from the database and filter them according to a WHERE clause.

 pub(crate) mod aggregation;
+pub(crate) mod collate;
 pub(crate) mod delete;
 pub(crate) mod emitter;
 pub(crate) mod expr;
--- a/core/translate/optimizer/join.rs
+++ b/core/translate/optimizer/join.rs
@@ -1205,6 +1205,7 @@ mod tests {
            notnull: false,
            default: None,
            unique: false,
+            collation: None,
        }
    }
    fn _create_column_of_type(name: &str, ty: Type) -> Column {
--- a/core/translate/order_by.rs
+++ b/core/translate/order_by.rs
@@ -4,6 +4,7 @@ use limbo_sqlite3_parser::ast::{self, SortOrder};

 use crate::{
    schema::{Column, PseudoTable},
+    translate::collate::CollationSeq,
    util::exprs_are_equivalent,
    vdbe::{
        builder::{CursorType, ProgramBuilder},
@@ -15,7 +16,7 @@ use crate::{
 use super::{
    emitter::{Resolver, TranslateCtx},
    expr::translate_expr,
-    plan::{ResultSetColumn, SelectPlan},
+    plan::{ResultSetColumn, SelectPlan, TableReference},
    result_row::{emit_offset, emit_result_row_and_limit},
 };

@@ -33,16 +34,42 @@ pub fn init_order_by(
    program: &mut ProgramBuilder,
    t_ctx: &mut TranslateCtx,
    order_by: &[(ast::Expr, SortOrder)],
+    referenced_tables: &[TableReference],
 ) -> Result<()> {
    let sort_cursor = program.alloc_cursor_id(None, CursorType::Sorter);
    t_ctx.meta_sort = Some(SortMetadata {
        sort_cursor,
        reg_sorter_data: program.alloc_register(),
    });
+
+    /*
+     * Terms of the ORDER BY clause that is part of a SELECT statement may be assigned a collating sequence using the COLLATE operator,
+     * in which case the specified collating function is used for sorting.
+     * Otherwise, if the expression sorted by an ORDER BY clause is a column,
+     * then the collating sequence of the column is used to determine sort order.
+     * If the expression is not a column and has no COLLATE clause, then the BINARY collating sequence is used.
+     */
+    let collations = order_by
+        .iter()
+        .map(|(expr, _)| match expr {
+            ast::Expr::Collate(_, collation_name) => CollationSeq::new(collation_name).map(Some),
+            ast::Expr::Column { table, column, .. } => {
+                let table_reference = referenced_tables.get(*table).unwrap();
+
+                let Some(table_column) = table_reference.table.get_column_at(*column) else {
+                    crate::bail_parse_error!("column index out of bounds");
+                };
+
+                Ok(table_column.collation)
+            }
+            _ => Ok(Some(CollationSeq::default())),
+        })
+        .collect::<Result<Vec<_>>>()?;
    program.emit_insn(Insn::SorterOpen {
        cursor_id: sort_cursor,
        columns: order_by.len(),
        order: order_by.iter().map(|(_, direction)| *direction).collect(),
+        collations,
    });
    Ok(())
 }
@@ -73,6 +100,7 @@ pub fn emit_order_by(
            notnull: false,
            default: None,
            unique: false,
+            collation: None,
        });
    }
    for i in 0..result_columns.len() {
@@ -92,6 +120,7 @@ pub fn emit_order_by(
            notnull: false,
            default: None,
            unique: false,
+            collation: None,
        });
    }

--- a/core/translate/plan.rs
+++ b/core/translate/plan.rs
@@ -610,6 +610,7 @@ impl TableReference {
                    notnull: false,
                    default: None,
                    unique: false,
+                    collation: None,
                })
                .collect(),
        )));
--- a/core/translate/schema.rs
+++ b/core/translate/schema.rs
@@ -719,6 +719,7 @@ pub fn translate_drop_table(
        rhs: table_reg,
        target_pc: next_label,
        flags: CmpInsFlags::default(),
+        collation: program.curr_collation(),
    });
    program.emit_insn(Insn::Column {
        cursor_id: sqlite_schema_cursor_id,
@@ -730,6 +731,7 @@ pub fn translate_drop_table(
        rhs: table_type,
        target_pc: next_label,
        flags: CmpInsFlags::default(),
+        collation: program.curr_collation(),
    });
    program.emit_insn(Insn::RowId {
        cursor_id: sqlite_schema_cursor_id,
--- a/core/types.rs
+++ b/core/types.rs
@@ -7,6 +7,7 @@ use crate::pseudo::PseudoCursor;
 use crate::schema::Index;
 use crate::storage::btree::BTreeCursor;
 use crate::storage::sqlite3_ondisk::write_varint;
+use crate::translate::collate::CollationSeq;
 use crate::translate::plan::IterationDirection;
 use crate::vdbe::sorter::Sorter;
 use crate::vdbe::{Register, VTabOpaqueCursor};
@@ -1103,11 +1104,18 @@ pub fn compare_immutable(
    l: &[RefValue],
    r: &[RefValue],
    index_key_sort_order: IndexKeySortOrder,
+    collations: &[CollationSeq],
 ) -> std::cmp::Ordering {
    assert_eq!(l.len(), r.len());
    for (i, (l, r)) in l.iter().zip(r).enumerate() {
        let column_order = index_key_sort_order.get_sort_order_for_col(i);
-        let cmp = l.partial_cmp(r).unwrap();
+        let collation = collations.get(i).copied().unwrap_or_default();
+        let cmp = match (l, r) {
+            (RefValue::Text(left), RefValue::Text(right)) => {
+                collation.compare_strings(left.as_str(), right.as_str())
+            }
+            _ => l.partial_cmp(r).unwrap(),
+        };
        if !cmp.is_eq() {
            return match column_order {
                SortOrder::Asc => cmp,
--- a/core/util.rs
+++ b/core/util.rs
@@ -1,6 +1,7 @@
 use crate::{
    function::Func,
    schema::{self, Column, Schema, Type},
+    translate::collate::CollationSeq,
    types::{Value, ValueType},
    LimboError, OpenFlags, Result, Statement, StepResult, SymbolTable, IO,
 };
@@ -499,66 +500,83 @@ pub fn columns_from_create_table_body(body: &ast::CreateTableBody) -> crate::Res
                    return None;
                }
            }
-            let column = Column {
-                name: Some(name.0.clone()),
-                ty: match column_def.col_type {
-                    Some(ref data_type) => {
-                        // https://www.sqlite.org/datatype3.html
-                        let type_name = data_type.name.as_str().to_uppercase();
-                        if type_name.contains("INT") {
-                            Type::Integer
-                        } else if type_name.contains("CHAR")
-                            || type_name.contains("CLOB")
-                            || type_name.contains("TEXT")
-                        {
-                            Type::Text
-                        } else if type_name.contains("BLOB") || type_name.is_empty() {
-                            Type::Blob
-                        } else if type_name.contains("REAL")
-                            || type_name.contains("FLOA")
-                            || type_name.contains("DOUB")
-                        {
-                            Type::Real
-                        } else {
-                            Type::Numeric
+            let column =
+                Column {
+                    name: Some(name.0.clone()),
+                    ty: match column_def.col_type {
+                        Some(ref data_type) => {
+                            // https://www.sqlite.org/datatype3.html
+                            let type_name = data_type.name.as_str().to_uppercase();
+                            if type_name.contains("INT") {
+                                Type::Integer
+                            } else if type_name.contains("CHAR")
+                                || type_name.contains("CLOB")
+                                || type_name.contains("TEXT")
+                            {
+                                Type::Text
+                            } else if type_name.contains("BLOB") || type_name.is_empty() {
+                                Type::Blob
+                            } else if type_name.contains("REAL")
+                                || type_name.contains("FLOA")
+                                || type_name.contains("DOUB")
+                            {
+                                Type::Real
+                            } else {
+                                Type::Numeric
+                            }
                        }
-                    }
-                    None => Type::Null,
-                },
-                default: column_def
-                    .constraints
-                    .iter()
-                    .find_map(|c| match &c.constraint {
-                        limbo_sqlite3_parser::ast::ColumnConstraint::Default(val) => {
-                            Some(val.clone())
-                        }
-                        _ => None,
+                        None => Type::Null,
+                    },
+                    default: column_def
+                        .constraints
+                        .iter()
+                        .find_map(|c| match &c.constraint {
+                            limbo_sqlite3_parser::ast::ColumnConstraint::Default(val) => {
+                                Some(val.clone())
+                            }
+                            _ => None,
+                        }),
+                    notnull: column_def.constraints.iter().any(|c| {
+                        matches!(
+                            c.constraint,
+                            limbo_sqlite3_parser::ast::ColumnConstraint::NotNull { .. }
+                        )
                    }),
-                notnull: column_def.constraints.iter().any(|c| {
-                    matches!(
-                        c.constraint,
-                        limbo_sqlite3_parser::ast::ColumnConstraint::NotNull { .. }
-                    )
-                }),
-                ty_str: column_def
-                    .col_type
-                    .clone()
-                    .map(|t| t.name.to_string())
-                    .unwrap_or_default(),
-                primary_key: column_def.constraints.iter().any(|c| {
-                    matches!(
-                        c.constraint,
-                        limbo_sqlite3_parser::ast::ColumnConstraint::PrimaryKey { .. }
-                    )
-                }),
-                is_rowid_alias: false,
-                unique: column_def.constraints.iter().any(|c| {
-                    matches!(
-                        c.constraint,
-                        limbo_sqlite3_parser::ast::ColumnConstraint::Unique(..)
-                    )
-                }),
-            };
+                    ty_str: column_def
+                        .col_type
+                        .clone()
+                        .map(|t| t.name.to_string())
+                        .unwrap_or_default(),
+                    primary_key: column_def.constraints.iter().any(|c| {
+                        matches!(
+                            c.constraint,
+                            limbo_sqlite3_parser::ast::ColumnConstraint::PrimaryKey { .. }
+                        )
+                    }),
+                    is_rowid_alias: false,
+                    unique: column_def.constraints.iter().any(|c| {
+                        matches!(
+                            c.constraint,
+                            limbo_sqlite3_parser::ast::ColumnConstraint::Unique(..)
+                        )
+                    }),
+                    collation: column_def
+                        .constraints
+                        .iter()
+                        .find_map(|c| match &c.constraint {
+                            // TODO: decide whether panicking is the correct behavior here.
+                            // Currently there cannot be any user-defined collation sequences.
+                            // In the future, however, a user could define a collation sequence, create a table
+                            // with it, and then close and reopen the db. The `expect` below may then panic
+                            // if the collation sequence is not registered before the columns are read.
+                            limbo_sqlite3_parser::ast::ColumnConstraint::Collate {
+                                collation_name,
+                            } => Some(CollationSeq::new(collation_name.0.as_str()).expect(
+                                "collation should have been set correctly in create table",
+                            )),
+                            _ => None,
+                        }),
+                };
            Some(column)
        })
        .collect::<Vec<_>>())
--- a/core/vdbe/builder.rs
+++ b/core/vdbe/builder.rs
@@ -12,7 +12,10 @@ use crate::{
    parameters::Parameters,
    schema::{BTreeTable, Index, PseudoTable},
    storage::sqlite3_ondisk::DatabaseHeader,
-    translate::plan::{ResultSetColumn, TableReference},
+    translate::{
+        collate::CollationSeq,
+        plan::{ResultSetColumn, TableReference},
+    },
    Connection, VirtualTable,
 };

@@ -38,6 +41,8 @@ pub struct ProgramBuilder {
    pub parameters: Parameters,
    pub result_columns: Vec<ResultSetColumn>,
    pub table_references: Vec<TableReference>,
+    /// Current collation sequence. The bool is `true` when it was set by an explicit COLLATE expression.
+    collation: Option<(CollationSeq, bool)>,
 }

 #[derive(Debug, Clone)]
@@ -95,6 +100,7 @@ impl ProgramBuilder {
            parameters: Parameters::new(),
            result_columns: Vec::new(),
            table_references: Vec::new(),
+            collation: None,
        }
    }

@@ -589,6 +595,22 @@ impl ProgramBuilder {
            .unwrap_or_else(|| panic!("Cursor not found: {}", table_identifier))
    }

+    pub fn set_collation(&mut self, c: Option<(CollationSeq, bool)>) {
+        self.collation = c
+    }
+
+    pub fn curr_collation_ctx(&self) -> Option<(CollationSeq, bool)> {
+        self.collation
+    }
+
+    pub fn curr_collation(&self) -> Option<CollationSeq> {
+        self.collation.map(|c| c.0)
+    }
+
+    pub fn reset_collation(&mut self) {
+        self.collation = None;
+    }
+
    pub fn build(
        mut self,
        database_header: Arc<SpinLock<DatabaseHeader>>,
--- a/core/vdbe/execute.rs
+++ b/core/vdbe/execute.rs
@@ -365,6 +365,7 @@ pub fn op_compare(
        start_reg_a,
        start_reg_b,
        count,
+        collation,
    } = insn
    else {
        unreachable!("unexpected Insn {:?}", insn)
@@ -372,6 +373,7 @@ pub fn op_compare(
    let start_reg_a = *start_reg_a;
    let start_reg_b = *start_reg_b;
    let count = *count;
+    let collation = collation.unwrap_or_default();

    if start_reg_a + count > start_reg_b {
        return Err(LimboError::InternalError(
@@ -383,7 +385,12 @@ pub fn op_compare(
    for i in 0..count {
        let a = state.registers[start_reg_a + i].get_owned_value();
        let b = state.registers[start_reg_b + i].get_owned_value();
-        cmp = Some(a.cmp(b));
+        cmp = match (a, b) {
+            (Value::Text(left), Value::Text(right)) => {
+                Some(collation.compare_strings(left.as_str(), right.as_str()))
+            }
+            _ => Some(a.cmp(b)),
+        };
        if cmp != Some(std::cmp::Ordering::Equal) {
            break;
        }
@@ -525,6 +532,7 @@ pub fn op_eq(
        rhs,
        target_pc,
        flags,
+        collation,
    } = insn
    else {
        unreachable!("unexpected Insn {:?}", insn)
@@ -536,9 +544,10 @@ pub fn op_eq(
    let cond = *state.registers[lhs].get_owned_value() == *state.registers[rhs].get_owned_value();
    let nulleq = flags.has_nulleq();
    let jump_if_null = flags.has_jump_if_null();
+    let collation = collation.unwrap_or_default();
    match (
-        &state.registers[lhs].get_owned_value(),
-        &state.registers[rhs].get_owned_value(),
+        state.registers[lhs].get_owned_value(),
+        state.registers[rhs].get_owned_value(),
    ) {
        (_, Value::Null) | (Value::Null, _) => {
            if (nulleq && cond) || (!nulleq && jump_if_null) {
@@ -547,8 +556,16 @@ pub fn op_eq(
                state.pc += 1;
            }
        }
-        _ => {
-            if *state.registers[lhs].get_owned_value() == *state.registers[rhs].get_owned_value() {
+        (Value::Text(lhs), Value::Text(rhs)) => {
+            let order = collation.compare_strings(lhs.as_str(), rhs.as_str());
+            if order.is_eq() {
+                state.pc = target_pc.to_offset_int();
+            } else {
+                state.pc += 1;
+            }
+        }
+        (lhs, rhs) => {
+            if *lhs == *rhs {
                state.pc = target_pc.to_offset_int();
            } else {
                state.pc += 1;
@@ -570,6 +587,7 @@ pub fn op_ne(
        rhs,
        target_pc,
        flags,
+        collation,
    } = insn
    else {
        unreachable!("unexpected Insn {:?}", insn)
@@ -581,9 +599,10 @@ pub fn op_ne(
    let cond = *state.registers[lhs].get_owned_value() != *state.registers[rhs].get_owned_value();
    let nulleq = flags.has_nulleq();
    let jump_if_null = flags.has_jump_if_null();
+    let collation = collation.unwrap_or_default();
    match (
-        &state.registers[lhs].get_owned_value(),
-        &state.registers[rhs].get_owned_value(),
+        state.registers[lhs].get_owned_value(),
+        state.registers[rhs].get_owned_value(),
    ) {
        (_, Value::Null) | (Value::Null, _) => {
            if (nulleq && cond) || (!nulleq && jump_if_null) {
@@ -592,8 +611,16 @@ pub fn op_ne(
                state.pc += 1;
            }
        }
-        _ => {
-            if *state.registers[lhs].get_owned_value() != *state.registers[rhs].get_owned_value() {
+        (Value::Text(lhs), Value::Text(rhs)) => {
+            let order = collation.compare_strings(lhs.as_str(), rhs.as_str());
+            if order.is_ne() {
+                state.pc = target_pc.to_offset_int();
+            } else {
+                state.pc += 1;
+            }
+        }
+        (lhs, rhs) => {
+            if *lhs != *rhs {
                state.pc = target_pc.to_offset_int();
            } else {
                state.pc += 1;
@@ -615,6 +642,7 @@ pub fn op_lt(
        rhs,
        target_pc,
        flags,
+        collation,
    } = insn
    else {
        unreachable!("unexpected Insn {:?}", insn)
@@ -624,9 +652,10 @@ pub fn op_lt(
    let rhs = *rhs;
    let target_pc = *target_pc;
    let jump_if_null = flags.has_jump_if_null();
+    let collation = collation.unwrap_or_default();
    match (
-        &state.registers[lhs].get_owned_value(),
-        &state.registers[rhs].get_owned_value(),
+        state.registers[lhs].get_owned_value(),
+        state.registers[rhs].get_owned_value(),
    ) {
        (_, Value::Null) | (Value::Null, _) => {
            if jump_if_null {
@@ -635,8 +664,16 @@ pub fn op_lt(
                state.pc += 1;
            }
        }
-        _ => {
-            if *state.registers[lhs].get_owned_value() < *state.registers[rhs].get_owned_value() {
+        (Value::Text(lhs), Value::Text(rhs)) => {
+            let order = collation.compare_strings(lhs.as_str(), rhs.as_str());
+            if order.is_lt() {
+                state.pc = target_pc.to_offset_int();
+            } else {
+                state.pc += 1;
+            }
+        }
+        (lhs, rhs) => {
+            if *lhs < *rhs {
                state.pc = target_pc.to_offset_int();
            } else {
                state.pc += 1;
@@ -658,6 +695,7 @@ pub fn op_le(
        rhs,
        target_pc,
        flags,
+        collation,
    } = insn
    else {
        unreachable!("unexpected Insn {:?}", insn)
@@ -667,9 +705,10 @@ pub fn op_le(
    let rhs = *rhs;
    let target_pc = *target_pc;
    let jump_if_null = flags.has_jump_if_null();
+    let collation = collation.unwrap_or_default();
    match (
-        &state.registers[lhs].get_owned_value(),
-        &state.registers[rhs].get_owned_value(),
+        state.registers[lhs].get_owned_value(),
+        state.registers[rhs].get_owned_value(),
    ) {
        (_, Value::Null) | (Value::Null, _) => {
            if jump_if_null {
@@ -678,8 +717,16 @@ pub fn op_le(
                state.pc += 1;
            }
        }
-        _ => {
-            if *state.registers[lhs].get_owned_value() <= *state.registers[rhs].get_owned_value() {
+        (Value::Text(lhs), Value::Text(rhs)) => {
+            let order = collation.compare_strings(lhs.as_str(), rhs.as_str());
+            if order.is_le() {
+                state.pc = target_pc.to_offset_int();
+            } else {
+                state.pc += 1;
+            }
+        }
+        (lhs, rhs) => {
+            if *lhs <= *rhs {
                state.pc = target_pc.to_offset_int();
            } else {
                state.pc += 1;
@@ -701,6 +748,7 @@ pub fn op_gt(
        rhs,
        target_pc,
        flags,
+        collation,
    } = insn
    else {
        unreachable!("unexpected Insn {:?}", insn)
@@ -710,9 +758,10 @@ pub fn op_gt(
    let rhs = *rhs;
    let target_pc = *target_pc;
    let jump_if_null = flags.has_jump_if_null();
+    let collation = collation.unwrap_or_default();
    match (
-        &state.registers[lhs].get_owned_value(),
-        &state.registers[rhs].get_owned_value(),
+        state.registers[lhs].get_owned_value(),
+        state.registers[rhs].get_owned_value(),
    ) {
        (_, Value::Null) | (Value::Null, _) => {
            if jump_if_null {
@@ -721,8 +770,16 @@ pub fn op_gt(
                state.pc += 1;
            }
        }
-        _ => {
-            if *state.registers[lhs].get_owned_value() > *state.registers[rhs].get_owned_value() {
+        (Value::Text(lhs), Value::Text(rhs)) => {
+            let order = collation.compare_strings(lhs.as_str(), rhs.as_str());
+            if order.is_gt() {
+                state.pc = target_pc.to_offset_int();
+            } else {
+                state.pc += 1;
+            }
+        }
+        (lhs, rhs) => {
+            if *lhs > *rhs {
                state.pc = target_pc.to_offset_int();
            } else {
                state.pc += 1;
@@ -744,6 +801,7 @@ pub fn op_ge(
        rhs,
        target_pc,
        flags,
+        collation,
    } = insn
    else {
        unreachable!("unexpected Insn {:?}", insn)
@@ -753,9 +811,10 @@ pub fn op_ge(
    let rhs = *rhs;
    let target_pc = *target_pc;
    let jump_if_null = flags.has_jump_if_null();
+    let collation = collation.unwrap_or_default();
    match (
-        &state.registers[lhs].get_owned_value(),
-        &state.registers[rhs].get_owned_value(),
+        state.registers[lhs].get_owned_value(),
+        state.registers[rhs].get_owned_value(),
    ) {
        (_, Value::Null) | (Value::Null, _) => {
            if jump_if_null {
@@ -764,8 +823,16 @@ pub fn op_ge(
                state.pc += 1;
            }
        }
-        _ => {
-            if *state.registers[lhs].get_owned_value() >= *state.registers[rhs].get_owned_value() {
+        (Value::Text(lhs), Value::Text(rhs)) => {
+            let order = collation.compare_strings(lhs.as_str(), rhs.as_str());
+            if order.is_ge() {
+                state.pc = target_pc.to_offset_int();
+            } else {
+                state.pc += 1;
+            }
+        }
+        (lhs, rhs) => {
+            if *lhs >= *rhs {
                state.pc = target_pc.to_offset_int();
            } else {
                state.pc += 1;
@@ -860,15 +927,39 @@ pub fn op_open_read(
    let mut cursors = state.cursors.borrow_mut();
    match cursor_type {
        CursorType::BTreeTable(_) => {
-            let cursor = BTreeCursor::new(mv_cursor, pager.clone(), *root_page);
+            let cursor = BTreeCursor::new_table(mv_cursor, pager.clone(), *root_page);
            cursors
                .get_mut(*cursor_id)
                .unwrap()
                .replace(Cursor::new_btree(cursor));
        }
        CursorType::BTreeIndex(index) => {
-            let cursor =
-                BTreeCursor::new_index(mv_cursor, pager.clone(), *root_page, index.as_ref());
+            let conn = program.connection.upgrade().unwrap();
+            let schema = conn.schema.try_read().ok_or(LimboError::SchemaLocked)?;
+            let table = schema
+                .get_table(&index.table_name)
+                .map_or(None, |table| table.btree());
+            let collations = table.map_or(Vec::new(), |table| {
+                index
+                    .columns
+                    .iter()
+                    .map(|c| {
+                        table
+                            .columns
+                            .get(c.pos_in_table)
+                            .unwrap()
+                            .collation
+                            .unwrap_or_default()
+                    })
+                    .collect()
+            });
+            let cursor = BTreeCursor::new_index(
+                mv_cursor,
+                pager.clone(),
+                *root_page,
+                index.as_ref(),
+                collations,
+            );
            cursors
                .get_mut(*cursor_id)
                .unwrap()
@@ -2088,7 +2179,12 @@ pub fn op_idx_ge(
            let idx_values = idx_record.get_values();
            let idx_values = &idx_values[..record_from_regs.len()];
            let record_values = record_from_regs.get_values();
-            let ord = compare_immutable(&idx_values, &record_values, cursor.index_key_sort_order);
+            let ord = compare_immutable(
+                &idx_values,
+                &record_values,
+                cursor.index_key_sort_order,
+                cursor.collations(),
+            );
            if ord.is_ge() {
                target_pc.to_offset_int()
            } else {
@@ -2147,7 +2243,12 @@ pub fn op_idx_le(
            let idx_values = idx_record.get_values();
            let idx_values = &idx_values[..record_from_regs.len()];
            let record_values = record_from_regs.get_values();
-            let ord = compare_immutable(&idx_values, &record_values, cursor.index_key_sort_order);
+            let ord = compare_immutable(
+                &idx_values,
+                &record_values,
+                cursor.index_key_sort_order,
+                cursor.collations(),
+            );
            if ord.is_le() {
                target_pc.to_offset_int()
            } else {
@@ -2188,7 +2289,12 @@ pub fn op_idx_gt(
            let idx_values = idx_record.get_values();
            let idx_values = &idx_values[..record_from_regs.len()];
            let record_values = record_from_regs.get_values();
-            let ord = compare_immutable(&idx_values, &record_values, cursor.index_key_sort_order);
+            let ord = compare_immutable(
+                &idx_values,
+                &record_values,
+                cursor.index_key_sort_order,
+                cursor.collations(),
+            );
            if ord.is_gt() {
                target_pc.to_offset_int()
            } else {
@@ -2229,7 +2335,12 @@ pub fn op_idx_lt(
            let idx_values = idx_record.get_values();
            let idx_values = &idx_values[..record_from_regs.len()];
            let record_values = record_from_regs.get_values();
-            let ord = compare_immutable(&idx_values, &record_values, cursor.index_key_sort_order);
+            let ord = compare_immutable(
+                &idx_values,
+                &record_values,
+                cursor.index_key_sort_order,
+                cursor.collations(),
+            );
            if ord.is_lt() {
                target_pc.to_offset_int()
            } else {
@@ -2698,11 +2809,18 @@ pub fn op_sorter_open(
        cursor_id,
        columns: _,
        order,
+        collations,
    } = insn
    else {
        unreachable!("unexpected Insn {:?}", insn)
    };
-    let cursor = Sorter::new(order);
+    let cursor = Sorter::new(
+        order,
+        collations
+            .iter()
+            .map(|collation| collation.unwrap_or_default())
+            .collect(),
+    );
    let mut cursors = state.cursors.borrow_mut();
    cursors
        .get_mut(*cursor_id)
@@ -4128,14 +4246,38 @@ pub fn op_open_write(
        None => None,
    };
    if let Some(index) = maybe_index {
-        let cursor =
-            BTreeCursor::new_index(mv_cursor, pager.clone(), root_page as usize, index.as_ref());
+        let conn = program.connection.upgrade().unwrap();
+        let schema = conn.schema.try_read().ok_or(LimboError::SchemaLocked)?;
+        let table = schema
+            .get_table(&index.table_name)
+            .map_or(None, |table| table.btree());
+        let collations = table.map_or(Vec::new(), |table| {
+            index
+                .columns
+                .iter()
+                .map(|c| {
+                    table
+                        .columns
+                        .get(c.pos_in_table)
+                        .unwrap()
+                        .collation
+                        .unwrap_or_default()
+                })
+                .collect()
+        });
+        let cursor = BTreeCursor::new_index(
+            mv_cursor,
+            pager.clone(),
+            root_page as usize,
+            index.as_ref(),
+            collations,
+        );
        cursors
            .get_mut(*cursor_id)
            .unwrap()
            .replace(Cursor::new_btree(cursor));
    } else {
-        let cursor = BTreeCursor::new(mv_cursor, pager.clone(), root_page as usize);
+        let cursor = BTreeCursor::new_table(mv_cursor, pager.clone(), root_page as usize);
        cursors
            .get_mut(*cursor_id)
            .unwrap()
@@ -4205,7 +4347,8 @@ pub fn op_destroy(
    if *is_temp == 1 {
        todo!("temp databases not implemented yet.");
    }
-    let mut cursor = BTreeCursor::new(None, pager.clone(), *root);
+    // TODO: not sure whether this should be BTreeCursor::new_table, BTreeCursor::new_index, or neither (just passing an empty vec)
+    let mut cursor = BTreeCursor::new(None, pager.clone(), *root, Vec::new());
    cursor.btree_destroy()?;
    state.pc += 1;
    Ok(InsnFunctionStepResult::Step)
@@ -4579,7 +4722,7 @@ pub fn op_open_ephemeral(
        }
        None => None,
    };
-    let mut cursor = BTreeCursor::new(mv_cursor, pager, root_page as usize);
+    let mut cursor = BTreeCursor::new_table(mv_cursor, pager, root_page as usize);
    cursor.rewind()?; // Will never return io

    let mut cursors: std::cell::RefMut<'_, Vec<Option<Cursor>>> = state.cursors.borrow_mut();
--- a/core/vdbe/explain.rs
+++ b/core/vdbe/explain.rs
@@ -141,12 +141,13 @@ pub fn insn_to_str(
                start_reg_a,
                start_reg_b,
                count,
+                collation,
            } => (
                "Compare",
                *start_reg_a as i32,
                *start_reg_b as i32,
                *count as i32,
-                Value::build_text(""),
+                Value::build_text(&format!("k({count}, {})", collation.unwrap_or_default())),
                0,
                format!(
                    "r[{}..{}]==r[{}..{}]",
@@ -211,13 +212,14 @@ pub fn insn_to_str(
                lhs,
                rhs,
                target_pc,
+                collation,
                ..
            } => (
                "Eq",
                *lhs as i32,
                *rhs as i32,
                target_pc.to_debug_int(),
-                Value::build_text(""),
+                Value::build_text(&collation.map_or("".to_string(), |c| c.to_string())),
                0,
                format!(
                    "if r[{}]==r[{}] goto {}",
@@ -230,13 +232,14 @@ pub fn insn_to_str(
                lhs,
                rhs,
                target_pc,
+                collation,
                ..
            } => (
                "Ne",
                *lhs as i32,
                *rhs as i32,
                target_pc.to_debug_int(),
-                Value::build_text(""),
+                Value::build_text(&collation.map_or("".to_string(), |c| c.to_string())),
                0,
                format!(
                    "if r[{}]!=r[{}] goto {}",
@@ -249,13 +252,14 @@ pub fn insn_to_str(
                lhs,
                rhs,
                target_pc,
+                collation,
                ..
            } => (
                "Lt",
                *lhs as i32,
                *rhs as i32,
                target_pc.to_debug_int(),
-                Value::build_text(""),
+                Value::build_text(&collation.map_or("".to_string(), |c| c.to_string())),
                0,
                format!("if r[{}]<r[{}] goto {}", lhs, rhs, target_pc.to_debug_int()),
            ),
@@ -263,13 +267,14 @@ pub fn insn_to_str(
                lhs,
                rhs,
                target_pc,
+                collation,
                ..
            } => (
                "Le",
                *lhs as i32,
                *rhs as i32,
                target_pc.to_debug_int(),
-                Value::build_text(""),
+                Value::build_text(&collation.map_or("".to_string(), |c| c.to_string())),
                0,
                format!(
                    "if r[{}]<=r[{}] goto {}",
@@ -282,13 +287,14 @@ pub fn insn_to_str(
                lhs,
                rhs,
                target_pc,
+                collation,
                ..
            } => (
                "Gt",
                *lhs as i32,
                *rhs as i32,
                target_pc.to_debug_int(),
-                Value::build_text(""),
+                Value::build_text(&collation.map_or("".to_string(), |c| c.to_string())),
                0,
                format!("if r[{}]>r[{}] goto {}", lhs, rhs, target_pc.to_debug_int()),
            ),
@@ -296,13 +302,14 @@ pub fn insn_to_str(
                lhs,
                rhs,
                target_pc,
+                collation,
                ..
            } => (
                "Ge",
                *lhs as i32,
                *rhs as i32,
                target_pc.to_debug_int(),
-                Value::build_text(""),
+                Value::build_text(&collation.map_or("".to_string(), |c| c.to_string())),
                0,
                format!(
                    "if r[{}]>=r[{}] goto {}",
@@ -879,13 +886,22 @@ pub fn insn_to_str(
                cursor_id,
                columns,
                order,
+                collations,
            } => {
                let _p4 = String::new();
                let to_print: Vec<String> = order
                    .iter()
-                    .map(|v| match v {
-                        SortOrder::Asc => "B".to_string(),
-                        SortOrder::Desc => "-B".to_string(),
+                    .zip(collations.iter())
+                    .map(|(v, collation)| {
+                        let sign = match v {
+                            SortOrder::Asc => "",
+                            SortOrder::Desc => "-",
+                        };
+                        if collation.is_some() {
+                            format!("{sign}{}", collation.unwrap())
+                        } else {
+                            format!("{sign}B")
+                        }
                    })
                    .collect();
                (
--- a/core/vdbe/insn.rs
+++ b/core/vdbe/insn.rs
@@ -8,6 +8,7 @@ use super::{execute, AggFunc, BranchOffset, CursorID, FuncCtx, InsnFunction, Pag
 use crate::{
    schema::{BTreeTable, Index},
    storage::{pager::CreateBTreeFlags, wal::CheckpointMode},
+    translate::collate::CollationSeq,
 };
 use limbo_macros::Description;
 use limbo_sqlite3_parser::ast::SortOrder;
@@ -152,6 +153,7 @@ pub enum Insn {
        start_reg_a: usize,
        start_reg_b: usize,
        count: usize,
+        collation: Option<CollationSeq>,
    },
    /// Place the result of rhs bitwise AND lhs in third register.
    BitAnd {
@@ -218,6 +220,7 @@ pub enum Insn {
        /// Without the jump_if_null flag it would not jump because the logical comparison "id != NULL" is never true.
        /// This flag indicates that if either is null we should still jump.
        flags: CmpInsFlags,
+        collation: Option<CollationSeq>,
    },
    /// Compare two registers and jump to the given PC if they are not equal.
    Ne {
@@ -228,6 +231,7 @@ pub enum Insn {
        ///
        /// jump_if_null jumps if either of the operands is null. Used for "jump when false" logic.
        flags: CmpInsFlags,
+        collation: Option<CollationSeq>,
    },
    /// Compare two registers and jump to the given PC if the left-hand side is less than the right-hand side.
    Lt {
@@ -236,6 +240,7 @@ pub enum Insn {
        target_pc: BranchOffset,
        /// jump_if_null: Jump if either of the operands is null. Used for "jump when false" logic.
        flags: CmpInsFlags,
+        collation: Option<CollationSeq>,
    },
    // Compare two registers and jump to the given PC if the left-hand side is less than or equal to the right-hand side.
    Le {
@@ -244,6 +249,7 @@ pub enum Insn {
        target_pc: BranchOffset,
        /// jump_if_null: Jump if either of the operands is null. Used for "jump when false" logic.
        flags: CmpInsFlags,
+        collation: Option<CollationSeq>,
    },
    /// Compare two registers and jump to the given PC if the left-hand side is greater than the right-hand side.
    Gt {
@@ -252,6 +258,7 @@ pub enum Insn {
        target_pc: BranchOffset,
        /// jump_if_null: Jump if either of the operands is null. Used for "jump when false" logic.
        flags: CmpInsFlags,
+        collation: Option<CollationSeq>,
    },
    /// Compare two registers and jump to the given PC if the left-hand side is greater than or equal to the right-hand side.
    Ge {
@@ -260,6 +267,7 @@ pub enum Insn {
        target_pc: BranchOffset,
        /// jump_if_null: Jump if either of the operands is null. Used for "jump when false" logic.
        flags: CmpInsFlags,
+        collation: Option<CollationSeq>,
    },
    /// Jump to target_pc if r\[reg\] != 0 or (r\[reg\] == NULL && r\[jump_if_null\] != 0)
    If {
@@ -597,9 +605,10 @@ pub enum Insn {

    /// Open a sorter.
    SorterOpen {
-        cursor_id: CursorID,   // P1
-        columns: usize,        // P2
-        order: Vec<SortOrder>, // P4.
+        cursor_id: CursorID,                   // P1
+        columns: usize,                        // P2
+        order: Vec<SortOrder>,                 // P4.
+        collations: Vec<Option<CollationSeq>>, // The only reason for using Option<CollationSeq> is so the explain message is the same as in SQLite
    },

    /// Insert a row into the sorter.
--- a/core/vdbe/sorter.rs
+++ b/core/vdbe/sorter.rs
@@ -1,21 +1,26 @@
 use limbo_sqlite3_parser::ast::SortOrder;

-use crate::types::{compare_immutable, ImmutableRecord, IndexKeySortOrder};
+use crate::{
+    translate::collate::CollationSeq,
+    types::{compare_immutable, ImmutableRecord, IndexKeySortOrder},
+};

 pub struct Sorter {
    records: Vec<ImmutableRecord>,
    current: Option<ImmutableRecord>,
    order: IndexKeySortOrder,
    key_len: usize,
+    collations: Vec<CollationSeq>,
 }

 impl Sorter {
-    pub fn new(order: &[SortOrder]) -> Self {
+    pub fn new(order: &[SortOrder], collations: Vec<CollationSeq>) -> Self {
        Self {
            records: Vec::new(),
            current: None,
            key_len: order.len(),
            order: IndexKeySortOrder::from_list(order),
+            collations,
        }
    }
    pub fn is_empty(&self) -> bool {
@@ -33,6 +38,7 @@ impl Sorter {
                &a.values[..self.key_len],
                &b.values[..self.key_len],
                self.order,
+                &self.collations,
            )
        });
        self.records.reverse();
--- a/testing/all.test
+++ b/testing/all.test
@@ -34,3 +34,4 @@ source $testdir/boolean.test
 source $testdir/literal.test
 source $testdir/null.test
 source $testdir/create_table.test
+source $testdir/collate.test
--- a/testing/cli_tests/collate.py
+++ b/testing/cli_tests/collate.py
@@ -0,0 +1,156 @@
+#!/usr/bin/env python3
+import os
+import sys
+from cli_tests.test_limbo_cli import TestLimboShell
+from pydantic import BaseModel
+from cli_tests import console
+
+
+sqlite_flags = os.getenv("SQLITE_FLAGS", "-q").split(" ")
+
+
+class CollateTest(BaseModel):
+    """Smoke test for COLLATE, based on the examples in section 7.2 of
+    https://sqlite.org/datatype3.html#collation.
+
+    The fixture database is created with sqlite3 and then queried through the
+    Limbo shell.
+    """
+
+    name: str
+    db_schema: str = """CREATE TABLE t1(
+        x INTEGER PRIMARY KEY,
+        a,                 /* collating sequence BINARY */
+        b COLLATE BINARY,  /* collating sequence BINARY */
+        c COLLATE RTRIM,   /* collating sequence RTRIM  */
+        d COLLATE NOCASE   /* collating sequence NOCASE */
+    );"""
+    db_path: str = "testing/collate.db"
+
+    def init_db(self):
+        """Create and populate the fixture database using the sqlite3 shell."""
+        with TestLimboShell(
+            init_commands="",
+            exec_name="sqlite3",
+            flags=self.db_path,
+        ) as sqlite:
+            sqlite.execute_dot(f".open {self.db_path}")
+            statements = [
+                self.db_schema,
+                "INSERT INTO t1 VALUES(1,'abc','abc', 'abc  ','abc');",
+                "INSERT INTO t1 VALUES(2,'abc','abc', 'abc',  'ABC');",
+                "INSERT INTO t1 VALUES(3,'abc','abc', 'abc ', 'Abc');",
+                "INSERT INTO t1 VALUES(4,'abc','abc ','ABC',  'abc');",
+                "SELECT count(*) FROM t1;",
+            ]
+
+            # The trailing SELECT doubles as a sanity check on the four inserted rows.
+            sqlite.run_test(
+                "Init Collate Db in Sqlite",
+                "".join(statements),
+                "4",
+            )
+
+    def run(self, limbo: TestLimboShell):
+        """Open the fixture database in Limbo and run the collation queries."""
+        limbo.execute_dot(f".open {self.db_path}")
+
+        limbo.run_test(
+            "Text comparison a=b is performed using the BINARY collating sequence",
+            "SELECT x FROM t1 WHERE a = b ORDER BY x;",
+            "\n".join(map(str, [1, 2, 3])),
+        )
+
+        limbo.run_test(
+            "Text comparison a=b is performed using the RTRIM collating sequence",
+            "SELECT x FROM t1 WHERE a = b COLLATE RTRIM ORDER BY x;",
+            "\n".join(map(str, [1, 2, 3, 4])),
+        )
+
+        limbo.run_test(
+            "Text comparison d=a is performed using the NOCASE collating sequence",
+            "SELECT x FROM t1 WHERE d = a ORDER BY x;",
+            "\n".join(map(str, [1, 2, 3, 4])),
+        )
+
+        limbo.run_test(
+            "Text comparison a=d is performed using the BINARY collating sequence.",
+            "SELECT x FROM t1 WHERE a = d ORDER BY x;",
+            "\n".join(map(str, [1, 4])),
+        )
+
+        limbo.run_test(
+            "Text comparison 'abc'=c is performed using the RTRIM collating sequence.",
+            "SELECT x FROM t1 WHERE 'abc' = c ORDER BY x;",
+            "\n".join(map(str, [1, 2, 3])),
+        )
+
+        limbo.run_test(
+            "Text comparison c='abc' is performed using the RTRIM collating sequence.",
+            "SELECT x FROM t1 WHERE c = 'abc' ORDER BY x;",
+            "\n".join(map(str, [1, 2, 3])),
+        )
+
+        limbo.run_test(
+            "Grouping is performed using the NOCASE collating sequence (Values 'abc', 'ABC', and 'Abc' are placed in the same group).",
+            "SELECT count(*) FROM t1 GROUP BY d ORDER BY 1;",
+            "\n".join(map(str, [4])),
+        )
+
+        limbo.run_test(
+            "Grouping is performed using the BINARY collating sequence. 'abc' and 'ABC' and 'Abc' form different groups",
+            "SELECT count(*) FROM t1 GROUP BY (d || '') ORDER BY 1;",
+            "\n".join(map(str, [1, 1, 2])),
+        )
+
+        limbo.run_test(
+            "Sorting or column c is performed using the RTRIM collating sequence.",
+            "SELECT x FROM t1 ORDER BY c, x;",
+            "\n".join(map(str, [4, 1, 2, 3])),
+        )
+
+        limbo.run_test(
+            "Sorting of (c||'') is performed using the BINARY collating sequence.",
+            "SELECT x FROM t1 ORDER BY (c||''), x;",
+            "\n".join(map(str, [4, 2, 3, 1])),
+        )
+
+        limbo.run_test(
+            "Sorting of column c is performed using the NOCASE collating sequence.",
+            "SELECT x FROM t1 ORDER BY c COLLATE NOCASE, x;",
+            "\n".join(map(str, [2, 4, 3, 1])),
+        )
+
+
+def cleanup(db_fullpath: str):
+    """Delete the database file and its WAL/SHM companions, if they exist."""
+    for suffix in ("", "-wal", "-shm"):
+        path = f"{db_fullpath}{suffix}"
+        if os.path.exists(path):
+            os.remove(path)
+
+
+def main():
+    """Run the collation smoke test; exit with status 1 if anything fails."""
+    # Tests use the examples from Section 7.2 of
+    # https://sqlite.org/datatype3.html#collation
+    test = CollateTest(name="Smoke collate tests")
+    console.info(test)
+
+    db_path = test.db_path
+    try:
+        test.init_db()
+        # Use with syntax to automatically close shell on error
+        with TestLimboShell("") as limbo:
+            test.run(limbo)
+    except Exception as e:
+        console.error(f"Test FAILED: {e}")
+        sys.exit(1)
+    finally:
+        # Always delete the db so the next test run starts from a fresh one.
+        cleanup(db_path)
+    console.info("All tests passed successfully.")
+
+
+if __name__ == "__main__":
+    main()
--- a/testing/collate.test
+++ b/testing/collate.test
@@ -0,0 +1,48 @@
+#!/usr/bin/env tclsh
+
+set testdir [file dirname $argv0]
+source $testdir/tester.tcl
+
+# SIMPLE SMOKE TESTS THAT DO NOT DEPEND ON SPECIFIC DATABASE ROWS
+
+do_execsql_test collate_nocase {
+    SELECT 'hat' == 'hAt' COLLATE NOCASE;
+} {1}
+
+do_execsql_test collate_binary_1 {
+    SELECT 'hat' == 'hAt' COLLATE BINARY;
+} {0}
+
+do_execsql_test collate_binary_2 {
+    SELECT 'hat' == 'hat' COLLATE BINARY;
+} {1}
+
+do_execsql_test collate_rtrim_1 {
+    SELECT 'hat' == 'hAt ' COLLATE RTRIM;
+} {0}
+
+do_execsql_test collate_rtrim_2 {
+    SELECT 'hat' == 'hat ' COLLATE RTRIM;
+} {1}
+
+do_execsql_test collate_rtrim_3 {
+    SELECT 'hat' == ' hAt ' COLLATE RTRIM;
+} {0}
+
+do_execsql_test collate_rtrim_4 {
+    SELECT 'hat' == ' hat ' COLLATE RTRIM;
+} {0}
+
+do_execsql_test collate_left_precedence {
+    SELECT 'hat' COLLATE BINARY == 'hAt' COLLATE NOCASE;
+} {0}
+
+do_execsql_test collate_left_precedence_2 {
+    SELECT 'hat' COLLATE NOCASE == 'hAt' COLLATE BINARY;
+} {1}
+
+# Under NOCASE the three values compare equal, so the primary key must reject the insert.
+do_execsql_test_in_memory_any_error collate_unique_constraint {
+    CREATE TABLE t(a TEXT COLLATE NOCASE PRIMARY KEY);
+    INSERT INTO t VALUES ('lol'), ('LOL'), ('lOl');
+}
--- a/testing/pyproject.toml
+++ b/testing/pyproject.toml
@@ -17,6 +17,7 @@ test-update = "cli_tests.update:main"
 test-memory = "cli_tests.memory:main"
 bench-vfs = "cli_tests.vfs_bench:main"
 test-constraint = "cli_tests.constraint:main"
+test-collate = "cli_tests.collate:main"

 [tool.uv]
 package = true
--- a/testing/tester.tcl
+++ b/testing/tester.tcl
@@ -226,3 +226,13 @@ proc do_execsql_test_in_memory_any_error {test_name sql_statements} {
    set combined_sql [string trim $sql_statements]
    run_test_expecting_any_error $::sqlite_exec $db_name $combined_sql
 }
+
+proc do_execsql_test_in_memory_error_content {test_name sql_statements expected_error_text} {
+    test_put "Running error content test" in-memory $test_name
+
+    # Use ":memory:" special filename for in-memory database
+    set db_name ":memory:"
+
+    set combined_sql [string trim $sql_statements]
+    run_test_expecting_error_content $::sqlite_exec $db_name $combined_sql $expected_error_text
+}
--- a/tests/integration/fuzz/mod.rs
+++ b/tests/integration/fuzz/mod.rs
@@ -1220,6 +1220,7 @@ mod tests {
            );
            let query = format!("INSERT INTO t VALUES ({}, {}, {})", x, y, z);
            log::info!("insert: {}", query);
+            dbg!(&query);
            assert_eq!(
                limbo_exec_rows(&db, &limbo_conn, &query),
                sqlite_exec_rows(&sqlite_conn, &query),
--- a/vendored/sqlite3-parser/Cargo.toml
+++ b/vendored/sqlite3-parser/Cargo.toml
@@ -32,8 +32,8 @@ bitflags = "2.0"
 uncased = "0.9.10"
 indexmap = "2.0"
 miette = "7.4.0"
-strum = { version = "0.26", features = ["derive"] }
-strum_macros = "0.26"
+strum = { workspace = true }
+strum_macros = {workspace = true }

 [dev-dependencies]
 env_logger = { version = "0.11", default-features = false }