From cbdd5c5fc7d00e471eb4e8a5acc1c0c9643a9a5b Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Thu, 17 Jul 2025 21:19:11 -0500 Subject: [PATCH] improve handling of double quotes I ended up hitting #1974 today and wanted to fix it. I worked with Claude to generate a more comprehensive set of queries that could fail aside from just the insert query described in the issue. He got most of them right - lots of cases were indeed failing. The ones that were gibberish, he told me I was absolutely right for pointing out they were bad. But alas. With the test cases generated, we can work on fixing it. The place where the assertion was hit, all we need to do there is return true (but we assert that this is indeed a string literal, it shouldn't be anything else at this point). There are then just a couple of places where we need to make sure we handle double quotes correctly. We already tested for single quotes in a couple of places, but never for double quotes. There is one funny corner case where you can just select "col" from tbl, and if there is no column "col" on the table, that is treated as a string literal. We handle that too. Fixes #1974 --- core/translate/expr.rs | 25 +++++++++++++---- core/translate/optimizer/mod.rs | 8 ++++-- core/translate/planner.rs | 15 ++++++++-- testing/insert.test | 50 ++++++++++++++++++++++++++++++++- testing/select.test | 17 +++++++++++ testing/tester.tcl | 24 ++++++++++++++-- testing/values.test | 24 +++++++++++++++- 7 files changed, 149 insertions(+), 14 deletions(-) diff --git a/core/translate/expr.rs b/core/translate/expr.rs index 706496edb..3cc99a654 100644 --- a/core/translate/expr.rs +++ b/core/translate/expr.rs @@ -1885,10 +1885,14 @@ pub fn translate_expr( } } ast::Expr::FunctionCallStar { .. } => todo!(), - ast::Expr::Id(id) => crate::bail_parse_error!( - "no such column: {} - should this be a string literal in single-quotes?", - id.0 - ), + ast::Expr::Id(id) => { + // Treat double-quoted identifiers as string literals (SQLite compatibility) + program.emit_insn(Insn::String8 { + value: sanitize_double_quoted_string(&id.0), + dest: target_register, + }); + Ok(target_register) + } ast::Expr::Column { database: _, table: table_ref_id, @@ -2684,12 +2688,23 @@ pub fn maybe_apply_affinity(col_type: Type, target_register: usize, program: &mu } } -/// Sanitaizes a string literal by removing single quote at front and back +/// Sanitizes a string literal by removing single quote at front and back /// and escaping double single quotes pub fn sanitize_string(input: &str) -> String { input[1..input.len() - 1].replace("''", "'").to_string() } +/// Sanitizes a double-quoted string literal by removing double quotes at front and back +/// and unescaping double quotes +pub fn sanitize_double_quoted_string(input: &str) -> String { + input[1..input.len() - 1].replace("\"\"", "\"").to_string() +} + +/// Checks if an identifier represents a double-quoted string that should get fallback behavior +pub fn is_double_quoted_identifier(id_str: &str) -> bool { + id_str.len() >= 2 && id_str.starts_with('"') && id_str.ends_with('"') +} + /// Returns the components of a binary expression /// e.g. t.x = 5 -> Some((t.x, =, 5)) pub fn as_binary_components( diff --git a/core/translate/optimizer/mod.rs b/core/translate/optimizer/mod.rs index 0032469a5..3dbad8762 100644 --- a/core/translate/optimizer/mod.rs +++ b/core/translate/optimizer/mod.rs @@ -12,7 +12,7 @@ use turso_sqlite3_parser::ast::{self, fmt::ToTokens as _, Expr, SortOrder}; use crate::{ parameters::PARAM_PREFIX, schema::{Index, IndexColumn, Schema, Table}, - translate::{expr::walk_expr_mut, plan::TerminationKey}, + translate::{expr::is_double_quoted_identifier, expr::walk_expr_mut, plan::TerminationKey}, types::SeekOp, Result, }; @@ -604,7 +604,11 @@ impl Optimizable for ast::Expr { .is_none_or(|args| args.iter().all(|arg| arg.is_constant(resolver))) } Expr::FunctionCallStar { .. } => false, - Expr::Id(_) => panic!("Id should have been rewritten as Column"), + Expr::Id(id) => { + // If we got here with an id, this has to be double-quotes identifier + assert!(is_double_quoted_identifier(&id.0)); + true + } Expr::Column { .. } => false, Expr::RowId { .. } => false, Expr::InList { lhs, rhs, .. } => { diff --git a/core/translate/planner.rs b/core/translate/planner.rs index 8e2f8a501..747bed6c6 100644 --- a/core/translate/planner.rs +++ b/core/translate/planner.rs @@ -21,8 +21,8 @@ use crate::{ }; use turso_sqlite3_parser::ast::Literal::Null; use turso_sqlite3_parser::ast::{ - self, As, Expr, FromClause, JoinType, Limit, Materialized, QualifiedName, TableInternalId, - UnaryOperator, With, + self, As, Expr, FromClause, JoinType, Limit, Literal, Materialized, QualifiedName, + TableInternalId, UnaryOperator, With, }; pub const ROWID: &str = "rowid"; @@ -200,7 +200,16 @@ pub fn bind_column_references( } } } - crate::bail_parse_error!("Column {} not found", id.0); + // SQLite behavior: Only double-quoted identifiers get fallback to string literals + // Single quotes are handled as literals earlier, unquoted identifiers must resolve to columns + if crate::translate::expr::is_double_quoted_identifier(&id.0) { + // Convert failed double-quoted identifier to string literal + *expr = Expr::Literal(Literal::String(id.0.clone())); + Ok(()) + } else { + // Unquoted identifiers must resolve to columns - no fallback + crate::bail_parse_error!("Column {} not found", id.0) + } } Expr::Qualified(tbl, id) => { let normalized_table_name = normalize_ident(tbl.0.as_str()); diff --git a/testing/insert.test b/testing/insert.test index 5ddd09c9e..25224a026 100755 --- a/testing/insert.test +++ b/testing/insert.test @@ -409,4 +409,52 @@ if {[info exists ::env(SQLITE_EXEC)] && ($::env(SQLITE_EXEC) eq "scripts/limbo-s INSERT INTO t VALUES (replace(hex(zeroblob(1000)), '00', 'a') || 'h', 8); SELECT COUNT(*) FROM t WHERE x >= replace(hex(zeroblob(100)), '00', 'a'); } {8} -} \ No newline at end of file +} + +do_execsql_test_skip_lines_on_specific_db 1 {:memory:} double-quote-string-literals { + .dbconfig dqs_dml on + CREATE TABLE test (id INTEGER, name TEXT); + INSERT INTO test (id, name) VALUES (1, "Dave"); + INSERT INTO test (id,name) VALUES (2, 'Alice'); + SELECT * FROM test ORDER BY id; +} {1|Dave +2|Alice} + +do_execsql_test_skip_lines_on_specific_db 1 {:memory:} mixed-quote-types { + .dbconfig dqs_dml on + CREATE TABLE mixed (a TEXT, b TEXT, c TEXT); + INSERT INTO mixed (a,b,c) VALUES ("double", 'single', "another"); + SELECT * FROM mixed; +} {double|single|another} + +do_execsql_test_skip_lines_on_specific_db 1 {:memory:} double-quote-regression-original-case { + .dbconfig dqs_dml on + CREATE TABLE users ( + id INTEGER, + name TEXT, + email TEXT + ); + INSERT INTO users (name) values ("Dave"); + SELECT name FROM users; +} {Dave} + +do_execsql_test_skip_lines_on_specific_db 1 {:memory:} double-quote-multiple-rows { + .dbconfig dqs_dml on + CREATE TABLE items (id INTEGER, description TEXT, category TEXT); + INSERT INTO items (id, description, category) VALUES + (1, "First_item", "category_a"), + (2, 'Second_item', "category_b"), + (3, "Third_item", 'category_c'); + SELECT id, description, category FROM items ORDER BY id; +} {1|First_item|category_a +2|Second_item|category_b +3|Third_item|category_c} + +do_execsql_test_skip_lines_on_specific_db 1 {:memory:} double-quote-inner-quotes-preservation { + .dbconfig dqs_dml on + CREATE TABLE inner_quotes_test (id INTEGER, content TEXT); + INSERT INTO inner_quotes_test VALUES (1, '"foo"'); + INSERT INTO inner_quotes_test VALUES (2, "'bar'"); + SELECT id, content FROM inner_quotes_test ORDER BY id; +} {1|"foo" +2|'bar'} diff --git a/testing/select.test b/testing/select.test index 5cfe2c15d..b64c968c6 100755 --- a/testing/select.test +++ b/testing/select.test @@ -617,3 +617,20 @@ do_execsql_test_on_specific_db {:memory:} select-no-match-in-leaf-page { 2 2} +# Regression tests for double-quoted strings in SELECT statements +do_execsql_test_skip_lines_on_specific_db 1 {:memory:} select-double-quotes-values { + .dbconfig dqs_dml on + SELECT * FROM (VALUES ("select", "test"), ("double", "quotes")); +} {select|test +double|quotes} + +do_execsql_test_skip_lines_on_specific_db 1 {:memory:} select-double-quotes-no-column { + .dbconfig dqs_dml on + SELECT "first" +} {first} + +do_execsql_test_skip_lines_on_specific_db 1 {:memory:} select-double-quotes-literal { + .dbconfig dqs_dml on + SELECT "literal_string" AS col; +} {literal_string} + diff --git a/testing/tester.tcl b/testing/tester.tcl index 490a0f394..98d06f26f 100644 --- a/testing/tester.tcl +++ b/testing/tester.tcl @@ -11,8 +11,8 @@ proc test_put {msg db test_name} { } proc evaluate_sql {sqlite_exec db_name sql} { - set command [list $sqlite_exec $db_name $sql] - set output [exec {*}$command] + set command [list $sqlite_exec $db_name] + set output [exec echo $sql | {*}$command] return $output } @@ -69,6 +69,26 @@ proc do_execsql_test_on_specific_db {db_name test_name sql_statements expected_o run_test $::sqlite_exec $db_name $combined_sql $combined_expected_output } +proc run_test_skip_lines {sqlite_exec skip_lines db_name sql expected_output} { + set actual_output [evaluate_sql $sqlite_exec $db_name $sql] + set lines [split $actual_output "\n"] + set actual_without_skipped [join [lrange $lines $skip_lines end] "\n"] + if {$actual_without_skipped ne $expected_output} { + error_put $sql + puts "returned '$actual_without_skipped'" + puts "expected '$expected_output'" + exit 1 + } +} + +proc do_execsql_test_skip_lines_on_specific_db {skip_lines db_name test_name sql_statements expected_outputs} { + test_put "Running test" $db_name $test_name + set combined_sql [string trim $sql_statements] + set combined_expected_output [join $expected_outputs "\n"] + run_test_skip_lines $::sqlite_exec $skip_lines $db_name $combined_sql $combined_expected_output +} + + proc within_tolerance {actual expected tolerance} { expr {abs($actual - $expected) <= $tolerance} } diff --git a/testing/values.test b/testing/values.test index 664fd4a34..8ebc33978 100755 --- a/testing/values.test +++ b/testing/values.test @@ -25,4 +25,26 @@ do_execsql_test values-in-from { do_execsql_test values-in-join { select * from (values(1, 2)) join (values(3, 4), (5, 6)); } {1|2|3|4 - 1|2|5|6}; \ No newline at end of file + 1|2|5|6}; + +do_execsql_test_skip_lines_on_specific_db 1 {:memory:} values-double-quotes-simple { + .dbconfig dqs_dml on + VALUES ("double_quoted_string"); +} {double_quoted_string} + +do_execsql_test_skip_lines_on_specific_db 1 {:memory:} values-double-quotes-multiple { + .dbconfig dqs_dml on + VALUES ("first", "second"), ("third", "fourth"); +} {first|second +third|fourth} + +do_execsql_test_skip_lines_on_specific_db 1 {:memory:} values-mixed-quotes { + .dbconfig dqs_dml on + VALUES ("double", 'single'), ('single', "double"); +} {double|single +single|double} + +do_execsql_test_skip_lines_on_specific_db 1 {:memory:} values-double-quotes-subquery { + .dbconfig dqs_dml on + SELECT * FROM (VALUES ("subquery_string")); +} {subquery_string}