improve handling of double quotes

I ended up hitting #1974 today and wanted to fix it. I worked with
Claude to generate a more comprehensive set of queries that could fail
aside from just the insert query described in the issue. He got most of
them right - lots of cases were indeed failing. The ones that were
gibberish, he told me I was absolutely right for pointing out they were
bad.

But alas. With the test cases generated, we can work on fixing it. The
place where the assertion was hit, all we need to do there is return
true (but we assert that this is indeed a string literal, it shouldn't
be anything else at this point).

There are then just a couple of places where we need to make sure we
handle double quotes correctly. We already tested for single quotes in a
couple of places, but never for double quotes.

There is one funny corner case where you can just select "col" from tbl,
and if there is no column "col" on the table, that is treated as a
string literal. We handle that too.

Fixes #1974
This commit is contained in:
Glauber Costa
2025-07-17 21:19:11 -05:00
parent 523f8f9c67
commit cbdd5c5fc7
7 changed files with 149 additions and 14 deletions

View File

@@ -1885,10 +1885,14 @@ pub fn translate_expr(
}
}
ast::Expr::FunctionCallStar { .. } => todo!(),
ast::Expr::Id(id) => crate::bail_parse_error!(
"no such column: {} - should this be a string literal in single-quotes?",
id.0
),
ast::Expr::Id(id) => {
// Treat double-quoted identifiers as string literals (SQLite compatibility)
program.emit_insn(Insn::String8 {
value: sanitize_double_quoted_string(&id.0),
dest: target_register,
});
Ok(target_register)
}
ast::Expr::Column {
database: _,
table: table_ref_id,
@@ -2684,12 +2688,23 @@ pub fn maybe_apply_affinity(col_type: Type, target_register: usize, program: &mu
}
}
/// Sanitaizes a string literal by removing single quote at front and back
/// Sanitizes a string literal by removing single quote at front and back
/// and escaping double single quotes
pub fn sanitize_string(input: &str) -> String {
input[1..input.len() - 1].replace("''", "'").to_string()
}
/// Sanitizes a double-quoted string literal by removing double quotes at front and back
/// and unescaping double quotes
pub fn sanitize_double_quoted_string(input: &str) -> String {
input[1..input.len() - 1].replace("\"\"", "\"").to_string()
}
/// Checks if an identifier represents a double-quoted string that should get fallback behavior
pub fn is_double_quoted_identifier(id_str: &str) -> bool {
id_str.len() >= 2 && id_str.starts_with('"') && id_str.ends_with('"')
}
/// Returns the components of a binary expression
/// e.g. t.x = 5 -> Some((t.x, =, 5))
pub fn as_binary_components(

View File

@@ -12,7 +12,7 @@ use turso_sqlite3_parser::ast::{self, fmt::ToTokens as _, Expr, SortOrder};
use crate::{
parameters::PARAM_PREFIX,
schema::{Index, IndexColumn, Schema, Table},
translate::{expr::walk_expr_mut, plan::TerminationKey},
translate::{expr::is_double_quoted_identifier, expr::walk_expr_mut, plan::TerminationKey},
types::SeekOp,
Result,
};
@@ -604,7 +604,11 @@ impl Optimizable for ast::Expr {
.is_none_or(|args| args.iter().all(|arg| arg.is_constant(resolver)))
}
Expr::FunctionCallStar { .. } => false,
Expr::Id(_) => panic!("Id should have been rewritten as Column"),
Expr::Id(id) => {
// If we got here with an id, this has to be double-quotes identifier
assert!(is_double_quoted_identifier(&id.0));
true
}
Expr::Column { .. } => false,
Expr::RowId { .. } => false,
Expr::InList { lhs, rhs, .. } => {

View File

@@ -21,8 +21,8 @@ use crate::{
};
use turso_sqlite3_parser::ast::Literal::Null;
use turso_sqlite3_parser::ast::{
self, As, Expr, FromClause, JoinType, Limit, Materialized, QualifiedName, TableInternalId,
UnaryOperator, With,
self, As, Expr, FromClause, JoinType, Limit, Literal, Materialized, QualifiedName,
TableInternalId, UnaryOperator, With,
};
pub const ROWID: &str = "rowid";
@@ -200,7 +200,16 @@ pub fn bind_column_references(
}
}
}
crate::bail_parse_error!("Column {} not found", id.0);
// SQLite behavior: Only double-quoted identifiers get fallback to string literals
// Single quotes are handled as literals earlier, unquoted identifiers must resolve to columns
if crate::translate::expr::is_double_quoted_identifier(&id.0) {
// Convert failed double-quoted identifier to string literal
*expr = Expr::Literal(Literal::String(id.0.clone()));
Ok(())
} else {
// Unquoted identifiers must resolve to columns - no fallback
crate::bail_parse_error!("Column {} not found", id.0)
}
}
Expr::Qualified(tbl, id) => {
let normalized_table_name = normalize_ident(tbl.0.as_str());

View File

@@ -409,4 +409,52 @@ if {[info exists ::env(SQLITE_EXEC)] && ($::env(SQLITE_EXEC) eq "scripts/limbo-s
INSERT INTO t VALUES (replace(hex(zeroblob(1000)), '00', 'a') || 'h', 8);
SELECT COUNT(*) FROM t WHERE x >= replace(hex(zeroblob(100)), '00', 'a');
} {8}
}
}
do_execsql_test_skip_lines_on_specific_db 1 {:memory:} double-quote-string-literals {
.dbconfig dqs_dml on
CREATE TABLE test (id INTEGER, name TEXT);
INSERT INTO test (id, name) VALUES (1, "Dave");
INSERT INTO test (id,name) VALUES (2, 'Alice');
SELECT * FROM test ORDER BY id;
} {1|Dave
2|Alice}
do_execsql_test_skip_lines_on_specific_db 1 {:memory:} mixed-quote-types {
.dbconfig dqs_dml on
CREATE TABLE mixed (a TEXT, b TEXT, c TEXT);
INSERT INTO mixed (a,b,c) VALUES ("double", 'single', "another");
SELECT * FROM mixed;
} {double|single|another}
do_execsql_test_skip_lines_on_specific_db 1 {:memory:} double-quote-regression-original-case {
.dbconfig dqs_dml on
CREATE TABLE users (
id INTEGER,
name TEXT,
email TEXT
);
INSERT INTO users (name) values ("Dave");
SELECT name FROM users;
} {Dave}
do_execsql_test_skip_lines_on_specific_db 1 {:memory:} double-quote-multiple-rows {
.dbconfig dqs_dml on
CREATE TABLE items (id INTEGER, description TEXT, category TEXT);
INSERT INTO items (id, description, category) VALUES
(1, "First_item", "category_a"),
(2, 'Second_item', "category_b"),
(3, "Third_item", 'category_c');
SELECT id, description, category FROM items ORDER BY id;
} {1|First_item|category_a
2|Second_item|category_b
3|Third_item|category_c}
do_execsql_test_skip_lines_on_specific_db 1 {:memory:} double-quote-inner-quotes-preservation {
.dbconfig dqs_dml on
CREATE TABLE inner_quotes_test (id INTEGER, content TEXT);
INSERT INTO inner_quotes_test VALUES (1, '"foo"');
INSERT INTO inner_quotes_test VALUES (2, "'bar'");
SELECT id, content FROM inner_quotes_test ORDER BY id;
} {1|"foo"
2|'bar'}

View File

@@ -617,3 +617,20 @@ do_execsql_test_on_specific_db {:memory:} select-no-match-in-leaf-page {
2
2}
# Regression tests for double-quoted strings in SELECT statements
do_execsql_test_skip_lines_on_specific_db 1 {:memory:} select-double-quotes-values {
.dbconfig dqs_dml on
SELECT * FROM (VALUES ("select", "test"), ("double", "quotes"));
} {select|test
double|quotes}
do_execsql_test_skip_lines_on_specific_db 1 {:memory:} select-double-quotes-no-column {
.dbconfig dqs_dml on
SELECT "first"
} {first}
do_execsql_test_skip_lines_on_specific_db 1 {:memory:} select-double-quotes-literal {
.dbconfig dqs_dml on
SELECT "literal_string" AS col;
} {literal_string}

View File

@@ -11,8 +11,8 @@ proc test_put {msg db test_name} {
}
proc evaluate_sql {sqlite_exec db_name sql} {
set command [list $sqlite_exec $db_name $sql]
set output [exec {*}$command]
set command [list $sqlite_exec $db_name]
set output [exec echo $sql | {*}$command]
return $output
}
@@ -69,6 +69,26 @@ proc do_execsql_test_on_specific_db {db_name test_name sql_statements expected_o
run_test $::sqlite_exec $db_name $combined_sql $combined_expected_output
}
proc run_test_skip_lines {sqlite_exec skip_lines db_name sql expected_output} {
set actual_output [evaluate_sql $sqlite_exec $db_name $sql]
set lines [split $actual_output "\n"]
set actual_without_skipped [join [lrange $lines $skip_lines end] "\n"]
if {$actual_without_skipped ne $expected_output} {
error_put $sql
puts "returned '$actual_without_skipped'"
puts "expected '$expected_output'"
exit 1
}
}
proc do_execsql_test_skip_lines_on_specific_db {skip_lines db_name test_name sql_statements expected_outputs} {
test_put "Running test" $db_name $test_name
set combined_sql [string trim $sql_statements]
set combined_expected_output [join $expected_outputs "\n"]
run_test_skip_lines $::sqlite_exec $skip_lines $db_name $combined_sql $combined_expected_output
}
proc within_tolerance {actual expected tolerance} {
expr {abs($actual - $expected) <= $tolerance}
}

View File

@@ -25,4 +25,26 @@ do_execsql_test values-in-from {
do_execsql_test values-in-join {
select * from (values(1, 2)) join (values(3, 4), (5, 6));
} {1|2|3|4
1|2|5|6};
1|2|5|6};
do_execsql_test_skip_lines_on_specific_db 1 {:memory:} values-double-quotes-simple {
.dbconfig dqs_dml on
VALUES ("double_quoted_string");
} {double_quoted_string}
do_execsql_test_skip_lines_on_specific_db 1 {:memory:} values-double-quotes-multiple {
.dbconfig dqs_dml on
VALUES ("first", "second"), ("third", "fourth");
} {first|second
third|fourth}
do_execsql_test_skip_lines_on_specific_db 1 {:memory:} values-mixed-quotes {
.dbconfig dqs_dml on
VALUES ("double", 'single'), ('single', "double");
} {double|single
single|double}
do_execsql_test_skip_lines_on_specific_db 1 {:memory:} values-double-quotes-subquery {
.dbconfig dqs_dml on
SELECT * FROM (VALUES ("subquery_string"));
} {subquery_string}