Merge 'translate: make bind_and_rewrite_expr() reject unbound identifiers if no referenced tables exist' from Jussi Saurio

Before, we just skipped evaluating `Id`, `Qualified` and
`DoublyQualified` if `referenced_tables` was `None`, leading to shit
like #3621. Let's eagerly return `"No such column"` parse errors in
these cases instead, and punch exceptions for cases where that doesn't
cleanly work
Top tip: use `Hide whitespace` toggle when inspecting the diff of this
PR
Closes #3621

Reviewed-by: Preston Thorpe <preston@turso.tech>

Closes #3626
This commit is contained in:
Jussi Saurio
2025-10-09 12:45:16 +03:00
committed by GitHub
3 changed files with 239 additions and 212 deletions

View File

@@ -3319,11 +3319,14 @@ impl ParamState {
/// TryCanonicalColumnsFirst means that canonical columns take precedence over result columns. This is used for e.g. WHERE clauses.
///
/// ResultColumnsNotAllowed means that referring to result columns is not allowed. This is used e.g. for DML statements.
///
/// AllowUnboundIdentifiers means that unbound identifiers are allowed. This is used for INSERT ... ON CONFLICT DO UPDATE SET ... where binding is handled later than this phase.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum BindingBehavior {
TryResultColumnsFirst,
TryCanonicalColumnsFirst,
ResultColumnsNotAllowed,
AllowUnboundIdentifiers,
}
/// Rewrite ast::Expr in place, binding Column references/rewriting Expr::Id -> Expr::Column
@@ -3373,242 +3376,262 @@ pub fn bind_and_rewrite_expr<'a>(
}
_ => {}
}
if let Some(referenced_tables) = &mut referenced_tables {
match expr {
Expr::Id(id) => {
let normalized_id = normalize_ident(id.as_str());
match expr {
Expr::Id(id) => {
let Some(referenced_tables) = &mut referenced_tables else {
if binding_behavior == BindingBehavior::AllowUnboundIdentifiers {
return Ok(WalkControl::Continue);
}
crate::bail_parse_error!("no such column: {}", id.as_str());
};
let normalized_id = normalize_ident(id.as_str());
if binding_behavior == BindingBehavior::TryResultColumnsFirst {
if let Some(result_columns) = result_columns {
for result_column in result_columns.iter() {
if result_column.name(referenced_tables).is_some_and(|name| {
name.eq_ignore_ascii_case(&normalized_id)
}) {
*expr = result_column.expr.clone();
return Ok(WalkControl::Continue);
}
if binding_behavior == BindingBehavior::TryResultColumnsFirst {
if let Some(result_columns) = result_columns {
for result_column in result_columns.iter() {
if result_column
.name(referenced_tables)
.is_some_and(|name| name.eq_ignore_ascii_case(&normalized_id))
{
*expr = result_column.expr.clone();
return Ok(WalkControl::Continue);
}
}
}
let mut match_result = None;
}
let mut match_result = None;
// First check joined tables
for joined_table in referenced_tables.joined_tables().iter() {
let col_idx = joined_table.table.columns().iter().position(|c| {
// First check joined tables
for joined_table in referenced_tables.joined_tables().iter() {
let col_idx = joined_table.table.columns().iter().position(|c| {
c.name
.as_ref()
.is_some_and(|name| name.eq_ignore_ascii_case(&normalized_id))
});
if col_idx.is_some() {
if match_result.is_some() {
let mut ok = false;
// Column name ambiguity is ok if it is in the USING clause because then it is deduplicated
// and the left table is used.
if let Some(join_info) = &joined_table.join_info {
if join_info.using.iter().any(|using_col| {
using_col.as_str().eq_ignore_ascii_case(&normalized_id)
}) {
ok = true;
}
}
if !ok {
crate::bail_parse_error!("Column {} is ambiguous", id.as_str());
}
} else {
let col =
joined_table.table.columns().get(col_idx.unwrap()).unwrap();
match_result = Some((
joined_table.internal_id,
col_idx.unwrap(),
col.is_rowid_alias,
));
}
// only if we haven't found a match, check for explicit rowid reference
} else if let Some(row_id_expr) = parse_row_id(
&normalized_id,
referenced_tables.joined_tables()[0].internal_id,
|| referenced_tables.joined_tables().len() != 1,
)? {
*expr = row_id_expr;
return Ok(WalkControl::Continue);
}
}
// Then check outer query references, if we still didn't find something.
// Normally finding multiple matches for a non-qualified column is an error (column x is ambiguous)
// but in the case of subqueries, the inner query takes precedence.
// For example:
// SELECT * FROM t WHERE x = (SELECT x FROM t2)
// In this case, there is no ambiguity:
// - x in the outer query refers to t.x,
// - x in the inner query refers to t2.x.
if match_result.is_none() {
for outer_ref in referenced_tables.outer_query_refs().iter() {
let col_idx = outer_ref.table.columns().iter().position(|c| {
c.name
.as_ref()
.is_some_and(|name| name.eq_ignore_ascii_case(&normalized_id))
});
if col_idx.is_some() {
if match_result.is_some() {
let mut ok = false;
// Column name ambiguity is ok if it is in the USING clause because then it is deduplicated
// and the left table is used.
if let Some(join_info) = &joined_table.join_info {
if join_info.using.iter().any(|using_col| {
using_col.as_str().eq_ignore_ascii_case(&normalized_id)
}) {
ok = true;
}
}
if !ok {
crate::bail_parse_error!(
"Column {} is ambiguous",
id.as_str()
);
}
} else {
let col =
joined_table.table.columns().get(col_idx.unwrap()).unwrap();
match_result = Some((
joined_table.internal_id,
col_idx.unwrap(),
col.is_rowid_alias,
));
crate::bail_parse_error!("Column {} is ambiguous", id.as_str());
}
// only if we haven't found a match, check for explicit rowid reference
} else if let Some(row_id_expr) = parse_row_id(
&normalized_id,
referenced_tables.joined_tables()[0].internal_id,
|| referenced_tables.joined_tables().len() != 1,
)? {
*expr = row_id_expr;
return Ok(WalkControl::Continue);
let col = outer_ref.table.columns().get(col_idx.unwrap()).unwrap();
match_result = Some((
outer_ref.internal_id,
col_idx.unwrap(),
col.is_rowid_alias,
));
}
}
// Then check outer query references, if we still didn't find something.
// Normally finding multiple matches for a non-qualified column is an error (column x is ambiguous)
// but in the case of subqueries, the inner query takes precedence.
// For example:
// SELECT * FROM t WHERE x = (SELECT x FROM t2)
// In this case, there is no ambiguity:
// - x in the outer query refers to t.x,
// - x in the inner query refers to t2.x.
if match_result.is_none() {
for outer_ref in referenced_tables.outer_query_refs().iter() {
let col_idx = outer_ref.table.columns().iter().position(|c| {
c.name.as_ref().is_some_and(|name| {
name.eq_ignore_ascii_case(&normalized_id)
})
});
if col_idx.is_some() {
if match_result.is_some() {
crate::bail_parse_error!(
"Column {} is ambiguous",
id.as_str()
);
}
let col =
outer_ref.table.columns().get(col_idx.unwrap()).unwrap();
match_result = Some((
outer_ref.internal_id,
col_idx.unwrap(),
col.is_rowid_alias,
));
}
}
}
if let Some((table_id, col_idx, is_rowid_alias)) = match_result {
*expr = Expr::Column {
database: None, // TODO: support different databases
table: table_id,
column: col_idx,
is_rowid_alias,
};
referenced_tables.mark_column_used(table_id, col_idx);
return Ok(WalkControl::Continue);
}
if binding_behavior == BindingBehavior::TryCanonicalColumnsFirst {
if let Some(result_columns) = result_columns {
for result_column in result_columns.iter() {
if result_column.name(referenced_tables).is_some_and(|name| {
name.eq_ignore_ascii_case(&normalized_id)
}) {
*expr = result_column.expr.clone();
return Ok(WalkControl::Continue);
}
}
}
}
// SQLite behavior: Only double-quoted identifiers get fallback to string literals
// Single quotes are handled as literals earlier, unquoted identifiers must resolve to columns
if id.quoted_with('"') {
// Convert failed double-quoted identifier to string literal
*expr = Expr::Literal(ast::Literal::String(id.as_literal()));
return Ok(WalkControl::Continue);
} else {
// Unquoted identifiers must resolve to columns - no fallback
crate::bail_parse_error!("no such column: {}", id.as_str())
}
}
Expr::Qualified(tbl, id) => {
tracing::debug!("bind_and_rewrite_expr({:?}, {:?})", tbl, id);
let normalized_table_name = normalize_ident(tbl.as_str());
let matching_tbl = referenced_tables
.find_table_and_internal_id_by_identifier(&normalized_table_name);
if matching_tbl.is_none() {
crate::bail_parse_error!("no such table: {}", normalized_table_name);
}
let (tbl_id, tbl) = matching_tbl.unwrap();
let normalized_id = normalize_ident(id.as_str());
let col_idx = tbl.columns().iter().position(|c| {
c.name
.as_ref()
.is_some_and(|name| name.eq_ignore_ascii_case(&normalized_id))
});
if let Some(row_id_expr) = parse_row_id(&normalized_id, tbl_id, || false)? {
*expr = row_id_expr;
return Ok(WalkControl::Continue);
}
let Some(col_idx) = col_idx else {
crate::bail_parse_error!("no such column: {}", normalized_id);
};
let col = tbl.columns().get(col_idx).unwrap();
if let Some((table_id, col_idx, is_rowid_alias)) = match_result {
*expr = Expr::Column {
database: None, // TODO: support different databases
table: tbl_id,
table: table_id,
column: col_idx,
is_rowid_alias: col.is_rowid_alias,
is_rowid_alias,
};
tracing::debug!("rewritten to column");
referenced_tables.mark_column_used(tbl_id, col_idx);
referenced_tables.mark_column_used(table_id, col_idx);
return Ok(WalkControl::Continue);
}
Expr::DoublyQualified(db_name, tbl_name, col_name) => {
let normalized_col_name = normalize_ident(col_name.as_str());
// Create a QualifiedName and use existing resolve_database_id method
let qualified_name = ast::QualifiedName {
db_name: Some(db_name.clone()),
name: tbl_name.clone(),
alias: None,
};
let database_id = connection.resolve_database_id(&qualified_name)?;
// Get the table from the specified database
let table = connection
.with_schema(database_id, |schema| schema.get_table(tbl_name.as_str()))
.ok_or_else(|| {
crate::LimboError::ParseError(format!(
"no such table: {}.{}",
db_name.as_str(),
tbl_name.as_str()
))
})?;
// Find the column in the table
let col_idx = table
.columns()
.iter()
.position(|c| {
c.name.as_ref().is_some_and(|name| {
name.eq_ignore_ascii_case(&normalized_col_name)
})
})
.ok_or_else(|| {
crate::LimboError::ParseError(format!(
"Column: {}.{}.{} not found",
db_name.as_str(),
tbl_name.as_str(),
col_name.as_str()
))
})?;
let col = table.columns().get(col_idx).unwrap();
// Check if this is a rowid alias
let is_rowid_alias = col.is_rowid_alias;
// Convert to Column expression - since this is a cross-database reference,
// we need to create a synthetic table reference for it
// For now, we'll error if the table isn't already in the referenced tables
let normalized_tbl_name = normalize_ident(tbl_name.as_str());
let matching_tbl = referenced_tables
.find_table_and_internal_id_by_identifier(&normalized_tbl_name);
if let Some((tbl_id, _)) = matching_tbl {
// Table is already in referenced tables, use existing internal ID
*expr = Expr::Column {
database: Some(database_id),
table: tbl_id,
column: col_idx,
is_rowid_alias,
};
referenced_tables.mark_column_used(tbl_id, col_idx);
} else {
return Err(crate::LimboError::ParseError(format!(
"table {normalized_tbl_name} is not in FROM clause - cross-database column references require the table to be explicitly joined"
)));
if binding_behavior == BindingBehavior::TryCanonicalColumnsFirst {
if let Some(result_columns) = result_columns {
for result_column in result_columns.iter() {
if result_column
.name(referenced_tables)
.is_some_and(|name| name.eq_ignore_ascii_case(&normalized_id))
{
*expr = result_column.expr.clone();
return Ok(WalkControl::Continue);
}
}
}
}
_ => {}
// SQLite behavior: Only double-quoted identifiers get fallback to string literals
// Single quotes are handled as literals earlier, unquoted identifiers must resolve to columns
if id.quoted_with('"') {
// Convert failed double-quoted identifier to string literal
*expr = Expr::Literal(ast::Literal::String(id.as_literal()));
return Ok(WalkControl::Continue);
} else {
// Unquoted identifiers must resolve to columns - no fallback
crate::bail_parse_error!("no such column: {}", id.as_str())
}
}
Expr::Qualified(tbl, id) => {
tracing::debug!("bind_and_rewrite_expr({:?}, {:?})", tbl, id);
let Some(referenced_tables) = &mut referenced_tables else {
if binding_behavior == BindingBehavior::AllowUnboundIdentifiers {
return Ok(WalkControl::Continue);
}
crate::bail_parse_error!(
"no such column: {}.{}",
tbl.as_str(),
id.as_str()
);
};
let normalized_table_name = normalize_ident(tbl.as_str());
let matching_tbl = referenced_tables
.find_table_and_internal_id_by_identifier(&normalized_table_name);
if matching_tbl.is_none() {
crate::bail_parse_error!("no such table: {}", normalized_table_name);
}
let (tbl_id, tbl) = matching_tbl.unwrap();
let normalized_id = normalize_ident(id.as_str());
let col_idx = tbl.columns().iter().position(|c| {
c.name
.as_ref()
.is_some_and(|name| name.eq_ignore_ascii_case(&normalized_id))
});
if let Some(row_id_expr) = parse_row_id(&normalized_id, tbl_id, || false)? {
*expr = row_id_expr;
return Ok(WalkControl::Continue);
}
let Some(col_idx) = col_idx else {
crate::bail_parse_error!("no such column: {}", normalized_id);
};
let col = tbl.columns().get(col_idx).unwrap();
*expr = Expr::Column {
database: None, // TODO: support different databases
table: tbl_id,
column: col_idx,
is_rowid_alias: col.is_rowid_alias,
};
tracing::debug!("rewritten to column");
referenced_tables.mark_column_used(tbl_id, col_idx);
return Ok(WalkControl::Continue);
}
Expr::DoublyQualified(db_name, tbl_name, col_name) => {
let Some(referenced_tables) = &mut referenced_tables else {
if binding_behavior == BindingBehavior::AllowUnboundIdentifiers {
return Ok(WalkControl::Continue);
}
crate::bail_parse_error!(
"no such column: {}.{}.{}",
db_name.as_str(),
tbl_name.as_str(),
col_name.as_str()
);
};
let normalized_col_name = normalize_ident(col_name.as_str());
// Create a QualifiedName and use existing resolve_database_id method
let qualified_name = ast::QualifiedName {
db_name: Some(db_name.clone()),
name: tbl_name.clone(),
alias: None,
};
let database_id = connection.resolve_database_id(&qualified_name)?;
// Get the table from the specified database
let table = connection
.with_schema(database_id, |schema| schema.get_table(tbl_name.as_str()))
.ok_or_else(|| {
crate::LimboError::ParseError(format!(
"no such table: {}.{}",
db_name.as_str(),
tbl_name.as_str()
))
})?;
// Find the column in the table
let col_idx = table
.columns()
.iter()
.position(|c| {
c.name
.as_ref()
.is_some_and(|name| name.eq_ignore_ascii_case(&normalized_col_name))
})
.ok_or_else(|| {
crate::LimboError::ParseError(format!(
"Column: {}.{}.{} not found",
db_name.as_str(),
tbl_name.as_str(),
col_name.as_str()
))
})?;
let col = table.columns().get(col_idx).unwrap();
// Check if this is a rowid alias
let is_rowid_alias = col.is_rowid_alias;
// Convert to Column expression - since this is a cross-database reference,
// we need to create a synthetic table reference for it
// For now, we'll error if the table isn't already in the referenced tables
let normalized_tbl_name = normalize_ident(tbl_name.as_str());
let matching_tbl = referenced_tables
.find_table_and_internal_id_by_identifier(&normalized_tbl_name);
if let Some((tbl_id, _)) = matching_tbl {
// Table is already in referenced tables, use existing internal ID
*expr = Expr::Column {
database: Some(database_id),
table: tbl_id,
column: col_idx,
is_rowid_alias,
};
referenced_tables.mark_column_used(tbl_id, col_idx);
} else {
return Err(crate::LimboError::ParseError(format!(
"table {normalized_tbl_name} is not in FROM clause - cross-database column references require the table to be explicitly joined"
)));
}
}
_ => {}
}
Ok(WalkControl::Continue)
},

View File

@@ -195,7 +195,7 @@ pub fn translate_insert(
None,
connection,
&mut program.param_ctx,
BindingBehavior::ResultColumnsNotAllowed,
BindingBehavior::AllowUnboundIdentifiers,
)?;
}
if let Some(ref mut where_expr) = where_clause {
@@ -205,7 +205,7 @@ pub fn translate_insert(
None,
connection,
&mut program.param_ctx,
BindingBehavior::ResultColumnsNotAllowed,
BindingBehavior::AllowUnboundIdentifiers,
)?;
}
}

View File

@@ -812,3 +812,7 @@ do_execsql_test_on_specific_db {:memory:} null-in-search {
2|2
2|2}
do_execsql_test_in_memory_any_error limit-column-reference-error {
CREATE TABLE t(a);
SELECT * FROM t LIMIT (t.a);
}