Merge 'fix: make keyword_token safe by validating UTF-8 input' from ankit

This PR fixes an unsound usage of unsafe {
str::from_utf8_unchecked(word) } in the public function keyword_token in
mod.rs.
The function now uses std::str::from_utf8(word).ok()? to safely handle
invalid UTF-8, eliminating the unsoundness.
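No API changes; the only behavioral change is that input which is not valid
UTF-8 now returns `None` instead of being passed to the unchecked conversion.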
Code compiles and tests pass (where possible).
Closes: https://github.com/tursodatabase/libsql/issues/1859

Reviewed-by: Pere Diaz Bou <pere-altea@hotmail.com>

Closes #1677
This commit is contained in:
Pere Diaz Bou
2025-06-09 16:07:37 +02:00

View File

@@ -46,9 +46,8 @@ pub(crate) const MAX_KEYWORD_LEN: usize = 17;
/// Check if `word` is a keyword.
///
/// Looks `word` up in `KEYWORDS` using an `UncasedStr` key and returns the
/// matching `TokenType`, or `None` when the lookup finds no entry.
// NOTE(review): `UncasedStr` presumably makes the lookup case-insensitive —
// confirm against its definition.
pub fn keyword_token(word: &[u8]) -> Option<TokenType> {
// NOTE(review): this hunk is a diff rendered without +/- markers, so the
// removed body and the added body appear back to back below.
//
// Removed by this commit: the bytes were reinterpreted as `&str` without any
// UTF-8 validation (`from_utf8_unchecked`), which the commit message calls
// unsound for a public function taking arbitrary `&[u8]`.
KEYWORDS
.get(UncasedStr::new(unsafe { str::from_utf8_unchecked(word) }))
.copied()
// Added by this commit: validate the bytes first; `.ok()?` makes the function
// return `None` early when `word` is not valid UTF-8.
let s = std::str::from_utf8(word).ok()?;
KEYWORDS.get(UncasedStr::new(s)).cloned()
}
pub(crate) fn is_identifier(name: &str) -> bool {