alpaylan
2025-07-11 01:33:50 -04:00
163 changed files with 19438 additions and 2320 deletions

View File

@@ -2,7 +2,6 @@
use std::fmt::Formatter;
use std::str;
use uncased::UncasedStr;
mod token;
pub use token::TokenType;
@@ -42,13 +41,6 @@ pub(crate) fn from_bytes(bytes: &[u8]) -> String {
}
include!(concat!(env!("OUT_DIR"), "/keywords.rs"));
pub(crate) const MAX_KEYWORD_LEN: usize = 17;
/// Check if `word` is a keyword
pub fn keyword_token(word: &[u8]) -> Option<TokenType> {
let s = std::str::from_utf8(word).ok()?;
KEYWORDS.get(UncasedStr::new(s)).cloned()
}
pub(crate) fn is_identifier(name: &str) -> bool {
if name.is_empty() {
@@ -242,3 +234,176 @@ impl TokenType {
}
}
}
#[cfg(test)]
mod tests {
use super::*;
use std::collections::HashMap;
#[test]
fn test_keyword_token() {
let values = HashMap::from([
("ABORT", TokenType::TK_ABORT),
("ACTION", TokenType::TK_ACTION),
("ADD", TokenType::TK_ADD),
("AFTER", TokenType::TK_AFTER),
("ALL", TokenType::TK_ALL),
("ALTER", TokenType::TK_ALTER),
("ALWAYS", TokenType::TK_ALWAYS),
("ANALYZE", TokenType::TK_ANALYZE),
("AND", TokenType::TK_AND),
("AS", TokenType::TK_AS),
("ASC", TokenType::TK_ASC),
("ATTACH", TokenType::TK_ATTACH),
("AUTOINCREMENT", TokenType::TK_AUTOINCR),
("BEFORE", TokenType::TK_BEFORE),
("BEGIN", TokenType::TK_BEGIN),
("BETWEEN", TokenType::TK_BETWEEN),
("BY", TokenType::TK_BY),
("CASCADE", TokenType::TK_CASCADE),
("CASE", TokenType::TK_CASE),
("CAST", TokenType::TK_CAST),
("CHECK", TokenType::TK_CHECK),
("COLLATE", TokenType::TK_COLLATE),
("COLUMN", TokenType::TK_COLUMNKW),
("COMMIT", TokenType::TK_COMMIT),
("CONFLICT", TokenType::TK_CONFLICT),
("CONSTRAINT", TokenType::TK_CONSTRAINT),
("CREATE", TokenType::TK_CREATE),
("CROSS", TokenType::TK_JOIN_KW),
("CURRENT", TokenType::TK_CURRENT),
("CURRENT_DATE", TokenType::TK_CTIME_KW),
("CURRENT_TIME", TokenType::TK_CTIME_KW),
("CURRENT_TIMESTAMP", TokenType::TK_CTIME_KW),
("DATABASE", TokenType::TK_DATABASE),
("DEFAULT", TokenType::TK_DEFAULT),
("DEFERRABLE", TokenType::TK_DEFERRABLE),
("DEFERRED", TokenType::TK_DEFERRED),
("DELETE", TokenType::TK_DELETE),
("DESC", TokenType::TK_DESC),
("DETACH", TokenType::TK_DETACH),
("DISTINCT", TokenType::TK_DISTINCT),
("DO", TokenType::TK_DO),
("DROP", TokenType::TK_DROP),
("EACH", TokenType::TK_EACH),
("ELSE", TokenType::TK_ELSE),
("END", TokenType::TK_END),
("ESCAPE", TokenType::TK_ESCAPE),
("EXCEPT", TokenType::TK_EXCEPT),
("EXCLUDE", TokenType::TK_EXCLUDE),
("EXCLUSIVE", TokenType::TK_EXCLUSIVE),
("EXISTS", TokenType::TK_EXISTS),
("EXPLAIN", TokenType::TK_EXPLAIN),
("FAIL", TokenType::TK_FAIL),
("FILTER", TokenType::TK_FILTER),
("FIRST", TokenType::TK_FIRST),
("FOLLOWING", TokenType::TK_FOLLOWING),
("FOR", TokenType::TK_FOR),
("FOREIGN", TokenType::TK_FOREIGN),
("FROM", TokenType::TK_FROM),
("FULL", TokenType::TK_JOIN_KW),
("GENERATED", TokenType::TK_GENERATED),
("GLOB", TokenType::TK_LIKE_KW),
("GROUP", TokenType::TK_GROUP),
("GROUPS", TokenType::TK_GROUPS),
("HAVING", TokenType::TK_HAVING),
("IF", TokenType::TK_IF),
("IGNORE", TokenType::TK_IGNORE),
("IMMEDIATE", TokenType::TK_IMMEDIATE),
("IN", TokenType::TK_IN),
("INDEX", TokenType::TK_INDEX),
("INDEXED", TokenType::TK_INDEXED),
("INITIALLY", TokenType::TK_INITIALLY),
("INNER", TokenType::TK_JOIN_KW),
("INSERT", TokenType::TK_INSERT),
("INSTEAD", TokenType::TK_INSTEAD),
("INTERSECT", TokenType::TK_INTERSECT),
("INTO", TokenType::TK_INTO),
("IS", TokenType::TK_IS),
("ISNULL", TokenType::TK_ISNULL),
("JOIN", TokenType::TK_JOIN),
("KEY", TokenType::TK_KEY),
("LAST", TokenType::TK_LAST),
("LEFT", TokenType::TK_JOIN_KW),
("LIKE", TokenType::TK_LIKE_KW),
("LIMIT", TokenType::TK_LIMIT),
("MATCH", TokenType::TK_MATCH),
("MATERIALIZED", TokenType::TK_MATERIALIZED),
("NATURAL", TokenType::TK_JOIN_KW),
("NO", TokenType::TK_NO),
("NOT", TokenType::TK_NOT),
("NOTHING", TokenType::TK_NOTHING),
("NOTNULL", TokenType::TK_NOTNULL),
("NULL", TokenType::TK_NULL),
("NULLS", TokenType::TK_NULLS),
("OF", TokenType::TK_OF),
("OFFSET", TokenType::TK_OFFSET),
("ON", TokenType::TK_ON),
("OR", TokenType::TK_OR),
("ORDER", TokenType::TK_ORDER),
("OTHERS", TokenType::TK_OTHERS),
("OUTER", TokenType::TK_JOIN_KW),
("OVER", TokenType::TK_OVER),
("PARTITION", TokenType::TK_PARTITION),
("PLAN", TokenType::TK_PLAN),
("PRAGMA", TokenType::TK_PRAGMA),
("PRECEDING", TokenType::TK_PRECEDING),
("PRIMARY", TokenType::TK_PRIMARY),
("QUERY", TokenType::TK_QUERY),
("RAISE", TokenType::TK_RAISE),
("RANGE", TokenType::TK_RANGE),
("RECURSIVE", TokenType::TK_RECURSIVE),
("REFERENCES", TokenType::TK_REFERENCES),
("REGEXP", TokenType::TK_LIKE_KW),
("REINDEX", TokenType::TK_REINDEX),
("RELEASE", TokenType::TK_RELEASE),
("RENAME", TokenType::TK_RENAME),
("REPLACE", TokenType::TK_REPLACE),
("RETURNING", TokenType::TK_RETURNING),
("RESTRICT", TokenType::TK_RESTRICT),
("RIGHT", TokenType::TK_JOIN_KW),
("ROLLBACK", TokenType::TK_ROLLBACK),
("ROW", TokenType::TK_ROW),
("ROWS", TokenType::TK_ROWS),
("SAVEPOINT", TokenType::TK_SAVEPOINT),
("SELECT", TokenType::TK_SELECT),
("SET", TokenType::TK_SET),
("TABLE", TokenType::TK_TABLE),
("TEMP", TokenType::TK_TEMP),
("TEMPORARY", TokenType::TK_TEMP),
("THEN", TokenType::TK_THEN),
("TIES", TokenType::TK_TIES),
("TO", TokenType::TK_TO),
("TRANSACTION", TokenType::TK_TRANSACTION),
("TRIGGER", TokenType::TK_TRIGGER),
("UNBOUNDED", TokenType::TK_UNBOUNDED),
("UNION", TokenType::TK_UNION),
("UNIQUE", TokenType::TK_UNIQUE),
("UPDATE", TokenType::TK_UPDATE),
("USING", TokenType::TK_USING),
("VACUUM", TokenType::TK_VACUUM),
("VALUES", TokenType::TK_VALUES),
("VIEW", TokenType::TK_VIEW),
("VIRTUAL", TokenType::TK_VIRTUAL),
("WHEN", TokenType::TK_WHEN),
("WHERE", TokenType::TK_WHERE),
("WINDOW", TokenType::TK_WINDOW),
("WITH", TokenType::TK_WITH),
("WITHOUT", TokenType::TK_WITHOUT),
]);
for (key, value) in &values {
assert!(keyword_token(key.as_bytes()).unwrap() == *value);
assert!(
keyword_token(key.as_bytes().to_ascii_lowercase().as_slice()).unwrap() == *value
);
}
assert!(keyword_token(b"").is_none());
assert!(keyword_token(b"wrong").is_none());
assert!(keyword_token(b"super wrong").is_none());
assert!(keyword_token(b"super_wrong").is_none());
assert!(keyword_token(b"aae26e78-3ba7-4627-8f8f-02623302495a").is_none());
assert!(keyword_token("Crème Brulée".as_bytes()).is_none());
assert!(keyword_token("fróm".as_bytes()).is_none());
}
}
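The tests above exercise the case-insensitivity of `keyword_token`, which comes from wrapping the probe string in `uncased::UncasedStr` before looking it up in the build-script-generated `KEYWORDS` table. A minimal, self-contained sketch of the same lookup pattern, using a plain `HashMap` and a stand-in `Tok` enum in place of `KEYWORDS` and `TokenType`:

```rust
use std::collections::HashMap;
use uncased::UncasedStr;

// Stand-in for the crate's TokenType; only here to keep the sketch small.
#[derive(Clone, Copy, PartialEq, Debug)]
enum Tok {
    Select,
    From,
}

// Mirrors keyword_token: reject non-UTF-8 input, then do a
// case-insensitive map lookup keyed by &UncasedStr.
fn lookup(map: &HashMap<&UncasedStr, Tok>, word: &[u8]) -> Option<Tok> {
    let s = std::str::from_utf8(word).ok()?;
    map.get(UncasedStr::new(s)).copied()
}

fn main() {
    let mut map: HashMap<&UncasedStr, Tok> = HashMap::new();
    map.insert(UncasedStr::new("SELECT"), Tok::Select);
    map.insert(UncasedStr::new("FROM"), Tok::From);

    // UncasedStr hashes and compares ASCII case-insensitively,
    // so any casing of a keyword hits the same entry.
    assert_eq!(lookup(&map, b"select"), Some(Tok::Select));
    assert_eq!(lookup(&map, b"FrOm"), Some(Tok::From));

    // Non-keywords, non-ASCII lookalikes, and invalid UTF-8 all miss.
    assert_eq!(lookup(&map, b"ident"), None);
    assert_eq!(lookup(&map, "fróm".as_bytes()), None);
    assert_eq!(lookup(&map, &[0xff, 0xfe]), None);
}
```

Because `UncasedStr` folds only ASCII case, non-ASCII lookalikes such as `fróm` miss the table, which is exactly what the last assertions in the test module check.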

View File

@@ -4,9 +4,7 @@ use memchr::memchr;
pub use crate::dialect::TokenType;
use crate::dialect::TokenType::*;
use crate::dialect::{
is_identifier_continue, is_identifier_start, keyword_token, sentinel, MAX_KEYWORD_LEN,
};
use crate::dialect::{is_identifier_continue, is_identifier_start, keyword_token, sentinel};
use crate::parser::ast::Cmd;
use crate::parser::parse::{yyParser, YYCODETYPE};
use crate::parser::Context;
@@ -719,12 +717,7 @@ impl Tokenizer {
_ => data.len(),
};
let word = &data[..i];
let tt = if word.len() >= 2 && word.len() <= MAX_KEYWORD_LEN && word.is_ascii() {
keyword_token(word).unwrap_or(TK_ID)
} else {
TK_ID
};
(Some((word, tt)), i)
(Some((word, keyword_token(word).unwrap_or(TK_ID))), i)
}
}
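The deleted guard at the call site was a pre-filter, not a correctness requirement: every word it short-circuited to `TK_ID` (empty, a single byte, longer than `MAX_KEYWORD_LEN`, or non-ASCII) is already a guaranteed miss in the keyword table, so `keyword_token(word).unwrap_or(TK_ID)` alone classifies identically. A sketch of that equivalence, written as if inside this crate (the helper names are illustrative, not part of the commit):

```rust
use crate::dialect::keyword_token;
use crate::dialect::TokenType::{self, TK_ID};

/// The pre-commit call-site logic, reproduced for comparison.
fn classify_with_guard(word: &[u8]) -> TokenType {
    const MAX_KEYWORD_LEN: usize = 17; // dialect exported this before the commit
    if word.len() >= 2 && word.len() <= MAX_KEYWORD_LEN && word.is_ascii() {
        keyword_token(word).unwrap_or(TK_ID)
    } else {
        TK_ID
    }
}

/// The post-commit logic. The shortest SQL keyword has two ASCII bytes and
/// the longest (CURRENT_TIMESTAMP) has seventeen, so the guard only ever
/// rejected inputs that keyword_token already maps to None.
fn classify(word: &[u8]) -> TokenType {
    keyword_token(word).unwrap_or(TK_ID)
}
```

The guard was thus purely an optimization to skip a hash lookup for obvious non-keywords; dropping it trades that micro-optimization for a simpler call site.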

View File

@@ -1362,6 +1362,23 @@ impl CreateTableBody {
options,
})
}
/// Constructor from a `Vec` of column definitions
pub fn columns_and_constraints_from_definition(
columns_vec: Vec<ColumnDefinition>,
constraints: Option<Vec<NamedTableConstraint>>,
options: TableOptions,
) -> Result<Self, ParserError> {
let mut columns = IndexMap::new();
for def in columns_vec {
columns.insert(def.col_name.clone(), def);
}
Ok(Self::ColumnsAndConstraints {
columns,
constraints,
options,
})
}
}
/// Table column definition
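A rough usage sketch for the new constructor. Everything beyond the constructor's own signature — the import paths, `Name`'s representation, `ColumnDefinition`'s other fields, and `TableOptions::NONE` — is assumed from the upstream sqlite3-parser AST and may differ in this fork:

```rust
use crate::parser::ast::{ColumnDefinition, CreateTableBody, Name, TableOptions};
use crate::parser::ParserError;

fn two_column_body() -> Result<CreateTableBody, ParserError> {
    let columns = vec![
        ColumnDefinition {
            col_name: Name("id".to_owned()),
            col_type: None,
            constraints: vec![],
        },
        ColumnDefinition {
            col_name: Name("body".to_owned()),
            col_type: None,
            constraints: vec![],
        },
    ];
    // No table-level constraints, no WITHOUT ROWID / STRICT options.
    CreateTableBody::columns_and_constraints_from_definition(columns, None, TableOptions::NONE)
}
```

Note that the `IndexMap::insert` in the constructor keeps the last definition when two columns share a name; callers that need duplicate-column detection must check before calling.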
@@ -1744,6 +1761,8 @@ pub enum PragmaName {
SchemaVersion,
/// returns information about the columns of a table
TableInfo,
/// enable capture-changes logic for the connection
UnstableCaptureDataChangesConn,
/// Returns the user version of the database file.
UserVersion,
/// trigger a checkpoint to run on database(s) if WAL is enabled
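The `Unstable` prefix flags the new capture-data-changes pragma as experimental. The hunk only adds the variant and its doc comment; the SQL-level spelling and the gating helper below are assumptions based on the snake_case convention of neighboring variants:

```rust
use crate::parser::ast::PragmaName;

// Assumed mapping (not shown in this diff): variants follow the snake_case
// pragma spelling, so this variant would correspond to something like
//
//     PRAGMA unstable_capture_data_changes_conn;
//
// A caller that wants to fence off experimental pragmas could match on it.
fn is_unstable(pragma: &PragmaName) -> bool {
    matches!(pragma, PragmaName::UnstableCaptureDataChangesConn)
}
```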