From 00ef3082f31d9aca613e4c1d38a70fbdd63a0e3c Mon Sep 17 00:00:00 2001
From: William Casarin
Date: Tue, 21 Jan 2025 13:12:07 -0800
Subject: [PATCH] tokens: add a more advanced token parser

Signed-off-by: William Casarin
---
 crates/notedeck_columns/src/storage/mod.rs    |   5 +-
 .../src/storage/token_parser.rs               | 300 +++++++++++++++++-
 crates/notedeck_columns/src/ui/add_column.rs  |  11 +-
 3 files changed, 304 insertions(+), 12 deletions(-)

diff --git a/crates/notedeck_columns/src/storage/mod.rs b/crates/notedeck_columns/src/storage/mod.rs
index d870d91..95c88d7 100644
--- a/crates/notedeck_columns/src/storage/mod.rs
+++ b/crates/notedeck_columns/src/storage/mod.rs
@@ -5,4 +5,7 @@ mod token_parser;
 pub use decks::{load_decks_cache, save_decks_cache, DECKS_CACHE_FILE};
 pub use migration::{deserialize_columns, COLUMNS_FILE};
-pub use token_parser::{ParseError, TokenParser, TokenSerializable, TokenWriter};
+pub use token_parser::{
+    ParseError, Payload, Token, TokenAlternatives, TokenParser, TokenPayload, TokenSerializable,
+    TokenWriter, UnexpectedToken,
+};
diff --git a/crates/notedeck_columns/src/storage/token_parser.rs b/crates/notedeck_columns/src/storage/token_parser.rs
index 91dcedf..7a85599 100644
--- a/crates/notedeck_columns/src/storage/token_parser.rs
+++ b/crates/notedeck_columns/src/storage/token_parser.rs
@@ -1,5 +1,5 @@
 use crate::timeline::kind::PubkeySource;
-use enostr::Pubkey;
+use enostr::{NoteId, Pubkey};
 
 #[derive(Debug, Clone)]
 pub struct UnexpectedToken<'fnd, 'exp> {
@@ -7,6 +7,251 @@ pub struct UnexpectedToken<'fnd, 'exp> {
     pub found: &'fnd str,
 }
 
+#[derive(Debug, Copy, Clone, PartialEq, Eq)]
+pub enum TokenPayload {
+    PubkeySource,
+    Pubkey,
+    NoteId,
+}
+
+pub struct TokenAlternatives {
+    /// This is the preferred token. It should be serialized this way
+    preferred: &'static str,
+
+    /// These are deprecated tokens that should still be handled and parsed
+    deprecated: &'static [&'static str],
+}
+
+impl TokenAlternatives {
+    pub const fn new(preferred: &'static str, deprecated: &'static [&'static str]) -> Self {
+        Self {
+            preferred,
+            deprecated,
+        }
+    }
+}
+
+/// Token is a unified serialization helper. By specifying a list of
+/// tokens for each thing you want to parse, you can type-safely parse
+/// and serialize things.
+pub enum Token {
+    /// A simple identifier
+    Identifier(&'static str),
+
+    /// There are multiple ways to parse this identifier
+    Alternatives(TokenAlternatives),
+
+    /// Different payload types: pubkeys, note ids, etc.
+    Payload(TokenPayload),
+}
+
+#[derive(Debug, Clone)]
+pub enum Payload {
+    PubkeySource(PubkeySource),
+    Pubkey(Pubkey),
+    NoteId(NoteId),
+}
+
+impl Payload {
+    pub fn token_payload(&self) -> TokenPayload {
+        match self {
+            Payload::PubkeySource(_) => TokenPayload::PubkeySource,
+            Payload::Pubkey(_) => TokenPayload::Pubkey,
+            Payload::NoteId(_) => TokenPayload::NoteId,
+        }
+    }
+
+    pub fn parse_note_id(payload: Option<Payload>) -> Result<NoteId, ParseError<'static>> {
+        payload
+            .and_then(|p| p.get_note_id().cloned())
+            .ok_or(ParseError::ExpectedPayload(TokenPayload::NoteId))
+    }
+
+    pub fn parse_pubkey(payload: Option<Payload>) -> Result<Pubkey, ParseError<'static>> {
+        payload
+            .and_then(|p| p.get_pubkey().cloned())
+            .ok_or(ParseError::ExpectedPayload(TokenPayload::Pubkey))
+    }
+
+    pub fn parse_pubkey_source(
+        payload: Option<Payload>,
+    ) -> Result<PubkeySource, ParseError<'static>> {
+        payload
+            .and_then(|p| p.get_pubkey_source().cloned())
+            .ok_or(ParseError::ExpectedPayload(TokenPayload::PubkeySource))
+    }
+
+    pub fn parse<'a>(
+        expected: TokenPayload,
+        parser: &mut TokenParser<'a>,
+    ) -> Result<Self, ParseError<'a>> {
+        match expected {
+            TokenPayload::PubkeySource => Ok(Payload::pubkey_source(
+                PubkeySource::parse_from_tokens(parser)?,
+            )),
+            TokenPayload::Pubkey => {
+                let pubkey = parser.try_parse(|p| {
+                    let hex = p.pull_token()?;
+                    Pubkey::from_hex(hex).map_err(|_| ParseError::HexDecodeFailed)
+                })?;
+
+                Ok(Payload::pubkey(pubkey))
+            }
+            TokenPayload::NoteId => {
+                let note_id = parser.try_parse(|p| {
+                    let hex = p.pull_token()?;
+                    NoteId::from_hex(hex).map_err(|_| ParseError::HexDecodeFailed)
+                })?;
+
+                Ok(Payload::note_id(note_id))
+            }
+        }
+    }
+
+    pub fn pubkey(pubkey: Pubkey) -> Self {
+        Self::Pubkey(pubkey)
+    }
+
+    pub fn pubkey_source(pubkey_src: PubkeySource) -> Self {
+        Self::PubkeySource(pubkey_src)
+    }
+
+    pub fn note_id(note_id: NoteId) -> Self {
+        Self::NoteId(note_id)
+    }
+
+    pub fn get_pubkey(&self) -> Option<&Pubkey> {
+        if let Self::Pubkey(pubkey) = self {
+            Some(pubkey)
+        } else {
+            None
+        }
+    }
+
+    pub fn get_pubkey_source(&self) -> Option<&PubkeySource> {
+        if let Self::PubkeySource(pk_src) = self {
+            Some(pk_src)
+        } else {
+            None
+        }
+    }
+
+    pub fn get_note_id(&self) -> Option<&NoteId> {
+        if let Self::NoteId(note_id) = self {
+            Some(note_id)
+        } else {
+            None
+        }
+    }
+}
+
+impl Token {
+    pub fn parse<'a>(
+        &self,
+        parser: &mut TokenParser<'a>,
+    ) -> Result<Option<Payload>, ParseError<'a>> {
+        match self {
+            Token::Identifier(s) => {
+                parser.parse_token(s)?;
+                Ok(None)
+            }
+
+            Token::Payload(payload) => {
+                let payload = Payload::parse(*payload, parser)?;
+                Ok(Some(payload))
+            }
+
+            Token::Alternatives(alts) => {
+                if parser.try_parse(|p| p.parse_token(alts.preferred)).is_ok() {
+                    return Ok(None);
+                }
+
+                for token in alts.deprecated {
+                    if parser.try_parse(|p| p.parse_token(token)).is_ok() {
+                        return Ok(None);
+                    }
+                }
+
+                Err(ParseError::AltAllFailed)
+            }
+        }
+    }
+
+    /// Parse all of the tokens in sequence, ensuring that we extract a
+    /// payload if we find one. This only handles a single payload; if you
+    /// need more, use a custom parser.
+    pub fn parse_all<'a>(
+        parser: &mut TokenParser<'a>,
+        tokens: &[Token],
+    ) -> Result<Option<Payload>, ParseError<'a>> {
+        parser.try_parse(|p| {
+            let mut payload: Option<Payload> = None;
+            for token in tokens {
+                if let Some(pl) = token.parse(p)? {
+                    payload = Some(pl);
+                }
+            }
+
+            Ok(payload)
+        })
+    }
+
+    pub fn serialize_all(writer: &mut TokenWriter, tokens: &[Token], payload: Option<&Payload>) {
+        for token in tokens {
+            token.serialize(writer, payload)
+        }
+    }
+
+    pub fn serialize(&self, writer: &mut TokenWriter, payload: Option<&Payload>) {
+        match self {
+            Token::Identifier(s) => writer.write_token(s),
+            Token::Alternatives(alts) => writer.write_token(alts.preferred),
+            Token::Payload(token_payload) => match token_payload {
+                TokenPayload::PubkeySource => {
+                    payload
+                        .and_then(|p| p.get_pubkey_source())
+                        .expect("expected pubkey_source payload")
+                        .serialize_tokens(writer);
+                }
+
+                TokenPayload::Pubkey => {
+                    let pubkey = payload
+                        .and_then(|p| p.get_pubkey())
+                        .expect("expected pubkey payload");
+                    writer.write_token(&hex::encode(pubkey.bytes()));
+                }
+
+                TokenPayload::NoteId => {
+                    let note_id = payload
+                        .and_then(|p| p.get_note_id())
+                        .expect("expected note_id payload");
+                    writer.write_token(&hex::encode(note_id.bytes()));
+                }
+            },
+        }
+    }
+
+    pub const fn id(s: &'static str) -> Self {
+        Token::Identifier(s)
+    }
+
+    pub const fn alts(primary: &'static str, deprecated: &'static [&'static str]) -> Self {
+        Token::Alternatives(TokenAlternatives::new(primary, deprecated))
+    }
+
+    pub const fn pubkey() -> Self {
+        Token::Payload(TokenPayload::Pubkey)
+    }
+
+    pub const fn pubkey_source() -> Self {
+        Token::Payload(TokenPayload::PubkeySource)
+    }
+
+    pub const fn note_id() -> Self {
+        Token::Payload(TokenPayload::NoteId)
+    }
+}
+
 #[derive(Debug, Clone)]
 pub enum ParseError<'a> {
     /// Not done parsing yet
@@ -18,6 +263,11 @@ pub enum ParseError<'a> {
     /// There was some issue decoding the data
     DecodeFailed,
 
+    /// We expected a payload of a specific type but found something else
+    ExpectedPayload(TokenPayload),
+
+    HexDecodeFailed,
+
     /// We encountered an unexpected token
     UnexpectedToken(UnexpectedToken<'a, 'static>),
 
@@ -122,6 +372,18 @@ impl<'a> TokenParser<'a> {
         Self { tokens, index }
     }
 
+    pub fn peek_parse_token(&mut self, expected: &'static str) -> Result<&'a str, ParseError<'a>> {
+        let found = self.peek_token()?;
+        if found == expected {
+            Ok(found)
+        } else {
+            Err(ParseError::UnexpectedToken(UnexpectedToken {
+                expected,
+                found,
+            }))
+        }
+    }
+
     pub fn parse_token(&mut self, expected: &'static str) -> Result<&'a str, ParseError<'a>> {
         let found = self.pull_token()?;
         if found == expected {
@@ -154,6 +416,23 @@ impl<'a> TokenParser<'a> {
         }
     }
 
+    /// Attempt to parse something, backtracking if we fail.
+    pub fn try_parse<R>(
+        &mut self,
+        parse_fn: impl FnOnce(&mut Self) -> Result<R, ParseError<'a>>,
+    ) -> Result<R, ParseError<'a>> {
+        let start = self.index;
+        let result = parse_fn(self);
+
+        // If the parser closure fails, revert the index
+        if result.is_err() {
+            self.index = start;
+            result
+        } else {
+            result
+        }
+    }
+
     pub fn pull_token(&mut self) -> Result<&'a str, ParseError<'a>> {
         let token = self
             .tokens
@@ -172,6 +451,13 @@ impl<'a> TokenParser<'a> {
         self.index -= 1;
     }
 
+    pub fn peek_token(&self) -> Result<&'a str, ParseError<'a>> {
+        self.tokens()
+            .first()
+            .ok_or(ParseError::DecodeFailed)
+            .copied()
+    }
+
     #[inline]
     pub fn tokens(&self) -> &'a [&'a str] {
         let min_index = self.index.min(self.tokens.len());
@@ -187,8 +473,8 @@ impl<'a> TokenParser<'a> {
 pub trait TokenSerializable: Sized {
     /// Return a list of serialization plans for a type. We do this for
     /// type safety and assume constructing these types are lightweight
-    fn parse<'a>(parser: &mut TokenParser<'a>) -> Result<Self, ParseError<'a>>;
-    fn serialize(&self, writer: &mut TokenWriter);
+    fn parse_from_tokens<'a>(parser: &mut TokenParser<'a>) -> Result<Self, ParseError<'a>>;
+    fn serialize_tokens(&self, writer: &mut TokenWriter);
 }
 
 #[cfg(test)]
@@ -204,9 +490,9 @@ mod tests {
         let data = &data_str.split(":").collect::<Vec<&str>>();
         let mut token_writer = TokenWriter::default();
         let mut parser = TokenParser::new(&data);
-        let parsed = AddColumnRoute::parse(&mut parser).unwrap();
+        let parsed = AddColumnRoute::parse_from_tokens(&mut parser).unwrap();
         let expected = AddColumnRoute::Algo(AddAlgoRoute::LastPerPubkey);
-        parsed.serialize(&mut token_writer);
+        parsed.serialize_tokens(&mut token_writer);
         assert_eq!(expected, parsed);
         assert_eq!(token_writer.str(), data_str);
     }
@@ -216,9 +502,9 @@ mod tests {
         let mut token_writer = TokenWriter::default();
         let data: &[&str] = &[data_str];
         let mut parser = TokenParser::new(data);
-        let parsed = AddColumnRoute::parse(&mut parser).unwrap();
+        let parsed = AddColumnRoute::parse_from_tokens(&mut parser).unwrap();
         let expected = AddColumnRoute::Base;
-        parsed.serialize(&mut token_writer);
+        parsed.serialize_tokens(&mut token_writer);
         assert_eq!(expected, parsed);
         assert_eq!(token_writer.str(), data_str);
     }
diff --git a/crates/notedeck_columns/src/ui/add_column.rs b/crates/notedeck_columns/src/ui/add_column.rs
index 50acf1f..cc8610f 100644
--- a/crates/notedeck_columns/src/ui/add_column.rs
+++ b/crates/notedeck_columns/src/ui/add_column.rs
@@ -104,20 +104,23 @@ impl AddColumnRoute {
             Self::ExternalIndividual => &["column", "external_individual_selection"],
             Self::Hashtag => &["column", "hashtag"],
             Self::Algo(AddAlgoRoute::Base) => &["column", "algo_selection"],
-            Self::Algo(AddAlgoRoute::LastPerPubkey) => &["column", "algo_selection", "last_per_pubkey"],
-            // NOTE!!! When adding to this, update the parser for TokenSerializable below
+            Self::Algo(AddAlgoRoute::LastPerPubkey) => {
+                &["column", "algo_selection", "last_per_pubkey"]
+            } // NOTE!!! When adding to this, update the parser for TokenSerializable below
         }
     }
 }
 
 impl TokenSerializable for AddColumnRoute {
-    fn serialize(&self, writer: &mut TokenWriter) {
+    fn serialize_tokens(&self, writer: &mut TokenWriter) {
         for token in self.tokens() {
             writer.write_token(token);
         }
     }
 
-    fn parse<'a>(parser: &mut TokenParser<'a>) -> Result<Self, ParseError<'a>> {
+    fn parse_from_tokens<'a>(parser: &mut TokenParser<'a>) -> Result<Self, ParseError<'a>> {
+        parser.peek_parse_token("column")?;
+
         TokenParser::alt(
             parser,
             &[
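
---

Usage sketch (not part of the patch): one way a route type can wire up
TokenSerializable through the new Token plan helpers. The `DeckRoute`
type and its "deck" identifier are hypothetical names made up for
illustration; Token::id, Token::pubkey, Token::parse_all,
Token::serialize_all, Payload::pubkey, and Payload::parse_pubkey are
the helpers this patch adds.

    use crate::storage::{
        ParseError, Payload, Token, TokenParser, TokenSerializable, TokenWriter,
    };
    use enostr::Pubkey;

    /// Hypothetical route, serialized as "deck:<pubkey hex>"
    enum DeckRoute {
        Deck(Pubkey),
    }

    /// The serialization plan: an identifier token followed by a pubkey payload
    const DECK_TOKENS: &[Token] = &[Token::id("deck"), Token::pubkey()];

    impl TokenSerializable for DeckRoute {
        fn parse_from_tokens<'a>(parser: &mut TokenParser<'a>) -> Result<Self, ParseError<'a>> {
            // parse_all walks the plan, backtracking on failure, and hands
            // back the single payload it extracted (the pubkey here)
            let payload = Token::parse_all(parser, DECK_TOKENS)?;
            let pubkey = Payload::parse_pubkey(payload)?;
            Ok(DeckRoute::Deck(pubkey))
        }

        fn serialize_tokens(&self, writer: &mut TokenWriter) {
            match self {
                DeckRoute::Deck(pk) => {
                    Token::serialize_all(writer, DECK_TOKENS, Some(&Payload::pubkey(pk.clone())));
                }
            }
        }
    }

A round trip would then parse ["deck", "<64 hex chars>"] into
DeckRoute::Deck and serialize it back through a TokenWriter, the same
shape the AddColumnRoute tests above exercise; routes without a payload
simply pass None to serialize_all and ignore the Option returned by
parse_all.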