added new TextstyleParser implementation

* the old implementation was cumbersome to maintain and had some
  problems with nested formatting.

see #630
This commit is contained in:
Bernhard B
2024-12-21 22:10:35 +01:00
parent 54c884c06e
commit f0a49e6aac
4 changed files with 166 additions and 114 deletions

View File

@@ -7,7 +7,7 @@ ARG GRAALVM_VERSION=21.0.0
ARG BUILD_VERSION_ARG=unset ARG BUILD_VERSION_ARG=unset
FROM golang:1.22-bookworm AS buildcontainer FROM golang:1.23-bookworm AS buildcontainer
ARG SIGNAL_CLI_VERSION ARG SIGNAL_CLI_VERSION
ARG LIBSIGNAL_CLIENT_VERSION ARG LIBSIGNAL_CLIENT_VERSION

View File

@@ -398,7 +398,8 @@ func (s *SignalClient) send(signalCliSendRequest ds.SignalCliSendRequest) (*Send
signalCliTextFormatStrings := []string{} signalCliTextFormatStrings := []string{}
if signalCliSendRequest.TextMode != nil && *signalCliSendRequest.TextMode == "styled" { if signalCliSendRequest.TextMode != nil && *signalCliSendRequest.TextMode == "styled" {
signalCliSendRequest.Message, signalCliTextFormatStrings = utils.ParseMarkdownMessage(signalCliSendRequest.Message) textstyleParser := utils.NewTextstyleParser(signalCliSendRequest.Message)
signalCliSendRequest.Message, signalCliTextFormatStrings = textstyleParser.Parse()
} }
var groupId string = "" var groupId string = ""

View File

@@ -2,6 +2,8 @@ package utils
import ( import (
"strconv" "strconv"
"unicode/utf16"
"unicode/utf8"
) )
const ( const (
@@ -18,119 +20,141 @@ const (
ItalicBegin = 1 ItalicBegin = 1
ItalicEnd = 2 ItalicEnd = 2
BoldBegin = 3 BoldBegin = 3
BoldEnd1 = 4
BoldEnd2 = 5
MonoSpaceBegin = 6 MonoSpaceBegin = 6
MonoSpaceEnd = 7
StrikethroughBegin = 8 StrikethroughBegin = 8
StrikethroughEnd = 9 SpoilerBegin = 9
SpoilerBegin1 = 10
SpoilerBegin = 11
SpoilerEnd1 = 12
SpoilerEnd2 = 13
) )
func getUtf16CharacterCount(s string) int { func getUtf16StringLength(s string) int {
stringLength := len(s) runes := []rune(s) //turn string to slice
if stringLength == 1 {
return 1 length := 0
for _, r := range runes {
length += utf16.RuneLen(r)
} }
return stringLength / 2 return length
} }
func getAdditionalCharacterCount(characterCount int) int { type TokenState struct {
additionalCharacterCount := characterCount - 1 BeginPos int
if additionalCharacterCount > 0 { Token int
return additionalCharacterCount
}
return 0
} }
func ParseMarkdownMessage(message string) (string, []string) { type Stack []TokenState
textFormat := Normal
textFormatBegin := 0
textFormatLength := 0
numOfControlChars := 0
state := None
signalCliFormatStrings := []string{}
fullString := ""
lastChar := ""
additionalCharacterCount := 0
runes := []rune(message) //turn string to slice func (s *Stack) Push(v TokenState) {
*s = append(*s, v)
}
for i, v := range runes { //iterate through rune func (s *Stack) Pop() TokenState {
if v == '*' { ret := (*s)[len(*s)-1]
if state == ItalicBegin { *s = (*s)[0 : len(*s)-1]
if lastChar == "*" {
state = BoldBegin return ret
textFormat = Bold }
textFormatBegin = i - numOfControlChars + additionalCharacterCount
textFormatLength = 0 func (s *Stack) Peek() TokenState {
additionalCharacterCount = 0 ret := (*s)[len(*s)-1]
return ret
}
// Empty reports whether the stack currently holds no token states.
func (s *Stack) Empty() bool {
	return len(*s) == 0
}
// eof is the sentinel value next returns once the whole input has been
// consumed; Parse stops its loop when it sees it.
const eof = -1
// TextstyleParser scans a message for text style markers ("*", "**", "||",
// "~" and "`"), strips them from the text and records the styled ranges as
// "start:length:STYLE" format strings.
type TextstyleParser struct {
// input is the raw message handed to NewTextstyleParser.
input string
// pos is the byte offset into input of the next rune to decode.
pos int
// width is the byte width of the rune most recently decoded by next
// (0 once the end of input was reached); backup subtracts it from pos.
width int
// tokens is the stack of style markers that were opened but not yet closed.
tokens Stack
// fullString is the output text accumulated so far, without style markers.
fullString string
// signalCliFormatStrings collects one entry per closed style range.
signalCliFormatStrings []string
//numOfControlTokens int
}
// NewTextstyleParser returns a parser positioned at the start of input, with
// an empty token stack, empty output text and an empty (non-nil) list of
// format strings.
func NewTextstyleParser(input string) *TextstyleParser {
	parser := new(TextstyleParser)
	parser.input = input
	// pos, width and fullString keep their zero values (0, 0 and "").
	parser.tokens = Stack{}
	parser.signalCliFormatStrings = []string{}
	return parser
}
// next decodes the UTF-8 rune at the current position, advances pos past it
// and returns it. The byte width of the decoded rune is stored in width so
// that backup can undo the step.
//
// At the end of the input it returns eof and sets width to 0, which turns a
// following backup into a no-op.
func (l *TextstyleParser) next() rune {
	if l.pos >= len(l.input) {
		l.width = 0
		return eof
	}

	// The result is deliberately not named "rune": that would shadow the
	// predeclared type inside the function body.
	r, size := utf8.DecodeRuneInString(l.input[l.pos:])
	l.width = size
	l.pos += size
	return r
}
// backup un-reads the rune most recently returned by next by moving pos back
// by the stored width. Only one rune's width is remembered, so it must be
// called at most once per call of next.
func (l *TextstyleParser) backup() {
	l.pos = l.pos - l.width
}
// peek returns the next rune in the input without consuming it, or eof when
// the input is exhausted.
//
// The rune is decoded in place, so neither pos nor width is modified. A
// next/backup round trip would overwrite width with the size of the peeked
// rune (or 0 at the end of input), and a later backup meant to un-read the
// previously consumed rune would then rewind by the wrong number of bytes.
func (l *TextstyleParser) peek() rune {
	if l.pos >= len(l.input) {
		return eof
	}
	r, _ := utf8.DecodeRuneInString(l.input[l.pos:])
	return r
}
func (l *TextstyleParser) handleToken(tokenType int, signalCliStylingType string) {
if l.tokens.Empty() {
l.tokens.Push(TokenState{BeginPos: getUtf16StringLength(l.fullString), Token: tokenType})
} else { } else {
state = ItalicEnd if l.tokens.Peek().Token == tokenType {
} tokenBeginState := l.tokens.Pop()
} else if state == None { l.signalCliFormatStrings = append(l.signalCliFormatStrings, strconv.Itoa(tokenBeginState.BeginPos)+":"+strconv.Itoa(getUtf16StringLength(l.fullString)-tokenBeginState.BeginPos)+":"+signalCliStylingType)
state = ItalicBegin
textFormat = Italic
textFormatBegin = i - numOfControlChars + additionalCharacterCount
textFormatLength = 0
} else if state == BoldBegin {
state = BoldEnd1
} else if state == BoldEnd1 {
state = BoldEnd2
}
numOfControlChars += 1
} else if v == '|' {
if state == None {
state = SpoilerBegin1
} else if state == SpoilerBegin1 && lastChar == "|" {
state = SpoilerBegin
textFormat = Spoiler
textFormatBegin = i - numOfControlChars + additionalCharacterCount
textFormatLength = 0
} else if state == SpoilerBegin {
state = SpoilerEnd1
} else if state == SpoilerEnd1 && lastChar == "|" {
state = SpoilerEnd2
}
numOfControlChars += 1
} else if v == '`' {
if state == None {
state = MonoSpaceBegin
textFormat = Monospace
textFormatBegin = i - numOfControlChars + additionalCharacterCount
textFormatLength = 0
} else if state == MonoSpaceBegin {
state = MonoSpaceEnd
}
numOfControlChars += 1
} else if v == '~' {
if state == None {
state = StrikethroughBegin
textFormat = Strikethrough
textFormatBegin = i - numOfControlChars + additionalCharacterCount
textFormatLength = 0
} else if state == StrikethroughBegin {
state = StrikethroughEnd
}
numOfControlChars += 1
} else { } else {
textFormatLength += 1 l.tokens.Push(TokenState{BeginPos: getUtf16StringLength(l.fullString), Token: tokenType})
fullString += string(v)
additionalCharacterCount += getAdditionalCharacterCount(getUtf16CharacterCount(string(v)))
}
lastChar = string(v)
if state == ItalicEnd || state == BoldEnd2 || state == MonoSpaceEnd || state == StrikethroughEnd || state == SpoilerEnd2 {
signalCliFormatStrings = append(signalCliFormatStrings, strconv.Itoa(textFormatBegin)+":"+strconv.Itoa(textFormatLength+additionalCharacterCount)+":"+textFormat)
state = None
textFormatBegin = 0
textFormatLength = 0
textFormat = Normal
} }
} }
}
return fullString, signalCliFormatStrings
// Parse walks the whole input rune by rune and returns the message text with
// the style markers removed, together with the format strings collected by
// handleToken (one "start:length:STYLE" entry per closed style range).
//
// Recognised markers: "**" (bold), "*" (italic), "||" (spoiler), "~"
// (strikethrough) and "`" (monospace). Marker characters are never copied to
// the output text; every other rune is appended unchanged.
func (l *TextstyleParser) Parse() (string, []string) {
for {
c := l.next()
if c == eof {
break
}
// Look one rune ahead so the two-character markers can be told apart
// from their single-character counterparts.
nextRune := l.peek()
if c == '*' {
if nextRune == '*' { // "**" is a bold marker
// Consume the second '*' as part of the marker.
l.next()
l.handleToken(BoldBegin, Bold)
} else { // a single "*" is an italic marker
l.handleToken(ItalicBegin, Italic)
}
} else if (c == '|') && (nextRune == '|') {
// "||" is a spoiler marker; consume the second '|'.
l.next()
l.handleToken(SpoilerBegin, Spoiler)
} else if c == '~' {
l.handleToken(StrikethroughBegin, Strikethrough)
} else if c == '`' {
l.handleToken(MonoSpaceBegin, Monospace)
} else {
// Literal text, including a lone '|' that is not followed by another '|'.
l.fullString += string(c)
}
}
return l.fullString, l.signalCliFormatStrings
} }

View File

@@ -15,80 +15,107 @@ func expectFormatStringsEqual(t *testing.T, formatStrings1 []string, formatStrin
} }
} }
func TestSimpleMessage1(t *testing.T) { func TestSimpleItalicMessage(t *testing.T) {
message, signalCliFormatStrings := ParseMarkdownMessage("*italic*") textstyleParser := NewTextstyleParser("*italic*")
message, signalCliFormatStrings := textstyleParser.Parse()
expectMessageEqual(t, message, "italic") expectMessageEqual(t, message, "italic")
expectFormatStringsEqual(t, signalCliFormatStrings, []string{"0:6:ITALIC"}) expectFormatStringsEqual(t, signalCliFormatStrings, []string{"0:6:ITALIC"})
} }
// TestSimpleBoldMessage checks that a message consisting only of a bold span
// is stripped of its "**" markers and yields a single BOLD range.
func TestSimpleBoldMessage(t *testing.T) {
	const input = "**bold**"
	wantFormats := []string{"0:4:BOLD"}

	gotText, gotFormats := NewTextstyleParser(input).Parse()

	expectMessageEqual(t, gotText, "bold")
	expectFormatStringsEqual(t, gotFormats, wantFormats)
}
func TestSimpleMessage(t *testing.T) { func TestSimpleMessage(t *testing.T) {
message, signalCliFormatStrings := ParseMarkdownMessage("*This is a italic message*") textstyleParser := NewTextstyleParser("*This is a italic message*")
message, signalCliFormatStrings := textstyleParser.Parse()
expectMessageEqual(t, message, "This is a italic message") expectMessageEqual(t, message, "This is a italic message")
expectFormatStringsEqual(t, signalCliFormatStrings, []string{"0:24:ITALIC"}) expectFormatStringsEqual(t, signalCliFormatStrings, []string{"0:24:ITALIC"})
} }
func TestBoldAndItalicMessage(t *testing.T) { func TestBoldAndItalicMessage(t *testing.T) {
message, signalCliFormatStrings := ParseMarkdownMessage("This is a **bold** and *italic* message") textstyleParser := NewTextstyleParser("This is a **bold** and *italic* message")
message, signalCliFormatStrings := textstyleParser.Parse()
expectMessageEqual(t, message, "This is a bold and italic message") expectMessageEqual(t, message, "This is a bold and italic message")
expectFormatStringsEqual(t, signalCliFormatStrings, []string{"10:4:BOLD", "19:6:ITALIC"}) expectFormatStringsEqual(t, signalCliFormatStrings, []string{"10:4:BOLD", "19:6:ITALIC"})
} }
func TestTwoBoldFormattedStrings(t *testing.T) { func TestTwoBoldFormattedStrings(t *testing.T) {
message, signalCliFormatStrings := ParseMarkdownMessage("This is a **bold** and another **bold** message") textstyleParser := NewTextstyleParser("This is a **bold** and another **bold** message")
message, signalCliFormatStrings := textstyleParser.Parse()
expectMessageEqual(t, message, "This is a bold and another bold message") expectMessageEqual(t, message, "This is a bold and another bold message")
expectFormatStringsEqual(t, signalCliFormatStrings, []string{"10:4:BOLD", "27:4:BOLD"}) expectFormatStringsEqual(t, signalCliFormatStrings, []string{"10:4:BOLD", "27:4:BOLD"})
} }
func TestStrikethrough(t *testing.T) { func TestStrikethrough(t *testing.T) {
message, signalCliFormatStrings := ParseMarkdownMessage("This is a ~strikethrough~ and a **bold** message") textstyleParser := NewTextstyleParser("This is a ~strikethrough~ and a **bold** message")
message, signalCliFormatStrings := textstyleParser.Parse()
expectMessageEqual(t, message, "This is a strikethrough and a bold message") expectMessageEqual(t, message, "This is a strikethrough and a bold message")
expectFormatStringsEqual(t, signalCliFormatStrings, []string{"10:13:STRIKETHROUGH", "30:4:BOLD"}) expectFormatStringsEqual(t, signalCliFormatStrings, []string{"10:13:STRIKETHROUGH", "30:4:BOLD"})
} }
func TestMonospace(t *testing.T) { func TestMonospace(t *testing.T) {
message, signalCliFormatStrings := ParseMarkdownMessage("This is a `monospace` and a **bold** message") textstyleParser := NewTextstyleParser("This is a `monospace` and a **bold** message")
message, signalCliFormatStrings := textstyleParser.Parse()
expectMessageEqual(t, message, "This is a monospace and a bold message") expectMessageEqual(t, message, "This is a monospace and a bold message")
expectFormatStringsEqual(t, signalCliFormatStrings, []string{"10:9:MONOSPACE", "26:4:BOLD"}) expectFormatStringsEqual(t, signalCliFormatStrings, []string{"10:9:MONOSPACE", "26:4:BOLD"})
} }
func TestMulticharacterEmoji(t *testing.T) { func TestMulticharacterEmoji(t *testing.T) {
message, signalCliFormatStrings := ParseMarkdownMessage("👋abcdefg") textstyleParser := NewTextstyleParser("👋abcdefg")
message, signalCliFormatStrings := textstyleParser.Parse()
expectMessageEqual(t, message, "👋abcdefg") expectMessageEqual(t, message, "👋abcdefg")
expectFormatStringsEqual(t, signalCliFormatStrings, []string{}) expectFormatStringsEqual(t, signalCliFormatStrings, []string{})
} }
func TestMulticharacterEmojiWithBoldText(t *testing.T) { func TestMulticharacterEmojiWithBoldText(t *testing.T) {
message, signalCliFormatStrings := ParseMarkdownMessage("👋**abcdefg**") textstyleParser := NewTextstyleParser("👋**abcdefg**")
message, signalCliFormatStrings := textstyleParser.Parse()
expectMessageEqual(t, message, "👋abcdefg") expectMessageEqual(t, message, "👋abcdefg")
expectFormatStringsEqual(t, signalCliFormatStrings, []string{"2:7:BOLD"}) expectFormatStringsEqual(t, signalCliFormatStrings, []string{"2:7:BOLD"})
} }
func TestMultipleMulticharacterEmoji(t *testing.T) { func TestMultipleMulticharacterEmoji(t *testing.T) {
message, signalCliFormatStrings := ParseMarkdownMessage("👋🏾abcdefg") textstyleParser := NewTextstyleParser("👋🏾abcdefg")
message, signalCliFormatStrings := textstyleParser.Parse()
expectMessageEqual(t, message, "👋🏾abcdefg") expectMessageEqual(t, message, "👋🏾abcdefg")
expectFormatStringsEqual(t, signalCliFormatStrings, []string{}) expectFormatStringsEqual(t, signalCliFormatStrings, []string{})
} }
func TestMultipleMulticharacterEmojiWithBoldText(t *testing.T) { func TestMultipleMulticharacterEmojiWithBoldText(t *testing.T) {
message, signalCliFormatStrings := ParseMarkdownMessage("👋🏾**abcdefg**") textstyleParser := NewTextstyleParser("👋🏾**abcdefg**")
message, signalCliFormatStrings := textstyleParser.Parse()
expectMessageEqual(t, message, "👋🏾abcdefg") expectMessageEqual(t, message, "👋🏾abcdefg")
expectFormatStringsEqual(t, signalCliFormatStrings, []string{"4:7:BOLD"}) expectFormatStringsEqual(t, signalCliFormatStrings, []string{"4:7:BOLD"})
} }
func TestMulticharacterEmojiWithBoldText2(t *testing.T) { func TestMulticharacterEmojiWithBoldText2(t *testing.T) {
message, signalCliFormatStrings := ParseMarkdownMessage("Test 👦🏿 via **signal** API") textstyleParser := NewTextstyleParser("Test 👦🏿 via **signal** API")
message, signalCliFormatStrings := textstyleParser.Parse()
expectMessageEqual(t, message, "Test 👦🏿 via signal API") expectMessageEqual(t, message, "Test 👦🏿 via signal API")
expectFormatStringsEqual(t, signalCliFormatStrings, []string{"14:6:BOLD"}) expectFormatStringsEqual(t, signalCliFormatStrings, []string{"14:6:BOLD"})
} }
func TestSpoiler(t *testing.T) { func TestSpoiler(t *testing.T) {
message, signalCliFormatStrings := ParseMarkdownMessage("||this is a spoiler||") textstyleParser := NewTextstyleParser("||this is a spoiler||")
message, signalCliFormatStrings := textstyleParser.Parse()
expectMessageEqual(t, message, "this is a spoiler") expectMessageEqual(t, message, "this is a spoiler")
expectFormatStringsEqual(t, signalCliFormatStrings, []string{"0:17:SPOILER"}) expectFormatStringsEqual(t, signalCliFormatStrings, []string{"0:17:SPOILER"})
} }
func TestSpoiler1(t *testing.T) { func TestSpoiler1(t *testing.T) {
message, signalCliFormatStrings := ParseMarkdownMessage("||this is a spoiler|| and another ||spoiler||") textstyleParser := NewTextstyleParser("||this is a spoiler|| and another ||spoiler||")
message, signalCliFormatStrings := textstyleParser.Parse()
expectMessageEqual(t, message, "this is a spoiler and another spoiler") expectMessageEqual(t, message, "this is a spoiler and another spoiler")
expectFormatStringsEqual(t, signalCliFormatStrings, []string{"0:17:SPOILER", "30:7:SPOILER"}) expectFormatStringsEqual(t, signalCliFormatStrings, []string{"0:17:SPOILER", "30:7:SPOILER"})
} }
// TestBoldTextInsideSpoiler checks nested styling: a bold span wrapped in a
// spoiler span must produce both ranges, with the inner (bold) one reported
// first because it is closed first.
func TestBoldTextInsideSpoiler(t *testing.T) {
	const input = "||**this is a bold text inside a spoiler**||"
	wantFormats := []string{"0:36:BOLD", "0:36:SPOILER"}

	gotText, gotFormats := NewTextstyleParser(input).Parse()

	expectMessageEqual(t, gotText, "this is a bold text inside a spoiler")
	expectFormatStringsEqual(t, gotFormats, wantFormats)
}