Refactor the regexes used for replacing URLs with HTML tags, so that another Telegram Instant View bug is fixed.

This time the bug was with images inside blockquotes (from quoted events).
This commit is contained in:
fiatjaf
2023-09-21 14:55:42 -03:00
parent 24a104fd56
commit 112e204133
5 changed files with 76 additions and 97 deletions

3
go.mod
View File

@@ -12,7 +12,9 @@ require (
github.com/nbd-wtf/go-nostr v0.20.0 github.com/nbd-wtf/go-nostr v0.20.0
github.com/pelletier/go-toml v1.9.5 github.com/pelletier/go-toml v1.9.5
github.com/rs/zerolog v1.29.1 github.com/rs/zerolog v1.29.1
golang.org/x/exp v0.0.0-20221106115401-f9659909a136
golang.org/x/image v0.0.0-20190802002840-cff245a6509b golang.org/x/image v0.0.0-20190802002840-cff245a6509b
mvdan.cc/xurls/v2 v2.5.0
) )
require ( require (
@@ -42,7 +44,6 @@ require (
github.com/tidwall/gjson v1.14.4 // indirect github.com/tidwall/gjson v1.14.4 // indirect
github.com/tidwall/match v1.1.1 // indirect github.com/tidwall/match v1.1.1 // indirect
github.com/tidwall/pretty v1.2.0 // indirect github.com/tidwall/pretty v1.2.0 // indirect
golang.org/x/exp v0.0.0-20221106115401-f9659909a136 // indirect
golang.org/x/net v0.10.0 // indirect golang.org/x/net v0.10.0 // indirect
golang.org/x/sys v0.8.0 // indirect golang.org/x/sys v0.8.0 // indirect
google.golang.org/protobuf v1.23.0 // indirect google.golang.org/protobuf v1.23.0 // indirect

2
go.sum
View File

@@ -222,3 +222,5 @@ gopkg.in/yaml.v2 v2.3.0/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
mvdan.cc/xurls/v2 v2.5.0 h1:lyBNOm8Wo71UknhUs4QTFUNNMyxy2JEIaKKo0RWOh+8=
mvdan.cc/xurls/v2 v2.5.0/go.mod h1:yQgaGQ1rFtJUzkmKiHYSSfuQxqfYmd//X6PxvholpeE=

View File

@@ -76,7 +76,7 @@ func main() {
} }
funcMap := template.FuncMap{ funcMap := template.FuncMap{
"basicFormatting": basicFormatting, "basicFormatting": func(input string) string { return basicFormatting(input, false, false) },
"previewNotesFormatting": previewNotesFormatting, "previewNotesFormatting": previewNotesFormatting,
"escapeString": html.EscapeString, "escapeString": html.EscapeString,
"sanitizeXSS": sanitizeXSS, "sanitizeXSS": sanitizeXSS,

View File

@@ -308,7 +308,11 @@ func render(w http.ResponseWriter, r *http.Request) {
if event.Kind == 30023 || event.Kind == 30024 { if event.Kind == 30023 || event.Kind == 30024 {
content = mdToHTML(content, typ == "telegram_instant_view") content = mdToHTML(content, typ == "telegram_instant_view")
} else { } else {
content = basicFormatting(renderQuotesAsHTML(r.Context(), html.EscapeString(content))) // first we run basicFormatting, which turns URLs into their appropriate HTML tags
content = basicFormatting(html.EscapeString(content), true, false)
// then we render quotes as HTML, which will also apply basicFormatting to all the internal quotes
content = renderQuotesAsHTML(r.Context(), content, typ == "telegram_instant_view")
// we must do this because inside <blockquote>s we must treat <img>s differently when rendering for telegram_instant_view
} }
// pretty JSON // pretty JSON

160
utils.go
View File

@@ -16,6 +16,7 @@ import (
mdhtml "github.com/gomarkdown/markdown/html" mdhtml "github.com/gomarkdown/markdown/html"
"github.com/gomarkdown/markdown/parser" "github.com/gomarkdown/markdown/parser"
"github.com/microcosm-cc/bluemonday" "github.com/microcosm-cc/bluemonday"
"mvdan.cc/xurls/v2"
"github.com/nbd-wtf/go-nostr" "github.com/nbd-wtf/go-nostr"
"github.com/nbd-wtf/go-nostr/nip10" "github.com/nbd-wtf/go-nostr/nip10"
@@ -25,12 +26,19 @@ import (
var ( var (
urlSuffixMatcher = regexp.MustCompile(`[\w-_.]+\.[\w-_.]+(\/[\/\w]*)?$`) urlSuffixMatcher = regexp.MustCompile(`[\w-_.]+\.[\w-_.]+(\/[\/\w]*)?$`)
nostrEveryMatcher = regexp.MustCompile(`\S*(nostr:)?((npub|note|nevent|nprofile|naddr)1[a-z0-9]+)\b`) nostrEveryMatcher = regexp.MustCompile(`nostr:((npub|note|nevent|nprofile|naddr)1[a-z0-9]+)\b`)
nostrNoteNeventMatcher = regexp.MustCompile(`\S*(nostr:)?((note|nevent)1[a-z0-9]+)\b`) nostrNoteNeventMatcher = regexp.MustCompile(`nostr:((note|nevent)1[a-z0-9]+)\b`)
nostrNpubNprofileMatcher = regexp.MustCompile(`\S*(nostr:)?((npub|nprofile)1[a-z0-9]+)\b`) nostrNpubNprofileMatcher = regexp.MustCompile(`nostr:((npub|nprofile)1[a-z0-9]+)\b`)
hrefMatcher = regexp.MustCompile(`\S*(https?://\S+)\S*`)
imgsMatcher = regexp.MustCompile(`\S*(\()?(https?://\S+(\.jpg|\.jpeg|\.png|\.webp|\.gif))\S*`) urlMatcher = func() *regexp.Regexp {
videoMatcher = regexp.MustCompile(`\S*(https?://\S+(\.mp4|\.ogg|\.webm|.mov))\S*`) // hack to only allow these schemes while still using this library
xurls.Schemes = []string{"https"}
xurls.SchemesNoAuthority = []string{"blob"}
xurls.SchemesUnofficial = []string{"http"}
return xurls.Strict()
}()
imageExtensionMatcher = regexp.MustCompile(`.*\.(png|jpg|jpeg|gif|webp)(\?.*)?$`)
videoExtensionMatcher = regexp.MustCompile(`.*\.(mp4|ogg|webm|mov)(\?.*)?$`)
) )
var kindNames = map[int]string{ var kindNames = map[int]string{
@@ -212,47 +220,29 @@ func getParentNevent(event *nostr.Event) string {
// Rendering functions // Rendering functions
// ### ### ### ### ### ### ### ### ### ### ### // ### ### ### ### ### ### ### ### ### ### ###
func replaceImageURLsWithTags(input string, replacement string) string { func replaceURLsWithTags(input string, imageReplacementTemplate, videoReplacementTemplate string) string {
// Match and replace image URLs with a custom replacement return urlMatcher.ReplaceAllStringFunc(input, func(match string) string {
// Usually is html <img> => ` <img src="%s" alt=""> ` switch {
// or markdown !()[...] tags for further processing => `![](%s)` case imageExtensionMatcher.MatchString(match):
input = imgsMatcher.ReplaceAllStringFunc(input, func(match string) string { // Match and replace image URLs with a custom replacement
submatch := imgsMatcher.FindStringSubmatch(match) // Usually is html <img> => ` <img src="%s" alt=""> `
if len(submatch) < 2 || // or markdown !()[...] tags for further processing => `![](%s)`
strings.Contains(submatch[0], "](") { // Markdown ![](...) image return fmt.Sprintf(imageReplacementTemplate, match)
return match case videoExtensionMatcher.MatchString(match):
// Match and replace video URLs with a custom replacement
// Usually is html <video> => ` <video controls width="100%%"><source src="%s"></video> `
// or markdown !()[...] tags for further processing => `![](%s)`
return fmt.Sprintf(videoReplacementTemplate, match)
default:
return "<a href=\"" + match + "\">" + match + "</a>"
} }
capturedGroup := submatch[2]
replacement := fmt.Sprintf(replacement, capturedGroup)
return replacement
}) })
return input
} }
func replaceVideoURLsWithTags(input string, replacement string) string { func replaceNostrURLs(matcher *regexp.Regexp, input string, style string) string {
// Match and replace video URLs with a custom replacement
// Usually is html <video> => ` <video controls width="100%%"><source src="%s"></video> `
// or markdown !()[...] tags for further processing => `![](%s)`
input = videoMatcher.ReplaceAllStringFunc(input, func(match string) string {
submatch := videoMatcher.FindStringSubmatch(match)
if len(submatch) < 2 {
return match
}
capturedGroup := submatch[1]
replacement := fmt.Sprintf(replacement, capturedGroup)
return replacement
})
return input
}
func replaceNostrURLs(input string, style string) string {
// Match and replace npub1, nprofile1, note1, nevent1, etc // Match and replace npub1, nprofile1, note1, nevent1, etc
input = nostrEveryMatcher.ReplaceAllStringFunc(input, func(match string) string { input = matcher.ReplaceAllStringFunc(input, func(match string) string {
submatch := nostrEveryMatcher.FindStringSubmatch(match) nip19 := match[len("nostr:"):]
if len(submatch) < 2 || strings.Contains(submatch[0], "/") {
return match
}
nip19 := submatch[2]
first_chars := nip19[:8] first_chars := nip19[:8]
last_chars := nip19[len(nip19)-4:] last_chars := nip19[len(nip19)-4:]
replacement := "" replacement := ""
@@ -278,17 +268,16 @@ func replaceNostrURLs(input string, style string) string {
} }
return replacement return replacement
} }
}) })
return input return input
} }
func replaceNostrURLsWithTags(input string) string { func replaceNostrURLsWithTags(matcher *regexp.Regexp, input string) string {
return replaceNostrURLs(input, "tags") return replaceNostrURLs(matcher, input, "tags")
} }
func shortenNostrURLs(input string) string { func shortenNostrURLs(input string) string {
return replaceNostrURLs(input, "short") return replaceNostrURLs(nostrEveryMatcher, input, "short")
} }
func getNameFromNip19(ctx context.Context, nip19 string) string { func getNameFromNip19(ctx context.Context, nip19 string) string {
@@ -315,10 +304,7 @@ func replaceUserReferencesWithNames(ctx context.Context, input []string) []strin
for i, line := range input { for i, line := range input {
input[i] = nostrNpubNprofileMatcher.ReplaceAllStringFunc(line, func(match string) string { input[i] = nostrNpubNprofileMatcher.ReplaceAllStringFunc(line, func(match string) string {
submatch := nostrNpubNprofileMatcher.FindStringSubmatch(match) submatch := nostrNpubNprofileMatcher.FindStringSubmatch(match)
if len(submatch) < 2 || strings.Contains(submatch[0], "/") { nip19 := submatch[1]
return match
}
nip19 := submatch[2]
return getNameFromNip19(ctx, nip19) return getNameFromNip19(ctx, nip19)
}) })
} }
@@ -326,23 +312,23 @@ func replaceUserReferencesWithNames(ctx context.Context, input []string) []strin
} }
// replace nevent and note with their text, HTML-formatted // replace nevent and note with their text, HTML-formatted
func renderQuotesAsHTML(ctx context.Context, input string) string { func renderQuotesAsHTML(ctx context.Context, input string, usingTelegramInstantView bool) string {
ctx, cancel := context.WithTimeout(ctx, time.Second*3) ctx, cancel := context.WithTimeout(ctx, time.Second*3)
defer cancel() defer cancel()
return nostrNoteNeventMatcher.ReplaceAllStringFunc(input, func(match string) string { return nostrNoteNeventMatcher.ReplaceAllStringFunc(input, func(match string) string {
submatch := nostrNoteNeventMatcher.FindStringSubmatch(match) submatch := nostrNoteNeventMatcher.FindStringSubmatch(match)
if len(submatch) < 2 || strings.Contains(submatch[0], "/") { nip19 := submatch[1]
return match
}
nip19 := submatch[2]
event, err := getEvent(ctx, nip19) event, err := getEvent(ctx, nip19)
if err != nil { if err != nil {
log.Warn().Str("nip19", nip19).Msg("failed to get nip19")
return nip19 return nip19
} }
return fmt.Sprintf(`<blockquote class="mention"><div>quoting %s </div> %s </blockquote>`, match, event.Content) content := fmt.Sprintf(
`<blockquote class="mention"><div>quoting %s </div> %s </blockquote>`, match, event.Content)
return basicFormatting(content, false, usingTelegramInstantView)
}) })
} }
@@ -370,11 +356,6 @@ func renderQuotesAsArrowPrefixedText(ctx context.Context, input string) []string
matchText := input[match[0]:match[1]] matchText := input[match[0]:match[1]]
submatch := nostrNoteNeventMatcher.FindStringSubmatch(matchText) submatch := nostrNoteNeventMatcher.FindStringSubmatch(matchText)
if len(submatch) < 2 || strings.Contains(submatch[0], "/") {
// error case concat this to previous block
blocks[b] += matchText
continue
}
nip19 := submatch[2] nip19 := submatch[2]
event, err := getEvent(ctx, nip19) event, err := getEvent(ctx, nip19)
@@ -399,27 +380,6 @@ func renderQuotesAsArrowPrefixedText(ctx context.Context, input string) []string
return blocks return blocks
} }
func replaceURLsWithTags(line string) string {
var rline string
rline = replaceImageURLsWithTags(line, ` <img src="%s" alt=""> `)
if rline != line {
return rline
}
rline = replaceVideoURLsWithTags(line, `<video controls width="100%%"><source src="%s"></video>`)
if rline != line {
return rline
}
line = replaceNostrURLsWithTags(line)
// Match and replace other URLs with <a> tags
line = hrefMatcher.ReplaceAllString(line, `<a href="$1">$1</a>`)
return line
}
func sanitizeXSS(html string) string { func sanitizeXSS(html string) string {
p := bluemonday.UGCPolicy() p := bluemonday.UGCPolicy()
p.AllowStyling() p.AllowStyling()
@@ -431,20 +391,34 @@ func sanitizeXSS(html string) string {
return p.Sanitize(html) return p.Sanitize(html)
} }
func basicFormatting(input string) string { func basicFormatting(input string, skipNostrEventLinks bool, usingTelegramInstantView bool) string {
nostrMatcher := nostrEveryMatcher
lines := strings.Split(input, "\n") if skipNostrEventLinks {
var processedLines []string nostrMatcher = nostrNpubNprofileMatcher
for _, line := range lines {
processedLine := replaceURLsWithTags(line)
processedLines = append(processedLines, processedLine)
} }
return strings.Join(processedLines, "<br/>") imageReplacementTemplate := ` <img src="%s"> `
if usingTelegramInstantView {
// telegram instant view doesn't like when there is an image inside a blockquote (like <p><img></p>)
// so we use this custom thing to stop all blockquotes before the images, print the images then
// start a new blockquote afterwards -- we do the same with the markdown renderer for <p> tags on mdToHtml
imageReplacementTemplate = "</blockquote>" + imageReplacementTemplate + "<blockquote>"
}
lines := strings.Split(input, "\n")
for i, line := range lines {
line = replaceURLsWithTags(line,
imageReplacementTemplate,
`<video controls width="100%%"><source src="%s"></video>`,
)
line = replaceNostrURLsWithTags(nostrMatcher, line)
lines[i] = line
}
return strings.Join(lines, "<br/>")
} }
func previewNotesFormatting(input string) string { func previewNotesFormatting(input string) string {
lines := strings.Split(input, "\n") lines := strings.Split(input, "\n")
var processedLines []string var processedLines []string
for _, line := range lines { for _, line := range lines {
@@ -460,9 +434,7 @@ func previewNotesFormatting(input string) string {
func mdToHTML(md string, usingTelegramInstantView bool) string { func mdToHTML(md string, usingTelegramInstantView bool) string {
md = strings.ReplaceAll(md, "\u00A0", " ") md = strings.ReplaceAll(md, "\u00A0", " ")
md = replaceImageURLsWithTags(md, `![](%s)`) md = replaceNostrURLsWithTags(nostrEveryMatcher, md)
md = replaceVideoURLsWithTags(md, `<video controls width="100%%"><source src="%s"></video>`)
md = replaceNostrURLsWithTags(md)
// create markdown parser with extensions // create markdown parser with extensions
p := parser.NewWithExtensions( p := parser.NewWithExtensions(