fix: improve URL regex patterns to prevent text artifacts

- Updated VideoEmbedProcessor regex patterns to use lookahead assertions
- Prevents matches from capturing HTML attribute syntax such as quotes and angle brackets
- Fixes a text artifact that appeared in the UI when processing video URLs in HTML content
This commit is contained in:
Gigi
2025-10-20 20:45:22 +02:00
parent 64aad42be3
commit a5494ba15c

View File

@@ -31,7 +31,8 @@ const VideoEmbedProcessor = forwardRef<HTMLDivElement, VideoEmbedProcessorProps>
const videoUrls: string[] = html.match(videoUrlPattern) || []
// Also check for video URLs that might not have extensions but are classified as video
const allUrlPattern = /https?:\/\/[^\s<>"']+/gi
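// Match each URL up to the first whitespace, quote, or angle bracket; the lookahead then requires whitespace, '>', a quote, or end of input to follow
// NOTE(review): '<' is not accepted by the lookahead, so a URL immediately followed by '<' (e.g. right before a closing tag) does not match at all — confirm this is intended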
const allUrlPattern = /https?:\/\/[^\s<>"']+(?=\s|>|"|'|$)/gi
const allUrls: string[] = html.match(allUrlPattern) || []
const videoUrlsWithoutExt = allUrls.filter(url => {
const classification = classifyUrl(url)
@@ -62,7 +63,8 @@ const VideoEmbedProcessor = forwardRef<HTMLDivElement, VideoEmbedProcessorProps>
const videoUrlPattern = /https?:\/\/[^\s<>"']+\.(mp4|webm|ogg|mov|avi|mkv|m4v)(?:\?[^\s<>"']*)?/gi
const videoUrls: string[] = html.match(videoUrlPattern) || []
const allUrlPattern = /https?:\/\/[^\s<>"']+/gi
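// Match each URL up to the first whitespace, quote, or angle bracket; the lookahead then requires whitespace, '>', a quote, or end of input to follow
// NOTE(review): '<' is not accepted by the lookahead, so a URL immediately followed by '<' (e.g. right before a closing tag) does not match at all — confirm this is intended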
const allUrlPattern = /https?:\/\/[^\s<>"']+(?=\s|>|"|'|$)/gi
const allUrls: string[] = html.match(allUrlPattern) || []
const videoUrlsWithoutExt = allUrls.filter(url => {
const classification = classifyUrl(url)