From 0803417755574fae5df0878a77800b5cd3aae33b Mon Sep 17 00:00:00 2001 From: Gigi Date: Sat, 4 Oct 2025 20:32:55 +0100 Subject: [PATCH] feat: improve highlight URL and text matching - Use proper URL parsing to normalize URLs (remove www, query params, fragments) - Add detailed logging for URL comparison to debug matching issues - Implement two-pass text matching: exact match first, then normalized whitespace - Handle whitespace variations in highlighted text more flexibly - Add context to debug logs showing surrounding text This should make highlights appear more reliably even with URL variations and whitespace differences between the highlight and the actual content. --- src/components/ContentPanel.tsx | 40 ++++++++++++++++----- src/utils/highlightMatching.tsx | 64 +++++++++++++++++++++++++++++++-- 2 files changed, 92 insertions(+), 12 deletions(-) diff --git a/src/components/ContentPanel.tsx b/src/components/ContentPanel.tsx index f49af0d3..0a01526d 100644 --- a/src/components/ContentPanel.tsx +++ b/src/components/ContentPanel.tsx @@ -30,19 +30,41 @@ const ContentPanel: React.FC = ({ return [] } + // Normalize URLs for comparison (remove trailing slashes, protocols, www, query params, fragments) + const normalizeUrl = (url: string) => { + try { + const urlObj = new URL(url.startsWith('http') ? url : `https://${url}`) + // Get just the hostname + pathname, remove trailing slash + return `${urlObj.hostname.replace(/^www\./, '')}${urlObj.pathname}`.replace(/\/$/, '').toLowerCase() + } catch { + // Fallback for invalid URLs + return url.replace(/^https?:\/\//, '').replace(/^www\./, '').replace(/\/$/, '').toLowerCase() + } + } + + const normalizedSelected = normalizeUrl(selectedUrl) + console.log('🔍 Normalized selected URL:', normalizedSelected) + const filtered = highlights.filter(h => { - // Match by URL reference - if (h.urlReference && selectedUrl.includes(h.urlReference)) return true - if (h.urlReference && h.urlReference.includes(selectedUrl)) return true + if (!h.urlReference) { + console.log('⚠️ Highlight has no URL reference:', h.id.slice(0, 8)) + return false + } - // Normalize URLs for comparison (remove trailing slashes, protocols) - const normalizeUrl = (url: string) => - url.replace(/^https?:\/\//, '').replace(/\/$/, '').toLowerCase() + const normalizedRef = normalizeUrl(h.urlReference) + const matches = normalizedSelected === normalizedRef || + normalizedSelected.includes(normalizedRef) || + normalizedRef.includes(normalizedSelected) - const normalizedSelected = normalizeUrl(selectedUrl) - const normalizedRef = h.urlReference ? normalizeUrl(h.urlReference) : '' + console.log('🔍 URL comparison:', { + highlightId: h.id.slice(0, 8), + originalRef: h.urlReference, + normalizedRef, + normalizedSelected, + matches + }) - return normalizedSelected === normalizedRef + return matches }) console.log('🔍 Filtered highlights:', { diff --git a/src/utils/highlightMatching.tsx b/src/utils/highlightMatching.tsx index 892cce10..073692ef 100644 --- a/src/utils/highlightMatching.tsx +++ b/src/utils/highlightMatching.tsx @@ -122,6 +122,10 @@ export function applyHighlightsToHTML( console.log('🔍 Processing highlight:', searchText.slice(0, 50)) + // Normalize whitespace for more flexible matching + const normalizeWhitespace = (str: string) => str.replace(/\s+/g, ' ').trim() + const normalizedSearch = normalizeWhitespace(searchText) + // Walk through all text nodes and replace matches const walker = document.createTreeWalker( tempDiv, @@ -135,13 +139,16 @@ export function applyHighlightsToHTML( textNodes.push(node as Text) } - // Process text nodes + // Try exact match first, then normalized match + let found = false + + // First pass: exact match for (const textNode of textNodes) { const text = textNode.textContent || '' const index = text.indexOf(searchText) if (index !== -1) { - console.log('✅ Found match in text node:', text.slice(0, 50)) + console.log('✅ Found exact match in text node:', text.slice(Math.max(0, index - 20), index + 50)) // Split the text node and insert the mark element const before = text.substring(0, index) @@ -167,10 +174,61 @@ export function applyHighlightsToHTML( } } - // Only highlight the first occurrence + found = true break } } + + // Second pass: normalized whitespace match + if (!found) { + for (const textNode of textNodes) { + const text = textNode.textContent || '' + const normalizedText = normalizeWhitespace(text) + const index = normalizedText.indexOf(normalizedSearch) + + if (index !== -1) { + console.log('✅ Found normalized match in text node:', text.slice(0, 50)) + + // Find the actual position in the original text + let actualIndex = 0 + let normalizedIndex = 0 + + for (let i = 0; i < text.length && normalizedIndex < index; i++) { + if (!/\s/.test(text[i]) || (i > 0 && !/\s/.test(text[i-1]))) { + normalizedIndex++ + } + actualIndex = i + 1 + } + + // Approximate the length in the original text + const actualLength = searchText.length + const match = text.substring(actualIndex, actualIndex + actualLength) + const before = text.substring(0, actualIndex) + const after = text.substring(actualIndex + actualLength) + + const mark = document.createElement('mark') + mark.className = 'content-highlight' + mark.setAttribute('data-highlight-id', highlight.id) + mark.setAttribute('title', `Highlighted ${new Date(highlight.created_at * 1000).toLocaleDateString()}`) + mark.textContent = match + + const parent = textNode.parentNode + if (parent) { + if (before) { + parent.insertBefore(document.createTextNode(before), textNode) + } + parent.insertBefore(mark, textNode) + if (after) { + textNode.textContent = after + } else { + parent.removeChild(textNode) + } + } + + break + } + } + } } const result = tempDiv.innerHTML