From 0803417755574fae5df0878a77800b5cd3aae33b Mon Sep 17 00:00:00 2001
From: Gigi <dergigi@pm.me>
Date: Sat, 4 Oct 2025 20:32:55 +0100
Subject: [PATCH] feat: improve highlight URL and text matching

- Use proper URL parsing to normalize URLs (remove www, query params, fragments)
- Add detailed logging for URL comparison to debug matching issues
- Implement two-pass text matching: exact match first, then normalized whitespace
- Handle whitespace variations in highlighted text more flexibly
- Add context to debug logs showing surrounding text

This should make highlights appear more reliably even with URL variations
and whitespace differences between the highlight and the actual content.
---
 src/components/ContentPanel.tsx | 40 ++++++++++++++++-----
 src/utils/highlightMatching.tsx | 64 +++++++++++++++++++++++++++++++--
 2 files changed, 92 insertions(+), 12 deletions(-)

diff --git a/src/components/ContentPanel.tsx b/src/components/ContentPanel.tsx
index f49af0d3..0a01526d 100644
--- a/src/components/ContentPanel.tsx
+++ b/src/components/ContentPanel.tsx
@@ -30,19 +30,41 @@ const ContentPanel: React.FC<ContentPanelProps> = ({
       return []
     }
     
+    // Normalize URLs for comparison: keep only host + path (no protocol, www, query params, fragment or trailing slash)
+    const normalizeUrl = (url: string) => {
+      try {
+        const urlObj = new URL(/^https?:\/\//i.test(url) ? url : `https://${url}`)
+        // URL parsing has already split off the query and fragment; only www. and a trailing slash remain to strip
+        return `${urlObj.hostname.replace(/^www\./, '')}${urlObj.pathname}`.replace(/\/$/, '').toLowerCase()
+      } catch {
+        // Fallback for unparseable input: strip the same parts by hand, query/fragment included
+        return url.replace(/^https?:\/\//i, '').replace(/[?#].*$/, '').replace(/^www\./i, '').replace(/\/$/, '').toLowerCase()
+      }
+    }
+    
+    const normalizedSelected = normalizeUrl(selectedUrl)
+    console.log('🔍 Normalized selected URL:', normalizedSelected)
+    
     const filtered = highlights.filter(h => {
-      // Match by URL reference
-      if (h.urlReference && selectedUrl.includes(h.urlReference)) return true
-      if (h.urlReference && h.urlReference.includes(selectedUrl)) return true
+      if (!h.urlReference) {
+        console.log('⚠️ Highlight has no URL reference:', h.id.slice(0, 8))
+        return false
+      }
       
-      // Normalize URLs for comparison (remove trailing slashes, protocols)
-      const normalizeUrl = (url: string) => 
-        url.replace(/^https?:\/\//, '').replace(/\/$/, '').toLowerCase()
+      const normalizedRef = normalizeUrl(h.urlReference)
+      const matches = normalizedSelected === normalizedRef || 
+                     normalizedSelected.includes(normalizedRef) ||
+                     normalizedRef.includes(normalizedSelected)
       
-      const normalizedSelected = normalizeUrl(selectedUrl)
-      const normalizedRef = h.urlReference ? normalizeUrl(h.urlReference) : ''
+      console.log('🔍 URL comparison:', {
+        highlightId: h.id.slice(0, 8),
+        originalRef: h.urlReference,
+        normalizedRef,
+        normalizedSelected,
+        matches
+      })
       
-      return normalizedSelected === normalizedRef
+      return matches
     })
     
     console.log('🔍 Filtered highlights:', {
diff --git a/src/utils/highlightMatching.tsx b/src/utils/highlightMatching.tsx
index 892cce10..073692ef 100644
--- a/src/utils/highlightMatching.tsx
+++ b/src/utils/highlightMatching.tsx
@@ -122,6 +122,10 @@ export function applyHighlightsToHTML(
     
     console.log('🔍 Processing highlight:', searchText.slice(0, 50))
     
+    // Normalize whitespace for more flexible matching
+    const normalizeWhitespace = (str: string) => str.replace(/\s+/g, ' ').trim()
+    const normalizedSearch = normalizeWhitespace(searchText)
+    
     // Walk through all text nodes and replace matches
     const walker = document.createTreeWalker(
       tempDiv,
@@ -135,13 +139,16 @@ export function applyHighlightsToHTML(
       textNodes.push(node as Text)
     }
     
-    // Process text nodes
+    // Try exact match first, then normalized match
+    let found = false
+    
+    // First pass: exact match
     for (const textNode of textNodes) {
       const text = textNode.textContent || ''
       const index = text.indexOf(searchText)
       
       if (index !== -1) {
-        console.log('✅ Found match in text node:', text.slice(0, 50))
+        console.log('✅ Found exact match in text node:', text.slice(Math.max(0, index - 20), index + 50))
         
         // Split the text node and insert the mark element
         const before = text.substring(0, index)
@@ -167,10 +174,61 @@ export function applyHighlightsToHTML(
           }
         }
         
-        // Only highlight the first occurrence
+        found = true
         break
       }
     }
+    
+    // Second pass: whitespace-insensitive match. Every run of whitespace in the search
+    // text may match any run of whitespace in a node, and the regex runs against the
+    // node's original text so the reported index and length need no translation.
+    if (!found && normalizedSearch) {
+      // Escape regex metacharacters so the highlight text is matched literally
+      const escapeRegExp = (s: string) => s.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')
+      // No `g` flag, so exec() is stateless and the pattern can be reused across nodes
+      const flexiblePattern = new RegExp(normalizedSearch.split(' ').map(escapeRegExp).join('\\s+'))
+      
+      for (const textNode of textNodes) {
+        const text = textNode.textContent || ''
+        const flexibleMatch = flexiblePattern.exec(text)
+        
+        if (flexibleMatch) {
+          console.log('✅ Found normalized match in text node:', text.slice(0, 50))
+          
+          // Take the span straight from the original text: a normalized index drifts
+          // whenever whitespace was collapsed or trimmed ahead of the match, and the
+          // matched length can differ from searchText.length for the same reason.
+          const actualIndex = flexibleMatch.index
+          const match = flexibleMatch[0]
+          const before = text.substring(0, actualIndex)
+          const after = text.substring(actualIndex + match.length)
+          
+          const mark = document.createElement('mark')
+          mark.className = 'content-highlight'
+          mark.setAttribute('data-highlight-id', highlight.id)
+          mark.setAttribute('title', `Highlighted ${new Date(highlight.created_at * 1000).toLocaleDateString()}`)
+          mark.textContent = match
+          
+          const parent = textNode.parentNode
+          if (parent) {
+            // Leading text and the mark go in front of the node; the node itself is
+            // reused for the trailing text, or dropped when nothing follows the match.
+            if (before) {
+              parent.insertBefore(document.createTextNode(before), textNode)
+            }
+            parent.insertBefore(mark, textNode)
+            if (after) {
+              textNode.textContent = after
+            } else {
+              parent.removeChild(textNode)
+            }
+          }
+          
+          // Only the first occurrence is highlighted
+          break
+        }
+      }
+    }
   }
   
   const result = tempDiv.innerHTML