feat: improve highlight URL and text matching

- Use proper URL parsing to normalize URLs (remove www, query params, fragments)
- Add detailed logging for URL comparison to debug matching issues
- Implement two-pass text matching: exact match first, then normalized whitespace
- Handle whitespace variations in highlighted text more flexibly
- Add context to debug logs showing surrounding text

This should make highlights appear more reliably even with URL variations and whitespace differences between the highlight and the actual content.
2026-01-06 00:14:48 +01:00 · 2025-10-04 20:32:55 +01:00
parent a602f163fb
commit 0803417755
2 changed files with 92 additions and 12 deletions
--- a/src/utils/highlightMatching.tsx
+++ b/src/utils/highlightMatching.tsx
@@ -122,6 +122,10 @@ export function applyHighlightsToHTML(
    
    console.log('🔍 Processing highlight:', searchText.slice(0, 50))
    
+    // Normalize whitespace for more flexible matching
+    const normalizeWhitespace = (str: string) => str.replace(/\s+/g, ' ').trim()
+    const normalizedSearch = normalizeWhitespace(searchText)
+    
    // Walk through all text nodes and replace matches
    const walker = document.createTreeWalker(
      tempDiv,
@@ -135,13 +139,16 @@ export function applyHighlightsToHTML(
      textNodes.push(node as Text)
    }
    
-    // Process text nodes
+    // Try exact match first, then normalized match
+    let found = false
+    
+    // First pass: exact match
    for (const textNode of textNodes) {
      const text = textNode.textContent || ''
      const index = text.indexOf(searchText)
      
      if (index !== -1) {
-        console.log('✅ Found match in text node:', text.slice(0, 50))
+        console.log('✅ Found exact match in text node:', text.slice(Math.max(0, index - 20), index + 50))
        
        // Split the text node and insert the mark element
        const before = text.substring(0, index)
@@ -167,10 +174,61 @@ export function applyHighlightsToHTML(
          }
        }
        
-        // Only highlight the first occurrence
+        found = true
        break
      }
    }
+    
+    // Second pass: normalized whitespace match
+    if (!found) {
+      for (const textNode of textNodes) {
+        const text = textNode.textContent || ''
+        const normalizedText = normalizeWhitespace(text)
+        const index = normalizedText.indexOf(normalizedSearch)
+        
+        if (index !== -1) {
+          console.log('✅ Found normalized match in text node:', text.slice(0, 50))
+          
+          // Find the actual position in the original text
+          let actualIndex = 0
+          let normalizedIndex = 0
+          
+          for (let i = 0; i < text.length && normalizedIndex < index; i++) {
+            if (!/\s/.test(text[i]) || (i > 0 && !/\s/.test(text[i-1]))) {
+              normalizedIndex++
+            }
+            actualIndex = i + 1
+          }
+          
+          // Approximate the length in the original text
+          const actualLength = searchText.length
+          const match = text.substring(actualIndex, actualIndex + actualLength)
+          const before = text.substring(0, actualIndex)
+          const after = text.substring(actualIndex + actualLength)
+          
+          const mark = document.createElement('mark')
+          mark.className = 'content-highlight'
+          mark.setAttribute('data-highlight-id', highlight.id)
+          mark.setAttribute('title', `Highlighted ${new Date(highlight.created_at * 1000).toLocaleDateString()}`)
+          mark.textContent = match
+          
+          const parent = textNode.parentNode
+          if (parent) {
+            if (before) {
+              parent.insertBefore(document.createTextNode(before), textNode)
+            }
+            parent.insertBefore(mark, textNode)
+            if (after) {
+              textNode.textContent = after
+            } else {
+              parent.removeChild(textNode)
+            }
+          }
+          
+          break
+        }
+      }
+    }
  }
  
  const result = tempDiv.innerHTML