mirror of
https://github.com/dergigi/boris.git
synced 2026-01-06 00:14:48 +01:00
feat: improve highlight URL and text matching
- Use proper URL parsing to normalize URLs (remove www, query params, fragments)
- Add detailed logging for URL comparison to debug matching issues
- Implement two-pass text matching: exact match first, then normalized whitespace
- Handle whitespace variations in highlighted text more flexibly
- Add context to debug logs showing surrounding text

This should make highlights appear more reliably even with URL variations and whitespace differences between the highlight and the actual content.
This commit is contained in:
@@ -122,6 +122,10 @@ export function applyHighlightsToHTML(
|
||||
|
||||
console.log('🔍 Processing highlight:', searchText.slice(0, 50))
|
||||
|
||||
// Normalize whitespace for more flexible matching
|
||||
// Collapse each run of whitespace into a single space and drop leading/trailing
// whitespace, so strings differing only in spacing or line breaks compare equal.
const normalizeWhitespace = (str: string) =>
  str
    .split(/\s+/)
    .filter((piece) => piece !== '')
    .join(' ')
|
||||
const normalizedSearch = normalizeWhitespace(searchText)
|
||||
|
||||
// Walk through all text nodes and replace matches
|
||||
const walker = document.createTreeWalker(
|
||||
tempDiv,
|
||||
@@ -135,13 +139,16 @@ export function applyHighlightsToHTML(
|
||||
textNodes.push(node as Text)
|
||||
}
|
||||
|
||||
// Process text nodes
|
||||
// Try exact match first, then normalized match
|
||||
let found = false
|
||||
|
||||
// First pass: exact match
|
||||
for (const textNode of textNodes) {
|
||||
const text = textNode.textContent || ''
|
||||
const index = text.indexOf(searchText)
|
||||
|
||||
if (index !== -1) {
|
||||
console.log('✅ Found match in text node:', text.slice(0, 50))
|
||||
console.log('✅ Found exact match in text node:', text.slice(Math.max(0, index - 20), index + 50))
|
||||
|
||||
// Split the text node and insert the mark element
|
||||
const before = text.substring(0, index)
|
||||
@@ -167,10 +174,61 @@ export function applyHighlightsToHTML(
|
||||
}
|
||||
}
|
||||
|
||||
// Only highlight the first occurrence
|
||||
found = true
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
// Second pass: normalized whitespace match
|
||||
if (!found) {
|
||||
for (const textNode of textNodes) {
|
||||
const text = textNode.textContent || ''
|
||||
const normalizedText = normalizeWhitespace(text)
|
||||
const index = normalizedText.indexOf(normalizedSearch)
|
||||
|
||||
if (index !== -1) {
|
||||
console.log('✅ Found normalized match in text node:', text.slice(0, 50))
|
||||
|
||||
// Find the actual position in the original text
|
||||
let actualIndex = 0
|
||||
let normalizedIndex = 0
|
||||
|
||||
for (let i = 0; i < text.length && normalizedIndex < index; i++) {
|
||||
if (!/\s/.test(text[i]) || (i > 0 && !/\s/.test(text[i-1]))) {
|
||||
normalizedIndex++
|
||||
}
|
||||
actualIndex = i + 1
|
||||
}
|
||||
|
||||
// Approximate the length in the original text
|
||||
const actualLength = searchText.length
|
||||
const match = text.substring(actualIndex, actualIndex + actualLength)
|
||||
const before = text.substring(0, actualIndex)
|
||||
const after = text.substring(actualIndex + actualLength)
|
||||
|
||||
const mark = document.createElement('mark')
|
||||
mark.className = 'content-highlight'
|
||||
mark.setAttribute('data-highlight-id', highlight.id)
|
||||
mark.setAttribute('title', `Highlighted ${new Date(highlight.created_at * 1000).toLocaleDateString()}`)
|
||||
mark.textContent = match
|
||||
|
||||
const parent = textNode.parentNode
|
||||
if (parent) {
|
||||
if (before) {
|
||||
parent.insertBefore(document.createTextNode(before), textNode)
|
||||
}
|
||||
parent.insertBefore(mark, textNode)
|
||||
if (after) {
|
||||
textNode.textContent = after
|
||||
} else {
|
||||
parent.removeChild(textNode)
|
||||
}
|
||||
}
|
||||
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const result = tempDiv.innerHTML
|
||||
|
||||
Reference in New Issue
Block a user