fix: improve HTML highlight matching with DOM manipulation

- Replace simple string replacement with proper DOM tree walking
- Find text nodes and split them to insert mark elements
- Add extensive debug logging (console.log) to track highlight filtering and matching
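- Match highlight text inside individual text nodes, so surrounding HTML tags no longer break matching (text that spans several elements is still not matched)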

This should fix the issue where highlights weren't showing up in
article content due to HTML tags breaking up the text.
This commit is contained in:
Gigi
2025-10-04 20:14:25 +01:00
parent 4aa496ec3f
commit a602f163fb
2 changed files with 98 additions and 26 deletions

View File

@@ -25,9 +25,12 @@ const ContentPanel: React.FC<ContentPanelProps> = ({
}) => {
// Filter highlights relevant to the current URL
const relevantHighlights = useMemo(() => {
if (!selectedUrl || highlights.length === 0) return []
if (!selectedUrl || highlights.length === 0) {
console.log('🔍 No highlights to filter:', { selectedUrl, highlightsCount: highlights.length })
return []
}
return highlights.filter(h => {
const filtered = highlights.filter(h => {
// Match by URL reference
if (h.urlReference && selectedUrl.includes(h.urlReference)) return true
if (h.urlReference && h.urlReference.includes(selectedUrl)) return true
@@ -41,12 +44,31 @@ const ContentPanel: React.FC<ContentPanelProps> = ({
return normalizedSelected === normalizedRef
})
console.log('🔍 Filtered highlights:', {
selectedUrl,
totalHighlights: highlights.length,
relevantHighlights: filtered.length,
highlights: filtered.map(h => ({
id: h.id.slice(0, 8),
urlRef: h.urlReference,
content: h.content.slice(0, 50)
}))
})
return filtered
}, [selectedUrl, highlights])
// Apply highlights to content
const highlightedHTML = useMemo(() => {
if (!html || relevantHighlights.length === 0) return html
return applyHighlightsToHTML(html, relevantHighlights)
if (!html || relevantHighlights.length === 0) {
console.log('🔍 No HTML highlighting:', { hasHtml: !!html, highlightsCount: relevantHighlights.length })
return html
}
console.log('🔍 Applying highlights to HTML:', { htmlLength: html.length, highlightsCount: relevantHighlights.length })
const result = applyHighlightsToHTML(html, relevantHighlights)
console.log('🔍 HTML highlighting result:', { originalLength: html.length, modifiedLength: result.length, changed: html !== result })
return result
}, [html, relevantHighlights])
const highlightedMarkdown = useMemo(() => {

View File

@@ -97,38 +97,88 @@ export function applyHighlightsToText(
}
/**
* Apply highlights to HTML content by injecting mark tags
* Apply highlights to HTML content by injecting mark tags using DOM manipulation
*
* The HTML is parsed into a detached <div>. For each highlight, the text nodes
* under that <div> are scanned in document order, and the first text node that
* contains the trimmed highlight text is split so the matched part can be
* wrapped in a <mark class="content-highlight"> element.
*
* NOTE(review): matching is done with indexOf on one text node at a time, so
* highlight text that is split across several elements (several text nodes)
* is never found.
*
* @param html - HTML string to annotate; returned as-is when it is empty or
*   when there are no highlights.
* @param highlights - Highlights whose `content` is searched for in the HTML text.
* @returns The serialized innerHTML of the working <div> after marks were inserted.
*/
export function applyHighlightsToHTML(
html: string,
highlights: Highlight[]
): string {
// Extract text content from HTML for matching
if (!html || highlights.length === 0) return html
// Create a temporary DOM element to work with
const tempDiv = document.createElement('div')
tempDiv.innerHTML = html
const textContent = tempDiv.textContent || ''
const matches = findHighlightMatches(textContent, highlights)
console.log('🔍 applyHighlightsToHTML:', {
htmlLength: html.length,
highlightsCount: highlights.length,
highlightTexts: highlights.map(h => h.content.slice(0, 50))
})
if (matches.length === 0) {
return html
// Process each highlight
for (const highlight of highlights) {
// Surrounding whitespace is ignored; empty highlights are skipped entirely
const searchText = highlight.content.trim()
if (!searchText) continue
console.log('🔍 Processing highlight:', searchText.slice(0, 50))
// Walk through all text nodes and replace matches
const walker = document.createTreeWalker(
tempDiv,
NodeFilter.SHOW_TEXT,
null
)
// Snapshot the text nodes into an array before any node is modified below.
// The walker is rebuilt for every highlight, so text nodes created while
// handling an earlier highlight are included in later scans.
const textNodes: Text[] = []
let node: Node | null
while ((node = walker.nextNode())) {
textNodes.push(node as Text)
}
// Process text nodes
for (const textNode of textNodes) {
const text = textNode.textContent || ''
// Case-sensitive, exact substring search within this single text node
const index = text.indexOf(searchText)
if (index !== -1) {
console.log('✅ Found match in text node:', text.slice(0, 50))
// Split the text node and insert the mark element
const before = text.substring(0, index)
const match = text.substring(index, index + searchText.length)
const after = text.substring(index + searchText.length)
const mark = document.createElement('mark')
mark.className = 'content-highlight'
mark.setAttribute('data-highlight-id', highlight.id)
// NOTE(review): the * 1000 suggests created_at is a Unix timestamp in seconds — confirm
mark.setAttribute('title', `Highlighted ${new Date(highlight.created_at * 1000).toLocaleDateString()}`)
mark.textContent = match
const parent = textNode.parentNode
if (parent) {
// Resulting sibling order: [before text] <mark>match</mark> [after text]
if (before) {
parent.insertBefore(document.createTextNode(before), textNode)
}
parent.insertBefore(mark, textNode)
// The original text node is reused for the trailing text, or removed when nothing follows the match
if (after) {
textNode.textContent = after
} else {
parent.removeChild(textNode)
}
}
// Only highlight the first occurrence
break
}
}
}
// For HTML, we'll wrap the highlight text with mark tags
let modifiedHTML = html
// Serialize the modified working tree back to an HTML string
const result = tempDiv.innerHTML
console.log('🔍 HTML highlighting complete:', {
originalLength: html.length,
modifiedLength: result.length,
changed: html !== result
})
// Process matches in reverse order to maintain indices
for (let i = matches.length - 1; i >= 0; i--) {
const match = matches[i]
const searchText = match.highlight.content.trim()
// Simple approach: replace text occurrences with marked version
// This is a basic implementation - a more robust solution would use DOM manipulation
const markTag = `<mark class="content-highlight" data-highlight-id="${match.highlight.id}" title="Highlighted ${new Date(match.highlight.created_at * 1000).toLocaleDateString()}">${searchText}</mark>`
// Only replace the first occurrence to avoid duplicates
modifiedHTML = modifiedHTML.replace(searchText, markTag)
}
return modifiedHTML
return result
}