diff --git a/src/utils/highlightMatching/domUtils.ts b/src/utils/highlightMatching/domUtils.ts
index 594bf9b1..c8c1fafb 100644
--- a/src/utils/highlightMatching/domUtils.ts
+++ b/src/utils/highlightMatching/domUtils.ts
@@ -64,10 +64,36 @@ export function tryMarkInTextNodes(
     let actualIndex = index
     if (useNormalized) {
       // Map normalized index back to original text
-      let normalizedIdx = 0
-      for (let i = 0; i < text.length && normalizedIdx < index; i++) {
-        if (!/\s/.test(text[i]) || (i > 0 && !/\s/.test(text[i-1]))) normalizedIdx++
-        actualIndex = i + 1
+      // Build normalized text while tracking original positions
+      let normalizedPos = 0
+      let prevWasWs = false
+      for (let i = 0; i < text.length; i++) {
+        const ch = text[i]
+        const isWs = /\s/.test(ch)
+
+        if (isWs) {
+          // Whitespace: count only at start of whitespace sequence
+          if (!prevWasWs) {
+            if (normalizedPos === index) {
+              actualIndex = i
+              break
+            }
+            normalizedPos++
+          }
+          prevWasWs = true
+        } else {
+          // Non-whitespace: count each character
+          if (normalizedPos === index) {
+            actualIndex = i
+            break
+          }
+          normalizedPos++
+          prevWasWs = false
+        }
+      }
+      // If we didn't find exact match, use last position
+      if (normalizedPos < index) {
+        actualIndex = text.length
       }
     }

diff --git a/src/utils/highlightMatching/htmlMatching.ts b/src/utils/highlightMatching/htmlMatching.ts
index 05ec73f6..1c72eda2 100644
--- a/src/utils/highlightMatching/htmlMatching.ts
+++ b/src/utils/highlightMatching/htmlMatching.ts
@@ -1,6 +1,59 @@
 import { Highlight } from '../../types/highlights'
 import { tryMarkInTextNodes } from './domUtils'

+interface CacheEntry {
+  html: string
+  timestamp: number
+}
+
+// Simple in-memory cache for highlighted HTML results
+const highlightCache = new Map<string, CacheEntry>()
+const CACHE_TTL = 5 * 60 * 1000 // 5 minutes
+const MAX_CACHE_SIZE = 50 // FIFO eviction after this many entries
+
+/**
+ * Build the cache key for one applyHighlightsToHTML request.
+ *
+ * The key contains the complete HTML string, the sorted highlight IDs and the
+ * highlight style, so entries for different documents cannot collide. Because
+ * the IDs are sorted, the order of `highlights` does not affect the key.
+ *
+ * NOTE(review): only highlight IDs are keyed, not highlight content. This
+ * assumes a highlight's content never changes under the same ID - confirm.
+ */
+function getCacheKey(html: string, highlights: Highlight[], highlightStyle: string): string {
+  // map() returns a new array, so sorting it does not reorder the caller's list
+  const highlightIds = highlights.map(h => h.id).sort()
+  // JSON-encode the parts so that no separator character inside one part can
+  // make two different inputs serialize to the same key
+  return JSON.stringify([highlightStyle, highlightIds, html])
+}
+
+/**
+ * Clean up old cache entries and enforce size limit
+ */
+function cleanupCache(): void {
+  const now = Date.now()
+  const entries = Array.from(highlightCache.entries())
+
+  // Remove expired entries
+  for (const [key, entry] of entries) {
+    if (now - entry.timestamp > CACHE_TTL) {
+      highlightCache.delete(key)
+    }
+  }
+
+  // Enforce size limit with FIFO eviction (oldest first)
+  if (highlightCache.size > MAX_CACHE_SIZE) {
+    const sortedEntries = Array.from(highlightCache.entries())
+      .sort((a, b) => a[1].timestamp - b[1].timestamp)
+    const toRemove = sortedEntries.slice(0, highlightCache.size - MAX_CACHE_SIZE)
+    for (const [key] of toRemove) {
+      highlightCache.delete(key)
+    }
+  }
+}
+
 /**
  * Apply highlights to HTML content by injecting mark tags using DOM manipulation
  */
@@ -13,19 +66,27 @@ export function applyHighlightsToHTML(
     return html
   }

+  // Check cache
+  const cacheKey = getCacheKey(html, highlights, highlightStyle)
+  const cached = highlightCache.get(cacheKey)
+  if (cached && Date.now() - cached.timestamp < CACHE_TTL) {
+    return cached.html
+  }
+
+  // Clean up cache periodically
+  cleanupCache()
   const tempDiv = document.createElement('div')
   tempDiv.innerHTML = html


-  // CRITICAL: Remove any existing highlight marks to start with clean HTML
-  // This prevents old broken highlights from corrupting the new rendering
-  const existingMarks = tempDiv.querySelectorAll('mark[data-highlight-id]')
-  existingMarks.forEach(mark => {
-    // Replace the mark with its text content
-    const textNode = document.createTextNode(mark.textContent || '')
-    mark.parentNode?.replaceChild(textNode, mark)
-  })
-
+  // Collect all text nodes once before processing highlights (performance optimization)
+  // NOTE(review): this list is not refreshed between highlights. If
+  // tryMarkInTextNodes splits or replaces a text node when it inserts a mark,
+  // later highlights will search stale nodes - confirm against domUtils.
+  const walker = document.createTreeWalker(tempDiv, NodeFilter.SHOW_TEXT, null)
+  const textNodes: Text[] = []
+  let node: Node | null
+  while ((node = walker.nextNode())) textNodes.push(node as Text)
   for (const highlight of highlights) {
     const searchText = highlight.content.trim()

@@ -34,14 +95,6 @@ export function applyHighlightsToHTML(

       continue
     }
-
-    // Collect all text nodes
-    const walker = document.createTreeWalker(tempDiv, NodeFilter.SHOW_TEXT, null)
-    const textNodes: Text[] = []
-    let node: Node | null
-    while ((node = walker.nextNode())) textNodes.push(node as Text)
-
-
     // Try exact match first, then normalized match
     const found = tryMarkInTextNodes(textNodes, searchText, highlight, false, highlightStyle) ||
                   tryMarkInTextNodes(textNodes, searchText, highlight, true, highlightStyle)
@@ -51,7 +104,14 @@ export function applyHighlightsToHTML(

     }
   }
+  const result = tempDiv.innerHTML


-  return tempDiv.innerHTML
+  // Store in cache
+  highlightCache.set(cacheKey, {
+    html: result,
+    timestamp: Date.now()
+  })
+
+  return result
 }