mirror of
https://github.com/dergigi/boris.git
synced 2025-12-26 19:14:52 +01:00
- Add published field to ReadableContent interface - Pass published date from article loader through component chain - Display formatted publication date in ReaderHeader with calendar icon - Format date as 'MMMM d, yyyy' using date-fns
113 lines
2.9 KiB
TypeScript
113 lines
2.9 KiB
TypeScript
// Lightweight readability-style fetcher using r.jina.ai proxy
|
|
// Returns simplified HTML for a given URL. This avoids CORS and heavy deps.
|
|
|
|
export interface ReadableContent {
|
|
url: string
|
|
title?: string
|
|
html?: string
|
|
markdown?: string
|
|
image?: string
|
|
summary?: string
|
|
published?: number
|
|
}
|
|
|
|
interface CachedContent {
|
|
content: ReadableContent
|
|
timestamp: number
|
|
}
|
|
|
|
const CACHE_TTL = 7 * 24 * 60 * 60 * 1000 // 7 days in milliseconds
|
|
const CACHE_PREFIX = 'reader_cache_'
|
|
|
|
function getCacheKey(url: string): string {
|
|
return `${CACHE_PREFIX}${url}`
|
|
}
|
|
|
|
function getFromCache(url: string): ReadableContent | null {
|
|
try {
|
|
const cacheKey = getCacheKey(url)
|
|
const cached = localStorage.getItem(cacheKey)
|
|
if (!cached) return null
|
|
|
|
const { content, timestamp }: CachedContent = JSON.parse(cached)
|
|
const age = Date.now() - timestamp
|
|
|
|
if (age > CACHE_TTL) {
|
|
localStorage.removeItem(cacheKey)
|
|
return null
|
|
}
|
|
|
|
return content
|
|
} catch {
|
|
return null
|
|
}
|
|
}
|
|
|
|
function saveToCache(url: string, content: ReadableContent): void {
|
|
try {
|
|
const cacheKey = getCacheKey(url)
|
|
const cached: CachedContent = {
|
|
content,
|
|
timestamp: Date.now()
|
|
}
|
|
localStorage.setItem(cacheKey, JSON.stringify(cached))
|
|
} catch {
|
|
// Silently fail if storage is full or unavailable
|
|
}
|
|
}
|
|
|
|
function toProxyUrl(url: string): string {
|
|
// Ensure the target URL has a protocol and build the proxy URL
|
|
const normalized = /^https?:\/\//i.test(url) ? url : `https://${url}`
|
|
return `https://r.jina.ai/${normalized}`
|
|
}
|
|
|
|
export async function fetchReadableContent(
|
|
targetUrl: string,
|
|
bypassCache = false
|
|
): Promise<ReadableContent> {
|
|
// Check cache first unless bypassed
|
|
if (!bypassCache) {
|
|
const cached = getFromCache(targetUrl)
|
|
if (cached) return cached
|
|
}
|
|
|
|
const proxyUrl = toProxyUrl(targetUrl)
|
|
const res = await fetch(proxyUrl)
|
|
if (!res.ok) {
|
|
throw new Error(`Failed to fetch readable content (${res.status})`)
|
|
}
|
|
const text = await res.text()
|
|
// Detect if the proxy delivered Markdown or HTML. r.jina.ai often returns a
|
|
// block starting with "Title:" and "Markdown Content:". We handle both.
|
|
const hasMarkdownBlock = /Markdown Content:\s/i.test(text)
|
|
|
|
let content: ReadableContent
|
|
|
|
if (hasMarkdownBlock) {
|
|
// Try to split out Title and the Markdown payload
|
|
const titleMatch = text.match(/Title:\s*(.*?)(?:\s+URL Source:|\s+Markdown Content:)/i)
|
|
const mdMatch = text.match(/Markdown Content:\s*([\s\S]*)$/i)
|
|
content = {
|
|
url: targetUrl,
|
|
title: titleMatch?.[1]?.trim(),
|
|
markdown: mdMatch?.[1]?.trim()
|
|
}
|
|
} else {
|
|
const html = text
|
|
// Best-effort title extraction from HTML
|
|
const match = html.match(/<title[^>]*>(.*?)<\/title>/i)
|
|
content = {
|
|
url: targetUrl,
|
|
title: match?.[1],
|
|
html
|
|
}
|
|
}
|
|
|
|
// Save to cache before returning
|
|
saveToCache(targetUrl, content)
|
|
return content
|
|
}
|
|
|
|
|