Files
boris/src/services/readerService.ts
Gigi 719ddf3f0b feat(readability): render Markdown when proxy provides it
- Detect markdown blocks from r.jina.ai output
- Add react-markdown + remark-gfm for rendering
- Extend ContentPanel to render markdown or HTML
- Add styles for markdown content
2025-10-02 23:46:33 +02:00

50 lines
1.5 KiB
TypeScript

// Lightweight readability-style fetcher using the r.jina.ai proxy.
// Returns simplified HTML or Markdown for a given URL. This avoids CORS issues and heavy dependencies.
/**
 * Result of fetching a page through the reader proxy.
 *
 * `fetchReadableContent` fills in either `markdown` or `html`, depending on
 * whether the proxy response contained a "Markdown Content:" block.
 */
export interface ReadableContent {
  /** The target URL exactly as it was passed in by the caller. */
  url: string;
  /** Page title, when one could be extracted from the proxy response. */
  title?: string;
  /** Raw response text, set when no Markdown block was detected. */
  html?: string;
  /** Text following the "Markdown Content:" marker, set when that block was detected. */
  markdown?: string;
}
/**
 * Build the r.jina.ai proxy URL for a target page.
 *
 * Any leading `http://` or `https://` on the input is removed (matched
 * case-insensitively) and the target is always handed to the proxy with an
 * `http://` prefix, so scheme-less inputs such as `example.com` are accepted.
 *
 * @param url - Target page address, with or without a scheme.
 * @returns The proxy URL to fetch.
 */
function toProxyUrl(url: string): string {
  // The `i` flag matters: without it an input like `HTTPS://host` keeps its
  // scheme and the result becomes `http://HTTPS://host`.
  const hostAndPath = url.trim().replace(/^https?:\/\//i, '')
  return `https://r.jina.ai/http://${hostAndPath}`
}
/**
 * Fetch a reader-friendly version of a page through the r.jina.ai proxy.
 *
 * When the response contains a "Markdown Content:" block, the text after that
 * marker is returned as `markdown` and the title is taken from the "Title:"
 * line if present. Otherwise the whole response body is returned as `html`
 * with a best-effort title taken from its `<title>` element.
 *
 * @param targetUrl - Address of the page to read, with or without a scheme.
 * @returns The extracted content; `url` echoes `targetUrl` unchanged.
 * @throws Error when the proxy responds with a non-OK HTTP status.
 */
export async function fetchReadableContent(targetUrl: string): Promise<ReadableContent> {
  const res = await fetch(toProxyUrl(targetUrl))
  if (!res.ok) {
    throw new Error(`Failed to fetch readable content (${res.status})`)
  }
  const text = await res.text()

  // Detect if the proxy delivered Markdown or HTML. r.jina.ai often returns a
  // block starting with "Title:" and "Markdown Content:". We handle both.
  if (/Markdown Content:\s/i.test(text)) {
    // Split out the title and the Markdown payload.
    const titleMatch = text.match(/Title:\s*(.*?)(?:\s+URL Source:|\s+Markdown Content:)/i)
    const mdMatch = text.match(/Markdown Content:\s*([\s\S]*)$/i)
    return {
      url: targetUrl,
      // `||` is deliberate: an empty title is reported as "no title".
      title: titleMatch?.[1]?.trim() || undefined,
      markdown: mdMatch?.[1]?.trim()
    }
  }

  // Best-effort title extraction from HTML. `[\s\S]` is used instead of `.` so
  // a <title> whose text wraps across lines is still found; internal runs of
  // whitespace are then collapsed to single spaces.
  const htmlTitleMatch = text.match(/<title[^>]*>([\s\S]*?)<\/title>/i)
  const htmlTitle = htmlTitleMatch?.[1]?.replace(/\s+/g, ' ').trim()
  return {
    url: targetUrl,
    title: htmlTitle || undefined,
    html: text
  }
}