// Origin: commit de32807995f76f81607f70c3a1dadc06a3559b8b (Gigi, Thu, 2 Oct 2025 23:32:00 +0200)
//   feat(reader): add lightweight readability fetcher via r.jina.ai proxy
// File: src/services/readerService.ts
//
// Lightweight readability-style fetcher using r.jina.ai proxy
// Returns simplified content for a given URL. This avoids CORS and heavy deps.

/** Result of fetching a page through the reader proxy. */
export interface ReadableContent {
  /** The target URL exactly as the caller passed it. */
  url: string
  /** Best-effort title; undefined when none could be found in the response. */
  title?: string
  /** Response body returned by the proxy, unmodified. */
  html: string
}

// Matches a leading http:// or https:// scheme in any letter case.
const SCHEME_PREFIX = /^https?:\/\//i

/**
 * Builds the r.jina.ai proxy URL for a target address.
 *
 * Surrounding whitespace and any leading http(s) scheme (in any letter case)
 * are removed, and the remainder is appended after `https://r.jina.ai/http://`.
 * The target is therefore always handed to the proxy with an `http://` scheme.
 *
 * @param url - Target address, with or without a scheme.
 * @returns The proxy URL to request.
 */
function toProxyUrl(url: string): string {
  // Strip with the same case-insensitive pattern used for detection so that
  // inputs such as "HTTPS://host" do not end up with a doubled scheme.
  const hostAndPath = url.trim().replace(SCHEME_PREFIX, '')
  return `https://r.jina.ai/http://${hostAndPath}`
}

/**
 * Best-effort title extraction from a proxy response body.
 *
 * Tries an HTML `<title>` element first (its content may span several lines),
 * then a `Title: ...` line at the very start of the body, which is how the
 * reader proxy labels the title in its plain-text output.
 *
 * @param body - Raw response text.
 * @returns The title with whitespace collapsed, or undefined if none was found.
 */
function extractTitle(body: string): string | undefined {
  const candidates = [
    // [\s\S] instead of "." so a title broken across lines still matches.
    body.match(/<title\b[^>]*>([\s\S]*?)<\/title>/i)?.[1],
    // No "m" flag: only a header line at the start of the body counts.
    body.match(/^\s*Title:[ \t]*(.+)/)?.[1]
  ]
  for (const candidate of candidates) {
    const cleaned = candidate?.replace(/\s+/g, ' ').trim()
    if (cleaned) {
      return cleaned
    }
  }
  return undefined
}

/**
 * Fetches a readable version of a page through the r.jina.ai proxy.
 *
 * @param targetUrl - Address of the page to fetch, with or without a scheme.
 * @returns The original `targetUrl`, a best-effort title, and the raw response body.
 * @throws Error if `targetUrl` is empty/blank, or if the proxy responds with a
 *   non-2xx status (the status code is included in the message).
 */
export async function fetchReadableContent(targetUrl: string): Promise<ReadableContent> {
  // A blank target would otherwise turn into a request for the bare proxy root.
  if (targetUrl.trim() === '') {
    throw new Error('Cannot fetch readable content: target URL is empty')
  }
  const res = await fetch(toProxyUrl(targetUrl))
  if (!res.ok) {
    throw new Error(`Failed to fetch readable content (${res.status})`)
  }
  const html = await res.text()
  return {
    url: targetUrl,
    title: extractTitle(html),
    html
  }
}