mirror of
https://github.com/dergigi/boris.git
synced 2026-02-23 16:04:29 +01:00
feat: replace custom OpenGraph extraction with fetch-opengraph library
- Install fetch-opengraph library for robust OpenGraph extraction - Replace custom regex patterns and proxy logic with specialized library - Simplify AddBookmarkModal OpenGraph extraction logic - Remove fetchRawHtml function from readerService - Improve reliability and maintainability of metadata extraction
This commit is contained in:
60
package-lock.json
generated
60
package-lock.json
generated
@@ -1,12 +1,12 @@
|
||||
{
|
||||
"name": "boris",
|
||||
"version": "0.10.19",
|
||||
"version": "0.10.23",
|
||||
"lockfileVersion": 3,
|
||||
"requires": true,
|
||||
"packages": {
|
||||
"": {
|
||||
"name": "boris",
|
||||
"version": "0.10.19",
|
||||
"version": "0.10.23",
|
||||
"dependencies": {
|
||||
"@fortawesome/fontawesome-svg-core": "^7.1.0",
|
||||
"@fortawesome/free-regular-svg-icons": "^7.1.0",
|
||||
@@ -23,6 +23,7 @@
|
||||
"applesauce-relay": "^4.0.0",
|
||||
"date-fns": "^4.1.0",
|
||||
"fast-average-color": "^9.5.0",
|
||||
"fetch-opengraph": "^1.0.36",
|
||||
"nostr-tools": "^2.4.0",
|
||||
"prismjs": "^1.30.0",
|
||||
"react": "^18.2.0",
|
||||
@@ -4502,6 +4503,15 @@
|
||||
"url": "https://github.com/sponsors/ljharb"
|
||||
}
|
||||
},
|
||||
"node_modules/axios": {
|
||||
"version": "0.21.4",
|
||||
"resolved": "https://registry.npmjs.org/axios/-/axios-0.21.4.tgz",
|
||||
"integrity": "sha512-ut5vewkiu8jjGBdqpM44XxjuCjq9LAKeHVmoVfHVzy8eHgxxq8SbAVQNovDA8mVi05kP0Ea/n/UzcSHcTJQfNg==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"follow-redirects": "^1.14.0"
|
||||
}
|
||||
},
|
||||
"node_modules/babel-plugin-polyfill-corejs2": {
|
||||
"version": "0.4.14",
|
||||
"resolved": "https://registry.npmjs.org/babel-plugin-polyfill-corejs2/-/babel-plugin-polyfill-corejs2-0.4.14.tgz",
|
||||
@@ -6171,6 +6181,16 @@
|
||||
"reusify": "^1.0.4"
|
||||
}
|
||||
},
|
||||
"node_modules/fetch-opengraph": {
|
||||
"version": "1.0.36",
|
||||
"resolved": "https://registry.npmjs.org/fetch-opengraph/-/fetch-opengraph-1.0.36.tgz",
|
||||
"integrity": "sha512-w2Gs64zjL1O86E0I6E26MrxeXpTrR8Y1vWrgupmZN6NXKV8F5I3W0tlh+ZX686jZwxyilWnQjYwgnWpdETdHWw==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"axios": "^0.21.1",
|
||||
"html-entities": "^2.3.2"
|
||||
}
|
||||
},
|
||||
"node_modules/file-entry-cache": {
|
||||
"version": "6.0.1",
|
||||
"resolved": "https://registry.npmjs.org/file-entry-cache/-/file-entry-cache-6.0.1.tgz",
|
||||
@@ -6264,6 +6284,26 @@
|
||||
"dev": true,
|
||||
"license": "ISC"
|
||||
},
|
||||
"node_modules/follow-redirects": {
|
||||
"version": "1.15.11",
|
||||
"resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.15.11.tgz",
|
||||
"integrity": "sha512-deG2P0JfjrTxl50XGCDyfI97ZGVCxIpfKYmfyrQ54n5FO/0gfIES8C/Psl6kWVDolizcaaxZJnTS0QSMxvnsBQ==",
|
||||
"funding": [
|
||||
{
|
||||
"type": "individual",
|
||||
"url": "https://github.com/sponsors/RubenVerborgh"
|
||||
}
|
||||
],
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"node": ">=4.0"
|
||||
},
|
||||
"peerDependenciesMeta": {
|
||||
"debug": {
|
||||
"optional": true
|
||||
}
|
||||
}
|
||||
},
|
||||
"node_modules/for-each": {
|
||||
"version": "0.3.5",
|
||||
"resolved": "https://registry.npmjs.org/for-each/-/for-each-0.3.5.tgz",
|
||||
@@ -6896,6 +6936,22 @@
|
||||
"he": "bin/he"
|
||||
}
|
||||
},
|
||||
"node_modules/html-entities": {
|
||||
"version": "2.6.0",
|
||||
"resolved": "https://registry.npmjs.org/html-entities/-/html-entities-2.6.0.tgz",
|
||||
"integrity": "sha512-kig+rMn/QOVRvr7c86gQ8lWXq+Hkv6CbAH1hLu+RG338StTpE8Z0b44SDVaqVu7HGKf27frdmUYEs9hTUX/cLQ==",
|
||||
"funding": [
|
||||
{
|
||||
"type": "github",
|
||||
"url": "https://github.com/sponsors/mdevils"
|
||||
},
|
||||
{
|
||||
"type": "patreon",
|
||||
"url": "https://patreon.com/mdevils"
|
||||
}
|
||||
],
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/html-url-attributes": {
|
||||
"version": "3.0.1",
|
||||
"resolved": "https://registry.npmjs.org/html-url-attributes/-/html-url-attributes-3.0.1.tgz",
|
||||
|
||||
@@ -26,6 +26,7 @@
|
||||
"applesauce-relay": "^4.0.0",
|
||||
"date-fns": "^4.1.0",
|
||||
"fast-average-color": "^9.5.0",
|
||||
"fetch-opengraph": "^1.0.36",
|
||||
"nostr-tools": "^2.4.0",
|
||||
"prismjs": "^1.30.0",
|
||||
"react": "^18.2.0",
|
||||
|
||||
@@ -4,41 +4,37 @@ import { FontAwesomeIcon } from '@fortawesome/react-fontawesome'
|
||||
import { faTimes, faSpinner } from '@fortawesome/free-solid-svg-icons'
|
||||
import IconButton from './IconButton'
|
||||
import { fetchReadableContent } from '../services/readerService'
|
||||
import { fetch } from 'fetch-opengraph'
|
||||
|
||||
interface AddBookmarkModalProps {
|
||||
onClose: () => void
|
||||
onSave: (url: string, title?: string, description?: string, tags?: string[]) => Promise<void>
|
||||
}
|
||||
|
||||
// Helper to extract metadata from HTML
|
||||
function extractMetaTag(html: string, patterns: string[]): string | null {
|
||||
for (const pattern of patterns) {
|
||||
const match = html.match(new RegExp(pattern, 'i'))
|
||||
if (match) return match[1]
|
||||
}
|
||||
return null
|
||||
}
|
||||
|
||||
function extractTags(html: string): string[] {
|
||||
// Helper to extract tags from OpenGraph data
|
||||
function extractTagsFromOgData(ogData: any): string[] {
|
||||
const tags: string[] = []
|
||||
|
||||
// Extract keywords meta tag
|
||||
const keywords = extractMetaTag(html, [
|
||||
'<meta\\s+name=["\'"]keywords["\'"]\\s+content=["\'"]([^"\']+)["\']'
|
||||
])
|
||||
if (keywords) {
|
||||
keywords.split(/[,;]/)
|
||||
.map(k => k.trim().toLowerCase())
|
||||
.filter(k => k.length > 0 && k.length < 30)
|
||||
.forEach(k => tags.push(k))
|
||||
// Extract keywords from OpenGraph data
|
||||
if (ogData.keywords) {
|
||||
ogData.keywords.split(/[,;]/)
|
||||
.map((k: string) => k.trim().toLowerCase())
|
||||
.filter((k: string) => k.length > 0 && k.length < 30)
|
||||
.forEach((k: string) => tags.push(k))
|
||||
}
|
||||
|
||||
// Extract article:tag (multiple possible)
|
||||
const articleTagRegex = /<meta\s+property=["']article:tag["']\s+content=["']([^"']+)["']/gi
|
||||
let match
|
||||
while ((match = articleTagRegex.exec(html)) !== null) {
|
||||
const tag = match[1].trim().toLowerCase()
|
||||
if (tag && tag.length < 30) tags.push(tag)
|
||||
// Extract article:tag from OpenGraph data
|
||||
if (ogData['article:tag']) {
|
||||
const articleTags = Array.isArray(ogData['article:tag'])
|
||||
? ogData['article:tag']
|
||||
: [ogData['article:tag']]
|
||||
|
||||
articleTags.forEach((tag: string) => {
|
||||
const cleanTag = tag.trim().toLowerCase()
|
||||
if (cleanTag && cleanTag.length < 30) {
|
||||
tags.push(cleanTag)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
return Array.from(new Set(tags)).slice(0, 5)
|
||||
@@ -83,17 +79,27 @@ const AddBookmarkModal: React.FC<AddBookmarkModalProps> = ({ onClose, onSave })
|
||||
fetchTimeoutRef.current = window.setTimeout(async () => {
|
||||
setIsFetchingMetadata(true)
|
||||
try {
|
||||
const content = await fetchReadableContent(normalizedUrl)
|
||||
lastFetchedUrlRef.current = normalizedUrl
|
||||
// Fetch both readable content and OpenGraph data in parallel
|
||||
const [content, ogData] = await Promise.all([
|
||||
fetchReadableContent(normalizedUrl),
|
||||
fetch(normalizedUrl).catch(() => null) // Don't fail if OpenGraph fetch fails
|
||||
])
|
||||
|
||||
lastFetchedUrlRef.current = normalizedUrl
|
||||
let extractedAnything = false
|
||||
|
||||
// Extract title: prioritize og:title > twitter:title > <title>
|
||||
if (!title && content.html) {
|
||||
const extractedTitle = extractMetaTag(content.html, [
|
||||
'<meta\\s+property=["\'"]og:title["\'"]\\s+content=["\'"]([^"\']+)["\']',
|
||||
'<meta\\s+name=["\'"]twitter:title["\'"]\\s+content=["\'"]([^"\']+)["\']'
|
||||
]) || content.title
|
||||
// Extract title: prioritize og:title > twitter:title > content.title
|
||||
if (!title) {
|
||||
let extractedTitle = null
|
||||
|
||||
if (ogData) {
|
||||
extractedTitle = ogData['og:title'] || ogData['twitter:title'] || ogData.title
|
||||
}
|
||||
|
||||
// Fallback to content.title if no OpenGraph title found
|
||||
if (!extractedTitle) {
|
||||
extractedTitle = content.title
|
||||
}
|
||||
|
||||
if (extractedTitle) {
|
||||
setTitle(extractedTitle)
|
||||
@@ -102,12 +108,8 @@ const AddBookmarkModal: React.FC<AddBookmarkModalProps> = ({ onClose, onSave })
|
||||
}
|
||||
|
||||
// Extract description: prioritize og:description > twitter:description > meta description
|
||||
if (!description && content.html) {
|
||||
const extractedDesc = extractMetaTag(content.html, [
|
||||
'<meta\\s+property=["\'"]og:description["\'"]\\s+content=["\'"]([^"\']+)["\']',
|
||||
'<meta\\s+name=["\'"]twitter:description["\'"]\\s+content=["\'"]([^"\']+)["\']',
|
||||
'<meta\\s+name=["\'"]description["\'"]\\s+content=["\'"]([^"\']+)["\']'
|
||||
])
|
||||
if (!description && ogData) {
|
||||
const extractedDesc = ogData['og:description'] || ogData['twitter:description'] || ogData.description
|
||||
|
||||
if (extractedDesc) {
|
||||
setDescription(extractedDesc)
|
||||
@@ -116,8 +118,8 @@ const AddBookmarkModal: React.FC<AddBookmarkModalProps> = ({ onClose, onSave })
|
||||
}
|
||||
|
||||
// Extract tags from keywords and article:tag (only if user hasn't modified tags)
|
||||
if (!tagsInput && content.html) {
|
||||
const extractedTags = extractTags(content.html)
|
||||
if (!tagsInput && ogData) {
|
||||
const extractedTags = extractTagsFromOgData(ogData)
|
||||
|
||||
// Only add boris tag if we extracted something
|
||||
if (extractedAnything || extractedTags.length > 0) {
|
||||
|
||||
@@ -110,3 +110,4 @@ export async function fetchReadableContent(
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user