feat: prioritize OpenGraph tags for metadata extraction

- Extract title with priority: og:title > twitter:title > <title>
- Extract description with priority: og:description > twitter:description > meta description > first <p> (or the first markdown paragraph when no HTML is available)
- OpenGraph tags provide better, curated metadata for sharing
- Use Twitter Card tags as a fallback for social media compatibility
- Improve metadata quality for most modern websites
This commit is contained in:
Gigi
2025-10-08 11:01:51 +01:00
parent 82977fa5d4
commit 4edc22cec2

View File

@@ -43,31 +43,66 @@ const AddBookmarkModal: React.FC<AddBookmarkModalProps> = ({ onClose, onSave })
try {
const metadata = await fetchReadableContent(parsedUrl.toString())
// Only auto-fill if fields are empty
if (metadata.title && !title) {
setTitle(metadata.title)
}
// Try to extract description from markdown or HTML
if (!description) {
let extractedDesc = ''
if (metadata.markdown) {
// Take first paragraph from markdown
const firstPara = metadata.markdown.split('\n\n')[0]
extractedDesc = firstPara.replace(/^#+\s*/g, '').trim().slice(0, 200)
} else if (metadata.html) {
// Try to extract meta description or first paragraph
const metaMatch = metadata.html.match(/<meta\s+name=["']description["']\s+content=["']([^"']+)["']/i)
if (metaMatch) {
extractedDesc = metaMatch[1]
} else {
// Fallback to first <p> tag
const pMatch = metadata.html.match(/<p[^>]*>(.*?)<\/p>/is)
if (pMatch) {
extractedDesc = pMatch[1].replace(/<[^>]+>/g, '').trim().slice(0, 200)
}
// Extract title: prioritize og:title, then regular title
let extractedTitle = ''
if (metadata.html) {
// Try OpenGraph title first
const ogTitleMatch = metadata.html.match(/<meta\s+property=["']og:title["']\s+content=["']([^"']+)["']/i)
if (ogTitleMatch) {
extractedTitle = ogTitleMatch[1]
} else {
// Fallback to twitter:title
const twitterTitleMatch = metadata.html.match(/<meta\s+name=["']twitter:title["']\s+content=["']([^"']+)["']/i)
if (twitterTitleMatch) {
extractedTitle = twitterTitleMatch[1]
}
}
}
// Use metadata.title as last resort
if (!extractedTitle && metadata.title) {
extractedTitle = metadata.title
}
// Only auto-fill if field is empty
if (extractedTitle && !title) {
setTitle(extractedTitle)
}
// Extract description: prioritize og:description
if (!description) {
let extractedDesc = ''
if (metadata.html) {
// Try OpenGraph description first
const ogDescMatch = metadata.html.match(/<meta\s+property=["']og:description["']\s+content=["']([^"']+)["']/i)
if (ogDescMatch) {
extractedDesc = ogDescMatch[1]
} else {
// Try twitter:description
const twitterDescMatch = metadata.html.match(/<meta\s+name=["']twitter:description["']\s+content=["']([^"']+)["']/i)
if (twitterDescMatch) {
extractedDesc = twitterDescMatch[1]
} else {
// Fallback to standard meta description
const metaDescMatch = metadata.html.match(/<meta\s+name=["']description["']\s+content=["']([^"']+)["']/i)
if (metaDescMatch) {
extractedDesc = metaDescMatch[1]
} else {
// Last resort: extract from first <p> tag
const pMatch = metadata.html.match(/<p[^>]*>(.*?)<\/p>/is)
if (pMatch) {
extractedDesc = pMatch[1].replace(/<[^>]+>/g, '').trim().slice(0, 200)
}
}
}
}
} else if (metadata.markdown) {
// For markdown, take first paragraph
const firstPara = metadata.markdown.split('\n\n')[0]
extractedDesc = firstPara.replace(/^#+\s*/g, '').trim().slice(0, 200)
}
if (extractedDesc) {
setDescription(extractedDesc)
}