From 4edc22cec20f2635a09468c9cd0bb4a3409e0d66 Mon Sep 17 00:00:00 2001
From: Gigi <dergigi@pm.me>
Date: Wed, 8 Oct 2025 11:01:51 +0100
Subject: [PATCH] feat: prioritize OpenGraph tags for metadata extraction

- Extract title with priority: og:title > twitter:title > <title>
- Extract description with priority: og:description > twitter:description > meta description > first <p>
- OpenGraph tags provide better, curated metadata for sharing
- Twitter Card tags as fallback for social media compatibility
- Improved metadata quality for most modern websites
---
 src/components/AddBookmarkModal.tsx | 81 +++++++++++++++++++++--------
 1 file changed, 58 insertions(+), 23 deletions(-)
diff --git a/src/components/AddBookmarkModal.tsx b/src/components/AddBookmarkModal.tsx
index 3a8325ee..1063182e 100644
--- a/src/components/AddBookmarkModal.tsx
+++ b/src/components/AddBookmarkModal.tsx
@@ -43,31 +43,66 @@ const AddBookmarkModal: React.FC<AddBookmarkModalProps> = ({ onClose, onSave })
       try {
         const metadata = await fetchReadableContent(parsedUrl.toString())
         
-        // Only auto-fill if fields are empty
-        if (metadata.title && !title) {
-          setTitle(metadata.title)
-        }
-        
-        // Try to extract description from markdown or HTML
-        if (!description) {
-          let extractedDesc = ''
-          if (metadata.markdown) {
-            // Take first paragraph from markdown
-            const firstPara = metadata.markdown.split('\n\n')[0]
-            extractedDesc = firstPara.replace(/^#+\s*/g, '').trim().slice(0, 200)
-          } else if (metadata.html) {
-            // Try to extract meta description or first paragraph
-            const metaMatch = metadata.html.match(/<meta\s+name=["']description["']\s+content=["']([^"']+)["']/i)
-            if (metaMatch) {
-              extractedDesc = metaMatch[1]
-            } else {
-              // Fallback to first <p> tag
-              const pMatch = metadata.html.match(/<p[^>]*>(.*?)<\/p>/is)
-              if (pMatch) {
-                extractedDesc = pMatch[1].replace(/<[^>]+>/g, '').trim().slice(0, 200)
-              }
+        // Extract title: prioritize og:title, then twitter:title, then metadata.title
+        let extractedTitle = ''
+        if (metadata.html) {
+          // Try OpenGraph title first
+          const ogTitleMatch = metadata.html.match(/<meta\s+property=["']og:title["']\s+content=["']([^"']+)["']/i)
+          if (ogTitleMatch) {
+            extractedTitle = ogTitleMatch[1]
+          } else {
+            // Fallback to twitter:title
+            const twitterTitleMatch = metadata.html.match(/<meta\s+name=["']twitter:title["']\s+content=["']([^"']+)["']/i)
+            if (twitterTitleMatch) {
+              extractedTitle = twitterTitleMatch[1]
             }
           }
+        }
+        
+        // Use metadata.title as last resort
+        if (!extractedTitle && metadata.title) {
+          extractedTitle = metadata.title
+        }
+        
+        // Only auto-fill if field is empty
+        if (extractedTitle && !title) {
+          setTitle(extractedTitle)
+        }
+        
+        // Extract description: og:description > twitter:description > meta description > first <p>; markdown only if no HTML
+        if (!description) {
+          let extractedDesc = ''
+          
+          if (metadata.html) {
+            // Try OpenGraph description first
+            const ogDescMatch = metadata.html.match(/<meta\s+property=["']og:description["']\s+content=["']([^"']+)["']/i)
+            if (ogDescMatch) {
+              extractedDesc = ogDescMatch[1]
+            } else {
+              // Try twitter:description
+              const twitterDescMatch = metadata.html.match(/<meta\s+name=["']twitter:description["']\s+content=["']([^"']+)["']/i)
+              if (twitterDescMatch) {
+                extractedDesc = twitterDescMatch[1]
+              } else {
+                // Fallback to standard meta description
+                const metaDescMatch = metadata.html.match(/<meta\s+name=["']description["']\s+content=["']([^"']+)["']/i)
+                if (metaDescMatch) {
+                  extractedDesc = metaDescMatch[1]
+                } else {
+                  // Last resort: extract from first <p> tag
+                  const pMatch = metadata.html.match(/<p[^>]*>(.*?)<\/p>/is)
+                  if (pMatch) {
+                    extractedDesc = pMatch[1].replace(/<[^>]+>/g, '').trim().slice(0, 200)
+                  }
+                }
+              }
+            }
+          } else if (metadata.markdown) {
+            // For markdown, take first paragraph
+            const firstPara = metadata.markdown.split('\n\n')[0]
+            extractedDesc = firstPara.replace(/^#+\s*/g, '').trim().slice(0, 200)
+          }
+          
           if (extractedDesc) {
             setDescription(extractedDesc)
           }