refactor(post-process): simplify item extraction with regex

- Use regex to match list markers and find first letter
- Clean up non-letter characters from beginning of lines
- Remove trailing metadata notes
2026-01-21 23:44:28 +01:00 · 2025-04-03 11:30:05 +01:00
parent dc6321323e
commit c7eb933739
1 changed files with 10 additions and 12 deletions
--- a/src/post_process.py
+++ b/src/post_process.py
@@ -22,18 +22,16 @@ def extract_action_items(content: str) -> List[str]:
        if not line.strip() or line.strip().startswith(('Here are', 'Rules were', 'No action items')):
            continue
            
-        # Find the first letter in the line
-        first_letter_pos = -1
-        for i, char in enumerate(line):
-            if char.isalpha():
-                first_letter_pos = i
-                break
-                
-        if first_letter_pos >= 0:
-            # Extract the item starting from the first letter
-            item = line[first_letter_pos:].strip()
-            if item and not item.startswith('#'):  # Skip headers
-                items.append(item)
+        # Match lines that start with any list marker (-, *, +)
+        if re.match(r'^\s*[-*+]', line):
+            # Find the first letter in the line
+            match = re.search(r'[a-zA-Z]', line)
+            if match:
+                item = line[match.start():].strip()
+                if item and not item.startswith('#'):  # Skip headers
+                    # Remove any trailing "(no deadline or priority mentioned)"
+                    item = re.sub(r'\s*\(no deadline or priority mentioned\)$', '', item)
+                    items.append(item)
    return items

 def format_action_items(items: List[str], filename: str) -> str: