From c7eb93373975e6ee67faebb7b9640d07ce9a1a70 Mon Sep 17 00:00:00 2001 From: Gigi Date: Thu, 3 Apr 2025 11:30:05 +0100 Subject: [PATCH] refactor(post-process): simplify item extraction with regex - Use regex to match list markers and find first letter - Clean up non-letter characters from beginning of lines - Remove trailing metadata notes --- src/post_process.py | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/src/post_process.py b/src/post_process.py index 5139239..7198188 100644 --- a/src/post_process.py +++ b/src/post_process.py @@ -22,18 +22,16 @@ def extract_action_items(content: str) -> List[str]: if not line.strip() or line.strip().startswith(('Here are', 'Rules were', 'No action items')): continue - # Find the first letter in the line - first_letter_pos = -1 - for i, char in enumerate(line): - if char.isalpha(): - first_letter_pos = i - break - - if first_letter_pos >= 0: - # Extract the item starting from the first letter - item = line[first_letter_pos:].strip() - if item and not item.startswith('#'): # Skip headers - items.append(item) + # Match lines that start with any list marker (-, *, +) + if re.match(r'^\s*[-*+]', line): + # Find the first letter in the line + match = re.search(r'[a-zA-Z]', line) + if match: + item = line[match.start():].strip() + if item and not item.startswith('#'): # Skip headers + # Remove any trailing "(no deadline or priority mentioned)" + item = re.sub(r'\s*\(no deadline or priority mentioned\)$', '', item) + items.append(item) return items def format_action_items(items: List[str], filename: str) -> str: