diff --git a/src/extract.py b/src/extract.py index db12ed5..414b1bf 100755 --- a/src/extract.py +++ b/src/extract.py @@ -7,6 +7,20 @@ from pathlib import Path import time from typing import List +def get_base_name(plugin_name: str) -> str: + """Get the base name of a plugin without any suffixes.""" + return plugin_name.split('.')[0] + +def read_file(file_path: Path) -> str: + """Read text from a file.""" + with open(file_path, 'r', encoding='utf-8') as f: + return f.read() + +def write_file(file_path: Path, content: str) -> None: + """Write text to a file.""" + with open(file_path, 'w', encoding='utf-8') as f: + f.write(content) + def load_plugins() -> dict[str, str]: """Load all plugins from the plugins directory.""" plugin_dir = Path("plugins") @@ -16,46 +30,39 @@ def load_plugins() -> dict[str, str]: for plugin_file in plugin_dir.glob("*.md"): plugin_name = plugin_file.stem if plugin_name != "summary": # Skip the summary plugin as it's used differently - # Keep the full plugin name including all suffixes - plugins[plugin_name] = plugin_file.read_text(encoding='utf-8') + plugins[plugin_name] = read_file(plugin_file) return plugins def determine_active_plugins(text: str, available_plugins: List[str]) -> List[str]: """Determine which plugins should be run on this transcript.""" - active_plugins = [] + active_plugins = set() # Use a set to avoid duplicates - # First, add all .all plugins for plugin_name in available_plugins: - if '.all' in plugin_name: - # Remove all suffixes for the base name - base_name = plugin_name.split('.')[0] - active_plugins.append(base_name) - - # Then check for each plugin's content type - for plugin_name in available_plugins: - # Convert plugin name to search pattern (e.g., "blog_post" -> "blog post") - search_pattern = plugin_name.replace('_', ' ') + base_name = get_base_name(plugin_name) - # Check if this is an "or" plugin + # Always include plugins with .all suffix + if '.all' in plugin_name: + active_plugins.add(base_name) + continue + + # For .or plugins, check if any word matches if '.or' in plugin_name: - # Get base name without suffixes - base_name = plugin_name.split('.')[0] - # For or plugins, check if any of the words match words = base_name.split('_') if any(re.search(r'\b' + word + r'\b', text.lower()) for word in words): - active_plugins.append(base_name) + active_plugins.add(base_name) + # For regular plugins, check for the exact pattern else: - # For regular plugins, check for the exact pattern + search_pattern = plugin_name.replace('_', ' ') if re.search(r'\b' + search_pattern + r'\b', text.lower()): - active_plugins.append(plugin_name) + active_plugins.add(base_name) - return active_plugins + return list(active_plugins) -def generate_additional_content(content_type: str, transcript_text: str, summary_text: str, plugins: dict[str, str]) -> str: - """Generate additional content based on the content type.""" - # Find the plugin key that matches the content type (with or without .or suffix) - plugin_key = next(key for key in plugins.keys() if key.startswith(content_type)) +def generate_additional_content(plugin_base_name: str, transcript_text: str, summary_text: str, plugins: dict[str, str]) -> str: + """Generate additional content using the specified plugin.""" + # Find the plugin key that matches the base name + plugin_key = next(key for key in plugins.keys() if get_base_name(key) == plugin_base_name) prompt_template = plugins[plugin_key] prompt = prompt_template.format(transcript=transcript_text, summary=summary_text) @@ -89,8 +96,7 @@ def main(): # Create output directories for each plugin for plugin_name in plugins.keys(): - # Get base name without any suffixes - base_name = plugin_name.split('.')[0] + base_name = get_base_name(plugin_name) output_dir = voice_memo_dir / f"{base_name}s" # Pluralize the plugin name output_dir.mkdir(parents=True, exist_ok=True) # Store with base name as key @@ -100,30 +106,27 @@ def main(): print("Extracting content...") # Read transcript - with open(input_file, 'r', encoding='utf-8') as f: - transcript_text = f.read() + transcript_text = read_file(input_file) # Read summary if it exists summary_file = input_file.parent / f"{input_file.stem}_summary.txt" summary_text = "" if summary_file.exists(): - with open(summary_file, 'r', encoding='utf-8') as f: - summary_text = f.read() + summary_text = read_file(summary_file) # Determine which plugins to run active_plugins = determine_active_plugins(transcript_text, list(plugins.keys())) if active_plugins: for plugin_name in active_plugins: print(f" Running {plugin_name} plugin...") - additional_content = generate_additional_content(plugin_name, transcript_text, summary_text, plugins) + additional_content = generate_additional_content(get_base_name(plugin_name), transcript_text, summary_text, plugins) # Save to appropriate directory with timestamp timestamp = time.strftime("%Y%m%d_%H%M%S") filename = input_file.stem - output_file = output_dirs[plugin_name] / f"{filename}_{timestamp}.md" + output_file = output_dirs[get_base_name(plugin_name)] / f"{filename}_{timestamp}.md" - with open(output_file, 'w', encoding='utf-8') as f: - f.write(additional_content) + write_file(output_file, additional_content) print(f" Content saved to: {output_file}") else: print(" No matching plugins found for this transcript")