mirror of
https://github.com/aljazceru/vibeline.git
synced 2026-01-15 12:34:24 +01:00
Refactor extract.py: add utility functions, simplify plugin detection, and improve code organization
This commit is contained in:
@@ -7,6 +7,20 @@ from pathlib import Path
|
||||
import time
|
||||
from typing import List
|
||||
|
||||
def get_base_name(plugin_name: str) -> str:
|
||||
"""Get the base name of a plugin without any suffixes."""
|
||||
return plugin_name.split('.')[0]
|
||||
|
||||
def read_file(file_path: Path) -> str:
|
||||
"""Read text from a file."""
|
||||
with open(file_path, 'r', encoding='utf-8') as f:
|
||||
return f.read()
|
||||
|
||||
def write_file(file_path: Path, content: str) -> None:
|
||||
"""Write text to a file."""
|
||||
with open(file_path, 'w', encoding='utf-8') as f:
|
||||
f.write(content)
|
||||
|
||||
def load_plugins() -> dict[str, str]:
|
||||
"""Load all plugins from the plugins directory."""
|
||||
plugin_dir = Path("plugins")
|
||||
@@ -16,46 +30,39 @@ def load_plugins() -> dict[str, str]:
|
||||
for plugin_file in plugin_dir.glob("*.md"):
|
||||
plugin_name = plugin_file.stem
|
||||
if plugin_name != "summary": # Skip the summary plugin as it's used differently
|
||||
# Keep the full plugin name including all suffixes
|
||||
plugins[plugin_name] = plugin_file.read_text(encoding='utf-8')
|
||||
plugins[plugin_name] = read_file(plugin_file)
|
||||
|
||||
return plugins
|
||||
|
||||
def determine_active_plugins(text: str, available_plugins: List[str]) -> List[str]:
|
||||
"""Determine which plugins should be run on this transcript."""
|
||||
active_plugins = []
|
||||
active_plugins = set() # Use a set to avoid duplicates
|
||||
|
||||
# First, add all .all plugins
|
||||
for plugin_name in available_plugins:
|
||||
if '.all' in plugin_name:
|
||||
# Remove all suffixes for the base name
|
||||
base_name = plugin_name.split('.')[0]
|
||||
active_plugins.append(base_name)
|
||||
|
||||
# Then check for each plugin's content type
|
||||
for plugin_name in available_plugins:
|
||||
# Convert plugin name to search pattern (e.g., "blog_post" -> "blog post")
|
||||
search_pattern = plugin_name.replace('_', ' ')
|
||||
base_name = get_base_name(plugin_name)
|
||||
|
||||
# Check if this is an "or" plugin
|
||||
# Always include plugins with .all suffix
|
||||
if '.all' in plugin_name:
|
||||
active_plugins.add(base_name)
|
||||
continue
|
||||
|
||||
# For .or plugins, check if any word matches
|
||||
if '.or' in plugin_name:
|
||||
# Get base name without suffixes
|
||||
base_name = plugin_name.split('.')[0]
|
||||
# For or plugins, check if any of the words match
|
||||
words = base_name.split('_')
|
||||
if any(re.search(r'\b' + word + r'\b', text.lower()) for word in words):
|
||||
active_plugins.append(base_name)
|
||||
active_plugins.add(base_name)
|
||||
# For regular plugins, check for the exact pattern
|
||||
else:
|
||||
# For regular plugins, check for the exact pattern
|
||||
search_pattern = plugin_name.replace('_', ' ')
|
||||
if re.search(r'\b' + search_pattern + r'\b', text.lower()):
|
||||
active_plugins.append(plugin_name)
|
||||
active_plugins.add(base_name)
|
||||
|
||||
return active_plugins
|
||||
return list(active_plugins)
|
||||
|
||||
def generate_additional_content(content_type: str, transcript_text: str, summary_text: str, plugins: dict[str, str]) -> str:
|
||||
"""Generate additional content based on the content type."""
|
||||
# Find the plugin key that matches the content type (with or without .or suffix)
|
||||
plugin_key = next(key for key in plugins.keys() if key.startswith(content_type))
|
||||
def generate_additional_content(plugin_base_name: str, transcript_text: str, summary_text: str, plugins: dict[str, str]) -> str:
|
||||
"""Generate additional content using the specified plugin."""
|
||||
# Find the plugin key that matches the base name
|
||||
plugin_key = next(key for key in plugins.keys() if get_base_name(key) == plugin_base_name)
|
||||
prompt_template = plugins[plugin_key]
|
||||
prompt = prompt_template.format(transcript=transcript_text, summary=summary_text)
|
||||
|
||||
@@ -89,8 +96,7 @@ def main():
|
||||
|
||||
# Create output directories for each plugin
|
||||
for plugin_name in plugins.keys():
|
||||
# Get base name without any suffixes
|
||||
base_name = plugin_name.split('.')[0]
|
||||
base_name = get_base_name(plugin_name)
|
||||
output_dir = voice_memo_dir / f"{base_name}s" # Pluralize the plugin name
|
||||
output_dir.mkdir(parents=True, exist_ok=True)
|
||||
# Store with base name as key
|
||||
@@ -100,30 +106,27 @@ def main():
|
||||
print("Extracting content...")
|
||||
|
||||
# Read transcript
|
||||
with open(input_file, 'r', encoding='utf-8') as f:
|
||||
transcript_text = f.read()
|
||||
transcript_text = read_file(input_file)
|
||||
|
||||
# Read summary if it exists
|
||||
summary_file = input_file.parent / f"{input_file.stem}_summary.txt"
|
||||
summary_text = ""
|
||||
if summary_file.exists():
|
||||
with open(summary_file, 'r', encoding='utf-8') as f:
|
||||
summary_text = f.read()
|
||||
summary_text = read_file(summary_file)
|
||||
|
||||
# Determine which plugins to run
|
||||
active_plugins = determine_active_plugins(transcript_text, list(plugins.keys()))
|
||||
if active_plugins:
|
||||
for plugin_name in active_plugins:
|
||||
print(f" Running {plugin_name} plugin...")
|
||||
additional_content = generate_additional_content(plugin_name, transcript_text, summary_text, plugins)
|
||||
additional_content = generate_additional_content(get_base_name(plugin_name), transcript_text, summary_text, plugins)
|
||||
|
||||
# Save to appropriate directory with timestamp
|
||||
timestamp = time.strftime("%Y%m%d_%H%M%S")
|
||||
filename = input_file.stem
|
||||
output_file = output_dirs[plugin_name] / f"{filename}_{timestamp}.md"
|
||||
output_file = output_dirs[get_base_name(plugin_name)] / f"{filename}_{timestamp}.md"
|
||||
|
||||
with open(output_file, 'w', encoding='utf-8') as f:
|
||||
f.write(additional_content)
|
||||
write_file(output_file, additional_content)
|
||||
print(f" Content saved to: {output_file}")
|
||||
else:
|
||||
print(" No matching plugins found for this transcript")
|
||||
|
||||
Reference in New Issue
Block a user