Restructure transcript processing to always generate summaries and create additional content in separate directories

2026-01-17 21:44:36 +01:00 · 2025-04-01 10:54:09 +01:00
parent 2b967cd891
commit 218cf7a61a
4 changed files with 85 additions and 57 deletions
--- a/prompts/blog_post.md
+++ b/prompts/blog_post.md
@@ -1,14 +1,14 @@
-Please transform the following transcript into an engaging blog post. The post should be written in a comprehensive yet accessible style, with a tone that balances technical accuracy with conversational ease.
-
-Guidelines for the blog post:
-1. Structure the content with clear sections and headings
-2. Use a mix of technical depth and accessible explanations
-3. Include relevant examples or analogies where appropriate
-4. Maintain a conversational tone while being technically accurate
-5. Use GitHub markdown flavor for formatting, prefixing headings with "#" / "##" / "###"
-6. Include code blocks where relevant, with appropriate syntax highlighting
-
-The target audience is technically-inclined general readers who are familiar with basic technical concepts but may not be experts in the specific domain.
+Based on the following transcript and its summary, create a draft blog post. The draft should include:
+1. A compelling title
+2. An introduction that hooks the reader
+3. Main sections with clear headings
+4. Key points from the transcript
+5. A conclusion

 Transcript:
 {transcript}
+
+Summary:
+{summary}
+
+Blog Post Draft:
--- a/prompts/idea_app.md
+++ b/prompts/idea_app.md
@@ -1,10 +1,16 @@
-Please analyze this transcript about an app idea and provide a structured prompt that can be easily understood by an LLM that can code, focusing on:
-1. The core app concept and its main purpose
-2. Key features and functionality discussed
-3. Any technical considerations or implementation details
-4. A step-by-step implementation plan that can be followed by a junior developer to build the app
+Based on the following transcript and its summary, create a detailed app idea specification. The specification should include:
+1. App name and tagline
+2. Problem statement
+3. Target audience
+4. Key features
+5. Technical considerations
+6. User flow
+7. Potential challenges

 Transcript:
 {transcript}

-Prompt: 
+Summary:
+{summary}
+
+App Idea Specification: 
--- a/prompts/summary.md
+++ b/prompts/summary.md
@@ -0,0 +1,6 @@
+Please provide a concise summary of the following transcript. Focus on the main points and key takeaways. Keep the summary clear and well-structured.
+
+Transcript:
+{transcript}
+
+Summary: 
--- a/src/summarize_transcripts.py
+++ b/src/summarize_transcripts.py
@@ -6,69 +6,70 @@ from pathlib import Path
 import ollama
 import time
 import re
+from datetime import datetime

 def read_transcript(transcript_file: Path) -> str:
    """Read the content of a transcript file."""
    with open(transcript_file, 'r', encoding='utf-8') as f:
        return f.read()

-def load_prompt_template(transcript_text: str) -> str:
-    """Load the appropriate prompt template based on transcript content."""
+def load_prompt_template(template_name: str) -> str:
+    """Load a prompt template by name."""
    prompt_dir = Path("prompts")
-    
-    # Convert to lowercase for case-insensitive matching
-    text = transcript_text.lower()
-    
-    # Check transcript content to determine appropriate prompt using regex word boundaries
-    if re.search(r'\bblog post\b', text):
-        # "I want to write a blog post"
-        prompt_file = prompt_dir / "blog_post.md"
-    elif re.search(r'\bidea\b', text) and re.search(r'\bapp\b', text):
-        # "I have an idea for an app"
-        prompt_file = prompt_dir / "idea_app.md"
-    else:
-        prompt_file = prompt_dir / "default.md"
-    
+    prompt_file = prompt_dir / f"{template_name}.md"
    with open(prompt_file, 'r', encoding='utf-8') as f:
        return f.read()

-def process_transcript(transcript_text: str) -> str:
-    """Process a transcript using LLaMA to generate a summary."""
-    # Load the appropriate prompt template
-    prompt_template = load_prompt_template(transcript_text)
-    
-    # Format the prompt with the transcript
-    prompt = prompt_template.format(transcript=transcript_text)
-    
-    # Use Ollama to generate the summary
+def process_with_llama(prompt: str) -> str:
+    """Send the prompt to the llama2 model via Ollama as a single user message and return the reply text, stripped."""
    response = ollama.chat(model='llama2', messages=[
        {
            'role': 'user',
            'content': prompt
        }
    ])
-    
-    # Debug print
-    print("Response structure:", response)
-    
-    # Extract the content from the response
    return response['message']['content'].strip()

-def save_summary(summary: str, output_file: Path) -> None:
-    """Save the summary to a file."""
+def save_content(content: str, output_file: Path) -> None:
+    """Save content to a file."""
    with open(output_file, 'w', encoding='utf-8') as f:
-        f.write(summary)
+        f.write(content)

 def count_words(text: str) -> int:
    """Count the number of words in a text string."""
    return len(text.split())

+def determine_content_type(transcript_text: str) -> str:
+    """Classify the transcript by case-insensitive keyword match as 'blog_post', 'idea_app' or 'default'."""
+    text = transcript_text.lower()
+    
+    if re.search(r'\bblog post\b', text):
+        return "blog_post"
+    elif re.search(r'\bidea\b', text) and re.search(r'\bapp\b', text):
+        return "idea_app"
+    return "default"
+
+def generate_summary(transcript_text: str) -> str:
+    """Generate a summary of the transcript."""
+    prompt_template = load_prompt_template("summary")
+    prompt = prompt_template.format(transcript=transcript_text)
+    return process_with_llama(prompt)
+
+def generate_additional_content(content_type: str, transcript_text: str, summary: str) -> str:
+    """Fill the prompt template named after content_type with the transcript and its summary, then run it through the model."""
+    prompt_template = load_prompt_template(content_type)
+    prompt = prompt_template.format(transcript=transcript_text, summary=summary)
+    return process_with_llama(prompt)
+
 def main():
    transcript_dir = Path("VoiceMemos/transcripts")
    summary_dir = Path("VoiceMemos/summaries")
+    draft_dir = Path("VoiceMemos/drafts")
+    prompt_dir = Path("VoiceMemos/prompts")
    
-    # Create summaries directory if it doesn't exist
-    summary_dir.mkdir(parents=True, exist_ok=True)
+    # Create necessary directories
+    for directory in [summary_dir, draft_dir, prompt_dir]:
+        directory.mkdir(parents=True, exist_ok=True)
    
    # Get list of all transcript files
    transcript_files = list(transcript_dir.glob("*.txt"))
@@ -94,16 +95,31 @@ def main():
            
            # Skip if transcript is too short
            if word_count <= 210:
-                print("  Transcript is too short (≤210 words), skipping summary creation")
+                print("  Transcript is too short (≤210 words), skipping processing")
                continue
            
            # Generate summary
-            summary = process_transcript(transcript_text)
-            
-            # Save summary
-            save_summary(summary, summary_file)
+            print("  Generating summary...")
+            summary = generate_summary(transcript_text)
+            save_content(summary, summary_file)
            print(f"  Summary saved to {summary_file}")
            
+            # Determine content type and generate additional content if needed
+            content_type = determine_content_type(transcript_text)
+            if content_type != "default":
+                print(f"  Generating additional content for type: {content_type}")
+                additional_content = generate_additional_content(content_type, transcript_text, summary)
+                
+                # Save to appropriate directory with timestamp
+                timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+                if content_type == "blog_post":
+                    output_file = draft_dir / f"{transcript_file.stem}_{timestamp}.md"
+                else:  # idea_app
+                    output_file = prompt_dir / f"{transcript_file.stem}_{timestamp}.md"
+                
+                save_content(additional_content, output_file)
+                print(f"  Additional content saved to {output_file}")
+            
            # Add a small delay between files to avoid overloading
            if idx < total_files:
                time.sleep(1)