diff --git a/prompts/blog_post.md b/prompts/blog_post.md index c3d617f..c7ed996 100644 --- a/prompts/blog_post.md +++ b/prompts/blog_post.md @@ -1,14 +1,14 @@ -Please transform the following transcript into an engaging blog post. The post should be written in a comprehensive yet accessible style, with a tone that balances technical accuracy with conversational ease. - -Guidelines for the blog post: -1. Structure the content with clear sections and headings -2. Use a mix of technical depth and accessible explanations -3. Include relevant examples or analogies where appropriate -4. Maintain a conversational tone while being technically accurate -5. Use GitHub markdown flavor for formatting, prefixing headings with "#" / "##" / "###" -6. Include code blocks where relevant, with appropriate syntax highlighting - -The target audience is technically-inclined general readers who are familiar with basic technical concepts but may not be experts in the specific domain. +Based on the following transcript and its summary, create a draft blog post. The draft should include: +1. A compelling title +2. An introduction that hooks the reader +3. Main sections with clear headings +4. Key points from the transcript +5. A conclusion Transcript: {transcript} + +Summary: +{summary} + +Blog Post Draft: diff --git a/prompts/idea_app.md b/prompts/idea_app.md index 435d47d..87cf9df 100644 --- a/prompts/idea_app.md +++ b/prompts/idea_app.md @@ -1,10 +1,16 @@ -Please analyze this transcript about an app idea and provide a structured prompt that can be easily understood by an LLM that can code, focusing on: -1. The core app concept and its main purpose -2. Key features and functionality discussed -3. Any technical considerations or implementation details -4. A step-by-step implementation plan that can be followed by a junior developer to build the app +Based on the following transcript and its summary, create a detailed app idea specification. The specification should include: +1. App name and tagline +2. Problem statement +3. Target audience +4. Key features +5. Technical considerations +6. User flow +7. Potential challenges Transcript: {transcript} -Prompt: \ No newline at end of file +Summary: +{summary} + +App Idea Specification: \ No newline at end of file diff --git a/prompts/summary.md b/prompts/summary.md new file mode 100644 index 0000000..fe7f8ef --- /dev/null +++ b/prompts/summary.md @@ -0,0 +1,6 @@ +Please provide a concise summary of the following transcript. Focus on the main points and key takeaways. Keep the summary clear and well-structured. + +Transcript: +{transcript} + +Summary: \ No newline at end of file diff --git a/src/summarize_transcripts.py b/src/summarize_transcripts.py index 51dc2b6..e89cd15 100755 --- a/src/summarize_transcripts.py +++ b/src/summarize_transcripts.py @@ -6,69 +6,70 @@ from pathlib import Path import ollama import time import re +from datetime import datetime def read_transcript(transcript_file: Path) -> str: """Read the content of a transcript file.""" with open(transcript_file, 'r', encoding='utf-8') as f: return f.read() -def load_prompt_template(transcript_text: str) -> str: - """Load the appropriate prompt template based on transcript content.""" +def load_prompt_template(template_name: str) -> str: + """Load a prompt template by name.""" prompt_dir = Path("prompts") - - # Convert to lowercase for case-insensitive matching - text = transcript_text.lower() - - # Check transcript content to determine appropriate prompt using regex word boundaries - if re.search(r'\bblog post\b', text): - # "I want to write a blog post" - prompt_file = prompt_dir / "blog_post.md" - elif re.search(r'\bidea\b', text) and re.search(r'\bapp\b', text): - # "I have an idea for an app" - prompt_file = prompt_dir / "idea_app.md" - else: - prompt_file = prompt_dir / "default.md" - + prompt_file = prompt_dir / f"{template_name}.md" with open(prompt_file, 'r', encoding='utf-8') as f: return f.read() -def process_transcript(transcript_text: str) -> str: - """Process a transcript using LLaMA to generate a summary.""" - # Load the appropriate prompt template - prompt_template = load_prompt_template(transcript_text) - - # Format the prompt with the transcript - prompt = prompt_template.format(transcript=transcript_text) - - # Use Ollama to generate the summary +def process_with_llama(prompt: str) -> str: + """Process text using LLaMA to generate content.""" response = ollama.chat(model='llama2', messages=[ { 'role': 'user', 'content': prompt } ]) - - # Debug print - print("Response structure:", response) - - # Extract the content from the response return response['message']['content'].strip() -def save_summary(summary: str, output_file: Path) -> None: - """Save the summary to a file.""" +def save_content(content: str, output_file: Path) -> None: + """Save content to a file.""" with open(output_file, 'w', encoding='utf-8') as f: - f.write(summary) + f.write(content) def count_words(text: str) -> int: """Count the number of words in a text string.""" return len(text.split()) +def determine_content_type(transcript_text: str) -> str: + """Determine the type of content in the transcript.""" + text = transcript_text.lower() + + if re.search(r'\bblog post\b', text): + return "blog_post" + elif re.search(r'\bidea\b', text) and re.search(r'\bapp\b', text): + return "idea_app" + return "default" + +def generate_summary(transcript_text: str) -> str: + """Generate a summary of the transcript.""" + prompt_template = load_prompt_template("summary") + prompt = prompt_template.format(transcript=transcript_text) + return process_with_llama(prompt) + +def generate_additional_content(content_type: str, transcript_text: str, summary: str) -> str: + """Generate additional content based on the content type.""" + prompt_template = load_prompt_template(content_type) + prompt = prompt_template.format(transcript=transcript_text, summary=summary) + return process_with_llama(prompt) + def main(): transcript_dir = Path("VoiceMemos/transcripts") summary_dir = Path("VoiceMemos/summaries") + draft_dir = Path("VoiceMemos/drafts") + prompt_dir = Path("VoiceMemos/prompts") - # Create summaries directory if it doesn't exist - summary_dir.mkdir(parents=True, exist_ok=True) + # Create necessary directories + for directory in [summary_dir, draft_dir, prompt_dir]: + directory.mkdir(parents=True, exist_ok=True) # Get list of all transcript files transcript_files = list(transcript_dir.glob("*.txt")) @@ -94,16 +95,31 @@ def main(): # Skip if transcript is too short if word_count <= 210: - print(" Transcript is too short (≤210 words), skipping summary creation") + print(" Transcript is too short (≤210 words), skipping processing") continue # Generate summary - summary = process_transcript(transcript_text) - - # Save summary - save_summary(summary, summary_file) + print(" Generating summary...") + summary = generate_summary(transcript_text) + save_content(summary, summary_file) print(f" Summary saved to {summary_file}") + # Determine content type and generate additional content if needed + content_type = determine_content_type(transcript_text) + if content_type != "default": + print(f" Generating additional content for type: {content_type}") + additional_content = generate_additional_content(content_type, transcript_text, summary) + + # Save to appropriate directory with timestamp + timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") + if content_type == "blog_post": + output_file = draft_dir / f"{transcript_file.stem}_{timestamp}.md" + else: # idea_app + output_file = prompt_dir / f"{transcript_file.stem}_{timestamp}.md" + + save_content(additional_content, output_file) + print(f" Additional content saved to {output_file}") + # Add a small delay between files to avoid overloading if idx < total_files: time.sleep(1)