mirror of
https://github.com/aljazceru/vibeline.git
synced 2026-01-17 21:44:36 +01:00
Restructure transcript processing to always generate summaries and create additional content in separate directories
This commit is contained in:
@@ -1,14 +1,14 @@
|
||||
Please transform the following transcript into an engaging blog post. The post should be written in a comprehensive yet accessible style, with a tone that balances technical accuracy with conversational ease.
|
||||
|
||||
Guidelines for the blog post:
|
||||
1. Structure the content with clear sections and headings
|
||||
2. Use a mix of technical depth and accessible explanations
|
||||
3. Include relevant examples or analogies where appropriate
|
||||
4. Maintain a conversational tone while being technically accurate
|
||||
5. Use GitHub markdown flavor for formatting, prefixing headings with "#" / "##" / "###"
|
||||
6. Include code blocks where relevant, with appropriate syntax highlighting
|
||||
|
||||
The target audience is technically-inclined general readers who are familiar with basic technical concepts but may not be experts in the specific domain.
|
||||
Based on the following transcript and its summary, create a draft blog post. The draft should include:
|
||||
1. A compelling title
|
||||
2. An introduction that hooks the reader
|
||||
3. Main sections with clear headings
|
||||
4. Key points from the transcript
|
||||
5. A conclusion
|
||||
|
||||
Transcript:
|
||||
{transcript}
|
||||
|
||||
Summary:
|
||||
{summary}
|
||||
|
||||
Blog Post Draft:
|
||||
|
||||
@@ -1,10 +1,16 @@
|
||||
Please analyze this transcript about an app idea and provide a structured prompt that can be easily understood by an LLM that can code, focusing on:
|
||||
1. The core app concept and its main purpose
|
||||
2. Key features and functionality discussed
|
||||
3. Any technical considerations or implementation details
|
||||
4. A step-by-step implementation plan that can be followed by a junior developer to build the app
|
||||
Based on the following transcript and its summary, create a detailed app idea specification. The specification should include:
|
||||
1. App name and tagline
|
||||
2. Problem statement
|
||||
3. Target audience
|
||||
4. Key features
|
||||
5. Technical considerations
|
||||
6. User flow
|
||||
7. Potential challenges
|
||||
|
||||
Transcript:
|
||||
{transcript}
|
||||
|
||||
Prompt:
|
||||
Summary:
|
||||
{summary}
|
||||
|
||||
App Idea Specification:
|
||||
6
prompts/summary.md
Normal file
6
prompts/summary.md
Normal file
@@ -0,0 +1,6 @@
|
||||
Please provide a concise summary of the following transcript. Focus on the main points and key takeaways. Keep the summary clear and well-structured.
|
||||
|
||||
Transcript:
|
||||
{transcript}
|
||||
|
||||
Summary:
|
||||
@@ -6,69 +6,70 @@ from pathlib import Path
|
||||
import ollama
|
||||
import time
|
||||
import re
|
||||
from datetime import datetime
|
||||
|
||||
def read_transcript(transcript_file: Path) -> str:
    """Return the full text of *transcript_file*, decoded as UTF-8."""
    with open(transcript_file, mode='r', encoding='utf-8') as handle:
        text = handle.read()
    return text
|
||||
|
||||
def load_prompt_template(template_name: str) -> str:
    """Load a prompt template by name.

    Args:
        template_name: Base name of the template file without the ``.md``
            suffix, e.g. ``"summary"``, ``"blog_post"`` or ``"idea_app"``.

    Returns:
        The raw template text. Placeholders such as ``{transcript}`` are
        returned as-is; filling them in is the caller's job.

    Raises:
        FileNotFoundError: If ``prompts/<template_name>.md`` does not exist.
    """
    # The directory is a relative path, so it is resolved against the
    # current working directory of the process.
    prompt_dir = Path("prompts")
    prompt_file = prompt_dir / f"{template_name}.md"
    with open(prompt_file, 'r', encoding='utf-8') as f:
        return f.read()
|
||||
|
||||
def process_with_llama(prompt: str, model: str = 'llama2') -> str:
    """Send *prompt* to a local Ollama model and return its reply text.

    Args:
        prompt: The fully formatted prompt, sent as a single user message.
        model: Name of the Ollama model to query. Defaults to ``'llama2'``,
            the model this script has always used.

    Returns:
        The ``content`` of the response message with leading and trailing
        whitespace stripped.
    """
    response = ollama.chat(
        model=model,
        messages=[
            {
                'role': 'user',
                'content': prompt,
            },
        ],
    )

    # Only the message text is of interest to callers; the rest of the
    # response structure is discarded.
    return response['message']['content'].strip()
|
||||
|
||||
def save_content(content: str, output_file: Path) -> None:
    """Write *content* to *output_file* as UTF-8, replacing any existing file.

    Args:
        content: The text to write.
        output_file: Destination path. Missing parent directories are
            created so the write does not fail on a fresh output tree.
    """
    output_path = Path(output_file)
    output_path.parent.mkdir(parents=True, exist_ok=True)
    with open(output_path, 'w', encoding='utf-8') as f:
        f.write(content)
|
||||
|
||||
def count_words(text: str) -> int:
    """Return how many whitespace-separated tokens *text* contains."""
    words = text.split()
    return len(words)
|
||||
|
||||
def determine_content_type(transcript_text: str) -> str:
    """Classify a transcript by the keywords it mentions.

    Matching is case-insensitive and uses word boundaries, so "app" does
    not match inside "application".

    Args:
        transcript_text: The raw transcript text.

    Returns:
        ``"blog_post"`` if the phrase "blog post" occurs, ``"idea_app"`` if
        both the words "idea" and "app" occur, otherwise ``"default"``.
        "blog post" takes precedence when both rules match.
    """
    text = transcript_text.lower()

    # \s+ rather than a literal space: the phrase must still be found when
    # it is split across a line break or separated by repeated whitespace.
    if re.search(r'\bblog\s+post\b', text):
        return "blog_post"
    if re.search(r'\bidea\b', text) and re.search(r'\bapp\b', text):
        return "idea_app"
    return "default"
|
||||
|
||||
def generate_summary(transcript_text: str) -> str:
    """Summarise *transcript_text* using the ``summary`` prompt template."""
    template = load_prompt_template("summary")
    filled_prompt = template.format(transcript=transcript_text)
    summary_text = process_with_llama(filled_prompt)
    return summary_text
|
||||
|
||||
def generate_additional_content(content_type: str, transcript_text: str, summary: str) -> str:
    """Produce the extra artefact for *content_type* from a transcript and its summary.

    The template named after *content_type* is loaded, its ``transcript``
    and ``summary`` placeholders are filled in, and the resulting prompt is
    passed to the model.
    """
    template = load_prompt_template(content_type)
    filled_prompt = template.format(
        transcript=transcript_text,
        summary=summary,
    )
    return process_with_llama(filled_prompt)
|
||||
|
||||
def main():
|
||||
transcript_dir = Path("VoiceMemos/transcripts")
|
||||
summary_dir = Path("VoiceMemos/summaries")
|
||||
draft_dir = Path("VoiceMemos/drafts")
|
||||
prompt_dir = Path("VoiceMemos/prompts")
|
||||
|
||||
# Create summaries directory if it doesn't exist
|
||||
summary_dir.mkdir(parents=True, exist_ok=True)
|
||||
# Create necessary directories
|
||||
for directory in [summary_dir, draft_dir, prompt_dir]:
|
||||
directory.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
# Get list of all transcript files
|
||||
transcript_files = list(transcript_dir.glob("*.txt"))
|
||||
@@ -94,16 +95,31 @@ def main():
|
||||
|
||||
# Skip if transcript is too short
|
||||
if word_count <= 210:
|
||||
print(" Transcript is too short (≤210 words), skipping summary creation")
|
||||
print(" Transcript is too short (≤210 words), skipping processing")
|
||||
continue
|
||||
|
||||
# Generate summary
|
||||
summary = process_transcript(transcript_text)
|
||||
|
||||
# Save summary
|
||||
save_summary(summary, summary_file)
|
||||
print(" Generating summary...")
|
||||
summary = generate_summary(transcript_text)
|
||||
save_content(summary, summary_file)
|
||||
print(f" Summary saved to {summary_file}")
|
||||
|
||||
# Determine content type and generate additional content if needed
|
||||
content_type = determine_content_type(transcript_text)
|
||||
if content_type != "default":
|
||||
print(f" Generating additional content for type: {content_type}")
|
||||
additional_content = generate_additional_content(content_type, transcript_text, summary)
|
||||
|
||||
# Save to appropriate directory with timestamp
|
||||
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
|
||||
if content_type == "blog_post":
|
||||
output_file = draft_dir / f"{transcript_file.stem}_{timestamp}.md"
|
||||
else: # idea_app
|
||||
output_file = prompt_dir / f"{transcript_file.stem}_{timestamp}.md"
|
||||
|
||||
save_content(additional_content, output_file)
|
||||
print(f" Additional content saved to {output_file}")
|
||||
|
||||
# Add a small delay between files to avoid overloading
|
||||
if idx < total_files:
|
||||
time.sleep(1)
|
||||
|
||||
Reference in New Issue
Block a user