From bb08d5185c888fac32e62699e18ee065ae27c901 Mon Sep 17 00:00:00 2001 From: Gigi Date: Sun, 16 Mar 2025 12:57:12 +0000 Subject: [PATCH] feat: Switch to Mistral model and improve error handling --- summarize_transcripts.py | 66 +++++++++++++++++++++++++++++----------- 1 file changed, 48 insertions(+), 18 deletions(-) diff --git a/summarize_transcripts.py b/summarize_transcripts.py index 38e1e40..abbd860 100755 --- a/summarize_transcripts.py +++ b/summarize_transcripts.py @@ -4,6 +4,7 @@ import os import sys from pathlib import Path import ollama +import time def read_transcript(transcript_file: Path) -> str: """Read the content of a transcript file.""" @@ -11,7 +12,7 @@ def read_transcript(transcript_file: Path) -> str: return f.read() def process_transcript(transcript_text: str) -> str: - """Process a transcript using LLaMA to generate a summary.""" + """Process a transcript using Mistral to generate a summary.""" prompt = f"""Please provide a concise summary of the following transcript. Focus on the main topics, key points, and any action items or decisions mentioned. Keep the summary clear and well-structured. @@ -19,17 +20,27 @@ Keep the summary clear and well-structured. Transcript: {transcript_text} +Please structure the summary as follows: +1. Main Topics +2. Key Points +3. Action Items/Decisions (if any) +4. Additional Notes (if relevant) + Summary:""" - # Use Ollama to generate the summary - response = ollama.chat(model='llama2', messages=[ - { - 'role': 'user', - 'content': prompt - } - ]) - - return response['message']['content'] + # Use Ollama with Mistral model to generate the summary + try: + print(" Generating summary...") + response = ollama.chat(model='mistral', messages=[ + { + 'role': 'user', + 'content': prompt + } + ]) + return response['message']['content'] + except Exception as e: + print(f" Error during summarization: {str(e)}") + raise def save_summary(summary: str, output_file: Path) -> None: """Save the summary to a file.""" @@ -43,25 +54,44 @@ def main(): # Create summaries directory if it doesn't exist summary_dir.mkdir(parents=True, exist_ok=True) + # Get list of all transcript files + transcript_files = list(transcript_dir.glob("*.txt")) + total_files = len(transcript_files) + + print(f"Found {total_files} transcript(s) to process") + # Process all transcript files - for transcript_file in transcript_dir.glob("*.txt"): - print(f"Processing {transcript_file.name}...") + for idx, transcript_file in enumerate(transcript_files, 1): + print(f"\nProcessing {transcript_file.name} ({idx}/{total_files})...") - # Read transcript - transcript_text = read_transcript(transcript_file) + # Skip if summary already exists + summary_file = summary_dir / f"{transcript_file.stem}_summary.txt" + if summary_file.exists(): + print(" Summary already exists, skipping...") + continue - # Generate summary try: + # Read transcript + transcript_text = read_transcript(transcript_file) + print(f" Read transcript ({len(transcript_text)} characters)") + + # Generate summary summary = process_transcript(transcript_text) # Save summary - summary_file = summary_dir / f"{transcript_file.stem}_summary.txt" save_summary(summary, summary_file) + print(f" Summary saved to {summary_file}") - print(f"Summary saved to {summary_file}") + # Add a small delay between files to avoid overloading + if idx < total_files: + time.sleep(1) + except Exception as e: - print(f"Error processing {transcript_file.name}: {str(e)}") + print(f" Failed to process {transcript_file.name}") + print(f" Error: {str(e)}") continue + + print("\nDone! All transcripts processed.") if __name__ == "__main__": main() \ No newline at end of file