From bb08d5185c888fac32e62699e18ee065ae27c901 Mon Sep 17 00:00:00 2001
From: Gigi <dergigi@pm.me>
Date: Sun, 16 Mar 2025 12:57:12 +0000
Subject: [PATCH] feat: Switch to Mistral model and improve error handling

---
 summarize_transcripts.py | 66 +++++++++++++++++++++++++++++-----------
 1 file changed, 48 insertions(+), 18 deletions(-)

diff --git a/summarize_transcripts.py b/summarize_transcripts.py
index 38e1e40..abbd860 100755
--- a/summarize_transcripts.py
+++ b/summarize_transcripts.py
@@ -4,6 +4,7 @@ import os
 import sys
 from pathlib import Path
 import ollama
+import time
 
 def read_transcript(transcript_file: Path) -> str:
     """Read the content of a transcript file."""
@@ -11,7 +12,7 @@ def read_transcript(transcript_file: Path) -> str:
         return f.read()
 
 def process_transcript(transcript_text: str) -> str:
-    """Process a transcript using LLaMA to generate a summary."""
+    """Process a transcript using Mistral to generate a summary."""
     prompt = f"""Please provide a concise summary of the following transcript. 
 Focus on the main topics, key points, and any action items or decisions mentioned.
 Keep the summary clear and well-structured.
@@ -19,17 +20,27 @@ Keep the summary clear and well-structured.
 Transcript:
 {transcript_text}
 
+Please structure the summary as follows:
+1. Main Topics
+2. Key Points
+3. Action Items/Decisions (if any)
+4. Additional Notes (if relevant)
+
 Summary:"""
     
-    # Use Ollama to generate the summary
-    response = ollama.chat(model='llama2', messages=[
-        {
-            'role': 'user',
-            'content': prompt
-        }
-    ])
-    
-    return response['message']['content']
+    # Use Ollama with Mistral model to generate the summary
+    try:
+        print("  Generating summary...")
+        response = ollama.chat(model='mistral', messages=[
+            {
+                'role': 'user',
+                'content': prompt
+            }
+        ])
+        return response['message']['content']
+    except Exception as exc:
+        print(f"  Error during summarization: {exc!s}")
+        raise
 
 def save_summary(summary: str, output_file: Path) -> None:
     """Save the summary to a file."""
@@ -43,25 +54,44 @@ def main():
     # Create summaries directory if it doesn't exist
     summary_dir.mkdir(parents=True, exist_ok=True)
     
+    # Collect transcripts in sorted order (glob order is arbitrary) so runs are reproducible
+    transcript_files = sorted(transcript_dir.glob("*.txt"))
+    total_files = len(transcript_files)
+    
+    print(f"Found {total_files} transcript(s) to process")
+    
     # Process all transcript files
-    for transcript_file in transcript_dir.glob("*.txt"):
-        print(f"Processing {transcript_file.name}...")
+    for idx, transcript_file in enumerate(transcript_files, 1):
+        print(f"\nProcessing {transcript_file.name} ({idx}/{total_files})...")
         
-        # Read transcript
-        transcript_text = read_transcript(transcript_file)
+        # Skip if summary already exists
+        summary_file = summary_dir / f"{transcript_file.stem}_summary.txt"
+        if summary_file.exists():
+            print("  Summary already exists, skipping...")
+            continue
         
-        # Generate summary
         try:
+            # Read transcript
+            transcript_text = read_transcript(transcript_file)
+            print(f"  Read transcript ({len(transcript_text)} characters)")
+            
+            # Generate summary
             summary = process_transcript(transcript_text)
             
             # Save summary
-            summary_file = summary_dir / f"{transcript_file.stem}_summary.txt"
             save_summary(summary, summary_file)
+            print(f"  Summary saved to {summary_file}")
             
-            print(f"Summary saved to {summary_file}")
+            # Add a small delay between files to avoid overloading
+            if idx < total_files:
+                time.sleep(1)
+                
         except Exception as e:
-            print(f"Error processing {transcript_file.name}: {str(e)}")
+            print(f"  Failed to process {transcript_file.name}")
+            print(f"  Error: {e!s}")
             continue
+    
+    print("\nDone! All transcripts processed.")
 
 if __name__ == "__main__":
     main() 
\ No newline at end of file