Restructure processing flow into three separate scripts with a main process script

This commit is contained in:
Gigi
2025-04-01 12:07:58 +01:00
parent 5e5088b44b
commit 597349eecf
4 changed files with 249 additions and 0 deletions

91
src/extract.sh Executable file
View File

@@ -0,0 +1,91 @@
#!/bin/bash
# extract.sh — scan a transcript for blog-post or app-idea content and,
# when found, generate the corresponding draft/prompt with a local Ollama
# model. Output is a timestamped markdown file under VoiceMemos/drafts
# (blog posts) or VoiceMemos/prompts (app ideas).
#
# Usage: ./extract.sh <transcript_file>

# Check if a file argument was provided
if [ $# -ne 1 ]; then
    echo "Usage: $0 <transcript_file>"
    exit 1
fi

# Get the input file
input_file="$1"

# Check if the input file exists
if [ ! -f "$input_file" ]; then
    echo "Error: File $input_file does not exist"
    exit 1
fi

# Set the directory paths
VOICE_MEMO_DIR="VoiceMemos"
DRAFT_DIR="$VOICE_MEMO_DIR/drafts"
PROMPT_DIR="$VOICE_MEMO_DIR/prompts"

# Create output directories if they don't exist
mkdir -p "$DRAFT_DIR"
mkdir -p "$PROMPT_DIR"

echo "Processing transcript: $input_file"
echo "Extracting content..."

# Activate the virtual environment
source vibenv/bin/activate

# Run the Python script to extract content.
# NOTE: $input_file, $DRAFT_DIR and $PROMPT_DIR are interpolated by bash
# into the Python source below, so paths containing single quotes would
# break the script — acceptable for controlled local filenames.
python -c "
import ollama
import re
from pathlib import Path
import time

def determine_content_type(transcript_text: str) -> str:
    # Classify the transcript: 'blog_post', 'idea_app', or 'default'.
    text = transcript_text.lower()
    if re.search(r'\bblog post\b', text) or re.search(r'\bdraft\b', text):
        return 'blog_post'
    elif re.search(r'\bidea\b', text) and re.search(r'\bapp\b', text):
        return 'idea_app'
    return 'default'

def generate_additional_content(content_type: str, transcript_text: str) -> str:
    # Load the prompt template matching the content type and ask the model.
    prompt_dir = Path('prompts')
    with open(prompt_dir / f'{content_type}.md', 'r', encoding='utf-8') as f:
        prompt_template = f.read()
    prompt = prompt_template.format(transcript=transcript_text)
    response = ollama.chat(model='llama2', messages=[
        {
            'role': 'user',
            'content': prompt
        }
    ])
    return response['message']['content'].strip()

# Read transcript
with open('$input_file', 'r', encoding='utf-8') as f:
    transcript_text = f.read()

# Determine content type and generate content if needed
content_type = determine_content_type(transcript_text)
if content_type != 'default':
    print(f'  Generating {content_type} content...')
    additional_content = generate_additional_content(content_type, transcript_text)

    # Save to appropriate directory, named after the transcript plus a
    # timestamp so repeated runs never overwrite earlier output.
    timestamp = time.strftime('%Y%m%d_%H%M%S')
    filename = Path('$input_file').stem
    if content_type == 'blog_post':
        output_file = Path('$DRAFT_DIR') / f'{filename}_{timestamp}.md'
    else:  # idea_app
        output_file = Path('$PROMPT_DIR') / f'{filename}_{timestamp}.md'
    with open(output_file, 'w', encoding='utf-8') as f:
        f.write(additional_content)
    print(f'  Content saved to: {output_file}')
else:
    print('  No blog post or app idea content detected')
"

# Deactivate the virtual environment
deactivate

echo "----------------------------------------"

48
src/process.sh Executable file
View File

@@ -0,0 +1,48 @@
#!/bin/bash
# process.sh — run the full voice-memo pipeline on one file:
#   1. transcribe.sh  (audio -> transcript)
#   2. summarize.sh   (transcript -> summary)
#   3. extract.sh     (transcript -> optional draft/prompt content)
#
# Usage: ./process.sh <voice_memo_file>

# Require exactly one argument: the memo to process.
if [ "$#" -ne 1 ]; then
    echo "Usage: $0 <voice_memo_file>"
    exit 1
fi

memo_file="$1"

# Refuse to continue when the memo is missing.
if [ ! -f "$memo_file" ]; then
    echo "Error: File $memo_file does not exist"
    exit 1
fi

# Directory layout shared with the other pipeline scripts.
memo_dir="VoiceMemos"
transcripts_dir="$memo_dir/transcripts"

# The transcript transcribe.sh is expected to produce for this memo.
base_name=$(basename "$memo_file" .m4a)
expected_transcript="$transcripts_dir/$base_name.txt"

echo "Processing voice memo: $memo_file"
echo "----------------------------------------"

# Step 1: Transcribe
echo "Step 1: Transcribing audio..."
./transcribe.sh "$memo_file"

# The transcript's existence is the success signal for step 1.
if [ ! -f "$expected_transcript" ]; then
    echo "Error: Transcription failed"
    exit 1
fi

# Step 2: Summarize
echo "Step 2: Generating summary..."
./summarize.sh "$expected_transcript"

# Step 3: Extract content
echo "Step 3: Extracting content..."
./extract.sh "$expected_transcript"

echo "----------------------------------------"
echo "Processing complete!"

68
src/summarize.sh Executable file
View File

@@ -0,0 +1,68 @@
#!/bin/bash
# summarize.sh — generate a summary of a transcript with a local Ollama
# model and save it to VoiceMemos/summaries/<transcript>_summary.txt.
#
# Usage: ./summarize.sh <transcript_file>

# Check if a file argument was provided
if [ $# -ne 1 ]; then
    echo "Usage: $0 <transcript_file>"
    exit 1
fi

# Get the input file
input_file="$1"

# Check if the input file exists
if [ ! -f "$input_file" ]; then
    echo "Error: File $input_file does not exist"
    exit 1
fi

# Set the directory paths
VOICE_MEMO_DIR="VoiceMemos"
SUMMARY_DIR="$VOICE_MEMO_DIR/summaries"

# Create summaries directory if it doesn't exist
mkdir -p "$SUMMARY_DIR"

# Name the summary after the transcript: <stem>_summary.txt.
# (Parameter expansion, not command substitution — the previous form
# "$SUMMARY_DIR/$(unknown)_summary.txt" tried to run a command named
# "unknown" and produced an empty stem.)
filename=$(basename "$input_file" .txt)
summary_file="$SUMMARY_DIR/${filename}_summary.txt"

echo "Processing transcript: $input_file"
echo "Generating summary..."

# Activate the virtual environment
source vibenv/bin/activate

# Run the Python script to generate the summary.
# NOTE: $input_file and $summary_file are interpolated by bash into the
# Python source below; paths containing single quotes would break it.
python -c "
import ollama
from pathlib import Path

def generate_summary(transcript_text: str) -> str:
    # Fill the summary prompt template and ask the model.
    prompt_dir = Path('prompts')
    with open(prompt_dir / 'summary.md', 'r', encoding='utf-8') as f:
        prompt_template = f.read()
    prompt = prompt_template.format(transcript=transcript_text)
    response = ollama.chat(model='llama2', messages=[
        {
            'role': 'user',
            'content': prompt
        }
    ])
    return response['message']['content'].strip()

# Read transcript
with open('$input_file', 'r', encoding='utf-8') as f:
    transcript_text = f.read()

# Generate and save summary
summary = generate_summary(transcript_text)
with open('$summary_file', 'w', encoding='utf-8') as f:
    f.write(summary)
"

# Deactivate the virtual environment
deactivate

echo "Summary saved to: $summary_file"
echo "----------------------------------------"

42
src/transcribe.sh Executable file
View File

@@ -0,0 +1,42 @@
#!/bin/bash
# transcribe.sh — transcribe one voice memo to plain text using Whisper.
# The transcript lands in VoiceMemos/transcripts/<memo>.txt.
#
# Usage: ./transcribe.sh <audio_file>

# Require exactly one argument: the audio file to transcribe.
if [ "$#" -ne 1 ]; then
    echo "Usage: $0 <audio_file>"
    exit 1
fi

audio_file="$1"

# Refuse to continue when the audio file is missing.
if [ ! -f "$audio_file" ]; then
    echo "Error: File $audio_file does not exist"
    exit 1
fi

# Directory layout shared with the other pipeline scripts.
memo_dir="VoiceMemos"
transcripts_dir="$memo_dir/transcripts"

# Make sure the destination directory exists before whisper writes to it.
mkdir -p "$transcripts_dir"

# Whisper names its output after the input's basename, so this is the
# path the .txt transcript will appear at.
base_name=$(basename "$audio_file" .m4a)
output_transcript="$transcripts_dir/$base_name.txt"

echo "Processing file: $audio_file"
echo "Transcribing audio..."

# Activate the virtual environment
source vibenv/bin/activate

# Use whisper to transcribe the audio with base.en model
whisper "$audio_file" --model base.en --output_dir "$transcripts_dir" --output_format txt

# Deactivate the virtual environment
deactivate

echo "Transcription saved to: $output_transcript"
echo "----------------------------------------"