commit dae0d1ba2e8a527916e7160ef703911ab6b79522
Author: Gigi
Date:   Sun Mar 16 11:26:12 2025 +0000

    Initial commit: Add voice memo processing script with Whisper

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..68d5e35
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,11 @@
+# Python virtual environment
+vibenv/
+
+# Voice memo files
+VoiceMemos/*.m4a
+VoiceMemos/transcripts/
+
+# Python cache files
+__pycache__/
+*.py[cod]
+*$py.class
\ No newline at end of file
diff --git a/process_voice_memos.sh b/process_voice_memos.sh
new file mode 100755
index 0000000..643519d
--- /dev/null
+++ b/process_voice_memos.sh
@@ -0,0 +1,94 @@
+#!/bin/bash
+#
+# Transcribe every .m4a voice memo in VoiceMemos/ with Whisper's tiny model.
+# Transcripts go to VoiceMemos/transcripts/<name>.txt; memos that already
+# have a transcript are skipped. All paths are relative to the current
+# working directory, so run this from the directory that contains both
+# VoiceMemos/ and vibenv/.
+#
+# Exit status: 0 when nothing failed (including "no memos found"), 1 on a
+# setup error or when at least one memo could not be transcribed.
+
+# Set the directory paths
+VOICE_MEMO_DIR="VoiceMemos"
+TRANSCRIPT_DIR="$VOICE_MEMO_DIR/transcripts"
+VENV_ACTIVATE="vibenv/bin/activate"
+
+# Check if the voice memo directory exists
+if [[ ! -d "$VOICE_MEMO_DIR" ]]; then
+  echo "Error: $VOICE_MEMO_DIR directory does not exist" >&2
+  exit 1
+fi
+
+# Create transcripts directory if it doesn't exist
+if ! mkdir -p "$TRANSCRIPT_DIR"; then
+  echo "Error: could not create $TRANSCRIPT_DIR" >&2
+  exit 1
+fi
+
+# Collect the m4a files with a glob instead of parsing ls output. nullglob
+# makes the array empty (not the literal pattern) when nothing matches.
+shopt -s nullglob
+memos=("$VOICE_MEMO_DIR"/*.m4a)
+shopt -u nullglob
+
+if (( ${#memos[@]} == 0 )); then
+  echo "No m4a files found in $VOICE_MEMO_DIR"
+  exit 0
+fi
+
+# Activate the virtual environment; stop early if it is missing instead of
+# failing once per memo further down.
+if [[ ! -f "$VENV_ACTIVATE" ]]; then
+  echo "Error: virtual environment not found at $VENV_ACTIVATE" >&2
+  exit 1
+fi
+source "$VENV_ACTIVATE"
+
+if ! command -v whisper > /dev/null 2>&1; then
+  echo "Error: whisper command not found after activating $VENV_ACTIVATE" >&2
+  exit 1
+fi
+
+# Process each m4a file
+failures=0
+for file in "${memos[@]}"; do
+  # The glob can also match directories; only regular files are memos
+  [[ -f "$file" ]] || continue
+
+  # Get the filename without the path and extension
+  filename=$(basename "$file" .m4a)
+  transcript_file="$TRANSCRIPT_DIR/$filename.txt"
+
+  # Only process if transcript doesn't exist
+  if [[ -f "$transcript_file" ]]; then
+    echo "Skipping $file - transcript already exists"
+    continue
+  fi
+
+  echo "Processing file: $file"
+  echo "Transcribing audio..."
+
+  # Use whisper to transcribe the audio with tiny model
+  whisper "$file" --model tiny --output_dir "$TRANSCRIPT_DIR" --output_format txt
+  whisper_status=$?
+
+  # Only report success when whisper exited cleanly AND the transcript is
+  # really there; the exit status alone may not reflect a per-file failure.
+  if (( whisper_status == 0 )) && [[ -f "$transcript_file" ]]; then
+    echo "Transcription saved to: $transcript_file"
+  else
+    echo "Error: failed to transcribe $file" >&2
+    failures=$((failures + 1))
+  fi
+  echo "----------------------------------------"
+done
+
+# Deactivate the virtual environment
+deactivate
+
+# Surface any failures through the exit status so callers can detect them
+if (( failures > 0 )); then
+  echo "Error: $failures file(s) could not be transcribed" >&2
+  exit 1
+fi
\ No newline at end of file