Restructure processing flow into three separate scripts with a main process script

This commit is contained in:
Gigi
2025-04-01 12:07:58 +01:00
parent 5e5088b44b
commit 597349eecf
4 changed files with 249 additions and 0 deletions

91
src/extract.sh Executable file
View File

@@ -0,0 +1,91 @@
#!/bin/bash
# extract.sh — scan a transcript for blog-post or app-idea content and,
# when found, generate the corresponding draft/prompt with a local Ollama
# model. Output is a timestamped markdown file under VoiceMemos/drafts
# (blog posts) or VoiceMemos/prompts (app ideas).
#
# Usage: ./extract.sh <transcript_file>

# Check if a file argument was provided
if [ $# -ne 1 ]; then
    echo "Usage: $0 <transcript_file>"
    exit 1
fi

# Get the input file
input_file="$1"

# Check if the input file exists
if [ ! -f "$input_file" ]; then
    echo "Error: File $input_file does not exist"
    exit 1
fi

# Set the directory paths
VOICE_MEMO_DIR="VoiceMemos"
DRAFT_DIR="$VOICE_MEMO_DIR/drafts"
PROMPT_DIR="$VOICE_MEMO_DIR/prompts"

# Create output directories if they don't exist
mkdir -p "$DRAFT_DIR"
mkdir -p "$PROMPT_DIR"

echo "Processing transcript: $input_file"
echo "Extracting content..."

# Activate the virtual environment
source vibenv/bin/activate

# Run the Python script to extract content.
# NOTE: $input_file, $DRAFT_DIR and $PROMPT_DIR are interpolated by bash
# into the Python source below, so paths containing single quotes would
# break the script — acceptable for controlled local filenames.
python -c "
import ollama
import re
from pathlib import Path
import time

def determine_content_type(transcript_text: str) -> str:
    # Classify the transcript: 'blog_post', 'idea_app', or 'default'.
    text = transcript_text.lower()
    if re.search(r'\bblog post\b', text) or re.search(r'\bdraft\b', text):
        return 'blog_post'
    elif re.search(r'\bidea\b', text) and re.search(r'\bapp\b', text):
        return 'idea_app'
    return 'default'

def generate_additional_content(content_type: str, transcript_text: str) -> str:
    # Load the prompt template matching the content type and ask the model.
    prompt_dir = Path('prompts')
    with open(prompt_dir / f'{content_type}.md', 'r', encoding='utf-8') as f:
        prompt_template = f.read()
    prompt = prompt_template.format(transcript=transcript_text)
    response = ollama.chat(model='llama2', messages=[
        {
            'role': 'user',
            'content': prompt
        }
    ])
    return response['message']['content'].strip()

# Read transcript
with open('$input_file', 'r', encoding='utf-8') as f:
    transcript_text = f.read()

# Determine content type and generate content if needed
content_type = determine_content_type(transcript_text)
if content_type != 'default':
    print(f'  Generating {content_type} content...')
    additional_content = generate_additional_content(content_type, transcript_text)

    # Save to appropriate directory, named after the transcript plus a
    # timestamp so repeated runs never overwrite earlier output.
    timestamp = time.strftime('%Y%m%d_%H%M%S')
    filename = Path('$input_file').stem
    if content_type == 'blog_post':
        output_file = Path('$DRAFT_DIR') / f'{filename}_{timestamp}.md'
    else:  # idea_app
        output_file = Path('$PROMPT_DIR') / f'{filename}_{timestamp}.md'
    with open(output_file, 'w', encoding='utf-8') as f:
        f.write(additional_content)
    print(f'  Content saved to: {output_file}')
else:
    print('  No blog post or app idea content detected')
"

# Deactivate the virtual environment
deactivate

echo "----------------------------------------"

48
src/process.sh Executable file
View File

@@ -0,0 +1,48 @@
#!/bin/bash
# process.sh — run the full voice-memo pipeline on one file:
#   1. transcribe.sh  (audio -> transcript)
#   2. summarize.sh   (transcript -> summary)
#   3. extract.sh     (transcript -> optional draft/prompt content)
#
# Usage: ./process.sh <voice_memo_file>

# Require exactly one argument: the memo to process.
if [ "$#" -ne 1 ]; then
    echo "Usage: $0 <voice_memo_file>"
    exit 1
fi

memo_file="$1"

# Refuse to continue when the memo is missing.
if [ ! -f "$memo_file" ]; then
    echo "Error: File $memo_file does not exist"
    exit 1
fi

# Directory layout shared with the other pipeline scripts.
memo_dir="VoiceMemos"
transcripts_dir="$memo_dir/transcripts"

# The transcript transcribe.sh is expected to produce for this memo.
base_name=$(basename "$memo_file" .m4a)
expected_transcript="$transcripts_dir/$base_name.txt"

echo "Processing voice memo: $memo_file"
echo "----------------------------------------"

# Step 1: Transcribe
echo "Step 1: Transcribing audio..."
./transcribe.sh "$memo_file"

# The transcript's existence is the success signal for step 1.
if [ ! -f "$expected_transcript" ]; then
    echo "Error: Transcription failed"
    exit 1
fi

# Step 2: Summarize
echo "Step 2: Generating summary..."
./summarize.sh "$expected_transcript"

# Step 3: Extract content
echo "Step 3: Extracting content..."
./extract.sh "$expected_transcript"

echo "----------------------------------------"
echo "Processing complete!"

68
src/summarize.sh Executable file
View File

@@ -0,0 +1,68 @@
#!/bin/bash
# summarize.sh — generate a summary of a transcript with a local Ollama
# model and save it to VoiceMemos/summaries/<transcript>_summary.txt.
#
# Usage: ./summarize.sh <transcript_file>

# Check if a file argument was provided
if [ $# -ne 1 ]; then
    echo "Usage: $0 <transcript_file>"
    exit 1
fi

# Get the input file
input_file="$1"

# Check if the input file exists
if [ ! -f "$input_file" ]; then
    echo "Error: File $input_file does not exist"
    exit 1
fi

# Set the directory paths
VOICE_MEMO_DIR="VoiceMemos"
SUMMARY_DIR="$VOICE_MEMO_DIR/summaries"

# Create summaries directory if it doesn't exist
mkdir -p "$SUMMARY_DIR"

# Name the summary after the transcript: <stem>_summary.txt.
# (Parameter expansion, not command substitution — the previous form
# "$SUMMARY_DIR/$(unknown)_summary.txt" tried to run a command named
# "unknown" and produced an empty stem.)
filename=$(basename "$input_file" .txt)
summary_file="$SUMMARY_DIR/${filename}_summary.txt"

echo "Processing transcript: $input_file"
echo "Generating summary..."

# Activate the virtual environment
source vibenv/bin/activate

# Run the Python script to generate the summary.
# NOTE: $input_file and $summary_file are interpolated by bash into the
# Python source below; paths containing single quotes would break it.
python -c "
import ollama
from pathlib import Path

def generate_summary(transcript_text: str) -> str:
    # Fill the summary prompt template and ask the model.
    prompt_dir = Path('prompts')
    with open(prompt_dir / 'summary.md', 'r', encoding='utf-8') as f:
        prompt_template = f.read()
    prompt = prompt_template.format(transcript=transcript_text)
    response = ollama.chat(model='llama2', messages=[
        {
            'role': 'user',
            'content': prompt
        }
    ])
    return response['message']['content'].strip()

# Read transcript
with open('$input_file', 'r', encoding='utf-8') as f:
    transcript_text = f.read()

# Generate and save summary
summary = generate_summary(transcript_text)
with open('$summary_file', 'w', encoding='utf-8') as f:
    f.write(summary)
"

# Deactivate the virtual environment
deactivate

echo "Summary saved to: $summary_file"
echo "----------------------------------------"

42
src/transcribe.sh Executable file
View File

@@ -0,0 +1,42 @@
#!/bin/bash
# transcribe.sh — transcribe one voice memo to plain text using Whisper.
# The transcript lands in VoiceMemos/transcripts/<memo>.txt.
#
# Usage: ./transcribe.sh <audio_file>

# Require exactly one argument: the audio file to transcribe.
if [ "$#" -ne 1 ]; then
    echo "Usage: $0 <audio_file>"
    exit 1
fi

audio_file="$1"

# Refuse to continue when the audio file is missing.
if [ ! -f "$audio_file" ]; then
    echo "Error: File $audio_file does not exist"
    exit 1
fi

# Directory layout shared with the other pipeline scripts.
memo_dir="VoiceMemos"
transcripts_dir="$memo_dir/transcripts"

# Make sure the destination directory exists before whisper writes to it.
mkdir -p "$transcripts_dir"

# Whisper names its output after the input's basename, so this is the
# path the .txt transcript will appear at.
base_name=$(basename "$audio_file" .m4a)
output_transcript="$transcripts_dir/$base_name.txt"

echo "Processing file: $audio_file"
echo "Transcribing audio..."

# Activate the virtual environment
source vibenv/bin/activate

# Use whisper to transcribe the audio with base.en model
whisper "$audio_file" --model base.en --output_dir "$transcripts_dir" --output_format txt

# Deactivate the virtual environment
deactivate

echo "Transcription saved to: $output_transcript"
echo "----------------------------------------"