Add word count check to skip summaries for short transcripts (≤210 words)

This commit is contained in:
Gigi
2025-03-29 20:47:28 +00:00
parent ccf027959a
commit a46b15c337

View File

@@ -51,6 +51,10 @@ def save_summary(summary: str, output_file: Path) -> None:
with open(output_file, 'w', encoding='utf-8') as f:
f.write(summary)
def count_words(text: str) -> int:
    """Count the number of words in a text string.

    A "word" is any maximal run of non-whitespace characters: the text is
    split with ``str.split()`` (no separator), so consecutive whitespace is
    treated as a single delimiter and leading/trailing whitespace is ignored.
    Punctuation attached to a word is counted as part of that word.

    Args:
        text: The text to measure.

    Returns:
        The number of whitespace-separated tokens; 0 for an empty or
        whitespace-only string.
    """
    return len(text.split())
def main():
transcript_dir = Path("VoiceMemos/transcripts")
summary_dir = Path("VoiceMemos/summaries")
@@ -77,7 +81,13 @@ def main():
try:
# Read transcript
transcript_text = read_transcript(transcript_file)
print(f" Read transcript ({len(transcript_text)} characters)")
word_count = count_words(transcript_text)
print(f" Read transcript ({len(transcript_text)} characters, {word_count} words)")
# Skip if transcript is too short
if word_count <= 210:
print(" Transcript is too short (≤210 words), skipping summary creation")
continue
# Generate summary
summary = process_transcript(transcript_text)