From a46b15c337b10ce81158e6a04ab4ea26690a8faa Mon Sep 17 00:00:00 2001 From: Gigi Date: Sat, 29 Mar 2025 20:47:28 +0000 Subject: [PATCH] =?UTF-8?q?Add=20word=20count=20check=20to=20skip=20summar?= =?UTF-8?q?ies=20for=20short=20transcripts=20(=E2=89=A4210=20words)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/summarize_transcripts.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/src/summarize_transcripts.py b/src/summarize_transcripts.py index e325f4b..b84b0f1 100755 --- a/src/summarize_transcripts.py +++ b/src/summarize_transcripts.py @@ -51,6 +51,10 @@ def save_summary(summary: str, output_file: Path) -> None: with open(output_file, 'w', encoding='utf-8') as f: f.write(summary) +def count_words(text: str) -> int: + """Count the number of words in a text string.""" + return len(text.split()) + def main(): transcript_dir = Path("VoiceMemos/transcripts") summary_dir = Path("VoiceMemos/summaries") @@ -77,7 +81,13 @@ def main(): try: # Read transcript transcript_text = read_transcript(transcript_file) - print(f" Read transcript ({len(transcript_text)} characters)") + word_count = count_words(transcript_text) + print(f" Read transcript ({len(transcript_text)} characters, {word_count} words)") + + # Skip if transcript is too short + if word_count <= 210: + print(" Transcript is too short (≤210 words), skipping summary creation") + continue # Generate summary summary = process_transcript(transcript_text)