From 73bf928c1d252bc9b23f4453eb21caaa4bd836ef Mon Sep 17 00:00:00 2001 From: Bituq Date: Wed, 5 Apr 2023 15:50:28 +0200 Subject: [PATCH 1/5] fixed speech blocking main thread --- requirements.txt | 2 +- scripts/speak.py | 35 ++++++++++++++++++++++------------- 2 files changed, 23 insertions(+), 14 deletions(-) diff --git a/requirements.txt b/requirements.txt index 158e9324..198b1b50 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,7 @@ beautifulsoup4 colorama==0.4.6 openai==0.27.2 -playsound==1.3.0 +playsound==1.2.2 python-dotenv==1.0.0 pyyaml==6.0 readability-lxml==0.8.1 diff --git a/scripts/speak.py b/scripts/speak.py index f6242a37..f98a9f44 100644 --- a/scripts/speak.py +++ b/scripts/speak.py @@ -4,6 +4,8 @@ import requests from config import Config cfg = Config() import gtts +import threading +from threading import Lock # TODO: Nicer names for these ids @@ -14,18 +16,21 @@ tts_headers = { "xi-api-key": cfg.elevenlabs_api_key } +mutex_lock = Lock() # Ensure only one sound is played at a time + def eleven_labs_speech(text, voice_index=0): tts_url = "https://api.elevenlabs.io/v1/text-to-speech/{voice_id}".format( voice_id=voices[voice_index]) - formatted_message = {"text": text} + formatted_message = {"text": text, "voice_settings": {"stability": 0.05, "similarity_boost": 0.8}} response = requests.post( tts_url, headers=tts_headers, json=formatted_message) if response.status_code == 200: - with open("speech.mpeg", "wb") as f: - f.write(response.content) - playsound("speech.mpeg") - os.remove("speech.mpeg") + with mutex_lock: + with open("speech.mpeg", "wb") as f: + f.write(response.content) + playsound("speech.mpeg", True) + os.remove("speech.mpeg") return True else: print("Request failed with status code:", response.status_code) @@ -34,15 +39,19 @@ def eleven_labs_speech(text, voice_index=0): def gtts_speech(text): tts = gtts.gTTS(text) - tts.save("speech.mp3") - playsound("speech.mp3") - os.remove("speech.mp3") + with mutex_lock: + tts.save("speech.mp3") + playsound("speech.mp3", True) + os.remove("speech.mp3") def say_text(text, voice_index=0): - if not cfg.elevenlabs_api_key: - gtts_speech(text) - else: - success = eleven_labs_speech(text) - if not success: + def speak(): + if not cfg.elevenlabs_api_key: gtts_speech(text) + else: + success = eleven_labs_speech(text) + if not success: + gtts_speech(text) + thread = threading.Thread(target=speak) + thread.start() From 0cbd2bb3d03a18dd6da6cc4db2a15ab717c36b57 Mon Sep 17 00:00:00 2001 From: Dylan Date: Thu, 6 Apr 2023 11:59:46 +0200 Subject: [PATCH 2/5] Fix voice_index not being used --- scripts/speak.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/speak.py b/scripts/speak.py index f98a9f44..6a966efc 100644 --- a/scripts/speak.py +++ b/scripts/speak.py @@ -49,9 +49,9 @@ def say_text(text, voice_index=0): if not cfg.elevenlabs_api_key: gtts_speech(text) else: - success = eleven_labs_speech(text) + success = eleven_labs_speech(text, voice_index) if not success: gtts_speech(text) thread = threading.Thread(target=speak) - thread.start() + thread.start() \ No newline at end of file From ef4a02757f9ff55b96df27a7effcaa7685718f23 Mon Sep 17 00:00:00 2001 From: Dylan Date: Thu, 6 Apr 2023 14:02:39 +0200 Subject: [PATCH 3/5] Add semaphore to speak module --- scripts/speak.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/scripts/speak.py b/scripts/speak.py index 6a966efc..0f8c243d 100644 --- a/scripts/speak.py +++ b/scripts/speak.py @@ -5,7 +5,7 @@ from config import Config cfg = Config() import gtts import threading -from threading import Lock +from threading import Lock, Semaphore # TODO: Nicer names for these ids @@ -17,6 +17,7 @@ tts_headers = { } mutex_lock = Lock() # Ensure only one sound is played at a time +queue_semaphore = Semaphore(1) # The amount of sounds to queue before blocking the main thread def eleven_labs_speech(text, voice_index=0): tts_url = "https://api.elevenlabs.io/v1/text-to-speech/{voice_id}".format( @@ -52,6 +53,9 @@ def say_text(text, voice_index=0): success = eleven_labs_speech(text, voice_index) if not success: gtts_speech(text) + + queue_semaphore.release() + queue_semaphore.acquire(True) thread = threading.Thread(target=speak) thread.start() \ No newline at end of file From 88a802a675eaa0385e91a73d435fdd4c1e1c13d7 Mon Sep 17 00:00:00 2001 From: Dylan N <65708398+bituq@users.noreply.github.com> Date: Mon, 10 Apr 2023 16:44:01 +0200 Subject: [PATCH 4/5] Remove voice_settings parameters I removed the voice_settings parameters as it was an irrelevant change. --- scripts/speak.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/speak.py b/scripts/speak.py index d8dd191b..8e0fbd9c 100644 --- a/scripts/speak.py +++ b/scripts/speak.py @@ -22,7 +22,7 @@ queue_semaphore = Semaphore(1) # The amount of sounds to queue before blocking t def eleven_labs_speech(text, voice_index=0): tts_url = "https://api.elevenlabs.io/v1/text-to-speech/{voice_id}".format( voice_id=voices[voice_index]) - formatted_message = {"text": text, "voice_settings": {"stability": 0.05, "similarity_boost": 0.8}} + formatted_message = {"text": text} response = requests.post( tts_url, headers=tts_headers, json=formatted_message) @@ -59,4 +59,4 @@ def say_text(text, voice_index=0): queue_semaphore.acquire(True) thread = threading.Thread(target=speak) - thread.start() \ No newline at end of file + thread.start() From 353785d23d4f2ea75fb5c1f3cd3ed4a0993a0912 Mon Sep 17 00:00:00 2001 From: Bituq Date: Tue, 11 Apr 2023 00:50:57 +0200 Subject: [PATCH 5/5] Fix macos_tts_speech merge conflict --- scripts/speak.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/scripts/speak.py b/scripts/speak.py index 8e0fbd9c..08b0c1c9 100644 --- a/scripts/speak.py +++ b/scripts/speak.py @@ -20,6 +20,7 @@ mutex_lock = Lock() # Ensure only one sound is played at a time queue_semaphore = Semaphore(1) # The amount of sounds to queue before blocking the main thread def eleven_labs_speech(text, voice_index=0): + """Speak text using elevenlabs.io's API""" tts_url = "https://api.elevenlabs.io/v1/text-to-speech/{voice_id}".format( voice_id=voices[voice_index]) formatted_message = {"text": text} @@ -45,11 +46,17 @@ def gtts_speech(text): playsound("speech.mp3", True) os.remove("speech.mp3") +def macos_tts_speech(text): + os.system(f'say "{text}"') + def say_text(text, voice_index=0): def speak(): if not cfg.elevenlabs_api_key: - gtts_speech(text) + if cfg.use_mac_os_tts == 'True': + macos_tts_speech(text) + else: + gtts_speech(text) else: success = eleven_labs_speech(text, voice_index) if not success: