Refactor/remove abstract singleton as voice base parent (#4931)

Co-authored-by: Reinier van der Leer <reinier.vanderleer@agpt.co>
2026-02-12 01:34:32 +01:00 · 2023-09-08 14:28:36 -07:00
parent fc96309a73
commit aef6b50b11
8 changed files with 49 additions and 44 deletions
--- a/autogpts/autogpt/autogpt/app/main.py
+++ b/autogpts/autogpt/autogpt/app/main.py
@@ -26,12 +26,11 @@ from autogpt.commands import COMMAND_CATEGORIES
 from autogpt.config import AIConfig, Config, ConfigBuilder, check_openai_api_key
 from autogpt.llm.api_manager import ApiManager
 from autogpt.logs.config import configure_chat_plugins, configure_logging
-from autogpt.logs.helpers import print_attribute
+from autogpt.logs.helpers import print_attribute, speak
 from autogpt.memory.vector import get_memory
 from autogpt.models.command_registry import CommandRegistry
 from autogpt.plugins import scan_plugins
 from autogpt.prompts.prompt import DEFAULT_TRIGGERING_PROMPT
-from autogpt.speech import say_text
 from autogpt.workspace import Workspace
 from scripts.install_plugin_deps import install_plugin_dependencies

@@ -366,7 +365,7 @@ def update_user(
    print_assistant_thoughts(ai_config.ai_name, assistant_reply_dict, config)

    if config.speak_mode:
-        say_text(f"I want to execute {command_name}", config)
+        speak(f"I want to execute {command_name}")

    # First log new-line so user can differentiate sections better in console
    print()
@@ -531,8 +530,6 @@ def print_assistant_thoughts(
    assistant_reply_json_valid: dict,
    config: Config,
 ) -> None:
-    from autogpt.speech import say_text
-
    logger = logging.getLogger(__name__)

    assistant_thoughts_reasoning = None
@@ -577,7 +574,7 @@ def print_assistant_thoughts(
    # Speak the assistant's thoughts
    if assistant_thoughts_speak:
        if config.speak_mode:
-            say_text(assistant_thoughts_speak, config)
+            speak(assistant_thoughts_speak)
        else:
            print_attribute("SPEAK", assistant_thoughts_speak, title_color=Fore.YELLOW)

--- a/autogpts/autogpt/autogpt/logs/config.py
+++ b/autogpts/autogpt/autogpt/logs/config.py
@@ -27,6 +27,7 @@ DEBUG_LOG_FORMAT = (
    "  %(title)s%(message)s"
 )

+SPEECH_OUTPUT_LOGGER = "VOICE"
 USER_FRIENDLY_OUTPUT_LOGGER = "USER_FRIENDLY_OUTPUT"

 _chat_plugins: list[AutoGPTPluginTemplate] = []
@@ -96,6 +97,11 @@ def configure_logging(config: Config, log_dir: Path = LOG_DIR) -> None:
    user_friendly_output_logger.addHandler(stderr)
    user_friendly_output_logger.propagate = False

+    speech_output_logger = logging.getLogger(SPEECH_OUTPUT_LOGGER)
+    speech_output_logger.setLevel(logging.INFO)
+    speech_output_logger.addHandler(TTSHandler(config))
+    speech_output_logger.propagate = False
+
    # JSON logger with better formatting
    json_logger = logging.getLogger("JSON_LOGGER")
    json_logger.setLevel(logging.DEBUG)
--- a/autogpts/autogpt/autogpt/logs/handlers.py
+++ b/autogpts/autogpt/autogpt/logs/handlers.py
@@ -8,7 +8,7 @@ import time
 from typing import TYPE_CHECKING

 from autogpt.logs.utils import remove_color_codes
-from autogpt.speech.say import say_text
+from autogpt.speech import TextToSpeechProvider

 if TYPE_CHECKING:
    from autogpt.config import Config
@@ -53,6 +53,7 @@ class TTSHandler(logging.Handler):
    def __init__(self, config: Config):
        super().__init__()
        self.config = config
+        self.tts_provider = TextToSpeechProvider(config)

    def format(self, record: logging.LogRecord) -> str:
        if getattr(record, "title", ""):
@@ -67,7 +68,7 @@ class TTSHandler(logging.Handler):
            return

        message = self.format(record)
-        say_text(message, self.config)
+        self.tts_provider.say(message)


 class JsonFileHandler(logging.FileHandler):
--- a/autogpts/autogpt/autogpt/logs/helpers.py
+++ b/autogpts/autogpt/autogpt/logs/helpers.py
@@ -3,7 +3,7 @@ from typing import Any, Optional

 from colorama import Fore

-from .config import USER_FRIENDLY_OUTPUT_LOGGER, _chat_plugins
+from .config import SPEECH_OUTPUT_LOGGER, USER_FRIENDLY_OUTPUT_LOGGER, _chat_plugins


 def user_friendly_output(
@@ -65,3 +65,7 @@ def request_user_double_check(additionalText: Optional[str] = None) -> None:
        title="DOUBLE CHECK CONFIGURATION",
        preserve_message_color=True,
    )
+
+
+def speak(message: str, level: int = logging.INFO) -> None:
+    logging.getLogger(SPEECH_OUTPUT_LOGGER).log(level, message)
--- a/autogpts/autogpt/autogpt/singleton.py
+++ b/autogpts/autogpt/autogpt/singleton.py
@@ -14,9 +14,3 @@ class Singleton(abc.ABCMeta, type):
        if cls not in cls._instances:
            cls._instances[cls] = super(Singleton, cls).__call__(*args, **kwargs)
        return cls._instances[cls]
-
-
-class AbstractSingleton(abc.ABC, metaclass=Singleton):
-    """
-    Abstract singleton class for ensuring only one instance of a class.
-    """
--- a/autogpts/autogpt/autogpt/speech/__init__.py
+++ b/autogpts/autogpt/autogpt/speech/__init__.py
@@ -1,4 +1,4 @@
 """This module contains the speech recognition and speech synthesis functions."""
-from autogpt.speech.say import say_text
+from autogpt.speech.say import TextToSpeechProvider

-__all__ = ["say_text"]
+__all__ = ["TextToSpeechProvider"]
--- a/autogpts/autogpt/autogpt/speech/base.py
+++ b/autogpts/autogpt/autogpt/speech/base.py
@@ -9,10 +9,8 @@ from typing import TYPE_CHECKING
 if TYPE_CHECKING:
    from autogpt.config import Config

-from autogpt.singleton import AbstractSingleton

-
-class VoiceBase(AbstractSingleton):
+class VoiceBase:
    """
    Base class for all voice classes.
    """
--- a/autogpts/autogpt/autogpt/speech/say.py
+++ b/autogpts/autogpt/autogpt/speech/say.py
@@ -19,32 +19,37 @@ _QUEUE_SEMAPHORE = Semaphore(
 )  # The amount of sounds to queue before blocking the main thread


-def say_text(text: str, config: Config, voice_index: int = 0) -> None:
-    """Speak the given text using the given voice index"""
-    default_voice_engine, voice_engine = _get_voice_engine(config)
+class TextToSpeechProvider:
+    def __init__(self, config: Config):
+        self._config = config
+        self._default_voice_engine, self._voice_engine = self._get_voice_engine(config)

-    def speak() -> None:
-        success = voice_engine.say(text, voice_index)
-        if not success:
-            default_voice_engine.say(text)
+    def say(self, text, voice_index: int = 0) -> None:
+        def _speak() -> None:
+            success = self._voice_engine.say(text, voice_index)
+            if not success:
+                self._default_voice_engine.say(text, voice_index)
+            _QUEUE_SEMAPHORE.release()

-        _QUEUE_SEMAPHORE.release()
+        if self._config.speak_mode:
+            _QUEUE_SEMAPHORE.acquire(True)
+            thread = threading.Thread(target=_speak)
+            thread.start()

-    _QUEUE_SEMAPHORE.acquire(True)
-    thread = threading.Thread(target=speak)
-    thread.start()
+    def __repr__(self):
+        return f"{self.__class__.__name__}(enabled={self._config.speak_mode}, provider={self._voice_engine.__class__.__name__})"

+    @staticmethod
+    def _get_voice_engine(config: Config) -> tuple[VoiceBase, VoiceBase]:
+        """Get the voice engine to use for the given configuration"""
+        tts_provider = config.text_to_speech_provider
+        if tts_provider == "elevenlabs":
+            voice_engine = ElevenLabsSpeech(config)
+        elif tts_provider == "macos":
+            voice_engine = MacOSTTS(config)
+        elif tts_provider == "streamelements":
+            voice_engine = StreamElementsSpeech(config)
+        else:
+            voice_engine = GTTSVoice(config)

-def _get_voice_engine(config: Config) -> tuple[VoiceBase, VoiceBase]:
-    """Get the voice engine to use for the given configuration"""
-    tts_provider = config.text_to_speech_provider
-    if tts_provider == "elevenlabs":
-        voice_engine = ElevenLabsSpeech(config)
-    elif tts_provider == "macos":
-        voice_engine = MacOSTTS(config)
-    elif tts_provider == "streamelements":
-        voice_engine = StreamElementsSpeech(config)
-    else:
-        voice_engine = GTTSVoice(config)
-
-    return GTTSVoice(config), voice_engine
+        return GTTSVoice(config), voice_engine