mirror of
https://github.com/aljazceru/Auto-GPT.git
synced 2026-02-12 01:34:32 +01:00
Refactor/remove abstract singleton as voice base parent (#4931)
Co-authored-by: Reinier van der Leer <reinier.vanderleer@agpt.co>
This commit is contained in:
@@ -26,12 +26,11 @@ from autogpt.commands import COMMAND_CATEGORIES
|
||||
from autogpt.config import AIConfig, Config, ConfigBuilder, check_openai_api_key
|
||||
from autogpt.llm.api_manager import ApiManager
|
||||
from autogpt.logs.config import configure_chat_plugins, configure_logging
|
||||
from autogpt.logs.helpers import print_attribute
|
||||
from autogpt.logs.helpers import print_attribute, speak
|
||||
from autogpt.memory.vector import get_memory
|
||||
from autogpt.models.command_registry import CommandRegistry
|
||||
from autogpt.plugins import scan_plugins
|
||||
from autogpt.prompts.prompt import DEFAULT_TRIGGERING_PROMPT
|
||||
from autogpt.speech import say_text
|
||||
from autogpt.workspace import Workspace
|
||||
from scripts.install_plugin_deps import install_plugin_dependencies
|
||||
|
||||
@@ -366,7 +365,7 @@ def update_user(
|
||||
print_assistant_thoughts(ai_config.ai_name, assistant_reply_dict, config)
|
||||
|
||||
if config.speak_mode:
|
||||
say_text(f"I want to execute {command_name}", config)
|
||||
speak(f"I want to execute {command_name}")
|
||||
|
||||
# First log new-line so user can differentiate sections better in console
|
||||
print()
|
||||
@@ -531,8 +530,6 @@ def print_assistant_thoughts(
|
||||
assistant_reply_json_valid: dict,
|
||||
config: Config,
|
||||
) -> None:
|
||||
from autogpt.speech import say_text
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
assistant_thoughts_reasoning = None
|
||||
@@ -577,7 +574,7 @@ def print_assistant_thoughts(
|
||||
# Speak the assistant's thoughts
|
||||
if assistant_thoughts_speak:
|
||||
if config.speak_mode:
|
||||
say_text(assistant_thoughts_speak, config)
|
||||
speak(assistant_thoughts_speak)
|
||||
else:
|
||||
print_attribute("SPEAK", assistant_thoughts_speak, title_color=Fore.YELLOW)
|
||||
|
||||
|
||||
@@ -27,6 +27,7 @@ DEBUG_LOG_FORMAT = (
|
||||
" %(title)s%(message)s"
|
||||
)
|
||||
|
||||
SPEECH_OUTPUT_LOGGER = "VOICE"
|
||||
USER_FRIENDLY_OUTPUT_LOGGER = "USER_FRIENDLY_OUTPUT"
|
||||
|
||||
_chat_plugins: list[AutoGPTPluginTemplate] = []
|
||||
@@ -96,6 +97,11 @@ def configure_logging(config: Config, log_dir: Path = LOG_DIR) -> None:
|
||||
user_friendly_output_logger.addHandler(stderr)
|
||||
user_friendly_output_logger.propagate = False
|
||||
|
||||
speech_output_logger = logging.getLogger(SPEECH_OUTPUT_LOGGER)
|
||||
speech_output_logger.setLevel(logging.INFO)
|
||||
speech_output_logger.addHandler(TTSHandler(config))
|
||||
speech_output_logger.propagate = False
|
||||
|
||||
# JSON logger with better formatting
|
||||
json_logger = logging.getLogger("JSON_LOGGER")
|
||||
json_logger.setLevel(logging.DEBUG)
|
||||
|
||||
@@ -8,7 +8,7 @@ import time
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from autogpt.logs.utils import remove_color_codes
|
||||
from autogpt.speech.say import say_text
|
||||
from autogpt.speech import TextToSpeechProvider
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from autogpt.config import Config
|
||||
@@ -53,6 +53,7 @@ class TTSHandler(logging.Handler):
|
||||
def __init__(self, config: Config):
|
||||
super().__init__()
|
||||
self.config = config
|
||||
self.tts_provider = TextToSpeechProvider(config)
|
||||
|
||||
def format(self, record: logging.LogRecord) -> str:
|
||||
if getattr(record, "title", ""):
|
||||
@@ -67,7 +68,7 @@ class TTSHandler(logging.Handler):
|
||||
return
|
||||
|
||||
message = self.format(record)
|
||||
say_text(message, self.config)
|
||||
self.tts_provider.say(message)
|
||||
|
||||
|
||||
class JsonFileHandler(logging.FileHandler):
|
||||
|
||||
@@ -3,7 +3,7 @@ from typing import Any, Optional
|
||||
|
||||
from colorama import Fore
|
||||
|
||||
from .config import USER_FRIENDLY_OUTPUT_LOGGER, _chat_plugins
|
||||
from .config import SPEECH_OUTPUT_LOGGER, USER_FRIENDLY_OUTPUT_LOGGER, _chat_plugins
|
||||
|
||||
|
||||
def user_friendly_output(
|
||||
@@ -65,3 +65,7 @@ def request_user_double_check(additionalText: Optional[str] = None) -> None:
|
||||
title="DOUBLE CHECK CONFIGURATION",
|
||||
preserve_message_color=True,
|
||||
)
|
||||
|
||||
|
||||
def speak(message: str, level: int = logging.INFO) -> None:
    """Emit `message` on the logger named by SPEECH_OUTPUT_LOGGER.

    Args:
        message: The text to hand to the speech-output logger.
        level: Logging level for the record; defaults to `logging.INFO`.
    """
    # Look the logger up by name on every call instead of caching it.
    speech_logger = logging.getLogger(SPEECH_OUTPUT_LOGGER)
    speech_logger.log(level, message)
|
||||
|
||||
@@ -14,9 +14,3 @@ class Singleton(abc.ABCMeta, type):
|
||||
if cls not in cls._instances:
|
||||
cls._instances[cls] = super(Singleton, cls).__call__(*args, **kwargs)
|
||||
return cls._instances[cls]
|
||||
|
||||
|
||||
class AbstractSingleton(abc.ABC, metaclass=Singleton):
    """
    Abstract base class whose metaclass is `Singleton`.

    Subclasses inherit the metaclass, so constructing one goes through
    `Singleton.__call__`, which keeps a single cached instance per class.
    """
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
"""This module contains the speech recognition and speech synthesis functions."""
|
||||
from autogpt.speech.say import say_text
|
||||
from autogpt.speech.say import TextToSpeechProvider
|
||||
|
||||
__all__ = ["say_text"]
|
||||
__all__ = ["TextToSpeechProvider"]
|
||||
|
||||
@@ -9,10 +9,8 @@ from typing import TYPE_CHECKING
|
||||
if TYPE_CHECKING:
|
||||
from autogpt.config import Config
|
||||
|
||||
from autogpt.singleton import AbstractSingleton
|
||||
|
||||
|
||||
class VoiceBase(AbstractSingleton):
|
||||
class VoiceBase:
|
||||
"""
|
||||
Base class for all voice classes.
|
||||
"""
|
||||
|
||||
@@ -19,32 +19,37 @@ _QUEUE_SEMAPHORE = Semaphore(
|
||||
) # The amount of sounds to queue before blocking the main thread
|
||||
|
||||
|
||||
def say_text(text: str, config: Config, voice_index: int = 0) -> None:
|
||||
"""Speak the given text using the given voice index"""
|
||||
default_voice_engine, voice_engine = _get_voice_engine(config)
|
||||
class TextToSpeechProvider:
|
||||
def __init__(self, config: Config):
|
||||
self._config = config
|
||||
self._default_voice_engine, self._voice_engine = self._get_voice_engine(config)
|
||||
|
||||
def speak() -> None:
|
||||
success = voice_engine.say(text, voice_index)
|
||||
if not success:
|
||||
default_voice_engine.say(text)
|
||||
def say(self, text, voice_index: int = 0) -> None:
|
||||
def _speak() -> None:
|
||||
success = self._voice_engine.say(text, voice_index)
|
||||
if not success:
|
||||
self._default_voice_engine.say(text, voice_index)
|
||||
_QUEUE_SEMAPHORE.release()
|
||||
|
||||
_QUEUE_SEMAPHORE.release()
|
||||
if self._config.speak_mode:
|
||||
_QUEUE_SEMAPHORE.acquire(True)
|
||||
thread = threading.Thread(target=_speak)
|
||||
thread.start()
|
||||
|
||||
_QUEUE_SEMAPHORE.acquire(True)
|
||||
thread = threading.Thread(target=speak)
|
||||
thread.start()
|
||||
def __repr__(self):
|
||||
return f"{self.__class__.__name__}(enabled={self._config.speak_mode}, provider={self._voice_engine.__class__.__name__})"
|
||||
|
||||
@staticmethod
|
||||
def _get_voice_engine(config: Config) -> tuple[VoiceBase, VoiceBase]:
|
||||
"""Get the voice engine to use for the given configuration"""
|
||||
tts_provider = config.text_to_speech_provider
|
||||
if tts_provider == "elevenlabs":
|
||||
voice_engine = ElevenLabsSpeech(config)
|
||||
elif tts_provider == "macos":
|
||||
voice_engine = MacOSTTS(config)
|
||||
elif tts_provider == "streamelements":
|
||||
voice_engine = StreamElementsSpeech(config)
|
||||
else:
|
||||
voice_engine = GTTSVoice(config)
|
||||
|
||||
def _get_voice_engine(config: Config) -> tuple[VoiceBase, VoiceBase]:
|
||||
"""Get the voice engine to use for the given configuration"""
|
||||
tts_provider = config.text_to_speech_provider
|
||||
if tts_provider == "elevenlabs":
|
||||
voice_engine = ElevenLabsSpeech(config)
|
||||
elif tts_provider == "macos":
|
||||
voice_engine = MacOSTTS(config)
|
||||
elif tts_provider == "streamelements":
|
||||
voice_engine = StreamElementsSpeech(config)
|
||||
else:
|
||||
voice_engine = GTTSVoice(config)
|
||||
|
||||
return GTTSVoice(config), voice_engine
|
||||
return GTTSVoice(config), voice_engine
|
||||
|
||||
Reference in New Issue
Block a user