added tts generation support

2025-12-20 22:24:57 +01:00 · 2023-11-08 13:55:58 +01:00
parent 450b86d5a7
commit d637e35498
7 changed files with 159 additions and 3 deletions
--- a/bot/telegram_bot.py
+++ b/bot/telegram_bot.py
@@ -46,6 +46,9 @@ class ChatGPTTelegramBot:
        if self.config.get('enable_image_generation', False):
            self.commands.append(BotCommand(command='image', description=localized_text('image_description', bot_language)))

+        if self.config.get('enable_tts_generation', False):  # same flag the /tts handler and the stats lines check
+            self.commands.append(BotCommand(command='tts', description=localized_text('speech_description', bot_language)))
+
        self.group_commands = [BotCommand(
            command='chat', description=localized_text('chat_description', bot_language)
        )] + self.commands
@@ -94,6 +97,7 @@ class ChatGPTTelegramBot:
        images_today, images_month = self.usage[user_id].get_current_image_count()
        (transcribe_minutes_today, transcribe_seconds_today, transcribe_minutes_month,
         transcribe_seconds_month) = self.usage[user_id].get_current_transcription_duration()
+        characters_today, characters_month = self.usage[user_id].get_current_tts_usage()
        current_cost = self.usage[user_id].get_current_cost()

        chat_id = update.effective_chat.id
@@ -112,11 +116,16 @@ class ChatGPTTelegramBot:
        text_today_images = ""
        if self.config.get('enable_image_generation', False):
            text_today_images = f"{images_today} {localized_text('stats_images', bot_language)}\n"
+
+        text_today_tts = ""
+        if self.config.get('enable_tts_generation', False):
+            text_today_tts = f"{characters_today} {localized_text('stats_tts', bot_language)}\n"
        
        text_today = (
            f"*{localized_text('usage_today', bot_language)}:*\n"
            f"{tokens_today} {localized_text('stats_tokens', bot_language)}\n"
            f"{text_today_images}"  # Include the image statistics for today if applicable
+            f"{text_today_tts}"
            f"{transcribe_minutes_today} {localized_text('stats_transcribe', bot_language)[0]} "
            f"{transcribe_seconds_today} {localized_text('stats_transcribe', bot_language)[1]}\n"
            f"{localized_text('stats_total', bot_language)}{current_cost['cost_today']:.2f}\n"
@@ -126,12 +135,17 @@ class ChatGPTTelegramBot:
        text_month_images = ""
        if self.config.get('enable_image_generation', False):
            text_month_images = f"{images_month} {localized_text('stats_images', bot_language)}\n"
+
+        text_month_tts = ""
+        if self.config.get('enable_tts_generation', False):
+            text_month_tts = f"{characters_month} {localized_text('stats_tts', bot_language)}\n"
        
        # Check if image generation is enabled and, if so, generate the image statistics for the month
        text_month = (
            f"*{localized_text('usage_month', bot_language)}:*\n"
            f"{tokens_month} {localized_text('stats_tokens', bot_language)}\n"
            f"{text_month_images}"  # Include the image statistics for the month if applicable
+            f"{text_month_tts}"
            f"{transcribe_minutes_month} {localized_text('stats_transcribe', bot_language)[0]} "
            f"{transcribe_seconds_month} {localized_text('stats_transcribe', bot_language)[1]}\n"
            f"{localized_text('stats_total', bot_language)}{current_cost['cost_month']:.2f}"
@@ -250,6 +264,52 @@ class ChatGPTTelegramBot:

        await wrap_with_indicator(update, context, _generate, constants.ChatAction.UPLOAD_PHOTO)

+    async def tts(self, update: Update, context: ContextTypes.DEFAULT_TYPE):
+        """
+        Generates speech for the given input text using the TTS API and replies with it as a voice message
+        """
+        if not self.config.get('enable_tts_generation', False) \
+                or not await self.check_allowed_and_within_budget(update, context):
+            return
+
+        tts_query = message_text(update.message)
+        if tts_query == '':
+            await update.effective_message.reply_text(
+                message_thread_id=get_thread_id(update),
+                text=localized_text('tts_no_prompt', self.config['bot_language'])
+            )
+            return
+
+        logging.info(f'New speech generation request received from user {update.message.from_user.name} '
+                     f'(id: {update.message.from_user.id})')
+
+        async def _generate():
+            try:
+                speech_file, text_length = await self.openai.generate_speech(text=tts_query)
+                try:
+                    await update.effective_message.reply_voice(
+                        reply_to_message_id=get_reply_to_message_id(self.config, update),
+                        voice=speech_file
+                    )
+                finally:
+                    speech_file.close()  # always release the audio stream, even if sending the voice fails
+                # add tts request to users usage tracker (only reached when the voice message was sent)
+                user_id = update.message.from_user.id
+                self.usage[user_id].add_tts_request(text_length, self.config['tts_model'], self.config['tts_prices'])
+                # add guest tts request to guest usage tracker
+                if str(user_id) not in self.config['allowed_user_ids'].split(',') and 'guests' in self.usage:
+                    self.usage["guests"].add_tts_request(text_length, self.config['tts_model'], self.config['tts_prices'])
+            except Exception as e:
+                logging.exception(e)
+                await update.effective_message.reply_text(
+                    message_thread_id=get_thread_id(update),
+                    reply_to_message_id=get_reply_to_message_id(self.config, update),
+                    text=f"{localized_text('tts_fail', self.config['bot_language'])}: {str(e)}",
+                    parse_mode=constants.ParseMode.MARKDOWN
+                )
+
+        await wrap_with_indicator(update, context, _generate, constants.ChatAction.UPLOAD_VOICE)
+
    async def transcribe(self, update: Update, context: ContextTypes.DEFAULT_TYPE):
        """
        Transcribe audio messages.
@@ -786,6 +846,7 @@ class ChatGPTTelegramBot:
        application.add_handler(CommandHandler('reset', self.reset))
        application.add_handler(CommandHandler('help', self.help))
        application.add_handler(CommandHandler('image', self.image))
+        application.add_handler(CommandHandler('tts', self.tts))
        application.add_handler(CommandHandler('start', self.help))
        application.add_handler(CommandHandler('stats', self.stats))
        application.add_handler(CommandHandler('resend', self.resend))