Add streaming support for inline-query responses

2025-12-19 21:55:06 +01:00 · 2023-04-15 20:55:40 +02:00
parent 90f27e62f1
commit edc1e0c9a4
1 changed files with 85 additions and 23 deletions
--- a/bot/telegram_bot.py
+++ b/bot/telegram_bot.py
@@ -243,7 +243,7 @@ class ChatGPTTelegramBot:
                    parse_mode=constants.ParseMode.MARKDOWN
                )
-        await self.wrap_with_indicator(update, context, constants.ChatAction.UPLOAD_PHOTO, _generate)
+        await self.wrap_with_indicator(update, context, _generate, constants.ChatAction.UPLOAD_PHOTO)
    async def transcribe(self, update: Update, context: ContextTypes.DEFAULT_TYPE):
        """
@@ -368,7 +368,7 @@ class ChatGPTTelegramBot:
                if os.path.exists(filename):
                    os.remove(filename)
-        await self.wrap_with_indicator(update, context, constants.ChatAction.TYPING, _execute)
+        await self.wrap_with_indicator(update, context, _execute, constants.ChatAction.TYPING)
    async def prompt(self, update: Update, context: ContextTypes.DEFAULT_TYPE):
        """
@@ -512,7 +512,7 @@ class ChatGPTTelegramBot:
                            except Exception as exception:
                                raise exception
-                await self.wrap_with_indicator(update, context, constants.ChatAction.TYPING, _reply)
+                await self.wrap_with_indicator(update, context, _reply, constants.ChatAction.TYPING)
            self.add_chat_request_to_usage_tracker(user_id, total_tokens)
@@ -580,8 +580,9 @@ class ChatGPTTelegramBot:
        try:
            if callback_data.startswith(callback_data_suffix):
                unique_id = callback_data.split(':')[1]
                total_tokens = 0
-                # Retrieve the long text from the cache
+                # Retrieve the prompt from the cache
                query = self.inline_queries_cache.get(unique_id)
                if query:
                    self.inline_queries_cache.pop(unique_id)
@@ -590,30 +591,90 @@ class ChatGPTTelegramBot:
                        f'{localized_text("error", bot_language)}. '
                        f'{localized_text("try_again", bot_language)}'
                    )
-                    await self.edit_message_with_retry(context,
+                    await self.edit_message_with_retry(context, chat_id=None, message_id=inline_message_id,
                                                       chat_id=None,
                                                       message_id=inline_message_id,
                                                       text=f'{query}\n\n_{answer_tr}:_\n{error_message}',
                                                       is_inline=True)
                    return
-            # Edit the current message to indicate that the answer is being processed
+                if self.config['stream']:
-            await context.bot.edit_message_text(inline_message_id=inline_message_id,
+                    stream_response = self.openai.get_chat_response_stream(chat_id=user_id, query=query)
-                                                text=f'{query}\n\n_{answer_tr}:_\n{loading_tr}',
+                    i = 0
-                                                parse_mode=constants.ParseMode.MARKDOWN)
+                    prev = ''
                    sent_message = None
                    backoff = 0
                    async for content, tokens in stream_response:
                        if len(content.strip()) == 0:
                            continue
-            logging.info(f'Generating response for inline query by {name}')
+                        cutoff = 180 if len(content) > 1000 else 120 if len(content) > 200 else 90 if len(
-            response, used_tokens = await self.openai.get_chat_response(chat_id=user_id, query=query)
+                            content) > 50 else 50
-            self.add_chat_request_to_usage_tracker(user_id, used_tokens)
+                        cutoff += backoff
                        if i == 0:
                            try:
                                if sent_message is not None:
                                    await self.edit_message_with_retry(context, chat_id=None,
                                                                       message_id=inline_message_id,
                                                                       text=f'{query}\n\n_{answer_tr}:_\n{content}',
                                                                       is_inline=True)
                            except:
                                continue
                        elif abs(len(content) - len(prev)) > cutoff or tokens != 'not_finished':
                            prev = content
                            try:
                                use_markdown = tokens != 'not_finished'
                                await self.edit_message_with_retry(context,
                                                                   chat_id=None, message_id=inline_message_id,
                                                                   text=f'{query}\n\n_{answer_tr}:_\n{content}',
                                                                   markdown=use_markdown, is_inline=True)
                            except RetryAfter as e:
                                backoff += 5
                                await asyncio.sleep(e.retry_after)
                                continue
                            except TimedOut:
                                backoff += 5
                                await asyncio.sleep(0.5)
                                continue
                            except Exception:
                                backoff += 5
                                continue
                            await asyncio.sleep(0.01)
                        i += 1
                        if tokens != 'not_finished':
                            total_tokens = int(tokens)
                else:
                    async def _send_inline_query_response():
                        nonlocal total_tokens
                        # Edit the current message to indicate that the answer is being processed
                        await context.bot.edit_message_text(inline_message_id=inline_message_id,
                                                            text=f'{query}\n\n_{answer_tr}:_\n{loading_tr}',
                                                            parse_mode=constants.ParseMode.MARKDOWN)
                        logging.info(f'Generating response for inline query by {name}')
                        response, total_tokens = await self.openai.get_chat_response(chat_id=user_id, query=query)
                        # Edit the original message with the generated content
                        await self.edit_message_with_retry(context, chat_id=None, message_id=inline_message_id,
                                                           text=f'{query}\n\n_{answer_tr}:_\n{response}',
                                                           is_inline=True)
                    await self.wrap_with_indicator(update, context, _send_inline_query_response,
                                                   constants.ChatAction.TYPING, is_inline=True)
                self.add_chat_request_to_usage_tracker(user_id, total_tokens)
            # Edit the original message with the generated content
            await self.edit_message_with_retry(context,
                                               chat_id=None,
                                               message_id=inline_message_id,
                                               text=f'{query}\n\n_{answer_tr}:_\n{response}',
                                               is_inline=True)
        except Exception as e:
            logging.error(f'Failed to respond to an inline query via button callback: {e}')
            logging.exception(e)
            localized_answer = localized_text('chat_fail', self.config['bot_language'])
            await self.edit_message_with_retry(context, chat_id=None, message_id=inline_message_id,
                                               text=f"{query}\n\n_{answer_tr}:_\n{localized_answer} {str(e)}",
                                               is_inline=True)
    async def edit_message_with_retry(self, context: ContextTypes.DEFAULT_TYPE, chat_id: int | None,
                                      message_id: str, text: str, markdown: bool = True, is_inline: bool = False):
@@ -652,14 +713,15 @@ class ChatGPTTelegramBot:
            logging.warning(str(e))
            raise e
-    async def wrap_with_indicator(self, update: Update, context: CallbackContext, chat_action: constants.ChatAction,
+    async def wrap_with_indicator(self, update: Update, context: CallbackContext, coroutine,
-                                  coroutine):
+                                  chat_action: constants.ChatAction = "", is_inline=False):
        """
        Wraps a coroutine while repeatedly sending a chat action to the user.
        """
        task = context.application.create_task(coroutine(), update=update)
        while not task.done():
-            context.application.create_task(update.effective_chat.send_action(chat_action))
+            if not is_inline:
                context.application.create_task(update.effective_chat.send_action(chat_action))
            try:
                await asyncio.wait_for(asyncio.shield(task), 4.5)
            except asyncio.TimeoutError: