mirror of https://github.com/aljazceru/chatgpt-telegram-bot.git
synced 2025-12-23 23:55:05 +01:00

initial stream support
@@ -26,9 +26,9 @@ A [Telegram bot](https://core.telegram.org/bots/api) that integrates with OpenAI
 - [x] (NEW!) Track token usage per user - by [@AlexHTW](https://github.com/AlexHTW)
 - [x] (NEW!) Get personal token usage statistics and cost per day/month via the `/stats` command - by [@AlexHTW](https://github.com/AlexHTW)
 - [x] (NEW!) User budgets and guest budgets - by [@AlexHTW](https://github.com/AlexHTW)
+- [x] (NEW!) Stream support
 
 ## Additional features - help needed!
-- [ ] Add stream support ([#43](https://github.com/n3d1117/chatgpt-telegram-bot/issues/43))
 - [ ] Add session persistence ([#70](https://github.com/n3d1117/chatgpt-telegram-bot/issues/70), [#71](https://github.com/n3d1117/chatgpt-telegram-bot/issues/71))
 
 PRs are always welcome!
@@ -58,6 +58,7 @@ Customize the configuration by copying `.env.example` and renaming it to `.env`,
 | `OPENAI_MODEL` | The OpenAI model to use for generating responses | `gpt-3.5-turbo` |
 | `ASSISTANT_PROMPT` | A system message that sets the tone and controls the behavior of the assistant | `You are a helpful assistant.` |
 | `SHOW_USAGE` | Whether to show OpenAI token usage information after each response | false |
+| `STREAM` | Whether to stream responses | true |
 | `MAX_TOKENS` | Upper bound on how many tokens the ChatGPT API will return | 1200 |
 | `MAX_HISTORY_SIZE` | Max number of messages to keep in memory, after which the conversation will be summarised to avoid excessive token usage | 10 |
 | `MAX_CONVERSATION_AGE_MINUTES` | Maximum number of minutes a conversation should live since the last message, after which the conversation will be reset | 180 |
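For reference, the new flag drops into `.env` alongside the existing options. A minimal excerpt using the defaults from the table above (values are illustrative):

```
SHOW_USAGE=false
STREAM=true
MAX_TOKENS=1200
```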
@@ -28,6 +28,7 @@ def main():
     openai_config = {
         'api_key': os.environ['OPENAI_API_KEY'],
         'show_usage': os.environ.get('SHOW_USAGE', 'false').lower() == 'true',
+        'stream': os.environ.get('STREAM', 'true').lower() == 'true',
         'proxy': os.environ.get('PROXY', None),
         'max_history_size': int(os.environ.get('MAX_HISTORY_SIZE', 10)),
         'max_conversation_age_minutes': int(os.environ.get('MAX_CONVERSATION_AGE_MINUTES', 180)),
@@ -46,6 +47,7 @@ def main():
         'allowed_user_ids': os.environ.get('ALLOWED_TELEGRAM_USER_IDS', '*'),
         'monthly_user_budgets': os.environ.get('MONTHLY_USER_BUDGETS', '*'),
         'monthly_guest_budget': float(os.environ.get('MONTHLY_GUEST_BUDGET', '100.0')),
+        'stream': os.environ.get('STREAM', 'true').lower() == 'true',
         'proxy': os.environ.get('PROXY', None),
         'voice_reply_transcript': os.environ.get('VOICE_REPLY_WITH_TRANSCRIPT_ONLY', 'true').lower() == 'true',
         'ignore_group_transcriptions': os.environ.get('IGNORE_GROUP_TRANSCRIPTIONS', 'true').lower() == 'true',
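The same string-to-bool idiom now appears in both config dicts. A small helper (hypothetical, not part of this diff) makes the pattern explicit:

```python
import os

def env_flag(name: str, default: str = 'true') -> bool:
    # Treat 'true' (any case) as on, everything else as off
    return os.environ.get(name, default).lower() == 'true'

# Equivalent to the repeated os.environ.get('STREAM', 'true').lower() == 'true'
stream_enabled = env_flag('STREAM')
```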
@@ -39,7 +39,63 @@ class OpenAIHelper:
     async def get_chat_response(self, chat_id: int, query: str) -> tuple[str, str]:
         """
-        Gets a response from the GPT-3 model.
+        Gets a full response from the GPT model.
         :param chat_id: The chat ID
         :param query: The query to send to the model
         :return: The answer from the model and the number of tokens used
         """
+        response = await self.__common_get_chat_response(chat_id, query)
+        answer = ''
+
+        if len(response.choices) > 1 and self.config['n_choices'] > 1:
+            for index, choice in enumerate(response.choices):
+                content = choice['message']['content'].strip()
+                if index == 0:
+                    self.__add_to_history(chat_id, role="assistant", content=content)
+                answer += f'{index + 1}\u20e3\n'
+                answer += content
+                answer += '\n\n'
+        else:
+            answer = response.choices[0]['message']['content'].strip()
+            self.__add_to_history(chat_id, role="assistant", content=answer)
+
+        if self.config['show_usage']:
+            answer += "\n\n---\n" \
+                      f"💰 Tokens used: {str(response.usage['total_tokens'])}" \
+                      f" ({str(response.usage['prompt_tokens'])} prompt," \
+                      f" {str(response.usage['completion_tokens'])} completion)"
+
+        return answer, response.usage['total_tokens']
+
+    async def get_chat_response_stream(self, chat_id: int, query: str):
+        """
+        Stream response from the GPT model.
+        :param chat_id: The chat ID
+        :param query: The query to send to the model
+        :return: The answer from the model and the number of tokens used, or 'not_finished'
+        """
+        response = await self.__common_get_chat_response(chat_id, query)
+
+        answer = ''
+        async for item in response:
+            if 'choices' not in item or len(item.choices) == 0:
+                continue
+            delta = item.choices[0].delta
+            if 'content' in delta:
+                answer += delta.content
+                yield answer, 'not_finished'
+        answer = answer.strip()
+        self.__add_to_history(chat_id, role="assistant", content=answer)
+        tokens_used = str(self.__count_tokens(self.conversations[chat_id]))
+
+        if self.config['show_usage']:
+            answer += f"\n\n---\n💰 Tokens used: {tokens_used}"
+
+        yield answer, tokens_used
+
+    async def __common_get_chat_response(self, chat_id: int, query: str):
+        """
+        Request a response from the GPT model.
+        :param chat_id: The chat ID
+        :param query: The query to send to the model
+        :return: The answer from the model and the number of tokens used
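A quick sketch of how a caller might consume the new generator (hypothetical driver code, not part of this diff; `helper` is assumed to be a configured OpenAIHelper):

```python
async def demo(helper, chat_id: int, query: str) -> None:
    # Partial answers stream in until the sentinel flips to a token count
    async for content, tokens in helper.get_chat_response_stream(chat_id, query):
        if tokens == 'not_finished':
            print(f'\r{len(content)} chars so far', end='')
        else:
            print(f'\nFinal answer ({tokens} tokens):\n{content}')

# asyncio.run(demo(helper, chat_id=1, query='Hello'))
```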
@@ -69,7 +125,7 @@ class OpenAIHelper:
                     logging.warning(f'Error while summarising chat history: {str(e)}. Popping elements instead...')
                     self.conversations[chat_id] = self.conversations[chat_id][-self.config['max_history_size']:]
 
-            response = await openai.ChatCompletion.acreate(
+            return await openai.ChatCompletion.acreate(
                 model=self.config['model'],
                 messages=self.conversations[chat_id],
                 temperature=self.config['temperature'],
@@ -77,34 +133,9 @@ class OpenAIHelper:
                 max_tokens=self.config['max_tokens'],
                 presence_penalty=self.config['presence_penalty'],
                 frequency_penalty=self.config['frequency_penalty'],
+                stream=self.config['stream']
             )
 
-            if len(response.choices) > 0:
-                answer = ''
-
-                if len(response.choices) > 1 and self.config['n_choices'] > 1:
-                    for index, choice in enumerate(response.choices):
-                        content = choice['message']['content'].strip()
-                        if index == 0:
-                            self.__add_to_history(chat_id, role="assistant", content=content)
-                        answer += f'{index+1}\u20e3\n'
-                        answer += content
-                        answer += '\n\n'
-                else:
-                    answer = response.choices[0]['message']['content'].strip()
-                    self.__add_to_history(chat_id, role="assistant", content=answer)
-
-                if self.config['show_usage']:
-                    answer += "\n\n---\n" \
-                              f"💰 Tokens used: {str(response.usage['total_tokens'])}" \
-                              f" ({str(response.usage['prompt_tokens'])} prompt," \
-                              f" {str(response.usage['completion_tokens'])} completion)"
-
-                return answer, response.usage['total_tokens']
-
-            logging.error(f'No response from GPT: {str(response)}')
-            raise Exception('⚠️ _An error has occurred_ ⚠️\nPlease try again in a while.')
 
         except openai.error.RateLimitError as e:
             raise Exception(f'⚠️ _OpenAI Rate Limit exceeded_ ⚠️\n{str(e)}') from e
 
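For reference, a minimal standalone sketch of what `stream=True` changes in the openai-python 0.27-era API this code targets (model name and messages are placeholders): `acreate` then returns an async iterator of chunks whose choices carry a `delta` rather than a full `message`.

```python
import openai

async def stream_answer(messages: list[dict]) -> str:
    response = await openai.ChatCompletion.acreate(
        model='gpt-3.5-turbo',
        messages=messages,
        stream=True
    )
    answer = ''
    async for chunk in response:
        delta = chunk.choices[0].delta
        if 'content' in delta:
            answer += delta.content  # accumulate partial content as it arrives
    return answer
```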
@@ -206,7 +237,7 @@ class OpenAIHelper:
         )
 
     # https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb
-    def __count_tokens(self, messages):
+    def __count_tokens(self, messages) -> int:
         """
         Counts the number of tokens required to send the given messages.
         :param messages: the messages to send
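The linked cookbook recipe gives the shape of such a counter. A self-contained sketch (the constants follow the cookbook's gpt-3.5-turbo values; the method's actual body isn't shown in this hunk):

```python
import tiktoken

def count_tokens(messages: list[dict], model: str = 'gpt-3.5-turbo') -> int:
    encoding = tiktoken.encoding_for_model(model)
    num_tokens = 0
    for message in messages:
        num_tokens += 4  # per-message overhead: <|start|>{role/name}\n{content}<|end|>\n
        for value in message.values():
            num_tokens += len(encoding.encode(value))
    num_tokens += 3  # every reply is primed with <|start|>assistant<|message|>
    return num_tokens
```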
@@ -1,8 +1,12 @@
 import logging
 import os
 
+import asyncio
+
+import telegram
 from telegram import constants
 from telegram import Update, InlineQueryResultArticle, InputTextMessageContent, BotCommand
+from telegram.error import RetryAfter, TimedOut
 from telegram.ext import ApplicationBuilder, ContextTypes, CommandHandler, MessageHandler, \
     filters, InlineQueryHandler, Application
@@ -18,7 +22,7 @@ class ChatGPT3TelegramBot:
 
     def __init__(self, config: dict, openai: OpenAIHelper):
         """
-        Initializes the bot with the given configuration and GPT-3 bot object.
+        Initializes the bot with the given configuration and GPT bot object.
         :param config: A dictionary containing the bot configuration
         :param openai: OpenAIHelper object
         """
@@ -27,7 +31,7 @@ class ChatGPT3TelegramBot:
         self.commands = [
             BotCommand(command='help', description='Show help message'),
             BotCommand(command='reset', description='Reset the conversation. Optionally pass high-level instructions '
-                                                    'for the conversation (e.g. /reset You are a helpful assistant)'),
+                                                    '(e.g. /reset You are a helpful assistant)'),
             BotCommand(command='image', description='Generate image from prompt (e.g. /image cat)'),
             BotCommand(command='stats', description='Get your current usage statistics')
         ]
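The diff registers these commands but doesn't show how they reach Telegram; plausibly the bot's `post_init` hook (wired up later in this commit) publishes them, roughly like this self-contained sketch (hypothetical, not code from this diff):

```python
from telegram import BotCommand
from telegram.ext import Application

commands = [
    BotCommand(command='help', description='Show help message'),
    BotCommand(command='stats', description='Get your current usage statistics'),
]

async def post_init(application: Application) -> None:
    # Publish the command list so clients can offer it in the "/" menu
    await application.bot.set_my_commands(commands)
```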
@@ -308,14 +312,62 @@ class ChatGPT3TelegramBot:
         await context.bot.send_chat_action(chat_id=chat_id, action=constants.ChatAction.TYPING)
 
         try:
-            response, total_tokens = await self.openai.get_chat_response(chat_id=chat_id, query=prompt)
-
-            # add chat request to users usage tracker
-            self.usage[user_id].add_chat_tokens(total_tokens, self.config['token_price'])
-            # add guest chat request to guest usage tracker
-            allowed_user_ids = self.config['allowed_user_ids'].split(',')
-            if str(user_id) not in allowed_user_ids and 'guests' in self.usage:
-                self.usage["guests"].add_chat_tokens(total_tokens, self.config['token_price'])
+            if self.config['stream']:
+                is_group_chat = self.is_group_chat(update)
+
+                stream_response = self.openai.get_chat_response_stream(chat_id=chat_id, query=prompt)
+                i = 0
+                prev = ''
+                sent_message = None
+
+                async for content, tokens in stream_response:
+                    if len(content.strip()) == 0:
+                        continue
+
+                    if is_group_chat:
+                        # group chats have stricter flood limits
+                        cutoff = 180 if len(content) > 1000 else 120 if len(content) > 200 else 90 if len(content) > 50 else 50
+                    else:
+                        cutoff = 120 if len(content) > 1000 else 100 if len(content) > 200 else 85 if len(content) > 50 else 40
+
+                    if i == 0:
+                        try:
+                            if sent_message is not None:
+                                await context.bot.delete_message(chat_id=sent_message.chat_id,
+                                                                 message_id=sent_message.message_id)
+                            sent_message = await update.message.reply_text(content)
+                        except:
+                            continue
+
+                    elif abs(len(content) - len(prev)) > cutoff or tokens != 'not_finished':
+                        prev = content
+
+                        try:
+                            await context.bot.edit_message_text(content, chat_id=sent_message.chat_id,
+                                                                message_id=sent_message.message_id,
+                                                                parse_mode=constants.ParseMode.MARKDOWN)
+                        except telegram.error.BadRequest as e:
+                            if str(e).startswith("Message is not modified"):
+                                continue
+                            await context.bot.edit_message_text(content, chat_id=sent_message.chat_id,
+                                                                message_id=sent_message.message_id)
+
+                        except RetryAfter as e:
+                            logging.warning(str(e))
+                            await asyncio.sleep(e.retry_after)
+
+                        except TimedOut as e:
+                            logging.warning(str(e))
+                            await asyncio.sleep(1)
+
+                        await asyncio.sleep(0.01)
+
+                    i += 1
+                    if tokens != 'not_finished':
+                        total_tokens = int(tokens)
+
+            else:
+                response, total_tokens = await self.openai.get_chat_response(chat_id=chat_id, query=prompt)
 
             # Split into chunks of 4096 characters (Telegram's message limit)
             chunks = self.split_into_chunks(response)
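The nested conditional expressions above are dense. The same throttle rule, restated as a table-driven helper for clarity (a sketch, not code from this commit; thresholds copied from the diff): an edit only fires when the streamed text has grown by more than the cutoff since the last edit, or when the final chunk arrives.

```python
def edit_cutoff(length: int, is_group_chat: bool) -> int:
    # (min length, group cutoff, private cutoff); group chats have
    # stricter flood limits, so they get larger gaps between edits
    thresholds = [(1000, 180, 120), (200, 120, 100), (50, 90, 85), (0, 50, 40)]
    for min_len, group, private in thresholds:
        if length > min_len or min_len == 0:
            return group if is_group_chat else private
```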
@@ -327,6 +379,14 @@ class ChatGPT3TelegramBot:
                     text=chunk,
                     parse_mode=constants.ParseMode.MARKDOWN
                 )
+
+            # add chat request to users usage tracker
+            self.usage[user_id].add_chat_tokens(total_tokens, self.config['token_price'])
+            # add guest chat request to guest usage tracker
+            allowed_user_ids = self.config['allowed_user_ids'].split(',')
+            if str(user_id) not in allowed_user_ids and 'guests' in self.usage:
+                self.usage["guests"].add_chat_tokens(total_tokens, self.config['token_price'])
+
         except Exception as e:
             logging.exception(e)
             await context.bot.send_message(
@@ -484,6 +544,7 @@ class ChatGPT3TelegramBot:
             .proxy_url(self.config['proxy']) \
             .get_updates_proxy_url(self.config['proxy']) \
             .post_init(self.post_init) \
+            .concurrent_updates(True) \
             .build()
 
         application.add_handler(CommandHandler('reset', self.reset))
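`concurrent_updates(True)` matters for streaming: python-telegram-bot v20 processes updates sequentially by default, so one chat's long streamed reply would otherwise block every other chat. A minimal builder sketch showing where the call sits (the token value is a placeholder):

```python
from telegram.ext import ApplicationBuilder

application = ApplicationBuilder() \
    .token('YOUR_TELEGRAM_BOT_TOKEN') \
    .concurrent_updates(True) \
    .build()
```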