Mirror of https://github.com/aljazceru/chatgpt-telegram-bot.git (synced 2025-12-23 23:55:05 +01:00)
initial stream support
@@ -26,9 +26,9 @@ A [Telegram bot](https://core.telegram.org/bots/api) that integrates with OpenAI
 - [x] (NEW!) Track token usage per user - by [@AlexHTW](https://github.com/AlexHTW)
 - [x] (NEW!) Get personal token usage statistics and cost per day/month via the `/stats` command - by [@AlexHTW](https://github.com/AlexHTW)
 - [x] (NEW!) User budgets and guest budgets - by [@AlexHTW](https://github.com/AlexHTW)
+- [x] (NEW!) Stream support
 
 ## Additional features - help needed!
-- [ ] Add stream support ([#43](https://github.com/n3d1117/chatgpt-telegram-bot/issues/43))
 - [ ] Add session persistence ([#70](https://github.com/n3d1117/chatgpt-telegram-bot/issues/70), [#71](https://github.com/n3d1117/chatgpt-telegram-bot/issues/71))
 
 PRs are always welcome!
@@ -58,6 +58,7 @@ Customize the configuration by copying `.env.example` and renaming it to `.env`,
 | `OPENAI_MODEL` | The OpenAI model to use for generating responses | `gpt-3.5-turbo` |
 | `ASSISTANT_PROMPT` | A system message that sets the tone and controls the behavior of the assistant | `You are a helpful assistant.` |
 | `SHOW_USAGE` | Whether to show OpenAI token usage information after each response | false |
+| `STREAM` | Whether to stream responses | true |
 | `MAX_TOKENS` | Upper bound on how many tokens the ChatGPT API will return | 1200 |
 | `MAX_HISTORY_SIZE` | Max number of messages to keep in memory, after which the conversation will be summarised to avoid excessive token usage | 10 |
 | `MAX_CONVERSATION_AGE_MINUTES` | Maximum number of minutes a conversation should live since the last message, after which the conversation will be reset | 180 |
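For reference, a minimal `.env` fragment enabling the new option could look like the following (illustrative values only; `.env.example` in the repository remains the authoritative template):

```
STREAM=true
SHOW_USAGE=false
MAX_TOKENS=1200
```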
@@ -28,6 +28,7 @@ def main():
     openai_config = {
         'api_key': os.environ['OPENAI_API_KEY'],
         'show_usage': os.environ.get('SHOW_USAGE', 'false').lower() == 'true',
+        'stream': os.environ.get('STREAM', 'true').lower() == 'true',
         'proxy': os.environ.get('PROXY', None),
         'max_history_size': int(os.environ.get('MAX_HISTORY_SIZE', 10)),
         'max_conversation_age_minutes': int(os.environ.get('MAX_CONVERSATION_AGE_MINUTES', 180)),
@@ -46,6 +47,7 @@ def main():
         'allowed_user_ids': os.environ.get('ALLOWED_TELEGRAM_USER_IDS', '*'),
         'monthly_user_budgets': os.environ.get('MONTHLY_USER_BUDGETS', '*'),
         'monthly_guest_budget': float(os.environ.get('MONTHLY_GUEST_BUDGET', '100.0')),
+        'stream': os.environ.get('STREAM', 'true').lower() == 'true',
         'proxy': os.environ.get('PROXY', None),
         'voice_reply_transcript': os.environ.get('VOICE_REPLY_WITH_TRANSCRIPT_ONLY', 'true').lower() == 'true',
         'ignore_group_transcriptions': os.environ.get('IGNORE_GROUP_TRANSCRIPTIONS', 'true').lower() == 'true',
@@ -39,7 +39,63 @@ class OpenAIHelper:
 
     async def get_chat_response(self, chat_id: int, query: str) -> tuple[str, str]:
         """
-        Gets a response from the GPT-3 model.
+        Gets a full response from the GPT model.
+        :param chat_id: The chat ID
+        :param query: The query to send to the model
+        :return: The answer from the model and the number of tokens used
+        """
+        response = await self.__common_get_chat_response(chat_id, query)
+        answer = ''
+
+        if len(response.choices) > 1 and self.config['n_choices'] > 1:
+            for index, choice in enumerate(response.choices):
+                content = choice['message']['content'].strip()
+                if index == 0:
+                    self.__add_to_history(chat_id, role="assistant", content=content)
+                answer += f'{index + 1}\u20e3\n'
+                answer += content
+                answer += '\n\n'
+        else:
+            answer = response.choices[0]['message']['content'].strip()
+            self.__add_to_history(chat_id, role="assistant", content=answer)
+
+        if self.config['show_usage']:
+            answer += "\n\n---\n" \
+                      f"💰 Tokens used: {str(response.usage['total_tokens'])}" \
+                      f" ({str(response.usage['prompt_tokens'])} prompt," \
+                      f" {str(response.usage['completion_tokens'])} completion)"
+
+        return answer, response.usage['total_tokens']
+
+    async def get_chat_response_stream(self, chat_id: int, query: str):
+        """
+        Stream response from the GPT model.
+        :param chat_id: The chat ID
+        :param query: The query to send to the model
+        :return: The answer from the model and the number of tokens used, or 'not_finished'
+        """
+        response = await self.__common_get_chat_response(chat_id, query)
+
+        answer = ''
+        async for item in response:
+            if 'choices' not in item or len(item.choices) == 0:
+                continue
+            delta = item.choices[0].delta
+            if 'content' in delta:
+                answer += delta.content
+                yield answer, 'not_finished'
+        answer = answer.strip()
+        self.__add_to_history(chat_id, role="assistant", content=answer)
+        tokens_used = str(self.__count_tokens(self.conversations[chat_id]))
+
+        if self.config['show_usage']:
+            answer += f"\n\n---\n💰 Tokens used: {tokens_used}"
+
+        yield answer, tokens_used
+
+    async def __common_get_chat_response(self, chat_id: int, query: str):
+        """
+        Request a response from the GPT model.
         :param chat_id: The chat ID
         :param query: The query to send to the model
         :return: The answer from the model and the number of tokens used
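For orientation, the new `get_chat_response_stream` is an async generator: it yields the accumulated partial answer paired with the sentinel `'not_finished'` while chunks arrive, then yields the final answer paired with the token count exactly once. A minimal consumption sketch (the `helper` instance and `demo` wrapper are hypothetical, not part of this commit):

```python
import asyncio

async def demo(helper, chat_id: int) -> None:
    # helper is an OpenAIHelper instance configured with stream support
    async for content, status in helper.get_chat_response_stream(chat_id=chat_id, query='Hello!'):
        if status == 'not_finished':
            print(f'partial: {len(content)} chars so far')
        else:
            print(f'final answer ({status} tokens):\n{content}')

# asyncio.run(demo(helper, chat_id=123))
```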
@@ -69,7 +125,7 @@ class OpenAIHelper:
                     logging.warning(f'Error while summarising chat history: {str(e)}. Popping elements instead...')
                     self.conversations[chat_id] = self.conversations[chat_id][-self.config['max_history_size']:]
 
-            response = await openai.ChatCompletion.acreate(
+            return await openai.ChatCompletion.acreate(
                 model=self.config['model'],
                 messages=self.conversations[chat_id],
                 temperature=self.config['temperature'],
@@ -77,34 +133,9 @@ class OpenAIHelper:
                 max_tokens=self.config['max_tokens'],
                 presence_penalty=self.config['presence_penalty'],
                 frequency_penalty=self.config['frequency_penalty'],
+                stream=self.config['stream']
             )
 
-            if len(response.choices) > 0:
-                answer = ''
-
-                if len(response.choices) > 1 and self.config['n_choices'] > 1:
-                    for index, choice in enumerate(response.choices):
-                        content = choice['message']['content'].strip()
-                        if index == 0:
-                            self.__add_to_history(chat_id, role="assistant", content=content)
-                        answer += f'{index+1}\u20e3\n'
-                        answer += content
-                        answer += '\n\n'
-                else:
-                    answer = response.choices[0]['message']['content'].strip()
-                    self.__add_to_history(chat_id, role="assistant", content=answer)
-
-                if self.config['show_usage']:
-                    answer += "\n\n---\n" \
-                              f"💰 Tokens used: {str(response.usage['total_tokens'])}" \
-                              f" ({str(response.usage['prompt_tokens'])} prompt," \
-                              f" {str(response.usage['completion_tokens'])} completion)"
-
-                return answer, response.usage['total_tokens']
-
-            logging.error(f'No response from GPT: {str(response)}')
-            raise Exception('⚠️ _An error has occurred_ ⚠️\nPlease try again in a while.')
-
         except openai.error.RateLimitError as e:
             raise Exception(f'⚠️ _OpenAI Rate Limit exceeded_ ⚠️\n{str(e)}') from e
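With `stream=True`, `openai.ChatCompletion.acreate` (the pre-1.0 openai-python API used here) resolves to an async iterator whose chunks carry incremental `delta` fragments instead of a complete `message`; this is what `get_chat_response_stream` consumes above. A standalone sketch under that assumption (model and prompt illustrative):

```python
import openai

async def stream_demo() -> str:
    response = await openai.ChatCompletion.acreate(
        model='gpt-3.5-turbo',
        messages=[{'role': 'user', 'content': 'Say hello'}],
        stream=True
    )
    answer = ''
    async for chunk in response:
        delta = chunk.choices[0].delta
        if 'content' in delta:
            answer += delta.content  # accumulate incremental content fragments
    return answer
```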
@@ -206,7 +237,7 @@ class OpenAIHelper:
         )
 
     # https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb
-    def __count_tokens(self, messages):
+    def __count_tokens(self, messages) -> int:
         """
         Counts the number of tokens required to send the given messages.
         :param messages: the messages to send
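The cookbook notebook referenced above counts tokens by encoding every message field and adding a fixed per-message overhead. A rough sketch of that recipe (assumes the `tiktoken` package; the overhead constants follow the early-2023 cookbook guidance for `gpt-3.5-turbo` and may differ for other models):

```python
import tiktoken

def count_tokens(messages: list[dict], model: str = 'gpt-3.5-turbo') -> int:
    encoding = tiktoken.encoding_for_model(model)
    num_tokens = 0
    for message in messages:
        num_tokens += 4  # per-message overhead: <im_start>{role}\n{content}<im_end>\n
        for value in message.values():
            num_tokens += len(encoding.encode(value))
    return num_tokens + 2  # every reply is primed with <im_start>assistant
```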
@@ -1,8 +1,12 @@
 import logging
 import os
 
+import asyncio
+
+import telegram
 from telegram import constants
 from telegram import Update, InlineQueryResultArticle, InputTextMessageContent, BotCommand
+from telegram.error import RetryAfter, TimedOut
 from telegram.ext import ApplicationBuilder, ContextTypes, CommandHandler, MessageHandler, \
     filters, InlineQueryHandler, Application
 
@@ -18,7 +22,7 @@ class ChatGPT3TelegramBot:
 
     def __init__(self, config: dict, openai: OpenAIHelper):
         """
-        Initializes the bot with the given configuration and GPT-3 bot object.
+        Initializes the bot with the given configuration and GPT bot object.
         :param config: A dictionary containing the bot configuration
         :param openai: OpenAIHelper object
         """
@@ -27,7 +31,7 @@ class ChatGPT3TelegramBot:
         self.commands = [
             BotCommand(command='help', description='Show help message'),
             BotCommand(command='reset', description='Reset the conversation. Optionally pass high-level instructions '
-                                                    'for the conversation (e.g. /reset You are a helpful assistant)'),
+                                                    '(e.g. /reset You are a helpful assistant)'),
             BotCommand(command='image', description='Generate image from prompt (e.g. /image cat)'),
             BotCommand(command='stats', description='Get your current usage statistics')
         ]
@@ -308,7 +312,73 @@ class ChatGPT3TelegramBot:
         await context.bot.send_chat_action(chat_id=chat_id, action=constants.ChatAction.TYPING)
 
         try:
-            response, total_tokens = await self.openai.get_chat_response(chat_id=chat_id, query=prompt)
+            if self.config['stream']:
+                is_group_chat = self.is_group_chat(update)
+
+                stream_response = self.openai.get_chat_response_stream(chat_id=chat_id, query=prompt)
+                i = 0
+                prev = ''
+                sent_message = None
+
+                async for content, tokens in stream_response:
+                    if len(content.strip()) == 0:
+                        continue
+
+                    if is_group_chat:
+                        # group chats have stricter flood limits
+                        cutoff = 180 if len(content) > 1000 else 120 if len(content) > 200 else 90 if len(content) > 50 else 50
+                    else:
+                        cutoff = 120 if len(content) > 1000 else 100 if len(content) > 200 else 85 if len(content) > 50 else 40
+
+                    if i == 0:
+                        try:
+                            if sent_message is not None:
+                                await context.bot.delete_message(chat_id=sent_message.chat_id,
+                                                                 message_id=sent_message.message_id)
+                            sent_message = await update.message.reply_text(content)
+                        except:
+                            continue
+
+                    elif abs(len(content) - len(prev)) > cutoff or tokens != 'not_finished':
+                        prev = content
+
+                        try:
+                            await context.bot.edit_message_text(content, chat_id=sent_message.chat_id,
+                                                                message_id=sent_message.message_id,
+                                                                parse_mode=constants.ParseMode.MARKDOWN)
+                        except telegram.error.BadRequest as e:
+                            if str(e).startswith("Message is not modified"):
+                                continue
+                            await context.bot.edit_message_text(content, chat_id=sent_message.chat_id,
+                                                                message_id=sent_message.message_id)
+
+                        except RetryAfter as e:
+                            logging.warning(str(e))
+                            await asyncio.sleep(e.retry_after)
+
+                        except TimedOut as e:
+                            logging.warning(str(e))
+                            await asyncio.sleep(1)
+
+                        await asyncio.sleep(0.01)
+
+                    i += 1
+                    if tokens != 'not_finished':
+                        total_tokens = int(tokens)
+
+            else:
+                response, total_tokens = await self.openai.get_chat_response(chat_id=chat_id, query=prompt)
+
+                # Split into chunks of 4096 characters (Telegram's message limit)
+                chunks = self.split_into_chunks(response)
+
+                for index, chunk in enumerate(chunks):
+                    await context.bot.send_message(
+                        chat_id=chat_id,
+                        reply_to_message_id=update.message.message_id if index == 0 else None,
+                        text=chunk,
+                        parse_mode=constants.ParseMode.MARKDOWN
+                    )
 
             # add chat request to users usage tracker
             self.usage[user_id].add_chat_tokens(total_tokens, self.config['token_price'])
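The throttling rule in the loop above deserves a note: rather than editing the Telegram message on every streamed delta, the bot edits only when the text has grown by more than a length-dependent cutoff, with stricter thresholds in group chats where flood limits are tighter. Isolated as a pure function it reads as follows (`should_edit` is a hypothetical name, not used in the codebase):

```python
def should_edit(prev: str, content: str, is_group_chat: bool, finished: bool) -> bool:
    n = len(content)
    if is_group_chat:
        # group chats have stricter flood limits
        cutoff = 180 if n > 1000 else 120 if n > 200 else 90 if n > 50 else 50
    else:
        cutoff = 120 if n > 1000 else 100 if n > 200 else 85 if n > 50 else 40
    # always flush the final answer; otherwise wait for a large enough delta
    return finished or abs(n - len(prev)) > cutoff
```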
@@ -317,16 +387,6 @@ class ChatGPT3TelegramBot:
             if str(user_id) not in allowed_user_ids and 'guests' in self.usage:
                 self.usage["guests"].add_chat_tokens(total_tokens, self.config['token_price'])
 
-            # Split into chunks of 4096 characters (Telegram's message limit)
-            chunks = self.split_into_chunks(response)
-
-            for index, chunk in enumerate(chunks):
-                await context.bot.send_message(
-                    chat_id=chat_id,
-                    reply_to_message_id=update.message.message_id if index == 0 else None,
-                    text=chunk,
-                    parse_mode=constants.ParseMode.MARKDOWN
-                )
         except Exception as e:
             logging.exception(e)
             await context.bot.send_message(
@@ -484,6 +544,7 @@ class ChatGPT3TelegramBot:
             .proxy_url(self.config['proxy']) \
             .get_updates_proxy_url(self.config['proxy']) \
             .post_init(self.post_init) \
+            .concurrent_updates(True) \
             .build()
 
         application.add_handler(CommandHandler('reset', self.reset))
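`.concurrent_updates(True)` tells python-telegram-bot to process incoming updates concurrently instead of strictly one at a time, so a long-running streamed reply in one chat does not block handlers for other chats. A minimal builder sketch (token value is a placeholder):

```python
from telegram.ext import ApplicationBuilder

application = ApplicationBuilder() \
    .token('YOUR_TELEGRAM_BOT_TOKEN') \
    .concurrent_updates(True) \
    .build()
```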