diff --git a/bot/openai_helper.py b/bot/openai_helper.py index f487456..11e9df3 100644 --- a/bot/openai_helper.py +++ b/bot/openai_helper.py @@ -384,52 +384,177 @@ class OpenAIHelper: logging.exception(e) raise Exception(f"⚠️ _{localized_text('error', self.config['bot_language'])}._ ⚠️\n{str(e)}") from e - async def interpret_image(self, chat_id, fileobj, prompt=None): + @retry( + reraise=True, + retry=retry_if_exception_type(openai.RateLimitError), + wait=wait_fixed(20), + stop=stop_after_attempt(3) + ) + async def __common_get_chat_response_vision(self, chat_id: int, content: list, stream=False): """ - Interprets a given PNG image file using the Vision model. + Request a response from the GPT model. + :param chat_id: The chat ID + :param query: The query to send to the model + :return: The answer from the model and the number of tokens used """ + bot_language = self.config['bot_language'] try: - image = encode_image(fileobj) - prompt = self.config['vision_prompt'] if prompt is None else prompt - - # for now I am not adding the image itself to the history - if chat_id not in self.conversations or self.__max_age_reached(chat_id): self.reset_chat_history(chat_id) self.last_updated[chat_id] = datetime.datetime.now() + for message in content: + if message['type'] == 'text': + query = message['text'] + break + + self.__add_to_history(chat_id, role="user", content=query) + + # Summarize the chat history if it's too long to avoid excessive token usage + token_count = self.__count_tokens(self.conversations[chat_id]) + exceeded_max_tokens = token_count + self.config['max_tokens'] > self.__max_model_tokens() + exceeded_max_history_size = len(self.conversations[chat_id]) > self.config['max_history_size'] + + if exceeded_max_tokens or exceeded_max_history_size: + logging.info(f'Chat history for chat ID {chat_id} is too long. Summarising...') + try: + summary = await self.__summarise(self.conversations[chat_id][:-1]) + logging.debug(f'Summary: {summary}') + self.reset_chat_history(chat_id, self.conversations[chat_id][0]['content']) + self.__add_to_history(chat_id, role="assistant", content=summary) + self.__add_to_history(chat_id, role="user", content=query) + except Exception as e: + logging.warning(f'Error while summarising chat history: {str(e)}. Popping elements instead...') + self.conversations[chat_id] = self.conversations[chat_id][-self.config['max_history_size']:] + + message = {'role':'user', 'content':content} - message = {'role':'user', 'content':[{'type':'text', 'text':prompt}, {'type':'image_url', \ - 'image_url': {'url':f'data:image/jpeg;base64,{image}', 'detail':self.config['vision_detail'] } }]} common_args = { 'model': 'gpt-4-vision-preview', # the only one that currently makes sense here - 'messages': self.conversations[chat_id] + [message], + 'messages': self.conversations[chat_id][:-1] + [message], 'temperature': self.config['temperature'], 'n': 1, # several choices is not implemented yet 'max_tokens': self.config['vision_max_tokens'], 'presence_penalty': self.config['presence_penalty'], 'frequency_penalty': self.config['frequency_penalty'], - 'stream': False # We need to refactor this class to make this feasible without too much repetition + 'stream': stream } - self.__add_to_history(chat_id, role="user", content=prompt) - response = await self.client.chat.completions.create(**common_args) + + # vision model does not yet support functions + + # if self.config['enable_functions']: + # functions = self.plugin_manager.get_functions_specs() + # if len(functions) > 0: + # common_args['functions'] = self.plugin_manager.get_functions_specs() + # common_args['function_call'] = 'auto' + return await self.client.chat.completions.create(**common_args) - content = response.choices[0].message.content - self.__add_to_history(chat_id, role="assistant", content=content) - - - return content, response.usage.total_tokens - except openai.RateLimitError as e: raise e + except openai.BadRequestError as e: - raise Exception(f"⚠️ _{localized_text('openai_invalid', self.config['bot_language'])}._ ⚠️\n{str(e)}") from e + raise Exception(f"⚠️ _{localized_text('openai_invalid', bot_language)}._ ⚠️\n{str(e)}") from e + except Exception as e: - logging.exception(e) - raise Exception(f"⚠️ _{localized_text('error', self.config['bot_language'])}._ ⚠️\n{str(e)}") from e + raise Exception(f"⚠️ _{localized_text('error', bot_language)}._ ⚠️\n{str(e)}") from e + + + async def interpret_image(self, chat_id, fileobj, prompt=None): + """ + Interprets a given PNG image file using the Vision model. + """ + image = encode_image(fileobj) + prompt = self.config['vision_prompt'] if prompt is None else prompt + + content = [{'type':'text', 'text':prompt}, {'type':'image_url', \ + 'image_url': {'url':f'data:image/jpeg;base64,{image}', 'detail':self.config['vision_detail'] } }] + + response = await self.__common_get_chat_response_vision(chat_id, content) + + + + # functions are not available for this model + + # if self.config['enable_functions']: + # response, plugins_used = await self.__handle_function_call(chat_id, response) + # if is_direct_result(response): + # return response, '0' + + answer = '' + + if len(response.choices) > 1 and self.config['n_choices'] > 1: + for index, choice in enumerate(response.choices): + content = choice.message.content.strip() + if index == 0: + self.__add_to_history(chat_id, role="assistant", content=content) + answer += f'{index + 1}\u20e3\n' + answer += content + answer += '\n\n' + else: + answer = response.choices[0].message.content.strip() + self.__add_to_history(chat_id, role="assistant", content=answer) + + bot_language = self.config['bot_language'] + # Plugins are not enabled either + # show_plugins_used = len(plugins_used) > 0 and self.config['show_plugins_used'] + # plugin_names = tuple(self.plugin_manager.get_plugin_source_name(plugin) for plugin in plugins_used) + if self.config['show_usage']: + answer += "\n\n---\n" \ + f"💰 {str(response.usage.total_tokens)} {localized_text('stats_tokens', bot_language)}" \ + f" ({str(response.usage.prompt_tokens)} {localized_text('prompt', bot_language)}," \ + f" {str(response.usage.completion_tokens)} {localized_text('completion', bot_language)})" + # if show_plugins_used: + # answer += f"\n🔌 {', '.join(plugin_names)}" + # elif show_plugins_used: + # answer += f"\n\n---\n🔌 {', '.join(plugin_names)}" + + return answer, response.usage.total_tokens + + async def interpret_image_stream(self, chat_id, fileobj, prompt=None): + """ + Interprets a given PNG image file using the Vision model. + """ + image = encode_image(fileobj) + prompt = self.config['vision_prompt'] if prompt is None else prompt + + content = [{'type':'text', 'text':prompt}, {'type':'image_url', \ + 'image_url': {'url':f'data:image/jpeg;base64,{image}', 'detail':self.config['vision_detail'] } }] + + response = await self.__common_get_chat_response_vision(chat_id, content, stream=True) + + + + # if self.config['enable_functions']: + # response, plugins_used = await self.__handle_function_call(chat_id, response, stream=True) + # if is_direct_result(response): + # yield response, '0' + # return + + answer = '' + async for chunk in response: + if len(chunk.choices) == 0: + continue + delta = chunk.choices[0].delta + if delta.content: + answer += delta.content + yield answer, 'not_finished' + answer = answer.strip() + self.__add_to_history(chat_id, role="assistant", content=answer) + tokens_used = str(self.__count_tokens(self.conversations[chat_id])) + + #show_plugins_used = len(plugins_used) > 0 and self.config['show_plugins_used'] + #plugin_names = tuple(self.plugin_manager.get_plugin_source_name(plugin) for plugin in plugins_used) + if self.config['show_usage']: + answer += f"\n\n---\n💰 {tokens_used} {localized_text('stats_tokens', self.config['bot_language'])}" + # if show_plugins_used: + # answer += f"\n🔌 {', '.join(plugin_names)}" + # elif show_plugins_used: + # answer += f"\n\n---\n🔌 {', '.join(plugin_names)}" + + yield answer, tokens_used def reset_chat_history(self, chat_id, content=''): """ diff --git a/bot/telegram_bot.py b/bot/telegram_bot.py index 5253e99..7a536b1 100644 --- a/bot/telegram_bot.py +++ b/bot/telegram_bot.py @@ -518,46 +518,128 @@ class ChatGPTTelegramBot: if user_id not in self.usage: self.usage[user_id] = UsageTracker(user_id, update.message.from_user.name) - try: - interpretation, tokens = await self.openai.interpret_image(chat_id, temp_file_png, prompt=prompt) + if self.config['stream']: - vision_token_price = self.config['vision_token_price'] - self.usage[user_id].add_vision_tokens(tokens, vision_token_price) + stream_response = self.openai.interpret_image_stream(chat_id=chat_id, fileobj=temp_file_png, prompt=prompt) + i = 0 + prev = '' + sent_message = None + backoff = 0 + stream_chunk = 0 - allowed_user_ids = self.config['allowed_user_ids'].split(',') - if str(user_id) not in allowed_user_ids and 'guests' in self.usage: - self.usage["guests"].add_vision_tokens(tokens, vision_token_price) + async for content, tokens in stream_response: + if is_direct_result(content): + return await handle_direct_result(self.config, update, content) + + if len(content.strip()) == 0: + continue + + stream_chunks = split_into_chunks(content) + if len(stream_chunks) > 1: + content = stream_chunks[-1] + if stream_chunk != len(stream_chunks) - 1: + stream_chunk += 1 + try: + await edit_message_with_retry(context, chat_id, str(sent_message.message_id), + stream_chunks[-2]) + except: + pass + try: + sent_message = await update.effective_message.reply_text( + message_thread_id=get_thread_id(update), + text=content if len(content) > 0 else "..." + ) + except: + pass + continue + + cutoff = get_stream_cutoff_values(update, content) + cutoff += backoff + + if i == 0: + try: + if sent_message is not None: + await context.bot.delete_message(chat_id=sent_message.chat_id, + message_id=sent_message.message_id) + sent_message = await update.effective_message.reply_text( + message_thread_id=get_thread_id(update), + reply_to_message_id=get_reply_to_message_id(self.config, update), + text=content, + ) + except: + continue + + elif abs(len(content) - len(prev)) > cutoff or tokens != 'not_finished': + prev = content + + try: + use_markdown = tokens != 'not_finished' + await edit_message_with_retry(context, chat_id, str(sent_message.message_id), + text=content, markdown=use_markdown) + + except RetryAfter as e: + backoff += 5 + await asyncio.sleep(e.retry_after) + continue + + except TimedOut: + backoff += 5 + await asyncio.sleep(0.5) + continue + + except Exception: + backoff += 5 + continue + + await asyncio.sleep(0.01) + + i += 1 + if tokens != 'not_finished': + total_tokens = int(tokens) + + + else: try: - await update.effective_message.reply_text( - message_thread_id=get_thread_id(update), - reply_to_message_id=get_reply_to_message_id(self.config, update), - text=interpretation, - parse_mode=constants.ParseMode.MARKDOWN - ) - except BadRequest: + interpretation, total_tokens = await self.openai.interpret_image(chat_id, temp_file_png, prompt=prompt) + + try: await update.effective_message.reply_text( message_thread_id=get_thread_id(update), reply_to_message_id=get_reply_to_message_id(self.config, update), - text=interpretation - ) - except Exception as e: - logging.exception(e) - await update.effective_message.reply_text( - message_thread_id=get_thread_id(update), - reply_to_message_id=get_reply_to_message_id(self.config, update), - text=f"{localized_text('vision_fail', bot_language)}: {str(e)}", + text=interpretation, parse_mode=constants.ParseMode.MARKDOWN ) - except Exception as e: - logging.exception(e) - await update.effective_message.reply_text( - message_thread_id=get_thread_id(update), - reply_to_message_id=get_reply_to_message_id(self.config, update), - text=f"{localized_text('vision_fail', bot_language)}: {str(e)}", - parse_mode=constants.ParseMode.MARKDOWN - ) + except BadRequest: + try: + await update.effective_message.reply_text( + message_thread_id=get_thread_id(update), + reply_to_message_id=get_reply_to_message_id(self.config, update), + text=interpretation + ) + except Exception as e: + logging.exception(e) + await update.effective_message.reply_text( + message_thread_id=get_thread_id(update), + reply_to_message_id=get_reply_to_message_id(self.config, update), + text=f"{localized_text('vision_fail', bot_language)}: {str(e)}", + parse_mode=constants.ParseMode.MARKDOWN + ) + except Exception as e: + logging.exception(e) + await update.effective_message.reply_text( + message_thread_id=get_thread_id(update), + reply_to_message_id=get_reply_to_message_id(self.config, update), + text=f"{localized_text('vision_fail', bot_language)}: {str(e)}", + parse_mode=constants.ParseMode.MARKDOWN + ) + vision_token_price = self.config['vision_token_price'] + self.usage[user_id].add_vision_tokens(total_tokens, vision_token_price) + + allowed_user_ids = self.config['allowed_user_ids'].split(',') + if str(user_id) not in allowed_user_ids and 'guests' in self.usage: + self.usage["guests"].add_vision_tokens(total_tokens, vision_token_price) await wrap_with_indicator(update, context, _execute, constants.ChatAction.TYPING)