change to use image files in memory

2025-12-22 23:25:41 +01:00 · 2023-11-09 18:18:15 +01:00
parent f69f9e5034
commit b2f2114e36
3 changed files with 13 additions and 18 deletions
--- a/bot/openai_helper.py
+++ b/bot/openai_helper.py
@@ -353,12 +353,12 @@ class OpenAIHelper:
            logging.exception(e)
            raise Exception(f"⚠️ _{localized_text('error', self.config['bot_language'])}._ ⚠️\n{str(e)}") from e

-    async def interpret_image(self, chat_id, filename, prompt=None):
+    async def interpret_image(self, chat_id, fileobj, prompt=None):
        """
        Interprets a given PNG image file using the Vision model.
        """
        try:
-            image = encode_image(filename)
+            image = encode_image(fileobj)
            prompt = self.config['vision_prompt'] if prompt is None else prompt

            # for now I am not adding the image itself to the history
@@ -390,7 +390,7 @@ class OpenAIHelper:
            self.__add_to_history(chat_id, role="assistant", content=content)
            

-            return content, self.__count_tokens_vision(filename)
+            return content, self.__count_tokens_vision(fileobj)
        
        except openai.RateLimitError as e:
            raise e
@@ -500,13 +500,13 @@ class OpenAIHelper:
        num_tokens += 3  # every reply is primed with <|start|>assistant<|message|>
        return num_tokens

-    def __count_tokens_vision(self, filename) -> int:
+    def __count_tokens_vision(self, fileobj) -> int:
        """
        Counts the number of tokens for interpreting an image.
        :param image: image to interpret
        :return: the number of tokens required
        """
-        image = Image.open(filename)
+        image = Image.open(fileobj)
        model = self.config['model']
        if model not in GPT_4_VISION_MODELS:
            raise NotImplementedError(f"""num_tokens_from_messages() is not implemented for model {model}.""")
--- a/bot/telegram_bot.py
+++ b/bot/telegram_bot.py
@@ -3,7 +3,7 @@ from __future__ import annotations
 import asyncio
 import logging
 import os
-import tempfile
+import io

 from uuid import uuid4
 from telegram import BotCommandScopeAllGroupChats, Update, constants
@@ -406,13 +406,12 @@ class ChatGPTTelegramBot:
        
        image = update.message.effective_attachment[-1]
        
-        temp_file = tempfile.NamedTemporaryFile()

        async def _execute():
            bot_language = self.config['bot_language']
            try:
                media_file = await context.bot.get_file(image.file_id)
-                await media_file.download_to_drive(temp_file.name)
+                temp_file = io.BytesIO(await media_file.download_as_bytearray())
            except Exception as e:
                logging.exception(e)
                await update.effective_message.reply_text(
@@ -428,12 +427,12 @@ class ChatGPTTelegramBot:
            
            # convert jpg from telegram to png as understood by openai

-            temp_file_png = tempfile.NamedTemporaryFile()
+            temp_file_png = io.BytesIO()

            try:
-                original_image = Image.open(temp_file.name)
+                original_image = Image.open(temp_file)
                
-                original_image.save(temp_file_png.name, format='PNG')
+                original_image.save(temp_file_png, format='PNG')
                logging.info(f'New vision request received from user {update.message.from_user.name} '
                             f'(id: {update.message.from_user.id})')

@@ -452,7 +451,7 @@ class ChatGPTTelegramBot:
                self.usage[user_id] = UsageTracker(user_id, update.message.from_user.name)

            try:
-                interpretation, tokens = await self.openai.interpret_image(chat_id, temp_file_png.name, prompt=prompt)
+                interpretation, tokens = await self.openai.interpret_image(chat_id, temp_file_png, prompt=prompt)

                vision_token_price = self.config['vision_token_price']
                self.usage[user_id].add_vision_tokens(tokens, vision_token_price)
@@ -477,9 +476,6 @@ class ChatGPTTelegramBot:
                    text=f"{localized_text('vision_fail', bot_language)}: {str(e)}",
                    parse_mode=constants.ParseMode.MARKDOWN
                )
-            finally:
-                temp_file.close()
-                temp_file_png.close()

        await wrap_with_indicator(update, context, _execute, constants.ChatAction.TYPING)

--- a/bot/utils.py
+++ b/bot/utils.py
@@ -381,6 +381,5 @@ def cleanup_intermediate_files(response: any):


 # Function to encode the image
-def encode_image(image_path):
-    with open(image_path, "rb") as image_file:
-        return base64.b64encode(image_file.read()).decode('utf-8')
+def encode_image(fileobj):
+    return base64.b64encode(fileobj.getvalue()).decode('utf-8')