Mirror of https://github.com/aljazceru/chatgpt-telegram-bot.git, synced 2025-12-20 14:14:52 +01:00
Merge branch 'main' into add-balance-function
.env.example (65 changed lines)
@@ -6,68 +6,3 @@ TELEGRAM_BOT_TOKEN="XXX"
 
 # Comma separated list of telegram user IDs, or * to allow all. First USER_ID has access to admin commands.
 ALLOWED_TELEGRAM_USER_IDS="USER_ID_1,USER_ID_2"
-
-# Comma separated list of user budgets, USD limit per month, or * to allow all users unlimited usage
-# same order of users as in ALLOWED_TELEGRAM_USER_IDS
-MONTHLY_USER_BUDGETS="100.0,100.0"
-
-# Guest Budget, USD limit per month for non-user requests in group chats with users
-MONTHLY_GUEST_BUDGET="20.0"
-
-# Proxy to be used for OpenAI and Telegram bot
-PROXY="http://localhost:8080"
-
-# Proxy to be used for OpenAI and Telegram bot
-OPENAI_MODEL="gpt-3.5-turbo"
-
-# A system message that sets the tone and controls the behavior of the assistant
-ASSISTANT_PROMPT="You are a helpful assistant."
-
-# Whether to show OpenAI token usage information after each response
-SHOW_USAGE=false
-
-# Upper bound on how many tokens the ChatGPT API will return
-MAX_TOKENS=1200
-
-# Max number of messages to keep in memory, after which the conversation will be summarised
-MAX_HISTORY_SIZE=10
-
-# Max minutes a conversation will live, after which the conversation will be reset to avoid excessive token usage
-MAX_CONVERSATION_AGE_MINUTES=180
-
-# Whether to answer to voice messages with the transcript or with a ChatGPT response of the transcript
-VOICE_REPLY_WITH_TRANSCRIPT_ONLY=false
-
-# How many chat completion choices to generate for each input message
-N_CHOICES=1
-
-# Number between 0 and 2. Higher values like 0.8 will make the output more random,
-# while lower values like 0.2 will make it more focused and deterministic
-TEMPERATURE=1.0
-
-# Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the text so far,
-# increasing the model's likelihood to talk about new topics
-PRESENCE_PENALTY=0
-
-# Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far,
-# decreasing the model's likelihood to repeat the same line verbatim
-FREQUENCY_PENALTY=0
-
-# The DALL·E generated image size
-IMAGE_SIZE="512x512"
-
-# Group trigger keyword, if set, the bot will only respond to messages that start with this keyword
-# Useful for bots added to groups with privacy mode disabled
-GROUP_TRIGGER_KEYWORD=""
-
-# Whether transcriptions should be ignored in group chats
-IGNORE_GROUP_TRANSCRIPTIONS=true
-
-# USD-price per 1000 tokens for cost information in usage statistics, see https://openai.com/pricing
-TOKEN_PRICE=0.002
-
-# USD-prices per image for the sizes 256x256,512x512,1024x1024 for cost information in usage statistics
-IMAGE_PRICES="0.016,0.018,0.02"
-
-# USD-price for 1 minute of audio transcription for cost information in usage statistics
-TRANSCRIPTION_PRICE=0.006
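The budget variables removed from `.env.example` above pair up positionally: each entry in `MONTHLY_USER_BUDGETS` applies to the user ID at the same position in `ALLOWED_TELEGRAM_USER_IDS`, while `MONTHLY_GUEST_BUDGET` covers non-listed users in group chats. A minimal sketch of that pairing, illustrative only and not the bot's actual parsing code:

```python
import os

# Illustrative only: pair each allowed user ID with the budget at the same position.
allowed_ids = os.environ.get("ALLOWED_TELEGRAM_USER_IDS", "*")
budgets = os.environ.get("MONTHLY_USER_BUDGETS", "*")
guest_budget = float(os.environ.get("MONTHLY_GUEST_BUDGET", "100.0"))

if allowed_ids == "*" or budgets == "*":
    user_budgets = {}  # "*" means no per-user limits are enforced
else:
    ids = [int(uid) for uid in allowed_ids.split(",")]
    limits = [float(amount) for amount in budgets.split(",")]
    user_budgets = dict(zip(ids, limits))  # Telegram user ID -> monthly USD limit

print(user_budgets, guest_budget)
```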
README.md (80 changed lines)
@@ -41,57 +41,37 @@ PRs are always welcome!
 
 ## Getting started
 
 ### Configuration
 
-Customize the configuration by copying `.env.example` and renaming it to `.env`, then editing the parameters as desired:
-
-```bash
-OPENAI_API_KEY="YOUR_OPENAI_API_KEY"
-TELEGRAM_BOT_TOKEN="YOUR_TELEGRAM_BOT_TOKEN"
-
-# Optional parameters
-ALLOWED_TELEGRAM_USER_IDS="USER_ID_1,USER_ID_2,..." # Defaults to "*" (everyone)
-MONTHLY_USER_BUDGETS="BUDGET_USER_ID_1,BUDGET_USER_ID_2,..." # Defaults to "*" (no restrictions)
-MONTHLY_GUEST_BUDGET="20.0" # Defaults to 100.0
-PROXY="YOUR_PROXY" # e.g. "http://localhost:8080", defaults to none
-OPENAI_MODEL="gpt-3.5-turbo" # Defaults to gpt-3.5-turbo
-ASSISTANT_PROMPT="Custom prompt" # Defaults to "You are a helpful assistant."
-SHOW_USAGE=true # Defaults to false
-MAX_TOKENS=2000 # Defaults to 1200
-MAX_HISTORY_SIZE=15 # Defaults to 10
-MAX_CONVERSATION_AGE_MINUTES=120 # Defaults to 180 (3h)
-VOICE_REPLY_WITH_TRANSCRIPT_ONLY=false # Defaults to true
-N_CHOICES=1 # Defaults to 1
-TEMPERATURE=1.0 # Defaults to 1.0
-PRESENCE_PENALTY=0 # Defaults to 0
-FREQUENCY_PENALTY=0 # Defaults to 0
-IMAGE_SIZE="256x256" # Defaults to 512x512
-GROUP_TRIGGER_KEYWORD="@bot" # Defaults to "" (no keyword required)
-IGNORE_GROUP_TRANSCRIPTIONS=true # Whether transcriptions should be ignored in group chats. Defaults to true
-TOKEN_PRICE=0.002 # Defaults to 0.002, current price: https://openai.com/pricing
-IMAGE_PRICES="0.016,0.018,0.02" # Defaults to OpenAI Dall-E pricing for sizes 256x256,512x512,1024x1024
-TRANSCRIPTION_PRICE=0.006 # Defaults to minute price of OpenAI Whisper of 0.006
-```
-
-* `OPENAI_API_KEY`: Your OpenAI API key, you can get it from [here](https://platform.openai.com/account/api-keys)
-* `TELEGRAM_BOT_TOKEN`: Your Telegram bot's token, obtained using [BotFather](http://t.me/botfather) (see [tutorial](https://core.telegram.org/bots/tutorial#obtain-your-bot-token))
-* `ALLOWED_TELEGRAM_USER_IDS`: A comma-separated list of Telegram user IDs that are allowed to interact with the bot (use [getidsbot](https://t.me/getidsbot) to find your user ID). First USER_ID has access to admin commands. **Note**: by default, *everyone* is allowed (`*`) and has access to admin commands.
-* `MONTHLY_USER_BUDGETS`: A comma-separated list of $-amounts per user from list `ALLOWED_TELEGRAM_USER_IDS` to set custom usage limit of OpenAI API costs for each. **Note**: by default, *no limits* for anyone (`*`)
-* `MONTHLY_GUEST_BUDGET`: $-amount as usage limit for all guest users. Guest users are users in group chats that are not in the `ALLOWED_TELEGRAM_USER_IDS` list. Value is ignored if no usage limits are set in user budgets (`MONTHLY_USER_BUDGETS`="*")
-* `PROXY`: Proxy to be used for OpenAI and Telegram bot
-* `OPENAI_MODEL`: Define which OpenAI model to use (default is `gpt-3.5-turbo`)
-* `ASSISTANT_PROMPT`: A system message that sets the tone and controls the behavior of the assistant
-* `SHOW_USAGE`: Whether to show OpenAI token usage information after each response
-* `MAX_TOKENS`: Upper bound on how many tokens the ChatGPT API will return
-* `MAX_HISTORY_SIZE`: Max number of messages to keep in memory, after which the conversation will be summarised to avoid excessive token usage ([#34](https://github.com/n3d1117/chatgpt-telegram-bot/issues/34))
-* `MAX_CONVERSATION_AGE_MINUTES`: Maximum number of minutes a conversation should live, after which the conversation will be reset to avoid excessive token usage
-* `VOICE_REPLY_WITH_TRANSCRIPT_ONLY`: Whether to answer to voice messages with the transcript only or with a ChatGPT response of the transcript ([#38](https://github.com/n3d1117/chatgpt-telegram-bot/issues/38))
-* `N_CHOICES`: Number of answers to generate for each input message
-* `TEMPERATURE`: Number between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic
-* `PRESENCE_PENALTY`: Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics
-* `FREQUENCY_PENALTY`: Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim
-* `IMAGE_SIZE`: The DALL·E generated image size. Allowed values: "256x256", "512x512", or "1024x1024"
-* `GROUP_TRIGGER_KEYWORD`: If set, the bot will only respond to messages that start with this keyword. This is useful for bots added to groups with privacy mode disabled. **Note**: by default, *no keyword* is required (`""`)
-* `IGNORE_GROUP_TRANSCRIPTIONS`: If set to true, the bot will not process transcriptions in group chats
-* `TOKEN_PRICE`: USD-price per 1000 tokens for cost information in usage statistics. Defaults to [OpenAI price](https://openai.com/pricing) for gpt-3.5-turbo
-* `IMAGE_PRICES`: A comma-separated list with 3 elements of prices for the different image sizes 256x256, 512x512 and 1024x1024. Defaults to [OpenAI prices](https://openai.com/pricing) for Dall-E.
-* `TRANSCRIPTION_PRICE`: USD-price for one minute of audio transcription. Defaults to [OpenAI price](https://openai.com/pricing) for Whisper
+Customize the configuration by copying `.env.example` and renaming it to `.env`, then editing the required parameters as desired:
+
+| Parameter | Description |
+|-----------|-------------|
+| `OPENAI_API_KEY` | Your OpenAI API key, you can get it from [here](https://platform.openai.com/account/api-keys) |
+| `TELEGRAM_BOT_TOKEN` | Your Telegram bot's token, obtained using [BotFather](http://t.me/botfather) (see [tutorial](https://core.telegram.org/bots/tutorial#obtain-your-bot-token)) |
+| `ALLOWED_TELEGRAM_USER_IDS` | A comma-separated list of Telegram user IDs that are allowed to interact with the bot (use [getidsbot](https://t.me/getidsbot) to find your user ID). First USER_ID has access to admin commands. **Note**: by default, *everyone* is allowed and has access to admin commands (`*`) |
+
+### Optional configuration
+
+| Parameter | Description | Default value |
+|-----------|-------------|---------------|
+| `MONTHLY_USER_BUDGETS` | A comma-separated list of $-amounts per user from list `ALLOWED_TELEGRAM_USER_IDS` to set custom usage limit of OpenAI API costs for each. **Note**: by default, *no limits* for anyone (`*`) | `*` |
+| `MONTHLY_GUEST_BUDGET` | $-amount as usage limit for all guest users. Guest users are users in group chats that are not in the `ALLOWED_TELEGRAM_USER_IDS` list. Value is ignored if no usage limits are set in user budgets (`MONTHLY_USER_BUDGETS`="*") | `100.0` |
+| `PROXY` | Proxy to be used for OpenAI and Telegram bot (e.g. `http://localhost:8080`) | - |
+| `OPENAI_MODEL` | The OpenAI model to use for generating responses | `gpt-3.5-turbo` |
+| `ASSISTANT_PROMPT` | A system message that sets the tone and controls the behavior of the assistant | `You are a helpful assistant.` |
+| `SHOW_USAGE` | Whether to show OpenAI token usage information after each response | false |
+| `MAX_TOKENS` | Upper bound on how many tokens the ChatGPT API will return | 2000 |
+| `MAX_HISTORY_SIZE` | Max number of messages to keep in memory, after which the conversation will be summarised to avoid excessive token usage | 10 |
+| `MAX_CONVERSATION_AGE_MINUTES` | Maximum number of minutes a conversation should live, after which the conversation will be reset | 120 |
+| `VOICE_REPLY_WITH_TRANSCRIPT_ONLY` | Whether to answer to voice messages with the transcript only or with a ChatGPT response of the transcript | false |
+| `N_CHOICES` | Number of answers to generate for each input message | 1 |
+| `TEMPERATURE` | Number between 0 and 2. Higher values like 0.8 will make the output more random | 1.0 |
+| `PRESENCE_PENALTY` | Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the text so far | 0 |
+| `FREQUENCY_PENALTY` | Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far | 0 |
+| `IMAGE_SIZE` | The DALL·E generated image size. Allowed values: "256x256", "512x512", or "1024x1024" | "256x256" |
+| `GROUP_TRIGGER_KEYWORD` | If set, the bot in group chats will only respond to messages that start with this keyword | "" |
+| `IGNORE_GROUP_TRANSCRIPTIONS` | If set to true, the bot will not process transcriptions in group chats | true |
+| `TOKEN_PRICE` | $-price per 1000 tokens used to compute cost information in usage statistics (https://openai.com/pricing) | 0.002 |
+| `IMAGE_PRICES` | A comma-separated list with 3 elements of prices for the different image sizes: 256x256, 512x512 and 1024x1024 | "0.016,0.018,0.02" |
+| `TRANSCRIPTION_PRICE` | USD-price for one minute of audio transcription | 0.006 |
 
 Check out the [official API reference](https://platform.openai.com/docs/api-reference/chat) for more details.
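Most of the optional settings in the new README table are passed straight through to the OpenAI chat completion request. A minimal sketch of that mapping, assuming the pre-1.0 `openai` Python package (`openai.ChatCompletion`) in use at the time; this is not the bot's exact call site, and the defaults shown are illustrative:

```python
import os
import openai

# Illustrative mapping of the optional .env settings onto a chat completion call.
openai.api_key = os.environ["OPENAI_API_KEY"]

response = openai.ChatCompletion.create(
    model=os.environ.get("OPENAI_MODEL", "gpt-3.5-turbo"),
    messages=[
        {"role": "system", "content": os.environ.get("ASSISTANT_PROMPT", "You are a helpful assistant.")},
        {"role": "user", "content": "Hello!"},
    ],
    temperature=float(os.environ.get("TEMPERATURE", "1.0")),
    n=int(os.environ.get("N_CHOICES", "1")),
    max_tokens=int(os.environ.get("MAX_TOKENS", "1200")),
    presence_penalty=float(os.environ.get("PRESENCE_PENALTY", "0")),
    frequency_penalty=float(os.environ.get("FREQUENCY_PENALTY", "0")),
)

print(response.choices[0].message.content)
```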
@@ -6,7 +6,6 @@ import tiktoken
 
 import openai
 
-
 import requests
 import json
 from datetime import date
@@ -215,18 +214,24 @@ class OpenAIHelper:
             encoding = tiktoken.encoding_for_model(model)
         except KeyError:
             encoding = tiktoken.get_encoding("gpt-3.5-turbo")
-        if model in GPT_ALL_MODELS:
-            num_tokens = 0
-            for message in messages:
-                num_tokens += 4  # every message follows <im_start>{role/name}\n{content}<im_end>\n
-                for key, value in message.items():
-                    num_tokens += len(encoding.encode(value))
-                    if key == "name":  # if there's a name, the role is omitted
-                        num_tokens += -1  # role is always required and always 1 token
-            num_tokens += 2  # every reply is primed with <im_start>assistant
-            return num_tokens
-        else:
-            raise NotImplementedError(f"__count_tokens() is not presently implemented for model {model}")
+
+        if model in GPT_3_MODELS:
+            tokens_per_message = 4  # every message follows <im_start>{role/name}\n{content}<im_end>\n
+            tokens_per_name = -1  # if there's a name, the role is omitted
+        elif model in GPT_4_MODELS + GPT_4_32K_MODELS:
+            tokens_per_message = 3
+            tokens_per_name = 1
+        else:
+            raise NotImplementedError(f"""num_tokens_from_messages() is not implemented for model {model}.""")
+        num_tokens = 0
+        for message in messages:
+            num_tokens += tokens_per_message
+            for key, value in message.items():
+                num_tokens += len(encoding.encode(value))
+                if key == "name":
+                    num_tokens += tokens_per_name
+        num_tokens += 2  # every reply is primed with <im_start>assistant
+        return num_tokens
 
     def get_grant_balance(self):
         """Gets remaining grant balance for new users from OpenAI API.
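The reworked token counting above follows OpenAI's `num_tokens_from_messages` cookbook recipe, with per-model constants instead of hard-coded values. Below is a self-contained sketch of the same rule, assuming the `tiktoken` package; the model tuples are stand-ins for the bot's `GPT_3_MODELS`/`GPT_4_MODELS`/`GPT_4_32K_MODELS` constants, and the fallback here uses the encoding name `cl100k_base` (since `tiktoken.get_encoding` expects an encoding name rather than a model name):

```python
import tiktoken

# Stand-ins for the bot's model-list constants (assumed values, for illustration).
GPT_3_MODELS = ("gpt-3.5-turbo", "gpt-3.5-turbo-0301")
GPT_4_MODELS = ("gpt-4", "gpt-4-0314")
GPT_4_32K_MODELS = ("gpt-4-32k", "gpt-4-32k-0314")


def count_tokens(messages, model="gpt-3.5-turbo"):
    """Count prompt tokens using the same per-model rule as the diff above."""
    try:
        encoding = tiktoken.encoding_for_model(model)
    except KeyError:
        encoding = tiktoken.get_encoding("cl100k_base")  # fallback by encoding name

    if model in GPT_3_MODELS:
        tokens_per_message = 4  # every message follows <im_start>{role/name}\n{content}<im_end>\n
        tokens_per_name = -1    # if there's a name, the role is omitted
    elif model in GPT_4_MODELS + GPT_4_32K_MODELS:
        tokens_per_message = 3
        tokens_per_name = 1
    else:
        raise NotImplementedError(f"token counting is not implemented for model {model}")

    num_tokens = 0
    for message in messages:
        num_tokens += tokens_per_message
        for key, value in message.items():
            num_tokens += len(encoding.encode(value))
            if key == "name":
                num_tokens += tokens_per_name
    num_tokens += 2  # every reply is primed with <im_start>assistant
    return num_tokens


if __name__ == "__main__":
    example = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Hello!"},
    ]
    print(count_tokens(example))
```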