From 1e851ba3ea7616e4bdc4ea14bade4424b6fb832a Mon Sep 17 00:00:00 2001
From: Stefan Ayala
Date: Wed, 7 Jun 2023 01:16:53 -0700
Subject: [PATCH] Feat set token limits based on model (#4498)

* feat: set max token limits for better user experience

* fix: use OPEN_AI_CHAT_MODELS max limits

* fix: use the old default of 8000

* fix: formatting so isort/black checks pass

* fix: avoid circular dependencies

* fix: use better to avoid circular imports

* feat: introduce soft limits and use them

* fix: circular import issue and missing field

* fix: move import to avoid overriding doc comment

* feat: DRY things up and set token limit for fast llm models too

* tests: make linter tests happy

* test: use the max token limits in config.py test

* fix: remove fast token limit from config

* feat: remove smart token limit from config

* fix: remove unused soft_token_limit var

* fix: remove unneeded tests, settings aren't in config anymore

---------

Co-authored-by: k-boikov <64261260+k-boikov@users.noreply.github.com>
Co-authored-by: Reinier van der Leer
---
 .env.template             |  7 -------
 autogpt/agent/agent.py    |  6 ++++--
 autogpt/config/config.py  | 11 +----------
 tests/unit/test_config.py | 30 ------------------------------
 4 files changed, 5 insertions(+), 49 deletions(-)

diff --git a/.env.template b/.env.template
index d4d99baa..c78701a7 100644
--- a/.env.template
+++ b/.env.template
@@ -85,13 +85,6 @@ OPENAI_API_KEY=your-openai-api-key
 # SMART_LLM_MODEL=gpt-4
 # FAST_LLM_MODEL=gpt-3.5-turbo
 
-### LLM MODEL SETTINGS
-## FAST_TOKEN_LIMIT - Fast token limit for OpenAI (Default: 4000)
-## SMART_TOKEN_LIMIT - Smart token limit for OpenAI (Default: 8000)
-## When using --gpt3only this needs to be set to 4000.
-# FAST_TOKEN_LIMIT=4000
-# SMART_TOKEN_LIMIT=8000
-
 ### EMBEDDINGS
 ## EMBEDDING_MODEL - Model to use for creating embeddings
 # EMBEDDING_MODEL=text-embedding-ada-002

diff --git a/autogpt/agent/agent.py b/autogpt/agent/agent.py
index 93c9c283..c21f31db 100644
--- a/autogpt/agent/agent.py
+++ b/autogpt/agent/agent.py
@@ -12,6 +12,7 @@ from autogpt.json_utils.json_fix_llm import fix_json_using_multiple_techniques
 from autogpt.json_utils.utilities import LLM_DEFAULT_RESPONSE_FORMAT, validate_json
 from autogpt.llm.base import ChatSequence
 from autogpt.llm.chat import chat_with_ai, create_chat_completion
+from autogpt.llm.providers.openai import OPEN_AI_CHAT_MODELS
 from autogpt.llm.utils import count_string_tokens
 from autogpt.log_cycle.log_cycle import (
     FULL_MESSAGE_HISTORY_FILE_NAME,
@@ -82,6 +83,7 @@ class Agent:
         self.created_at = datetime.now().strftime("%Y%m%d_%H%M%S")
         self.cycle_count = 0
         self.log_cycle_handler = LogCycleHandler()
+        self.fast_token_limit = OPEN_AI_CHAT_MODELS.get(cfg.fast_llm_model).max_tokens
 
     def start_interaction_loop(self):
         # Interaction Loop
@@ -132,7 +134,7 @@ class Agent:
                     self,
                     self.system_prompt,
                     self.triggering_prompt,
-                    cfg.fast_token_limit,
+                    self.fast_token_limit,
                     cfg.fast_llm_model,
                 )
 
@@ -290,7 +292,7 @@ class Agent:
             memory_tlength = count_string_tokens(
                 str(self.history.summary_message()), cfg.fast_llm_model
             )
-            if result_tlength + memory_tlength + 600 > cfg.fast_token_limit:
+            if result_tlength + memory_tlength + 600 > self.fast_token_limit:
                 result = f"Failure: command {command_name} returned too much output. \
                     Do not execute this command again with the same arguments."
 
diff --git a/autogpt/config/config.py b/autogpt/config/config.py
index 5f76bb74..629e9ffb 100644
--- a/autogpt/config/config.py
+++ b/autogpt/config/config.py
@@ -56,9 +56,8 @@ class Config(metaclass=Singleton):
         )
         self.fast_llm_model = os.getenv("FAST_LLM_MODEL", "gpt-3.5-turbo")
         self.smart_llm_model = os.getenv("SMART_LLM_MODEL", "gpt-4")
-        self.fast_token_limit = int(os.getenv("FAST_TOKEN_LIMIT", 4000))
-        self.smart_token_limit = int(os.getenv("SMART_TOKEN_LIMIT", 8000))
         self.embedding_model = os.getenv("EMBEDDING_MODEL", "text-embedding-ada-002")
+
         self.browse_spacy_language_model = os.getenv(
             "BROWSE_SPACY_LANGUAGE_MODEL", "en_core_web_sm"
         )
@@ -217,14 +216,6 @@ class Config(metaclass=Singleton):
         """Set the smart LLM model value."""
         self.smart_llm_model = value
 
-    def set_fast_token_limit(self, value: int) -> None:
-        """Set the fast token limit value."""
-        self.fast_token_limit = value
-
-    def set_smart_token_limit(self, value: int) -> None:
-        """Set the smart token limit value."""
-        self.smart_token_limit = value
-
     def set_embedding_model(self, value: str) -> None:
         """Set the model to use for creating embeddings."""
         self.embedding_model = value

diff --git a/tests/unit/test_config.py b/tests/unit/test_config.py
index eb6946c9..27daedcd 100644
--- a/tests/unit/test_config.py
+++ b/tests/unit/test_config.py
@@ -21,8 +21,6 @@ def test_initial_values(config: Config):
     assert config.speak_mode == False
     assert config.fast_llm_model == "gpt-3.5-turbo"
     assert config.smart_llm_model == "gpt-4"
-    assert config.fast_token_limit == 4000
-    assert config.smart_token_limit == 8000
 
 
 def test_set_continuous_mode(config: Config):
@@ -81,34 +79,6 @@ def test_set_smart_llm_model(config: Config):
     config.set_smart_llm_model(smart_llm_model)
 
 
-def test_set_fast_token_limit(config: Config):
-    """
-    Test if the set_fast_token_limit() method updates the fast_token_limit attribute.
-    """
-    # Store token limit to reset it after the test
-    fast_token_limit = config.fast_token_limit
-
-    config.set_fast_token_limit(5000)
-    assert config.fast_token_limit == 5000
-
-    # Reset token limit
-    config.set_fast_token_limit(fast_token_limit)
-
-
-def test_set_smart_token_limit(config: Config):
-    """
-    Test if the set_smart_token_limit() method updates the smart_token_limit attribute.
-    """
-    # Store token limit to reset it after the test
-    smart_token_limit = config.smart_token_limit
-
-    config.set_smart_token_limit(9000)
-    assert config.smart_token_limit == 9000
-
-    # Reset token limit
-    config.set_smart_token_limit(smart_token_limit)
-
-
 def test_set_debug_mode(config: Config):
     """
     Test if the set_debug_mode() method updates the debug_mode attribute.
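
For quick reference, the substance of the change: the agent's token budget is no
longer a user-tunable environment variable (FAST_TOKEN_LIMIT / SMART_TOKEN_LIMIT)
but is derived from the model registry, so each model automatically gets its real
context ceiling. Below is a minimal, self-contained Python sketch of that lookup
pattern. OPEN_AI_CHAT_MODELS, max_tokens, and fast_llm_model are the names used in
the diff above; the ModelInfo stand-in and the literal limits are illustrative
assumptions, not copied from the repository.

    from dataclasses import dataclass


    @dataclass
    class ModelInfo:
        """Stand-in for the per-model metadata autogpt keeps for chat models."""

        name: str
        max_tokens: int


    # Illustrative registry mirroring OPEN_AI_CHAT_MODELS from
    # autogpt/llm/providers/openai.py; the real mapping has more models and
    # fields, and the limits below are assumed values for this sketch.
    OPEN_AI_CHAT_MODELS = {
        "gpt-3.5-turbo": ModelInfo(name="gpt-3.5-turbo", max_tokens=4096),
        "gpt-4": ModelInfo(name="gpt-4", max_tokens=8192),
    }

    fast_llm_model = "gpt-3.5-turbo"  # plays the role of cfg.fast_llm_model

    # The pattern Agent.__init__ now uses: read the ceiling off the model's
    # metadata instead of int(os.getenv("FAST_TOKEN_LIMIT", 4000)).
    fast_token_limit = OPEN_AI_CHAT_MODELS.get(fast_llm_model).max_tokens
    assert fast_token_limit == 4096

One behavior worth noting: dict.get() returns None for an unknown key, so
OPEN_AI_CHAT_MODELS.get(cfg.fast_llm_model).max_tokens raises AttributeError if
FAST_LLM_MODEL names a model the registry doesn't know; the mapping therefore has
to cover every accepted model name.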