From 1e851ba3ea7616e4bdc4ea14bade4424b6fb832a Mon Sep 17 00:00:00 2001
From: Stefan Ayala
Date: Wed, 7 Jun 2023 01:16:53 -0700
Subject: [PATCH] Feat set token limits based on model (#4498)

* feat: set max token limits for better user experience

* fix: use OPEN_AI_CHAT_MODELS max limits

* fix: use the old default of 8000

* fix: formatting so isort/black checks pass

* fix: avoid circular dependencies

* fix: use better to avoid circular imports

* feat: introduce soft limits and use them

* fix: circular import issue and missing field

* fix: move import to avoid overriding doc comment

* feat: DRY things up and set token limit for fast llm models too

* tests: make linter tests happy

* test: use the max token limits in config.py test

* fix: remove fast token limit from config

* feat: remove smart token limit from config

* fix: remove unused soft_token_limit var

* fix: remove unneeded tests, settings aren't in config anymore

---------

Co-authored-by: k-boikov <64261260+k-boikov@users.noreply.github.com>
Co-authored-by: Reinier van der Leer
---
 .env.template             |  7 -------
 autogpt/agent/agent.py    |  6 ++++--
 autogpt/config/config.py  | 11 +----------
 tests/unit/test_config.py | 30 ------------------------------
 4 files changed, 5 insertions(+), 49 deletions(-)

diff --git a/.env.template b/.env.template
index d4d99baa..c78701a7 100644
--- a/.env.template
+++ b/.env.template
@@ -85,13 +85,6 @@ OPENAI_API_KEY=your-openai-api-key
 # SMART_LLM_MODEL=gpt-4
 # FAST_LLM_MODEL=gpt-3.5-turbo
 
-### LLM MODEL SETTINGS
-## FAST_TOKEN_LIMIT - Fast token limit for OpenAI (Default: 4000)
-## SMART_TOKEN_LIMIT - Smart token limit for OpenAI (Default: 8000)
-## When using --gpt3only this needs to be set to 4000.
-# FAST_TOKEN_LIMIT=4000
-# SMART_TOKEN_LIMIT=8000
-
 ### EMBEDDINGS
 ## EMBEDDING_MODEL - Model to use for creating embeddings
 # EMBEDDING_MODEL=text-embedding-ada-002

diff --git a/autogpt/agent/agent.py b/autogpt/agent/agent.py
index 93c9c283..c21f31db 100644
--- a/autogpt/agent/agent.py
+++ b/autogpt/agent/agent.py
@@ -12,6 +12,7 @@ from autogpt.json_utils.json_fix_llm import fix_json_using_multiple_techniques
 from autogpt.json_utils.utilities import LLM_DEFAULT_RESPONSE_FORMAT, validate_json
 from autogpt.llm.base import ChatSequence
 from autogpt.llm.chat import chat_with_ai, create_chat_completion
+from autogpt.llm.providers.openai import OPEN_AI_CHAT_MODELS
 from autogpt.llm.utils import count_string_tokens
 from autogpt.log_cycle.log_cycle import (
     FULL_MESSAGE_HISTORY_FILE_NAME,
@@ -82,6 +83,7 @@ class Agent:
         self.created_at = datetime.now().strftime("%Y%m%d_%H%M%S")
         self.cycle_count = 0
         self.log_cycle_handler = LogCycleHandler()
+        self.fast_token_limit = OPEN_AI_CHAT_MODELS.get(cfg.fast_llm_model).max_tokens
 
     def start_interaction_loop(self):
         # Interaction Loop
@@ -132,7 +134,7 @@ class Agent:
                     self,
                     self.system_prompt,
                     self.triggering_prompt,
-                    cfg.fast_token_limit,
+                    self.fast_token_limit,
                     cfg.fast_llm_model,
                 )
 
@@ -290,7 +292,7 @@ class Agent:
             memory_tlength = count_string_tokens(
                 str(self.history.summary_message()), cfg.fast_llm_model
             )
-            if result_tlength + memory_tlength + 600 > cfg.fast_token_limit:
+            if result_tlength + memory_tlength + 600 > self.fast_token_limit:
                 result = f"Failure: command {command_name} returned too much output. \
                     Do not execute this command again with the same arguments."
 
diff --git a/autogpt/config/config.py b/autogpt/config/config.py
index 5f76bb74..629e9ffb 100644
--- a/autogpt/config/config.py
+++ b/autogpt/config/config.py
@@ -56,9 +56,8 @@ class Config(metaclass=Singleton):
         )
         self.fast_llm_model = os.getenv("FAST_LLM_MODEL", "gpt-3.5-turbo")
         self.smart_llm_model = os.getenv("SMART_LLM_MODEL", "gpt-4")
-        self.fast_token_limit = int(os.getenv("FAST_TOKEN_LIMIT", 4000))
-        self.smart_token_limit = int(os.getenv("SMART_TOKEN_LIMIT", 8000))
         self.embedding_model = os.getenv("EMBEDDING_MODEL", "text-embedding-ada-002")
+
         self.browse_spacy_language_model = os.getenv(
             "BROWSE_SPACY_LANGUAGE_MODEL", "en_core_web_sm"
         )
@@ -217,14 +216,6 @@ class Config(metaclass=Singleton):
         """Set the smart LLM model value."""
         self.smart_llm_model = value
 
-    def set_fast_token_limit(self, value: int) -> None:
-        """Set the fast token limit value."""
-        self.fast_token_limit = value
-
-    def set_smart_token_limit(self, value: int) -> None:
-        """Set the smart token limit value."""
-        self.smart_token_limit = value
-
     def set_embedding_model(self, value: str) -> None:
         """Set the model to use for creating embeddings."""
         self.embedding_model = value

diff --git a/tests/unit/test_config.py b/tests/unit/test_config.py
index eb6946c9..27daedcd 100644
--- a/tests/unit/test_config.py
+++ b/tests/unit/test_config.py
@@ -21,8 +21,6 @@ def test_initial_values(config: Config):
     assert config.speak_mode == False
     assert config.fast_llm_model == "gpt-3.5-turbo"
     assert config.smart_llm_model == "gpt-4"
-    assert config.fast_token_limit == 4000
-    assert config.smart_token_limit == 8000
 
 
 def test_set_continuous_mode(config: Config):
@@ -81,34 +79,6 @@ def test_set_smart_llm_model(config: Config):
     config.set_smart_llm_model(smart_llm_model)
 
 
-def test_set_fast_token_limit(config: Config):
-    """
-    Test if the set_fast_token_limit() method updates the fast_token_limit attribute.
-    """
-    # Store token limit to reset it after the test
-    fast_token_limit = config.fast_token_limit
-
-    config.set_fast_token_limit(5000)
-    assert config.fast_token_limit == 5000
-
-    # Reset token limit
-    config.set_fast_token_limit(fast_token_limit)
-
-
-def test_set_smart_token_limit(config: Config):
-    """
-    Test if the set_smart_token_limit() method updates the smart_token_limit attribute.
-    """
-    # Store token limit to reset it after the test
-    smart_token_limit = config.smart_token_limit
-
-    config.set_smart_token_limit(9000)
-    assert config.smart_token_limit == 9000
-
-    # Reset token limit
-    config.set_smart_token_limit(smart_token_limit)
-
-
 def test_set_debug_mode(config: Config):
     """
     Test if the set_debug_mode() method updates the debug_mode attribute.
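
For quick reference, the substance of the change: the agent's token budget is no
longer a user-tunable environment variable (FAST_TOKEN_LIMIT / SMART_TOKEN_LIMIT)
but is derived from the model registry, so each model automatically gets its real
context ceiling. Below is a minimal, self-contained Python sketch of that lookup
pattern. OPEN_AI_CHAT_MODELS, max_tokens, and fast_llm_model are the names used in
the diff above; the ModelInfo stand-in and the literal limits are illustrative
assumptions, not copied from the repository.

    from dataclasses import dataclass


    @dataclass
    class ModelInfo:
        """Stand-in for the per-model metadata autogpt keeps for chat models."""

        name: str
        max_tokens: int


    # Illustrative registry mirroring OPEN_AI_CHAT_MODELS from
    # autogpt/llm/providers/openai.py; the real mapping has more models and
    # fields, and the limits below are assumed values for this sketch.
    OPEN_AI_CHAT_MODELS = {
        "gpt-3.5-turbo": ModelInfo(name="gpt-3.5-turbo", max_tokens=4096),
        "gpt-4": ModelInfo(name="gpt-4", max_tokens=8192),
    }

    fast_llm_model = "gpt-3.5-turbo"  # plays the role of cfg.fast_llm_model

    # The pattern Agent.__init__ now uses: read the ceiling off the model's
    # metadata instead of int(os.getenv("FAST_TOKEN_LIMIT", 4000)).
    fast_token_limit = OPEN_AI_CHAT_MODELS.get(fast_llm_model).max_tokens
    assert fast_token_limit == 4096

One behavior worth noting: dict.get() returns None for an unknown key, so
OPEN_AI_CHAT_MODELS.get(cfg.fast_llm_model).max_tokens raises AttributeError if
FAST_LLM_MODEL names a model the registry doesn't know; the mapping therefore has
to cover every accepted model name.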