Feat set token limits based on model (#4498)
* feat: set max token limits for better user experience
* fix: use OPEN_AI_CHAT_MODELS max limits
* fix: use the old default of 8000
* fix: formatting so isort/black checks pass
* fix: avoid circular dependencies
* fix: use a better way to avoid circular imports
* feat: introduce soft limits and use them
* fix: circular import issue and missing field
* fix: move import to avoid overriding doc comment
* feat: DRY things up and set token limit for fast llm models too
* tests: make linter tests happy
* test: use the max token limits in config.py test
* fix: remove fast token limit from config
* feat: remove smart token limit from config
* fix: remove unused soft_token_limit var
* fix: remove unneeded tests, settings aren't in config anymore

---------

Co-authored-by: k-boikov <64261260+k-boikov@users.noreply.github.com>
Co-authored-by: Reinier van der Leer <github@pwuts.nl>
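The heart of the change is visible in two lines of the diff below: Config stops reading a hand-set FAST_TOKEN_LIMIT from the environment, and Agent starts deriving the limit from metadata about the configured model. A minimal sketch of the new flow (runnable inside the Auto-GPT tree; the default model name mirrors the one in config.py below):

    import os

    from autogpt.llm.providers.openai import OPEN_AI_CHAT_MODELS

    # Before: the budget was a free-standing env var that could drift from
    # the actual context window of the model in use.
    fast_token_limit = int(os.getenv("FAST_TOKEN_LIMIT", 4000))

    # After: the budget follows whichever model is selected.
    fast_llm_model = os.getenv("FAST_LLM_MODEL", "gpt-3.5-turbo")
    fast_token_limit = OPEN_AI_CHAT_MODELS.get(fast_llm_model).max_tokens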
@@ -85,13 +85,6 @@ OPENAI_API_KEY=your-openai-api-key
 # SMART_LLM_MODEL=gpt-4
 # FAST_LLM_MODEL=gpt-3.5-turbo
 
-### LLM MODEL SETTINGS
-## FAST_TOKEN_LIMIT - Fast token limit for OpenAI (Default: 4000)
-## SMART_TOKEN_LIMIT - Smart token limit for OpenAI (Default: 8000)
-## When using --gpt3only this needs to be set to 4000.
-# FAST_TOKEN_LIMIT=4000
-# SMART_TOKEN_LIMIT=8000
-
 ### EMBEDDINGS
 ## EMBEDDING_MODEL - Model to use for creating embeddings
 # EMBEDDING_MODEL=text-embedding-ada-002
@@ -12,6 +12,7 @@ from autogpt.json_utils.json_fix_llm import fix_json_using_multiple_techniques
 from autogpt.json_utils.utilities import LLM_DEFAULT_RESPONSE_FORMAT, validate_json
 from autogpt.llm.base import ChatSequence
 from autogpt.llm.chat import chat_with_ai, create_chat_completion
+from autogpt.llm.providers.openai import OPEN_AI_CHAT_MODELS
 from autogpt.llm.utils import count_string_tokens
 from autogpt.log_cycle.log_cycle import (
     FULL_MESSAGE_HISTORY_FILE_NAME,
@@ -82,6 +83,7 @@ class Agent:
         self.created_at = datetime.now().strftime("%Y%m%d_%H%M%S")
         self.cycle_count = 0
         self.log_cycle_handler = LogCycleHandler()
+        self.fast_token_limit = OPEN_AI_CHAT_MODELS.get(cfg.fast_llm_model).max_tokens
 
     def start_interaction_loop(self):
         # Interaction Loop
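OPEN_AI_CHAT_MODELS (imported above) maps model names to per-model metadata carrying a max_tokens field. The diff does not show the module itself; the following is a sketch of the shape such a registry plausibly has, with context-window sizes taken from OpenAI's public documentation rather than from this commit:

    from dataclasses import dataclass

    @dataclass
    class ChatModelInfo:
        name: str
        max_tokens: int

    OPEN_AI_CHAT_MODELS = {
        info.name: info
        for info in [
            ChatModelInfo(name="gpt-3.5-turbo", max_tokens=4096),
            ChatModelInfo(name="gpt-4", max_tokens=8192),
        ]
    }

    print(OPEN_AI_CHAT_MODELS.get("gpt-4").max_tokens)  # 8192

One caveat worth noting: dict.get() returns None for an unregistered name, so the added Agent line assumes cfg.fast_llm_model is always one of the known OpenAI chat models.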
@@ -132,7 +134,7 @@
                     self,
                     self.system_prompt,
                     self.triggering_prompt,
-                    cfg.fast_token_limit,
+                    self.fast_token_limit,
                     cfg.fast_llm_model,
                 )
 
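The fourth argument to chat_with_ai is the token budget used when assembling the prompt; it now comes from the model registry instead of the config object. This diff does not include chat_with_ai's internals, but history budgeting of this kind generally looks like the following sketch (a generic illustration, not Auto-GPT's actual implementation):

    def fit_history(messages: list[str], count_tokens, budget: int) -> list[str]:
        # Walk the history newest-first, keeping messages until the budget is spent.
        kept: list[str] = []
        used = 0
        for message in reversed(messages):
            cost = count_tokens(message)
            if used + cost > budget:
                break
            kept.append(message)
            used += cost
        return list(reversed(kept))  # restore chronological order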
@@ -290,7 +292,7 @@ class Agent:
             memory_tlength = count_string_tokens(
                 str(self.history.summary_message()), cfg.fast_llm_model
             )
-            if result_tlength + memory_tlength + 600 > cfg.fast_token_limit:
+            if result_tlength + memory_tlength + 600 > self.fast_token_limit:
                 result = f"Failure: command {command_name} returned too much output. \
                         Do not execute this command again with the same arguments."
 
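The constant 600 reserves headroom below the limit for the rest of the prompt, so command output plus the memory summary must fit in fast_token_limit - 600 tokens. count_string_tokens lives in autogpt.llm.utils; its implementation is not part of this diff, but a stand-in with the same signature, built on tiktoken, would be:

    import tiktoken

    def count_string_tokens(text: str, model_name: str) -> int:
        # Number of tokens `text` occupies under `model_name`'s tokenizer.
        encoding = tiktoken.encoding_for_model(model_name)
        return len(encoding.encode(text))

    print(count_string_tokens("hello world", "gpt-3.5-turbo"))  # 2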
@@ -56,9 +56,8 @@ class Config(metaclass=Singleton):
         )
         self.fast_llm_model = os.getenv("FAST_LLM_MODEL", "gpt-3.5-turbo")
         self.smart_llm_model = os.getenv("SMART_LLM_MODEL", "gpt-4")
-        self.fast_token_limit = int(os.getenv("FAST_TOKEN_LIMIT", 4000))
-        self.smart_token_limit = int(os.getenv("SMART_TOKEN_LIMIT", 8000))
         self.embedding_model = os.getenv("EMBEDDING_MODEL", "text-embedding-ada-002")
+
         self.browse_spacy_language_model = os.getenv(
             "BROWSE_SPACY_LANGUAGE_MODEL", "en_core_web_sm"
         )
@@ -217,14 +216,6 @@ class Config(metaclass=Singleton):
         """Set the smart LLM model value."""
         self.smart_llm_model = value
 
-    def set_fast_token_limit(self, value: int) -> None:
-        """Set the fast token limit value."""
-        self.fast_token_limit = value
-
-    def set_smart_token_limit(self, value: int) -> None:
-        """Set the smart token limit value."""
-        self.smart_token_limit = value
-
     def set_embedding_model(self, value: str) -> None:
         """Set the model to use for creating embeddings."""
         self.embedding_model = value
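With the setters removed, nothing can pin a budget that disagrees with the model; the remaining lever is, presumably, model selection itself (the removed .env.template comment about --gpt3only needing FAST_TOKEN_LIMIT=4000 becomes moot for the same reason). Using only methods that survive in this diff, the shift looks like:

    from autogpt.config import Config

    config = Config()  # the same singleton the tests below exercise

    # Before this commit: the budget could be forced independently of the model.
    config.set_smart_token_limit(9000)

    # After: choose the model; the matching context window comes with it.
    config.set_smart_llm_model("gpt-4")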
@@ -21,8 +21,6 @@ def test_initial_values(config: Config):
     assert config.speak_mode == False
     assert config.fast_llm_model == "gpt-3.5-turbo"
     assert config.smart_llm_model == "gpt-4"
-    assert config.fast_token_limit == 4000
-    assert config.smart_token_limit == 8000
 
 
 def test_set_continuous_mode(config: Config):
@@ -81,34 +79,6 @@ def test_set_smart_llm_model(config: Config):
     config.set_smart_llm_model(smart_llm_model)
 
 
-def test_set_fast_token_limit(config: Config):
-    """
-    Test if the set_fast_token_limit() method updates the fast_token_limit attribute.
-    """
-    # Store token limit to reset it after the test
-    fast_token_limit = config.fast_token_limit
-
-    config.set_fast_token_limit(5000)
-    assert config.fast_token_limit == 5000
-
-    # Reset token limit
-    config.set_fast_token_limit(fast_token_limit)
-
-
-def test_set_smart_token_limit(config: Config):
-    """
-    Test if the set_smart_token_limit() method updates the smart_token_limit attribute.
-    """
-    # Store token limit to reset it after the test
-    smart_token_limit = config.smart_token_limit
-
-    config.set_smart_token_limit(9000)
-    assert config.smart_token_limit == 9000
-
-    # Reset token limit
-    config.set_smart_token_limit(smart_token_limit)
-
-
 def test_set_debug_mode(config: Config):
     """
     Test if the set_debug_mode() method updates the debug_mode attribute.