Vastly improves context window management.
Now uses token counts and the biggest context possible.
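For readers skimming the diff below: the old code sliced the message history by raw string length, while the new code walks the history from newest to oldest and keeps adding messages until a token budget (the model's limit minus a reserve for the response) is exhausted. A minimal sketch of that greedy, token-budgeted assembly follows; build_context and count_tokens are hypothetical names used for illustration, standing in for the inline logic and token_counter.count_message_tokens in the actual diff.

# Minimal sketch (not the repo's code): greedily fill the context window from
# the newest history entries down, stopping when the token budget is spent.
def build_context(system_msgs, history, user_msg, token_limit, count_tokens, reserve=1000):
    send_limit = token_limit - reserve      # keep room for the model's reply
    context = list(system_msgs)             # system prompt + permanent memory
    used = count_tokens(context) + count_tokens([user_msg])

    insertion_index = len(context)          # older messages slot in at a fixed index,
    for message in reversed(history):       # so chronological order is preserved
        cost = count_tokens([message])
        if used + cost > send_limit:
            break                           # budget exhausted: older turns are dropped
        context.insert(insertion_index, message)
        used += cost

    context.append(user_msg)                # user input always goes last
    return context, token_limit - used      # second value: budget left for the reply

Because the walk starts at the newest message and inserts at a fixed index after the system messages, the most recent turns are the last to be dropped and still appear in chronological order.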
@@ -2,10 +2,13 @@ import time
 import openai
 from dotenv import load_dotenv
 from config import Config
+import token_counter

 cfg = Config()

 from llm_utils import create_chat_completion


 def create_chat_message(role, content):
     """
     Create a chat message with the given role and content.
@@ -20,13 +23,15 @@ def create_chat_message(role, content):
     return {"role": role, "content": content}



+# TODO: Change debug from hardcode to argument
 def chat_with_ai(
         prompt,
         user_input,
         full_message_history,
         permanent_memory,
         token_limit,
-        debug=True):
+        debug=False):
     while True:
         try:
             """
@@ -42,16 +47,55 @@ def chat_with_ai(
             Returns:
             str: The AI's response.
             """
+            model = cfg.fast_llm_model # TODO: Change model from hardcode to argument
+            # Reserve 1000 tokens for the response
+            if debug:
+                print(f"Token limit: {token_limit}")
+            send_token_limit = token_limit - 1000

             current_context = [
                 create_chat_message(
                     "system", prompt), create_chat_message(
                     "system", f"Permanent memory: {permanent_memory}")]
-            current_context.extend(
-                full_message_history[-(token_limit - len(prompt) - len(permanent_memory) - 10):])
+            # Add messages from the full message history until we reach the token limit
+            next_message_to_add_index = len(full_message_history) - 1
+            current_tokens_used = 0
+            insertion_index = len(current_context)
+
+            # Count the currently used tokens
+            current_tokens_used = token_counter.count_message_tokens(current_context, model)
+            current_tokens_used += token_counter.count_message_tokens([create_chat_message("user", user_input)], model) # Account for user input (appended later)
+
+            while next_message_to_add_index >= 0:
+                # print (f"CURRENT TOKENS USED: {current_tokens_used}")
+                message_to_add = full_message_history[next_message_to_add_index]
+
+                tokens_to_add = token_counter.count_message_tokens([message_to_add], model)
+                if current_tokens_used + tokens_to_add > send_token_limit:
+                    break
+
+                # Add the most recent message to the start of the current context, after the two system prompts.
+                current_context.insert(insertion_index, full_message_history[next_message_to_add_index])
+
+                # Count the currently used tokens
+                current_tokens_used += tokens_to_add
+
+                # Move to the next most recent message in the full message history
+                next_message_to_add_index -= 1
+
+            # Append user input, the length of this is accounted for above
             current_context.extend([create_chat_message("user", user_input)])
+
+            # Calculate remaining tokens
+            tokens_remaining = token_limit - current_tokens_used
+            # assert tokens_remaining >= 0, "Tokens remaining is negative. This should never happen, please submit a bug report at https://www.github.com/Torantulino/Auto-GPT"
+
             # Debug print the current context
             if debug:
+                print(f"Token limit: {token_limit}")
+                print(f"Send Token Count: {current_tokens_used}")
+                print(f"Tokens remaining for response: {tokens_remaining}")
                 print("------------ CONTEXT SENT TO AI ---------------")
                 for message in current_context:
                     # Skip printing the prompt
@@ -59,12 +103,14 @@ def chat_with_ai(
                         continue
                     print(
                         f"{message['role'].capitalize()}: {message['content']}")
+                    print()
                 print("----------- END OF CONTEXT ----------------")

             # TODO: use a model defined elsewhere, so that model can contain temperature and other settings we care about
             assistant_reply = create_chat_completion(
-                model=cfg.smart_llm_model,
+                model=model,
                 messages=current_context,
+                max_tokens=tokens_remaining,
             )

             # Update full message history
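The new logic leans on token_counter.count_message_tokens, which the diff imports but does not show. A rough tiktoken-based sketch of what such a counter might look like is below; the per-message overhead constants and the fallback encoding are assumptions (loosely following OpenAI's published counting guidance), not the repository's actual implementation.

# Rough sketch of a chat-message token counter using tiktoken; the name matches
# the diff, but the body is an assumption, not Auto-GPT's actual token_counter.
import tiktoken

def count_message_tokens(messages, model="gpt-3.5-turbo"):
    try:
        encoding = tiktoken.encoding_for_model(model)
    except KeyError:
        encoding = tiktoken.get_encoding("cl100k_base")  # assumed fallback encoding

    tokens_per_message = 4  # assumed per-message overhead for role/formatting markers
    num_tokens = 0
    for message in messages:
        num_tokens += tokens_per_message
        for value in message.values():  # counts both "role" and "content"
            num_tokens += len(encoding.encode(value))
    num_tokens += 3  # assumed priming overhead for the assistant's reply
    return num_tokens

With a counter like this, the send_token_limit check in the loop keeps the request under the model's context window, while max_tokens=tokens_remaining hands whatever budget is left to the response.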