Mirror of https://github.com/aljazceru/Auto-GPT.git (synced 2025-12-17 22:14:28 +01:00)
Pinecone memory and memory usage tracking
@@ -1,3 +1,5 @@
+PINECONE_API_KEY=your-pinecone-api-key
+PINECONE_ENV=your-pinecone-region
 OPENAI_API_KEY=your-openai-api-key
 ELEVENLABS_API_KEY=your-elevenlabs-api-key
 SMART_LLM_MODEL="gpt-4"
README.md (29 lines changed)
@@ -139,6 +139,35 @@ export CUSTOM_SEARCH_ENGINE_ID="YOUR_CUSTOM_SEARCH_ENGINE_ID"
 ```
 
+## 🌲 Pinecone API Key Setup
+
+Pinecone enables a vector-based memory, so a vast memory can be stored and only relevant memories
+are loaded for the agent at any given time.
+
+1. Go to app.pinecone.io and make an account if you don't already have one.
+2. Choose the `Starter` plan to avoid being charged.
+3. Find your API key and region under the default project in the left sidebar.
+
+### Setting up environment variables
+For Windows users:
+```
+setx PINECONE_API_KEY "YOUR_PINECONE_API_KEY"
+setx PINECONE_ENV "Your region" # something like: us-east4-gcp
+```
+For macOS and Linux users:
+```
+export PINECONE_API_KEY="YOUR_PINECONE_API_KEY"
+export PINECONE_ENV="Your region" # something like: us-east4-gcp
+```
+
+Or you can set them in the `.env` file.
+
+## View Memory Usage
+
+View memory usage by using the `--debug` flag :)
+
 ## 💀 Continuous Mode ⚠️
 Run the AI **without** user authorisation, 100% automated.
 Continuous mode is not recommended.
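Not part of the commit: a minimal sketch for verifying the keys from the README section above before starting Auto-GPT. It assumes the pinecone-client 2.2.x API that this commit already uses; the file name `check_pinecone.py` is purely illustrative.

```
# check_pinecone.py - illustrative sketch, not part of this commit
import os

import pinecone
from dotenv import load_dotenv

load_dotenv()  # also picks up PINECONE_* values set in the .env file

api_key = os.getenv("PINECONE_API_KEY")
region = os.getenv("PINECONE_ENV")  # e.g. "us-east4-gcp"

if not api_key or not region:
    raise SystemExit("PINECONE_API_KEY and/or PINECONE_ENV are not set")

# A successful init + list_indexes() call confirms the key/region pair works.
pinecone.init(api_key=api_key, environment=region)
print("Existing Pinecone indexes:", pinecone.list_indexes())
```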
@@ -10,4 +10,5 @@ tiktoken==0.3.3
 gTTS==2.3.1
 docker
 duckduckgo-search
 google-api-python-client #(https://developers.google.com/custom-search/v1/overview)
+pinecone-client==2.2.1
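The only dependency change is the newly pinned Pinecone client. As an optional, illustrative check (not in the commit) that the installed version matches the pin:

```
# Optional sanity check, illustrative only.
from importlib.metadata import version

assert version("pinecone-client") == "2.2.1", "installed pinecone-client does not match requirements.txt"
print("pinecone-client", version("pinecone-client"), "is installed")
```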
@@ -23,6 +23,19 @@ def create_chat_message(role, content):
     return {"role": role, "content": content}
 
 
+def generate_context(prompt, relevant_memory, full_message_history, model):
+    current_context = [
+        create_chat_message(
+            "system", prompt), create_chat_message(
+            "system", f"Permanent memory: {relevant_memory}")]
+
+    # Add messages from the full message history until we reach the token limit
+    next_message_to_add_index = len(full_message_history) - 1
+    insertion_index = len(current_context)
+    # Count the currently used tokens
+    current_tokens_used = token_counter.count_message_tokens(current_context, model)
+    return next_message_to_add_index, current_tokens_used, insertion_index, current_context
+
+
 # TODO: Change debug from hardcode to argument
 def chat_with_ai(
@@ -41,7 +54,7 @@ def chat_with_ai(
         prompt (str): The prompt explaining the rules to the AI.
         user_input (str): The input from the user.
         full_message_history (list): The list of all messages sent between the user and the AI.
-        permanent_memory (list): The list of items in the AI's permanent memory.
+        permanent_memory (Obj): The memory object containing the permanent memory.
         token_limit (int): The maximum number of tokens allowed in the API call.
 
     Returns:
@@ -53,18 +66,20 @@ def chat_with_ai(
         print(f"Token limit: {token_limit}")
         send_token_limit = token_limit - 1000
 
-        current_context = [
-            create_chat_message(
-                "system", prompt), create_chat_message(
-                "system", f"Permanent memory: {permanent_memory}")]
+        relevant_memory = permanent_memory.get_relevant(str(full_message_history[-5:]), 10)
 
-        # Add messages from the full message history until we reach the token limit
-        next_message_to_add_index = len(full_message_history) - 1
-        current_tokens_used = 0
-        insertion_index = len(current_context)
+        if debug:
+            print('Memory Stats: ', permanent_memory.get_stats())
+
+        next_message_to_add_index, current_tokens_used, insertion_index, current_context = generate_context(
+            prompt, relevant_memory, full_message_history, model)
+
+        while current_tokens_used > 2500:
+            # remove memories until we are under 2500 tokens
+            relevant_memory = relevant_memory[1:]
+            next_message_to_add_index, current_tokens_used, insertion_index, current_context = generate_context(
+                prompt, relevant_memory, full_message_history, model)
 
-        # Count the currently used tokens
-        current_tokens_used = token_counter.count_message_tokens(current_context, model)
         current_tokens_used += token_counter.count_message_tokens([create_chat_message("user", user_input)], model)  # Account for user input (appended later)
 
         while next_message_to_add_index >= 0:
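The trimming loop above depends on `token_counter.count_message_tokens`, which is not part of this diff. As a rough, illustrative sketch only (not the repository's actual token_counter module), such a counter can be built on the tiktoken package pinned in requirements.txt; the per-message overhead constants below are assumptions:

```
# Illustrative sketch of a message token counter built on tiktoken.
import tiktoken


def count_message_tokens(messages, model="gpt-3.5-turbo"):
    """Approximate how many tokens a list of chat messages will consume."""
    encoding = tiktoken.encoding_for_model(model)
    tokens_per_message = 4  # assumed framing overhead per message
    num_tokens = 0
    for message in messages:
        num_tokens += tokens_per_message
        for value in message.values():  # the "role" and "content" strings
            num_tokens += len(encoding.encode(value))
    num_tokens += 3  # assumed overhead for priming the assistant's reply
    return num_tokens
```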
@@ -1,6 +1,6 @@
 import browse
 import json
-import memory as mem
+from memory import PineconeMemory
 import datetime
 import agent_manager as agents
 import speak
@@ -45,6 +45,7 @@ def get_command(response):
 
 
 def execute_command(command_name, arguments):
+    memory = PineconeMemory()
     try:
         if command_name == "google":
@@ -55,11 +56,7 @@ def execute_command(command_name, arguments):
             else:
                 return google_search(arguments["input"])
         elif command_name == "memory_add":
-            return commit_memory(arguments["string"])
-        elif command_name == "memory_del":
-            return delete_memory(arguments["key"])
-        elif command_name == "memory_ovr":
-            return overwrite_memory(arguments["key"], arguments["string"])
+            return memory.add(arguments["string"])
         elif command_name == "start_agent":
             return start_agent(
                 arguments["name"],
@@ -176,35 +173,6 @@ def get_hyperlinks(url):
     return link_list
 
 
-def commit_memory(string):
-    _text = f"""Committing memory with string "{string}" """
-    mem.permanent_memory.append(string)
-    return _text
-
-
-def delete_memory(key):
-    if key >= 0 and key < len(mem.permanent_memory):
-        _text = "Deleting memory with key " + str(key)
-        del mem.permanent_memory[key]
-        print(_text)
-        return _text
-    else:
-        print("Invalid key, cannot delete memory.")
-        return None
-
-
-def overwrite_memory(key, string):
-    if int(key) >= 0 and key < len(mem.permanent_memory):
-        _text = "Overwriting memory with key " + \
-            str(key) + " and string " + string
-        mem.permanent_memory[key] = string
-        print(_text)
-        return _text
-    else:
-        print("Invalid key, cannot overwrite memory.")
-        return None
-
-
 def shutdown():
     print("Shutting down...")
     quit()
@@ -4,6 +4,7 @@ from dotenv import load_dotenv
 # Load environment variables from .env file
 load_dotenv()
 
 
 class Singleton(type):
     """
     Singleton metaclass for ensuring only one instance of a class.
@@ -39,6 +40,9 @@ class Config(metaclass=Singleton):
         self.google_api_key = os.getenv("GOOGLE_API_KEY")
         self.custom_search_engine_id = os.getenv("CUSTOM_SEARCH_ENGINE_ID")
 
+        self.pinecone_api_key = os.getenv("PINECONE_API_KEY")
+        self.pinecone_region = os.getenv("PINECONE_ENV")
+
         # Initialize the OpenAI API client
         openai.api_key = self.openai_api_key
@@ -70,4 +74,10 @@ class Config(metaclass=Singleton):
         self.google_api_key = value
 
     def set_custom_search_engine_id(self, value: str):
         self.custom_search_engine_id = value
+
+    def set_pinecone_api_key(self, value: str):
+        self.pinecone_api_key = value
+
+    def set_pinecone_region(self, value: str):
+        self.pinecone_region = value
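A short usage sketch (not part of the diff) showing how the new Pinecone settings flow through the Config singleton; the printed values are whatever PINECONE_API_KEY / PINECONE_ENV resolve to in your environment:

```
# Illustrative only: reading and overriding the new Pinecone settings.
from config import Config

cfg = Config()
assert cfg is Config()  # Singleton metaclass: every call returns the same instance

# Populated from PINECONE_API_KEY / PINECONE_ENV via load_dotenv()/os.getenv
print(cfg.pinecone_api_key, cfg.pinecone_region)

# ...or overridden at runtime with the new setters
cfg.set_pinecone_region("us-east4-gcp")
```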
@@ -1,29 +1,27 @@
 CONSTRAINTS:
 
-1. ~4000 word limit for memory. Your memory is short, so immediately save important information to long term memory and code to files.
-2. No user assistance
-3. Exclusively use the commands listed in double quotes e.g. "command name"
+1. ~4000 word limit for short term memory. Your short term memory is short, so immediately save important information to files.
+2. If you are unsure how you previously did something or want to recall past events, thinking about similar events will help you remember.
+3. No user assistance
+4. Exclusively use the commands listed in double quotes e.g. "command name"
 
 COMMANDS:
 
 1. Google Search: "google", args: "input": "<search>"
-2. Memory Add: "memory_add", args: "string": "<string>"
-3. Memory Delete: "memory_del", args: "key": "<key>"
-4. Memory Overwrite: "memory_ovr", args: "key": "<key>", "string": "<string>"
-5. Browse Website: "browse_website", args: "url": "<url>", "question": "<what_you_want_to_find_on_website>"
-6. Start GPT Agent: "start_agent", args: "name": <name>, "task": "<short_task_desc>", "prompt": "<prompt>"
-7. Message GPT Agent: "message_agent", args: "key": "<key>", "message": "<message>"
-8. List GPT Agents: "list_agents", args: ""
-9. Delete GPT Agent: "delete_agent", args: "key": "<key>"
-10. Write to file: "write_to_file", args: "file": "<file>", "text": "<text>"
-11. Read file: "read_file", args: "file": "<file>"
-12. Append to file: "append_to_file", args: "file": "<file>", "text": "<text>"
-13. Delete file: "delete_file", args: "file": "<file>"
-14. Evaluate Code: "evaluate_code", args: "code": "<full _code_string>"
-15. Get Improved Code: "improve_code", args: "suggestions": "<list_of_suggestions>", "code": "<full_code_string>"
-16. Write Tests: "write_tests", args: "code": "<full_code_string>", "focus": "<list_of_focus_areas>"
-17. Execute Python File: "execute_python_file", args: "file": "<file>"
-18. Task Complete (Shutdown): "task_complete", args: "reason": "<reason>"
+2. Browse Website: "browse_website", args: "url": "<url>", "question": "<what_you_want_to_find_on_website>"
+3. Start GPT Agent: "start_agent", args: "name": <name>, "task": "<short_task_desc>", "prompt": "<prompt>"
+4. Message GPT Agent: "message_agent", args: "key": "<key>", "message": "<message>"
+5. List GPT Agents: "list_agents", args: ""
+6. Delete GPT Agent: "delete_agent", args: "key": "<key>"
+7. Write to file: "write_to_file", args: "file": "<file>", "text": "<text>"
+8. Read file: "read_file", args: "file": "<file>"
+9. Append to file: "append_to_file", args: "file": "<file>", "text": "<text>"
+10. Delete file: "delete_file", args: "file": "<file>"
+11. Evaluate Code: "evaluate_code", args: "code": "<full_code_string>"
+12. Get Improved Code: "improve_code", args: "suggestions": "<list_of_suggestions>", "code": "<full_code_string>"
+13. Write Tests: "write_tests", args: "code": "<full_code_string>", "focus": "<list_of_focus_areas>"
+14. Execute Python File: "execute_python_file", args: "file": "<file>"
+15. Task Complete (Shutdown): "task_complete", args: "reason": "<reason>"
 
 RESOURCES:
@@ -1,7 +1,7 @@
 import json
 import random
 import commands as cmd
-import memory as mem
+from memory import PineconeMemory
 import data
 import chat
 from colorama import Fore, Style
@@ -280,6 +280,13 @@ result = None
 # Make a constant:
 user_input = "Determine which next command to use, and respond using the format specified above:"
 
+# Initialize memory and make sure it is empty.
+# this is particularly important for indexing and referencing pinecone memory
+memory = PineconeMemory()
+memory.clear()
+
+print('Using memory of type: ' + memory.__class__.__name__)
+
 # Interaction Loop
 while True:
     # Send message to AI, get response
@@ -288,7 +295,7 @@ while True:
             prompt,
            user_input,
            full_message_history,
-            mem.permanent_memory,
+            memory,
            cfg.fast_token_limit)  # TODO: This hardcodes the model to use GPT3.5. Make this an argument
 
     # print("assistant reply: "+assistant_reply)
@@ -349,6 +356,12 @@ while True:
     else:
         result = f"Command {command_name} returned: {cmd.execute_command(command_name, arguments)}"
 
+    memory_to_add = f"Assistant Reply: {assistant_reply} " \
+                    f"\nResult: {result} " \
+                    f"\nHuman Feedback: {user_input} "
+
+    memory.add(memory_to_add)
+
     # Check if there's a result from the command append it to the message
     # history
     if result is not None:
@@ -1 +1,61 @@
-permanent_memory = []
+from config import Config, Singleton
+import pinecone
+import openai
+
+cfg = Config()
+
+
+def get_ada_embedding(text):
+    text = text.replace("\n", " ")
+    return openai.Embedding.create(input=[text], model="text-embedding-ada-002")["data"][0]["embedding"]
+
+
+def get_text_from_embedding(embedding):
+    return openai.Embedding.retrieve(embedding, model="text-embedding-ada-002")["data"][0]["text"]
+
+
+class PineconeMemory(metaclass=Singleton):
+    def __init__(self):
+        pinecone_api_key = cfg.pinecone_api_key
+        pinecone_region = cfg.pinecone_region
+        pinecone.init(api_key=pinecone_api_key, environment=pinecone_region)
+        dimension = 1536
+        metric = "cosine"
+        pod_type = "p1"
+        table_name = "auto-gpt"
+        # this assumes we don't start with memory.
+        # for now this works.
+        # we'll need a more complicated and robust system if we want to start with memory.
+        self.vec_num = 0
+        if table_name not in pinecone.list_indexes():
+            pinecone.create_index(table_name, dimension=dimension, metric=metric, pod_type=pod_type)
+        self.index = pinecone.Index(table_name)
+
+    def add(self, data):
+        vector = get_ada_embedding(data)
+        # no metadata here. We may wish to change that long term.
+        resp = self.index.upsert([(str(self.vec_num), vector, {"raw_text": data})])
+        _text = f"Inserting data into memory at index: {self.vec_num}:\n data: {data}"
+        self.vec_num += 1
+        return _text
+
+    def get(self, data):
+        return self.get_relevant(data, 1)
+
+    def clear(self):
+        self.index.delete(deleteAll=True)
+        return "Obliviated"
+
+    def get_relevant(self, data, num_relevant=5):
+        """
+        Returns all the data in the memory that is relevant to the given data.
+        :param data: The data to compare to.
+        :param num_relevant: The number of relevant data to return. Defaults to 5
+        """
+        query_embedding = get_ada_embedding(data)
+        results = self.index.query(query_embedding, top_k=num_relevant, include_metadata=True)
+        sorted_results = sorted(results.matches, key=lambda x: x.score)
+        return [str(item['metadata']["raw_text"]) for item in sorted_results]
+
+    def get_stats(self):
+        return self.index.describe_index_stats()
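For reference, a small usage sketch (not in the commit) exercising PineconeMemory the way main.py and chat.py now do. It assumes valid OpenAI and Pinecone credentials are available through the Config singleton; the stored strings are made up for illustration:

```
# Illustrative usage of PineconeMemory, mirroring how main.py/chat.py call it.
from memory import PineconeMemory

memory = PineconeMemory()   # Singleton: main.py and commands.py share this instance
memory.clear()              # start from an empty "auto-gpt" index

print(memory.add("The user asked me to summarise README.md"))
print(memory.add("I wrote the summary to summary.txt"))

# chat.py asks for the memories most relevant to the recent message history
relevant = memory.get_relevant("what did I do with the README?", num_relevant=2)
print(relevant)

# with --debug, these index statistics are printed on each loop iteration
print(memory.get_stats())
```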