diff --git a/.env.template b/.env.template index 22bf8d74..b8140dd8 100644 --- a/.env.template +++ b/.env.template @@ -77,6 +77,13 @@ REDIS_PASSWORD= WIPE_REDIS_ON_START=False MEMORY_INDEX=auto-gpt +### MILVUS +# MILVUS_ADDR - Milvus remote address (e.g. localhost:19530) +# MILVUS_COLLECTION - Milvus collection, +# change it if you want to start a new memory and retain the old memory. +MILVUS_ADDR=your-milvus-cluster-host-port +MILVUS_COLLECTION=autogpt + ################################################################################ ### IMAGE GENERATION PROVIDER ################################################################################ diff --git a/README.md b/README.md index ba421774..00bb0ea3 100644 --- a/README.md +++ b/README.md @@ -35,22 +35,27 @@ Your support is greatly appreciated ## Table of Contents - [Auto-GPT: An Autonomous GPT-4 Experiment](#auto-gpt-an-autonomous-gpt-4-experiment) - - [Demo (30/03/2023):](#demo-30032023) + - [🔴 🔴 🔴 Urgent: USE `stable` not `master` 🔴 🔴 🔴](#----urgent-use-stable-not-master----) + - [Demo (30/03/2023):](#demo-30032023) - [Table of Contents](#table-of-contents) - [🚀 Features](#-features) - [📋 Requirements](#-requirements) - [💾 Installation](#-installation) - [🔧 Usage](#-usage) - [Logs](#logs) + - [Docker](#docker) + - [Command Line Arguments](#command-line-arguments) - [🗣️ Speech Mode](#️-speech-mode) - [🔍 Google API Keys Configuration](#-google-api-keys-configuration) - [Setting up environment variables](#setting-up-environment-variables) - - [Redis Setup](#redis-setup) - - [🌲 Pinecone API Key Setup](#-pinecone-api-key-setup) + - [Memory Backend Setup](#memory-backend-setup) + - [Redis Setup](#redis-setup) + - [🌲 Pinecone API Key Setup](#-pinecone-api-key-setup) + - [Milvus Setup](#milvus-setup) - [Setting up environment variables](#setting-up-environment-variables-1) - [Setting Your Cache Type](#setting-your-cache-type) - [View Memory Usage](#view-memory-usage) - - [🧠 Memory pre-seeding](#memory-pre-seeding) + - 
[🧠 Memory pre-seeding](#-memory-pre-seeding) - [💀 Continuous Mode ⚠️](#-continuous-mode-️) - [GPT3.5 ONLY Mode](#gpt35-only-mode) - [🖼 Image Generation](#-image-generation) @@ -75,10 +80,11 @@ Your support is greatly appreciated - [Python 3.8 or later](https://www.tutorialspoint.com/how-to-install-python-in-windows) - [OpenAI API key](https://platform.openai.com/account/api-keys) - Optional: -- [PINECONE API key](https://www.pinecone.io/) (If you want Pinecone backed memory) +- Memory backend + - [PINECONE API key](https://www.pinecone.io/) (If you want Pinecone backed memory) + - [Milvus](https://milvus.io/) (If you want Milvus as memory backend) - ElevenLabs Key (If you want the AI to speak) ## 💾 Installation @@ -209,7 +215,11 @@ export CUSTOM_SEARCH_ENGINE_ID="YOUR_CUSTOM_SEARCH_ENGINE_ID" ``` -## Redis Setup +## Memory Backend Setup + +Set up any one backend to persist memory. + +### Redis Setup Install docker desktop. @@ -246,7 +256,7 @@ You can specify the memory index for redis using the following: MEMORY_INDEX=whatever ``` -## 🌲 Pinecone API Key Setup +### 🌲 Pinecone API Key Setup Pinecone enables the storage of vast amounts of vector-based memory, allowing for only relevant memories to be loaded for the agent at any given time. @@ -254,6 +264,18 @@ Pinecone enables the storage of vast amounts of vector-based memory, allowing fo 2. Choose the `Starter` plan to avoid being charged. 3. Find your API key and region under the default project in the left sidebar. +### Milvus Setup + +[Milvus](https://milvus.io/) is an open-source, highly scalable vector database that stores huge amounts of vector-based memory and provides fast relevant search. + +- set up the milvus database; keep your pymilvus version and milvus version the same to avoid compatibility issues. + - set up the open-source version: [Install Milvus](https://milvus.io/docs/install_standalone-operator.md) + - or set up via [Zilliz Cloud](https://zilliz.com/cloud) +- set `MILVUS_ADDR` in `.env` to your milvus address `host:port`. 
+- set `MEMORY_BACKEND` in `.env` to `milvus` to enable milvus as the backend. +- optional + - set `MILVUS_COLLECTION` in `.env` to change the milvus collection name as you want, `autogpt` is the default name. + ### Setting up environment variables In the `.env` file set: diff --git a/autogpt/config/config.py b/autogpt/config/config.py index 46764b9c..667c8178 100644 --- a/autogpt/config/config.py +++ b/autogpt/config/config.py @@ -62,6 +62,10 @@ class Config(metaclass=Singleton): self.pinecone_api_key = os.getenv("PINECONE_API_KEY") self.pinecone_region = os.getenv("PINECONE_ENV") + # milvus configuration, e.g., localhost:19530. + self.milvus_addr = os.getenv("MILVUS_ADDR", "localhost:19530") + self.milvus_collection = os.getenv("MILVUS_COLLECTION", "autogpt") + self.image_provider = os.getenv("IMAGE_PROVIDER") self.huggingface_api_token = os.getenv("HUGGINGFACE_API_TOKEN") diff --git a/autogpt/memory/__init__.py b/autogpt/memory/__init__.py index 42ba5a65..670efda1 100644 --- a/autogpt/memory/__init__.py +++ b/autogpt/memory/__init__.py @@ -21,6 +21,12 @@ except ImportError: print("Pinecone not installed. Skipping import.") PineconeMemory = None +try: + from autogpt.memory.milvus import MilvusMemory +except ImportError: + print("pymilvus not installed. Skipping import.") + MilvusMemory = None + def get_memory(cfg, init=False): memory = None @@ -44,6 +50,12 @@ def get_memory(cfg, init=False): memory = RedisMemory(cfg) elif cfg.memory_backend == "no_memory": memory = NoMemory(cfg) + elif cfg.memory_backend == "milvus": + if not MilvusMemory: + print("Error: Milvus sdk is not installed." 
+ "Please install pymilvus to use Milvus as memory backend.") + else: + memory = MilvusMemory(cfg) if memory is None: memory = LocalCache(cfg) @@ -56,4 +68,4 @@ def get_supported_memory_backends(): return supported_memory -__all__ = ["get_memory", "LocalCache", "RedisMemory", "PineconeMemory", "NoMemory"] +__all__ = ["get_memory", "LocalCache", "RedisMemory", "PineconeMemory", "NoMemory", "MilvusMemory"] diff --git a/requirements.txt b/requirements.txt index 44cd600f..2fec1b16 100644 --- a/requirements.txt +++ b/requirements.txt @@ -12,6 +12,7 @@ docker duckduckgo-search google-api-python-client #(https://developers.google.com/custom-search/v1/overview) pinecone-client==2.2.1 +pymilvus==2.2.4 redis orjson Pillow diff --git a/autogpt/memory/milvus.py b/autogpt/memory/milvus.py new file mode 100644 index 00000000..c6d31750 --- /dev/null +++ b/autogpt/memory/milvus.py @@ -0,0 +1,90 @@ +from pymilvus import ( + connections, + FieldSchema, + CollectionSchema, + DataType, + Collection, +) + +from autogpt.memory.base import MemoryProviderSingleton, get_ada_embedding + + +class MilvusMemory(MemoryProviderSingleton): + def __init__(self, cfg): + """ Construct a milvus memory storage connection. + + Args: + cfg (Config): Auto-GPT global config. + """ + # connect to milvus server. + connections.connect(address=cfg.milvus_addr) + fields = [ + FieldSchema(name="pk", dtype=DataType.INT64, + is_primary=True, auto_id=True), + FieldSchema(name="embeddings", + dtype=DataType.FLOAT_VECTOR, dim=1536), + FieldSchema(name="raw_text", dtype=DataType.VARCHAR, + max_length=65535) + ] + + # create collection if not exist and load it. + schema = CollectionSchema(fields, "auto-gpt memory storage") + self.collection = Collection(cfg.milvus_collection, schema) + # create index if not exist. 
+ if not self.collection.has_index(index_name="embeddings"): + self.collection.release() + self.collection.create_index("embeddings", { + "index_type": "IVF_FLAT", + "metric_type": "IP", + "params": {"nlist": 128}, + }, index_name="embeddings") + self.collection.load() + + def add(self, data): + """ Add an embedding of data into memory. + + Args: + data (str): The raw text to construct embedding index. + + Returns: + str: log. + """ + embedding = get_ada_embedding(data) + result = self.collection.insert([[embedding], [data]]) + _text = f"Inserting data into memory at primary key: {result.primary_keys[0]}:\n data: {data}" + return _text + + def get(self, data): + """ Return the most relevant data in memory. + Args: + data: The data to compare to. + """ + return self.get_relevant(data, 1) + + def clear(self): + """ Drop the index in memory. + """ + self.collection.drop() + return "Obliviated" + + def get_relevant(self, data, num_relevant=5): + """ Return the top-k relevant data in memory. + Args: + data: The data to compare to. + num_relevant (int, optional): The max number of relevant data. Defaults to 5. + """ + # search the embedding and return the most relevant text. + embedding = get_ada_embedding(data) + search_params = { + "metric_type": "IP", + "params": {"nprobe": 8}, + } + result = self.collection.search( + [embedding], "embeddings", search_params, num_relevant, output_fields=["raw_text"]) + return [item.entity.value_of_field("raw_text") for item in result[0]] + + def get_stats(self): + """ + Returns: The stats of the milvus cache. + """ + return f"Entities num: {self.collection.num_entities}"