Support secure and authenticated Milvus memory backends (#2127)

Co-authored-by: Reinier van der Leer (Pwuts) <github@pwuts.nl>
This commit is contained in:
chyezh
2023-04-24 00:11:04 +08:00
committed by GitHub
parent 6e588bb2ed
commit cac1ea27e2
5 changed files with 108 additions and 33 deletions

View File

@@ -52,7 +52,7 @@ OPENAI_API_KEY=your-openai-api-key
## local - Default ## local - Default
## pinecone - Pinecone (if configured) ## pinecone - Pinecone (if configured)
## redis - Redis (if configured) ## redis - Redis (if configured)
## milvus - Milvus (if configured) ## milvus - Milvus (if configured - also works with Zilliz)
## MEMORY_INDEX - Name of index created in Memory backend (Default: auto-gpt) ## MEMORY_INDEX - Name of index created in Memory backend (Default: auto-gpt)
# MEMORY_BACKEND=local # MEMORY_BACKEND=local
# MEMORY_INDEX=auto-gpt # MEMORY_INDEX=auto-gpt
@@ -93,10 +93,16 @@ OPENAI_API_KEY=your-openai-api-key
# WEAVIATE_API_KEY= # WEAVIATE_API_KEY=
### MILVUS ### MILVUS
## MILVUS_ADDR - Milvus remote address (e.g. localhost:19530) ## MILVUS_ADDR - Milvus remote address (e.g. localhost:19530, https://xxx-xxxx.xxxx.xxxx.zillizcloud.com:443)
## MILVUS_COLLECTION - Milvus collection, ## MILVUS_USERNAME - username for your Milvus database
## change it if you want to start a new memory and retain the old memory. ## MILVUS_PASSWORD - password for your Milvus database
# MILVUS_ADDR=your-milvus-cluster-host-port ## MILVUS_SECURE - True to enable TLS. (Default: False)
## Setting MILVUS_ADDR to an `https://` URL will override this setting.
## MILVUS_COLLECTION - Milvus collection, change it if you want to start a new memory and retain the old memory.
# MILVUS_ADDR=localhost:19530
# MILVUS_USERNAME=
# MILVUS_PASSWORD=
# MILVUS_SECURE=
# MILVUS_COLLECTION=autogpt # MILVUS_COLLECTION=autogpt
################################################################################ ################################################################################

View File

@@ -85,9 +85,12 @@ class Config(metaclass=Singleton):
os.getenv("USE_WEAVIATE_EMBEDDED", "False") == "True" os.getenv("USE_WEAVIATE_EMBEDDED", "False") == "True"
) )
# milvus configuration, e.g., localhost:19530. # milvus or zilliz cloud configuration.
self.milvus_addr = os.getenv("MILVUS_ADDR", "localhost:19530") self.milvus_addr = os.getenv("MILVUS_ADDR", "localhost:19530")
self.milvus_username = os.getenv("MILVUS_USERNAME")
self.milvus_password = os.getenv("MILVUS_PASSWORD")
self.milvus_collection = os.getenv("MILVUS_COLLECTION", "autogpt") self.milvus_collection = os.getenv("MILVUS_COLLECTION", "autogpt")
self.milvus_secure = os.getenv("MILVUS_SECURE") == "True"
self.image_provider = os.getenv("IMAGE_PROVIDER") self.image_provider = os.getenv("IMAGE_PROVIDER")
self.image_size = int(os.getenv("IMAGE_SIZE", 256)) self.image_size = int(os.getenv("IMAGE_SIZE", 256))

View File

@@ -69,8 +69,8 @@ def get_memory(cfg, init=False):
elif cfg.memory_backend == "milvus": elif cfg.memory_backend == "milvus":
if not MilvusMemory: if not MilvusMemory:
print( print(
"Error: Milvus sdk is not installed." "Error: pymilvus sdk is not installed."
"Please install pymilvus to use Milvus as memory backend." "Please install pymilvus to use Milvus or Zilliz Cloud as memory backend."
) )
else: else:
memory = MilvusMemory(cfg) memory = MilvusMemory(cfg)

View File

@@ -1,4 +1,7 @@
""" Milvus memory storage provider.""" """ Milvus memory storage provider."""
import re
from config import Config
from pymilvus import Collection, CollectionSchema, DataType, FieldSchema, connections from pymilvus import Collection, CollectionSchema, DataType, FieldSchema, connections
from autogpt.memory.base import MemoryProviderSingleton, get_ada_embedding from autogpt.memory.base import MemoryProviderSingleton, get_ada_embedding
@@ -7,14 +10,66 @@ from autogpt.memory.base import MemoryProviderSingleton, get_ada_embedding
class MilvusMemory(MemoryProviderSingleton): class MilvusMemory(MemoryProviderSingleton):
"""Milvus memory storage provider.""" """Milvus memory storage provider."""
def __init__(self, cfg) -> None: def __init__(self, cfg: Config) -> None:
"""Construct a milvus memory storage connection. """Construct a milvus memory storage connection.
Args: Args:
cfg (Config): Auto-GPT global config. cfg (Config): Auto-GPT global config.
""" """
# connect to milvus server. self.configure(cfg)
connections.connect(address=cfg.milvus_addr)
connect_kwargs = {}
if self.username:
connect_kwargs["user"] = self.username
connect_kwargs["password"] = self.password
connections.connect(
**connect_kwargs,
uri=self.uri or "",
address=self.address or "",
secure=self.secure,
)
self.init_collection()
def configure(self, cfg: "Config") -> None:
    """Derive connection and index settings from the global config.

    Sets ``uri``, ``address``, ``secure``, ``username``, ``password``,
    ``collection_name`` and ``index_params`` on the instance.

    Args:
        cfg (Config): Auto-GPT global config. Its ``milvus_addr``,
            ``milvus_secure``, ``milvus_username``, ``milvus_password`` and
            ``milvus_collection`` attributes are read.

    Raises:
        ValueError: If exactly one of username and password is ``None``.
    """
    # init with configuration.
    self.uri = None
    self.address = cfg.milvus_addr
    self.secure = cfg.milvus_secure
    self.username = cfg.milvus_username
    self.password = cfg.milvus_password
    self.collection_name = cfg.milvus_collection
    # use HNSW by default.
    self.index_params = {
        "metric_type": "IP",
        "index_type": "HNSW",
        "params": {"M": 8, "efConstruction": 64},
    }

    if (self.username is None) != (self.password is None):
        raise ValueError(
            "Both username and password must be set to use authentication for Milvus"
        )

    # configured address may be a full URL.
    if re.match(r"^(https?|tcp)://", self.address) is not None:
        self.uri = self.address
        self.address = None

        # An https:// URL implies TLS regardless of the configured flag.
        if self.uri.startswith("https"):
            self.secure = True

        # Zilliz Cloud requires AutoIndex.
        # The check must look at ``self.uri``: ``self.address`` was cleared
        # just above, and passing None to re.match raises TypeError.
        if re.match(r"^https://(.*)\.zillizcloud\.(com|cn)", self.uri) is not None:
            self.index_params = {
                "metric_type": "IP",
                "index_type": "AUTOINDEX",
                "params": {},
            }
def init_collection(self) -> None:
"""Initialize collection in vector database."""
fields = [ fields = [
FieldSchema(name="pk", dtype=DataType.INT64, is_primary=True, auto_id=True), FieldSchema(name="pk", dtype=DataType.INT64, is_primary=True, auto_id=True),
FieldSchema(name="embeddings", dtype=DataType.FLOAT_VECTOR, dim=1536), FieldSchema(name="embeddings", dtype=DataType.FLOAT_VECTOR, dim=1536),
@@ -22,19 +77,14 @@ class MilvusMemory(MemoryProviderSingleton):
] ]
# create collection if not exist and load it. # create collection if not exist and load it.
self.milvus_collection = cfg.milvus_collection
self.schema = CollectionSchema(fields, "auto-gpt memory storage") self.schema = CollectionSchema(fields, "auto-gpt memory storage")
self.collection = Collection(self.milvus_collection, self.schema) self.collection = Collection(self.collection_name, self.schema)
# create index if not exist. # create index if not exist.
if not self.collection.has_index(): if not self.collection.has_index():
self.collection.release() self.collection.release()
self.collection.create_index( self.collection.create_index(
"embeddings", "embeddings",
{ self.index_params,
"metric_type": "IP",
"index_type": "HNSW",
"params": {"M": 8, "efConstruction": 64},
},
index_name="embeddings", index_name="embeddings",
) )
self.collection.load() self.collection.load()
@@ -70,14 +120,10 @@ class MilvusMemory(MemoryProviderSingleton):
str: log. str: log.
""" """
self.collection.drop() self.collection.drop()
self.collection = Collection(self.milvus_collection, self.schema) self.collection = Collection(self.collection_name, self.schema)
self.collection.create_index( self.collection.create_index(
"embeddings", "embeddings",
{ self.index_params,
"metric_type": "IP",
"index_type": "HNSW",
"params": {"M": 8, "efConstruction": 64},
},
index_name="embeddings", index_name="embeddings",
) )
self.collection.load() self.collection.load()

View File

@@ -15,7 +15,7 @@ To switch to either, change the `MEMORY_BACKEND` env variable to the value that
Links to memory backends Links to memory backends
- [Pinecone](https://www.pinecone.io/) - [Pinecone](https://www.pinecone.io/)
- [Milvus](https://milvus.io/) - [Milvus](https://milvus.io/) &ndash; [self-hosted](https://milvus.io/docs), or managed with [Zilliz Cloud](https://zilliz.com/)
- [Redis](https://redis.io) - [Redis](https://redis.io)
- [Weaviate](https://weaviate.io) - [Weaviate](https://weaviate.io)
@@ -80,17 +80,37 @@ export MEMORY_BACKEND="pinecone"
### Milvus Setup ### Milvus Setup
[Milvus](https://milvus.io/) is an open-source, highly scalable vector database to store huge amounts of vector-based memory and provide fast relevant search. [Milvus](https://milvus.io/) is an open-source, highly scalable vector database to store huge amounts of vector-based memory and provide fast relevant search. It can be deployed quickly, either locally using Docker or as a cloud service provided by [Zilliz Cloud](https://zilliz.com/).
- setup milvus database, keep your pymilvus version and milvus version same to avoid compatible issues. 1. Deploy your Milvus service, either locally using docker or with a managed Zilliz Cloud database.
- setup by open source [Install Milvus](https://milvus.io/docs/install_standalone-operator.md) - [Install and deploy Milvus locally](https://milvus.io/docs/install_standalone-operator.md)
- or setup by [Zilliz Cloud](https://zilliz.com/cloud)
- set `MILVUS_ADDR` in `.env` to your milvus address `host:ip`.
- set `MEMORY_BACKEND` in `.env` to `milvus` to enable milvus as backend.
**Optional:** - <details><summary>Set up a managed Zilliz Cloud database <i>(click to expand)</i></summary>
- set `MILVUS_COLLECTION` in `.env` to change milvus collection name as you want, `autogpt` is the default name.
1. Go to [Zilliz Cloud](https://zilliz.com/) and sign up if you don't already have an account.
2. In the *Databases* tab, create a new database.
- Remember your username and password
- Wait until the database status is changed to RUNNING.
3. In the *Database detail* tab of the database you have created, find the public cloud endpoint, such as:
`https://xxx-xxxx.xxxx.xxxx.zillizcloud.com:443`.
</details>
2. Run `pip3 install pymilvus` to install the required client library.
Make sure your PyMilvus version and Milvus version are [compatible](https://github.com/milvus-io/pymilvus#compatibility) to avoid issues.
See also the [PyMilvus installation instructions](https://github.com/milvus-io/pymilvus#installation).
3. Update `.env`
- `MEMORY_BACKEND=milvus`
- One of:
- `MILVUS_ADDR=host:port` (for local instance)
- `MILVUS_ADDR=https://xxx-xxxx.xxxx.xxxx.zillizcloud.com:443` (for Zilliz Cloud)
*The following settings are **optional**:*
- Set `MILVUS_USERNAME='username-of-your-milvus-instance'`
- Set `MILVUS_PASSWORD='password-of-your-milvus-instance'`
- Set `MILVUS_SECURE=True` to use a secure connection. Only use if your Milvus instance has TLS enabled.
Setting `MILVUS_ADDR` to an `https://` URL will override this setting.
- Set `MILVUS_COLLECTION` if you want to change the collection name to use in Milvus. Defaults to `autogpt`.
### Weaviate Setup ### Weaviate Setup
[Weaviate](https://weaviate.io/) is an open-source vector database. It allows to store data objects and vector embeddings from ML-models and scales seamlessly to billion of data objects. [An instance of Weaviate can be created locally (using Docker), on Kubernetes or using Weaviate Cloud Services](https://weaviate.io/developers/weaviate/quickstart). [Weaviate](https://weaviate.io/) is an open-source vector database. It allows to store data objects and vector embeddings from ML-models and scales seamlessly to billion of data objects. [An instance of Weaviate can be created locally (using Docker), on Kubernetes or using Weaviate Cloud Services](https://weaviate.io/developers/weaviate/quickstart).