Support secure and authenticated Milvus memory backends (#2127)

Co-authored-by: Reinier van der Leer (Pwuts) <github@pwuts.nl>
This commit is contained in:
chyezh
2023-04-24 00:11:04 +08:00
committed by GitHub
parent 6e588bb2ed
commit cac1ea27e2
5 changed files with 108 additions and 33 deletions

View File

@@ -52,7 +52,7 @@ OPENAI_API_KEY=your-openai-api-key
## local - Default
## pinecone - Pinecone (if configured)
## redis - Redis (if configured)
## milvus - Milvus (if configured)
## milvus - Milvus (if configured - also works with Zilliz)
## MEMORY_INDEX - Name of index created in Memory backend (Default: auto-gpt)
# MEMORY_BACKEND=local
# MEMORY_INDEX=auto-gpt
@@ -93,10 +93,16 @@ OPENAI_API_KEY=your-openai-api-key
# WEAVIATE_API_KEY=
### MILVUS
## MILVUS_ADDR - Milvus remote address (e.g. localhost:19530)
## MILVUS_COLLECTION - Milvus collection,
## change it if you want to start a new memory and retain the old memory.
# MILVUS_ADDR=your-milvus-cluster-host-port
## MILVUS_ADDR - Milvus remote address (e.g. localhost:19530, https://xxx-xxxx.xxxx.xxxx.zillizcloud.com:443)
## MILVUS_USERNAME - username for your Milvus database
## MILVUS_PASSWORD - password for your Milvus database
## MILVUS_SECURE - True to enable TLS. (Default: False)
## Setting MILVUS_ADDR to a `https://` URL will override this setting.
## MILVUS_COLLECTION - Milvus collection, change it if you want to start a new memory and retain the old memory.
# MILVUS_ADDR=localhost:19530
# MILVUS_USERNAME=
# MILVUS_PASSWORD=
# MILVUS_SECURE=
# MILVUS_COLLECTION=autogpt
################################################################################

View File

@@ -85,9 +85,12 @@ class Config(metaclass=Singleton):
os.getenv("USE_WEAVIATE_EMBEDDED", "False") == "True"
)
# milvus configuration, e.g., localhost:19530.
# milvus or zilliz cloud configuration.
self.milvus_addr = os.getenv("MILVUS_ADDR", "localhost:19530")
self.milvus_username = os.getenv("MILVUS_USERNAME")
self.milvus_password = os.getenv("MILVUS_PASSWORD")
self.milvus_collection = os.getenv("MILVUS_COLLECTION", "autogpt")
self.milvus_secure = os.getenv("MILVUS_SECURE") == "True"
self.image_provider = os.getenv("IMAGE_PROVIDER")
self.image_size = int(os.getenv("IMAGE_SIZE", 256))

View File

@@ -69,8 +69,8 @@ def get_memory(cfg, init=False):
elif cfg.memory_backend == "milvus":
if not MilvusMemory:
print(
"Error: Milvus sdk is not installed."
"Please install pymilvus to use Milvus as memory backend."
"Error: pymilvus sdk is not installed."
"Please install pymilvus to use Milvus or Zilliz Cloud as memory backend."
)
else:
memory = MilvusMemory(cfg)

View File

@@ -1,4 +1,7 @@
""" Milvus memory storage provider."""
import re
from config import Config
from pymilvus import Collection, CollectionSchema, DataType, FieldSchema, connections
from autogpt.memory.base import MemoryProviderSingleton, get_ada_embedding
@@ -7,14 +10,66 @@ from autogpt.memory.base import MemoryProviderSingleton, get_ada_embedding
class MilvusMemory(MemoryProviderSingleton):
"""Milvus memory storage provider."""
def __init__(self, cfg) -> None:
def __init__(self, cfg: Config) -> None:
"""Construct a milvus memory storage connection.
Args:
cfg (Config): Auto-GPT global config.
"""
# connect to milvus server.
connections.connect(address=cfg.milvus_addr)
self.configure(cfg)
connect_kwargs = {}
if self.username:
connect_kwargs["user"] = self.username
connect_kwargs["password"] = self.password
connections.connect(
**connect_kwargs,
uri=self.uri or "",
address=self.address or "",
secure=self.secure,
)
self.init_collection()
def configure(self, cfg: Config) -> None:
# init with configuration.
self.uri = None
self.address = cfg.milvus_addr
self.secure = cfg.milvus_secure
self.username = cfg.milvus_username
self.password = cfg.milvus_password
self.collection_name = cfg.milvus_collection
# use HNSW by default.
self.index_params = {
"metric_type": "IP",
"index_type": "HNSW",
"params": {"M": 8, "efConstruction": 64},
}
if (self.username is None) != (self.password is None):
raise ValueError(
"Both username and password must be set to use authentication for Milvus"
)
# configured address may be a full URL.
if re.match(r"^(https?|tcp)://", self.address) is not None:
self.uri = self.address
self.address = None
if self.uri.startswith("https"):
self.secure = True
# Zilliz Cloud requires AutoIndex.
if re.match(r"^https://(.*)\.zillizcloud\.(com|cn)", self.address) is not None:
self.index_params = {
"metric_type": "IP",
"index_type": "AUTOINDEX",
"params": {},
}
def init_collection(self) -> None:
"""Initialize collection in vector database."""
fields = [
FieldSchema(name="pk", dtype=DataType.INT64, is_primary=True, auto_id=True),
FieldSchema(name="embeddings", dtype=DataType.FLOAT_VECTOR, dim=1536),
@@ -22,19 +77,14 @@ class MilvusMemory(MemoryProviderSingleton):
]
# create collection if not exist and load it.
self.milvus_collection = cfg.milvus_collection
self.schema = CollectionSchema(fields, "auto-gpt memory storage")
self.collection = Collection(self.milvus_collection, self.schema)
self.collection = Collection(self.collection_name, self.schema)
# create index if not exist.
if not self.collection.has_index():
self.collection.release()
self.collection.create_index(
"embeddings",
{
"metric_type": "IP",
"index_type": "HNSW",
"params": {"M": 8, "efConstruction": 64},
},
self.index_params,
index_name="embeddings",
)
self.collection.load()
@@ -70,14 +120,10 @@ class MilvusMemory(MemoryProviderSingleton):
str: log.
"""
self.collection.drop()
self.collection = Collection(self.milvus_collection, self.schema)
self.collection = Collection(self.collection_name, self.schema)
self.collection.create_index(
"embeddings",
{
"metric_type": "IP",
"index_type": "HNSW",
"params": {"M": 8, "efConstruction": 64},
},
self.index_params,
index_name="embeddings",
)
self.collection.load()

View File

@@ -15,7 +15,7 @@ To switch to either, change the `MEMORY_BACKEND` env variable to the value that
Links to memory backends
- [Pinecone](https://www.pinecone.io/)
- [Milvus](https://milvus.io/)
- [Milvus](https://milvus.io/) &ndash; [self-hosted](https://milvus.io/docs), or managed with [Zilliz Cloud](https://zilliz.com/)
- [Redis](https://redis.io)
- [Weaviate](https://weaviate.io)
@@ -80,17 +80,37 @@ export MEMORY_BACKEND="pinecone"
### Milvus Setup
[Milvus](https://milvus.io/) is an open-source, highly scalable vector database to store huge amounts of vector-based memory and provide fast relevant search.
[Milvus](https://milvus.io/) is an open-source, highly scalable vector database to store huge amounts of vector-based memory and provide fast relevant search. And it can be quickly deployed by docker locally or as a cloud service provided by [Zilliz Cloud](https://zilliz.com/).
- setup milvus database, keep your pymilvus version and milvus version same to avoid compatible issues.
- setup by open source [Install Milvus](https://milvus.io/docs/install_standalone-operator.md)
- or setup by [Zilliz Cloud](https://zilliz.com/cloud)
- set `MILVUS_ADDR` in `.env` to your milvus address `host:ip`.
- set `MEMORY_BACKEND` in `.env` to `milvus` to enable milvus as backend.
1. Deploy your Milvus service, either locally using docker or with a managed Zilliz Cloud database.
- [Install and deploy Milvus locally](https://milvus.io/docs/install_standalone-operator.md)
**Optional:**
- set `MILVUS_COLLECTION` in `.env` to change milvus collection name as you want, `autogpt` is the default name.
- <details><summary>Set up a managed Zilliz Cloud database <i>(click to expand)</i></summary>
1. Go to [Zilliz Cloud](https://zilliz.com/) and sign up if you don't already have account.
2. In the *Databases* tab, create a new database.
- Remember your username and password
- Wait until the database status is changed to RUNNING.
3. In the *Database detail* tab of the database you have created, the public cloud endpoint, such as:
`https://xxx-xxxx.xxxx.xxxx.zillizcloud.com:443`.
</details>
2. Run `pip3 install pymilvus` to install the required client library.
Make sure your PyMilvus version and Milvus version are [compatible](https://github.com/milvus-io/pymilvus#compatibility) to avoid issues.
See also the [PyMilvus installation instructions](https://github.com/milvus-io/pymilvus#installation).
3. Update `.env`
- `MEMORY_BACKEND=milvus`
- One of:
- `MILVUS_ADDR=host:ip` (for local instance)
- `MILVUS_ADDR=https://xxx-xxxx.xxxx.xxxx.zillizcloud.com:443` (for Zilliz Cloud)
*The following settings are **optional**:*
- Set `MILVUS_USERNAME='username-of-your-milvus-instance'`
- Set `MILVUS_PASSWORD='password-of-your-milvus-instance'`
- Set `MILVUS_SECURE=True` to use a secure connection. Only use if your Milvus instance has TLS enabled.
Setting `MILVUS_ADDR` to a `https://` URL will override this setting.
- Set `MILVUS_COLLECTION` if you want to change the collection name to use in Milvus. Defaults to `autogpt`.
### Weaviate Setup
[Weaviate](https://weaviate.io/) is an open-source vector database. It allows to store data objects and vector embeddings from ML-models and scales seamlessly to billion of data objects. [An instance of Weaviate can be created locally (using Docker), on Kubernetes or using Weaviate Cloud Services](https://weaviate.io/developers/weaviate/quickstart).