From a791d7a2449684b2435956c3c08158a5c04ce81f Mon Sep 17 00:00:00 2001
From: Edgar Barrantes
Date: Sat, 15 Apr 2023 12:06:40 +0300
Subject: [PATCH 01/11] Update docs: Data ingestion script location

---
 README.md | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/README.md b/README.md
index 55c3d833..7da3172a 100644
--- a/README.md
+++ b/README.md
@@ -297,7 +297,7 @@ To switch to either, change the `MEMORY_BACKEND` env variable to the value that
 ## 🧠 Memory pre-seeding
 
 ```
-# python scripts/data_ingestion.py -h
+# python autogpt/data_ingestion.py -h
 usage: data_ingestion.py [-h] (--file FILE | --dir DIR) [--init] [--overlap OVERLAP] [--max_length MAX_LENGTH]
 
 Ingest a file or a directory with multiple files into memory. Make sure to set your .env before running this script.
 
 options:
   -h, --help            show this help message and exit
   --file FILE           The file to ingest.
   --dir DIR             The directory containing the files to ingest.
   --init                Init the memory and wipe its content (default: False)
   --overlap OVERLAP     The overlap size between chunks when ingesting files (default: 200)
   --max_length MAX_LENGTH   The max_length of each chunk when ingesting files (default: 4000)
-# python scripts/data_ingestion.py --dir seed_data --init --overlap 200 --max_length 1000
+# python autogpt/data_ingestion.py --dir seed_data --init --overlap 200 --max_length 1000
 ```
 
-This script located at scripts/data_ingestion.py, allows you to ingest files into memory and pre-seed it before running Auto-GPT.
+This script, located at autogpt/data_ingestion.py, allows you to ingest files into memory and pre-seed it before running Auto-GPT.
 
 Memory pre-seeding is a technique that involves ingesting relevant documents or data into the AI's memory so that it can use this information to generate more informed and accurate responses.

From f67b81e200865ada260c4db3c0a2ee04e400a90b Mon Sep 17 00:00:00 2001
From: Imccccc
Date: Sun, 16 Apr 2023 00:13:29 +0800
Subject: [PATCH 02/11] Embedding improvement

1. Move the embedding function into llm_utils.
2. Add retry handling within the embedding function.
---
 autogpt/llm_utils.py       | 32 ++++++++++++++++++++++++++++++++
 autogpt/memory/local.py    |  7 ++++---
 autogpt/memory/pinecone.py |  8 ++++----
 autogpt/memory/redismem.py |  7 ++++---
 4 files changed, 44 insertions(+), 10 deletions(-)

diff --git a/autogpt/llm_utils.py b/autogpt/llm_utils.py
index f2418b65..2e77db10 100644
--- a/autogpt/llm_utils.py
+++ b/autogpt/llm_utils.py
@@ -113,3 +113,35 @@ def create_chat_completion(
         raise RuntimeError(f"Failed to get response after {num_retries} retries")
 
     return response.choices[0].message["content"]
+
+
+def create_embedding_with_ada(text) -> list: 
+    """Create a embedding with text-ada-002 using the OpenAI SDK"""
+    num_retries = 10
+    for attempt in range(num_retries):
+        backoff = 2 ** (attempt + 2)
+        try:
+            if CFG.use_azure:
+                return openai.Embedding.create(input=[text],
+                    engine=CFG.get_azure_deployment_id_for_model("text-embedding-ada-002"),
+                )["data"][0]["embedding"]
+            else:
+                return openai.Embedding.create(input=[text], model="text-embedding-ada-002")[
+                    "data"
+                ][0]["embedding"]
+        except RateLimitError:
+            pass
+        except APIError as e:
+            if e.http_status == 502:
+                pass
+            else:
+                raise
+            if attempt == num_retries - 1:
+                raise
+        if CFG.debug_mode:
+            print(
+                Fore.RED + "Error: ",
+                f"API Bad gateway. Waiting {backoff} seconds..." + Fore.RESET,
+            )
+        time.sleep(backoff)
+
diff --git a/autogpt/memory/local.py b/autogpt/memory/local.py
index a5f6076e..004153c1 100644
--- a/autogpt/memory/local.py
+++ b/autogpt/memory/local.py
@@ -5,7 +5,8 @@ from typing import Any, List, Optional, Tuple
 import numpy as np
 import orjson
 
-from autogpt.memory.base import MemoryProviderSingleton, get_ada_embedding
+from autogpt.memory.base import MemoryProviderSingleton
+from autogpt.llm_utils import create_embedding_with_ada
 
 EMBED_DIM = 1536
 SAVE_OPTIONS = orjson.OPT_SERIALIZE_NUMPY | orjson.OPT_SERIALIZE_DATACLASS
@@ -70,7 +71,7 @@ class LocalCache(MemoryProviderSingleton):
             return ""
         self.data.texts.append(text)
 
-        embedding = get_ada_embedding(text)
+        embedding = create_embedding_with_ada(text)
 
         vector = np.array(embedding).astype(np.float32)
         vector = vector[np.newaxis, :]
@@ -118,7 +119,7 @@ class LocalCache(MemoryProviderSingleton):
         Returns: List[str]
         """
-        embedding = get_ada_embedding(text)
+        embedding = create_embedding_with_ada(text)
 
         scores = np.dot(self.data.embeddings, embedding)
 
diff --git a/autogpt/memory/pinecone.py b/autogpt/memory/pinecone.py
index a7dbfa82..3817436d 100644
--- a/autogpt/memory/pinecone.py
+++ b/autogpt/memory/pinecone.py
@@ -2,8 +2,8 @@ import pinecone
 from colorama import Fore, Style
 
 from autogpt.logs import logger
-from autogpt.memory.base import MemoryProviderSingleton, get_ada_embedding
-
+from autogpt.memory.base import MemoryProviderSingleton
+from autogpt.llm_utils import create_embedding_with_ada
 
 class PineconeMemory(MemoryProviderSingleton):
     def __init__(self, cfg):
@@ -43,7 +43,7 @@ class PineconeMemory(MemoryProviderSingleton):
         self.index = pinecone.Index(table_name)
 
     def add(self, data):
-        vector = get_ada_embedding(data)
+        vector = create_embedding_with_ada(data)
         # no metadata here. We may wish to change that long term.
         self.index.upsert([(str(self.vec_num), vector, {"raw_text": data})])
         _text = f"Inserting data into memory at index: {self.vec_num}:\n data: {data}"
@@ -63,7 +63,7 @@ class PineconeMemory(MemoryProviderSingleton):
         :param data: The data to compare to.
         :param num_relevant: The number of relevant data to return. Defaults to 5
         """
-        query_embedding = get_ada_embedding(data)
+        query_embedding = create_embedding_with_ada(data)
         results = self.index.query(
             query_embedding, top_k=num_relevant, include_metadata=True
         )
 
diff --git a/autogpt/memory/redismem.py b/autogpt/memory/redismem.py
index df6d8fc0..4d73b741 100644
--- a/autogpt/memory/redismem.py
+++ b/autogpt/memory/redismem.py
@@ -9,7 +9,8 @@ from redis.commands.search.indexDefinition import IndexDefinition, IndexType
 from redis.commands.search.query import Query
 
 from autogpt.logs import logger
-from autogpt.memory.base import MemoryProviderSingleton, get_ada_embedding
+from autogpt.memory.base import MemoryProviderSingleton
+from autogpt.llm_utils import create_embedding_with_ada
 
 SCHEMA = [
     TextField("data"),
@@ -85,7 +86,7 @@ class RedisMemory(MemoryProviderSingleton):
         """
         if "Command Error:" in data:
             return ""
-        vector = get_ada_embedding(data)
+        vector = create_embedding_with_ada(data)
         vector = np.array(vector).astype(np.float32).tobytes()
         data_dict = {b"data": data, "embedding": vector}
         pipe = self.redis.pipeline()
@@ -127,7 +128,7 @@ class RedisMemory(MemoryProviderSingleton):
 
         Returns: A list of the most relevant data.
""" - query_embedding = get_ada_embedding(data) + query_embedding = create_embedding_with_ada(data) base_query = f"*=>[KNN {num_relevant} @embedding $vector AS vector_score]" query = ( Query(base_query) From 77f44cdbbecb7ccc959685650977245daa1545ca Mon Sep 17 00:00:00 2001 From: nponeccop Date: Sat, 15 Apr 2023 20:59:38 +0200 Subject: [PATCH 03/11] Fix run.bat to use the new module --- run.bat | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/run.bat b/run.bat index 081af67c..afbab57a 100644 --- a/run.bat +++ b/run.bat @@ -4,5 +4,5 @@ if errorlevel 1 ( echo Installing missing packages... pip install -r requirements.txt ) -python scripts/main.py %1 +python -m autogpt %* pause From 051b5372cec13b6f9031e0f678fc8fd5eaf60ac2 Mon Sep 17 00:00:00 2001 From: hyaxia Date: Sat, 15 Apr 2023 22:06:27 +0300 Subject: [PATCH 04/11] in debug mode add a log about rate limit error --- autogpt/llm_utils.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/autogpt/llm_utils.py b/autogpt/llm_utils.py index f2418b65..aafa84b3 100644 --- a/autogpt/llm_utils.py +++ b/autogpt/llm_utils.py @@ -95,6 +95,11 @@ def create_chat_completion( ) break except RateLimitError: + if CFG.debug_mode: + print( + Fore.RED + "Error: ", + f"Reached rate limit. Waiting {backoff} seconds..." + Fore.RESET, + ) pass except APIError as e: if e.http_status == 502: From 27a21e848d174d4599ebe6a863f9159a93e53985 Mon Sep 17 00:00:00 2001 From: Luca Meneghetti <100935601+thisislvca@users.noreply.github.com> Date: Sat, 15 Apr 2023 21:07:27 +0200 Subject: [PATCH 05/11] Update README.md Fixed a typo in the README.md file: From "may often in a **broken** state." to "may often be in a **broken** state." --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index cf370f13..ae487ceb 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,7 @@ > This information takes precedence over any other information provided and should be followed accordingly. -Our workflow has been improved, but please note that `master` branch may often in a **broken** state. +Our workflow has been improved, but please note that `master` branch may often be in a **broken** state. Please download the latest `stable` release from here: https://github.com/Torantulino/Auto-GPT/releases/latest. ![GitHub Repo stars](https://img.shields.io/github/stars/Torantulino/auto-gpt?style=social) From 2f776957d8832eb2aa78c92d643266529b7685d7 Mon Sep 17 00:00:00 2001 From: hyaxia Date: Sat, 15 Apr 2023 22:20:05 +0300 Subject: [PATCH 06/11] changed error msg --- autogpt/llm_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/autogpt/llm_utils.py b/autogpt/llm_utils.py index aafa84b3..4e74566b 100644 --- a/autogpt/llm_utils.py +++ b/autogpt/llm_utils.py @@ -98,7 +98,7 @@ def create_chat_completion( if CFG.debug_mode: print( Fore.RED + "Error: ", - f"Reached rate limit. Waiting {backoff} seconds..." + Fore.RESET, + f"Reached rate limit, passing..." 
+ Fore.RESET, ) pass except APIError as e: From bebc015eb3ea1cf5cfea83dfef1e60324d747445 Mon Sep 17 00:00:00 2001 From: DJ Stomp <85457381+DJStompZone@users.noreply.github.com> Date: Sat, 15 Apr 2023 12:30:09 -0700 Subject: [PATCH 07/11] Update requirements.txt --- requirements.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 23627f3e..dbfa7741 100644 --- a/requirements.txt +++ b/requirements.txt @@ -12,7 +12,6 @@ docker duckduckgo-search google-api-python-client #(https://developers.google.com/custom-search/v1/overview) pinecone-client==2.2.1 -# pymilvus==2.2.4 # Uncomment to use, but don't push uncommented to repo (causes trouble with package-installation in automated tests) redis orjson Pillow From 11d6dabe37f831d775c2a3d18b83c92069ea0640 Mon Sep 17 00:00:00 2001 From: BillSchumacher <34168009+BillSchumacher@users.noreply.github.com> Date: Sat, 15 Apr 2023 14:55:13 -0500 Subject: [PATCH 08/11] Quality update --- autogpt/__main__.py | 4 +- autogpt/app.py | 20 +++++--- autogpt/args.py | 2 +- autogpt/commands/execute_code.py | 4 +- autogpt/commands/git_operations.py | 20 +++++--- autogpt/commands/web_selenium.py | 6 ++- autogpt/config/config.py | 4 +- autogpt/json_fixes/auto_fix.py | 15 ++++-- autogpt/memory/__init__.py | 15 ++++-- autogpt/memory/milvus.py | 81 +++++++++++++++++++----------- autogpt/prompt.py | 6 ++- scripts/check_requirements.py | 18 ++++--- tests/milvus_memory_test.py | 1 - tests/smoke_test.py | 39 +++++++------- tests/test_token_counter.py | 17 ++++--- 15 files changed, 161 insertions(+), 91 deletions(-) diff --git a/autogpt/__main__.py b/autogpt/__main__.py index 5105df55..29ccddbf 100644 --- a/autogpt/__main__.py +++ b/autogpt/__main__.py @@ -34,7 +34,9 @@ def main() -> None: # Initialize memory and make sure it is empty. 
     # this is particularly important for indexing and referencing pinecone memory
     memory = get_memory(cfg, init=True)
-    logger.typewriter_log(f"Using memory of type:", Fore.GREEN, f"{memory.__class__.__name__}")
+    logger.typewriter_log(
+        f"Using memory of type:", Fore.GREEN, f"{memory.__class__.__name__}"
+    )
     logger.typewriter_log(f"Using Browser:", Fore.GREEN, cfg.selenium_web_browser)
     agent = Agent(
         ai_name=ai_name,

diff --git a/autogpt/app.py b/autogpt/app.py
index eb585134..e84241c5 100644
--- a/autogpt/app.py
+++ b/autogpt/app.py
@@ -89,13 +89,13 @@ def get_command(response: str):
 
 
 def map_command_synonyms(command_name: str):
-    """ Takes the original command name given by the AI, and checks if the
-    string matches a list of common/known hallucinations
+    """Takes the original command name given by the AI, and checks if the
+    string matches a list of common/known hallucinations
     """
     synonyms = [
-        ('write_file', 'write_to_file'),
-        ('create_file', 'write_to_file'),
-        ('search', 'google')
+        ("write_file", "write_to_file"),
+        ("create_file", "write_to_file"),
+        ("search", "google"),
     ]
     for seen_command, actual_command_name in synonyms:
         if command_name == seen_command:
@@ -125,7 +125,7 @@ def execute_command(command_name: str, arguments):
                 google_result = google_official_search(arguments["input"])
             else:
                 google_result = google_search(arguments["input"])
-            safe_message = google_result.encode('utf-8', 'ignore')
+            safe_message = google_result.encode("utf-8", "ignore")
             return str(safe_message)
         elif command_name == "memory_add":
             return memory.add(arguments["string"])
@@ -144,7 +144,9 @@ def execute_command(command_name: str, arguments):
         elif command_name == "get_hyperlinks":
             return get_hyperlinks(arguments["url"])
         elif command_name == "clone_repository":
-            return clone_repository(arguments["repository_url"], arguments["clone_path"])
+            return clone_repository(
+                arguments["repository_url"], arguments["clone_path"]
+            )
         elif command_name == "read_file":
             return read_file(arguments["file"])
         elif command_name == "write_to_file":
@@ -278,7 +280,9 @@ def list_agents():
     Returns:
         str: A list of all agents
     """
-    return "List of agents:\n" + "\n".join([str(x[0]) + ": " + x[1] for x in AGENT_MANAGER.list_agents()])
+    return "List of agents:\n" + "\n".join(
+        [str(x[0]) + ": " + x[1] for x in AGENT_MANAGER.list_agents()]
+    )
 
 
 def delete_agent(key: str) -> str:

diff --git a/autogpt/args.py b/autogpt/args.py
index 0bc3e680..eca32334 100644
--- a/autogpt/args.py
+++ b/autogpt/args.py
@@ -54,7 +54,7 @@ def parse_arguments() -> None:
         "--use-browser",
         "-b",
         dest="browser_name",
-        help="Specifies which web-browser to use when using selenium to scrape the web."
+        help="Specifies which web-browser to use when using selenium to scrape the web.",
     )
     parser.add_argument(
         "--ai-settings",

diff --git a/autogpt/commands/execute_code.py b/autogpt/commands/execute_code.py
index 93f3532e..86d6c177 100644
--- a/autogpt/commands/execute_code.py
+++ b/autogpt/commands/execute_code.py
@@ -99,8 +99,8 @@ def execute_shell(command_line: str) -> str:
         str: The output of the command
     """
     current_dir = os.getcwd()
-
-    if str(WORKING_DIRECTORY) not in current_dir:  # Change dir into workspace if necessary
+    # Change dir into workspace if necessary
+    if str(WORKING_DIRECTORY) not in current_dir:
         work_dir = os.path.join(os.getcwd(), WORKING_DIRECTORY)
         os.chdir(work_dir)

diff --git a/autogpt/commands/git_operations.py b/autogpt/commands/git_operations.py
index 51336052..3483474b 100644
--- a/autogpt/commands/git_operations.py
+++ b/autogpt/commands/git_operations.py
@@ -1,14 +1,20 @@
+"""Git operations for autogpt"""
 import git
 from autogpt.config import Config
 
-cfg = Config()
+CFG = Config()
 
 
-def clone_repository(repo_url, clone_path):
-    """Clone a github repository locally"""
+def clone_repository(repo_url: str, clone_path: str) -> str:
+    """Clone a github repository locally
+
+    Args:
+        repo_url (str): The URL of the repository to clone
+        clone_path (str): The path to clone the repository to
+
+    Returns:
+        str: The result of the clone operation"""
     split_url = repo_url.split("//")
-    auth_repo_url = f"//{cfg.github_username}:{cfg.github_api_key}@".join(split_url)
+    auth_repo_url = f"//{CFG.github_username}:{CFG.github_api_key}@".join(split_url)
     git.Repo.clone_from(auth_repo_url, clone_path)
-    result = f"""Cloned {repo_url} to {clone_path}"""
-
-    return result
+    return f"""Cloned {repo_url} to {clone_path}"""

diff --git a/autogpt/commands/web_selenium.py b/autogpt/commands/web_selenium.py
index ddcb3155..d2ef1a7e 100644
--- a/autogpt/commands/web_selenium.py
+++ b/autogpt/commands/web_selenium.py
@@ -53,7 +53,11 @@ def scrape_text_with_selenium(url: str) -> Tuple[WebDriver, str]:
     """
     logging.getLogger("selenium").setLevel(logging.CRITICAL)
 
-    options_available = {'chrome': ChromeOptions, 'safari': SafariOptions, 'firefox': FirefoxOptions}
+    options_available = {
+        "chrome": ChromeOptions,
+        "safari": SafariOptions,
+        "firefox": FirefoxOptions,
+    }
 
     options = options_available[CFG.selenium_web_browser]()
     options.add_argument(

diff --git a/autogpt/config/config.py b/autogpt/config/config.py
index 365bc320..c47b19af 100644
--- a/autogpt/config/config.py
+++ b/autogpt/config/config.py
@@ -137,7 +137,9 @@ class Config(metaclass=Singleton):
             config_params = {}
         self.openai_api_type = config_params.get("azure_api_type") or "azure"
         self.openai_api_base = config_params.get("azure_api_base") or ""
-        self.openai_api_version = config_params.get("azure_api_version") or "2023-03-15-preview"
+        self.openai_api_version = (
+            config_params.get("azure_api_version") or "2023-03-15-preview"
+        )
         self.azure_model_to_deployment_id_map = config_params.get("azure_model_map", [])
 
     def set_continuous_mode(self, value: bool) -> None:

diff --git a/autogpt/json_fixes/auto_fix.py b/autogpt/json_fixes/auto_fix.py
index ac648460..56664ba9 100644
--- a/autogpt/json_fixes/auto_fix.py
+++ b/autogpt/json_fixes/auto_fix.py
@@ -4,11 +4,20 @@ import json
 from autogpt.llm_utils import call_ai_function
 from autogpt.logs import logger
 from autogpt.config import Config
-cfg = Config()
+
+CFG = Config()
 
 
 def fix_json(json_string: str, schema: str) -> str:
-    """Fix the given JSON string to make it parseable and fully compliant with the provided schema."""
+    """Fix the given JSON string to make it parseable and fully compliant with
+    the provided schema.
+
+    Args:
+        json_string (str): The JSON string to fix.
+        schema (str): The schema to use to fix the JSON.
+    Returns:
+        str: The fixed JSON string.
+    """
     # Try to fix the JSON using GPT:
     function_string = "def fix_json(json_string: str, schema:str=None) -> str:"
     args = [f"'''{json_string}'''", f"'''{schema}'''"]
@@ -24,7 +33,7 @@ def fix_json(json_string: str, schema: str) -> str:
     if not json_string.startswith("`"):
         json_string = "```json\n" + json_string + "\n```"
     result_string = call_ai_function(
-        function_string, args, description_string, model=cfg.fast_llm_model
+        function_string, args, description_string, model=CFG.fast_llm_model
     )
     logger.debug("------------ JSON FIX ATTEMPT ---------------")
     logger.debug(f"Original JSON: {json_string}")

diff --git a/autogpt/memory/__init__.py b/autogpt/memory/__init__.py
index 102b93ae..ab953527 100644
--- a/autogpt/memory/__init__.py
+++ b/autogpt/memory/__init__.py
@@ -50,8 +50,10 @@ def get_memory(cfg, init=False):
             memory = RedisMemory(cfg)
     elif cfg.memory_backend == "milvus":
         if not MilvusMemory:
-            print("Error: Milvus sdk is not installed."
-                  "Please install pymilvus to use Milvus as memory backend.")
+            print(
+                "Error: Milvus sdk is not installed."
+                "Please install pymilvus to use Milvus as memory backend."
+            )
         else:
             memory = MilvusMemory(cfg)
     elif cfg.memory_backend == "no_memory":
@@ -68,4 +70,11 @@ def get_supported_memory_backends():
     return supported_memory
 
 
-__all__ = ["get_memory", "LocalCache", "RedisMemory", "PineconeMemory", "NoMemory", "MilvusMemory"]
+__all__ = [
+    "get_memory",
+    "LocalCache",
+    "RedisMemory",
+    "PineconeMemory",
+    "NoMemory",
+    "MilvusMemory",
+]

diff --git a/autogpt/memory/milvus.py b/autogpt/memory/milvus.py
index fce46a89..c6e7d5a3 100644
--- a/autogpt/memory/milvus.py
+++ b/autogpt/memory/milvus.py
@@ -1,3 +1,4 @@
+""" Milvus memory storage provider."""
 from pymilvus import (
     connections,
     FieldSchema,
@@ -10,8 +11,10 @@ from autogpt.memory.base import MemoryProviderSingleton, get_ada_embedding
 
 
 class MilvusMemory(MemoryProviderSingleton):
-    def __init__(self, cfg):
-        """ Construct a milvus memory storage connection.
+    """Milvus memory storage provider."""
+
+    def __init__(self, cfg) -> None:
+        """Construct a milvus memory storage connection.
 
         Args:
             cfg (Config): Auto-GPT global config.
         """
         # connect to milvus server.
         connections.connect(address=cfg.milvus_addr)
         fields = [
-            FieldSchema(name="pk", dtype=DataType.INT64,
-                        is_primary=True, auto_id=True),
-            FieldSchema(name="embeddings",
-                        dtype=DataType.FLOAT_VECTOR, dim=1536),
-            FieldSchema(name="raw_text", dtype=DataType.VARCHAR,
-                        max_length=65535)
+            FieldSchema(name="pk", dtype=DataType.INT64, is_primary=True, auto_id=True),
+            FieldSchema(name="embeddings", dtype=DataType.FLOAT_VECTOR, dim=1536),
+            FieldSchema(name="raw_text", dtype=DataType.VARCHAR, max_length=65535),
         ]
 
         # create collection if not exist and load it.
         self.milvus_collection = cfg.milvus_collection
         self.schema = CollectionSchema(fields, "auto-gpt memory storage")
         self.collection = Collection(self.milvus_collection, self.schema)
         # create index if not exist.
         if not self.collection.has_index():
             self.collection.release()
-            self.collection.create_index("embeddings", {
-                "metric_type": "IP",
-                "index_type": "HNSW",
-                "params": {"M": 8, "efConstruction": 64},
-            }, index_name="embeddings")
+            self.collection.create_index(
+                "embeddings",
+                {
+                    "metric_type": "IP",
+                    "index_type": "HNSW",
+                    "params": {"M": 8, "efConstruction": 64},
+                },
+                index_name="embeddings",
+            )
         self.collection.load()
 
-    def add(self, data):
-        """ Add a embedding of data into memory.
+    def add(self, data) -> str:
+        """Add a embedding of data into memory.
 
         Args:
             data (str): The raw text to construct embedding index.
 
         Returns:
             str: log.
         """
         embedding = get_ada_embedding(data)
         result = self.collection.insert([[embedding], [data]])
-        _text = f"Inserting data into memory at primary key: {result.primary_keys[0]}:\n data: {data}"
+        _text = (
+            "Inserting data into memory at primary key: "
+            f"{result.primary_keys[0]}:\n data: {data}"
+        )
         return _text
 
     def get(self, data):
-        """ Return the most relevant data in memory.
+        """Return the most relevant data in memory.
 
         Args:
             data: The data to compare to.
         """
         return self.get_relevant(data, 1)
 
-    def clear(self):
-        """ Drop the index in memory.
+    def clear(self) -> str:
+        """Drop the index in memory.
+
+        Returns:
+            str: log.
         """
         self.collection.drop()
         self.collection = Collection(self.milvus_collection, self.schema)
-        self.collection.create_index("embeddings", {
-            "metric_type": "IP",
-            "index_type": "HNSW",
-            "params": {"M": 8, "efConstruction": 64},
-        }, index_name="embeddings")
+        self.collection.create_index(
+            "embeddings",
+            {
+                "metric_type": "IP",
+                "index_type": "HNSW",
+                "params": {"M": 8, "efConstruction": 64},
+            },
+            index_name="embeddings",
+        )
         self.collection.load()
         return "Obliviated"
 
-    def get_relevant(self, data, num_relevant=5):
-        """ Return the top-k relevant data in memory.
+    def get_relevant(self, data: str, num_relevant: int = 5):
+        """Return the top-k relevant data in memory.
 
         Args:
             data: The data to compare to.
-            num_relevant (int, optional): The max number of relevant data. Defaults to 5.
+            num_relevant (int, optional): The max number of relevant data.
+                Defaults to 5.
+
+        Returns:
+            list: The top-k relevant data.
         """
         # search the embedding and return the most relevant text.
         embedding = get_ada_embedding(data)
         search_params = {
             "metrics_type": "IP",
             "params": {"nprobe": 8},
         }
         result = self.collection.search(
-            [embedding], "embeddings", search_params, num_relevant, output_fields=["raw_text"])
+            [embedding],
+            "embeddings",
+            search_params,
+            num_relevant,
+            output_fields=["raw_text"],
+        )
         return [item.entity.value_of_field("raw_text") for item in result[0]]
 
-    def get_stats(self):
+    def get_stats(self) -> str:
         """
         Returns: The stats of the milvus cache.
         """

diff --git a/autogpt/prompt.py b/autogpt/prompt.py
index 5924ce0c..6c51f33e 100644
--- a/autogpt/prompt.py
+++ b/autogpt/prompt.py
@@ -59,7 +59,11 @@ def get_prompt() -> str:
         ),
         ("List GPT Agents", "list_agents", {}),
         ("Delete GPT Agent", "delete_agent", {"key": "<key>"}),
-        ("Clone Repository", "clone_repository", {"repository_url": "<url>", "clone_path": "<directory>"}),
+        (
+            "Clone Repository",
+            "clone_repository",
+            {"repository_url": "<url>", "clone_path": "<directory>"},
+        ),
         ("Write to file", "write_to_file", {"file": "<file>", "text": "<text>"}),
         ("Read file", "read_file", {"file": "<file>"}),
         ("Append to file", "append_to_file", {"file": "<file>", "text": "<text>"}),

diff --git a/scripts/check_requirements.py b/scripts/check_requirements.py
index a74ba367..d1f23504 100644
--- a/scripts/check_requirements.py
+++ b/scripts/check_requirements.py
@@ -1,10 +1,13 @@
 import pkg_resources
 import sys
 
+
 def main():
     requirements_file = sys.argv[1]
-    with open(requirements_file, 'r') as f:
-        required_packages = [line.strip().split('#')[0].strip() for line in f.readlines()]
+    with open(requirements_file, "r") as f:
+        required_packages = [
+            line.strip().split("#")[0].strip() for line in f.readlines()
+        ]
 
     installed_packages = [package.key for package in pkg_resources.working_set]
 
@@ -12,16 +15,17 @@ def main():
     for package in required_packages:
         if not package:  # Skip empty lines
             continue
-        package_name = package.strip().split('==')[0]
+        package_name = package.strip().split("==")[0]
         if package_name.lower() not in installed_packages:
             missing_packages.append(package_name)
 
     if missing_packages:
-        print('Missing packages:')
-        print(', '.join(missing_packages))
+        print("Missing packages:")
+        print(", ".join(missing_packages))
         sys.exit(1)
     else:
-        print('All packages are installed.')
+        print("All packages are installed.")
 
-if __name__ == '__main__':
+
+if __name__ == "__main__":
     main()

diff --git a/tests/milvus_memory_test.py b/tests/milvus_memory_test.py
index 7e3f185a..0113fa1c 100644
--- a/tests/milvus_memory_test.py
+++ b/tests/milvus_memory_test.py
@@ -15,7 +15,6 @@ def MockConfig():
             "speak_mode": False,
             "milvus_collection": "autogpt",
             "milvus_addr": "localhost:19530",
-
         },
     )

diff --git a/tests/smoke_test.py b/tests/smoke_test.py
index 1f0f24f9..8d33cf89 100644
--- a/tests/smoke_test.py
+++ b/tests/smoke_test.py
@@ -5,56 +5,59 @@ import unittest
 
 from autogpt.file_operations import delete_file, read_file
 
-env_vars = {
-    'MEMORY_BACKEND': 'no_memory',
-    'TEMPERATURE': "0"
-}
+env_vars = {"MEMORY_BACKEND": "no_memory", "TEMPERATURE": "0"}
 
 
 class TestCommands(unittest.TestCase):
-
     def test_write_file(self):
         # Test case to check if the write_file command can successfully write 'Hello World' to a file
         # named 'hello_world.txt'.
 
         # Read the current ai_settings.yaml file and store its content.
         ai_settings = None
-        if os.path.exists('ai_settings.yaml'):
-            with open('ai_settings.yaml', 'r') as f:
+        if os.path.exists("ai_settings.yaml"):
+            with open("ai_settings.yaml", "r") as f:
                 ai_settings = f.read()
-            os.remove('ai_settings.yaml')
+            os.remove("ai_settings.yaml")
 
         try:
-            if os.path.exists('hello_world.txt'):
+            if os.path.exists("hello_world.txt"):
                 # Clean up any existing 'hello_world.txt' file before testing.
-                delete_file('hello_world.txt')
+                delete_file("hello_world.txt")
 
             # Prepare input data for the test.
-            input_data = '''write_file-GPT
+            input_data = """write_file-GPT
 an AI designed to use the write_file command to write 'Hello World' into a file named "hello_world.txt" and then use the task_complete command to complete the task.
 Use the write_file command to write 'Hello World' into a file named "hello_world.txt".
 Use the task_complete command to complete the task.
 Do not use any other commands.
 
 y
 -5
-EOF'''
+EOF"""
-            command = f'{sys.executable} -m autogpt'
+            command = f"{sys.executable} -m autogpt"
 
             # Execute the script with the input data.
-            process = subprocess.Popen(command, stdin=subprocess.PIPE, shell=True, env={**os.environ, **env_vars})
+            process = subprocess.Popen(
+                command,
+                stdin=subprocess.PIPE,
+                shell=True,
+                env={**os.environ, **env_vars},
+            )
             process.communicate(input_data.encode())
 
             # Read the content of the 'hello_world.txt' file created during the test.
-            content = read_file('hello_world.txt')
+            content = read_file("hello_world.txt")
         finally:
             if ai_settings:
                 # Restore the original ai_settings.yaml file.
-                with open('ai_settings.yaml', 'w') as f:
+                with open("ai_settings.yaml", "w") as f:
                     f.write(ai_settings)
 
         # Check if the content of the 'hello_world.txt' file is equal to 'Hello World'.
-        self.assertEqual(content, 'Hello World', f"Expected 'Hello World', got {content}")
+        self.assertEqual(
+            content, "Hello World", f"Expected 'Hello World', got {content}"
+        )
 
 
 # Run the test case.
-if __name__ == '__main__':
+if __name__ == "__main__":
     unittest.main()

diff --git a/tests/test_token_counter.py b/tests/test_token_counter.py
index 42670e60..81e68277 100644
--- a/tests/test_token_counter.py
+++ b/tests/test_token_counter.py
@@ -4,18 +4,17 @@ from autogpt.token_counter import count_message_tokens, count_string_tokens
 
 
 class TestTokenCounter(unittest.TestCase):
-
     def test_count_message_tokens(self):
         messages = [
             {"role": "user", "content": "Hello"},
-            {"role": "assistant", "content": "Hi there!"}
+            {"role": "assistant", "content": "Hi there!"},
         ]
         self.assertEqual(count_message_tokens(messages), 17)
 
     def test_count_message_tokens_with_name(self):
         messages = [
             {"role": "user", "content": "Hello", "name": "John"},
-            {"role": "assistant", "content": "Hi there!"}
+            {"role": "assistant", "content": "Hi there!"},
         ]
         self.assertEqual(count_message_tokens(messages), 17)
 
@@ -25,7 +24,7 @@ class TestTokenCounter(unittest.TestCase):
     def test_count_message_tokens_invalid_model(self):
         messages = [
             {"role": "user", "content": "Hello"},
-            {"role": "assistant", "content": "Hi there!"}
+            {"role": "assistant", "content": "Hi there!"},
         ]
         with self.assertRaises(KeyError):
             count_message_tokens(messages, model="invalid_model")
 
@@ -33,13 +32,15 @@ class TestTokenCounter(unittest.TestCase):
     def test_count_message_tokens_gpt_4(self):
         messages = [
             {"role": "user", "content": "Hello"},
-            {"role": "assistant", "content": "Hi there!"}
+            {"role": "assistant", "content": "Hi there!"},
         ]
         self.assertEqual(count_message_tokens(messages, model="gpt-4-0314"), 15)
 
     def test_count_string_tokens(self):
         string = "Hello, world!"
-        self.assertEqual(count_string_tokens(string, model_name="gpt-3.5-turbo-0301"), 4)
+        self.assertEqual(
+            count_string_tokens(string, model_name="gpt-3.5-turbo-0301"), 4
+        )
 
     def test_count_string_tokens_empty_input(self):
         self.assertEqual(count_string_tokens("", model_name="gpt-3.5-turbo-0301"), 0)
 
     def test_count_message_tokens_invalid_model(self):
         messages = [
             {"role": "user", "content": "Hello"},
-            {"role": "assistant", "content": "Hi there!"}
+            {"role": "assistant", "content": "Hi there!"},
         ]
         with self.assertRaises(NotImplementedError):
             count_message_tokens(messages, model="invalid_model")
 
@@ -57,5 +58,5 @@ class TestTokenCounter(unittest.TestCase):
         string = "Hello, world!"
         self.assertEqual(count_string_tokens(string, model_name="gpt-4-0314"), 4)
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     unittest.main()

From 93b3e8428cadf42781122246396699dacc6ea61a Mon Sep 17 00:00:00 2001
From: BillSchumacher <34168009+BillSchumacher@users.noreply.github.com>
Date: Sat, 15 Apr 2023 15:03:03 -0500
Subject: [PATCH 09/11] Update llm_utils.py

Fix trailing whitespace
---
 autogpt/llm_utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/autogpt/llm_utils.py b/autogpt/llm_utils.py
index 2e77db10..043745aa 100644
--- a/autogpt/llm_utils.py
+++ b/autogpt/llm_utils.py
@@ -115,7 +115,7 @@ def create_chat_completion(
     return response.choices[0].message["content"]
 
 
-def create_embedding_with_ada(text) -> list: 
+def create_embedding_with_ada(text) -> list:
     """Create a embedding with text-ada-002 using the OpenAI SDK"""
     num_retries = 10
     for attempt in range(num_retries):

From e758a4de3eaef2aaaf6e10ba16929ad633572926 Mon Sep 17 00:00:00 2001
From: BillSchumacher <34168009+BillSchumacher@users.noreply.github.com>
Date: Sat, 15 Apr 2023 15:03:33 -0500
Subject: [PATCH 10/11] Update pinecone.py

Fix blank lines.
---
 autogpt/memory/pinecone.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/autogpt/memory/pinecone.py b/autogpt/memory/pinecone.py
index 3817436d..d781073e 100644
--- a/autogpt/memory/pinecone.py
+++ b/autogpt/memory/pinecone.py
@@ -5,6 +5,7 @@ from autogpt.logs import logger
 from autogpt.memory.base import MemoryProviderSingleton
 from autogpt.llm_utils import create_embedding_with_ada
 
+
 class PineconeMemory(MemoryProviderSingleton):
     def __init__(self, cfg):
         pinecone_api_key = cfg.pinecone_api_key

From 8978844111b594e3ee0727f87223f02fa008d1a3 Mon Sep 17 00:00:00 2001
From: BillSchumacher <34168009+BillSchumacher@users.noreply.github.com>
Date: Sat, 15 Apr 2023 15:17:23 -0500
Subject: [PATCH 11/11] Update llm_utils.py

Remove pass
---
 autogpt/llm_utils.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/autogpt/llm_utils.py b/autogpt/llm_utils.py
index 4e74566b..238ac560 100644
--- a/autogpt/llm_utils.py
+++ b/autogpt/llm_utils.py
@@ -100,7 +100,6 @@ def create_chat_completion(
                     Fore.RED + "Error: ",
                     f"Reached rate limit, passing..." + Fore.RESET,
                 )
-            pass
         except APIError as e:
             if e.http_status == 502:
                 pass
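
Taken together, patches 02, 04, 06, and 11 converge on a single retry-with-exponential-backoff pattern around the OpenAI calls. A minimal, self-contained sketch of that pattern outside the diff context (the helper name `create_embedding_with_backoff` and the plain-print logging are illustrative only, and the openai 0.x SDK error classes that Auto-GPT used at the time are assumed):

```python
import time

import openai
from colorama import Fore
from openai.error import APIError, RateLimitError


def create_embedding_with_backoff(text: str, debug: bool = False) -> list:
    """Illustrative sketch of the series' retry pattern (not part of the patches).

    Retries the embedding call on rate limits and 502 responses with
    exponential backoff; any other API error is re-raised immediately.
    """
    num_retries = 10
    for attempt in range(num_retries):
        backoff = 2 ** (attempt + 2)  # 4, 8, 16, ... seconds between attempts
        try:
            return openai.Embedding.create(
                input=[text], model="text-embedding-ada-002"
            )["data"][0]["embedding"]
        except RateLimitError:
            if debug:
                print(Fore.RED + "Error: ", "Reached rate limit..." + Fore.RESET)
        except APIError as e:
            if e.http_status != 502:
                raise  # only bad-gateway errors are retried
        if attempt == num_retries - 1:
            raise RuntimeError(f"Failed to create embedding after {num_retries} retries")
        time.sleep(backoff)
```

Written in this shape, patch 11's cleanup falls out naturally: once the rate-limit branch logs (or deliberately does nothing), the bare `pass` after the debug print is dead weight.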