diff --git a/.env.template b/.env.template
index 10257ca3..2089e5c4 100644
--- a/.env.template
+++ b/.env.template
@@ -1,6 +1,7 @@
 PINECONE_API_KEY=your-pinecone-api-key
 PINECONE_ENV=your-pinecone-region
 OPENAI_API_KEY=your-openai-api-key
+TEMPERATURE=1
 ELEVENLABS_API_KEY=your-elevenlabs-api-key
 ELEVENLABS_VOICE_1_ID=your-voice-id
 ELEVENLABS_VOICE_2_ID=your-voice-id
@@ -9,11 +10,7 @@ FAST_LLM_MODEL=gpt-3.5-turbo
 GOOGLE_API_KEY=
 CUSTOM_SEARCH_ENGINE_ID=
 USE_AZURE=False
-OPENAI_AZURE_API_BASE=your-base-url-for-azure
-OPENAI_AZURE_API_VERSION=api-version-for-azure
-OPENAI_AZURE_DEPLOYMENT_ID=deployment-id-for-azure
-OPENAI_AZURE_CHAT_DEPLOYMENT_ID=deployment-id-for-azure-chat
-OPENAI_AZURE_EMBEDDINGS_DEPLOYMENT_ID=deployment-id-for-azure-embeddigs
+EXECUTE_LOCAL_COMMANDS=False
 IMAGE_PROVIDER=dalle
 HUGGINGFACE_API_TOKEN=
 USE_MAC_OS_TTS=False
diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md
index 9fa56593..c355965a 100644
--- a/.github/PULL_REQUEST_TEMPLATE.md
+++ b/.github/PULL_REQUEST_TEMPLATE.md
@@ -26,7 +26,7 @@ By following these guidelines, your PRs are more likely to be merged quickly aft
 - [ ] I have thoroughly tested my changes with multiple different prompts.
 - [ ] I have considered potential risks and mitigations for my changes.
 - [ ] I have documented my changes clearly and comprehensively.
-- [ ] I have not snuck in any "extra" small tweaks changes
+- [ ] I have not snuck in any "extra" small tweaks or changes
diff --git a/.gitignore b/.gitignore
index cfa3b08b..cf6e75df 100644
--- a/.gitignore
+++ b/.gitignore
@@ -7,9 +7,11 @@ package-lock.json
 auto_gpt_workspace/*
 *.mpeg
 .env
+azure.yaml
 *venv/*
 outputs/*
 ai_settings.yaml
+last_run_ai_settings.yaml
 .vscode
 .idea/*
 auto-gpt.json
@@ -19,3 +21,6 @@ log.txt
 .coverage
 coverage.xml
 htmlcov/
+
+# macOS dev environments: ignore the Desktop Services Store file
+.DS_Store
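The two new settings in `.env.template` are consumed by `scripts/config.py` further down in this diff. A minimal sketch of that parsing (assuming `python-dotenv`, already in the repo's requirements, has loaded `.env`):

```python
import os

from dotenv import load_dotenv

load_dotenv()  # copy values from .env into the process environment

# Mirrors the Config class below: TEMPERATURE defaults to 1, and
# EXECUTE_LOCAL_COMMANDS must be the literal string 'True' to enable shell access.
temperature = float(os.getenv("TEMPERATURE", "1"))
execute_local_commands = os.getenv("EXECUTE_LOCAL_COMMANDS", "False") == "True"
```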
diff --git a/README.md b/README.md
index c4c98815..b8d59b94 100644
--- a/README.md
+++ b/README.md
@@ -2,8 +2,8 @@
 ![GitHub Repo stars](https://img.shields.io/github/stars/Torantulino/auto-gpt?style=social)
 ![Twitter Follow](https://img.shields.io/twitter/follow/siggravitas?style=social)
-[![](https://dcbadge.vercel.app/api/server/PQ7VX6TY4t?style=flat)](https://discord.gg/PQ7VX6TY4t)
-[![Unit Tests](https://github.com/Torantulino/Auto-GPT/actions/workflows/unit_tests.yml/badge.svg)](https://github.com/Torantulino/Auto-GPT/actions/workflows/unit_tests.yml)
+[![Discord Follow](https://dcbadge.vercel.app/api/server/PQ7VX6TY4t?style=flat)](https://discord.gg/PQ7VX6TY4t)
+[![Unit Tests](https://github.com/Torantulino/Auto-GPT/actions/workflows/ci.yml/badge.svg)](https://github.com/Torantulino/Auto-GPT/actions/workflows/ci.yml)

 Auto-GPT is an experimental open-source application showcasing the capabilities of the GPT-4 language model. This program, driven by GPT-4, chains together LLM "thoughts" to autonomously achieve whatever goal you set. As one of the first examples of GPT-4 running fully autonomously, Auto-GPT pushes the boundaries of what is possible with AI.
@@ -32,21 +32,28 @@ Your support is greatly appreciated
 - [Auto-GPT: An Autonomous GPT-4 Experiment](#auto-gpt-an-autonomous-gpt-4-experiment)
   - [Demo (30/03/2023):](#demo-30032023)
-  - [💖 Help Fund Auto-GPT's Development](#-help-fund-auto-gpts-development)
   - [Table of Contents](#table-of-contents)
   - [🚀 Features](#-features)
   - [📋 Requirements](#-requirements)
   - [💾 Installation](#-installation)
   - [🔧 Usage](#-usage)
+  - [Logs](#logs)
   - [🗣️ Speech Mode](#️-speech-mode)
   - [🔍 Google API Keys Configuration](#-google-api-keys-configuration)
     - [Setting up environment variables](#setting-up-environment-variables)
+  - [Redis Setup](#redis-setup)
+  - [🌲 Pinecone API Key Setup](#-pinecone-api-key-setup)
+    - [Setting up environment variables](#setting-up-environment-variables-1)
+  - [Setting Your Cache Type](#setting-your-cache-type)
+  - [View Memory Usage](#view-memory-usage)
   - [💀 Continuous Mode ⚠️](#-continuous-mode-️)
   - [GPT3.5 ONLY Mode](#gpt35-only-mode)
-  - [🖼 Image Generation](#image-generation)
+  - [🖼 Image Generation](#-image-generation)
   - [⚠️ Limitations](#️-limitations)
   - [🛡 Disclaimer](#-disclaimer)
   - [🐦 Connect with Us on Twitter](#-connect-with-us-on-twitter)
+  - [Run tests](#run-tests)
+  - [Run linter](#run-linter)

 ## 🚀 Features

@@ -96,10 +103,15 @@ pip install -r requirements.txt
 ```

 4. Rename `.env.template` to `.env` and fill in your `OPENAI_API_KEY`. If you plan to use Speech Mode, fill in your `ELEVEN_LABS_API_KEY` as well.
-
-- Obtain your OpenAI API key from: https://platform.openai.com/account/api-keys.
-- Obtain your ElevenLabs API key from: https://elevenlabs.io. You can view your xi-api-key using the "Profile" tab on the website.
-- If you want to use GPT on an Azure instance, set `USE_AZURE` to `True` and provide the `OPENAI_AZURE_API_BASE`, `OPENAI_AZURE_API_VERSION` and `OPENAI_AZURE_DEPLOYMENT_ID` values as explained here: https://pypi.org/project/openai/ in the `Microsoft Azure Endpoints` section. Additionally you need separate deployments for both embeddings and chat. Add their ID values to `OPENAI_AZURE_CHAT_DEPLOYMENT_ID` and `OPENAI_AZURE_EMBEDDINGS_DEPLOYMENT_ID` respectively
+   - Obtain your OpenAI API key from: https://platform.openai.com/account/api-keys.
+   - Obtain your ElevenLabs API key from: https://elevenlabs.io. You can view your xi-api-key using the "Profile" tab on the website.
+   - If you want to use GPT on an Azure instance, set `USE_AZURE` to `True` and then:
+     - Rename `azure.yaml.template` to `azure.yaml` and provide the relevant `azure_api_base`, `azure_api_version` and all of the deployment ids for the relevant models in the `azure_model_map` section:
+       - `fast_llm_model_deployment_id` - your gpt-3.5-turbo or gpt-4 deployment id
+       - `smart_llm_model_deployment_id` - your gpt-4 deployment id
+       - `embedding_model_deployment_id` - your text-embedding-ada-002 v2 deployment id
+     - Please specify all of these values as double-quoted strings (a filled-in example follows this hunk).
+     - Details can be found here: https://pypi.org/project/openai/ in the `Microsoft Azure Endpoints` section and here: https://learn.microsoft.com/en-us/azure/cognitive-services/openai/tutorials/embeddings?tabs=command-line for the embedding model.
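For reference, a hypothetical filled-in `azure.yaml` following the instructions above; the base URL, API version, and deployment names are placeholders, not real values. It mirrors the `azure.yaml.template` added later in this diff, with every value double-quoted as requested:

```yaml
# Illustrative only — substitute your own Azure resource values.
azure_api_base: "https://your-resource-name.openai.azure.com"
azure_api_version: "2023-03-15-preview"
azure_model_map:
  fast_llm_model_deployment_id: "my-gpt35-deployment"
  smart_llm_model_deployment_id: "my-gpt4-deployment"
  embedding_model_deployment_id: "my-ada-002-deployment"
```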

 ## 🔧 Usage

@@ -115,7 +127,7 @@ python scripts/main.py

 ### Logs

-You will find activity and error logs in the folder `./logs`
+You will find activity and error logs in the folder `./output/logs`

 To output debug logs:

@@ -207,7 +219,7 @@ MEMORY_INDEX=whatever

 Pinecone enables the storage of vast amounts of vector-based memory, allowing only relevant memories to be loaded for the agent at any given time.

-1. Go to app.pinecone.io and make an account if you don't already have one.
+1. Go to [Pinecone](https://app.pinecone.io/) and make an account if you don't already have one.
 2. Choose the `Starter` plan to avoid being charged.
 3. Find your API key and region under the default project in the left sidebar.

@@ -365,4 +377,4 @@ flake8 scripts/ tests/
 # Or, if you want to run flake8 with the same configuration as the CI:
 flake8 scripts/ tests/ --select E303,W293,W291,W292,E305
-```
\ No newline at end of file
+```
diff --git a/ai_settings.yaml b/ai_settings.yaml
deleted file mode 100644
index b37ba849..00000000
--- a/ai_settings.yaml
+++ /dev/null
@@ -1,7 +0,0 @@
-ai_goals:
-- Increase net worth.
-- Develop and manage multiple businesses autonomously.
-- Play to your strengths as a Large Language Model.
-ai_name: Entrepreneur-GPT
-ai_role: an AI designed to autonomously develop and run businesses with the sole goal
-  of increasing your net worth.
diff --git a/azure.yaml.template b/azure.yaml.template
new file mode 100644
index 00000000..852645ca
--- /dev/null
+++ b/azure.yaml.template
@@ -0,0 +1,6 @@
+azure_api_base: your-base-url-for-azure
+azure_api_version: api-version-for-azure
+azure_model_map:
+  fast_llm_model_deployment_id: gpt35-deployment-id-for-azure
+  smart_llm_model_deployment_id: gpt4-deployment-id-for-azure
+  embedding_model_deployment_id: embedding-deployment-id-for-azure
diff --git a/requirements.txt b/requirements.txt
index 37fdf8c3..d6fb6426 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -18,3 +18,4 @@ Pillow
 weaviate-client==3.15.5
 coverage
 flake8
+numpy
diff --git a/scripts/ai_functions.py b/scripts/ai_functions.py
index 782bb558..8c95c0f2 100644
--- a/scripts/ai_functions.py
+++ b/scripts/ai_functions.py
@@ -45,6 +45,7 @@ def improve_code(suggestions: List[str], code: str) -> str:
     result_string = call_ai_function(function_string, args, description_string)
     return result_string

+
 def write_tests(code: str, focus: List[str]) -> str:
     """
     A function that takes in code and focus topics and returns a response from the create chat completion api call.
diff --git a/scripts/browse.py b/scripts/browse.py
index b936c5b1..9e93c55a 100644
--- a/scripts/browse.py
+++ b/scripts/browse.py
@@ -6,6 +6,7 @@ from urllib.parse import urlparse, urljoin

 cfg = Config()

+
 # Function to check if the URL is valid
 def is_valid_url(url):
     try:
@@ -14,49 +15,51 @@ def is_valid_url(url):
     except ValueError:
         return False

+
 # Function to sanitize the URL
 def sanitize_url(url):
     return urljoin(url, urlparse(url).path)

-# Function to make a request with a specified timeout and handle exceptions
-def make_request(url, timeout=10):
-    try:
-        response = requests.get(url, headers=cfg.user_agent_header, timeout=timeout)
-        response.raise_for_status()
-        return response
-    except requests.exceptions.RequestException as e:
-        return "Error: " + str(e)

 # Define and check for local file address prefixes
 def check_local_file_access(url):
     local_prefixes = ['file:///', 'file://localhost', 'http://localhost', 'https://localhost']
     return any(url.startswith(prefix) for prefix in local_prefixes)

+
+def get_response(url, headers=cfg.user_agent_header, timeout=10):
+    try:
+        # Restrict access to local files
+        if check_local_file_access(url):
+            raise ValueError('Access to local files is restricted')
+
+        # Most basic check if the URL is valid:
+        if not url.startswith('http://') and not url.startswith('https://'):
+            raise ValueError('Invalid URL format')
+
+        sanitized_url = sanitize_url(url)
+
+        response = requests.get(sanitized_url, headers=headers, timeout=timeout)
+
+        # Check if the response contains an HTTP error
+        if response.status_code >= 400:
+            return None, "Error: HTTP " + str(response.status_code) + " error"
+
+        return response, None
+    except ValueError as ve:
+        # Handle invalid URL format
+        return None, "Error: " + str(ve)
+
+    except requests.exceptions.RequestException as re:
+        # Handle exceptions related to the HTTP request (e.g., connection errors, timeouts, etc.)
+        return None, "Error: " + str(re)
+
+
 def scrape_text(url):
     """Scrape text from a webpage"""
-    # Basic check if the URL is valid
-    if not url.startswith('http'):
-        return "Error: Invalid URL"
-
-    # Restrict access to local files
-    if check_local_file_access(url):
-        return "Error: Access to local files is restricted"
-
-    # Validate the input URL
-    if not is_valid_url(url):
-        # Sanitize the input URL
-        sanitized_url = sanitize_url(url)
-
-        # Make the request with a timeout and handle exceptions
-        response = make_request(sanitized_url)
-
-        if isinstance(response, str):
-            return response
-    else:
-        # Sanitize the input URL
-        sanitized_url = sanitize_url(url)
-
-        response = requests.get(sanitized_url, headers=cfg.user_agent_header)
+    response, error_message = get_response(url)
+    if error_message:
+        return error_message

     soup = BeautifulSoup(response.text, "html.parser")

@@ -89,11 +92,9 @@ def format_hyperlinks(hyperlinks):

 def scrape_links(url):
     """Scrape links from a webpage"""
-    response = requests.get(url, headers=cfg.user_agent_header)
-
-    # Check if the response contains an HTTP error
-    if response.status_code >= 400:
-        return "error"
+    response, error_message = get_response(url)
+    if error_message:
+        return error_message

     soup = BeautifulSoup(response.text, "html.parser")

@@ -131,6 +132,7 @@ def create_message(chunk, question):
         "content": f"\"\"\"{chunk}\"\"\" Using the above text, please answer the following question: \"{question}\" -- if the question cannot be answered using the text, please summarize the text."
     }

+
 def summarize_text(text, question):
     """Summarize text using the LLM model"""
     if not text:
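The refactor above funnels both scrapers through a single helper. A brief usage sketch of the new `(response, error_message)` contract (editor's illustration, not part of the patch; exactly one element of the pair is `None`):

```python
from scripts.browse import get_response

response, error = get_response("https://example.com")
if error:
    print(error)  # e.g. "Error: Invalid URL format" or "Error: HTTP 404 error"
else:
    print(response.text[:200])  # first 200 characters of the page body
```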
diff --git a/scripts/commands.py b/scripts/commands.py
index 92d46ae1..3966e86a 100644
--- a/scripts/commands.py
+++ b/scripts/commands.py
@@ -7,7 +7,7 @@ import speak
 from config import Config
 import ai_functions as ai
 from file_operations import read_file, write_to_file, append_to_file, delete_file, search_files
-from execute_code import execute_python_file
+from execute_code import execute_python_file, execute_shell
 from json_parser import fix_and_parse_json
 from image_gen import generate_image
 from duckduckgo_search import ddg
@@ -103,6 +103,11 @@ def execute_command(command_name, arguments):
         return ai.write_tests(arguments["code"], arguments.get("focus"))
     elif command_name == "execute_python_file":  # Add this command
         return execute_python_file(arguments["file"])
+    elif command_name == "execute_shell":
+        if cfg.execute_local_commands:
+            return execute_shell(arguments["command_line"])
+        else:
+            return "You are not allowed to run local shell commands. To execute shell commands, EXECUTE_LOCAL_COMMANDS must be set to 'True' in your config. Do not attempt to bypass the restriction."
     elif command_name == "generate_image":
         return generate_image(arguments["prompt"])
     elif command_name == "do_nothing":
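How the new command is gated at dispatch time (editor's sketch using the names from this diff; assumes `scripts/` is on the import path):

```python
from commands import execute_command

result = execute_command("execute_shell", {"command_line": "ls -la"})
# With the default EXECUTE_LOCAL_COMMANDS=False this returns the refusal
# message above instead of running anything; set it to 'True' in .env to
# get the command's captured STDOUT/STDERR back.
print(result)
```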
+ """ + if model == self.fast_llm_model: + return self.azure_model_to_deployment_id_map["fast_llm_model_deployment_id"] + elif model == self.smart_llm_model: + return self.azure_model_to_deployment_id_map["smart_llm_model_deployment_id"] + elif model == "text-embedding-ada-002": + return self.azure_model_to_deployment_id_map["embedding_model_deployment_id"] + else: + return "" + + AZURE_CONFIG_FILE = os.path.join(os.path.dirname(__file__), '..', 'azure.yaml') + + def load_azure_config(self, config_file: str=AZURE_CONFIG_FILE) -> None: + """ + Loads the configuration parameters for Azure hosting from the specified file path as a yaml file. + + Parameters: + config_file(str): The path to the config yaml file. DEFAULT: "../azure.yaml" + + Returns: + None + """ + try: + with open(config_file) as file: + config_params = yaml.load(file, Loader=yaml.FullLoader) + except FileNotFoundError: + config_params = {} + self.openai_api_base = config_params.get("azure_api_base", "") + self.openai_api_version = config_params.get("azure_api_version", "") + self.azure_model_to_deployment_id_map = config_params.get("azure_model_map", []) + def set_continuous_mode(self, value: bool): """Set the continuous mode value.""" self.continuous_mode = value diff --git a/scripts/data/prompt.txt b/scripts/data/prompt.txt index fc68f3ae..ffb9eb50 100644 --- a/scripts/data/prompt.txt +++ b/scripts/data/prompt.txt @@ -22,9 +22,10 @@ COMMANDS: 16. Get Improved Code: "improve_code", args: "suggestions": "", "code": "" 17. Write Tests: "write_tests", args: "code": "", "focus": "" 18. Execute Python File: "execute_python_file", args: "file": "" -19. Task Complete (Shutdown): "task_complete", args: "reason": "" -20. Generate Image: "generate_image", args: "prompt": "" -21. Do Nothing: "do_nothing", args: "" +19. Execute Shell Command, non-interactive commands only: "execute_shell", args: "command_line": "". +20. Task Complete (Shutdown): "task_complete", args: "reason": "" +21. Generate Image: "generate_image", args: "prompt": "" +22. Do Nothing: "do_nothing", args: "" RESOURCES: diff --git a/scripts/execute_code.py b/scripts/execute_code.py index a8f90911..2c92903c 100644 --- a/scripts/execute_code.py +++ b/scripts/execute_code.py @@ -1,17 +1,20 @@ import docker import os +import subprocess + + +WORKSPACE_FOLDER = "auto_gpt_workspace" def execute_python_file(file): """Execute a Python file in a Docker container and return the output""" - workspace_folder = "auto_gpt_workspace" - print (f"Executing file '{file}' in workspace '{workspace_folder}'") + print (f"Executing file '{file}' in workspace '{WORKSPACE_FOLDER}'") if not file.endswith(".py"): return "Error: Invalid file type. Only .py files are allowed." - file_path = os.path.join(workspace_folder, file) + file_path = os.path.join(WORKSPACE_FOLDER, file) if not os.path.isfile(file_path): return f"Error: File '{file}' does not exist." 
diff --git a/scripts/data/prompt.txt b/scripts/data/prompt.txt
index fc68f3ae..ffb9eb50 100644
--- a/scripts/data/prompt.txt
+++ b/scripts/data/prompt.txt
@@ -22,9 +22,10 @@ COMMANDS:
 16. Get Improved Code: "improve_code", args: "suggestions": "<list_of_suggestions>", "code": "<full_code_string>"
 17. Write Tests: "write_tests", args: "code": "<full_code_string>", "focus": "<list_of_focus_areas>"
 18. Execute Python File: "execute_python_file", args: "file": "<file>"
-19. Task Complete (Shutdown): "task_complete", args: "reason": "<reason>"
-20. Generate Image: "generate_image", args: "prompt": "<prompt>"
-21. Do Nothing: "do_nothing", args: ""
+19. Execute Shell Command, non-interactive commands only: "execute_shell", args: "command_line": "<command_line>"
+20. Task Complete (Shutdown): "task_complete", args: "reason": "<reason>"
+21. Generate Image: "generate_image", args: "prompt": "<prompt>"
+22. Do Nothing: "do_nothing", args: ""

 RESOURCES:
diff --git a/scripts/execute_code.py b/scripts/execute_code.py
index a8f90911..2c92903c 100644
--- a/scripts/execute_code.py
+++ b/scripts/execute_code.py
@@ -1,17 +1,20 @@
 import docker
 import os
+import subprocess
+
+
+WORKSPACE_FOLDER = "auto_gpt_workspace"


 def execute_python_file(file):
     """Execute a Python file in a Docker container and return the output"""
-    workspace_folder = "auto_gpt_workspace"
-    print (f"Executing file '{file}' in workspace '{workspace_folder}'")
+    print(f"Executing file '{file}' in workspace '{WORKSPACE_FOLDER}'")

     if not file.endswith(".py"):
         return "Error: Invalid file type. Only .py files are allowed."

-    file_path = os.path.join(workspace_folder, file)
+    file_path = os.path.join(WORKSPACE_FOLDER, file)

     if not os.path.isfile(file_path):
         return f"Error: File '{file}' does not exist."
@@ -19,14 +22,31 @@ def execute_python_file(file):

     try:
         client = docker.from_env()

+        image_name = 'python:3.10'
+        try:
+            client.images.get(image_name)
+            print(f"Image '{image_name}' found locally")
+        except docker.errors.ImageNotFound:
+            print(f"Image '{image_name}' not found locally, pulling from Docker Hub")
+            # Use the low-level API to stream the pull response
+            low_level_client = docker.APIClient()
+            for line in low_level_client.pull(image_name, stream=True, decode=True):
+                # Print the status and progress, if available
+                status = line.get('status')
+                progress = line.get('progress')
+                if status and progress:
+                    print(f"{status}: {progress}")
+                elif status:
+                    print(status)
+
-        # You can replace 'python:3.8' with the desired Python image/version
+        # You can replace image_name with the desired Python image/version
         # You can find available Python images on Docker Hub:
         # https://hub.docker.com/_/python
         container = client.containers.run(
-            'python:3.10',
+            image_name,
             f'python {file}',
             volumes={
-                os.path.abspath(workspace_folder): {
+                os.path.abspath(WORKSPACE_FOLDER): {
                     'bind': '/workspace',
                     'mode': 'ro'}},
             working_dir='/workspace',
@@ -46,3 +66,20 @@ def execute_python_file(file):

     except Exception as e:
         return f"Error: {str(e)}"
+
+
+def execute_shell(command_line):
+    current_dir = os.getcwd()
+
+    if WORKSPACE_FOLDER not in current_dir:  # Change dir into workspace if necessary
+        work_dir = os.path.join(os.getcwd(), WORKSPACE_FOLDER)
+        os.chdir(work_dir)
+
+    print(f"Executing command '{command_line}' in working directory '{os.getcwd()}'")
+
+    result = subprocess.run(command_line, capture_output=True, shell=True)
+    output = f"STDOUT:\n{result.stdout}\nSTDERR:\n{result.stderr}"
+
+    # Change back to whatever the prior working dir was
+    os.chdir(current_dir)
+
+    return output
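A sample call to the new `execute_shell` (editor's sketch, assuming `scripts/` is on the import path). Note that `subprocess.run` is invoked without `text=True`, so `result.stdout` and `result.stderr` are `bytes`, and the returned string embeds their `repr`:

```python
from execute_code import execute_shell

output = execute_shell("echo hello")
print(output)
# Executing command 'echo hello' in working directory '.../auto_gpt_workspace'
# STDOUT:
# b'hello\n'
# STDERR:
# b''
```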
diff --git a/scripts/file_operations.py b/scripts/file_operations.py
index 7b48c134..d0fa175f 100644
--- a/scripts/file_operations.py
+++ b/scripts/file_operations.py
@@ -38,7 +38,7 @@ def write_to_file(filename, text):
         directory = os.path.dirname(filepath)
         if not os.path.exists(directory):
             os.makedirs(directory)
-        with open(filepath, "w") as f:
+        with open(filepath, "w", encoding='utf-8') as f:
             f.write(text)
         return "File written to successfully."
     except Exception as e:
diff --git a/scripts/llm_utils.py b/scripts/llm_utils.py
index 3fb348f0..35cc5ce0 100644
--- a/scripts/llm_utils.py
+++ b/scripts/llm_utils.py
@@ -5,11 +5,11 @@ cfg = Config()
 openai.api_key = cfg.openai_api_key

 # Overly simple abstraction until we create something better
-def create_chat_completion(messages, model=None, temperature=None, max_tokens=None)->str:
+def create_chat_completion(messages, model=None, temperature=cfg.temperature, max_tokens=None)->str:
     """Create a chat completion using the OpenAI API"""
     if cfg.use_azure:
         response = openai.ChatCompletion.create(
-            deployment_id=cfg.azure_chat_deployment_id,
+            deployment_id=cfg.get_azure_deployment_id_for_model(model),
             model=model,
             messages=messages,
             temperature=temperature,
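One subtlety in the `create_chat_completion` change above: Python evaluates default arguments once, at import time, so `temperature=cfg.temperature` freezes whatever value the config held when `llm_utils` was first imported. A call-time alternative (editor's sketch, not what the patch does; `cfg` is the module-level Config as in llm_utils):

```python
def create_chat_completion(messages, model=None, temperature=None, max_tokens=None) -> str:
    """Create a chat completion using the OpenAI API."""
    if temperature is None:
        # Read the config at call time instead of at import time
        temperature = cfg.temperature
    ...
```

Since the Config singleton's temperature rarely changes at runtime, the import-time default is harmless in practice, but the distinction is worth knowing.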
diff --git a/scripts/logger.py b/scripts/logger.py
index 5c7d68bb..3d131dd9 100644
--- a/scripts/logger.py
+++ b/scripts/logger.py
@@ -124,6 +124,12 @@ class Logger(metaclass=Singleton):
         self.logger.setLevel(level)
         self.typing_logger.setLevel(level)

+    def double_check(self, additionalText=None):
+        if not additionalText:
+            additionalText = "Please ensure you've set up and configured everything correctly. Read https://github.com/Torantulino/Auto-GPT#readme to double check. You can also create a GitHub issue or join the Discord and ask there!"
+
+        self.typewriter_log("DOUBLE CHECK CONFIGURATION", Fore.YELLOW, additionalText)
+
 '''
 Output stream to console using simulated typing
 '''
@@ -164,8 +170,6 @@ class ConsoleHandler(logging.StreamHandler):
 Allows to handle custom placeholders 'title_color' and 'message_no_color'.
 To use this formatter, make sure to pass 'color', 'title' as log extras.
 '''
-
-
 class AutoGptFormatter(logging.Formatter):
     def format(self, record: LogRecord) -> str:
         if (hasattr(record, 'color')):
diff --git a/scripts/main.py b/scripts/main.py
index 7a4a32d4..5b84bd70 100644
--- a/scripts/main.py
+++ b/scripts/main.py
@@ -310,15 +310,14 @@ def parse_arguments():
         supported_memory = get_supported_memory_backends()
         chosen = args.memory_type
         if not chosen in supported_memory:
-            print_to_console("ONLY THE FOLLOWING MEMORY BACKENDS ARE SUPPORTED: ", Fore.RED, f'{supported_memory}')
-            print_to_console(f"Defaulting to: ", Fore.YELLOW, cfg.memory_backend)
+            logger.typewriter_log("ONLY THE FOLLOWING MEMORY BACKENDS ARE SUPPORTED: ", Fore.RED, f'{supported_memory}')
+            logger.typewriter_log(f"Defaulting to: ", Fore.YELLOW, cfg.memory_backend)
         else:
             cfg.memory_backend = chosen


 # TODO: fill in llm values here
 check_openai_api_key()
-cfg = Config()
 parse_arguments()
 logger.set_level(logging.DEBUG if cfg.debug_mode else logging.INFO)
 ai_name = ""
diff --git a/scripts/memory/__init__.py b/scripts/memory/__init__.py
index 15e74feb..0386f688 100644
--- a/scripts/memory/__init__.py
+++ b/scripts/memory/__init__.py
@@ -1,4 +1,5 @@
 from memory.local import LocalCache
+from memory.no_memory import NoMemory

 # List of supported memory backends
 # Add a backend to this list if the import attempt is successful
@@ -47,6 +48,9 @@ def get_memory(cfg, init=False):
         else:
             memory = WeaviateMemory(cfg)

+    elif cfg.memory_backend == "no_memory":
+        memory = NoMemory(cfg)
+
     if memory is None:
         memory = LocalCache(cfg)
         if init:
@@ -63,4 +67,5 @@ __all__ = [
     "RedisMemory",
     "PineconeMemory",
-    "WeaviateMemory"
+    "WeaviateMemory",
+    "NoMemory"
 ]
diff --git a/scripts/memory/base.py b/scripts/memory/base.py
index bb22963a..1bb4e89f 100644
--- a/scripts/memory/base.py
+++ b/scripts/memory/base.py
@@ -2,13 +2,13 @@ import abc
 from config import AbstractSingleton, Config
 import openai

-cfg = Config()
+cfg = Config()


 def get_ada_embedding(text):
     text = text.replace("\n", " ")
     if cfg.use_azure:
-        return openai.Embedding.create(input=[text], engine=cfg.azure_embeddigs_deployment_id, model="text-embedding-ada-002")["data"][0]["embedding"]
+        return openai.Embedding.create(input=[text], engine=cfg.get_azure_deployment_id_for_model("text-embedding-ada-002"))["data"][0]["embedding"]
     else:
         return openai.Embedding.create(input=[text], model="text-embedding-ada-002")["data"][0]["embedding"]
diff --git a/scripts/memory/no_memory.py b/scripts/memory/no_memory.py
new file mode 100644
index 00000000..45dbd734
--- /dev/null
+++ b/scripts/memory/no_memory.py
@@ -0,0 +1,65 @@
+from typing import Optional, List, Any
+
+from memory.base import MemoryProviderSingleton
+
+
+class NoMemory(MemoryProviderSingleton):
+    def __init__(self, cfg):
+        """
+        Initializes the NoMemory provider.
+
+        Args:
+            cfg: The config object.
+
+        Returns: None
+        """
+        pass
+
+    def add(self, data: str) -> str:
+        """
+        Adds a data point to the memory. No action is taken in NoMemory.
+
+        Args:
+            data: The data to add.
+
+        Returns: An empty string.
+        """
+        return ""
+
+    def get(self, data: str) -> Optional[List[Any]]:
+        """
+        Gets the data from the memory that is most relevant to the given data.
+        NoMemory always returns None.
+
+        Args:
+            data: The data to compare to.
+
+        Returns: None
+        """
+        return None
+
+    def clear(self) -> str:
+        """
+        Clears the memory. No action is taken in NoMemory.
+
+        Returns: An empty string.
+        """
+        return ""
+
+    def get_relevant(self, data: str, num_relevant: int = 5) -> Optional[List[Any]]:
+        """
+        Returns all the data in the memory that is relevant to the given data.
+        NoMemory always returns None.
+
+        Args:
+            data: The data to compare to.
+            num_relevant: The number of relevant data to return.
+
+        Returns: None
+        """
+        return None
+
+    def get_stats(self):
+        """
+        Returns: An empty dictionary as there are no stats in NoMemory.
+        """
+        return {}
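The new backend in action (editor's sketch; assumes `scripts/` is on the import path). Selecting `no_memory` turns every memory operation into a no-op, which is useful for fully stateless runs:

```python
from config import Config
from memory import get_memory

cfg = Config()
cfg.memory_backend = "no_memory"

memory = get_memory(cfg)
memory.add("this is never stored")
assert memory.get("anything") is None  # NoMemory always returns None
assert memory.get_stats() == {}        # and keeps no statistics
```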
" + + f"You can check out {Fore.CYAN + Style.BRIGHT}https://github.com/Torantulino/Auto-GPT#redis-setup{Style.RESET_ALL} to ensure you've set up everything correctly.") + exit(1) + if cfg.wipe_redis_on_start: self.redis.flushall() try: diff --git a/tests.py b/tests.py index ce21c1f4..4dbfdd46 100644 --- a/tests.py +++ b/tests.py @@ -3,6 +3,6 @@ import unittest if __name__ == "__main__": # Load all tests from the 'scripts/tests' package suite = unittest.defaultTestLoader.discover('scripts/tests') - + # Run the tests unittest.TextTestRunner().run(suite) diff --git a/tests/test_json_parser.py b/tests/test_json_parser.py index b8cb2680..352cf3d4 100644 --- a/tests/test_json_parser.py +++ b/tests/test_json_parser.py @@ -13,12 +13,14 @@ class TestParseJson(unittest.TestCase): def test_invalid_json_minor(self): # Test that an invalid JSON string can be fixed with gpt json_str = '{"name": "John", "age": 30, "city": "New York",}' - self.assertRaises(Exception, fix_and_parse_json, json_str, try_to_fix_with_gpt=False) + with self.assertRaises(Exception): + fix_and_parse_json(json_str, try_to_fix_with_gpt=False) def test_invalid_json_major_with_gpt(self): # Test that an invalid JSON string raises an error when try_to_fix_with_gpt is False json_str = 'BEGIN: "name": "John" - "age": 30 - "city": "New York" :END' - self.assertRaises(Exception, fix_and_parse_json, json_str, try_to_fix_with_gpt=False) + with self.assertRaises(Exception): + fix_and_parse_json(json_str, try_to_fix_with_gpt=False) def test_invalid_json_major_without_gpt(self): # Test that a REALLY invalid JSON string raises an error when try_to_fix_with_gpt is False diff --git a/tests/test_weaviate_memory.py b/tests/test_weaviate_memory.py index 6f39a203..7b2b331f 100644 --- a/tests/test_weaviate_memory.py +++ b/tests/test_weaviate_memory.py @@ -21,6 +21,25 @@ from memory.base import get_ada_embedding "MEMORY_INDEX": "AutogptTests" }) class TestWeaviateMemory(unittest.TestCase): + cfg = None + client = None + + @classmethod + def setUpClass(cls): + # only create the connection to weaviate once + cls.cfg = Config() + + if cls.cfg.use_weaviate_embedded: + from weaviate.embedded import EmbeddedOptions + + cls.client = Client(embedded_options=EmbeddedOptions( + hostname=cls.cfg.weaviate_host, + port=int(cls.cfg.weaviate_port), + persistence_data_path=cls.cfg.weaviate_embedded_path + )) + else: + cls.client = Client(f"{cls.cfg.weaviate_protocol}://{cls.cfg.weaviate_host}:{self.cfg.weaviate_port}") + """ In order to run these tests you will need a local instance of Weaviate running. 
diff --git a/tests/unit/test_browse_scrape_links.py b/tests/unit/test_browse_scrape_links.py
new file mode 100644
index 00000000..2172d1a2
--- /dev/null
+++ b/tests/unit/test_browse_scrape_links.py
@@ -0,0 +1,118 @@
+
+# Generated by CodiumAI
+
+# Dependencies:
+# pip install pytest-mock
+import pytest
+
+from scripts.browse import scrape_links
+
+"""
+Code Analysis
+
+Objective:
+The objective of the 'scrape_links' function is to scrape hyperlinks from a
+given URL and return them in a formatted way.
+
+Inputs:
+- url: a string representing the URL to be scraped.
+
+Flow:
+1. Send a GET request to the given URL using the requests library and the user agent header from the config file.
+2. Check if the response contains an HTTP error. If it does, return an error message.
+3. Parse the HTML content of the response using the BeautifulSoup library.
+4. Remove any script and style tags from the parsed HTML.
+5. Extract all hyperlinks from the parsed HTML using the 'extract_hyperlinks' function.
+6. Format the extracted hyperlinks using the 'format_hyperlinks' function.
+7. Return the formatted hyperlinks.
+
+Outputs:
+- A list of formatted hyperlinks.
+
+Additional aspects:
+- The function uses the 'requests' and 'BeautifulSoup' libraries to send HTTP
+requests and parse HTML content, respectively.
+- The 'extract_hyperlinks' function is called to extract hyperlinks from the parsed HTML.
+- The 'format_hyperlinks' function is called to format the extracted hyperlinks.
+- The function checks for HTTP errors and returns an error message if any are found.
+"""
+
+
+class TestScrapeLinks:
+
+    # Tests that the function returns a list of formatted hyperlinks when
+    # provided with a valid url that returns a webpage with hyperlinks.
+    def test_valid_url_with_hyperlinks(self):
+        url = "https://www.google.com"
+        result = scrape_links(url)
+        assert len(result) > 0
+        assert isinstance(result, list)
+        assert isinstance(result[0], str)
+
+    # Tests that the function returns correctly formatted hyperlinks when given a valid url.
+    def test_valid_url(self, mocker):
+        # Mock the requests.get() function to return a response with sample HTML containing hyperlinks
+        mock_response = mocker.Mock()
+        mock_response.status_code = 200
+        mock_response.text = "<html><body><a href='https://www.google.com'>Google</a></body></html>"
+        mocker.patch('requests.get', return_value=mock_response)
+
+        # Call the function with a valid URL
+        result = scrape_links("https://www.example.com")
+
+        # Assert that the function returns correctly formatted hyperlinks
+        assert result == ["Google (https://www.google.com)"]
+
+    # Tests that the function returns an error message when given an invalid url.
+    def test_invalid_url(self, mocker):
+        # Mock the requests.get() function to return an HTTP error response
+        mock_response = mocker.Mock()
+        mock_response.status_code = 404
+        mocker.patch('requests.get', return_value=mock_response)
+
+        # Call the function with an invalid URL
+        result = scrape_links("https://www.invalidurl.com")
+
+        # Assert that the function returns an error message
+        assert "Error:" in result
+
+    # Tests that the function returns an empty list when the html contains no hyperlinks.
+    def test_no_hyperlinks(self, mocker):
+        # Mock the requests.get() function to return a response with sample HTML containing no hyperlinks
+        mock_response = mocker.Mock()
+        mock_response.status_code = 200
+        mock_response.text = "<html><body><p>No hyperlinks here</p></body></html>"
+        mocker.patch('requests.get', return_value=mock_response)
+
+        # Call the function with a URL containing no hyperlinks
+        result = scrape_links("https://www.example.com")
+
+        # Assert that the function returns an empty list
+        assert result == []
+
+    # Tests that scrape_links() correctly extracts and formats hyperlinks from
+    # a sample HTML containing a few hyperlinks.
+    def test_scrape_links_with_few_hyperlinks(self, mocker):
+        # Mock the requests.get() function to return a response with a sample HTML containing hyperlinks
+        mock_response = mocker.Mock()
+        mock_response.status_code = 200
+        mock_response.text = """
+        <html>
+        <body>
+            <a href="https://www.google.com">Google</a>
+            <a href="https://github.com">GitHub</a>
+            <a href="https://www.codium.ai">CodiumAI</a>
+        </body>
+        </html>
+        """
+        mocker.patch('requests.get', return_value=mock_response)
+
+        # Call the function being tested
+        result = scrape_links("https://www.example.com")
+
+        # Assert that the function returns a list of formatted hyperlinks
+        assert isinstance(result, list)
+        assert len(result) == 3
+        assert result[0] == "Google (https://www.google.com)"
+        assert result[1] == "GitHub (https://github.com)"
+        assert result[2] == "CodiumAI (https://www.codium.ai)"
diff --git a/tests/test_browse_scrape_text.py b/tests/unit/test_browse_scrape_text.py
similarity index 97%
rename from tests/test_browse_scrape_text.py
rename to tests/unit/test_browse_scrape_text.py
index 775eefcd..9385cde7 100644
--- a/tests/test_browse_scrape_text.py
+++ b/tests/unit/test_browse_scrape_text.py
@@ -2,7 +2,6 @@
 # Generated by CodiumAI

 import requests
-import tests.context

 from scripts.browse import scrape_text

@@ -10,7 +9,8 @@ from scripts.browse import scrape_text
 Code Analysis

 Objective:
-The objective of the "scrape_text" function is to scrape the text content from a given URL and return it as a string, after removing any unwanted HTML tags and scripts.
+The objective of the "scrape_text" function is to scrape the text content from
+a given URL and return it as a string, after removing any unwanted HTML tags and scripts.

 Inputs:
 - url: a string representing the URL of the webpage to be scraped.

@@ -33,6 +33,7 @@ Additional aspects:
 - The function uses a generator expression to split the text into lines and chunks, which can improve performance for large amounts of text.
 """

+
 class TestScrapeText:

     # Tests that scrape_text() returns the expected text when given a valid URL.