diff --git a/.env.template b/.env.template index f1e66ea5..0dc02d06 100644 --- a/.env.template +++ b/.env.template @@ -1,6 +1,7 @@ PINECONE_API_KEY=your-pinecone-api-key PINECONE_ENV=your-pinecone-region OPENAI_API_KEY=your-openai-api-key +TEMPERATURE=1 ELEVENLABS_API_KEY=your-elevenlabs-api-key ELEVENLABS_VOICE_1_ID=your-voice-id ELEVENLABS_VOICE_2_ID=your-voice-id @@ -9,11 +10,7 @@ FAST_LLM_MODEL=gpt-3.5-turbo GOOGLE_API_KEY= CUSTOM_SEARCH_ENGINE_ID= USE_AZURE=False -OPENAI_AZURE_API_BASE=your-base-url-for-azure -OPENAI_AZURE_API_VERSION=api-version-for-azure -OPENAI_AZURE_DEPLOYMENT_ID=deployment-id-for-azure -OPENAI_AZURE_CHAT_DEPLOYMENT_ID=deployment-id-for-azure-chat -OPENAI_AZURE_EMBEDDINGS_DEPLOYMENT_ID=deployment-id-for-azure-embeddigs +EXECUTE_LOCAL_COMMANDS=False IMAGE_PROVIDER=dalle HUGGINGFACE_API_TOKEN= USE_MAC_OS_TTS=False diff --git a/.gitignore b/.gitignore index cfa3b08b..cf6e75df 100644 --- a/.gitignore +++ b/.gitignore @@ -7,9 +7,11 @@ package-lock.json auto_gpt_workspace/* *.mpeg .env +azure.yaml *venv/* outputs/* ai_settings.yaml +last_run_ai_settings.yaml .vscode .idea/* auto-gpt.json @@ -19,3 +21,6 @@ log.txt .coverage coverage.xml htmlcov/ + +# For Macs Dev Environs: ignoring .Desktop Services_Store +.DS_Store diff --git a/README.md b/README.md index c9ef9d5c..87b5d1be 100644 --- a/README.md +++ b/README.md @@ -96,10 +96,15 @@ pip install -r requirements.txt ``` 4. Rename `.env.template` to `.env` and fill in your `OPENAI_API_KEY`. If you plan to use Speech Mode, fill in your `ELEVEN_LABS_API_KEY` as well. - -- Obtain your OpenAI API key from: https://platform.openai.com/account/api-keys. -- Obtain your ElevenLabs API key from: https://elevenlabs.io. You can view your xi-api-key using the "Profile" tab on the website. -- If you want to use GPT on an Azure instance, set `USE_AZURE` to `True` and provide the `OPENAI_AZURE_API_BASE`, `OPENAI_AZURE_API_VERSION` and `OPENAI_AZURE_DEPLOYMENT_ID` values as explained here: https://pypi.org/project/openai/ in the `Microsoft Azure Endpoints` section. Additionally you need separate deployments for both embeddings and chat. Add their ID values to `OPENAI_AZURE_CHAT_DEPLOYMENT_ID` and `OPENAI_AZURE_EMBEDDINGS_DEPLOYMENT_ID` respectively + - Obtain your OpenAI API key from: https://platform.openai.com/account/api-keys. + - Obtain your ElevenLabs API key from: https://elevenlabs.io. You can view your xi-api-key using the "Profile" tab on the website. + - If you want to use GPT on an Azure instance, set `USE_AZURE` to `True` and then: + - Rename `azure.yaml.template` to `azure.yaml` and provide the relevant `azure_api_base`, `azure_api_version` and all of the deployment ids for the relevant models in the `azure_model_map` section: + - `fast_llm_model_deployment_id` - your gpt-3.5-turbo or gpt-4 deployment id + - `smart_llm_model_deployment_id` - your gpt-4 deployment id + - `embedding_model_deployment_id` - your text-embedding-ada-002 v2 deployment id + - Please specify all of these values as double quoted strings + - details can be found here: https://pypi.org/project/openai/ in the `Microsoft Azure Endpoints` section and here: https://learn.microsoft.com/en-us/azure/cognitive-services/openai/tutorials/embeddings?tabs=command-line for the embedding model. ## 🔧 Usage @@ -207,7 +212,7 @@ MEMORY_INDEX=whatever Pinecone enables the storage of vast amounts of vector-based memory, allowing for only relevant memories to be loaded for the agent at any given time. -1. Go to app.pinecone.io and make an account if you don't already have one. +1. Go to [pinecone](https://app.pinecone.io/) and make an account if you don't already have one. 2. Choose the `Starter` plan to avoid being charged. 3. Find your API key and region under the default project in the left sidebar. @@ -344,4 +349,4 @@ flake8 scripts/ tests/ # Or, if you want to run flake8 with the same configuration as the CI: flake8 scripts/ tests/ --select E303,W293,W291,W292,E305 -``` \ No newline at end of file +``` diff --git a/ai_settings.yaml b/ai_settings.yaml deleted file mode 100644 index b37ba849..00000000 --- a/ai_settings.yaml +++ /dev/null @@ -1,7 +0,0 @@ -ai_goals: -- Increase net worth. -- Develop and manage multiple businesses autonomously. -- Play to your strengths as a Large Language Model. -ai_name: Entrepreneur-GPT -ai_role: an AI designed to autonomously develop and run businesses with the sole goal - of increasing your net worth. diff --git a/azure.yaml.template b/azure.yaml.template new file mode 100644 index 00000000..852645ca --- /dev/null +++ b/azure.yaml.template @@ -0,0 +1,6 @@ +azure_api_base: your-base-url-for-azure +azure_api_version: api-version-for-azure +azure_model_map: + fast_llm_model_deployment_id: gpt35-deployment-id-for-azure + smart_llm_model_deployment_id: gpt4-deployment-id-for-azure + embedding_model_deployment_id: embedding-deployment-id-for-azure diff --git a/scripts/ai_functions.py b/scripts/ai_functions.py index 782bb558..8c95c0f2 100644 --- a/scripts/ai_functions.py +++ b/scripts/ai_functions.py @@ -45,6 +45,7 @@ def improve_code(suggestions: List[str], code: str) -> str: result_string = call_ai_function(function_string, args, description_string) return result_string + def write_tests(code: str, focus: List[str]) -> str: """ A function that takes in code and focus topics and returns a response from create chat completion api call. diff --git a/scripts/browse.py b/scripts/browse.py index 6ef35ae8..a4a41744 100644 --- a/scripts/browse.py +++ b/scripts/browse.py @@ -11,6 +11,7 @@ memory = get_memory(cfg) session = requests.Session() session.headers.update({'User-Agent': cfg.user_agent}) + # Function to check if the URL is valid def is_valid_url(url): try: @@ -19,49 +20,51 @@ def is_valid_url(url): except ValueError: return False + # Function to sanitize the URL def sanitize_url(url): return urljoin(url, urlparse(url).path) -# Function to make a request with a specified timeout and handle exceptions -def make_request(url, timeout=10): - try: - response = session.get(url, timeout=timeout) - response.raise_for_status() - return response - except requests.exceptions.RequestException as e: - return "Error: " + str(e) # Define and check for local file address prefixes def check_local_file_access(url): local_prefixes = ['file:///', 'file://localhost', 'http://localhost', 'https://localhost'] return any(url.startswith(prefix) for prefix in local_prefixes) + +def get_response(url, timeout=10): + try: + # Restrict access to local files + if check_local_file_access(url): + raise ValueError('Access to local files is restricted') + + # Most basic check if the URL is valid: + if not url.startswith('http://') and not url.startswith('https://'): + raise ValueError('Invalid URL format') + + sanitized_url = sanitize_url(url) + + response = session.get(sanitized_url, timeout=timeout) + + # Check if the response contains an HTTP error + if response.status_code >= 400: + return None, "Error: HTTP " + str(response.status_code) + " error" + + return response, None + except ValueError as ve: + # Handle invalid URL format + return None, "Error: " + str(ve) + + except requests.exceptions.RequestException as re: + # Handle exceptions related to the HTTP request (e.g., connection errors, timeouts, etc.) + return None, "Error: " + str(re) + + def scrape_text(url): """Scrape text from a webpage""" - # Basic check if the URL is valid - if not url.startswith('http'): - return "Error: Invalid URL" - - # Restrict access to local files - if check_local_file_access(url): - return "Error: Access to local files is restricted" - - # Validate the input URL - if not is_valid_url(url): - # Sanitize the input URL - sanitized_url = sanitize_url(url) - - # Make the request with a timeout and handle exceptions - response = make_request(sanitized_url) - - if isinstance(response, str): - return response - else: - # Sanitize the input URL - sanitized_url = sanitize_url(url) - - response = session.get(sanitized_url) + response, error_message = get_response(url) + if error_message: + return error_message soup = BeautifulSoup(response.text, "html.parser") @@ -94,11 +97,9 @@ def format_hyperlinks(hyperlinks): def scrape_links(url): """Scrape links from a webpage""" - response = session.get(url) - - # Check if the response contains an HTTP error - if response.status_code >= 400: - return "error" + response, error_message = get_response(url) + if error_message: + return error_message soup = BeautifulSoup(response.text, "html.parser") diff --git a/scripts/commands.py b/scripts/commands.py index 90d7a6f3..7ef529ea 100644 --- a/scripts/commands.py +++ b/scripts/commands.py @@ -7,7 +7,7 @@ import speak from config import Config import ai_functions as ai from file_operations import read_file, write_to_file, append_to_file, delete_file, search_files -from execute_code import execute_python_file +from execute_code import execute_python_file, execute_shell from json_parser import fix_and_parse_json from image_gen import generate_image from duckduckgo_search import ddg @@ -103,6 +103,11 @@ def execute_command(command_name, arguments): return ai.write_tests(arguments["code"], arguments.get("focus")) elif command_name == "execute_python_file": # Add this command return execute_python_file(arguments["file"]) + elif command_name == "execute_shell": + if cfg.execute_local_commands: + return execute_shell(arguments["command_line"]) + else: + return "You are not allowed to run local shell commands. To execute shell commands, EXECUTE_LOCAL_COMMANDS must be set to 'True' in your config. Do not attempt to bypass the restriction." elif command_name == "generate_image": return generate_image(arguments["prompt"]) elif command_name == "do_nothing": diff --git a/scripts/config.py b/scripts/config.py index 61848b59..e76a2915 100644 --- a/scripts/config.py +++ b/scripts/config.py @@ -1,6 +1,7 @@ import abc import os import openai +import yaml from dotenv import load_dotenv # Load environment variables from .env file load_dotenv() @@ -45,14 +46,13 @@ class Config(metaclass=Singleton): self.browse_summary_max_token = int(os.getenv("BROWSE_SUMMARY_MAX_TOKEN", 300)) self.openai_api_key = os.getenv("OPENAI_API_KEY") + self.temperature = int(os.getenv("TEMPERATURE", "1")) self.use_azure = False self.use_azure = os.getenv("USE_AZURE") == 'True' + self.execute_local_commands = os.getenv('EXECUTE_LOCAL_COMMANDS', 'False') == 'True' + if self.use_azure: - self.openai_api_base = os.getenv("OPENAI_AZURE_API_BASE") - self.openai_api_version = os.getenv("OPENAI_AZURE_API_VERSION") - self.openai_deployment_id = os.getenv("OPENAI_AZURE_DEPLOYMENT_ID") - self.azure_chat_deployment_id = os.getenv("OPENAI_AZURE_CHAT_DEPLOYMENT_ID") - self.azure_embeddigs_deployment_id = os.getenv("OPENAI_AZURE_EMBEDDINGS_DEPLOYMENT_ID") + self.load_azure_config() openai.api_type = "azure" openai.api_base = self.openai_api_base openai.api_version = self.openai_api_version @@ -88,6 +88,46 @@ class Config(metaclass=Singleton): # Initialize the OpenAI API client openai.api_key = self.openai_api_key + def get_azure_deployment_id_for_model(self, model: str) -> str: + """ + Returns the relevant deployment id for the model specified. + + Parameters: + model(str): The model to map to the deployment id. + + Returns: + The matching deployment id if found, otherwise an empty string. + """ + if model == self.fast_llm_model: + return self.azure_model_to_deployment_id_map["fast_llm_model_deployment_id"] + elif model == self.smart_llm_model: + return self.azure_model_to_deployment_id_map["smart_llm_model_deployment_id"] + elif model == "text-embedding-ada-002": + return self.azure_model_to_deployment_id_map["embedding_model_deployment_id"] + else: + return "" + + AZURE_CONFIG_FILE = os.path.join(os.path.dirname(__file__), '..', 'azure.yaml') + + def load_azure_config(self, config_file: str=AZURE_CONFIG_FILE) -> None: + """ + Loads the configuration parameters for Azure hosting from the specified file path as a yaml file. + + Parameters: + config_file(str): The path to the config yaml file. DEFAULT: "../azure.yaml" + + Returns: + None + """ + try: + with open(config_file) as file: + config_params = yaml.load(file, Loader=yaml.FullLoader) + except FileNotFoundError: + config_params = {} + self.openai_api_base = config_params.get("azure_api_base", "") + self.openai_api_version = config_params.get("azure_api_version", "") + self.azure_model_to_deployment_id_map = config_params.get("azure_model_map", []) + def set_continuous_mode(self, value: bool): """Set the continuous mode value.""" self.continuous_mode = value diff --git a/scripts/data/prompt.txt b/scripts/data/prompt.txt index fc68f3ae..ffb9eb50 100644 --- a/scripts/data/prompt.txt +++ b/scripts/data/prompt.txt @@ -22,9 +22,10 @@ COMMANDS: 16. Get Improved Code: "improve_code", args: "suggestions": "", "code": "" 17. Write Tests: "write_tests", args: "code": "", "focus": "" 18. Execute Python File: "execute_python_file", args: "file": "" -19. Task Complete (Shutdown): "task_complete", args: "reason": "" -20. Generate Image: "generate_image", args: "prompt": "" -21. Do Nothing: "do_nothing", args: "" +19. Execute Shell Command, non-interactive commands only: "execute_shell", args: "command_line": "". +20. Task Complete (Shutdown): "task_complete", args: "reason": "" +21. Generate Image: "generate_image", args: "prompt": "" +22. Do Nothing: "do_nothing", args: "" RESOURCES: diff --git a/scripts/execute_code.py b/scripts/execute_code.py index a8f90911..2c92903c 100644 --- a/scripts/execute_code.py +++ b/scripts/execute_code.py @@ -1,17 +1,20 @@ import docker import os +import subprocess + + +WORKSPACE_FOLDER = "auto_gpt_workspace" def execute_python_file(file): """Execute a Python file in a Docker container and return the output""" - workspace_folder = "auto_gpt_workspace" - print (f"Executing file '{file}' in workspace '{workspace_folder}'") + print (f"Executing file '{file}' in workspace '{WORKSPACE_FOLDER}'") if not file.endswith(".py"): return "Error: Invalid file type. Only .py files are allowed." - file_path = os.path.join(workspace_folder, file) + file_path = os.path.join(WORKSPACE_FOLDER, file) if not os.path.isfile(file_path): return f"Error: File '{file}' does not exist." @@ -19,14 +22,31 @@ def execute_python_file(file): try: client = docker.from_env() + image_name = 'python:3.10' + try: + client.images.get(image_name) + print(f"Image '{image_name}' found locally") + except docker.errors.ImageNotFound: + print(f"Image '{image_name}' not found locally, pulling from Docker Hub") + # Use the low-level API to stream the pull response + low_level_client = docker.APIClient() + for line in low_level_client.pull(image_name, stream=True, decode=True): + # Print the status and progress, if available + status = line.get('status') + progress = line.get('progress') + if status and progress: + print(f"{status}: {progress}") + elif status: + print(status) + # You can replace 'python:3.8' with the desired Python image/version # You can find available Python images on Docker Hub: # https://hub.docker.com/_/python container = client.containers.run( - 'python:3.10', + image_name, f'python {file}', volumes={ - os.path.abspath(workspace_folder): { + os.path.abspath(WORKSPACE_FOLDER): { 'bind': '/workspace', 'mode': 'ro'}}, working_dir='/workspace', @@ -46,3 +66,22 @@ def execute_python_file(file): except Exception as e: return f"Error: {str(e)}" + +def execute_shell(command_line): + + current_dir = os.getcwd() + + if not WORKSPACE_FOLDER in current_dir: # Change dir into workspace if necessary + work_dir = os.path.join(os.getcwd(), WORKSPACE_FOLDER) + os.chdir(work_dir) + + print (f"Executing command '{command_line}' in working directory '{os.getcwd()}'") + + result = subprocess.run(command_line, capture_output=True, shell=True) + output = f"STDOUT:\n{result.stdout}\nSTDERR:\n{result.stderr}" + + # Change back to whatever the prior working dir was + + os.chdir(current_dir) + + return output diff --git a/scripts/llm_utils.py b/scripts/llm_utils.py index 3fb348f0..35cc5ce0 100644 --- a/scripts/llm_utils.py +++ b/scripts/llm_utils.py @@ -5,11 +5,11 @@ cfg = Config() openai.api_key = cfg.openai_api_key # Overly simple abstraction until we create something better -def create_chat_completion(messages, model=None, temperature=None, max_tokens=None)->str: +def create_chat_completion(messages, model=None, temperature=cfg.temperature, max_tokens=None)->str: """Create a chat completion using the OpenAI API""" if cfg.use_azure: response = openai.ChatCompletion.create( - deployment_id=cfg.azure_chat_deployment_id, + deployment_id=cfg.get_azure_deployment_id_for_model(model), model=model, messages=messages, temperature=temperature, diff --git a/scripts/logger.py b/scripts/logger.py index 5c7d68bb..85dde813 100644 --- a/scripts/logger.py +++ b/scripts/logger.py @@ -164,8 +164,6 @@ class ConsoleHandler(logging.StreamHandler): Allows to handle custom placeholders 'title_color' and 'message_no_color'. To use this formatter, make sure to pass 'color', 'title' as log extras. ''' - - class AutoGptFormatter(logging.Formatter): def format(self, record: LogRecord) -> str: if (hasattr(record, 'color')): diff --git a/scripts/main.py b/scripts/main.py index 7a4a32d4..81f560b2 100644 --- a/scripts/main.py +++ b/scripts/main.py @@ -318,7 +318,6 @@ def parse_arguments(): # TODO: fill in llm values here check_openai_api_key() -cfg = Config() parse_arguments() logger.set_level(logging.DEBUG if cfg.debug_mode else logging.INFO) ai_name = "" diff --git a/scripts/memory/base.py b/scripts/memory/base.py index bb22963a..1be7b3dd 100644 --- a/scripts/memory/base.py +++ b/scripts/memory/base.py @@ -4,11 +4,12 @@ from config import AbstractSingleton, Config import openai cfg = Config() +cfg = Config() def get_ada_embedding(text): text = text.replace("\n", " ") if cfg.use_azure: - return openai.Embedding.create(input=[text], engine=cfg.azure_embeddigs_deployment_id, model="text-embedding-ada-002")["data"][0]["embedding"] + return openai.Embedding.create(input=[text], engine=cfg.get_azure_deployment_id_for_model("text-embedding-ada-002"))["data"][0]["embedding"] else: return openai.Embedding.create(input=[text], model="text-embedding-ada-002")["data"][0]["embedding"] diff --git a/tests.py b/tests.py index ce21c1f4..4dbfdd46 100644 --- a/tests.py +++ b/tests.py @@ -3,6 +3,6 @@ import unittest if __name__ == "__main__": # Load all tests from the 'scripts/tests' package suite = unittest.defaultTestLoader.discover('scripts/tests') - + # Run the tests unittest.TextTestRunner().run(suite) diff --git a/tests/unit/test_browse_scrape_links.py b/tests/unit/test_browse_scrape_links.py new file mode 100644 index 00000000..2172d1a2 --- /dev/null +++ b/tests/unit/test_browse_scrape_links.py @@ -0,0 +1,118 @@ + +# Generated by CodiumAI + +# Dependencies: +# pip install pytest-mock +import pytest + +from scripts.browse import scrape_links + +""" +Code Analysis + +Objective: +The objective of the 'scrape_links' function is to scrape hyperlinks from a +given URL and return them in a formatted way. + +Inputs: +- url: a string representing the URL to be scraped. + +Flow: +1. Send a GET request to the given URL using the requests library and the user agent header from the config file. +2. Check if the response contains an HTTP error. If it does, return "error". +3. Parse the HTML content of the response using the BeautifulSoup library. +4. Remove any script and style tags from the parsed HTML. +5. Extract all hyperlinks from the parsed HTML using the 'extract_hyperlinks' function. +6. Format the extracted hyperlinks using the 'format_hyperlinks' function. +7. Return the formatted hyperlinks. + +Outputs: +- A list of formatted hyperlinks. + +Additional aspects: +- The function uses the 'requests' and 'BeautifulSoup' libraries to send HTTP +requests and parse HTML content, respectively. +- The 'extract_hyperlinks' function is called to extract hyperlinks from the parsed HTML. +- The 'format_hyperlinks' function is called to format the extracted hyperlinks. +- The function checks for HTTP errors and returns "error" if any are found. +""" + + +class TestScrapeLinks: + + # Tests that the function returns a list of formatted hyperlinks when + # provided with a valid url that returns a webpage with hyperlinks. + def test_valid_url_with_hyperlinks(self): + url = "https://www.google.com" + result = scrape_links(url) + assert len(result) > 0 + assert isinstance(result, list) + assert isinstance(result[0], str) + + # Tests that the function returns correctly formatted hyperlinks when given a valid url. + def test_valid_url(self, mocker): + # Mock the requests.get() function to return a response with sample HTML containing hyperlinks + mock_response = mocker.Mock() + mock_response.status_code = 200 + mock_response.text = "Google" + mocker.patch('requests.get', return_value=mock_response) + + # Call the function with a valid URL + result = scrape_links("https://www.example.com") + + # Assert that the function returns correctly formatted hyperlinks + assert result == ["Google (https://www.google.com)"] + + # Tests that the function returns "error" when given an invalid url. + def test_invalid_url(self, mocker): + # Mock the requests.get() function to return an HTTP error response + mock_response = mocker.Mock() + mock_response.status_code = 404 + mocker.patch('requests.get', return_value=mock_response) + + # Call the function with an invalid URL + result = scrape_links("https://www.invalidurl.com") + + # Assert that the function returns "error" + assert "Error:" in result + + # Tests that the function returns an empty list when the html contains no hyperlinks. + def test_no_hyperlinks(self, mocker): + # Mock the requests.get() function to return a response with sample HTML containing no hyperlinks + mock_response = mocker.Mock() + mock_response.status_code = 200 + mock_response.text = "

No hyperlinks here

" + mocker.patch('requests.get', return_value=mock_response) + + # Call the function with a URL containing no hyperlinks + result = scrape_links("https://www.example.com") + + # Assert that the function returns an empty list + assert result == [] + + # Tests that scrape_links() correctly extracts and formats hyperlinks from + # a sample HTML containing a few hyperlinks. + def test_scrape_links_with_few_hyperlinks(self, mocker): + # Mock the requests.get() function to return a response with a sample HTML containing hyperlinks + mock_response = mocker.Mock() + mock_response.status_code = 200 + mock_response.text = """ + + + + + + + + """ + mocker.patch('requests.get', return_value=mock_response) + + # Call the function being tested + result = scrape_links("https://www.example.com") + + # Assert that the function returns a list of formatted hyperlinks + assert isinstance(result, list) + assert len(result) == 3 + assert result[0] == "Google (https://www.google.com)" + assert result[1] == "GitHub (https://github.com)" + assert result[2] == "CodiumAI (https://www.codium.ai)" diff --git a/tests/test_browse_scrape_text.py b/tests/unit/test_browse_scrape_text.py similarity index 97% rename from tests/test_browse_scrape_text.py rename to tests/unit/test_browse_scrape_text.py index 775eefcd..9385cde7 100644 --- a/tests/test_browse_scrape_text.py +++ b/tests/unit/test_browse_scrape_text.py @@ -2,7 +2,6 @@ # Generated by CodiumAI import requests -import tests.context from scripts.browse import scrape_text @@ -10,7 +9,8 @@ from scripts.browse import scrape_text Code Analysis Objective: -The objective of the "scrape_text" function is to scrape the text content from a given URL and return it as a string, after removing any unwanted HTML tags and scripts. +The objective of the "scrape_text" function is to scrape the text content from +a given URL and return it as a string, after removing any unwanted HTML tags and scripts. Inputs: - url: a string representing the URL of the webpage to be scraped. @@ -33,6 +33,7 @@ Additional aspects: - The function uses a generator expression to split the text into lines and chunks, which can improve performance for large amounts of text. """ + class TestScrapeText: # Tests that scrape_text() returns the expected text when given a valid URL.