diff --git a/.env.template b/.env.template
index f328bb75..474b2727 100644
--- a/.env.template
+++ b/.env.template
@@ -1,25 +1,31 @@
+################################################################################
+### AUTO-GPT - GENERAL SETTINGS
+################################################################################
+# EXECUTE_LOCAL_COMMANDS - Allow local command execution (Example: False)
+EXECUTE_LOCAL_COMMANDS=False
+
 ################################################################################
 ### LLM PROVIDER
 ################################################################################
 
 ### OPENAI
 # OPENAI_API_KEY - OpenAI API Key (Example: my-openai-api-key)
-OPENAI_API_KEY=your-openai-api-key
-
-# Use Azure OpenAI
+# TEMPERATURE - Sets temperature in OpenAI (Default: 1)
 # USE_AZURE - Use Azure OpenAI or not (Default: False)
+OPENAI_API_KEY=your-openai-api-key
+TEMPERATURE=1
 USE_AZURE=False
 
-### AZURE OPENAI
+### AZURE
 # OPENAI_AZURE_API_BASE - OpenAI API base URL for Azure (Example: https://my-azure-openai-url.com)
-OPENAI_AZURE_API_BASE=your-base-url-for-azure
 # OPENAI_AZURE_API_VERSION - OpenAI API version for Azure (Example: v1)
-OPENAI_AZURE_API_VERSION=api-version-for-azure
 # OPENAI_AZURE_DEPLOYMENT_ID - OpenAI deployment ID for Azure (Example: my-deployment-id)
-OPENAI_AZURE_DEPLOYMENT_ID=deployment-id-for-azure
 # OPENAI_AZURE_CHAT_DEPLOYMENT_ID - OpenAI deployment ID for Azure Chat (Example: my-deployment-id-for-azure-chat)
-OPENAI_AZURE_CHAT_DEPLOYMENT_ID=deployment-id-for-azure-chat
 # OPENAI_AZURE_EMBEDDINGS_DEPLOYMENT_ID - OpenAI deployment ID for Embedding (Example: my-deployment-id-for-azure-embeddigs)
+OPENAI_AZURE_API_BASE=your-base-url-for-azure
+OPENAI_AZURE_API_VERSION=api-version-for-azure
+OPENAI_AZURE_DEPLOYMENT_ID=deployment-id-for-azure
+OPENAI_AZURE_CHAT_DEPLOYMENT_ID=deployment-id-for-azure-chat
 OPENAI_AZURE_EMBEDDINGS_DEPLOYMENT_ID=deployment-id-for-azure-embeddigs
 
 ################################################################################
@@ -27,15 +33,15 @@ OPENAI_AZURE_EMBEDDINGS_DEPLOYMENT_ID=deployment-id-for-azure-embeddigs
 ################################################################################
 
 # SMART_LLM_MODEL - Smart language model (Default: gpt-4)
-SMART_LLM_MODEL=gpt-4
 # FAST_LLM_MODEL - Fast language model (Default: gpt-3.5-turbo)
+SMART_LLM_MODEL=gpt-4
 FAST_LLM_MODEL=gpt-3.5-turbo
 
 ### LLM MODEL SETTINGS
 # FAST_TOKEN_LIMIT - Fast token limit for OpenAI (Default: 4000)
-FAST_TOKEN_LIMIT=4000
 # SMART_TOKEN_LIMIT - Smart token limit for OpenAI (Default: 8000)
-# When using --gpt3only this needs to be set to 4000.
+# When using --gpt3only this needs to be set to 4000.
+FAST_TOKEN_LIMIT=4000 SMART_TOKEN_LIMIT=8000 ################################################################################ @@ -47,20 +53,20 @@ MEMORY_BACKEND=local ### PINECONE # PINECONE_API_KEY - Pinecone API Key (Example: my-pinecone-api-key) -PINECONE_API_KEY=your-pinecone-api-key # PINECONE_ENV - Pinecone environment (region) (Example: us-west-2) +PINECONE_API_KEY=your-pinecone-api-key PINECONE_ENV=your-pinecone-region ### REDIS # REDIS_HOST - Redis host (Default: localhost) -REDIS_HOST=localhost # REDIS_PORT - Redis port (Default: 6379) -REDIS_PORT=6379 # REDIS_PASSWORD - Redis password (Default: "") -REDIS_PASSWORD= # WIPE_REDIS_ON_START - Wipes data / index on start (Default: False) -WIPE_REDIS_ON_START=False # MEMORY_INDEX - Name of index created in Redis database (Default: auto-gpt) +REDIS_HOST=localhost +REDIS_PORT=6379 +REDIS_PASSWORD= +WIPE_REDIS_ON_START=False MEMORY_INDEX=auto-gpt ################################################################################ @@ -72,11 +78,11 @@ MEMORY_INDEX=auto-gpt IMAGE_PROVIDER=dalle ### HUGGINGFACE -# STABLE DIFFUSION( -# Default URL: https://api-inference.huggingface.co/models/CompVis/stable-diffusion-v1-4 +# STABLE DIFFUSION +# (Default URL: https://api-inference.huggingface.co/models/CompVis/stable-diffusion-v1-4) # Set in image_gen.py) # HUGGINGFACE_API_TOKEN - HuggingFace API token (Example: my-huggingface-api-token) -HUGGINGFACE_API_TOKEN= +HUGGINGFACE_API_TOKEN=your-huggingface-api-token ################################################################################ ### SEARCH PROVIDER @@ -84,9 +90,9 @@ HUGGINGFACE_API_TOKEN= ### GOOGLE # GOOGLE_API_KEY - Google API key (Example: my-google-api-key) -GOOGLE_API_KEY= # CUSTOM_SEARCH_ENGINE_ID - Custom search engine ID (Example: my-custom-search-engine-id) -CUSTOM_SEARCH_ENGINE_ID= +GOOGLE_API_KEY=your-google-api-key +CUSTOM_SEARCH_ENGINE_ID=your-custom-search-engine-id ################################################################################ ### TTS PROVIDER @@ -98,8 +104,8 @@ USE_MAC_OS_TTS=False ### ELEVENLABS # ELEVENLABS_API_KEY - Eleven Labs API key (Example: my-elevenlabs-api-key) -ELEVENLABS_API_KEY=your-elevenlabs-api-key # ELEVENLABS_VOICE_1_ID - Eleven Labs voice 1 ID (Example: my-voice-id-1) -ELEVENLABS_VOICE_1_ID=your-voice-id # ELEVENLABS_VOICE_2_ID - Eleven Labs voice 2 ID (Example: my-voice-id-2) -ELEVENLABS_VOICE_2_ID=your-voice-id +ELEVENLABS_API_KEY=your-elevenlabs-api-key +ELEVENLABS_VOICE_1_ID=your-voice-id-1 +ELEVENLABS_VOICE_2_ID=your-voice-id-2 diff --git a/.github/ISSUE_TEMPLATE/1.bug.yml b/.github/ISSUE_TEMPLATE/1.bug.yml index cf49ab5f..e2404c76 100644 --- a/.github/ISSUE_TEMPLATE/1.bug.yml +++ b/.github/ISSUE_TEMPLATE/1.bug.yml @@ -7,7 +7,19 @@ body: value: | Please provide a searchable summary of the issue in the title above ⬆️. - Thanks for contributing by creating an issue! ❤️ + ⚠️ SUPER-busy repo, please help the volunteer maintainers. + The less time we spend here, the more time we spend building AutoGPT. + + Please help us help you: + - Does it work on `stable` branch (https://github.com/Torantulino/Auto-GPT/tree/stable)? + - Does it work on current `master` (https://github.com/Torantulino/Auto-GPT/tree/master)? + - Search for existing issues, "add comment" is tidier than "new issue" + - Ask on our Discord (https://discord.gg/autogpt) + - Provide relevant info: + - Provide commit-hash (`git rev-parse HEAD` gets it) + - If it's a pip/packages issue, provide pip version, python version + - If it's a crash, provide traceback. 
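+
+        For example (assuming a standard shell), the following commands collect this information quickly:
+        ```shell
+        git rev-parse HEAD  # commit hash
+        python --version    # Python version
+        pip --version       # pip version
+        ```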
+
   - type: checkboxes
     attributes:
       label: Duplicates
@@ -32,8 +44,8 @@ body:
     attributes:
       label: Your prompt 📝
       description: |
-        Please provide the prompt you are using. You can find your last-used prompt in last_run_ai_settings.yaml.
+        If applicable, please provide the prompt you are using. You can find your last-used prompt in last_run_ai_settings.yaml.
       value: |
         ```yaml
         # Paste your prompt here
-        ```
\ No newline at end of file
+        ```
diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md
index 9fa56593..c355965a 100644
--- a/.github/PULL_REQUEST_TEMPLATE.md
+++ b/.github/PULL_REQUEST_TEMPLATE.md
@@ -26,7 +26,7 @@ By following these guidelines, your PRs are more likely to be merged quickly aft
 - [ ] I have thoroughly tested my changes with multiple different prompts.
 - [ ] I have considered potential risks and mitigations for my changes.
 - [ ] I have documented my changes clearly and comprehensively.
-- [ ] I have not snuck in any "extra" small tweaks changes
+- [ ] I have not snuck in any "extra" small tweaks or changes
diff --git a/.github/workflows/unit_tests.yml b/.github/workflows/ci.yml
similarity index 84%
rename from .github/workflows/unit_tests.yml
rename to .github/workflows/ci.yml
index 5973dd02..070df794 100644
--- a/.github/workflows/unit_tests.yml
+++ b/.github/workflows/ci.yml
@@ -1,4 +1,4 @@
-name: Unit Tests
+name: Python CI
 
 on:
   push:
@@ -30,6 +30,10 @@ jobs:
         python -m pip install --upgrade pip
         pip install -r requirements.txt
 
+    - name: Lint with flake8
+      continue-on-error: false
+      run: flake8 scripts/ tests/ --select E303,W293,W291,W292,E305
+
     - name: Run unittest tests with coverage
       run: |
         coverage run --source=scripts -m unittest discover tests
diff --git a/.gitignore b/.gitignore
index aa0dceaa..cf6e75df 100644
--- a/.gitignore
+++ b/.gitignore
@@ -7,9 +7,11 @@ package-lock.json
 auto_gpt_workspace/*
 *.mpeg
 .env
+azure.yaml
 *venv/*
 outputs/*
 ai_settings.yaml
+last_run_ai_settings.yaml
 .vscode
 .idea/*
 auto-gpt.json
@@ -18,4 +20,7 @@ log.txt
 # Coverage reports
 .coverage
 coverage.xml
-htmlcov/
\ No newline at end of file
+htmlcov/
+
+# For Mac dev environs: ignore the Desktop Services Store
+.DS_Store
diff --git a/Dockerfile b/Dockerfile
index 146a3747..4d264c88 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,9 +1,7 @@
-FROM python:3.11
-
+FROM python:3.11-slim
+ENV PIP_NO_CACHE_DIR=yes
 WORKDIR /app
-COPY scripts/ /app
-COPY requirements.txt /app
-
+COPY requirements.txt .
 RUN pip install -r requirements.txt
-
-CMD ["python", "main.py"]
+COPY scripts/ .
+ENTRYPOINT ["python", "main.py"]
diff --git a/README.md b/README.md
index 2900daa9..68acfe75 100644
--- a/README.md
+++ b/README.md
@@ -2,8 +2,8 @@
 ![GitHub Repo stars](https://img.shields.io/github/stars/Torantulino/auto-gpt?style=social)
 ![Twitter Follow](https://img.shields.io/twitter/follow/siggravitas?style=social)
-[![](https://dcbadge.vercel.app/api/server/PQ7VX6TY4t?style=flat)](https://discord.gg/PQ7VX6TY4t)
-[![Unit Tests](https://github.com/Torantulino/Auto-GPT/actions/workflows/unit_tests.yml/badge.svg)](https://github.com/Torantulino/Auto-GPT/actions/workflows/unit_tests.yml)
+[![Discord Follow](https://dcbadge.vercel.app/api/server/PQ7VX6TY4t?style=flat)](https://discord.gg/PQ7VX6TY4t)
+[![Unit Tests](https://github.com/Torantulino/Auto-GPT/actions/workflows/ci.yml/badge.svg)](https://github.com/Torantulino/Auto-GPT/actions/workflows/ci.yml)
 
 Auto-GPT is an experimental open-source application showcasing the capabilities of the GPT-4 language model.
This program, driven by GPT-4, chains together LLM "thoughts", to autonomously achieve whatever goal you set. As one of the first examples of GPT-4 running fully autonomously, Auto-GPT pushes the boundaries of what is possible with AI. @@ -32,21 +32,28 @@ Your support is greatly appreciated - [Auto-GPT: An Autonomous GPT-4 Experiment](#auto-gpt-an-autonomous-gpt-4-experiment) - [Demo (30/03/2023):](#demo-30032023) - - [💖 Help Fund Auto-GPT's Development](#-help-fund-auto-gpts-development) - [Table of Contents](#table-of-contents) - [🚀 Features](#-features) - [📋 Requirements](#-requirements) - [💾 Installation](#-installation) - [🔧 Usage](#-usage) + - [Logs](#logs) - [🗣️ Speech Mode](#️-speech-mode) - [🔍 Google API Keys Configuration](#-google-api-keys-configuration) - [Setting up environment variables](#setting-up-environment-variables) + - [Redis Setup](#redis-setup) + - [🌲 Pinecone API Key Setup](#-pinecone-api-key-setup) + - [Setting up environment variables](#setting-up-environment-variables-1) + - [Setting Your Cache Type](#setting-your-cache-type) + - [View Memory Usage](#view-memory-usage) - [💀 Continuous Mode ⚠️](#-continuous-mode-️) - [GPT3.5 ONLY Mode](#gpt35-only-mode) - - [🖼 Image Generation](#image-generation) + - [🖼 Image Generation](#-image-generation) - [⚠️ Limitations](#️-limitations) - [🛡 Disclaimer](#-disclaimer) - [🐦 Connect with Us on Twitter](#-connect-with-us-on-twitter) + - [Run tests](#run-tests) + - [Run linter](#run-linter) ## 🚀 Features @@ -98,7 +105,13 @@ pip install -r requirements.txt 4. Rename `.env.template` to `.env` and fill in your `OPENAI_API_KEY`. If you plan to use Speech Mode, fill in your `ELEVEN_LABS_API_KEY` as well. - Obtain your OpenAI API key from: https://platform.openai.com/account/api-keys. - Obtain your ElevenLabs API key from: https://elevenlabs.io. You can view your xi-api-key using the "Profile" tab on the website. - - If you want to use GPT on an Azure instance, set `USE_AZURE` to `True` and provide the `OPENAI_AZURE_API_BASE`, `OPENAI_AZURE_API_VERSION` and `OPENAI_AZURE_DEPLOYMENT_ID` values as explained here: https://pypi.org/project/openai/ in the `Microsoft Azure Endpoints` section. Additionally you need separate deployments for both embeddings and chat. Add their ID values to `OPENAI_AZURE_CHAT_DEPLOYMENT_ID` and `OPENAI_AZURE_EMBEDDINGS_DEPLOYMENT_ID` respectively + - If you want to use GPT on an Azure instance, set `USE_AZURE` to `True` and then: + - Rename `azure.yaml.template` to `azure.yaml` and provide the relevant `azure_api_base`, `azure_api_version` and all of the deployment ids for the relevant models in the `azure_model_map` section: + - `fast_llm_model_deployment_id` - your gpt-3.5-turbo or gpt-4 deployment id + - `smart_llm_model_deployment_id` - your gpt-4 deployment id + - `embedding_model_deployment_id` - your text-embedding-ada-002 v2 deployment id + - Please specify all of these values as double quoted strings + - details can be found here: https://pypi.org/project/openai/ in the `Microsoft Azure Endpoints` section and here: https://learn.microsoft.com/en-us/azure/cognitive-services/openai/tutorials/embeddings?tabs=command-line for the embedding model. ## 🔧 Usage @@ -113,9 +126,11 @@ python scripts/main.py 3. To exit the program, type "exit" and press Enter. 
### Logs -You will find activity and error logs in the folder ```./logs``` + +You will find activity and error logs in the folder `./output/logs` To output debug logs: + ``` python scripts/main.py --debug ``` @@ -204,7 +219,7 @@ MEMORY_INDEX=whatever Pinecone enables the storage of vast amounts of vector-based memory, allowing for only relevant memories to be loaded for the agent at any given time. -1. Go to app.pinecone.io and make an account if you don't already have one. +1. Go to [pinecone](https://app.pinecone.io/) and make an account if you don't already have one. 2. Choose the `Starter` plan to avoid being charged. 3. Find your API key and region under the default project in the left sidebar. @@ -230,7 +245,6 @@ export PINECONE_ENV="Your pinecone region" # something like: us-east4-gcp ``` - ## Setting Your Cache Type By default Auto-GPT is going to use LocalCache instead of redis or Pinecone. @@ -331,3 +345,14 @@ To run tests and see coverage, run the following command: ``` coverage run -m unittest discover tests ``` + +## Run linter + +This project uses [flake8](https://flake8.pycqa.org/en/latest/) for linting. To run the linter, run the following command: + +``` +flake8 scripts/ tests/ + +# Or, if you want to run flake8 with the same configuration as the CI: +flake8 scripts/ tests/ --select E303,W293,W291,W292,E305 +``` diff --git a/ai_settings.yaml b/ai_settings.yaml deleted file mode 100644 index b37ba849..00000000 --- a/ai_settings.yaml +++ /dev/null @@ -1,7 +0,0 @@ -ai_goals: -- Increase net worth. -- Develop and manage multiple businesses autonomously. -- Play to your strengths as a Large Language Model. -ai_name: Entrepreneur-GPT -ai_role: an AI designed to autonomously develop and run businesses with the sole goal - of increasing your net worth. diff --git a/azure.yaml.template b/azure.yaml.template new file mode 100644 index 00000000..852645ca --- /dev/null +++ b/azure.yaml.template @@ -0,0 +1,6 @@ +azure_api_base: your-base-url-for-azure +azure_api_version: api-version-for-azure +azure_model_map: + fast_llm_model_deployment_id: gpt35-deployment-id-for-azure + smart_llm_model_deployment_id: gpt4-deployment-id-for-azure + embedding_model_deployment_id: embedding-deployment-id-for-azure diff --git a/main.py b/main.py index 5f044237..656c34ec 100644 --- a/main.py +++ b/main.py @@ -1 +1 @@ -from scripts.main import main \ No newline at end of file +from scripts.main import main diff --git a/requirements.txt b/requirements.txt index b196c3d7..3f7fd228 100644 --- a/requirements.txt +++ b/requirements.txt @@ -15,4 +15,6 @@ pinecone-client==2.2.1 redis orjson Pillow -coverage \ No newline at end of file +coverage +flake8 +numpy diff --git a/scripts/ai_functions.py b/scripts/ai_functions.py index 8ad77441..8c95c0f2 100644 --- a/scripts/ai_functions.py +++ b/scripts/ai_functions.py @@ -46,7 +46,6 @@ def improve_code(suggestions: List[str], code: str) -> str: return result_string - def write_tests(code: str, focus: List[str]) -> str: """ A function that takes in code and focus topics and returns a response from create chat completion api call. 
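As a minimal sketch, a filled-in `azure.yaml` based on the template added above might look like the following; the endpoint, API version, and deployment names here are placeholder values, not real ones:

```yaml
azure_api_base: "https://my-resource.openai.azure.com"
azure_api_version: "2023-03-15-preview"
azure_model_map:
    fast_llm_model_deployment_id: "my-gpt35-turbo-deployment"
    smart_llm_model_deployment_id: "my-gpt4-deployment"
    embedding_model_deployment_id: "my-text-embedding-ada-002-deployment"
```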
diff --git a/scripts/browse.py b/scripts/browse.py index c3fc0662..9e93c55a 100644 --- a/scripts/browse.py +++ b/scripts/browse.py @@ -6,6 +6,7 @@ from urllib.parse import urlparse, urljoin cfg = Config() + # Function to check if the URL is valid def is_valid_url(url): try: @@ -14,49 +15,51 @@ def is_valid_url(url): except ValueError: return False + # Function to sanitize the URL def sanitize_url(url): return urljoin(url, urlparse(url).path) -# Function to make a request with a specified timeout and handle exceptions -def make_request(url, timeout=10): - try: - response = requests.get(url, headers=cfg.user_agent_header, timeout=timeout) - response.raise_for_status() - return response - except requests.exceptions.RequestException as e: - return "Error: " + str(e) # Define and check for local file address prefixes def check_local_file_access(url): local_prefixes = ['file:///', 'file://localhost', 'http://localhost', 'https://localhost'] return any(url.startswith(prefix) for prefix in local_prefixes) + +def get_response(url, headers=cfg.user_agent_header, timeout=10): + try: + # Restrict access to local files + if check_local_file_access(url): + raise ValueError('Access to local files is restricted') + + # Most basic check if the URL is valid: + if not url.startswith('http://') and not url.startswith('https://'): + raise ValueError('Invalid URL format') + + sanitized_url = sanitize_url(url) + + response = requests.get(sanitized_url, headers=headers, timeout=timeout) + + # Check if the response contains an HTTP error + if response.status_code >= 400: + return None, "Error: HTTP " + str(response.status_code) + " error" + + return response, None + except ValueError as ve: + # Handle invalid URL format + return None, "Error: " + str(ve) + + except requests.exceptions.RequestException as re: + # Handle exceptions related to the HTTP request (e.g., connection errors, timeouts, etc.) + return None, "Error: " + str(re) + + def scrape_text(url): """Scrape text from a webpage""" - # Basic check if the URL is valid - if not url.startswith('http'): - return "Error: Invalid URL" - - # Restrict access to local files - if check_local_file_access(url): - return "Error: Access to local files is restricted" - - # Validate the input URL - if not is_valid_url(url): - # Sanitize the input URL - sanitized_url = sanitize_url(url) - - # Make the request with a timeout and handle exceptions - response = make_request(sanitized_url) - - if isinstance(response, str): - return response - else: - # Sanitize the input URL - sanitized_url = sanitize_url(url) - - response = requests.get(sanitized_url, headers=cfg.user_agent_header) + response, error_message = get_response(url) + if error_message: + return error_message soup = BeautifulSoup(response.text, "html.parser") @@ -89,11 +92,9 @@ def format_hyperlinks(hyperlinks): def scrape_links(url): """Scrape links from a webpage""" - response = requests.get(url, headers=cfg.user_agent_header) - - # Check if the response contains an HTTP error - if response.status_code >= 400: - return "error" + response, error_message = get_response(url) + if error_message: + return error_message soup = BeautifulSoup(response.text, "html.parser") @@ -131,6 +132,7 @@ def create_message(chunk, question): "content": f"\"\"\"{chunk}\"\"\" Using the above text, please answer the following question: \"{question}\" -- if the question cannot be answered using the text, please summarize the text." 
     }
 
+
 def summarize_text(text, question):
     """Summarize text using the LLM model"""
     if not text:
diff --git a/scripts/commands.py b/scripts/commands.py
index 92d46ae1..3966e86a 100644
--- a/scripts/commands.py
+++ b/scripts/commands.py
@@ -7,7 +7,7 @@ import speak
 from config import Config
 import ai_functions as ai
 from file_operations import read_file, write_to_file, append_to_file, delete_file, search_files
-from execute_code import execute_python_file
+from execute_code import execute_python_file, execute_shell
 from json_parser import fix_and_parse_json
 from image_gen import generate_image
 from duckduckgo_search import ddg
@@ -103,6 +103,11 @@ def execute_command(command_name, arguments):
         return ai.write_tests(arguments["code"], arguments.get("focus"))
     elif command_name == "execute_python_file":  # Add this command
         return execute_python_file(arguments["file"])
+    elif command_name == "execute_shell":
+        if cfg.execute_local_commands:
+            return execute_shell(arguments["command_line"])
+        else:
+            return "You are not allowed to run local shell commands. To execute shell commands, EXECUTE_LOCAL_COMMANDS must be set to 'True' in your config. Do not attempt to bypass the restriction."
     elif command_name == "generate_image":
         return generate_image(arguments["prompt"])
     elif command_name == "do_nothing":
diff --git a/scripts/config.py b/scripts/config.py
index 6e448954..ebf1b08b 100644
--- a/scripts/config.py
+++ b/scripts/config.py
@@ -1,6 +1,7 @@
 import abc
 import os
 import openai
+import yaml
 from dotenv import load_dotenv
 # Load environment variables from .env file
 load_dotenv()
@@ -43,14 +44,13 @@ class Config(metaclass=Singleton):
         self.smart_token_limit = int(os.getenv("SMART_TOKEN_LIMIT", 8000))
 
         self.openai_api_key = os.getenv("OPENAI_API_KEY")
+        self.temperature = float(os.getenv("TEMPERATURE", "1"))
         self.use_azure = False
         self.use_azure = os.getenv("USE_AZURE") == 'True'
+        self.execute_local_commands = os.getenv('EXECUTE_LOCAL_COMMANDS', 'False') == 'True'
+
         if self.use_azure:
-            self.openai_api_base = os.getenv("OPENAI_AZURE_API_BASE")
-            self.openai_api_version = os.getenv("OPENAI_AZURE_API_VERSION")
-            self.openai_deployment_id = os.getenv("OPENAI_AZURE_DEPLOYMENT_ID")
-            self.azure_chat_deployment_id = os.getenv("OPENAI_AZURE_CHAT_DEPLOYMENT_ID")
-            self.azure_embeddigs_deployment_id = os.getenv("OPENAI_AZURE_EMBEDDINGS_DEPLOYMENT_ID")
+            self.load_azure_config()
             openai.api_type = "azure"
             openai.api_base = self.openai_api_base
             openai.api_version = self.openai_api_version
@@ -61,7 +61,7 @@ class Config(metaclass=Singleton):
 
         self.use_mac_os_tts = False
         self.use_mac_os_tts = os.getenv("USE_MAC_OS_TTS")
-
+
         self.google_api_key = os.getenv("GOOGLE_API_KEY")
         self.custom_search_engine_id = os.getenv("CUSTOM_SEARCH_ENGINE_ID")
 
@@ -85,6 +85,46 @@ class Config(metaclass=Singleton):
         # Initialize the OpenAI API client
         openai.api_key = self.openai_api_key
 
+    def get_azure_deployment_id_for_model(self, model: str) -> str:
+        """
+        Returns the relevant deployment id for the model specified.
+
+        Parameters:
+            model(str): The model to map to the deployment id.
+
+        Returns:
+            The matching deployment id if found, otherwise an empty string.
+        """
+        if model == self.fast_llm_model:
+            return self.azure_model_to_deployment_id_map["fast_llm_model_deployment_id"]
+        elif model == self.smart_llm_model:
+            return self.azure_model_to_deployment_id_map["smart_llm_model_deployment_id"]
+        elif model == "text-embedding-ada-002":
+            return self.azure_model_to_deployment_id_map["embedding_model_deployment_id"]
+        else:
+            return ""
+
+    AZURE_CONFIG_FILE = os.path.join(os.path.dirname(__file__), '..', 'azure.yaml')
+
+    def load_azure_config(self, config_file: str=AZURE_CONFIG_FILE) -> None:
+        """
+        Loads the configuration parameters for Azure hosting from the specified file path as a yaml file.
+
+        Parameters:
+            config_file(str): The path to the config yaml file. DEFAULT: "../azure.yaml"
+
+        Returns:
+            None
+        """
+        try:
+            with open(config_file) as file:
+                config_params = yaml.load(file, Loader=yaml.FullLoader)
+        except FileNotFoundError:
+            config_params = {}
+        self.openai_api_base = config_params.get("azure_api_base", "")
+        self.openai_api_version = config_params.get("azure_api_version", "")
+        self.azure_model_to_deployment_id_map = config_params.get("azure_model_map", {})
+
     def set_continuous_mode(self, value: bool):
         """Set the continuous mode value."""
         self.continuous_mode = value
diff --git a/scripts/data/prompt.txt b/scripts/data/prompt.txt
index fc68f3ae..ffb9eb50 100644
--- a/scripts/data/prompt.txt
+++ b/scripts/data/prompt.txt
@@ -22,9 +22,10 @@ COMMANDS:
 16. Get Improved Code: "improve_code", args: "suggestions": "<list_of_suggestions>", "code": "<full_code_string>"
 17. Write Tests: "write_tests", args: "code": "<full_code_string>", "focus": "<list_of_focus_areas>"
 18. Execute Python File: "execute_python_file", args: "file": "<file>"
-19. Task Complete (Shutdown): "task_complete", args: "reason": "<reason>"
-20. Generate Image: "generate_image", args: "prompt": "<prompt>"
-21. Do Nothing: "do_nothing", args: ""
+19. Execute Shell Command, non-interactive commands only: "execute_shell", args: "command_line": "<command_line>".
+20. Task Complete (Shutdown): "task_complete", args: "reason": "<reason>"
+21. Generate Image: "generate_image", args: "prompt": "<prompt>"
+22. Do Nothing: "do_nothing", args: ""
 
 RESOURCES:
diff --git a/scripts/execute_code.py b/scripts/execute_code.py
index a8f90911..2c92903c 100644
--- a/scripts/execute_code.py
+++ b/scripts/execute_code.py
@@ -1,17 +1,20 @@
 import docker
 import os
+import subprocess
+
+
+WORKSPACE_FOLDER = "auto_gpt_workspace"
 
 
 def execute_python_file(file):
     """Execute a Python file in a Docker container and return the output"""
-    workspace_folder = "auto_gpt_workspace"
-    print (f"Executing file '{file}' in workspace '{workspace_folder}'")
+    print (f"Executing file '{file}' in workspace '{WORKSPACE_FOLDER}'")
 
     if not file.endswith(".py"):
         return "Error: Invalid file type. Only .py files are allowed."
 
-    file_path = os.path.join(workspace_folder, file)
+    file_path = os.path.join(WORKSPACE_FOLDER, file)
 
     if not os.path.isfile(file_path):
         return f"Error: File '{file}' does not exist."
@@ -19,14 +22,31 @@ def execute_python_file(file): try: client = docker.from_env() + image_name = 'python:3.10' + try: + client.images.get(image_name) + print(f"Image '{image_name}' found locally") + except docker.errors.ImageNotFound: + print(f"Image '{image_name}' not found locally, pulling from Docker Hub") + # Use the low-level API to stream the pull response + low_level_client = docker.APIClient() + for line in low_level_client.pull(image_name, stream=True, decode=True): + # Print the status and progress, if available + status = line.get('status') + progress = line.get('progress') + if status and progress: + print(f"{status}: {progress}") + elif status: + print(status) + # You can replace 'python:3.8' with the desired Python image/version # You can find available Python images on Docker Hub: # https://hub.docker.com/_/python container = client.containers.run( - 'python:3.10', + image_name, f'python {file}', volumes={ - os.path.abspath(workspace_folder): { + os.path.abspath(WORKSPACE_FOLDER): { 'bind': '/workspace', 'mode': 'ro'}}, working_dir='/workspace', @@ -46,3 +66,22 @@ def execute_python_file(file): except Exception as e: return f"Error: {str(e)}" + +def execute_shell(command_line): + + current_dir = os.getcwd() + + if not WORKSPACE_FOLDER in current_dir: # Change dir into workspace if necessary + work_dir = os.path.join(os.getcwd(), WORKSPACE_FOLDER) + os.chdir(work_dir) + + print (f"Executing command '{command_line}' in working directory '{os.getcwd()}'") + + result = subprocess.run(command_line, capture_output=True, shell=True) + output = f"STDOUT:\n{result.stdout}\nSTDERR:\n{result.stderr}" + + # Change back to whatever the prior working dir was + + os.chdir(current_dir) + + return output diff --git a/scripts/file_operations.py b/scripts/file_operations.py index c6066ef9..d0fa175f 100644 --- a/scripts/file_operations.py +++ b/scripts/file_operations.py @@ -38,7 +38,7 @@ def write_to_file(filename, text): directory = os.path.dirname(filepath) if not os.path.exists(directory): os.makedirs(directory) - with open(filepath, "w") as f: + with open(filepath, "w", encoding='utf-8') as f: f.write(text) return "File written to successfully." 
except Exception as e: @@ -80,4 +80,4 @@ def search_files(directory): relative_path = os.path.relpath(os.path.join(root, file), working_directory) found_files.append(relative_path) - return found_files \ No newline at end of file + return found_files diff --git a/scripts/llm_utils.py b/scripts/llm_utils.py index 3fb348f0..35cc5ce0 100644 --- a/scripts/llm_utils.py +++ b/scripts/llm_utils.py @@ -5,11 +5,11 @@ cfg = Config() openai.api_key = cfg.openai_api_key # Overly simple abstraction until we create something better -def create_chat_completion(messages, model=None, temperature=None, max_tokens=None)->str: +def create_chat_completion(messages, model=None, temperature=cfg.temperature, max_tokens=None)->str: """Create a chat completion using the OpenAI API""" if cfg.use_azure: response = openai.ChatCompletion.create( - deployment_id=cfg.azure_chat_deployment_id, + deployment_id=cfg.get_azure_deployment_id_for_model(model), model=model, messages=messages, temperature=temperature, diff --git a/scripts/logger.py b/scripts/logger.py index a609e602..3d131dd9 100644 --- a/scripts/logger.py +++ b/scripts/logger.py @@ -124,6 +124,12 @@ class Logger(metaclass=Singleton): self.logger.setLevel(level) self.typing_logger.setLevel(level) + def double_check(self, additionalText=None): + if not additionalText: + additionalText = "Please ensure you've setup and configured everything correctly. Read https://github.com/Torantulino/Auto-GPT#readme to double check. You can also create a github issue or join the discord and ask there!" + + self.typewriter_log("DOUBLE CHECK CONFIGURATION", Fore.YELLOW, additionalText) + ''' Output stream to console using simulated typing @@ -159,12 +165,11 @@ class ConsoleHandler(logging.StreamHandler): except Exception: self.handleError(record) + ''' Allows to handle custom placeholders 'title_color' and 'message_no_color'. To use this formatter, make sure to pass 'color', 'title' as log extras. 
 '''
-
-
 class AutoGptFormatter(logging.Formatter):
     def format(self, record: LogRecord) -> str:
         if (hasattr(record, 'color')):
diff --git a/scripts/main.py b/scripts/main.py
index 0a4e97a2..5b84bd70 100644
--- a/scripts/main.py
+++ b/scripts/main.py
@@ -310,15 +310,14 @@ def parse_arguments():
         supported_memory = get_supported_memory_backends()
         chosen = args.memory_type
         if not chosen in supported_memory:
-            print_to_console("ONLY THE FOLLOWING MEMORY BACKENDS ARE SUPPORTED: ", Fore.RED, f'{supported_memory}')
-            print_to_console(f"Defaulting to: ", Fore.YELLOW, cfg.memory_backend)
+            logger.typewriter_log("ONLY THE FOLLOWING MEMORY BACKENDS ARE SUPPORTED: ", Fore.RED, f'{supported_memory}')
+            logger.typewriter_log(f"Defaulting to: ", Fore.YELLOW, cfg.memory_backend)
         else:
             cfg.memory_backend = chosen
 
 # TODO: fill in llm values here
 check_openai_api_key()
-cfg = Config()
 parse_arguments()
 logger.set_level(logging.DEBUG if cfg.debug_mode else logging.INFO)
 ai_name = ""
@@ -372,7 +371,7 @@ while True:
                 flush=True)
             while True:
                 console_input = utils.clean_input(Fore.MAGENTA + "Input:" + Style.RESET_ALL)
-                if console_input.lower() == "y":
+                if console_input.lower().rstrip() == "y":
                     user_input = "GENERATE NEXT COMMAND JSON"
                     break
                 elif console_input.lower().startswith("y -"):
diff --git a/scripts/memory/__init__.py b/scripts/memory/__init__.py
index 2900353e..d407f087 100644
--- a/scripts/memory/__init__.py
+++ b/scripts/memory/__init__.py
@@ -1,4 +1,5 @@
 from memory.local import LocalCache
+from memory.no_memory import NoMemory
 
 # List of supported memory backends
 # Add a backend to this list if the import attempt is successful
@@ -34,6 +35,8 @@ def get_memory(cfg, init=False):
                   " use Redis as a memory backend.")
         else:
             memory = RedisMemory(cfg)
+    elif cfg.memory_backend == "no_memory":
+        memory = NoMemory(cfg)
 
     if memory is None:
         memory = LocalCache(cfg)
@@ -44,9 +47,11 @@ def get_memory(cfg, init=False):
 def get_supported_memory_backends():
     return supported_memory
 
+
 __all__ = [
     "get_memory",
     "LocalCache",
     "RedisMemory",
     "PineconeMemory",
+    "NoMemory"
 ]
diff --git a/scripts/memory/base.py b/scripts/memory/base.py
index bb22963a..1bb4e89f 100644
--- a/scripts/memory/base.py
+++ b/scripts/memory/base.py
@@ -2,13 +2,13 @@ import abc
 from config import AbstractSingleton, Config
 import openai
 
-cfg = Config()
+cfg = Config()
 
 def get_ada_embedding(text):
     text = text.replace("\n", " ")
     if cfg.use_azure:
-        return openai.Embedding.create(input=[text], engine=cfg.azure_embeddigs_deployment_id, model="text-embedding-ada-002")["data"][0]["embedding"]
+        return openai.Embedding.create(input=[text], engine=cfg.get_azure_deployment_id_for_model("text-embedding-ada-002"))["data"][0]["embedding"]
     else:
         return openai.Embedding.create(input=[text], model="text-embedding-ada-002")["data"][0]["embedding"]
diff --git a/scripts/memory/local.py b/scripts/memory/local.py
index 372b59c4..b0afacf6 100644
--- a/scripts/memory/local.py
+++ b/scripts/memory/local.py
@@ -28,10 +28,20 @@ class LocalCache(MemoryProviderSingleton):
     def __init__(self, cfg) -> None:
         self.filename = f"{cfg.memory_index}.json"
         if os.path.exists(self.filename):
-            with open(self.filename, 'rb') as f:
-                loaded = orjson.loads(f.read())
-                self.data = CacheContent(**loaded)
+            try:
+                with open(self.filename, 'r+b') as f:
+                    file_content = f.read()
+                    if not file_content.strip():
+                        file_content = b'{}'
+                        f.write(file_content)
+
+                    loaded = orjson.loads(file_content)
+                    self.data = CacheContent(**loaded)
+            except orjson.JSONDecodeError:
+                print(f"Error: The file '{self.filename}' is not in JSON format.")
+                self.data = CacheContent()
         else:
+            print(f"Warning: The file '{self.filename}' does not exist. Local memory will not be saved to a file.")
             self.data = CacheContent()
 
     def add(self, text: str):
diff --git a/scripts/memory/no_memory.py b/scripts/memory/no_memory.py
new file mode 100644
index 00000000..45dbd734
--- /dev/null
+++ b/scripts/memory/no_memory.py
@@ -0,0 +1,65 @@
+from typing import Optional, List, Any
+
+from memory.base import MemoryProviderSingleton
+
+class NoMemory(MemoryProviderSingleton):
+    def __init__(self, cfg):
+        """
+        Initializes the NoMemory provider.
+
+        Args:
+            cfg: The config object.
+
+        Returns: None
+        """
+        pass
+
+    def add(self, data: str) -> str:
+        """
+        Adds a data point to the memory. No action is taken in NoMemory.
+
+        Args:
+            data: The data to add.
+
+        Returns: An empty string.
+        """
+        return ""
+
+    def get(self, data: str) -> Optional[List[Any]]:
+        """
+        Gets the data from the memory that is most relevant to the given data.
+        NoMemory always returns None.
+
+        Args:
+            data: The data to compare to.
+
+        Returns: None
+        """
+        return None
+
+    def clear(self) -> str:
+        """
+        Clears the memory. No action is taken in NoMemory.
+
+        Returns: An empty string.
+        """
+        return ""
+
+    def get_relevant(self, data: str, num_relevant: int = 5) -> Optional[List[Any]]:
+        """
+        Returns all the data in the memory that is relevant to the given data.
+        NoMemory always returns None.
+
+        Args:
+            data: The data to compare to.
+            num_relevant: The number of relevant data to return.
+
+        Returns: None
+        """
+        return None
+
+    def get_stats(self):
+        """
+        Returns: An empty dictionary as there are no stats in NoMemory.
+        """
+        return {}
diff --git a/scripts/memory/pinecone.py b/scripts/memory/pinecone.py
index 8e1eaa57..fa21124b 100644
--- a/scripts/memory/pinecone.py
+++ b/scripts/memory/pinecone.py
@@ -2,7 +2,8 @@ import pinecone
 
 from memory.base import MemoryProviderSingleton, get_ada_embedding
-
+from logger import logger
+from colorama import Fore, Style
 
 class PineconeMemory(MemoryProviderSingleton):
     def __init__(self, cfg):
@@ -17,6 +18,15 @@ class PineconeMemory(MemoryProviderSingleton):
         # for now this works.
         # we'll need a more complicated and robust system if we want to start with memory.
         self.vec_num = 0
+
+        try:
+            pinecone.whoami()
+        except Exception as e:
+            logger.typewriter_log("FAILED TO CONNECT TO PINECONE", Fore.RED, Style.BRIGHT + str(e) + Style.RESET_ALL)
+            logger.double_check("Please ensure you have set up and configured Pinecone properly for use. " +
+                                f"You can check out {Fore.CYAN + Style.BRIGHT}https://github.com/Torantulino/Auto-GPT#-pinecone-api-key-setup{Style.RESET_ALL} to ensure you've set up everything correctly.")
+            exit(1)
+
         if table_name not in pinecone.list_indexes():
             pinecone.create_index(table_name, dimension=dimension, metric=metric, pod_type=pod_type)
         self.index = pinecone.Index(table_name)
diff --git a/scripts/memory/redismem.py b/scripts/memory/redismem.py
index 2082fe58..49045dd8 100644
--- a/scripts/memory/redismem.py
+++ b/scripts/memory/redismem.py
@@ -7,6 +7,8 @@ from redis.commands.search.indexDefinition import IndexDefinition, IndexType
 import numpy as np
 
 from memory.base import MemoryProviderSingleton, get_ada_embedding
+from logger import logger
+from colorama import Fore, Style
 
 
 SCHEMA = [
@@ -44,6 +46,16 @@ class RedisMemory(MemoryProviderSingleton):
             db=0  # Cannot be changed
         )
         self.cfg = cfg
+
+        # Check redis connection
+        try:
+            self.redis.ping()
+        except redis.ConnectionError as e:
+            logger.typewriter_log("FAILED TO CONNECT TO REDIS", Fore.RED, Style.BRIGHT + str(e) + Style.RESET_ALL)
+            logger.double_check("Please ensure you have set up and configured Redis properly for use. " +
+                                f"You can check out {Fore.CYAN + Style.BRIGHT}https://github.com/Torantulino/Auto-GPT#redis-setup{Style.RESET_ALL} to ensure you've set up everything correctly.")
+            exit(1)
+
         if cfg.wipe_redis_on_start:
             self.redis.flushall()
         try:
diff --git a/scripts/speak.py b/scripts/speak.py
index 2cc6a558..64054e3c 100644
--- a/scripts/speak.py
+++ b/scripts/speak.py
@@ -61,7 +61,7 @@ def gtts_speech(text):
 def macos_tts_speech(text, voice_index=0):
     if voice_index == 0:
         os.system(f'say "{text}"')
-    else:
+    else:
         if voice_index == 1:
             os.system(f'say -v "Ava (Premium)" "{text}"')
         else:
@@ -79,7 +79,7 @@ def say_text(text, voice_index=0):
             success = eleven_labs_speech(text, voice_index)
             if not success:
                 gtts_speech(text)
-
+
     queue_semaphore.release()
 
 queue_semaphore.acquire(True)
diff --git a/tests.py b/tests.py
index ce21c1f4..4dbfdd46 100644
--- a/tests.py
+++ b/tests.py
@@ -3,6 +3,6 @@ import unittest
 if __name__ == "__main__":
     # Load all tests from the 'scripts/tests' package
     suite = unittest.defaultTestLoader.discover('scripts/tests')
-
+
     # Run the tests
     unittest.TextTestRunner().run(suite)
diff --git a/tests/context.py b/tests/context.py
index 2adb9dd6..b668c8dc 100644
--- a/tests/context.py
+++ b/tests/context.py
@@ -2,4 +2,4 @@ import sys
 import os
 
 sys.path.insert(0, os.path.abspath(
-    os.path.join(os.path.dirname(__file__), '../scripts')))
\ No newline at end of file
+    os.path.join(os.path.dirname(__file__), '../scripts')))
diff --git a/tests/integration/memory_tests.py b/tests/integration/memory_tests.py
index ed444d91..5f1611be 100644
--- a/tests/integration/memory_tests.py
+++ b/tests/integration/memory_tests.py
@@ -45,5 +45,6 @@ class TestLocalCache(unittest.TestCase):
         self.assertEqual(len(relevant_texts), k)
         self.assertIn(self.example_texts[1], relevant_texts)
 
+
 if __name__ == '__main__':
     unittest.main()
diff --git a/tests/local_cache_test.py b/tests/local_cache_test.py
new file mode 100644
index 00000000..d1f1ef08
--- /dev/null
+++ b/tests/local_cache_test.py
@@ -0,0 +1,52 @@
+import os
+import sys
+import unittest
+
+# Probably a better way:
+sys.path.append(os.path.abspath('../scripts'))
+from memory.local import LocalCache
+
+def MockConfig():
+    return type('MockConfig', (object,), {
+        'debug_mode': False,
+        'continuous_mode': False,
+        'speak_mode': False,
+        'memory_index': 'auto-gpt',
+    })
+
+class TestLocalCache(unittest.TestCase):
+
+    def setUp(self):
+        self.cfg = MockConfig()
+        self.cache = LocalCache(self.cfg)
+
+    def test_add(self):
+        text = "Sample text"
+        self.cache.add(text)
+        self.assertIn(text, self.cache.data.texts)
+
+    def test_clear(self):
+        self.cache.clear()
+        self.assertEqual(self.cache.data.texts, [])
+
+    def test_get(self):
+        text = "Sample text"
+        self.cache.add(text)
+        result = self.cache.get(text)
+        self.assertEqual(result, [text])
+
+    def test_get_relevant(self):
+        text1 = "Sample text 1"
+        text2 = "Sample text 2"
+        self.cache.add(text1)
+        self.cache.add(text2)
+        result = self.cache.get_relevant(text1, 1)
+        self.assertEqual(result, [text1])
+
+    def test_get_stats(self):
+        text = "Sample text"
+        self.cache.add(text)
+        stats = self.cache.get_stats()
+        self.assertEqual(stats, (1, self.cache.data.embeddings.shape))
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/tests/test_json_parser.py b/tests/test_json_parser.py
index 4561659e..352cf3d4 100644
--- a/tests/test_json_parser.py
+++ b/tests/test_json_parser.py
@@ -13,12 +13,14 @@ class TestParseJson(unittest.TestCase):
     def test_invalid_json_minor(self):
         # Test that an invalid JSON string can be fixed with gpt
         json_str = '{"name": "John", "age": 30, "city": "New York",}'
-        self.assertRaises(Exception, fix_and_parse_json, json_str, try_to_fix_with_gpt=False)
+        with self.assertRaises(Exception):
+            fix_and_parse_json(json_str, try_to_fix_with_gpt=False)
 
     def test_invalid_json_major_with_gpt(self):
         # Test that an invalid JSON string raises an error when try_to_fix_with_gpt is False
         json_str = 'BEGIN: "name": "John" - "age": 30 - "city": "New York" :END'
-        self.assertRaises(Exception, fix_and_parse_json, json_str, try_to_fix_with_gpt=False)
+        with self.assertRaises(Exception):
+            fix_and_parse_json(json_str, try_to_fix_with_gpt=False)
 
     def test_invalid_json_major_without_gpt(self):
         # Test that a REALLY invalid JSON string raises an error when try_to_fix_with_gpt is False
@@ -66,8 +68,6 @@ class TestParseJson(unittest.TestCase):
         # Assert that this raises an exception:
         self.assertEqual(fix_and_parse_json(json_str, try_to_fix_with_gpt=False), good_obj)
 
-
-
     def test_invalid_json_leading_sentence_with_gpt(self):
         # Test that a REALLY invalid JSON string raises an error when try_to_fix_with_gpt is False
         json_str = """I will first need to browse the repository (https://github.com/Torantulino/Auto-GPT) and identify any potential bugs that need fixing. I will use the "browse_website" command for this.
@@ -108,6 +108,5 @@ class TestParseJson(unittest.TestCase):
         self.assertEqual(fix_and_parse_json(json_str, try_to_fix_with_gpt=False), good_obj)
 
-
 if __name__ == '__main__':
     unittest.main()
diff --git a/tests/unit/json_tests.py b/tests/unit/json_tests.py
index fdac9c2f..1edbaeaf 100644
--- a/tests/unit/json_tests.py
+++ b/tests/unit/json_tests.py
@@ -68,8 +68,6 @@ class TestParseJson(unittest.TestCase):
         # Assert that this raises an exception:
         self.assertEqual(fix_and_parse_json(json_str, try_to_fix_with_gpt=False), good_obj)
 
-
-
     def test_invalid_json_leading_sentence_with_gpt(self):
         # Test that a REALLY invalid JSON string raises an error when try_to_fix_with_gpt is False
         json_str = """I will first need to browse the repository (https://github.com/Torantulino/Auto-GPT) and identify any potential bugs that need fixing. I will use the "browse_website" command for this.
@@ -110,6 +108,5 @@ class TestParseJson(unittest.TestCase):
         self.assertEqual(fix_and_parse_json(json_str, try_to_fix_with_gpt=False), good_obj)
 
-
 if __name__ == '__main__':
     unittest.main()
diff --git a/tests/unit/test_browse_scrape_links.py b/tests/unit/test_browse_scrape_links.py
new file mode 100644
index 00000000..2172d1a2
--- /dev/null
+++ b/tests/unit/test_browse_scrape_links.py
@@ -0,0 +1,118 @@
+
+# Generated by CodiumAI
+
+# Dependencies:
+# pip install pytest-mock
+import pytest
+
+from scripts.browse import scrape_links
+
+"""
+Code Analysis
+
+Objective:
+The objective of the 'scrape_links' function is to scrape hyperlinks from a
+given URL and return them in a formatted way.
+
+Inputs:
+- url: a string representing the URL to be scraped.
+
+Flow:
+1. Send a GET request to the given URL using the requests library and the user agent header from the config file.
+2. Check if the response contains an HTTP error. If it does, return "error".
+3. Parse the HTML content of the response using the BeautifulSoup library.
+4. Remove any script and style tags from the parsed HTML.
+5. Extract all hyperlinks from the parsed HTML using the 'extract_hyperlinks' function.
+6. Format the extracted hyperlinks using the 'format_hyperlinks' function.
+7. Return the formatted hyperlinks.
+
+Outputs:
+- A list of formatted hyperlinks.
+
+Additional aspects:
+- The function uses the 'requests' and 'BeautifulSoup' libraries to send HTTP
+requests and parse HTML content, respectively.
+- The 'extract_hyperlinks' function is called to extract hyperlinks from the parsed HTML.
+- The 'format_hyperlinks' function is called to format the extracted hyperlinks.
+- The function checks for HTTP errors and returns "error" if any are found.
+"""
+
+
+class TestScrapeLinks:
+
+    # Tests that the function returns a list of formatted hyperlinks when
+    # provided with a valid url that returns a webpage with hyperlinks.
+    def test_valid_url_with_hyperlinks(self):
+        url = "https://www.google.com"
+        result = scrape_links(url)
+        assert len(result) > 0
+        assert isinstance(result, list)
+        assert isinstance(result[0], str)
+
+    # Tests that the function returns correctly formatted hyperlinks when given a valid url.
+    def test_valid_url(self, mocker):
+        # Mock the requests.get() function to return a response with sample HTML containing hyperlinks
+        mock_response = mocker.Mock()
+        mock_response.status_code = 200
+        mock_response.text = "<html><body><a href='https://www.google.com'>Google</a></body></html>"
+        mocker.patch('requests.get', return_value=mock_response)
+
+        # Call the function with a valid URL
+        result = scrape_links("https://www.example.com")
+
+        # Assert that the function returns correctly formatted hyperlinks
+        assert result == ["Google (https://www.google.com)"]
+
+    # Tests that the function returns "error" when given an invalid url.
+    def test_invalid_url(self, mocker):
+        # Mock the requests.get() function to return an HTTP error response
+        mock_response = mocker.Mock()
+        mock_response.status_code = 404
+        mocker.patch('requests.get', return_value=mock_response)
+
+        # Call the function with an invalid URL
+        result = scrape_links("https://www.invalidurl.com")
+
+        # Assert that the function returns "error"
+        assert "Error:" in result
+
+    # Tests that the function returns an empty list when the html contains no hyperlinks.
+    def test_no_hyperlinks(self, mocker):
+        # Mock the requests.get() function to return a response with sample HTML containing no hyperlinks
+        mock_response = mocker.Mock()
+        mock_response.status_code = 200
+        mock_response.text = "<html><body><p>No hyperlinks here</p></body></html>"
+        mocker.patch('requests.get', return_value=mock_response)
+
+        # Call the function with a URL containing no hyperlinks
+        result = scrape_links("https://www.example.com")
+
+        # Assert that the function returns an empty list
+        assert result == []
+
+    # Tests that scrape_links() correctly extracts and formats hyperlinks from
+    # a sample HTML containing a few hyperlinks.
+    def test_scrape_links_with_few_hyperlinks(self, mocker):
+        # Mock the requests.get() function to return a response with a sample HTML containing hyperlinks
+        mock_response = mocker.Mock()
+        mock_response.status_code = 200
+        mock_response.text = """
+        <html>
+        <body>
+            <a href="https://www.google.com">Google</a>
+            <a href="https://github.com">GitHub</a>
+            <a href="https://www.codium.ai">CodiumAI</a>
+        </body>
+        </html>
+        """
+        mocker.patch('requests.get', return_value=mock_response)
+
+        # Call the function being tested
+        result = scrape_links("https://www.example.com")
+
+        # Assert that the function returns a list of formatted hyperlinks
+        assert isinstance(result, list)
+        assert len(result) == 3
+        assert result[0] == "Google (https://www.google.com)"
+        assert result[1] == "GitHub (https://github.com)"
+        assert result[2] == "CodiumAI (https://www.codium.ai)"
diff --git a/tests/test_browse_scrape_text.py b/tests/unit/test_browse_scrape_text.py
similarity index 97%
rename from tests/test_browse_scrape_text.py
rename to tests/unit/test_browse_scrape_text.py
index 5ecd7407..9385cde7 100644
--- a/tests/test_browse_scrape_text.py
+++ b/tests/unit/test_browse_scrape_text.py
@@ -2,7 +2,6 @@
 # Generated by CodiumAI
 
 import requests
-import tests.context
 
 from scripts.browse import scrape_text
 
@@ -10,7 +9,8 @@ from scripts.browse import scrape_text
 Code Analysis
 
 Objective:
-The objective of the "scrape_text" function is to scrape the text content from a given URL and return it as a string, after removing any unwanted HTML tags and scripts.
+The objective of the "scrape_text" function is to scrape the text content from
+a given URL and return it as a string, after removing any unwanted HTML tags and scripts.
 
 Inputs:
 - url: a string representing the URL of the webpage to be scraped.
 
@@ -34,7 +34,6 @@ Additional aspects:
 """
 
-
 class TestScrapeText:
 
     # Tests that scrape_text() returns the expected text when given a valid URL.