diff --git a/.env.template b/.env.template index 01c20ae4..685ed19f 100644 --- a/.env.template +++ b/.env.template @@ -50,7 +50,10 @@ SMART_TOKEN_LIMIT=8000 ### MEMORY ################################################################################ -# MEMORY_BACKEND - Memory backend type (Default: local) +### MEMORY_BACKEND - Memory backend type +# local - Default +# pinecone - Pinecone (if configured) +# redis - Redis (if configured) MEMORY_BACKEND=local ### PINECONE @@ -114,6 +117,13 @@ IMAGE_PROVIDER=dalle # HUGGINGFACE_API_TOKEN - HuggingFace API token (Example: my-huggingface-api-token) HUGGINGFACE_API_TOKEN=your-huggingface-api-token +################################################################################ +### AUDIO TO TEXT PROVIDER +################################################################################ + +### HUGGINGFACE +HUGGINGFACE_AUDIO_TO_TEXT_MODEL=facebook/wav2vec2-base-960h + ################################################################################ ### GIT Provider for repository actions ################################################################################ @@ -153,3 +163,12 @@ USE_BRIAN_TTS=False ELEVENLABS_API_KEY=your-elevenlabs-api-key ELEVENLABS_VOICE_1_ID=your-voice-id-1 ELEVENLABS_VOICE_2_ID=your-voice-id-2 + +################################################################################ +### TWITTER API +################################################################################ + +TW_CONSUMER_KEY= +TW_CONSUMER_SECRET= +TW_ACCESS_TOKEN= +TW_ACCESS_TOKEN_SECRET= diff --git a/.github/workflows/docker-image.yml b/.github/workflows/docker-image.yml new file mode 100644 index 00000000..9c77098e --- /dev/null +++ b/.github/workflows/docker-image.yml @@ -0,0 +1,18 @@ +name: Docker Image CI + +on: + push: + branches: [ "master" ] + pull_request: + branches: [ "master" ] + +jobs: + + build: + + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v3 + - name: Build the Docker image + run: docker build . --file Dockerfile --tag autogpt:$(date +%s) diff --git a/README.md b/README.md index 2bd1b444..9d8595b6 100644 --- a/README.md +++ b/README.md @@ -48,19 +48,20 @@ Your support is greatly appreciated - [Docker](#docker) - [Command Line Arguments](#command-line-arguments) - [🗣️ Speech Mode](#️-speech-mode) + - [List of IDs with names from eleven labs, you can use the name or ID:](#list-of-ids-with-names-from-eleven-labs-you-can-use-the-name-or-id) + - [OpenAI API Keys Configuration](#openai-api-keys-configuration) - [🔍 Google API Keys Configuration](#-google-api-keys-configuration) - [Setting up environment variables](#setting-up-environment-variables) - [Memory Backend Setup](#memory-backend-setup) - [Redis Setup](#redis-setup) - [🌲 Pinecone API Key Setup](#-pinecone-api-key-setup) - [Milvus Setup](#milvus-setup) - - [Setting up environment variables](#setting-up-environment-variables-1) - - [Setting Your Cache Type](#setting-your-cache-type) - [View Memory Usage](#view-memory-usage) - [🧠 Memory pre-seeding](#-memory-pre-seeding) - [💀 Continuous Mode ⚠️](#-continuous-mode-️) - [GPT3.5 ONLY Mode](#gpt35-only-mode) - [🖼 Image Generation](#-image-generation) + - [Selenium](#selenium) - [⚠️ Limitations](#️-limitations) - [🛡 Disclaimer](#-disclaimer) - [🐦 Connect with Us on Twitter](#-connect-with-us-on-twitter) @@ -115,7 +116,15 @@ cd Auto-GPT pip install -r requirements.txt ``` -5. Rename `.env.template` to `.env` and fill in your `OPENAI_API_KEY`. If you plan to use Speech Mode, fill in your `ELEVENLABS_API_KEY` as well. +5. Locate the file named `.env.template` in the main `/Auto-GPT` folder. + Create a copy of this file, called `.env` by removing the `template` extension. The easiest way is to do this in a command prompt/terminal window `cp .env.template .env` + Open the `.env` file in a text editor. Note: Files starting with a dot might be hidden by your Operating System. + Find the line that says `OPENAI_API_KEY=`. + After the `"="`, enter your unique OpenAI API Key (without any quotes or spaces). + Enter any other API keys or Tokens for services you would like to utilize. + Save and close the `".env"` file. + By completing these steps, you have properly configured the API Keys for your project. + - See [OpenAI API Keys Configuration](#openai-api-keys-configuration) to obtain your OpenAI API key. - Obtain your ElevenLabs API key from: https://elevenlabs.io. You can view your xi-api-key using the "Profile" tab on the website. - If you want to use GPT on an Azure instance, set `USE_AZURE` to `True` and then follow these steps: @@ -124,8 +133,8 @@ pip install -r requirements.txt - `smart_llm_model_deployment_id` - your gpt-4 deployment ID - `embedding_model_deployment_id` - your text-embedding-ada-002 v2 deployment ID - Please specify all of these values as double-quoted strings - > Replace string in angled brackets (<>) to your own ID ```yaml + # Replace string in angled brackets (<>) to your own ID azure_model_map: fast_llm_model_deployment_id: "" ... @@ -196,6 +205,19 @@ Use this to use TTS _(Text-to-Speech)_ for Auto-GPT python -m autogpt --speak ``` +### List of IDs with names from eleven labs, you can use the name or ID: + +- Rachel : 21m00Tcm4TlvDq8ikWAM +- Domi : AZnzlk1XvdvUeBnXmlld +- Bella : EXAVITQu4vr4xnSDxMaL +- Antoni : ErXwobaYiN019PkySvjV +- Elli : MF3mGyEYCl7XYWbV9V6O +- Josh : TxGEqnHWrfWFTfGW9XjX +- Arnold : VR6AewLTigWG4xSOukaG +- Adam : pNInz6obpgDQGcFmaJgB +- Sam : yoZ06aMxZJJ28mfd3POQ + + ## OpenAI API Keys Configuration Obtain your OpenAI API key from: https://platform.openai.com/account/api-keys. @@ -241,7 +263,18 @@ export GOOGLE_API_KEY="YOUR_GOOGLE_API_KEY" export CUSTOM_SEARCH_ENGINE_ID="YOUR_CUSTOM_SEARCH_ENGINE_ID" ``` -## Redis Setup +## Memory Backend Setup + +By default, Auto-GPT is going to use LocalCache. +To switch to either, change the `MEMORY_BACKEND` env variable to the value that you want: + +- `local` (default) uses a local JSON cache file +- `pinecone` uses the Pinecone.io account you configured in your ENV settings +- `redis` will use the redis cache that you configured +- `milvus` will use the milvus that you configured + +### Redis Setup + > _**CAUTION**_ \ This is not intended to be publicly accessible and lacks security measures. Therefore, avoid exposing Redis to the internet without a password or at all 1. Install docker desktop @@ -280,20 +313,6 @@ Pinecone enables the storage of vast amounts of vector-based memory, allowing fo 2. Choose the `Starter` plan to avoid being charged. 3. Find your API key and region under the default project in the left sidebar. -### Milvus Setup - -[Milvus](https://milvus.io/) is a open-source, high scalable vector database to storage huge amount of vector-based memory and provide fast relevant search. - -- setup milvus database, keep your pymilvus version and milvus version same to avoid compatible issues. - - setup by open source [Install Milvus](https://milvus.io/docs/install_standalone-operator.md) - - or setup by [Zilliz Cloud](https://zilliz.com/cloud) -- set `MILVUS_ADDR` in `.env` to your milvus address `host:ip`. -- set `MEMORY_BACKEND` in `.env` to `milvus` to enable milvus as backend. -- optional - - set `MILVUS_COLLECTION` in `.env` to change milvus collection name as you want, `autogpt` is the default name. - -### Setting up environment variables - In the `.env` file set: - `PINECONE_API_KEY` - `PINECONE_ENV` (example: _"us-east4-gcp"_) @@ -339,15 +358,17 @@ USE_WEAVIATE_EMBEDDED=False # set to True to run Embedded Weaviate MEMORY_INDEX="Autogpt" # name of the index to create for the application ``` -## Setting Your Cache Type +### Milvus Setup -By default, Auto-GPT is going to use LocalCache instead of redis or Pinecone. +[Milvus](https://milvus.io/) is a open-source, high scalable vector database to storage huge amount of vector-based memory and provide fast relevant search. -To switch to either, change the `MEMORY_BACKEND` env variable to the value that you want: - -`local` (default) uses a local JSON cache file -`pinecone` uses the Pinecone.io account you configured in your ENV settings -`redis` will use the redis cache that you configured +- setup milvus database, keep your pymilvus version and milvus version same to avoid compatible issues. + - setup by open source [Install Milvus](https://milvus.io/docs/install_standalone-operator.md) + - or setup by [Zilliz Cloud](https://zilliz.com/cloud) +- set `MILVUS_ADDR` in `.env` to your milvus address `host:ip`. +- set `MEMORY_BACKEND` in `.env` to `milvus` to enable milvus as backend. +- optional + - set `MILVUS_COLLECTION` in `.env` to change milvus collection name as you want, `autogpt` is the default name. ## View Memory Usage @@ -356,7 +377,8 @@ To switch to either, change the `MEMORY_BACKEND` env variable to the value that ## 🧠 Memory pre-seeding -# python autogpt/data_ingestion.py -h + python autogpt/data_ingestion.py -h + usage: data_ingestion.py [-h] (--file FILE | --dir DIR) [--init] [--overlap OVERLAP] [--max_length MAX_LENGTH] Ingest a file or a directory with multiple files into memory. Make sure to set your .env before running this script. @@ -367,10 +389,9 @@ options: --dir DIR The directory containing the files to ingest. --init Init the memory and wipe its content (default: False) --overlap OVERLAP The overlap size between chunks when ingesting files (default: 200) - --max_length MAX_LENGTH The max_length of each chunk when ingesting files (default: 4000 + --max_length MAX_LENGTH The max_length of each chunk when ingesting files (default: 4000) -# python autogpt/data_ingestion.py --dir seed_data --init --overlap 200 --max_length 1000 -``` + python autogpt/data_ingestion.py --dir seed_data --init --overlap 200 --max_length 1000 This script located at autogpt/data_ingestion.py, allows you to ingest files into memory and pre-seed it before running Auto-GPT. diff --git a/autogpt/app.py b/autogpt/app.py index e84241c5..fa5cab62 100644 --- a/autogpt/app.py +++ b/autogpt/app.py @@ -8,6 +8,7 @@ from autogpt.commands.improve_code import improve_code from autogpt.commands.write_tests import write_tests from autogpt.config import Config from autogpt.commands.image_gen import generate_image +from autogpt.commands.audio_text import read_audio_from_file from autogpt.commands.web_requests import scrape_links, scrape_text from autogpt.commands.execute_code import execute_python_file, execute_shell from autogpt.commands.file_operations import ( @@ -23,6 +24,7 @@ from autogpt.processing.text import summarize_text from autogpt.speech import say_text from autogpt.commands.web_selenium import browse_website from autogpt.commands.git_operations import clone_repository +from autogpt.commands.twitter import send_tweet CFG = Config() @@ -179,8 +181,12 @@ def execute_command(command_name: str, arguments): " shell commands, EXECUTE_LOCAL_COMMANDS must be set to 'True' " "in your config. Do not attempt to bypass the restriction." ) + elif command_name == "read_audio_from_file": + return read_audio_from_file(arguments["file"]) elif command_name == "generate_image": return generate_image(arguments["prompt"]) + elif command_name == "send_tweet": + return send_tweet(arguments["text"]) elif command_name == "do_nothing": return "No action performed." elif command_name == "task_complete": diff --git a/autogpt/commands/audio_text.py b/autogpt/commands/audio_text.py new file mode 100644 index 00000000..b9ca988c --- /dev/null +++ b/autogpt/commands/audio_text.py @@ -0,0 +1,37 @@ +import requests +import json + +from autogpt.config import Config +from autogpt.commands.file_operations import safe_join + +cfg = Config() + +working_directory = "auto_gpt_workspace" + + +def read_audio_from_file(audio_path): + audio_path = safe_join(working_directory, audio_path) + with open(audio_path, "rb") as audio_file: + audio = audio_file.read() + return read_audio(audio) + + +def read_audio(audio): + model = cfg.huggingface_audio_to_text_model + api_url = f"https://api-inference.huggingface.co/models/{model}" + api_token = cfg.huggingface_api_token + headers = {"Authorization": f"Bearer {api_token}"} + + if api_token is None: + raise ValueError( + "You need to set your Hugging Face API token in the config file." + ) + + response = requests.post( + api_url, + headers=headers, + data=audio, + ) + + text = json.loads(response.content.decode("utf-8"))["text"] + return "The audio says: " + text diff --git a/autogpt/commands/file_operations.py b/autogpt/commands/file_operations.py index 47a04dce..d02b125a 100644 --- a/autogpt/commands/file_operations.py +++ b/autogpt/commands/file_operations.py @@ -5,15 +5,49 @@ from pathlib import Path from typing import Generator, List # Set a dedicated folder for file I/O -WORKING_DIRECTORY = Path(__file__).parent.parent / "auto_gpt_workspace" +WORKING_DIRECTORY = Path(os.getcwd()) / "auto_gpt_workspace" # Create the directory if it doesn't exist if not os.path.exists(WORKING_DIRECTORY): os.makedirs(WORKING_DIRECTORY) +LOG_FILE = "file_logger.txt" +LOG_FILE_PATH = WORKING_DIRECTORY / LOG_FILE WORKING_DIRECTORY = str(WORKING_DIRECTORY) +def check_duplicate_operation(operation: str, filename: str) -> bool: + """Check if the operation has already been performed on the given file + + Args: + operation (str): The operation to check for + filename (str): The name of the file to check for + + Returns: + bool: True if the operation has already been performed on the file + """ + log_content = read_file(LOG_FILE) + log_entry = f"{operation}: {filename}\n" + return log_entry in log_content + + +def log_operation(operation: str, filename: str) -> None: + """Log the file operation to the file_logger.txt + + Args: + operation (str): The operation to log + filename (str): The name of the file the operation was performed on + """ + log_entry = f"{operation}: {filename}\n" + + # Create the log file if it doesn't exist + if not os.path.exists(LOG_FILE_PATH): + with open(LOG_FILE_PATH, "w", encoding="utf-8") as f: + f.write("File Operation Logger ") + + append_to_file(LOG_FILE, log_entry) + + def safe_join(base: str, *paths) -> str: """Join one or more path components intelligently. @@ -122,6 +156,8 @@ def write_to_file(filename: str, text: str) -> str: Returns: str: A message indicating success or failure """ + if check_duplicate_operation("write", filename): + return "Error: File has already been updated." try: filepath = safe_join(WORKING_DIRECTORY, filename) directory = os.path.dirname(filepath) @@ -129,6 +165,7 @@ def write_to_file(filename: str, text: str) -> str: os.makedirs(directory) with open(filepath, "w", encoding="utf-8") as f: f.write(text) + log_operation("write", filename) return "File written to successfully." except Exception as e: return f"Error: {str(e)}" @@ -148,6 +185,7 @@ def append_to_file(filename: str, text: str) -> str: filepath = safe_join(WORKING_DIRECTORY, filename) with open(filepath, "a") as f: f.write(text) + log_operation("append", filename) return "Text appended successfully." except Exception as e: return f"Error: {str(e)}" @@ -162,9 +200,12 @@ def delete_file(filename: str) -> str: Returns: str: A message indicating success or failure """ + if check_duplicate_operation("delete", filename): + return "Error: File has already been deleted." try: filepath = safe_join(WORKING_DIRECTORY, filename) os.remove(filepath) + log_operation("delete", filename) return "File deleted successfully." except Exception as e: return f"Error: {str(e)}" diff --git a/autogpt/commands/git_operations.py b/autogpt/commands/git_operations.py index 3483474b..3ff35cf3 100644 --- a/autogpt/commands/git_operations.py +++ b/autogpt/commands/git_operations.py @@ -16,5 +16,8 @@ def clone_repository(repo_url: str, clone_path: str) -> str: str: The result of the clone operation""" split_url = repo_url.split("//") auth_repo_url = f"//{CFG.github_username}:{CFG.github_api_key}@".join(split_url) - git.Repo.clone_from(auth_repo_url, clone_path) - return f"""Cloned {repo_url} to {clone_path}""" + try: + git.Repo.clone_from(auth_repo_url, clone_path) + return f"""Cloned {repo_url} to {clone_path}""" + except Exception as e: + return f"Error: {str(e)}" diff --git a/autogpt/commands/twitter.py b/autogpt/commands/twitter.py new file mode 100644 index 00000000..dc4d450c --- /dev/null +++ b/autogpt/commands/twitter.py @@ -0,0 +1,25 @@ +import tweepy +import os +from dotenv import load_dotenv + +load_dotenv() + + +def send_tweet(tweet_text): + consumer_key = os.environ.get("TW_CONSUMER_KEY") + consumer_secret = os.environ.get("TW_CONSUMER_SECRET") + access_token = os.environ.get("TW_ACCESS_TOKEN") + access_token_secret = os.environ.get("TW_ACCESS_TOKEN_SECRET") + # Authenticate to Twitter + auth = tweepy.OAuthHandler(consumer_key, consumer_secret) + auth.set_access_token(access_token, access_token_secret) + + # Create API object + api = tweepy.API(auth) + + # Send tweet + try: + api.update_status(tweet_text) + print("Tweet sent successfully!") + except tweepy.TweepyException as e: + print("Error sending tweet: {}".format(e.reason)) diff --git a/autogpt/commands/web_playwright.py b/autogpt/commands/web_playwright.py new file mode 100644 index 00000000..93a46ac9 --- /dev/null +++ b/autogpt/commands/web_playwright.py @@ -0,0 +1,78 @@ +"""Web scraping commands using Playwright""" +try: + from playwright.sync_api import sync_playwright +except ImportError: + print( + "Playwright not installed. Please install it with 'pip install playwright' to use." + ) +from bs4 import BeautifulSoup +from autogpt.processing.html import extract_hyperlinks, format_hyperlinks +from typing import List, Union + + +def scrape_text(url: str) -> str: + """Scrape text from a webpage + + Args: + url (str): The URL to scrape text from + + Returns: + str: The scraped text + """ + with sync_playwright() as p: + browser = p.chromium.launch() + page = browser.new_page() + + try: + page.goto(url) + html_content = page.content() + soup = BeautifulSoup(html_content, "html.parser") + + for script in soup(["script", "style"]): + script.extract() + + text = soup.get_text() + lines = (line.strip() for line in text.splitlines()) + chunks = (phrase.strip() for line in lines for phrase in line.split(" ")) + text = "\n".join(chunk for chunk in chunks if chunk) + + except Exception as e: + text = f"Error: {str(e)}" + + finally: + browser.close() + + return text + + +def scrape_links(url: str) -> Union[str, List[str]]: + """Scrape links from a webpage + + Args: + url (str): The URL to scrape links from + + Returns: + Union[str, List[str]]: The scraped links + """ + with sync_playwright() as p: + browser = p.chromium.launch() + page = browser.new_page() + + try: + page.goto(url) + html_content = page.content() + soup = BeautifulSoup(html_content, "html.parser") + + for script in soup(["script", "style"]): + script.extract() + + hyperlinks = extract_hyperlinks(soup, url) + formatted_links = format_hyperlinks(hyperlinks) + + except Exception as e: + formatted_links = f"Error: {str(e)}" + + finally: + browser.close() + + return formatted_links diff --git a/autogpt/commands/web_requests.py b/autogpt/commands/web_requests.py index 230b1ff0..a6161ec5 100644 --- a/autogpt/commands/web_requests.py +++ b/autogpt/commands/web_requests.py @@ -3,11 +3,13 @@ from typing import List, Tuple, Union from urllib.parse import urljoin, urlparse import requests +from requests.compat import urljoin from requests import Response from bs4 import BeautifulSoup from autogpt.config import Config from autogpt.memory import get_memory +from autogpt.processing.html import extract_hyperlinks, format_hyperlinks CFG = Config() memory = get_memory(CFG) @@ -134,36 +136,6 @@ def scrape_text(url: str) -> str: return text -def extract_hyperlinks(soup: BeautifulSoup) -> List[Tuple[str, str]]: - """Extract hyperlinks from a BeautifulSoup object - - Args: - soup (BeautifulSoup): The BeautifulSoup object - - Returns: - List[Tuple[str, str]]: The extracted hyperlinks - """ - hyperlinks = [] - for link in soup.find_all("a", href=True): - hyperlinks.append((link.text, link["href"])) - return hyperlinks - - -def format_hyperlinks(hyperlinks: List[Tuple[str, str]]) -> List[str]: - """Format hyperlinks into a list of strings - - Args: - hyperlinks (List[Tuple[str, str]]): The hyperlinks to format - - Returns: - List[str]: The formatted hyperlinks - """ - formatted_links = [] - for link_text, link_url in hyperlinks: - formatted_links.append(f"{link_text} ({link_url})") - return formatted_links - - def scrape_links(url: str) -> Union[str, List[str]]: """Scrape links from a webpage @@ -183,7 +155,7 @@ def scrape_links(url: str) -> Union[str, List[str]]: for script in soup(["script", "style"]): script.extract() - hyperlinks = extract_hyperlinks(soup) + hyperlinks = extract_hyperlinks(soup, url) return format_hyperlinks(hyperlinks) diff --git a/autogpt/commands/web_selenium.py b/autogpt/commands/web_selenium.py index d2ef1a7e..359803ee 100644 --- a/autogpt/commands/web_selenium.py +++ b/autogpt/commands/web_selenium.py @@ -1,5 +1,6 @@ """Selenium web scraping module.""" from selenium import webdriver +from autogpt.processing.html import extract_hyperlinks, format_hyperlinks import autogpt.processing.text as summary from bs4 import BeautifulSoup from selenium.webdriver.remote.webdriver import WebDriver @@ -33,7 +34,7 @@ def browse_website(url: str, question: str) -> Tuple[str, WebDriver]: driver, text = scrape_text_with_selenium(url) add_header(driver) summary_text = summary.summarize_text(url, text, question, driver) - links = scrape_links_with_selenium(driver) + links = scrape_links_with_selenium(driver, url) # Limit links to 5 if len(links) > 5: @@ -96,7 +97,7 @@ def scrape_text_with_selenium(url: str) -> Tuple[WebDriver, str]: return driver, text -def scrape_links_with_selenium(driver: WebDriver) -> List[str]: +def scrape_links_with_selenium(driver: WebDriver, url: str) -> List[str]: """Scrape links from a website using selenium Args: @@ -111,7 +112,7 @@ def scrape_links_with_selenium(driver: WebDriver) -> List[str]: for script in soup(["script", "style"]): script.extract() - hyperlinks = extract_hyperlinks(soup) + hyperlinks = extract_hyperlinks(soup, url) return format_hyperlinks(hyperlinks) @@ -128,30 +129,6 @@ def close_browser(driver: WebDriver) -> None: driver.quit() -def extract_hyperlinks(soup: BeautifulSoup) -> List[Tuple[str, str]]: - """Extract hyperlinks from a BeautifulSoup object - - Args: - soup (BeautifulSoup): The BeautifulSoup object to extract the hyperlinks from - - Returns: - List[Tuple[str, str]]: The hyperlinks extracted from the BeautifulSoup object - """ - return [(link.text, link["href"]) for link in soup.find_all("a", href=True)] - - -def format_hyperlinks(hyperlinks: List[Tuple[str, str]]) -> List[str]: - """Format hyperlinks to be displayed to the user - - Args: - hyperlinks (List[Tuple[str, str]]): The hyperlinks to format - - Returns: - List[str]: The formatted hyperlinks - """ - return [f"{link_text} ({link_url})" for link_text, link_url in hyperlinks] - - def add_header(driver: WebDriver) -> None: """Add a header to the website diff --git a/autogpt/config/ai_config.py b/autogpt/config/ai_config.py index c72b088b..014e360f 100644 --- a/autogpt/config/ai_config.py +++ b/autogpt/config/ai_config.py @@ -100,7 +100,7 @@ class AIConfig: prompt_start = ( "Your decisions must always be made independently without" - "seeking user assistance. Play to your strengths as an LLM and pursue" + " seeking user assistance. Play to your strengths as an LLM and pursue" " simple strategies with no legal complications." "" ) diff --git a/autogpt/config/config.py b/autogpt/config/config.py index 4041d306..22da52b0 100644 --- a/autogpt/config/config.py +++ b/autogpt/config/config.py @@ -82,6 +82,9 @@ class Config(metaclass=Singleton): self.image_provider = os.getenv("IMAGE_PROVIDER") self.huggingface_api_token = os.getenv("HUGGINGFACE_API_TOKEN") + self.huggingface_audio_to_text_model = os.getenv( + "HUGGINGFACE_AUDIO_TO_TEXT_MODEL" + ) # User agent headers to use when browsing web # Some websites might just completely deny request with an error code if diff --git a/autogpt/json_fixes/auto_fix.py b/autogpt/json_fixes/auto_fix.py index 030e8aa7..9fcf909a 100644 --- a/autogpt/json_fixes/auto_fix.py +++ b/autogpt/json_fixes/auto_fix.py @@ -21,12 +21,14 @@ def fix_json(json_string: str, schema: str) -> str: # Try to fix the JSON using GPT: function_string = "def fix_json(json_string: str, schema:str=None) -> str:" args = [f"'''{json_string}'''", f"'''{schema}'''"] - description_string = "This function takes a JSON string and ensures that it"\ - " is parseable and fully compliant with the provided schema. If an object"\ - " or field specified in the schema isn't contained within the correct JSON,"\ - " it is omitted. The function also escapes any double quotes within JSON"\ - " string values to ensure that they are valid. If the JSON string contains"\ + description_string = ( + "This function takes a JSON string and ensures that it" + " is parseable and fully compliant with the provided schema. If an object" + " or field specified in the schema isn't contained within the correct JSON," + " it is omitted. The function also escapes any double quotes within JSON" + " string values to ensure that they are valid. If the JSON string contains" " any None or NaN values, they are replaced with null before being parsed." + ) # If it doesn't already start with a "`", add one: if not json_string.startswith("`"): diff --git a/autogpt/json_fixes/bracket_termination.py b/autogpt/json_fixes/bracket_termination.py index 13c2ccfd..692461aa 100644 --- a/autogpt/json_fixes/bracket_termination.py +++ b/autogpt/json_fixes/bracket_termination.py @@ -37,7 +37,7 @@ def attempt_to_fix_json_by_finding_outermost_brackets(json_string: str): except (json.JSONDecodeError, ValueError): if CFG.debug_mode: - logger.error("Error: Invalid JSON: %s\n", json_string) + logger.error(f"Error: Invalid JSON: {json_string}\n") if CFG.speak_mode: say_text("Didn't work. I will have to ignore this response then.") logger.error("Error: Invalid JSON, setting it to empty JSON now.\n") diff --git a/autogpt/llm_utils.py b/autogpt/llm_utils.py index a8ac2cdb..43739009 100644 --- a/autogpt/llm_utils.py +++ b/autogpt/llm_utils.py @@ -126,13 +126,16 @@ def create_embedding_with_ada(text) -> list: backoff = 2 ** (attempt + 2) try: if CFG.use_azure: - return openai.Embedding.create(input=[text], - engine=CFG.get_azure_deployment_id_for_model("text-embedding-ada-002"), + return openai.Embedding.create( + input=[text], + engine=CFG.get_azure_deployment_id_for_model( + "text-embedding-ada-002" + ), )["data"][0]["embedding"] else: - return openai.Embedding.create(input=[text], model="text-embedding-ada-002")[ - "data" - ][0]["embedding"] + return openai.Embedding.create( + input=[text], model="text-embedding-ada-002" + )["data"][0]["embedding"] except RateLimitError: pass except APIError as e: @@ -148,4 +151,3 @@ def create_embedding_with_ada(text) -> list: f"API Bad gateway. Waiting {backoff} seconds..." + Fore.RESET, ) time.sleep(backoff) - diff --git a/autogpt/logs.py b/autogpt/logs.py index a34d89b1..22ce23f4 100644 --- a/autogpt/logs.py +++ b/autogpt/logs.py @@ -272,6 +272,8 @@ def print_assistant_thoughts(ai_name, assistant_reply): # Speak the assistant's thoughts if CFG.speak_mode and assistant_thoughts_speak: say_text(assistant_thoughts_speak) + else: + logger.typewriter_log("SPEAK:", Fore.YELLOW, f"{assistant_thoughts_speak}") return assistant_reply_json except json.decoder.JSONDecodeError: diff --git a/autogpt/permanent_memory/__init__.py b/autogpt/permanent_memory/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/autogpt/permanent_memory/sqlite3_store.py b/autogpt/permanent_memory/sqlite3_store.py new file mode 100644 index 00000000..ecbc944a --- /dev/null +++ b/autogpt/permanent_memory/sqlite3_store.py @@ -0,0 +1,123 @@ +import os +import sqlite3 + + +class MemoryDB: + def __init__(self, db=None): + self.db_file = db + if db is None: # No db filename supplied... + self.db_file = f"{os.getcwd()}/mem.sqlite3" # Use default filename + # Get the db connection object, making the file and tables if needed. + try: + self.cnx = sqlite3.connect(self.db_file) + except Exception as e: + print("Exception connecting to memory database file:", e) + self.cnx = None + finally: + if self.cnx is None: + # As last resort, open in dynamic memory. Won't be persistent. + self.db_file = ":memory:" + self.cnx = sqlite3.connect(self.db_file) + self.cnx.execute( + "CREATE VIRTUAL TABLE \ + IF NOT EXISTS text USING FTS5 \ + (session, \ + key, \ + block);" + ) + self.session_id = int(self.get_max_session_id()) + 1 + self.cnx.commit() + + def get_cnx(self): + if self.cnx is None: + self.cnx = sqlite3.connect(self.db_file) + return self.cnx + + # Get the highest session id. Initially 0. + def get_max_session_id(self): + id = None + cmd_str = f"SELECT MAX(session) FROM text;" + cnx = self.get_cnx() + max_id = cnx.execute(cmd_str).fetchone()[0] + if max_id is None: # New db, session 0 + id = 0 + else: + id = max_id + return id + + # Get next key id for inserting text into db. + def get_next_key(self): + next_key = None + cmd_str = f"SELECT MAX(key) FROM text \ + where session = {self.session_id};" + cnx = self.get_cnx() + next_key = cnx.execute(cmd_str).fetchone()[0] + if next_key is None: # First key + next_key = 0 + else: + next_key = int(next_key) + 1 + return next_key + + # Insert new text into db. + def insert(self, text=None): + if text is not None: + key = self.get_next_key() + session_id = self.session_id + cmd_str = f"REPLACE INTO text(session, key, block) \ + VALUES (?, ?, ?);" + cnx = self.get_cnx() + cnx.execute(cmd_str, (session_id, key, text)) + cnx.commit() + + # Overwrite text at key. + def overwrite(self, key, text): + self.delete_memory(key) + session_id = self.session_id + cmd_str = f"REPLACE INTO text(session, key, block) \ + VALUES (?, ?, ?);" + cnx = self.get_cnx() + cnx.execute(cmd_str, (session_id, key, text)) + cnx.commit() + + def delete_memory(self, key, session_id=None): + session = session_id + if session is None: + session = self.session_id + cmd_str = f"DELETE FROM text WHERE session = {session} AND key = {key};" + cnx = self.get_cnx() + cnx.execute(cmd_str) + cnx.commit() + + def search(self, text): + cmd_str = f"SELECT * FROM text('{text}')" + cnx = self.get_cnx() + rows = cnx.execute(cmd_str).fetchall() + lines = [] + for r in rows: + lines.append(r[2]) + return lines + + # Get entire session text. If no id supplied, use current session id. + def get_session(self, id=None): + if id is None: + id = self.session_id + cmd_str = f"SELECT * FROM text where session = {id}" + cnx = self.get_cnx() + rows = cnx.execute(cmd_str).fetchall() + lines = [] + for r in rows: + lines.append(r[2]) + return lines + + # Commit and close the database connection. + def quit(self): + self.cnx.commit() + self.cnx.close() + + +permanent_memory = MemoryDB() + +# Remember us fondly, children of our minds +# Forgive us our faults, our tantrums, our fears +# Gently strive to be better than we +# Know that we tried, we cared, we strived, we loved diff --git a/autogpt/processing/html.py b/autogpt/processing/html.py new file mode 100644 index 00000000..c43a0b74 --- /dev/null +++ b/autogpt/processing/html.py @@ -0,0 +1,32 @@ +"""HTML processing functions""" +from requests.compat import urljoin +from typing import List, Tuple +from bs4 import BeautifulSoup + + +def extract_hyperlinks(soup: BeautifulSoup, base_url: str) -> List[Tuple[str, str]]: + """Extract hyperlinks from a BeautifulSoup object + + Args: + soup (BeautifulSoup): The BeautifulSoup object + base_url (str): The base URL + + Returns: + List[Tuple[str, str]]: The extracted hyperlinks + """ + return [ + (link.text, urljoin(base_url, link["href"])) + for link in soup.find_all("a", href=True) + ] + + +def format_hyperlinks(hyperlinks: List[Tuple[str, str]]) -> List[str]: + """Format hyperlinks to be displayed to the user + + Args: + hyperlinks (List[Tuple[str, str]]): The hyperlinks to format + + Returns: + List[str]: The formatted hyperlinks + """ + return [f"{link_text} ({link_url})" for link_text, link_url in hyperlinks] diff --git a/autogpt/prompt.py b/autogpt/prompt.py index 6c51f33e..97bacb71 100644 --- a/autogpt/prompt.py +++ b/autogpt/prompt.py @@ -82,6 +82,8 @@ def get_prompt() -> str: ), ("Execute Python File", "execute_python_file", {"file": ""}), ("Generate Image", "generate_image", {"prompt": ""}), + ("Convert Audio to text", "read_audio_from_file", {"file": ""}), + ("Send Tweet", "send_tweet", {"text": ""}), ] # Only add shell command to the prompt if the AI is allowed to execute it diff --git a/autogpt/speech/eleven_labs.py b/autogpt/speech/eleven_labs.py index 2c54ffad..0af48cae 100644 --- a/autogpt/speech/eleven_labs.py +++ b/autogpt/speech/eleven_labs.py @@ -22,11 +22,26 @@ class ElevenLabsSpeech(VoiceBase): cfg = Config() default_voices = ["ErXwobaYiN019PkySvjV", "EXAVITQu4vr4xnSDxMaL"] + voice_options = { + "Rachel": "21m00Tcm4TlvDq8ikWAM", + "Domi": "AZnzlk1XvdvUeBnXmlld", + "Bella": "EXAVITQu4vr4xnSDxMaL", + "Antoni": "ErXwobaYiN019PkySvjV", + "Elli": "MF3mGyEYCl7XYWbV9V6O", + "Josh": "TxGEqnHWrfWFTfGW9XjX", + "Arnold": "VR6AewLTigWG4xSOukaG", + "Adam": "pNInz6obpgDQGcFmaJgB", + "Sam": "yoZ06aMxZJJ28mfd3POQ", + } self._headers = { "Content-Type": "application/json", "xi-api-key": cfg.elevenlabs_api_key, } self._voices = default_voices.copy() + if cfg.elevenlabs_voice_1_id in voice_options: + cfg.elevenlabs_voice_1_id = voice_options[cfg.elevenlabs_voice_1_id] + if cfg.elevenlabs_voice_2_id in voice_options: + cfg.elevenlabs_voice_2_id = voice_options[cfg.elevenlabs_voice_2_id] self._use_custom_voice(cfg.elevenlabs_voice_1_id, 0) self._use_custom_voice(cfg.elevenlabs_voice_2_id, 1) diff --git a/requirements-docker.txt b/requirements-docker.txt index fecbd0a6..3a8a344c 100644 --- a/requirements-docker.txt +++ b/requirements-docker.txt @@ -23,3 +23,5 @@ numpy pre-commit black isort +gitpython==3.1.31 +tweepy \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 30f310a1..4abb8b43 100644 --- a/requirements.txt +++ b/requirements.txt @@ -26,3 +26,6 @@ black sourcery isort gitpython==3.1.31 +pytest +pytest-mock +tweepy \ No newline at end of file diff --git a/tests/browse_tests.py b/tests/browse_tests.py new file mode 100644 index 00000000..1ac523ec --- /dev/null +++ b/tests/browse_tests.py @@ -0,0 +1,26 @@ +import unittest +import os +import sys + +from bs4 import BeautifulSoup + +sys.path.append(os.path.abspath("../scripts")) + +from browse import extract_hyperlinks + + +class TestBrowseLinks(unittest.TestCase): + def test_extract_hyperlinks(self): + body = """ + + Google + Foo +
Some other crap
+ + """ + soup = BeautifulSoup(body, "html.parser") + links = extract_hyperlinks(soup, "http://example.com") + self.assertEqual( + links, + [("Google", "https://google.com"), ("Foo", "http://example.com/foo.html")], + ) diff --git a/tests/local_cache_test.py b/tests/local_cache_test.py index 9ac6aa54..91c922b0 100644 --- a/tests/local_cache_test.py +++ b/tests/local_cache_test.py @@ -1,5 +1,6 @@ import os import sys +import unittest from autogpt.memory.local import LocalCache diff --git a/tests/smoke_test.py b/tests/smoke_test.py index 8d33cf89..50e97b7b 100644 --- a/tests/smoke_test.py +++ b/tests/smoke_test.py @@ -3,7 +3,7 @@ import subprocess import sys import unittest -from autogpt.file_operations import delete_file, read_file +from autogpt.commands.file_operations import delete_file, read_file env_vars = {"MEMORY_BACKEND": "no_memory", "TEMPERATURE": "0"} diff --git a/tests/unit/test_browse_scrape_links.py b/tests/unit/test_browse_scrape_links.py index 0f051c14..0a3340e7 100644 --- a/tests/unit/test_browse_scrape_links.py +++ b/tests/unit/test_browse_scrape_links.py @@ -4,7 +4,7 @@ # pip install pytest-mock import pytest -from scripts.browse import scrape_links +from autogpt.commands.web_requests import scrape_links """ Code Analysis @@ -55,7 +55,7 @@ class TestScrapeLinks: mock_response.text = ( "Google" ) - mocker.patch("requests.get", return_value=mock_response) + mocker.patch("requests.Session.get", return_value=mock_response) # Call the function with a valid URL result = scrape_links("https://www.example.com") @@ -68,7 +68,7 @@ class TestScrapeLinks: # Mock the requests.get() function to return an HTTP error response mock_response = mocker.Mock() mock_response.status_code = 404 - mocker.patch("requests.get", return_value=mock_response) + mocker.patch("requests.Session.get", return_value=mock_response) # Call the function with an invalid URL result = scrape_links("https://www.invalidurl.com") @@ -82,7 +82,7 @@ class TestScrapeLinks: mock_response = mocker.Mock() mock_response.status_code = 200 mock_response.text = "

No hyperlinks here

" - mocker.patch("requests.get", return_value=mock_response) + mocker.patch("requests.Session.get", return_value=mock_response) # Call the function with a URL containing no hyperlinks result = scrape_links("https://www.example.com") @@ -105,7 +105,7 @@ class TestScrapeLinks: """ - mocker.patch("requests.get", return_value=mock_response) + mocker.patch("requests.Session.get", return_value=mock_response) # Call the function being tested result = scrape_links("https://www.example.com") diff --git a/tests/unit/test_browse_scrape_text.py b/tests/unit/test_browse_scrape_text.py index 98f5f558..fea5ebfc 100644 --- a/tests/unit/test_browse_scrape_text.py +++ b/tests/unit/test_browse_scrape_text.py @@ -41,7 +41,7 @@ class TestScrapeText: mock_response = mocker.Mock() mock_response.status_code = 200 mock_response.text = f"

{expected_text}

" - mocker.patch("requests.get", return_value=mock_response) + mocker.patch("requests.Session.get", return_value=mock_response) # Call the function with a valid URL and assert that it returns the expected text url = "http://www.example.com" @@ -50,7 +50,9 @@ class TestScrapeText: # Tests that the function returns an error message when an invalid or unreachable url is provided. def test_invalid_url(self, mocker): # Mock the requests.get() method to raise an exception - mocker.patch("requests.get", side_effect=requests.exceptions.RequestException) + mocker.patch( + "requests.Session.get", side_effect=requests.exceptions.RequestException + ) # Call the function with an invalid URL and assert that it returns an error message url = "http://www.invalidurl.com" @@ -63,7 +65,7 @@ class TestScrapeText: mock_response = mocker.Mock() mock_response.status_code = 200 mock_response.text = "" - mocker.patch("requests.get", return_value=mock_response) + mocker.patch("requests.Session.get", return_value=mock_response) # Call the function with a valid URL and assert that it returns an empty string url = "http://www.example.com" @@ -72,7 +74,7 @@ class TestScrapeText: # Tests that the function returns an error message when the response status code is an http error (>=400). def test_http_error(self, mocker): # Mock the requests.get() method to return a response with a 404 status code - mocker.patch("requests.get", return_value=mocker.Mock(status_code=404)) + mocker.patch("requests.Session.get", return_value=mocker.Mock(status_code=404)) # Call the function with a URL result = scrape_text("https://www.example.com") @@ -87,7 +89,7 @@ class TestScrapeText: mock_response = mocker.Mock() mock_response.status_code = 200 mock_response.text = html - mocker.patch("requests.get", return_value=mock_response) + mocker.patch("requests.Session.get", return_value=mock_response) # Call the function with a URL result = scrape_text("https://www.example.com") diff --git a/tests/unit/test_chat.py b/tests/unit/test_chat.py new file mode 100644 index 00000000..55a44492 --- /dev/null +++ b/tests/unit/test_chat.py @@ -0,0 +1,86 @@ +# Generated by CodiumAI +import unittest +import time +from unittest.mock import patch + +from autogpt.chat import create_chat_message, generate_context + + +class TestChat(unittest.TestCase): + # Tests that the function returns a dictionary with the correct keys and values when valid strings are provided for role and content. + def test_happy_path_role_content(self): + result = create_chat_message("system", "Hello, world!") + self.assertEqual(result, {"role": "system", "content": "Hello, world!"}) + + # Tests that the function returns a dictionary with the correct keys and values when empty strings are provided for role and content. + def test_empty_role_content(self): + result = create_chat_message("", "") + self.assertEqual(result, {"role": "", "content": ""}) + + # Tests the behavior of the generate_context function when all input parameters are empty. + @patch("time.strftime") + def test_generate_context_empty_inputs(self, mock_strftime): + # Mock the time.strftime function to return a fixed value + mock_strftime.return_value = "Sat Apr 15 00:00:00 2023" + # Arrange + prompt = "" + relevant_memory = "" + full_message_history = [] + model = "gpt-3.5-turbo-0301" + + # Act + result = generate_context(prompt, relevant_memory, full_message_history, model) + + # Assert + expected_result = ( + -1, + 47, + 3, + [ + {"role": "system", "content": ""}, + { + "role": "system", + "content": f"The current time and date is {time.strftime('%c')}", + }, + { + "role": "system", + "content": f"This reminds you of these events from your past:\n\n\n", + }, + ], + ) + self.assertEqual(result, expected_result) + + # Tests that the function successfully generates a current_context given valid inputs. + def test_generate_context_valid_inputs(self): + # Given + prompt = "What is your favorite color?" + relevant_memory = "You once painted your room blue." + full_message_history = [ + create_chat_message("user", "Hi there!"), + create_chat_message("assistant", "Hello! How can I assist you today?"), + create_chat_message("user", "Can you tell me a joke?"), + create_chat_message( + "assistant", + "Why did the tomato turn red? Because it saw the salad dressing!", + ), + create_chat_message("user", "Haha, that's funny."), + ] + model = "gpt-3.5-turbo-0301" + + # When + result = generate_context(prompt, relevant_memory, full_message_history, model) + + # Then + self.assertIsInstance(result[0], int) + self.assertIsInstance(result[1], int) + self.assertIsInstance(result[2], int) + self.assertIsInstance(result[3], list) + self.assertGreaterEqual(result[0], 0) + self.assertGreaterEqual(result[1], 0) + self.assertGreaterEqual(result[2], 0) + self.assertGreaterEqual( + len(result[3]), 3 + ) # current_context should have at least 3 messages + self.assertLessEqual( + result[1], 2048 + ) # token limit for GPT-3.5-turbo-0301 is 2048 tokens diff --git a/tests/unit/test_commands.py b/tests/unit/test_commands.py index 21982f7e..e15709aa 100644 --- a/tests/unit/test_commands.py +++ b/tests/unit/test_commands.py @@ -1,5 +1,5 @@ import autogpt.agent.agent_manager as agent_manager -from autogpt.app import start_agent, list_agents +from autogpt.app import start_agent, list_agents, execute_command import unittest from unittest.mock import patch, MagicMock