Merge branch 'Significant-Gravitas:master' into master

Authored by Slowly-Grokking on 2023-04-15 18:09:58 -05:00; committed via GitHub.
20 changed files with 335 additions and 82 deletions

View File

@@ -132,3 +132,12 @@ USE_BRIAN_TTS=False
ELEVENLABS_API_KEY=your-elevenlabs-api-key
ELEVENLABS_VOICE_1_ID=your-voice-id-1
ELEVENLABS_VOICE_2_ID=your-voice-id-2
+################################################################################
+### TWITTER API
+################################################################################
+TW_CONSUMER_KEY=
+TW_CONSUMER_SECRET=
+TW_ACCESS_TOKEN=
+TW_ACCESS_TOKEN_SECRET=

View File

@@ -115,7 +115,7 @@ cd Auto-GPT
pip install -r requirements.txt
```
-5. Rename `.env.template` to `.env` and fill in your `OPENAI_API_KEY`. If you plan to use Speech Mode, fill in your `ELEVEN_LABS_API_KEY` as well.
+5. Rename `.env.template` to `.env` and fill in your `OPENAI_API_KEY`. If you plan to use Speech Mode, fill in your `ELEVENLABS_API_KEY` as well.
- See [OpenAI API Keys Configuration](#openai-api-keys-configuration) to obtain your OpenAI API key.
- Obtain your ElevenLabs API key from: https://elevenlabs.io. You can view your xi-api-key using the "Profile" tab on the website.
- If you want to use GPT on an Azure instance, set `USE_AZURE` to `True` and then follow these steps:
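For reference, a minimal sketch of the resulting `.env` for the basic (non-Azure) path, using only keys that appear elsewhere in this diff and README; all values are placeholders, and the Azure-specific steps themselves are not shown in this excerpt:

```
OPENAI_API_KEY=your-openai-api-key
ELEVENLABS_API_KEY=your-elevenlabs-api-key
ELEVENLABS_VOICE_1_ID=your-voice-id-1
USE_AZURE=False
```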

View File

@@ -23,6 +23,7 @@ from autogpt.processing.text import summarize_text
from autogpt.speech import say_text
from autogpt.commands.web_selenium import browse_website
from autogpt.commands.git_operations import clone_repository
from autogpt.commands.twitter import send_tweet
CFG = Config()
@@ -181,6 +182,8 @@ def execute_command(command_name: str, arguments):
        )
    elif command_name == "generate_image":
        return generate_image(arguments["prompt"])
+    elif command_name == "send_tweet":
+        return send_tweet(arguments['text'])
    elif command_name == "do_nothing":
        return "No action performed."
    elif command_name == "task_complete":

View File

@@ -0,0 +1,25 @@
import tweepy
import os
from dotenv import load_dotenv

load_dotenv()


def send_tweet(tweet_text):
    consumer_key = os.environ.get("TW_CONSUMER_KEY")
    consumer_secret = os.environ.get("TW_CONSUMER_SECRET")
    access_token = os.environ.get("TW_ACCESS_TOKEN")
    access_token_secret = os.environ.get("TW_ACCESS_TOKEN_SECRET")
    # Authenticate to Twitter
    auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_token, access_token_secret)
    # Create API object
    api = tweepy.API(auth)
    # Send tweet
    try:
        api.update_status(tweet_text)
        print("Tweet sent successfully!")
    except tweepy.TweepyException as e:
        # tweepy v4's TweepyException has no `.reason` attribute (that was
        # v3's TweepError), so format the exception itself.
        print("Error sending tweet: {}".format(e))

View File

@@ -0,0 +1,78 @@
"""Web scraping commands using Playwright"""
try:
from playwright.sync_api import sync_playwright
except ImportError:
print(
"Playwright not installed. Please install it with 'pip install playwright' to use."
)
from bs4 import BeautifulSoup
from autogpt.processing.html import extract_hyperlinks, format_hyperlinks
from typing import List, Union
def scrape_text(url: str) -> str:
"""Scrape text from a webpage
Args:
url (str): The URL to scrape text from
Returns:
str: The scraped text
"""
with sync_playwright() as p:
browser = p.chromium.launch()
page = browser.new_page()
try:
page.goto(url)
html_content = page.content()
soup = BeautifulSoup(html_content, "html.parser")
for script in soup(["script", "style"]):
script.extract()
text = soup.get_text()
lines = (line.strip() for line in text.splitlines())
chunks = (phrase.strip() for line in lines for phrase in line.split(" "))
text = "\n".join(chunk for chunk in chunks if chunk)
except Exception as e:
text = f"Error: {str(e)}"
finally:
browser.close()
return text
def scrape_links(url: str) -> Union[str, List[str]]:
"""Scrape links from a webpage
Args:
url (str): The URL to scrape links from
Returns:
Union[str, List[str]]: The scraped links
"""
with sync_playwright() as p:
browser = p.chromium.launch()
page = browser.new_page()
try:
page.goto(url)
html_content = page.content()
soup = BeautifulSoup(html_content, "html.parser")
for script in soup(["script", "style"]):
script.extract()
hyperlinks = extract_hyperlinks(soup, url)
formatted_links = format_hyperlinks(hyperlinks)
except Exception as e:
formatted_links = f"Error: {str(e)}"
finally:
browser.close()
return formatted_links
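A quick sanity check for the new Playwright backend, assuming the module lands at `autogpt/commands/web_playwright.py` (the diff does not show the path) and the browser binaries are installed (`pip install playwright`, then `playwright install chromium`):

```python
from autogpt.commands.web_playwright import scrape_text, scrape_links

# Each helper launches a headless Chromium instance and closes it in its
# `finally` block, so no manual cleanup is needed.
print(scrape_text("https://example.com"))
print(scrape_links("https://example.com"))
```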

View File

@@ -3,11 +3,13 @@ from typing import List, Tuple, Union
from urllib.parse import urljoin, urlparse
import requests
from requests.compat import urljoin
from requests import Response
from bs4 import BeautifulSoup
from autogpt.config import Config
from autogpt.memory import get_memory
from autogpt.processing.html import extract_hyperlinks, format_hyperlinks
CFG = Config()
memory = get_memory(CFG)
@@ -134,36 +136,6 @@ def scrape_text(url: str) -> str:
return text
-def extract_hyperlinks(soup: BeautifulSoup) -> List[Tuple[str, str]]:
-    """Extract hyperlinks from a BeautifulSoup object
-
-    Args:
-        soup (BeautifulSoup): The BeautifulSoup object
-
-    Returns:
-        List[Tuple[str, str]]: The extracted hyperlinks
-    """
-    hyperlinks = []
-    for link in soup.find_all("a", href=True):
-        hyperlinks.append((link.text, link["href"]))
-    return hyperlinks
-
-
-def format_hyperlinks(hyperlinks: List[Tuple[str, str]]) -> List[str]:
-    """Format hyperlinks into a list of strings
-
-    Args:
-        hyperlinks (List[Tuple[str, str]]): The hyperlinks to format
-
-    Returns:
-        List[str]: The formatted hyperlinks
-    """
-    formatted_links = []
-    for link_text, link_url in hyperlinks:
-        formatted_links.append(f"{link_text} ({link_url})")
-    return formatted_links
def scrape_links(url: str) -> Union[str, List[str]]:
"""Scrape links from a webpage
@@ -183,7 +155,7 @@ def scrape_links(url: str) -> Union[str, List[str]]:
    for script in soup(["script", "style"]):
        script.extract()

-    hyperlinks = extract_hyperlinks(soup)
+    hyperlinks = extract_hyperlinks(soup, url)

    return format_hyperlinks(hyperlinks)

View File

@@ -1,5 +1,6 @@
"""Selenium web scraping module."""
from selenium import webdriver
+from autogpt.processing.html import extract_hyperlinks, format_hyperlinks
import autogpt.processing.text as summary
from bs4 import BeautifulSoup
from selenium.webdriver.remote.webdriver import WebDriver
@@ -33,7 +34,7 @@ def browse_website(url: str, question: str) -> Tuple[str, WebDriver]:
driver, text = scrape_text_with_selenium(url)
add_header(driver)
summary_text = summary.summarize_text(url, text, question, driver)
-    links = scrape_links_with_selenium(driver)
+    links = scrape_links_with_selenium(driver, url)
# Limit links to 5
if len(links) > 5:
@@ -96,7 +97,7 @@ def scrape_text_with_selenium(url: str) -> Tuple[WebDriver, str]:
return driver, text
-def scrape_links_with_selenium(driver: WebDriver) -> List[str]:
+def scrape_links_with_selenium(driver: WebDriver, url: str) -> List[str]:
"""Scrape links from a website using selenium
Args:
@@ -111,7 +112,7 @@ def scrape_links_with_selenium(driver: WebDriver) -> List[str]:
    for script in soup(["script", "style"]):
        script.extract()

-    hyperlinks = extract_hyperlinks(soup)
+    hyperlinks = extract_hyperlinks(soup, url)

    return format_hyperlinks(hyperlinks)
@@ -128,30 +129,6 @@ def close_browser(driver: WebDriver) -> None:
driver.quit()
-def extract_hyperlinks(soup: BeautifulSoup) -> List[Tuple[str, str]]:
-    """Extract hyperlinks from a BeautifulSoup object
-
-    Args:
-        soup (BeautifulSoup): The BeautifulSoup object to extract the hyperlinks from
-
-    Returns:
-        List[Tuple[str, str]]: The hyperlinks extracted from the BeautifulSoup object
-    """
-    return [(link.text, link["href"]) for link in soup.find_all("a", href=True)]
-
-
-def format_hyperlinks(hyperlinks: List[Tuple[str, str]]) -> List[str]:
-    """Format hyperlinks to be displayed to the user
-
-    Args:
-        hyperlinks (List[Tuple[str, str]]): The hyperlinks to format
-
-    Returns:
-        List[str]: The formatted hyperlinks
-    """
-    return [f"{link_text} ({link_url})" for link_text, link_url in hyperlinks]
def add_header(driver: WebDriver) -> None:
"""Add a header to the website

View File

@@ -22,11 +22,12 @@ def fix_json(json_string: str, schema: str) -> str:
    function_string = "def fix_json(json_string: str, schema:str=None) -> str:"
    args = [f"'''{json_string}'''", f"'''{schema}'''"]
    description_string = (
-        "Fixes the provided JSON string to make it parseable"
-        " and fully compliant with the provided schema.\n If an object or"
-        " field specified in the schema isn't contained within the correct"
-        " JSON, it is omitted.\n This function is brilliant at guessing"
-        " when the format is incorrect."
+        "This function takes a JSON string and ensures that it"
+        " is parseable and fully compliant with the provided schema. If an object"
+        " or field specified in the schema isn't contained within the correct JSON,"
+        " it is omitted. The function also escapes any double quotes within JSON"
+        " string values to ensure that they are valid. If the JSON string contains"
+        " any None or NaN values, they are replaced with null before being parsed."
    )
# If it doesn't already start with a "`", add one:

View File

@@ -126,13 +126,16 @@ def create_embedding_with_ada(text) -> list:
        backoff = 2 ** (attempt + 2)
        try:
            if CFG.use_azure:
-                return openai.Embedding.create(input=[text],
-                    engine=CFG.get_azure_deployment_id_for_model("text-embedding-ada-002"),
+                return openai.Embedding.create(
+                    input=[text],
+                    engine=CFG.get_azure_deployment_id_for_model(
+                        "text-embedding-ada-002"
+                    ),
                )["data"][0]["embedding"]
            else:
-                return openai.Embedding.create(input=[text], model="text-embedding-ada-002")[
-                    "data"
-                ][0]["embedding"]
+                return openai.Embedding.create(
+                    input=[text], model="text-embedding-ada-002"
+                )["data"][0]["embedding"]
        except RateLimitError:
            pass
        except APIError as e:
@@ -148,4 +151,3 @@ def create_embedding_with_ada(text) -> list:
f"API Bad gateway. Waiting {backoff} seconds..." + Fore.RESET,
)
time.sleep(backoff)
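Since `backoff = 2 ** (attempt + 2)`, the retry delays double from a 4-second floor: attempts 0, 1, 2, and 3 wait 4, 8, 16, and 32 seconds respectively.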

View File

@@ -0,0 +1,123 @@
import os
import sqlite3


class MemoryDB:
    def __init__(self, db=None):
        self.db_file = db
        if db is None:  # No db filename supplied...
            self.db_file = f"{os.getcwd()}/mem.sqlite3"  # Use default filename
        # Get the db connection object, making the file and tables if needed.
        try:
            self.cnx = sqlite3.connect(self.db_file)
        except Exception as e:
            print("Exception connecting to memory database file:", e)
            self.cnx = None
        finally:
            if self.cnx is None:
                # As last resort, open in dynamic memory. Won't be persistent.
                self.db_file = ":memory:"
                self.cnx = sqlite3.connect(self.db_file)
            self.cnx.execute(
                "CREATE VIRTUAL TABLE \
                IF NOT EXISTS text USING FTS5 \
                (session, \
                key, \
                block);"
            )
            self.session_id = int(self.get_max_session_id()) + 1
            self.cnx.commit()

    def get_cnx(self):
        if self.cnx is None:
            self.cnx = sqlite3.connect(self.db_file)
        return self.cnx

    # Get the highest session id. Initially 0.
    def get_max_session_id(self):
        id = None
        cmd_str = f"SELECT MAX(session) FROM text;"
        cnx = self.get_cnx()
        max_id = cnx.execute(cmd_str).fetchone()[0]
        if max_id is None:  # New db, session 0
            id = 0
        else:
            id = max_id
        return id

    # Get next key id for inserting text into db.
    def get_next_key(self):
        next_key = None
        cmd_str = f"SELECT MAX(key) FROM text \
            where session = {self.session_id};"
        cnx = self.get_cnx()
        next_key = cnx.execute(cmd_str).fetchone()[0]
        if next_key is None:  # First key
            next_key = 0
        else:
            next_key = int(next_key) + 1
        return next_key

    # Insert new text into db.
    def insert(self, text=None):
        if text is not None:
            key = self.get_next_key()
            session_id = self.session_id
            cmd_str = f"REPLACE INTO text(session, key, block) \
                VALUES (?, ?, ?);"
            cnx = self.get_cnx()
            cnx.execute(cmd_str, (session_id, key, text))
            cnx.commit()

    # Overwrite text at key.
    def overwrite(self, key, text):
        self.delete_memory(key)
        session_id = self.session_id
        cmd_str = f"REPLACE INTO text(session, key, block) \
            VALUES (?, ?, ?);"
        cnx = self.get_cnx()
        cnx.execute(cmd_str, (session_id, key, text))
        cnx.commit()

    def delete_memory(self, key, session_id=None):
        session = session_id
        if session is None:
            session = self.session_id
        cmd_str = f"DELETE FROM text WHERE session = {session} AND key = {key};"
        cnx = self.get_cnx()
        cnx.execute(cmd_str)
        cnx.commit()

    def search(self, text):
        cmd_str = f"SELECT * FROM text('{text}')"
        cnx = self.get_cnx()
        rows = cnx.execute(cmd_str).fetchall()
        lines = []
        for r in rows:
            lines.append(r[2])
        return lines

    # Get entire session text. If no id supplied, use current session id.
    def get_session(self, id=None):
        if id is None:
            id = self.session_id
        cmd_str = f"SELECT * FROM text where session = {id}"
        cnx = self.get_cnx()
        rows = cnx.execute(cmd_str).fetchall()
        lines = []
        for r in rows:
            lines.append(r[2])
        return lines

    # Commit and close the database connection.
    def quit(self):
        self.cnx.commit()
        self.cnx.close()


permanent_memory = MemoryDB()

# Remember us fondly, children of our minds
# Forgive us our faults, our tantrums, our fears
# Gently strive to be better than we
# Know that we tried, we cared, we strived, we loved
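A short sketch of how this SQLite-backed memory could be exercised, using only the methods defined above; the import path is an assumption, since the diff does not show the file name. The FTS5 virtual table is what makes `search` a full-text match rather than an exact-key lookup:

```python
from autogpt.memory_sqlite import MemoryDB  # hypothetical import path

db = MemoryDB(":memory:")  # purely in-memory; nothing persisted to disk
db.insert("The quick brown fox")
db.insert("jumped over the lazy dog")
print(db.search("fox"))   # -> ['The quick brown fox']
print(db.get_session())   # every block stored in this session
db.quit()
```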

View File

@@ -0,0 +1,32 @@
"""HTML processing functions"""
from requests.compat import urljoin
from typing import List, Tuple
from bs4 import BeautifulSoup
def extract_hyperlinks(soup: BeautifulSoup, base_url: str) -> List[Tuple[str, str]]:
"""Extract hyperlinks from a BeautifulSoup object
Args:
soup (BeautifulSoup): The BeautifulSoup object
base_url (str): The base URL
Returns:
List[Tuple[str, str]]: The extracted hyperlinks
"""
return [
(link.text, urljoin(base_url, link["href"]))
for link in soup.find_all("a", href=True)
]
def format_hyperlinks(hyperlinks: List[Tuple[str, str]]) -> List[str]:
"""Format hyperlinks to be displayed to the user
Args:
hyperlinks (List[Tuple[str, str]]): The hyperlinks to format
Returns:
List[str]: The formatted hyperlinks
"""
return [f"{link_text} ({link_url})" for link_text, link_url in hyperlinks]
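The new `base_url` parameter is the substantive change here: relative `href`s are now resolved against the page URL via `urljoin`, consistent with the unit test later in this diff. For example:

```python
from bs4 import BeautifulSoup
from autogpt.processing.html import extract_hyperlinks, format_hyperlinks

soup = BeautifulSoup('<a href="foo.html">Foo</a>', "html.parser")
links = extract_hyperlinks(soup, "http://example.com")
print(links)                     # [('Foo', 'http://example.com/foo.html')]
print(format_hyperlinks(links))  # ['Foo (http://example.com/foo.html)']
```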

View File

@@ -82,6 +82,8 @@ def get_prompt() -> str:
        ),
        ("Execute Python File", "execute_python_file", {"file": "<file>"}),
        ("Generate Image", "generate_image", {"prompt": "<prompt>"}),
+        ("Send Tweet", "send_tweet", {"text": "<text>"}),
    ]
# Only add shell command to the prompt if the AI is allowed to execute it

View File

@@ -25,3 +25,5 @@ black
sourcery
isort
gitpython==3.1.31
+pytest
+pytest-mock

tests/browse_tests.py
View File

@@ -0,0 +1,26 @@
import unittest
import os
import sys

from bs4 import BeautifulSoup

sys.path.append(os.path.abspath("../scripts"))

from browse import extract_hyperlinks


class TestBrowseLinks(unittest.TestCase):
    def test_extract_hyperlinks(self):
        body = """
        <body>
        <a href="https://google.com">Google</a>
        <a href="foo.html">Foo</a>
        <div>Some other crap</div>
        </body>
        """
        soup = BeautifulSoup(body, "html.parser")
        links = extract_hyperlinks(soup, "http://example.com")
        self.assertEqual(
            links,
            [("Google", "https://google.com"), ("Foo", "http://example.com/foo.html")],
        )

View File

@@ -1,5 +1,6 @@
import os
import sys
import unittest
from autogpt.memory.local import LocalCache

View File

@@ -3,7 +3,7 @@ import subprocess
import sys
import unittest
-from autogpt.file_operations import delete_file, read_file
+from autogpt.commands.file_operations import delete_file, read_file
env_vars = {"MEMORY_BACKEND": "no_memory", "TEMPERATURE": "0"}

View File

@@ -4,7 +4,7 @@
# pip install pytest-mock
import pytest
-from scripts.browse import scrape_links
+from autogpt.commands.web_requests import scrape_links
"""
Code Analysis
@@ -55,7 +55,7 @@ class TestScrapeLinks:
mock_response.text = (
"<html><body><a href='https://www.google.com'>Google</a></body></html>"
)
mocker.patch("requests.get", return_value=mock_response)
mocker.patch("requests.Session.get", return_value=mock_response)
# Call the function with a valid URL
result = scrape_links("https://www.example.com")
@@ -68,7 +68,7 @@ class TestScrapeLinks:
# Mock the requests.get() function to return an HTTP error response
mock_response = mocker.Mock()
mock_response.status_code = 404
mocker.patch("requests.get", return_value=mock_response)
mocker.patch("requests.Session.get", return_value=mock_response)
# Call the function with an invalid URL
result = scrape_links("https://www.invalidurl.com")
@@ -82,7 +82,7 @@ class TestScrapeLinks:
mock_response = mocker.Mock()
mock_response.status_code = 200
mock_response.text = "<html><body><p>No hyperlinks here</p></body></html>"
mocker.patch("requests.get", return_value=mock_response)
mocker.patch("requests.Session.get", return_value=mock_response)
# Call the function with a URL containing no hyperlinks
result = scrape_links("https://www.example.com")
@@ -105,7 +105,7 @@ class TestScrapeLinks:
</body>
</html>
"""
mocker.patch("requests.get", return_value=mock_response)
mocker.patch("requests.Session.get", return_value=mock_response)
# Call the function being tested
result = scrape_links("https://www.example.com")
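These tests now patch `requests.Session.get` rather than `requests.get`, which suggests the rewritten `web_requests` module issues fetches through a shared `Session` object (the module body is not fully shown in this diff). A hedged sketch of that pattern:

```python
import requests

session = requests.Session()

def get_page(url: str) -> requests.Response:
    # Because the fetch goes through a Session instance, tests must patch
    # requests.Session.get; patching module-level requests.get has no effect.
    return session.get(url)
```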

View File

@@ -41,7 +41,7 @@ class TestScrapeText:
mock_response = mocker.Mock()
mock_response.status_code = 200
mock_response.text = f"<html><body><div><p style='color: blue;'>{expected_text}</p></div></body></html>"
mocker.patch("requests.get", return_value=mock_response)
mocker.patch("requests.Session.get", return_value=mock_response)
# Call the function with a valid URL and assert that it returns the expected text
url = "http://www.example.com"
@@ -50,7 +50,7 @@ class TestScrapeText:
# Tests that the function returns an error message when an invalid or unreachable url is provided.
def test_invalid_url(self, mocker):
# Mock the requests.get() method to raise an exception
mocker.patch("requests.get", side_effect=requests.exceptions.RequestException)
mocker.patch("requests.Session.get", side_effect=requests.exceptions.RequestException)
# Call the function with an invalid URL and assert that it returns an error message
url = "http://www.invalidurl.com"
@@ -63,7 +63,7 @@ class TestScrapeText:
mock_response = mocker.Mock()
mock_response.status_code = 200
mock_response.text = "<html><body></body></html>"
mocker.patch("requests.get", return_value=mock_response)
mocker.patch("requests.Session.get", return_value=mock_response)
# Call the function with a valid URL and assert that it returns an empty string
url = "http://www.example.com"
@@ -72,7 +72,7 @@ class TestScrapeText:
# Tests that the function returns an error message when the response status code is an http error (>=400).
def test_http_error(self, mocker):
# Mock the requests.get() method to return a response with a 404 status code
mocker.patch("requests.get", return_value=mocker.Mock(status_code=404))
mocker.patch("requests.Session.get", return_value=mocker.Mock(status_code=404))
# Call the function with a URL
result = scrape_text("https://www.example.com")
@@ -87,7 +87,7 @@ class TestScrapeText:
mock_response = mocker.Mock()
mock_response.status_code = 200
mock_response.text = html
mocker.patch("requests.get", return_value=mock_response)
mocker.patch("requests.Session.get", return_value=mock_response)
# Call the function with a URL
result = scrape_text("https://www.example.com")

View File

@@ -1,5 +1,5 @@
import autogpt.agent.agent_manager as agent_manager
-from autogpt.app import start_agent, list_agents
+from autogpt.app import start_agent, list_agents, execute_command
import unittest
from unittest.mock import patch, MagicMock