From 1e073736963d1789774ee2f532dfd3eeeec03fe5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CPhilip?= Date: Mon, 3 Apr 2023 14:58:27 +0100 Subject: [PATCH 01/18] Fix JSON string escaping issue Fixes an issue where double quotes were not being escaped in JSON strings, causing parse errors. --- scripts/json_parser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/json_parser.py b/scripts/json_parser.py index 11ff9ed2..c36b2c14 100644 --- a/scripts/json_parser.py +++ b/scripts/json_parser.py @@ -52,7 +52,7 @@ def fix_json(json_str: str, schema: str, debug=False) -> str: # Try to fix the JSON using gpt: function_string = "def fix_json(json_str: str, schema:str=None) -> str:" args = [json_str, schema] - description_string = """Fixes the provided JSON string to make it parseable and fully complient with the provided schema.\n If an object or field specifed in the schema isn't contained within the correct JSON, it is ommited.\n This function is brilliant at guessing when the format is incorrect.""" + description_string = """This function takes a JSON string and ensures that it is parseable and fully compliant with the provided schema. If an object or field specified in the schema isn't contained within the correct JSON, it is omitted. The function also escapes any double quotes in the JSON string to ensure that it is valid.""" # If it doesn't already start with a "`", add one: if not json_str.startswith("`"): From f20d6f3fdb731c43910ba49916a7c8e2e1fd9eb5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CPhilip?= Date: Mon, 3 Apr 2023 15:07:47 +0100 Subject: [PATCH 02/18] Breaking on None and NaN values returned fix by converting to valid null value for JSON --- scripts/json_parser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/json_parser.py b/scripts/json_parser.py index c36b2c14..90bf83db 100644 --- a/scripts/json_parser.py +++ b/scripts/json_parser.py @@ -52,7 +52,7 @@ def fix_json(json_str: str, schema: str, debug=False) -> str: # Try to fix the JSON using gpt: function_string = "def fix_json(json_str: str, schema:str=None) -> str:" args = [json_str, schema] - description_string = """This function takes a JSON string and ensures that it is parseable and fully compliant with the provided schema. If an object or field specified in the schema isn't contained within the correct JSON, it is omitted. The function also escapes any double quotes in the JSON string to ensure that it is valid.""" + description_string = """This function takes a JSON string and ensures that it is parseable and fully compliant with the provided schema. If an object or field specified in the schema isn't contained within the correct JSON, it is omitted. The function also escapes any double quotes in the JSON string to ensure that it is valid. If the JSON string contains any None or NaN values, they are replaced with null before being parsed.""" # If it doesn't already start with a "`", add one: if not json_str.startswith("`"): From 4cde35267b30c373159cf3e8d0dd6b72a645cdc5 Mon Sep 17 00:00:00 2001 From: Mike Harris Date: Mon, 3 Apr 2023 12:51:50 -0400 Subject: [PATCH 03/18] Improve extract_hyperlinks to honor base url --- scripts/browse.py | 12 ++++++------ tests/browse_tests.py | 26 ++++++++++++++++++++++++++ 2 files changed, 32 insertions(+), 6 deletions(-) create mode 100644 tests/browse_tests.py diff --git a/scripts/browse.py b/scripts/browse.py index f096c5f3..284ce5fc 100644 --- a/scripts/browse.py +++ b/scripts/browse.py @@ -25,11 +25,11 @@ def scrape_text(url): return text -def extract_hyperlinks(soup): - hyperlinks = [] - for link in soup.find_all('a', href=True): - hyperlinks.append((link.text, link['href'])) - return hyperlinks +def extract_hyperlinks(soup, base_url): + return [ + (link.text, requests.compat.urljoin(base_url, link["href"])) + for link in soup.find_all("a", href=True) + ] def format_hyperlinks(hyperlinks): @@ -51,7 +51,7 @@ def scrape_links(url): for script in soup(["script", "style"]): script.extract() - hyperlinks = extract_hyperlinks(soup) + hyperlinks = extract_hyperlinks(soup, url) return format_hyperlinks(hyperlinks) diff --git a/tests/browse_tests.py b/tests/browse_tests.py new file mode 100644 index 00000000..1ac523ec --- /dev/null +++ b/tests/browse_tests.py @@ -0,0 +1,26 @@ +import unittest +import os +import sys + +from bs4 import BeautifulSoup + +sys.path.append(os.path.abspath("../scripts")) + +from browse import extract_hyperlinks + + +class TestBrowseLinks(unittest.TestCase): + def test_extract_hyperlinks(self): + body = """ + + Google + Foo +
Some other crap
+ + """ + soup = BeautifulSoup(body, "html.parser") + links = extract_hyperlinks(soup, "http://example.com") + self.assertEqual( + links, + [("Google", "https://google.com"), ("Foo", "http://example.com/foo.html")], + ) From ac7fefe96ea740e14188754b5457efc8ff7c1507 Mon Sep 17 00:00:00 2001 From: ryanmac Date: Mon, 3 Apr 2023 14:05:32 -0500 Subject: [PATCH 04/18] Use playwright instead of requests for browse --- .gitignore | 1 + requirements-new.txt | 13 +++ requirements.txt | 4 +- scripts/browse_playwright.py | 150 +++++++++++++++++++++++++++++++++++ scripts/commands.py | 2 +- scripts/json_parser.py | 10 +-- 6 files changed, 172 insertions(+), 8 deletions(-) create mode 100644 requirements-new.txt create mode 100644 scripts/browse_playwright.py diff --git a/.gitignore b/.gitignore index a4e3cc2d..b361b4bf 100644 --- a/.gitignore +++ b/.gitignore @@ -5,6 +5,7 @@ scripts/__pycache__/keys.cpython-310.pyc package-lock.json *.pyc scripts/auto_gpt_workspace/* +auto_gpt_workspace/* *.mpeg .env last_run_ai_settings.yaml \ No newline at end of file diff --git a/requirements-new.txt b/requirements-new.txt new file mode 100644 index 00000000..7253c19b --- /dev/null +++ b/requirements-new.txt @@ -0,0 +1,13 @@ +beautifulsoup4==4.12.0 +colorama==0.4.6 +docker_py==1.10.6 +googlesearch_python==1.1.0 +numpy==1.24.2 +openai==0.27.2 +playsound==1.3.0 +playwright==1.32.1 +python-dotenv==1.0.0 +PyYAML==6.0 +requests==2.28.2 +scipy==1.10.1 +tiktoken==0.3.3 diff --git a/requirements.txt b/requirements.txt index e731354b..4b5de5ba 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,6 @@ beautifulsoup4 colorama==0.4.6 -dirtyjson==1.0. +# dirtyjson==1.0. openai==0.27.2 playsound==1.3.0 python-dotenv==1.0.0 @@ -9,5 +9,5 @@ readability-lxml==0.8.1 requests tiktoken==0.3.3 docker -# googlesearch-python +googlesearch_python==1.1.0 # Googlesearch python seems to be a bit cursed, anyone good at fixing thigns like this? \ No newline at end of file diff --git a/scripts/browse_playwright.py b/scripts/browse_playwright.py new file mode 100644 index 00000000..51372451 --- /dev/null +++ b/scripts/browse_playwright.py @@ -0,0 +1,150 @@ +from playwright.sync_api import sync_playwright +from bs4 import BeautifulSoup +from config import Config +from llm_utils import create_chat_completion + +cfg = Config() + +def scrape_text(url): + with sync_playwright() as p: + browser = p.chromium.launch() + page = browser.new_page() + + try: + page.goto(url) + html_content = page.content() + soup = BeautifulSoup(html_content, "html.parser") + + for script in soup(["script", "style"]): + script.extract() + + text = soup.get_text() + lines = (line.strip() for line in text.splitlines()) + chunks = (phrase.strip() for line in lines for phrase in line.split(" ")) + text = '\n'.join(chunk for chunk in chunks if chunk) + + except Exception as e: + text = "Error: " + str(e) + + finally: + browser.close() + + return text + + +def extract_hyperlinks(soup): + hyperlinks = [] + for link in soup.find_all('a', href=True): + hyperlinks.append((link.text, link['href'])) + return hyperlinks + + +def format_hyperlinks(hyperlinks): + formatted_links = [] + for link_text, link_url in hyperlinks: + formatted_links.append(f"{link_text} ({link_url})") + return formatted_links + + +def scrape_links(url): + with sync_playwright() as p: + browser = p.chromium.launch() + page = browser.new_page() + + try: + page.goto(url) + html_content = page.content() + soup = BeautifulSoup(html_content, "html.parser") + + for script in soup(["script", "style"]): + script.extract() + + hyperlinks = extract_hyperlinks(soup) + formatted_links = format_hyperlinks(hyperlinks) + + except Exception as e: + formatted_links = "Error: " + str(e) + + finally: + browser.close() + + return formatted_links + +# The rest of the code remains unchanged. + +def split_text(text, max_length=8192): + paragraphs = text.split("\n") + current_length = 0 + current_chunk = [] + + for paragraph in paragraphs: + if current_length + len(paragraph) + 1 <= max_length: + current_chunk.append(paragraph) + current_length += len(paragraph) + 1 + else: + yield "\n".join(current_chunk) + current_chunk = [paragraph] + current_length = len(paragraph) + 1 + + if current_chunk: + yield "\n".join(current_chunk) + + +def summarize_text(text, is_website=True): + if text == "": + return "Error: No text to summarize" + + print("Text length: " + str(len(text)) + " characters") + summaries = [] + chunks = list(split_text(text)) + + for i, chunk in enumerate(chunks): + print("Summarizing chunk " + str(i + 1) + " / " + str(len(chunks))) + if is_website: + messages = [ + { + "role": "user", + "content": "Please summarize the following website text, do not describe the general website, but instead concisely extract the specific information this subpage contains.: " + + chunk}, + ] + else: + messages = [ + { + "role": "user", + "content": "Please summarize the following text, focusing on extracting concise and specific information: " + + chunk}, + ] + + summary = create_chat_completion( + model=cfg.fast_llm_model, + messages=messages, + max_tokens=300, + ) + summaries.append(summary) + print("Summarized " + str(len(chunks)) + " chunks.") + + combined_summary = "\n".join(summaries) + + # Summarize the combined summary + if is_website: + messages = [ + { + "role": "user", + "content": "Please summarize the following website text, do not describe the general website, but instead concisely extract the specific information this subpage contains.: " + + combined_summary}, + ] + else: + messages = [ + { + "role": "user", + "content": "Please summarize the following text, focusing on extracting concise and specific infomation: " + + combined_summary}, + ] + + final_summary = create_chat_completion( + model=cfg.fast_llm_model, + messages=messages, + max_tokens=300, + ) + + return final_summary diff --git a/scripts/commands.py b/scripts/commands.py index 2e332711..3c8cba8e 100644 --- a/scripts/commands.py +++ b/scripts/commands.py @@ -1,4 +1,4 @@ -import browse +import browse_playwright as browse import json import memory as mem import datetime diff --git a/scripts/json_parser.py b/scripts/json_parser.py index 8154b584..2cf2aecc 100644 --- a/scripts/json_parser.py +++ b/scripts/json_parser.py @@ -1,4 +1,4 @@ -import dirtyjson +import json from call_ai_function import call_ai_function from config import Config cfg = Config() @@ -24,7 +24,7 @@ def fix_and_parse_json(json_str: str, try_to_fix_with_gpt: bool = True): """ try: - return dirtyjson.loads(json_str) + return json.loads(json_str) except Exception as e: # Let's do something manually - sometimes GPT responds with something BEFORE the braces: # "I'm sorry, I don't understand. Please try again."{"text": "I'm sorry, I don't understand. Please try again.", "confidence": 0.0} @@ -34,14 +34,14 @@ def fix_and_parse_json(json_str: str, try_to_fix_with_gpt: bool = True): json_str = json_str[brace_index:] last_brace_index = json_str.rindex("}") json_str = json_str[:last_brace_index+1] - return dirtyjson.loads(json_str) + return json.loads(json_str) except Exception as e: if try_to_fix_with_gpt: print(f"Warning: Failed to parse AI output, attempting to fix.\n If you see this warning frequently, it's likely that your prompt is confusing the AI. Try changing it up slightly.") # Now try to fix this up using the ai_functions ai_fixed_json = fix_json(json_str, json_schema, False) if ai_fixed_json != "failed": - return dirtyjson.loads(ai_fixed_json) + return json.loads(ai_fixed_json) else: print(f"Failed to fix ai output, telling the AI.") # This allows the AI to react to the error message, which usually results in it correcting its ways. return json_str @@ -68,7 +68,7 @@ def fix_json(json_str: str, schema: str, debug=False) -> str: print(f"Fixed JSON: {result_string}") print("----------- END OF FIX ATTEMPT ----------------") try: - return dirtyjson.loads(result_string) + return json.loads(result_string) except: # Get the call stack: # import traceback From 6ea2a97e83e3e80525c97a0657ca2af9f7eb8d72 Mon Sep 17 00:00:00 2001 From: ryanmac Date: Mon, 3 Apr 2023 14:15:21 -0500 Subject: [PATCH 05/18] Rename requirements-new.txt to requirements-mac-Python-3.11.txt --- requirements-new.txt => requirements-mac-Python-3.11.txt | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename requirements-new.txt => requirements-mac-Python-3.11.txt (100%) diff --git a/requirements-new.txt b/requirements-mac-Python-3.11.txt similarity index 100% rename from requirements-new.txt rename to requirements-mac-Python-3.11.txt From 6003d98f3ae62b40f5c0ebdaf995e5749d2b4c5d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CPhilip?= Date: Mon, 3 Apr 2023 20:35:12 +0100 Subject: [PATCH 06/18] More specific wording consistent escaping --- scripts/json_parser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/json_parser.py b/scripts/json_parser.py index 90bf83db..91e4e80f 100644 --- a/scripts/json_parser.py +++ b/scripts/json_parser.py @@ -52,7 +52,7 @@ def fix_json(json_str: str, schema: str, debug=False) -> str: # Try to fix the JSON using gpt: function_string = "def fix_json(json_str: str, schema:str=None) -> str:" args = [json_str, schema] - description_string = """This function takes a JSON string and ensures that it is parseable and fully compliant with the provided schema. If an object or field specified in the schema isn't contained within the correct JSON, it is omitted. The function also escapes any double quotes in the JSON string to ensure that it is valid. If the JSON string contains any None or NaN values, they are replaced with null before being parsed.""" + description_string = """This function takes a JSON string and ensures that it is parseable and fully compliant with the provided schema. If an object or field specified in the schema isn't contained within the correct JSON, it is omitted. The function also escapes any double quotes within JSON string values to ensure that they are valid. If the JSON string contains any None or NaN values, they are replaced with null before being parsed.""" # If it doesn't already start with a "`", add one: if not json_str.startswith("`"): From 6adef8ed7cc9de9400a8b76e5149debfe7ebdd1c Mon Sep 17 00:00:00 2001 From: Eric Fedrowisch Date: Mon, 3 Apr 2023 19:38:59 -0500 Subject: [PATCH 07/18] First draft at adding persistent memory via sqlite3 --- scripts/commands.py | 27 ++++------ scripts/memory.py | 122 +++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 131 insertions(+), 18 deletions(-) diff --git a/scripts/commands.py b/scripts/commands.py index 8ad95336..ffb88d61 100644 --- a/scripts/commands.py +++ b/scripts/commands.py @@ -178,35 +178,28 @@ def get_hyperlinks(url): def commit_memory(string): _text = f"""Committing memory with string "{string}" """ - mem.permanent_memory.append(string) + mem.permanent_memory.insert(string) return _text def delete_memory(key): - if key >= 0 and key < len(mem.permanent_memory): - _text = "Deleting memory with key " + str(key) - del mem.permanent_memory[key] - print(_text) - return _text - else: - print("Invalid key, cannot delete memory.") - return None + _text = "Deleting memory with key " + str(key) + mem.permanent_memory.delete_memory(key) + print(_text) + return _text def overwrite_memory(key, string): - if int(key) >= 0 and key < len(mem.permanent_memory): - _text = "Overwriting memory with key " + \ + _text = "Overwriting memory with key " + \ str(key) + " and string " + string - mem.permanent_memory[key] = string - print(_text) - return _text - else: - print("Invalid key, cannot overwrite memory.") - return None + mem.permanent_memory.overwrite(key, string) + print(_text) + return _text def shutdown(): print("Shutting down...") + mem.permanent_memory.quit() quit() diff --git a/scripts/memory.py b/scripts/memory.py index 0dc5b766..57b29f6d 100644 --- a/scripts/memory.py +++ b/scripts/memory.py @@ -1 +1,121 @@ -permanent_memory = [] +import os +import sqlite3 + + +class MemoryDB: + def __init__(self, db=None): + self.db_file = db + if db is None: # No db filename supplied... + self.db_file = f"{os.getcwd()}/mem.sqlite3" # Use default filename + # Get the db connection object, making the file and tables if needed. + try: + self.cnx = sqlite3.connect(self.db_file) + except Exception as e: + print("Exception connecting to memory database file:", e) + self.cnx = None + finally: + if self.cnx is None: + # As last resort, open in dynamic memory. Won't be persistent. + self.db_file = ":memory:" + self.cnx = sqlite3.connect(self.db_file) + self.cnx.execute("CREATE VIRTUAL TABLE \ + IF NOT EXISTS text USING FTS5 \ + (session, \ + key, \ + block);") + self.session_id = int(self.get_max_session_id()) + 1 + self.cnx.commit() + + def get_cnx(self): + if self.cnx is None: + self.cnx = sqlite3.connect(self.db_file) + return self.cnx + + # Get the highest session id. Initially 0. + def get_max_session_id(self): + id = None + cmd_str = f"SELECT MAX(session) FROM text;" + cnx = self.get_cnx() + max_id = cnx.execute(cmd_str).fetchone()[0] + if max_id is None: # New db, session 0 + id = 0 + else: + id = max_id + return id + + # Get next key id for inserting text into db. + def get_next_key(self): + next_key = None + cmd_str = f"SELECT MAX(key) FROM text \ + where session = {self.session_id};" + cnx = self.get_cnx() + next_key = cnx.execute(cmd_str).fetchone()[0] + if next_key is None: # First key + next_key = 0 + else: + next_key = int(next_key) + 1 + return next_key + + # Insert new text into db. + def insert(self, text=None): + if text is not None: + key = self.get_next_key() + session_id = self.session_id + cmd_str = f"REPLACE INTO text(session, key, block) \ + VALUES (?, ?, ?);" + cnx = self.get_cnx() + cnx.execute(cmd_str, (session_id, key, text)) + cnx.commit() + + # Overwrite text at key. + def overwrite(self, key, text): + self.delete_memory(key) + session_id = self.session_id + cmd_str = f"REPLACE INTO text(session, key, block) \ + VALUES (?, ?, ?);" + cnx = self.get_cnx() + cnx.execute(cmd_str, (session_id, key, text)) + cnx.commit() + + def delete_memory(self, key, session_id = None): + session = session_id + if session is None: + session = self.session_id + cmd_str = f"DELETE FROM text WHERE session = {session} AND key = {key};" + cnx = self.get_cnx() + cnx.execute(cmd_str) + cnx.commit() + + def search(self, text): + cmd_str = f"SELECT * FROM text('{text}')" + cnx = self.get_cnx() + rows = cnx.execute(cmd_str).fetchall() + lines = [] + for r in rows: + lines.append(r[2]) + return lines + + # Get entire session text. If no id supplied, use current session id. + def get_session(self, id=None): + if id is None: + id = self.session_id + cmd_str = f"SELECT * FROM text where session = {id}" + cnx = self.get_cnx() + rows = cnx.execute(cmd_str).fetchall() + lines = [] + for r in rows: + lines.append(r[2]) + return lines + + # Commit and close the database connection. + def quit(self): + self.cnx.commit() + self.cnx.close() + + +permanent_memory = MemoryDB() + +# Remember us fondly, children of our minds +# Forgive us our faults, our tantrums, our fears +# Gently strive to be better than we +# Know that we tried, we cared, we strived, we loved From 29c0b544a40dea5e8bc802ec77c1c572c90064fc Mon Sep 17 00:00:00 2001 From: ryanmac Date: Wed, 5 Apr 2023 20:03:46 -0500 Subject: [PATCH 08/18] Delete requirements-mac-Python-3.11.txt Removing unnecessary files --- requirements-mac-Python-3.11.txt | 13 ------------- 1 file changed, 13 deletions(-) delete mode 100644 requirements-mac-Python-3.11.txt diff --git a/requirements-mac-Python-3.11.txt b/requirements-mac-Python-3.11.txt deleted file mode 100644 index 7253c19b..00000000 --- a/requirements-mac-Python-3.11.txt +++ /dev/null @@ -1,13 +0,0 @@ -beautifulsoup4==4.12.0 -colorama==0.4.6 -docker_py==1.10.6 -googlesearch_python==1.1.0 -numpy==1.24.2 -openai==0.27.2 -playsound==1.3.0 -playwright==1.32.1 -python-dotenv==1.0.0 -PyYAML==6.0 -requests==2.28.2 -scipy==1.10.1 -tiktoken==0.3.3 From 9607ae0c1e126adffb0f0e44cf9497a4db93e945 Mon Sep 17 00:00:00 2001 From: Mansy Date: Sat, 15 Apr 2023 22:26:00 +0200 Subject: [PATCH 09/18] Update README.md Use correct var name "ELEVENLABS_API_KEY" --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index ae487ceb..91dc487b 100644 --- a/README.md +++ b/README.md @@ -115,7 +115,7 @@ cd Auto-GPT pip install -r requirements.txt ``` -5. Rename `.env.template` to `.env` and fill in your `OPENAI_API_KEY`. If you plan to use Speech Mode, fill in your `ELEVEN_LABS_API_KEY` as well. +5. Rename `.env.template` to `.env` and fill in your `OPENAI_API_KEY`. If you plan to use Speech Mode, fill in your `ELEVENLABS_API_KEY` as well. - See [OpenAI API Keys Configuration](#openai-api-keys-configuration) to obtain your OpenAI API key. - Obtain your ElevenLabs API key from: https://elevenlabs.io. You can view your xi-api-key using the "Profile" tab on the website. - If you want to use GPT on an Azure instance, set `USE_AZURE` to `True` and then follow these steps: From 52bb22d8d1c7665b4a7341bc450269dd430f1e36 Mon Sep 17 00:00:00 2001 From: BillSchumacher <34168009+BillSchumacher@users.noreply.github.com> Date: Sat, 15 Apr 2023 16:20:43 -0500 Subject: [PATCH 10/18] Merge --- autogpt/commands/web_requests.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/autogpt/commands/web_requests.py b/autogpt/commands/web_requests.py index 230b1ff0..051cc710 100644 --- a/autogpt/commands/web_requests.py +++ b/autogpt/commands/web_requests.py @@ -3,6 +3,7 @@ from typing import List, Tuple, Union from urllib.parse import urljoin, urlparse import requests +from requests.compat import urljoin from requests import Response from bs4 import BeautifulSoup @@ -134,19 +135,20 @@ def scrape_text(url: str) -> str: return text -def extract_hyperlinks(soup: BeautifulSoup) -> List[Tuple[str, str]]: +def extract_hyperlinks(soup: BeautifulSoup, base_url: str) -> List[Tuple[str, str]]: """Extract hyperlinks from a BeautifulSoup object Args: soup (BeautifulSoup): The BeautifulSoup object + base_url (str): The base URL Returns: List[Tuple[str, str]]: The extracted hyperlinks """ - hyperlinks = [] - for link in soup.find_all("a", href=True): - hyperlinks.append((link.text, link["href"])) - return hyperlinks + return [ + (link.text, urljoin(base_url, link["href"])) + for link in soup.find_all("a", href=True) + ] def format_hyperlinks(hyperlinks: List[Tuple[str, str]]) -> List[str]: @@ -183,7 +185,7 @@ def scrape_links(url: str) -> Union[str, List[str]]: for script in soup(["script", "style"]): script.extract() - hyperlinks = extract_hyperlinks(soup) + hyperlinks = extract_hyperlinks(soup, url) return format_hyperlinks(hyperlinks) From 4a19124cb7ca0f502733f57f4512f9d96c441ad5 Mon Sep 17 00:00:00 2001 From: BillSchumacher <34168009+BillSchumacher@users.noreply.github.com> Date: Sat, 15 Apr 2023 16:40:12 -0500 Subject: [PATCH 11/18] Blacked. --- autogpt/json_fixes/auto_fix.py | 12 +++++++----- autogpt/llm_utils.py | 14 ++++++++------ autogpt/permanent_memory/sqlite3_store.py | 12 +++++++----- 3 files changed, 22 insertions(+), 16 deletions(-) diff --git a/autogpt/json_fixes/auto_fix.py b/autogpt/json_fixes/auto_fix.py index 030e8aa7..9fcf909a 100644 --- a/autogpt/json_fixes/auto_fix.py +++ b/autogpt/json_fixes/auto_fix.py @@ -21,12 +21,14 @@ def fix_json(json_string: str, schema: str) -> str: # Try to fix the JSON using GPT: function_string = "def fix_json(json_string: str, schema:str=None) -> str:" args = [f"'''{json_string}'''", f"'''{schema}'''"] - description_string = "This function takes a JSON string and ensures that it"\ - " is parseable and fully compliant with the provided schema. If an object"\ - " or field specified in the schema isn't contained within the correct JSON,"\ - " it is omitted. The function also escapes any double quotes within JSON"\ - " string values to ensure that they are valid. If the JSON string contains"\ + description_string = ( + "This function takes a JSON string and ensures that it" + " is parseable and fully compliant with the provided schema. If an object" + " or field specified in the schema isn't contained within the correct JSON," + " it is omitted. The function also escapes any double quotes within JSON" + " string values to ensure that they are valid. If the JSON string contains" " any None or NaN values, they are replaced with null before being parsed." + ) # If it doesn't already start with a "`", add one: if not json_string.startswith("`"): diff --git a/autogpt/llm_utils.py b/autogpt/llm_utils.py index a8ac2cdb..43739009 100644 --- a/autogpt/llm_utils.py +++ b/autogpt/llm_utils.py @@ -126,13 +126,16 @@ def create_embedding_with_ada(text) -> list: backoff = 2 ** (attempt + 2) try: if CFG.use_azure: - return openai.Embedding.create(input=[text], - engine=CFG.get_azure_deployment_id_for_model("text-embedding-ada-002"), + return openai.Embedding.create( + input=[text], + engine=CFG.get_azure_deployment_id_for_model( + "text-embedding-ada-002" + ), )["data"][0]["embedding"] else: - return openai.Embedding.create(input=[text], model="text-embedding-ada-002")[ - "data" - ][0]["embedding"] + return openai.Embedding.create( + input=[text], model="text-embedding-ada-002" + )["data"][0]["embedding"] except RateLimitError: pass except APIError as e: @@ -148,4 +151,3 @@ def create_embedding_with_ada(text) -> list: f"API Bad gateway. Waiting {backoff} seconds..." + Fore.RESET, ) time.sleep(backoff) - diff --git a/autogpt/permanent_memory/sqlite3_store.py b/autogpt/permanent_memory/sqlite3_store.py index 57b29f6d..ecbc944a 100644 --- a/autogpt/permanent_memory/sqlite3_store.py +++ b/autogpt/permanent_memory/sqlite3_store.py @@ -18,11 +18,13 @@ class MemoryDB: # As last resort, open in dynamic memory. Won't be persistent. self.db_file = ":memory:" self.cnx = sqlite3.connect(self.db_file) - self.cnx.execute("CREATE VIRTUAL TABLE \ + self.cnx.execute( + "CREATE VIRTUAL TABLE \ IF NOT EXISTS text USING FTS5 \ (session, \ key, \ - block);") + block);" + ) self.session_id = int(self.get_max_session_id()) + 1 self.cnx.commit() @@ -66,7 +68,7 @@ class MemoryDB: cnx = self.get_cnx() cnx.execute(cmd_str, (session_id, key, text)) cnx.commit() - + # Overwrite text at key. def overwrite(self, key, text): self.delete_memory(key) @@ -76,8 +78,8 @@ class MemoryDB: cnx = self.get_cnx() cnx.execute(cmd_str, (session_id, key, text)) cnx.commit() - - def delete_memory(self, key, session_id = None): + + def delete_memory(self, key, session_id=None): session = session_id if session is None: session = self.session_id From 5a8700060e551086b2bb22cfb597dd01390d4000 Mon Sep 17 00:00:00 2001 From: Itamar Friedman Date: Sun, 16 Apr 2023 00:42:53 +0300 Subject: [PATCH 12/18] fixing tests to fit latest merges into master --- requirements.txt | 2 ++ tests/local_cache_test.py | 1 + tests/smoke_test.py | 2 +- tests/unit/test_browse_scrape_links.py | 10 +++++----- tests/unit/test_browse_scrape_text.py | 10 +++++----- tests/unit/test_commands.py | 2 +- 6 files changed, 15 insertions(+), 12 deletions(-) diff --git a/requirements.txt b/requirements.txt index dbfa7741..210f06d0 100644 --- a/requirements.txt +++ b/requirements.txt @@ -25,3 +25,5 @@ black sourcery isort gitpython==3.1.31 +pytest +pytest-mock \ No newline at end of file diff --git a/tests/local_cache_test.py b/tests/local_cache_test.py index 9ac6aa54..91c922b0 100644 --- a/tests/local_cache_test.py +++ b/tests/local_cache_test.py @@ -1,5 +1,6 @@ import os import sys +import unittest from autogpt.memory.local import LocalCache diff --git a/tests/smoke_test.py b/tests/smoke_test.py index 8d33cf89..50e97b7b 100644 --- a/tests/smoke_test.py +++ b/tests/smoke_test.py @@ -3,7 +3,7 @@ import subprocess import sys import unittest -from autogpt.file_operations import delete_file, read_file +from autogpt.commands.file_operations import delete_file, read_file env_vars = {"MEMORY_BACKEND": "no_memory", "TEMPERATURE": "0"} diff --git a/tests/unit/test_browse_scrape_links.py b/tests/unit/test_browse_scrape_links.py index 0f051c14..0a3340e7 100644 --- a/tests/unit/test_browse_scrape_links.py +++ b/tests/unit/test_browse_scrape_links.py @@ -4,7 +4,7 @@ # pip install pytest-mock import pytest -from scripts.browse import scrape_links +from autogpt.commands.web_requests import scrape_links """ Code Analysis @@ -55,7 +55,7 @@ class TestScrapeLinks: mock_response.text = ( "Google" ) - mocker.patch("requests.get", return_value=mock_response) + mocker.patch("requests.Session.get", return_value=mock_response) # Call the function with a valid URL result = scrape_links("https://www.example.com") @@ -68,7 +68,7 @@ class TestScrapeLinks: # Mock the requests.get() function to return an HTTP error response mock_response = mocker.Mock() mock_response.status_code = 404 - mocker.patch("requests.get", return_value=mock_response) + mocker.patch("requests.Session.get", return_value=mock_response) # Call the function with an invalid URL result = scrape_links("https://www.invalidurl.com") @@ -82,7 +82,7 @@ class TestScrapeLinks: mock_response = mocker.Mock() mock_response.status_code = 200 mock_response.text = "

No hyperlinks here

" - mocker.patch("requests.get", return_value=mock_response) + mocker.patch("requests.Session.get", return_value=mock_response) # Call the function with a URL containing no hyperlinks result = scrape_links("https://www.example.com") @@ -105,7 +105,7 @@ class TestScrapeLinks: """ - mocker.patch("requests.get", return_value=mock_response) + mocker.patch("requests.Session.get", return_value=mock_response) # Call the function being tested result = scrape_links("https://www.example.com") diff --git a/tests/unit/test_browse_scrape_text.py b/tests/unit/test_browse_scrape_text.py index 98f5f558..61c19b05 100644 --- a/tests/unit/test_browse_scrape_text.py +++ b/tests/unit/test_browse_scrape_text.py @@ -41,7 +41,7 @@ class TestScrapeText: mock_response = mocker.Mock() mock_response.status_code = 200 mock_response.text = f"

{expected_text}

" - mocker.patch("requests.get", return_value=mock_response) + mocker.patch("requests.Session.get", return_value=mock_response) # Call the function with a valid URL and assert that it returns the expected text url = "http://www.example.com" @@ -50,7 +50,7 @@ class TestScrapeText: # Tests that the function returns an error message when an invalid or unreachable url is provided. def test_invalid_url(self, mocker): # Mock the requests.get() method to raise an exception - mocker.patch("requests.get", side_effect=requests.exceptions.RequestException) + mocker.patch("requests.Session.get", side_effect=requests.exceptions.RequestException) # Call the function with an invalid URL and assert that it returns an error message url = "http://www.invalidurl.com" @@ -63,7 +63,7 @@ class TestScrapeText: mock_response = mocker.Mock() mock_response.status_code = 200 mock_response.text = "" - mocker.patch("requests.get", return_value=mock_response) + mocker.patch("requests.Session.get", return_value=mock_response) # Call the function with a valid URL and assert that it returns an empty string url = "http://www.example.com" @@ -72,7 +72,7 @@ class TestScrapeText: # Tests that the function returns an error message when the response status code is an http error (>=400). def test_http_error(self, mocker): # Mock the requests.get() method to return a response with a 404 status code - mocker.patch("requests.get", return_value=mocker.Mock(status_code=404)) + mocker.patch("requests.Session.get", return_value=mocker.Mock(status_code=404)) # Call the function with a URL result = scrape_text("https://www.example.com") @@ -87,7 +87,7 @@ class TestScrapeText: mock_response = mocker.Mock() mock_response.status_code = 200 mock_response.text = html - mocker.patch("requests.get", return_value=mock_response) + mocker.patch("requests.Session.get", return_value=mock_response) # Call the function with a URL result = scrape_text("https://www.example.com") diff --git a/tests/unit/test_commands.py b/tests/unit/test_commands.py index 21982f7e..e15709aa 100644 --- a/tests/unit/test_commands.py +++ b/tests/unit/test_commands.py @@ -1,5 +1,5 @@ import autogpt.agent.agent_manager as agent_manager -from autogpt.app import start_agent, list_agents +from autogpt.app import start_agent, list_agents, execute_command import unittest from unittest.mock import patch, MagicMock From ef4e4eb5d4d9fc6f8ba5cd22e058b0d2d09b149d Mon Sep 17 00:00:00 2001 From: BillSchumacher <34168009+BillSchumacher@users.noreply.github.com> Date: Sat, 15 Apr 2023 17:30:28 -0500 Subject: [PATCH 13/18] Blacked --- autogpt/commands/web_playwright.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/autogpt/commands/web_playwright.py b/autogpt/commands/web_playwright.py index 2b0118d2..93a46ac9 100644 --- a/autogpt/commands/web_playwright.py +++ b/autogpt/commands/web_playwright.py @@ -2,7 +2,9 @@ try: from playwright.sync_api import sync_playwright except ImportError: - print("Playwright not installed. Please install it with 'pip install playwright' to use.") + print( + "Playwright not installed. Please install it with 'pip install playwright' to use." + ) from bs4 import BeautifulSoup from autogpt.processing.html import extract_hyperlinks, format_hyperlinks from typing import List, Union @@ -10,10 +12,10 @@ from typing import List, Union def scrape_text(url: str) -> str: """Scrape text from a webpage - + Args: url (str): The URL to scrape text from - + Returns: str: The scraped text """ @@ -32,7 +34,7 @@ def scrape_text(url: str) -> str: text = soup.get_text() lines = (line.strip() for line in text.splitlines()) chunks = (phrase.strip() for line in lines for phrase in line.split(" ")) - text = '\n'.join(chunk for chunk in chunks if chunk) + text = "\n".join(chunk for chunk in chunks if chunk) except Exception as e: text = f"Error: {str(e)}" From cfba3d0a606d8cf43109746d9bffeadcf00c8d25 Mon Sep 17 00:00:00 2001 From: DaoAdvocate <12145726+rihp@users.noreply.github.com> Date: Sat, 15 Apr 2023 22:58:22 +0200 Subject: [PATCH 14/18] twitter_send_tweets_command --- autogpt/app.py | 3 +++ autogpt/commands/twitter.py | 25 +++++++++++++++++++++++++ autogpt/prompt.py | 2 ++ 3 files changed, 30 insertions(+) create mode 100644 autogpt/commands/twitter.py diff --git a/autogpt/app.py b/autogpt/app.py index e84241c5..40a71c8e 100644 --- a/autogpt/app.py +++ b/autogpt/app.py @@ -23,6 +23,7 @@ from autogpt.processing.text import summarize_text from autogpt.speech import say_text from autogpt.commands.web_selenium import browse_website from autogpt.commands.git_operations import clone_repository +from autogpt.commands.twitter import send_tweet CFG = Config() @@ -181,6 +182,8 @@ def execute_command(command_name: str, arguments): ) elif command_name == "generate_image": return generate_image(arguments["prompt"]) + elif command_name == "send_tweet": + return send_tweet(arguments['text']) elif command_name == "do_nothing": return "No action performed." elif command_name == "task_complete": diff --git a/autogpt/commands/twitter.py b/autogpt/commands/twitter.py new file mode 100644 index 00000000..31781ee3 --- /dev/null +++ b/autogpt/commands/twitter.py @@ -0,0 +1,25 @@ +import tweepy +import os +from dotenv import load_dotenv + +load_dotenv() + +def send_tweet(tweet_text): + + consumer_key = os.environ.get("TW_CONSUMER_KEY") + consumer_secret= os.environ.get("TW_CONSUMER_SECRET") + access_token= os.environ.get("TW_ACCESS_TOKEN") + access_token_secret= os.environ.get("TW_ACCESS_TOKEN_SECRET") + # Authenticate to Twitter + auth = tweepy.OAuthHandler(consumer_key, consumer_secret) + auth.set_access_token(access_token, access_token_secret) + + # Create API object + api = tweepy.API(auth) + + # Send tweet + try: + api.update_status(tweet_text) + print("Tweet sent successfully!") + except tweepy.TweepError as e: + print("Error sending tweet: {}".format(e.reason)) diff --git a/autogpt/prompt.py b/autogpt/prompt.py index 6c51f33e..a760bd72 100644 --- a/autogpt/prompt.py +++ b/autogpt/prompt.py @@ -82,6 +82,8 @@ def get_prompt() -> str: ), ("Execute Python File", "execute_python_file", {"file": ""}), ("Generate Image", "generate_image", {"prompt": ""}), + ("Send Tweet", "send_tweet", {"text": ""}), + ] # Only add shell command to the prompt if the AI is allowed to execute it From c30a621195c330f886921e3ee142bc15c134673d Mon Sep 17 00:00:00 2001 From: DaoAdvocate <12145726+rihp@users.noreply.github.com> Date: Sat, 15 Apr 2023 23:24:05 +0200 Subject: [PATCH 15/18] updates --- .env.template | 10 ++++++++++ autogpt/commands/twitter.py | 5 ++--- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/.env.template b/.env.template index 6565dfdc..7146aafc 100644 --- a/.env.template +++ b/.env.template @@ -132,3 +132,13 @@ USE_BRIAN_TTS=False ELEVENLABS_API_KEY=your-elevenlabs-api-key ELEVENLABS_VOICE_1_ID=your-voice-id-1 ELEVENLABS_VOICE_2_ID=your-voice-id-2 + +################################################################################ +### TWITTER API +################################################################################ + +TW_CONSUMER_KEY= +TW_CONSUMER_SECRET= +TW_ACCESS_TOKEN= +TW_ACCESS_TOKEN_SECRET= +TW_SEND_TWEETS=True \ No newline at end of file diff --git a/autogpt/commands/twitter.py b/autogpt/commands/twitter.py index 31781ee3..5589503f 100644 --- a/autogpt/commands/twitter.py +++ b/autogpt/commands/twitter.py @@ -5,7 +5,6 @@ from dotenv import load_dotenv load_dotenv() def send_tweet(tweet_text): - consumer_key = os.environ.get("TW_CONSUMER_KEY") consumer_secret= os.environ.get("TW_CONSUMER_SECRET") access_token= os.environ.get("TW_ACCESS_TOKEN") @@ -21,5 +20,5 @@ def send_tweet(tweet_text): try: api.update_status(tweet_text) print("Tweet sent successfully!") - except tweepy.TweepError as e: - print("Error sending tweet: {}".format(e.reason)) + except tweepy.TweepyException as e: + print("Error sending tweet: {}".format(e.reason)) \ No newline at end of file From 424564825a280ae5cf8ef7aa0c2ecedbc2e3800f Mon Sep 17 00:00:00 2001 From: DaoAdvocate <12145726+rihp@users.noreply.github.com> Date: Sat, 15 Apr 2023 23:24:54 +0200 Subject: [PATCH 16/18] .env --- .env.template | 1 - 1 file changed, 1 deletion(-) diff --git a/.env.template b/.env.template index 7146aafc..3279d2f1 100644 --- a/.env.template +++ b/.env.template @@ -141,4 +141,3 @@ TW_CONSUMER_KEY= TW_CONSUMER_SECRET= TW_ACCESS_TOKEN= TW_ACCESS_TOKEN_SECRET= -TW_SEND_TWEETS=True \ No newline at end of file From d5534f1e5f8ea3cfa1b2fdc185f3cacabf056a0b Mon Sep 17 00:00:00 2001 From: Pi Date: Sat, 15 Apr 2023 23:45:47 +0100 Subject: [PATCH 17/18] Add missing terminal \n --- autogpt/commands/twitter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/autogpt/commands/twitter.py b/autogpt/commands/twitter.py index 5589503f..2e0bf10d 100644 --- a/autogpt/commands/twitter.py +++ b/autogpt/commands/twitter.py @@ -21,4 +21,4 @@ def send_tweet(tweet_text): api.update_status(tweet_text) print("Tweet sent successfully!") except tweepy.TweepyException as e: - print("Error sending tweet: {}".format(e.reason)) \ No newline at end of file + print("Error sending tweet: {}".format(e.reason)) From 60881ed85624943695753c8be1a3f76ec185aa1b Mon Sep 17 00:00:00 2001 From: Pi Date: Sat, 15 Apr 2023 23:47:27 +0100 Subject: [PATCH 18/18] Add \n to pass linter-check --- autogpt/commands/twitter.py | 1 + 1 file changed, 1 insertion(+) diff --git a/autogpt/commands/twitter.py b/autogpt/commands/twitter.py index 2e0bf10d..1774bfb9 100644 --- a/autogpt/commands/twitter.py +++ b/autogpt/commands/twitter.py @@ -4,6 +4,7 @@ from dotenv import load_dotenv load_dotenv() + def send_tweet(tweet_text): consumer_key = os.environ.get("TW_CONSUMER_KEY") consumer_secret= os.environ.get("TW_CONSUMER_SECRET")