diff --git a/.gitignore b/.gitignore index a4e3cc2d..b361b4bf 100644 --- a/.gitignore +++ b/.gitignore @@ -5,6 +5,7 @@ scripts/__pycache__/keys.cpython-310.pyc package-lock.json *.pyc scripts/auto_gpt_workspace/* +auto_gpt_workspace/* *.mpeg .env last_run_ai_settings.yaml \ No newline at end of file diff --git a/requirements-new.txt b/requirements-new.txt new file mode 100644 index 00000000..7253c19b --- /dev/null +++ b/requirements-new.txt @@ -0,0 +1,13 @@ +beautifulsoup4==4.12.0 +colorama==0.4.6 +docker_py==1.10.6 +googlesearch_python==1.1.0 +numpy==1.24.2 +openai==0.27.2 +playsound==1.3.0 +playwright==1.32.1 +python-dotenv==1.0.0 +PyYAML==6.0 +requests==2.28.2 +scipy==1.10.1 +tiktoken==0.3.3 diff --git a/requirements.txt b/requirements.txt index e731354b..4b5de5ba 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,6 @@ beautifulsoup4 colorama==0.4.6 -dirtyjson==1.0. +# dirtyjson==1.0. openai==0.27.2 playsound==1.3.0 python-dotenv==1.0.0 @@ -9,5 +9,5 @@ readability-lxml==0.8.1 requests tiktoken==0.3.3 docker -# googlesearch-python +googlesearch_python==1.1.0 # Googlesearch python seems to be a bit cursed, anyone good at fixing things like this? 
\ No newline at end of file diff --git a/scripts/browse_playwright.py b/scripts/browse_playwright.py new file mode 100644 index 00000000..51372451 --- /dev/null +++ b/scripts/browse_playwright.py @@ -0,0 +1,150 @@ +from playwright.sync_api import sync_playwright +from bs4 import BeautifulSoup +from config import Config +from llm_utils import create_chat_completion + +cfg = Config() + +def scrape_text(url): + with sync_playwright() as p: + browser = p.chromium.launch() + page = browser.new_page() + + try: + page.goto(url) + html_content = page.content() + soup = BeautifulSoup(html_content, "html.parser") + + for script in soup(["script", "style"]): + script.extract() + + text = soup.get_text() + lines = (line.strip() for line in text.splitlines()) + chunks = (phrase.strip() for line in lines for phrase in line.split(" ")) + text = '\n'.join(chunk for chunk in chunks if chunk) + + except Exception as e: + text = "Error: " + str(e) + + finally: + browser.close() + + return text + + +def extract_hyperlinks(soup): + hyperlinks = [] + for link in soup.find_all('a', href=True): + hyperlinks.append((link.text, link['href'])) + return hyperlinks + + +def format_hyperlinks(hyperlinks): + formatted_links = [] + for link_text, link_url in hyperlinks: + formatted_links.append(f"{link_text} ({link_url})") + return formatted_links + + +def scrape_links(url): + with sync_playwright() as p: + browser = p.chromium.launch() + page = browser.new_page() + + try: + page.goto(url) + html_content = page.content() + soup = BeautifulSoup(html_content, "html.parser") + + for script in soup(["script", "style"]): + script.extract() + + hyperlinks = extract_hyperlinks(soup) + formatted_links = format_hyperlinks(hyperlinks) + + except Exception as e: + formatted_links = "Error: " + str(e) + + finally: + browser.close() + + return formatted_links + +# The rest of the code remains unchanged. 
+ +def split_text(text, max_length=8192): + paragraphs = text.split("\n") + current_length = 0 + current_chunk = [] + + for paragraph in paragraphs: + if current_length + len(paragraph) + 1 <= max_length: + current_chunk.append(paragraph) + current_length += len(paragraph) + 1 + else: + yield "\n".join(current_chunk) + current_chunk = [paragraph] + current_length = len(paragraph) + 1 + + if current_chunk: + yield "\n".join(current_chunk) + + +def summarize_text(text, is_website=True): + if text == "": + return "Error: No text to summarize" + + print("Text length: " + str(len(text)) + " characters") + summaries = [] + chunks = list(split_text(text)) + + for i, chunk in enumerate(chunks): + print("Summarizing chunk " + str(i + 1) + " / " + str(len(chunks))) + if is_website: + messages = [ + { + "role": "user", + "content": "Please summarize the following website text, do not describe the general website, but instead concisely extract the specific information this subpage contains.: " + + chunk}, + ] + else: + messages = [ + { + "role": "user", + "content": "Please summarize the following text, focusing on extracting concise and specific information: " + + chunk}, + ] + + summary = create_chat_completion( + model=cfg.fast_llm_model, + messages=messages, + max_tokens=300, + ) + summaries.append(summary) + print("Summarized " + str(len(chunks)) + " chunks.") + + combined_summary = "\n".join(summaries) + + # Summarize the combined summary + if is_website: + messages = [ + { + "role": "user", + "content": "Please summarize the following website text, do not describe the general website, but instead concisely extract the specific information this subpage contains.: " + + combined_summary}, + ] + else: + messages = [ + { + "role": "user", + "content": "Please summarize the following text, focusing on extracting concise and specific information: " + + combined_summary}, + ] + + final_summary = create_chat_completion( + model=cfg.fast_llm_model, + messages=messages, + 
max_tokens=300, + ) + + return final_summary diff --git a/scripts/commands.py b/scripts/commands.py index 2e332711..3c8cba8e 100644 --- a/scripts/commands.py +++ b/scripts/commands.py @@ -1,4 +1,4 @@ -import browse +import browse_playwright as browse import json import memory as mem import datetime diff --git a/scripts/json_parser.py b/scripts/json_parser.py index 8154b584..2cf2aecc 100644 --- a/scripts/json_parser.py +++ b/scripts/json_parser.py @@ -1,4 +1,4 @@ -import dirtyjson +import json from call_ai_function import call_ai_function from config import Config cfg = Config() @@ -24,7 +24,7 @@ def fix_and_parse_json(json_str: str, try_to_fix_with_gpt: bool = True): """ try: - return dirtyjson.loads(json_str) + return json.loads(json_str) except Exception as e: # Let's do something manually - sometimes GPT responds with something BEFORE the braces: # "I'm sorry, I don't understand. Please try again."{"text": "I'm sorry, I don't understand. Please try again.", "confidence": 0.0} @@ -34,14 +34,14 @@ def fix_and_parse_json(json_str: str, try_to_fix_with_gpt: bool = True): json_str = json_str[brace_index:] last_brace_index = json_str.rindex("}") json_str = json_str[:last_brace_index+1] - return dirtyjson.loads(json_str) + return json.loads(json_str) except Exception as e: if try_to_fix_with_gpt: print(f"Warning: Failed to parse AI output, attempting to fix.\n If you see this warning frequently, it's likely that your prompt is confusing the AI. Try changing it up slightly.") # Now try to fix this up using the ai_functions ai_fixed_json = fix_json(json_str, json_schema, False) if ai_fixed_json != "failed": - return dirtyjson.loads(ai_fixed_json) + return json.loads(ai_fixed_json) else: print(f"Failed to fix ai output, telling the AI.") # This allows the AI to react to the error message, which usually results in it correcting its ways. 
return json_str @@ -68,7 +68,7 @@ def fix_json(json_str: str, schema: str, debug=False) -> str: print(f"Fixed JSON: {result_string}") print("----------- END OF FIX ATTEMPT ----------------") try: - return dirtyjson.loads(result_string) + return json.loads(result_string) except: # Get the call stack: # import traceback