Mirror of https://github.com/aljazceru/Auto-GPT.git, synced 2025-12-23 08:54:24 +01:00
* feat: pass config to call_ai_functions in commands
* feat: config for read_audio_from_file
* feat: file operations cfg
  NOTE: we replaced the CFG in the command enable with TRUE b/c not sure how to handle this yet
* feat: git command conversion
* feat: google search
* feat: image generation
* feat: extract cfg from browser commands
* feat: remove cfg from execute code commands
* fix: file operation related tests
* fix: linting
* fix: tests for read_audio
* fix: test error
* feat: update cassettes
* fix: linting
* fix: test typechecking
* fix: google_search errors if unexpected kw arg is passed
* fix: pass config param to google search test
* fix: agent commands were broken + cassettes
* fix: agent test
* feat: cassettes
* feat: enable/disable logic for commands
* fix: some commands threw errors
* feat: fix tests
* Add new cassettes
* Add new cassettes
* ci: trigger ci
* Update autogpt/commands/execute_code.py
  Co-authored-by: Reinier van der Leer <github@pwuts.nl>
* fix prompt
* fix prompt + rebase
* add config, remove useless imports
* put back CFG just for download file
* lint
* The signature should be mandatory in the decorator
* black, isort
* fix: remove the CFG
* fix: non-typed arg
* lint: type some args
* lint: add types for libraries
* Add new cassettes
* fix: windows compatibility
* fix: add config access to decorator
* fix: remove twitter mention
* DDGS search works at 3.0.2 version
* ci: linting
---------
Co-authored-by: Auto-GPT-Bot <github-bot@agpt.co>
Co-authored-by: merwanehamadi <merwanehamadi@gmail.com>
Co-authored-by: Reinier van der Leer <github@pwuts.nl>
Co-authored-by: kinance <kinance@gmail.com>
101 lines
2.8 KiB
Python
"""Browse a webpage and summarize it using the LLM model"""
from __future__ import annotations

import requests
from bs4 import BeautifulSoup
from requests import Response

from autogpt.config import Config
from autogpt.processing.html import extract_hyperlinks, format_hyperlinks
from autogpt.url_utils.validators import validate_url

session = requests.Session()

@validate_url
def get_response(
    url: str, config: Config, timeout: int = 10
) -> tuple[None, str] | tuple[Response, None]:
    """Get the response from a URL

    Args:
        url (str): The URL to get the response from
        config (Config): The application config, used for the User-Agent header
        timeout (int): The timeout in seconds for the HTTP request

    Returns:
        tuple[None, str] | tuple[Response, None]: The response and error message

    Raises:
        ValueError: If the URL is invalid
        requests.exceptions.RequestException: If the HTTP request fails
    """
    try:
        # Send the request with the configured user agent
        session.headers.update({"User-Agent": config.user_agent})
        response = session.get(url, timeout=timeout)

        # Check if the response contains an HTTP error
        if response.status_code >= 400:
            return None, f"Error: HTTP {response.status_code} error"

        return response, None
    except ValueError as ve:
        # Handle invalid URL format
        return None, f"Error: {ve}"
    except requests.exceptions.RequestException as re:
        # Handle exceptions related to the HTTP request
        # (e.g., connection errors, timeouts, etc.)
        return None, f"Error: {re}"
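
# Illustrative note (not part of the original file): get_response returns a
# (response, error) pair instead of raising, so callers branch on whichever
# side is non-None:
#
#     response, error = get_response("https://example.com", config)
#     if error:
#         ...  # error is a human-readable string and response is None
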
def scrape_text(url: str, config: Config) -> str:
    """Scrape text from a webpage

    Args:
        url (str): The URL to scrape text from
        config (Config): The application config

    Returns:
        str: The scraped text
    """
    response, error_message = get_response(url, config)
    if error_message:
        return error_message
    if not response:
        return "Error: Could not get response"

    soup = BeautifulSoup(response.text, "html.parser")

    # Drop script and style elements so only visible text remains
    for script in soup(["script", "style"]):
        script.extract()

    text = soup.get_text()
    lines = (line.strip() for line in text.splitlines())
    # Split on double spaces so multi-word phrases stay intact, then drop
    # empty chunks before re-joining one phrase per line
    chunks = (phrase.strip() for line in lines for phrase in line.split("  "))
    text = "\n".join(chunk for chunk in chunks if chunk)

    return text

def scrape_links(url: str, config: Config) -> str | list[str]:
    """Scrape links from a webpage

    Args:
        url (str): The URL to scrape links from
        config (Config): The application config

    Returns:
        str | list[str]: The scraped links, or an error message
    """
    response, error_message = get_response(url, config)
    if error_message:
        return error_message
    if not response:
        return "Error: Could not get response"

    soup = BeautifulSoup(response.text, "html.parser")

    # Drop script and style elements before extracting hyperlinks
    for script in soup(["script", "style"]):
        script.extract()

    hyperlinks = extract_hyperlinks(soup, url)

    return format_hyperlinks(hyperlinks)
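
# A minimal usage sketch (an illustration, not part of the original file).
# It assumes Config() can be constructed with defaults and exposes the
# `user_agent` attribute that get_response reads above.
if __name__ == "__main__":
    config = Config()
    # Fetch and print the visible text of a page
    print(scrape_text("https://example.com", config))
    # List the hyperlinks found on the same page
    print(scrape_links("https://example.com", config))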