mirror of
https://github.com/aljazceru/Auto-GPT.git
synced 2025-12-17 22:14:28 +01:00
Refactor and Merge branch 'master' of https://github.com/Significant-Gravitas/Auto-GPT into pr/96
This commit is contained in:
32
autogpt/processing/html.py
Normal file
32
autogpt/processing/html.py
Normal file
@@ -0,0 +1,32 @@
|
||||
"""HTML processing functions"""
|
||||
from requests.compat import urljoin
|
||||
from typing import List, Tuple
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
|
||||
def extract_hyperlinks(soup: BeautifulSoup, base_url: str) -> List[Tuple[str, str]]:
    """Collect the hyperlinks found in a parsed HTML document.

    Args:
        soup (BeautifulSoup): The parsed document to search for ``<a>`` tags.
        base_url (str): The URL each ``href`` value is joined against.

    Returns:
        List[Tuple[str, str]]: ``(link text, joined URL)`` pairs, in the
            order the anchors are returned by ``find_all``.
    """
    hyperlinks: List[Tuple[str, str]] = []
    # href=True asks BeautifulSoup only for anchors that carry an href.
    for anchor in soup.find_all("a", href=True):
        label = anchor.text
        target = urljoin(base_url, anchor["href"])
        hyperlinks.append((label, target))
    return hyperlinks
|
||||
|
||||
|
||||
def format_hyperlinks(hyperlinks: List[Tuple[str, str]]) -> List[str]:
    """Render hyperlink pairs as strings suitable for showing to the user.

    Args:
        hyperlinks (List[Tuple[str, str]]): ``(link text, link URL)`` pairs.

    Returns:
        List[str]: One ``"text (url)"`` string per input pair, in input order.
    """
    formatted: List[str] = []
    for text, url in hyperlinks:
        formatted.append(f"{text} ({url})")
    return formatted
|
||||
Reference in New Issue
Block a user