mirror of
https://github.com/aljazceru/Auto-GPT.git
synced 2025-12-19 06:54:22 +01:00
Limits the number of links that a webpage can return.
This commit is contained in:
@@ -34,7 +34,7 @@ def format_hyperlinks(hyperlinks):
|
|||||||
formatted_links = []
|
formatted_links = []
|
||||||
for link_text, link_url in hyperlinks:
|
for link_text, link_url in hyperlinks:
|
||||||
formatted_links.append(f"{link_text} ({link_url})")
|
formatted_links.append(f"{link_text} ({link_url})")
|
||||||
return '\n'.join(formatted_links)
|
return formatted_links
|
||||||
|
|
||||||
def scrape_links(url):
|
def scrape_links(url):
|
||||||
response = requests.get(url)
|
response = requests.get(url)
|
||||||
@@ -49,14 +49,8 @@ def scrape_links(url):
|
|||||||
script.extract()
|
script.extract()
|
||||||
|
|
||||||
hyperlinks = extract_hyperlinks(soup)
|
hyperlinks = extract_hyperlinks(soup)
|
||||||
|
|
||||||
text = soup.get_text()
|
return format_hyperlinks(hyperlinks)
|
||||||
lines = (line.strip() for line in text.splitlines())
|
|
||||||
chunks = (phrase.strip() for line in lines for phrase in line.split(" "))
|
|
||||||
text = '\n'.join(chunk for chunk in chunks if chunk)
|
|
||||||
text = format_hyperlinks(hyperlinks)
|
|
||||||
|
|
||||||
return text
|
|
||||||
|
|
||||||
def split_text(text, max_length=8192):
|
def split_text(text, max_length=8192):
|
||||||
paragraphs = text.split("\n")
|
paragraphs = text.split("\n")
|
||||||
|
|||||||
@@ -80,6 +80,10 @@ def browse_website(url):
|
|||||||
summary = get_text_summary(url)
|
summary = get_text_summary(url)
|
||||||
links = get_hyperlinks(url)
|
links = get_hyperlinks(url)
|
||||||
|
|
||||||
|
# Limit links to 5
|
||||||
|
if len(links) > 5:
|
||||||
|
links = links[:5]
|
||||||
|
|
||||||
result = f"""Website Content Summary: {summary}\n\nLinks: {links}"""
|
result = f"""Website Content Summary: {summary}\n\nLinks: {links}"""
|
||||||
|
|
||||||
return result
|
return result
|
||||||
@@ -90,8 +94,8 @@ def get_text_summary(url):
|
|||||||
return """ "Result" : """ + summary
|
return """ "Result" : """ + summary
|
||||||
|
|
||||||
def get_hyperlinks(url):
|
def get_hyperlinks(url):
|
||||||
text = browse.scrape_links(url)
|
link_list = browse.scrape_links(url)
|
||||||
return text
|
return link_list
|
||||||
|
|
||||||
def check_news(source):
|
def check_news(source):
|
||||||
print("Checking news from BBC world instead of " + source)
|
print("Checking news from BBC world instead of " + source)
|
||||||
|
|||||||
Reference in New Issue
Block a user