mirror of
https://github.com/aljazceru/Auto-GPT.git
synced 2026-01-15 12:04:28 +01:00
Removes scrape_main_content function.
This commit is contained in:
@@ -24,23 +24,6 @@ def scrape_text(url):
|
||||
|
||||
return text
|
||||
|
||||
def scrape_main_content(url):
|
||||
response = requests.get(url)
|
||||
|
||||
# Try using Readability
|
||||
doc = Document(response.text)
|
||||
content = doc.summary()
|
||||
soup = BeautifulSoup(content, "html.parser")
|
||||
text = soup.get_text('\n', strip=True)
|
||||
|
||||
# Check if Readability provided a satisfactory result (e.g., a minimum length)
|
||||
# min_length = 50
|
||||
# if len(text) < min_length:
|
||||
# # Fallback to the custom function
|
||||
# text = scrape_main_content_custom(response.text)
|
||||
|
||||
return text
|
||||
|
||||
def split_text(text, max_length=8192):
|
||||
paragraphs = text.split("\n")
|
||||
current_length = 0
|
||||
|
||||
@@ -60,7 +60,7 @@ def google_search(query, num_results = 3):
|
||||
return json.dumps(search_results, ensure_ascii=False, indent=4)
|
||||
|
||||
def transcribe_summarise(url):
|
||||
text = browse.scrape_main_content(url)
|
||||
text = browse.scrape_text(url)
|
||||
summary = browse.summarize_text(text)
|
||||
return """ "Result" : """ + summary
|
||||
|
||||
|
||||
Reference in New Issue
Block a user