mirror of
https://github.com/aljazceru/Auto-GPT.git
synced 2025-12-23 00:44:22 +01:00
* feat: pass config to call_ai_functions in coimmands * feat: config for read_audio_from_file * feat: file operations cfg NOTE: we replaced the CFG in the command enable with TRUE b/c not sure how to handle this yet * feat: git command conversion * feat: google search * feat: image generation * feat: extract cfg from browser commands * feat: remove cfg from execute code commands * fix: file operation related tests * fix: linting * fix: tests for read_audio * fix: test error * feat: update cassettes * fix: linting * fix: test typechecking * fix: google_search errors if unexpected kw arg is passed * fix: pass config param to google search test * fix: agent commands were broken + cassettes * fix: agent test * feat: cassettes * feat: enable/disable logic for commands * fix: some commands threw errors * feat: fix tests * Add new cassettes * Add new cassettes * ci: trigger ci * Update autogpt/commands/execute_code.py Co-authored-by: Reinier van der Leer <github@pwuts.nl> * fix prompt * fix prompt + rebase * add config remove useless imports * put back CFG just for download file * lint * The signature should be mandatory in the decorator * black isort * fix: remove the CFG * fix: non typed arg * lint: type some args * lint: add types for libraries * Add new cassettes * fix: windows compatibility * fix: add config access to decorator * fix: remove twitter mention * DDGS search works at 3.0.2 version * ci: linting --------- Co-authored-by: Auto-GPT-Bot <github-bot@agpt.co> Co-authored-by: merwanehamadi <merwanehamadi@gmail.com> Co-authored-by: Reinier van der Leer <github@pwuts.nl> Co-authored-by: kinance <kinance@gmail.com>
117 lines
4.8 KiB
Python
117 lines
4.8 KiB
Python
# Generated by CodiumAI
|
|
|
|
import pytest
|
|
import requests
|
|
|
|
from autogpt.commands.web_requests import scrape_text
|
|
|
|
"""
|
|
Code Analysis
|
|
|
|
Objective:
|
|
The objective of the "scrape_text" function is to scrape the text content from
|
|
a given URL and return it as a string, after removing any unwanted HTML tags and
|
|
scripts.
|
|
|
|
Inputs:
|
|
- url: a string representing the URL of the webpage to be scraped.
|
|
|
|
Flow:
|
|
1. Send a GET request to the given URL using the requests library and the user agent
|
|
header from the config file.
|
|
2. Check if the response contains an HTTP error. If it does, return an error message.
|
|
3. Use BeautifulSoup to parse the HTML content of the response and remove all script
|
|
and style tags.
|
|
4. Get the text content of the remaining HTML using the get_text() method of
|
|
BeautifulSoup.
|
|
5. Split the text into lines and then into chunks, removing any extra whitespace.
|
|
6. Join the chunks into a single string with newline characters between them.
|
|
7. Return the cleaned text.
|
|
|
|
Outputs:
|
|
- A string representing the cleaned text content of the webpage.
|
|
|
|
Additional aspects:
|
|
- The function uses the requests library and BeautifulSoup to handle the HTTP request
|
|
and HTML parsing, respectively.
|
|
- The function removes script and style tags from the HTML to avoid including unwanted
|
|
content in the text output.
|
|
- The function uses a generator expression to split the text into lines and chunks,
|
|
which can improve performance for large amounts of text.
|
|
"""
|
|
|
|
|
|
class TestScrapeText:
    """Unit tests for ``scrape_text``.

    Where a test needs an HTTP response, ``requests.Session.get`` is patched
    so that no real network request is made by the patched call.
    """

    @staticmethod
    def _stub_session_get(mocker, **response_attrs):
        """Patch ``requests.Session.get`` to return a mock response.

        Args:
            mocker: The pytest-mock fixture used to create and install the mock.
            **response_attrs: Attributes to set on the mock response
                (e.g. ``status_code``, ``text``).
        """
        mocker.patch(
            "requests.Session.get", return_value=mocker.Mock(**response_attrs)
        )

    def test_scrape_text_with_valid_url(self, mocker, config):
        """scrape_text() returns the page text when given a valid URL."""
        sample = "This is some sample text"
        # Serve a small page whose only text content is the sample sentence.
        page = (
            "<html><body><div>"
            f"<p style='color: blue;'>{sample}</p>"
            "</div></body></html>"
        )
        self._stub_session_get(mocker, status_code=200, text=page)

        assert scrape_text("http://www.example.com", config) == sample

    def test_invalid_url(self, config):
        """scrape_text() raises ValueError when an invalid URL is provided."""
        with pytest.raises(ValueError):
            scrape_text("invalidurl.com", config)

    def test_unreachable_url(self, mocker, config):
        """scrape_text() returns an error message when the request itself fails."""
        # Make requests.Session.get raise instead of returning a response.
        mocker.patch(
            "requests.Session.get",
            side_effect=requests.exceptions.RequestException,
        )

        outcome = scrape_text("http://thiswebsitedoesnotexist.net/", config)

        assert "Error:" in outcome

    def test_no_text(self, mocker, config):
        """scrape_text() returns an empty string for a page with no text."""
        self._stub_session_get(
            mocker, status_code=200, text="<html><body></body></html>"
        )

        assert scrape_text("http://www.example.com", config) == ""

    def test_http_error(self, mocker, config):
        """scrape_text() reports an HTTP error when the response status is 404."""
        # Only the status code is set; no body is provided for the error case.
        self._stub_session_get(mocker, status_code=404)

        outcome = scrape_text("https://www.example.com", config)

        assert outcome == "Error: HTTP 404 error"

    def test_scrape_text_with_html_tags(self, mocker, config):
        """scrape_text() strips inline HTML tags and keeps their text."""
        markup = "<html><body><p>This is <b>bold</b> text.</p></body></html>"
        self._stub_session_get(mocker, status_code=200, text=markup)

        outcome = scrape_text("https://www.example.com", config)

        assert outcome == "This is bold text."
|