Mirror of https://github.com/aljazceru/Auto-GPT.git (synced 2025-12-17 22:14:28 +01:00)
Redo suggested changes; move unit test files to the appropriate directory
@@ -19,22 +19,13 @@ def is_valid_url(url):
 def sanitize_url(url):
     return urljoin(url, urlparse(url).path)
 
-# Function to make a request with a specified timeout and handle exceptions
-def make_request(url, timeout=10):
-    try:
-        response = requests.get(url, headers=cfg.user_agent_header, timeout=timeout)
-        response.raise_for_status()
-        return response
-    except requests.exceptions.RequestException as e:
-        return "Error: " + str(e)
-
 # Define and check for local file address prefixes
 def check_local_file_access(url):
     # Define and check for local file address prefixes
     local_prefixes = ['file:///', 'file://localhost', 'http://localhost', 'https://localhost']
     return any(url.startswith(prefix) for prefix in local_prefixes)
 
-def get_validated_response(url, headers=cfg.user_agent_header):
+def get_response(url, headers=cfg.user_agent_header, timeout=10):
     try:
         # Restrict access to local files
         if check_local_file_access(url):
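For reference, sanitize_url (shown as context above) keeps only the scheme, host, and path of a URL, dropping the query string and fragment before any request is made. A quick standalone illustration; the example URL is mine, not from the diff:

from urllib.parse import urljoin, urlparse

def sanitize_url(url):
    # Rebuild the URL from its parsed path, discarding query string and fragment
    return urljoin(url, urlparse(url).path)

print(sanitize_url("https://example.com/page?utm_source=x#section"))
# -> https://example.com/page

This is why the new get_response below can hand sanitized_url straight to requests.get.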
@@ -44,9 +35,14 @@ def get_validated_response(url, headers=cfg.user_agent_header):
         if not url.startswith('http://') and not url.startswith('https://'):
             raise ValueError('Invalid URL format')
 
-        # Make the HTTP request and return the response
-        response = requests.get(url, headers=headers)
-        response.raise_for_status()  # Raise an exception if the response contains an HTTP error status code
+        sanitized_url = sanitize_url(url)
+
+        response = requests.get(sanitized_url, headers=headers, timeout=timeout)
+
+        # Check if the response contains an HTTP error
+        if response.status_code >= 400:
+            return None, "Error: HTTP " + str(response.status_code) + " error"
 
         return response, None
     except ValueError as ve:
         # Handle invalid URL format
@@ -58,29 +54,9 @@ def get_validated_response(url, headers=cfg.user_agent_header):
 
 def scrape_text(url):
     """Scrape text from a webpage"""
-    # Basic check if the URL is valid
-    if not url.startswith('http'):
-        return "Error: Invalid URL"
+    response, error_message = get_response(url)
+    if error_message:
+        return error_message
 
-    # Restrict access to local files
-    if check_local_file_access(url):
-        return "Error: Access to local files is restricted"
-
-    # Validate the input URL
-    if not is_valid_url(url):
-        # Sanitize the input URL
-        sanitized_url = sanitize_url(url)
-
-        # Make the request with a timeout and handle exceptions
-        response = make_request(sanitized_url)
-
-        if isinstance(response, str):
-            return response
-    else:
-        # Sanitize the input URL
-        sanitized_url = sanitize_url(url)
-
-        response = requests.get(sanitized_url, headers=cfg.user_agent_header)
-
     soup = BeautifulSoup(response.text, "html.parser")
 
@@ -113,11 +89,9 @@ def format_hyperlinks(hyperlinks):
 
 def scrape_links(url):
     """Scrape links from a webpage"""
-    response = requests.get(url, headers=cfg.user_agent_header)
-
-    # Check if the response contains an HTTP error
-    if response.status_code >= 400:
-        return "Error: HTTP " + str(response.status_code) + " error"
+    response, error_message = get_response(url)
+    if error_message:
+        return error_message
 
     soup = BeautifulSoup(response.text, "html.parser")
 
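Pieced together from the hunks above, the new request path reads roughly as follows. This is a standalone sketch, not the file itself: USER_AGENT_HEADER stands in for cfg.user_agent_header, and the exception handling at the bottom (largely cut off in this excerpt) is an assumption about how errors become (None, message) tuples.

from urllib.parse import urljoin, urlparse
import requests

USER_AGENT_HEADER = {"User-Agent": "Mozilla/5.0"}  # stand-in for cfg.user_agent_header

def sanitize_url(url):
    # Keep only scheme, host, and path; drop query string and fragment
    return urljoin(url, urlparse(url).path)

def check_local_file_access(url):
    # Define and check for local file address prefixes
    local_prefixes = ['file:///', 'file://localhost', 'http://localhost', 'https://localhost']
    return any(url.startswith(prefix) for prefix in local_prefixes)

def get_response(url, headers=USER_AGENT_HEADER, timeout=10):
    try:
        # Restrict access to local files
        if check_local_file_access(url):
            raise ValueError('Access to local files is restricted')  # assumed message

        if not url.startswith('http://') and not url.startswith('https://'):
            raise ValueError('Invalid URL format')

        sanitized_url = sanitize_url(url)
        response = requests.get(sanitized_url, headers=headers, timeout=timeout)

        # Check if the response contains an HTTP error
        if response.status_code >= 400:
            return None, "Error: HTTP " + str(response.status_code) + " error"

        return response, None
    except ValueError as ve:
        # Handle invalid URL format (assumed return shape)
        return None, "Error: " + str(ve)
    except requests.exceptions.RequestException as re:
        # Handle connection errors and timeouts (assumed branch, not shown in the excerpt)
        return None, "Error: " + str(re)

if __name__ == "__main__":
    # Local-file URLs and malformed URLs come back as (None, error_message) without any network call
    print(get_response("file:///etc/hosts"))
    print(get_response("not-a-url"))

With this helper in place, scrape_text and scrape_links only have to unpack the tuple and return the error message, which is exactly what the remaining hunks do.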
@@ -1,12 +1,12 @@
 
 # Generated by CodiumAI
-from scripts.browse import scrape_links
 
 
 # Dependencies:
 # pip install pytest-mock
 import pytest
 
+from scripts.browse import scrape_links
 
 """
 Code Analysis
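The generated test bodies are cut off in this excerpt. Below is a hypothetical sketch of how scrape_links could be exercised with pytest-mock once it routes through get_response; the patched target and the assertions are illustrative assumptions, not taken from the CodiumAI-generated file.

import requests

from scripts.browse import scrape_links


def test_scrape_links_returns_error_message(mocker):
    # Assumes get_response converts request exceptions into an "Error: ..." string
    mocker.patch(
        "scripts.browse.requests.get",
        side_effect=requests.exceptions.ConnectionError("connection refused"),
    )
    result = scrape_links("https://example.com")
    assert isinstance(result, str)
    assert result.startswith("Error:")


def test_scrape_links_handles_http_error(mocker):
    # A 4xx status should surface as the HTTP error message built in get_response
    fake_response = mocker.Mock(status_code=404, text="")
    mocker.patch("scripts.browse.requests.get", return_value=fake_response)
    result = scrape_links("https://example.com")
    assert result == "Error: HTTP 404 error"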