feat: Add support for running Chrome in headless mode.

Add headless mode support for Chrome and refactor web page text extraction
This commit is contained in:
chao ma
2023-04-15 12:18:19 +08:00
parent 6a93537c42
commit 773324dcd6
3 changed files with 9 additions and 0 deletions

View File

@@ -121,3 +121,6 @@ USE_BRIAN_TTS=False
ELEVENLABS_API_KEY=your-elevenlabs-api-key
ELEVENLABS_VOICE_1_ID=your-voice-id-1
ELEVENLABS_VOICE_2_ID=your-voice-id-2
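# Chrome headless mode: run the Selenium-driven Chrome browser without a visible window.
# Only the exact value True enables it; any other value disables it. Defaults to True when unset.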
HEADLESS_BROWSER=True

View File

@@ -86,6 +86,7 @@ class Config(metaclass=Singleton):
"USER_AGENT",
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36",
)
self.headless_browser = os.getenv('HEADLESS_BROWSER',"True") == "True"
self.redis_host = os.getenv("REDIS_HOST", "localhost")
self.redis_port = os.getenv("REDIS_PORT", "6379")
self.redis_password = os.getenv("REDIS_PASSWORD", "")

View File

@@ -31,7 +31,12 @@ def scrape_text_with_selenium(url):
logging.getLogger("selenium").setLevel(logging.CRITICAL)
options = Options()
if cfg.headless_browser:
options.add_argument('--headless')
options.add_argument('--disable-gpu')
options.add_argument('--no-sandbox')
options.add_argument(
"user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.5615.49 Safari/537.36"
)
driver = webdriver.Chrome(