Merge branch 'Significant-Gravitas:master' into master

Thibault Twahirwa
2023-04-15 11:33:04 -04:00
committed by GitHub
9 changed files with 102 additions and 10 deletions


@@ -11,6 +11,9 @@ BROWSE_SUMMARY_MAX_TOKEN=300
 # USER_AGENT="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36"
 # AI_SETTINGS_FILE - Specifies which AI Settings file to use (defaults to ai_settings.yaml)
 AI_SETTINGS_FILE=ai_settings.yaml
+# USE_WEB_BROWSER - Sets the web-browser drivers to use with selenium (defaults to chrome).
+# Note: set this to either 'chrome', 'firefox', or 'safari' depending on your current browser
+# USE_WEB_BROWSER=chrome

 ################################################################################
 ### LLM PROVIDER
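Outside the diff itself, a minimal sketch of how a startup check for this setting could look (the resolve_web_browser helper is hypothetical, not part of the commit; only the env var name and its chrome default come from the diff):

    import os

    # Hypothetical helper mirroring the .env default; rejects values the
    # selenium dispatch table later in this commit would not recognize.
    SUPPORTED_BROWSERS = {"chrome", "firefox", "safari"}

    def resolve_web_browser() -> str:
        browser = os.getenv("USE_WEB_BROWSER", "chrome").lower()
        if browser not in SUPPORTED_BROWSERS:
            raise ValueError(f"USE_WEB_BROWSER must be one of {sorted(SUPPORTED_BROWSERS)}")
        return browser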


@@ -366,7 +366,7 @@ Memories will be available to the AI immediately as they are ingested, even if i
 In the example above, the script initializes the memory, ingests all files within the seed_data directory into memory with an overlap between chunks of 200 and a maximum length of each chunk of 4000.
 Note that you can also use the --file argument to ingest a single file into memory and that the script will only ingest files within the auto_gpt_workspace directory.
-You can adjust the max_length and overlap parameters to fine-tune the way the docuents are presented to the AI when it "recall" that memory:
+You can adjust the max_length and overlap parameters to fine-tune the way the documents are presented to the AI when it "recall" that memory:
 - Adjusting the overlap value allows the AI to access more contextual information from each chunk when recalling information, but will result in more chunks being created and therefore increase memory backend usage and OpenAI API requests.
 - Reducing the max_length value will create more chunks, which can save prompt tokens by allowing for more message history in the context, but will also increase the number of chunks.
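As a rough illustration of the max_length/overlap trade-off described above (a self-contained sketch, not the repository's actual ingestion code; function name and character-based splitting are assumptions):

    def chunk_text(text: str, max_length: int = 4000, overlap: int = 200) -> list[str]:
        # Illustrative sliding-window chunker: each chunk is at most max_length
        # characters and repeats the last `overlap` characters of the previous
        # chunk, so recalled chunks carry shared context across boundaries.
        assert 0 <= overlap < max_length
        chunks = []
        step = max_length - overlap
        for start in range(0, len(text), step):
            chunks.append(text[start:start + max_length])
            if start + max_length >= len(text):
                break
        return chunks

    # A larger overlap or a smaller max_length both increase the chunk count,
    # and with it memory-backend usage and embedding API calls.
    print(len(chunk_text("x" * 10_000)))  # 3 chunks with the defaults above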


@@ -1,5 +1,6 @@
 """Main script for the autogpt package."""
 import logging

+from colorama import Fore
 from autogpt.agent.agent import Agent
 from autogpt.args import parse_arguments
@@ -33,7 +34,8 @@ def main() -> None:
     # Initialize memory and make sure it is empty.
     # this is particularly important for indexing and referencing pinecone memory
     memory = get_memory(cfg, init=True)
-    print(f"Using memory of type: {memory.__class__.__name__}")
+    logger.typewriter_log(f"Using memory of type:", Fore.GREEN, f"{memory.__class__.__name__}")
+    logger.typewriter_log(f"Using Browser:", Fore.GREEN, cfg.selenium_web_browser)
     agent = Agent(
         ai_name=ai_name,
         memory=memory,
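For readers unfamiliar with the call being swapped in: logger.typewriter_log takes a title, a colorama color, and the content. A stand-in approximation of the call shape (the real logger lives in the project's autogpt.logs module and, as the name suggests, animates output; this sketch only reproduces the coloring):

    from colorama import Fore, Style, init

    init()  # enables ANSI color handling on Windows; harmless elsewhere

    def typewriter_log(title: str, title_color: str = "", content: str = "") -> None:
        # Stand-in: colored title followed by plain content on one line.
        print(f"{title_color}{title}{Style.RESET_ALL} {content}")

    typewriter_log("Using memory of type:", Fore.GREEN, "LocalCache")
    typewriter_log("Using Browser:", Fore.GREEN, "chrome")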


@@ -50,6 +50,12 @@ def parse_arguments() -> None:
action="store_true",
help="Skips the re-prompting messages at the beginning of the script",
)
parser.add_argument(
"--use-browser",
"-b",
dest="browser_name",
help="Specifies which web-browser to use when using selenium to scrape the web."
)
parser.add_argument(
"--ai-settings",
"-C",
@@ -126,3 +132,6 @@ def parse_arguments() -> None:
         logger.typewriter_log("Using AI Settings File:", Fore.GREEN, file)
         CFG.ai_settings_file = file
         CFG.skip_reprompt = True
+
+    if args.browser_name:
+        CFG.selenium_web_browser = args.browser_name
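A compact, runnable sketch of how the new flag behaves in isolation (the parser setup is lifted from the hunk above; the default value and invocation are assumptions for illustration):

    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--use-browser",
        "-b",
        dest="browser_name",
        help="Specifies which web-browser to use when using selenium to scrape the web."
    )

    # e.g. the equivalent of running: python -m autogpt --use-browser firefox
    args = parser.parse_args(["--use-browser", "firefox"])

    selenium_web_browser = "chrome"  # env/default value, as in Config
    if args.browser_name:            # the CLI flag overrides the default
        selenium_web_browser = args.browser_name
    print(selenium_web_browser)      # firefox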


@@ -7,7 +7,10 @@ from selenium.webdriver.common.by import By
 from selenium.webdriver.support.wait import WebDriverWait
 from selenium.webdriver.support import expected_conditions as EC
 from webdriver_manager.chrome import ChromeDriverManager
-from selenium.webdriver.chrome.options import Options
+from webdriver_manager.firefox import GeckoDriverManager
+from selenium.webdriver.chrome.options import Options as ChromeOptions
+from selenium.webdriver.firefox.options import Options as FirefoxOptions
+from selenium.webdriver.safari.options import Options as SafariOptions
 import logging
 from pathlib import Path
 from autogpt.config import Config
@@ -49,14 +52,25 @@ def scrape_text_with_selenium(url: str) -> tuple[WebDriver, str]:
     """
     logging.getLogger("selenium").setLevel(logging.CRITICAL)

-    options = Options()
+    options_available = {'chrome': ChromeOptions, 'safari': SafariOptions, 'firefox': FirefoxOptions}
+    options = options_available[CFG.selenium_web_browser]()
     options.add_argument(
-        "user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
-        " (KHTML, like Gecko) Chrome/112.0.5615.49 Safari/537.36"
-    )
-    driver = webdriver.Chrome(
-        executable_path=ChromeDriverManager().install(), options=options
+        "user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.5615.49 Safari/537.36"
     )
+
+    if CFG.selenium_web_browser == "firefox":
+        driver = webdriver.Firefox(
+            executable_path=GeckoDriverManager().install(), options=options
+        )
+    elif CFG.selenium_web_browser == "safari":
+        # Requires a bit more setup on the users end
+        # See https://developer.apple.com/documentation/webkit/testing_with_webdriver_in_safari
+        driver = webdriver.Safari(options=options)
+    else:
+        driver = webdriver.Chrome(
+            executable_path=ChromeDriverManager().install(), options=options
+        )
     driver.get(url)

     WebDriverWait(driver, 10).until(
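The core pattern above is a dictionary dispatch over browser names. Read in isolation (a sketch, not the commit's function; build_options and the explicit error handling are additions for illustration):

    from selenium.webdriver.chrome.options import Options as ChromeOptions
    from selenium.webdriver.firefox.options import Options as FirefoxOptions
    from selenium.webdriver.safari.options import Options as SafariOptions

    # Same lookup as the diff: the Options class is chosen by name, so an
    # unsupported USE_WEB_BROWSER value surfaces here as a KeyError.
    options_available = {
        "chrome": ChromeOptions,
        "firefox": FirefoxOptions,
        "safari": SafariOptions,
    }

    def build_options(browser: str, user_agent: str):
        try:
            options = options_available[browser]()
        except KeyError:
            raise ValueError(f"Unsupported browser: {browser!r}") from None
        # Chrome-style argument, as in the diff; safaridriver does not honor
        # arguments like this, so the spoofed user-agent only helps
        # chrome/firefox in practice.
        options.add_argument(f"user-agent={user_agent}")
        return options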


@@ -25,6 +25,7 @@ class Config(metaclass=Singleton):
         self.speak_mode = False
         self.skip_reprompt = False

+        self.selenium_web_browser = os.getenv("USE_WEB_BROWSER", "chrome")
         self.ai_settings_file = os.getenv("AI_SETTINGS_FILE", "ai_settings.yaml")
         self.fast_llm_model = os.getenv("FAST_LLM_MODEL", "gpt-3.5-turbo")
         self.smart_llm_model = os.getenv("SMART_LLM_MODEL", "gpt-4")
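The os.getenv defaults mean the environment is consulted when Config is constructed. A quick illustrative check with a simplified stand-in (the real class is a Singleton, so it reads the environment only once per process; the stand-in below deliberately omits that to show the env lookup itself):

    import os

    class Config:
        # Simplified stand-in mirroring two of the fields in the diff.
        def __init__(self) -> None:
            self.selenium_web_browser = os.getenv("USE_WEB_BROWSER", "chrome")
            self.ai_settings_file = os.getenv("AI_SETTINGS_FILE", "ai_settings.yaml")

    print(Config().selenium_web_browser)   # "chrome" when USE_WEB_BROWSER is unset
    os.environ["USE_WEB_BROWSER"] = "firefox"
    print(Config().selenium_web_browser)   # "firefox" (a fresh instance re-reads the env)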


@@ -27,7 +27,7 @@ def count_message_tokens(
         logger.warn("Warning: model not found. Using cl100k_base encoding.")
         encoding = tiktoken.get_encoding("cl100k_base")
     if model == "gpt-3.5-turbo":
-        # !Node: gpt-3.5-turbo may change over time.
+        # !Note: gpt-3.5-turbo may change over time.
         # Returning num tokens assuming gpt-3.5-turbo-0301.")
         return count_message_tokens(messages, model="gpt-3.5-turbo-0301")
     elif model == "gpt-4":
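The pin to gpt-3.5-turbo-0301 matters because per-message token overhead differs by model snapshot. A worked check of the arithmetic behind the new tests further down, using tiktoken directly (the 4-tokens-per-message and 3-token reply-priming constants follow OpenAI's published counting recipe for that snapshot, assumed here rather than taken from this diff; name fields are omitted for brevity):

    import tiktoken

    encoding = tiktoken.encoding_for_model("gpt-3.5-turbo-0301")
    messages = [
        {"role": "user", "content": "Hello"},
        {"role": "assistant", "content": "Hi there!"},
    ]

    num_tokens = 0
    for message in messages:
        num_tokens += 4  # per-message framing for gpt-3.5-turbo-0301
        for value in message.values():
            num_tokens += len(encoding.encode(value))
    num_tokens += 3  # every reply is primed with <|start|>assistant<|message|>
    print(num_tokens)  # 17, matching the test expectation below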


@@ -7,6 +7,8 @@ services:
     depends_on:
       - redis
     build: ./
+    env_file:
+      - .env
     volumes:
       - "./autogpt:/app"
       - ".env:/app/.env"


@@ -0,0 +1,61 @@
+import unittest
+
+import tests.context
+from autogpt.token_counter import count_message_tokens, count_string_tokens
+
+
+class TestTokenCounter(unittest.TestCase):
+    def test_count_message_tokens(self):
+        messages = [
+            {"role": "user", "content": "Hello"},
+            {"role": "assistant", "content": "Hi there!"}
+        ]
+        self.assertEqual(count_message_tokens(messages), 17)
+
+    def test_count_message_tokens_with_name(self):
+        messages = [
+            {"role": "user", "content": "Hello", "name": "John"},
+            {"role": "assistant", "content": "Hi there!"}
+        ]
+        self.assertEqual(count_message_tokens(messages), 17)
+
+    def test_count_message_tokens_empty_input(self):
+        self.assertEqual(count_message_tokens([]), 3)
+
+    def test_count_message_tokens_invalid_model(self):
+        messages = [
+            {"role": "user", "content": "Hello"},
+            {"role": "assistant", "content": "Hi there!"}
+        ]
+        with self.assertRaises(KeyError):
+            count_message_tokens(messages, model="invalid_model")
+
+    def test_count_message_tokens_gpt_4(self):
+        messages = [
+            {"role": "user", "content": "Hello"},
+            {"role": "assistant", "content": "Hi there!"}
+        ]
+        self.assertEqual(count_message_tokens(messages, model="gpt-4-0314"), 15)
+
+    def test_count_string_tokens(self):
+        string = "Hello, world!"
+        self.assertEqual(count_string_tokens(string, model_name="gpt-3.5-turbo-0301"), 4)
+
+    def test_count_string_tokens_empty_input(self):
+        self.assertEqual(count_string_tokens("", model_name="gpt-3.5-turbo-0301"), 0)
+
+    def test_count_message_tokens_invalid_model(self):
+        messages = [
+            {"role": "user", "content": "Hello"},
+            {"role": "assistant", "content": "Hi there!"}
+        ]
+        with self.assertRaises(NotImplementedError):
+            count_message_tokens(messages, model="invalid_model")
+
+    def test_count_string_tokens_gpt_4(self):
+        string = "Hello, world!"
+        self.assertEqual(count_string_tokens(string, model_name="gpt-4-0314"), 4)
+
+
+if __name__ == '__main__':
+    unittest.main()
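One wrinkle worth noting in the file above: test_count_message_tokens_invalid_model is defined twice, so the second definition (expecting NotImplementedError) shadows the first (expecting KeyError) and only one of them ever runs. A sketch of a disambiguating rename, plus a direct tiktoken check of the string counts the tests assert (the rename is an editorial suggestion, not part of the commit):

    import tiktoken

    # Hypothetical rename so both behaviors are actually exercised:
    #   test_count_message_tokens_invalid_model          -> expects KeyError
    #   test_count_message_tokens_not_implemented_model  -> expects NotImplementedError

    # Sanity-checking the string-count assertions with tiktoken directly:
    encoding = tiktoken.encoding_for_model("gpt-3.5-turbo-0301")
    print(len(encoding.encode("Hello, world!")))  # 4 tokens: "Hello", ",", " world", "!"
    print(len(encoding.encode("")))               # 0 tokens for the empty string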