diff --git a/.env.template b/.env.template index 6e521af1..f1b511c2 100644 --- a/.env.template +++ b/.env.template @@ -1,187 +1,190 @@ ################################################################################ ### AUTO-GPT - GENERAL SETTINGS ################################################################################ -# EXECUTE_LOCAL_COMMANDS - Allow local command execution (Example: False) -EXECUTE_LOCAL_COMMANDS=False -# RESTRICT_TO_WORKSPACE - Restrict file operations to workspace ./auto_gpt_workspace (Default: True) -RESTRICT_TO_WORKSPACE=True -# BROWSE_CHUNK_MAX_LENGTH - When browsing website, define the length of chunk stored in memory -BROWSE_CHUNK_MAX_LENGTH=8192 -# USER_AGENT - Define the user-agent used by the requests library to browse website (string) + +## EXECUTE_LOCAL_COMMANDS - Allow local command execution (Default: False) +## RESTRICT_TO_WORKSPACE - Restrict file operations to workspace ./auto_gpt_workspace (Default: True) +# EXECUTE_LOCAL_COMMANDS=False +# RESTRICT_TO_WORKSPACE=True + +## USER_AGENT - Define the user-agent used by the requests library to browse website (string) # USER_AGENT="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36" -# AI_SETTINGS_FILE - Specifies which AI Settings file to use (defaults to ai_settings.yaml) -AI_SETTINGS_FILE=ai_settings.yaml + +## AI_SETTINGS_FILE - Specifies which AI Settings file to use (defaults to ai_settings.yaml) +# AI_SETTINGS_FILE=ai_settings.yaml ################################################################################ ### LLM PROVIDER ################################################################################ ### OPENAI -# OPENAI_API_KEY - OpenAI API Key (Example: my-openai-api-key) -# TEMPERATURE - Sets temperature in OpenAI (Default: 0) -# USE_AZURE - Use Azure OpenAI or not (Default: False) +## OPENAI_API_KEY - OpenAI API Key (Example: my-openai-api-key) +## TEMPERATURE - Sets temperature in OpenAI 
(Default: 0) +## USE_AZURE - Use Azure OpenAI or not (Default: False) OPENAI_API_KEY=your-openai-api-key -TEMPERATURE=0 -USE_AZURE=False +# TEMPERATURE=0 +# USE_AZURE=False ### AZURE -# cleanup azure env as already moved to `azure.yaml.template` +# moved to `azure.yaml.template` ################################################################################ ### LLM MODELS ################################################################################ -# SMART_LLM_MODEL - Smart language model (Default: gpt-4) -# FAST_LLM_MODEL - Fast language model (Default: gpt-3.5-turbo) -SMART_LLM_MODEL=gpt-4 -FAST_LLM_MODEL=gpt-3.5-turbo +## SMART_LLM_MODEL - Smart language model (Default: gpt-4) +## FAST_LLM_MODEL - Fast language model (Default: gpt-3.5-turbo) +# SMART_LLM_MODEL=gpt-4 +# FAST_LLM_MODEL=gpt-3.5-turbo ### LLM MODEL SETTINGS -# FAST_TOKEN_LIMIT - Fast token limit for OpenAI (Default: 4000) -# SMART_TOKEN_LIMIT - Smart token limit for OpenAI (Default: 8000) -# When using --gpt3only this needs to be set to 4000. -FAST_TOKEN_LIMIT=4000 -SMART_TOKEN_LIMIT=8000 +## FAST_TOKEN_LIMIT - Fast token limit for OpenAI (Default: 4000) +## SMART_TOKEN_LIMIT - Smart token limit for OpenAI (Default: 8000) +## When using --gpt3only this needs to be set to 4000. 
+# FAST_TOKEN_LIMIT=4000 +# SMART_TOKEN_LIMIT=8000 ################################################################################ ### MEMORY ################################################################################ ### MEMORY_BACKEND - Memory backend type -# local - Default -# pinecone - Pinecone (if configured) -# redis - Redis (if configured) -# milvus - Milvus (if configured) -MEMORY_BACKEND=local +## local - Default +## pinecone - Pinecone (if configured) +## redis - Redis (if configured) +## milvus - Milvus (if configured) +## MEMORY_INDEX - Name of index created in Memory backend (Default: auto-gpt) +# MEMORY_BACKEND=local +# MEMORY_INDEX=auto-gpt ### PINECONE -# PINECONE_API_KEY - Pinecone API Key (Example: my-pinecone-api-key) -# PINECONE_ENV - Pinecone environment (region) (Example: us-west-2) -PINECONE_API_KEY=your-pinecone-api-key -PINECONE_ENV=your-pinecone-region +## PINECONE_API_KEY - Pinecone API Key (Example: my-pinecone-api-key) +## PINECONE_ENV - Pinecone environment (region) (Example: us-west-2) +# PINECONE_API_KEY=your-pinecone-api-key +# PINECONE_ENV=your-pinecone-region ### REDIS -# REDIS_HOST - Redis host (Default: localhost, use "redis" for docker-compose) -# REDIS_PORT - Redis port (Default: 6379) -# REDIS_PASSWORD - Redis password (Default: "") -# WIPE_REDIS_ON_START - Wipes data / index on start (Default: False) -# MEMORY_INDEX - Name of index created in Redis database (Default: auto-gpt) -REDIS_HOST=localhost -REDIS_PORT=6379 -REDIS_PASSWORD= -WIPE_REDIS_ON_START=False -MEMORY_INDEX=auto-gpt +## REDIS_HOST - Redis host (Default: localhost, use "redis" for docker-compose) +## REDIS_PORT - Redis port (Default: 6379) +## REDIS_PASSWORD - Redis password (Default: "") +## WIPE_REDIS_ON_START - Wipes data / index on start (Default: True) +# REDIS_HOST=localhost +# REDIS_PORT=6379 +# REDIS_PASSWORD= +# WIPE_REDIS_ON_START=True ### WEAVIATE -# MEMORY_BACKEND - Use 'weaviate' to use Weaviate vector storage -# WEAVIATE_HOST - Weaviate 
host IP -# WEAVIATE_PORT - Weaviate host port -# WEAVIATE_PROTOCOL - Weaviate host protocol (e.g. 'http') -# USE_WEAVIATE_EMBEDDED - Whether to use Embedded Weaviate -# WEAVIATE_EMBEDDED_PATH - File system path were to persist data when running Embedded Weaviate -# WEAVIATE_USERNAME - Weaviate username -# WEAVIATE_PASSWORD - Weaviate password -# WEAVIATE_API_KEY - Weaviate API key if using API-key-based authentication -# MEMORY_INDEX - Name of index to create in Weaviate -WEAVIATE_HOST="127.0.0.1" -WEAVIATE_PORT=8080 -WEAVIATE_PROTOCOL="http" -USE_WEAVIATE_EMBEDDED=False -WEAVIATE_EMBEDDED_PATH="/home/me/.local/share/weaviate" -WEAVIATE_USERNAME= -WEAVIATE_PASSWORD= -WEAVIATE_API_KEY= -MEMORY_INDEX=AutoGpt +## MEMORY_BACKEND - Use 'weaviate' to use Weaviate vector storage +## WEAVIATE_HOST - Weaviate host IP +## WEAVIATE_PORT - Weaviate host port +## WEAVIATE_PROTOCOL - Weaviate host protocol (e.g. 'http') +## USE_WEAVIATE_EMBEDDED - Whether to use Embedded Weaviate +## WEAVIATE_EMBEDDED_PATH - File system path where to persist data when running Embedded Weaviate +## WEAVIATE_USERNAME - Weaviate username +## WEAVIATE_PASSWORD - Weaviate password +## WEAVIATE_API_KEY - Weaviate API key if using API-key-based authentication +# WEAVIATE_HOST="127.0.0.1" +# WEAVIATE_PORT=8080 +# WEAVIATE_PROTOCOL="http" +# USE_WEAVIATE_EMBEDDED=False +# WEAVIATE_EMBEDDED_PATH="/home/me/.local/share/weaviate" +# WEAVIATE_USERNAME= +# WEAVIATE_PASSWORD= +# WEAVIATE_API_KEY= ### MILVUS -# MILVUS_ADDR - Milvus remote address (e.g. localhost:19530) -# MILVUS_COLLECTION - Milvus collection, -# change it if you want to start a new memory and retain the old memory. -MILVUS_ADDR=your-milvus-cluster-host-port -MILVUS_COLLECTION=autogpt +## MILVUS_ADDR - Milvus remote address (e.g. localhost:19530) +## MILVUS_COLLECTION - Milvus collection, +## change it if you want to start a new memory and retain the old memory. 
+# MILVUS_ADDR=your-milvus-cluster-host-port +# MILVUS_COLLECTION=autogpt ################################################################################ ### IMAGE GENERATION PROVIDER ################################################################################ ### OPEN AI -# IMAGE_PROVIDER - Image provider (Example: dalle) -IMAGE_PROVIDER=dalle -# IMAGE_SIZE - Image size (Example: 256) -# DALLE: 256, 512, 1024 -IMAGE_SIZE=256 +## IMAGE_PROVIDER - Image provider (Example: dalle) +## IMAGE_SIZE - Image size (Example: 256) +## DALLE: 256, 512, 1024 +# IMAGE_PROVIDER=dalle +# IMAGE_SIZE=256 ### HUGGINGFACE -# HUGGINGFACE_IMAGE_MODEL - Text-to-image model from Huggingface (Default: CompVis/stable-diffusion-v1-4) -HUGGINGFACE_IMAGE_MODEL=CompVis/stable-diffusion-v1-4 -# HUGGINGFACE_API_TOKEN - HuggingFace API token (Example: my-huggingface-api-token) -HUGGINGFACE_API_TOKEN=your-huggingface-api-token +## HUGGINGFACE_IMAGE_MODEL - Text-to-image model from Huggingface (Default: CompVis/stable-diffusion-v1-4) +## HUGGINGFACE_API_TOKEN - HuggingFace API token (Example: my-huggingface-api-token) +# HUGGINGFACE_IMAGE_MODEL=CompVis/stable-diffusion-v1-4 +# HUGGINGFACE_API_TOKEN=your-huggingface-api-token ### STABLE DIFFUSION WEBUI -# SD_WEBUI_URL - Stable diffusion webui API URL (Example: http://127.0.0.1:7860) -SD_WEBUI_URL=http://127.0.0.1:7860 -# SD_WEBUI_AUTH - Stable diffusion webui username:password pair (Example: username:password) -SD_WEBUI_AUTH= +## SD_WEBUI_AUTH - Stable diffusion webui username:password pair (Example: username:password) +## SD_WEBUI_URL - Stable diffusion webui API URL (Example: http://127.0.0.1:7860) +# SD_WEBUI_AUTH= +# SD_WEBUI_URL=http://127.0.0.1:7860 ################################################################################ ### AUDIO TO TEXT PROVIDER ################################################################################ ### HUGGINGFACE -HUGGINGFACE_AUDIO_TO_TEXT_MODEL=facebook/wav2vec2-base-960h +# 
HUGGINGFACE_AUDIO_TO_TEXT_MODEL=facebook/wav2vec2-base-960h ################################################################################ ### GIT Provider for repository actions ################################################################################ ### GITHUB -# GITHUB_API_KEY - Github API key / PAT (Example: github_pat_123) -# GITHUB_USERNAME - Github username -GITHUB_API_KEY=github_pat_123 -GITHUB_USERNAME=your-github-username +## GITHUB_API_KEY - Github API key / PAT (Example: github_pat_123) +## GITHUB_USERNAME - Github username +# GITHUB_API_KEY=github_pat_123 +# GITHUB_USERNAME=your-github-username ################################################################################ ### WEB BROWSING ################################################################################ ### BROWSER -# USE_WEB_BROWSER - Sets the web-browser drivers to use with selenium (defaults to chrome). -# HEADLESS_BROWSER - Whether to run the browser in headless mode (defaults to True) -# Note: set this to either 'chrome', 'firefox', or 'safari' depending on your current browser -# USE_WEB_BROWSER=chrome +## HEADLESS_BROWSER - Whether to run the browser in headless mode (default: True) +## USE_WEB_BROWSER - Sets the web-browser driver to use with selenium (default: chrome). +## Note: set this to either 'chrome', 'firefox', or 'safari' depending on your current browser # HEADLESS_BROWSER=True +# USE_WEB_BROWSER=chrome +## BROWSE_CHUNK_MAX_LENGTH - When browsing a website, define the length of chunks to summarize (in number of tokens, excluding the response; 75% of FAST_TOKEN_LIMIT is usually wise) +# BROWSE_CHUNK_MAX_LENGTH=3000 +## BROWSE_SPACY_LANGUAGE_MODEL is used to split sentences. Install additional languages via pip, and set the model name here. 
Example Chinese: python -m spacy download zh_core_web_sm +# BROWSE_SPACY_LANGUAGE_MODEL=en_core_web_sm ### GOOGLE -# GOOGLE_API_KEY - Google API key (Example: my-google-api-key) -# CUSTOM_SEARCH_ENGINE_ID - Custom search engine ID (Example: my-custom-search-engine-id) -GOOGLE_API_KEY=your-google-api-key -CUSTOM_SEARCH_ENGINE_ID=your-custom-search-engine-id +## GOOGLE_API_KEY - Google API key (Example: my-google-api-key) +## CUSTOM_SEARCH_ENGINE_ID - Custom search engine ID (Example: my-custom-search-engine-id) +# GOOGLE_API_KEY=your-google-api-key +# CUSTOM_SEARCH_ENGINE_ID=your-custom-search-engine-id ################################################################################ ### TTS PROVIDER ################################################################################ ### MAC OS -# USE_MAC_OS_TTS - Use Mac OS TTS or not (Default: False) -USE_MAC_OS_TTS=False +## USE_MAC_OS_TTS - Use Mac OS TTS or not (Default: False) +# USE_MAC_OS_TTS=False ### STREAMELEMENTS -# USE_BRIAN_TTS - Use Brian TTS or not (Default: False) -USE_BRIAN_TTS=False +## USE_BRIAN_TTS - Use Brian TTS or not (Default: False) +# USE_BRIAN_TTS=False ### ELEVENLABS -# ELEVENLABS_API_KEY - Eleven Labs API key (Example: my-elevenlabs-api-key) -# ELEVENLABS_VOICE_1_ID - Eleven Labs voice 1 ID (Example: my-voice-id-1) -# ELEVENLABS_VOICE_2_ID - Eleven Labs voice 2 ID (Example: my-voice-id-2) -ELEVENLABS_API_KEY=your-elevenlabs-api-key -ELEVENLABS_VOICE_1_ID=your-voice-id-1 -ELEVENLABS_VOICE_2_ID=your-voice-id-2 +## ELEVENLABS_API_KEY - Eleven Labs API key (Example: my-elevenlabs-api-key) +## ELEVENLABS_VOICE_1_ID - Eleven Labs voice 1 ID (Example: my-voice-id-1) +## ELEVENLABS_VOICE_2_ID - Eleven Labs voice 2 ID (Example: my-voice-id-2) +# ELEVENLABS_API_KEY=your-elevenlabs-api-key +# ELEVENLABS_VOICE_1_ID=your-voice-id-1 +# ELEVENLABS_VOICE_2_ID=your-voice-id-2 ################################################################################ -### TWITTER API +### TWITTER API 
################################################################################ -TW_CONSUMER_KEY= -TW_CONSUMER_SECRET= -TW_ACCESS_TOKEN= -TW_ACCESS_TOKEN_SECRET= +# TW_CONSUMER_KEY= +# TW_CONSUMER_SECRET= +# TW_ACCESS_TOKEN= +# TW_ACCESS_TOKEN_SECRET= diff --git a/README.md b/README.md index 4969e5ed..6d636c56 100644 --- a/README.md +++ b/README.md @@ -32,13 +32,13 @@ Development of this free, open-source project is made possible by all the

-InfluxData    Roost.AI    NucleiAI    AlgohashFe    

+InfluxData    Roost.AI    NucleiAI    Algohash    TypingMind    

robinicus  prompthero  crizzler  tob-le-rone  FSTatSBS  toverly1  ddtarazona  Nalhos  Kazamario  pingbotan  indoor47  AuroraHolding  kreativai  hunteraraujo  Explorergt92  judegomila   thepok   SpacingLily  merwanehamadi  m  zkonduit  maxxflyer  tekelsey  digisomni  nocodeclarity  tjarmain -Josecodesalot  saten-private  kenndanielso  johnculkin  Daniel1357  0xmatchmaker  belharethsami  nicoguyon  josephcmiller2  KiaArmani  Mobivs  rocks6  Odin519Tomas  ChrisDMT  thisisjeffchen  RealChrisSean  AIdevelopersAI  scryptedinc  jun784  goldenrecursion  allenstecat  LeeRobidas  cfarquhar  avy-ai  omphos  sunchongren  CrazySwami  fruition  Web3Capital  jazgarewal  rejunity  dexterityx  hostdp6  shawnharmsen  tommygeee  abhinav-pandey29  ColinConwell  kMag410  lucas-chu  Heitechsoft  bentoml  MediConCenHK  nnkostov  founderblocks-sils  CarmenCocoa  angiaou  fabrietech  Partender  RThaweewat  GalaxyVideoAgency  Brodie0  sultanmeghji  CatsMeow492  caitlynmeeks  garythebat  concreit  Pythagora-io  ASmithOWL  Cameron-Fulton  joaomdmoura  Dradstone  st617  wenfengwang  morcos  CrypteorCapital  jd3655  mathewhawkins  ZERO-A-ONE  MayurVirkar  SwftCoins  marv-technology  cxs  iddelacruz  AryaXAI  lmaugustin  Mr-Bishop42  vixul-accelerator  TheStoneMX  ciscodebs  ntwrite  DataMetis  ikarosai  refinery1  MetaPath01  ternary5  arjunb023  yx3110  vkozacek  eelbaz  rapidstartup  txtr99  tob-le-rone  neverinstall  projectonegames  DailyBotHQ  comet-ml  rickscode  webbcolton  MBassi91  

+Josecodesalot  saten-private  kenndanielso  johnculkin  Daniel1357  0xmatchmaker  belharethsami  nicoguyon  josephcmiller2  KiaArmani  Mobivs  rocks6  Odin519Tomas  ChrisDMT  thisisjeffchen  RealChrisSean  AIdevelopersAI  scryptedinc  jun784  goldenrecursion  allenstecat  LeeRobidas  cfarquhar  avy-ai  omphos  sunchongren  CrazySwami  fruition  Web3Capital  jazgarewal  rejunity  dexterityx  shawnharmsen  tommygeee  abhinav-pandey29  ColinConwell  kMag410  lucas-chu  Heitechsoft  bentoml  MediConCenHK  nnkostov  founderblocks-sils  CarmenCocoa  angiaou  fabrietech  Partender  RThaweewat  GalaxyVideoAgency  Brodie0  sultanmeghji  CatsMeow492  caitlynmeeks  garythebat  concreit  Pythagora-io  ASmithOWL  Cameron-Fulton  joaomdmoura  Dradstone  st617  wenfengwang  morcos  CrypteorCapital  jd3655  mathewhawkins  ZERO-A-ONE  MayurVirkar  SwftCoins  marv-technology  cxs  iddelacruz  AryaXAI  lmaugustin  Mr-Bishop42  vixul-accelerator  TheStoneMX  ciscodebs  ntwrite  DataMetis  ikarosai  refinery1  MetaPath01  ternary5  arjunb023  yx3110  vkozacek  eelbaz  rapidstartup  txtr99  tob-le-rone  neverinstall  projectonegames  DailyBotHQ  comet-ml  rickscode  webbcolton  MBassi91  

## 🚀 Features @@ -135,12 +135,12 @@ _To execute the following commands, open a CMD, Bash, or Powershell window by na 1. Run `autogpt` Python module in your terminal. On linux or mac: ```bash - # On Linux of Mac: + # On Linux or Mac: ./run.sh start # On Windows: - ./run.bat start + .\run.bat ``` - Running with `--help` after `start` lists all the possible command line arguments you can pass. + Running with `--help` after `.\run.bat` lists all the possible command line arguments you can pass. 2. After each action, choose from options to authorize command(s), exit the program, or provide feedback to the AI. diff --git a/autogpt/cli.py b/autogpt/cli.py index a69a53ac..6fe9ecbb 100644 --- a/autogpt/cli.py +++ b/autogpt/cli.py @@ -70,6 +70,7 @@ def main( """ # Put imports inside function to avoid importing everything when starting the CLI import logging + import sys from colorama import Fore @@ -79,7 +80,7 @@ def main( from autogpt.logs import logger from autogpt.memory import get_memory from autogpt.prompt import construct_prompt - from autogpt.utils import get_latest_bulletin + from autogpt.utils import get_current_git_branch, get_latest_bulletin if ctx.invoked_subcommand is None: cfg = Config() @@ -105,6 +106,23 @@ def main( motd = get_latest_bulletin() if motd: logger.typewriter_log("NEWS: ", Fore.GREEN, motd) + git_branch = get_current_git_branch() + if git_branch and git_branch != "stable": + logger.typewriter_log( + "WARNING: ", + Fore.RED, + f"You are running on `{git_branch}` branch " + "- this is not a supported branch.", + ) + if sys.version_info < (3, 10): + logger.typewriter_log( + "WARNING: ", + Fore.RED, + "You are running on an older version of Python. " + "Some people have observed problems with certain " + "parts of Auto-GPT with this version. 
" + "Please consider upgrading to Python 3.10 or higher.", + ) system_prompt = construct_prompt() # print(prompt) # Initialize variables diff --git a/autogpt/config/config.py b/autogpt/config/config.py index 4b53df10..c284a4ac 100644 --- a/autogpt/config/config.py +++ b/autogpt/config/config.py @@ -31,10 +31,13 @@ class Config(metaclass=Singleton): self.smart_llm_model = os.getenv("SMART_LLM_MODEL", "gpt-4") self.fast_token_limit = int(os.getenv("FAST_TOKEN_LIMIT", 4000)) self.smart_token_limit = int(os.getenv("SMART_TOKEN_LIMIT", 8000)) - self.browse_chunk_max_length = int(os.getenv("BROWSE_CHUNK_MAX_LENGTH", 8192)) + self.browse_chunk_max_length = int(os.getenv("BROWSE_CHUNK_MAX_LENGTH", 3000)) + self.browse_spacy_language_model = os.getenv( + "BROWSE_SPACY_LANGUAGE_MODEL", "en_core_web_sm" + ) self.openai_api_key = os.getenv("OPENAI_API_KEY") - self.temperature = float(os.getenv("TEMPERATURE", "1")) + self.temperature = float(os.getenv("TEMPERATURE", "0")) self.use_azure = os.getenv("USE_AZURE") == "True" self.execute_local_commands = ( os.getenv("EXECUTE_LOCAL_COMMANDS", "False") == "True" @@ -145,7 +148,7 @@ class Config(metaclass=Singleton): else: return "" - AZURE_CONFIG_FILE = os.path.join(os.path.dirname(__file__), "..", "azure.yaml") + AZURE_CONFIG_FILE = os.path.join(os.path.dirname(__file__), "../..", "azure.yaml") def load_azure_config(self, config_file: str = AZURE_CONFIG_FILE) -> None: """ @@ -168,7 +171,7 @@ class Config(metaclass=Singleton): self.openai_api_version = ( config_params.get("azure_api_version") or "2023-03-15-preview" ) - self.azure_model_to_deployment_id_map = config_params.get("azure_model_map", []) + self.azure_model_to_deployment_id_map = config_params.get("azure_model_map", {}) def set_continuous_mode(self, value: bool) -> None: """Set the continuous mode value.""" diff --git a/autogpt/processing/text.py b/autogpt/processing/text.py index 52add814..2122f0f0 100644 --- a/autogpt/processing/text.py +++ b/autogpt/processing/text.py 
@@ -1,8 +1,10 @@ """Text processing functions""" from typing import Dict, Generator, Optional +import spacy from selenium.webdriver.remote.webdriver import WebDriver +from autogpt import token_counter from autogpt.config import Config from autogpt.llm_utils import create_chat_completion from autogpt.memory import get_memory @@ -11,7 +13,12 @@ CFG = Config() MEMORY = get_memory(CFG) -def split_text(text: str, max_length: int = 8192) -> Generator[str, None, None]: +def split_text( + text: str, + max_length: int = CFG.browse_chunk_max_length, + model: str = CFG.fast_llm_model, + question: str = "", +) -> Generator[str, None, None]: """Split text into chunks of a maximum length Args: @@ -24,21 +31,46 @@ def split_text(text: str, max_length: int = 8192) -> Generator[str, None, None]: Raises: ValueError: If the text is longer than the maximum length """ - paragraphs = text.split("\n") - current_length = 0 + flatened_paragraphs = " ".join(text.split("\n")) + nlp = spacy.load(CFG.browse_spacy_language_model) + nlp.add_pipe("sentencizer") + doc = nlp(flatened_paragraphs) + sentences = [sent.text.strip() for sent in doc.sents] + current_chunk = [] - for paragraph in paragraphs: - if current_length + len(paragraph) + 1 <= max_length: - current_chunk.append(paragraph) - current_length += len(paragraph) + 1 + for sentence in sentences: + message_with_additional_sentence = [ + create_message(" ".join(current_chunk) + " " + sentence, question) + ] + + expected_token_usage = ( + token_usage_of_chunk(messages=message_with_additional_sentence, model=model) + + 1 + ) + if expected_token_usage <= max_length: + current_chunk.append(sentence) else: - yield "\n".join(current_chunk) - current_chunk = [paragraph] - current_length = len(paragraph) + 1 + yield " ".join(current_chunk) + current_chunk = [sentence] + message_this_sentence_only = [ + create_message(" ".join(current_chunk), question) + ] + expected_token_usage = ( + token_usage_of_chunk(messages=message_this_sentence_only, 
model=model) + + 1 + ) + if expected_token_usage > max_length: + raise ValueError( + f"Sentence is too long in webpage: {expected_token_usage} tokens." + ) if current_chunk: - yield "\n".join(current_chunk) + yield " ".join(current_chunk) + + +def token_usage_of_chunk(messages, model): + return token_counter.count_message_tokens(messages, model) def summarize_text( @@ -58,11 +90,16 @@ def summarize_text( if not text: return "Error: No text to summarize" + model = CFG.fast_llm_model text_length = len(text) print(f"Text length: {text_length} characters") summaries = [] - chunks = list(split_text(text)) + chunks = list( + split_text( + text, max_length=CFG.browse_chunk_max_length, model=model, question=question + ), + ) scroll_ratio = 1 / len(chunks) for i, chunk in enumerate(chunks): @@ -74,15 +111,20 @@ def summarize_text( MEMORY.add(memory_to_add) - print(f"Summarizing chunk {i + 1} / {len(chunks)}") messages = [create_message(chunk, question)] + tokens_for_chunk = token_counter.count_message_tokens(messages, model) + print( + f"Summarizing chunk {i + 1} / {len(chunks)} of length {len(chunk)} characters, or {tokens_for_chunk} tokens" + ) summary = create_chat_completion( - model=CFG.fast_llm_model, + model=model, messages=messages, ) summaries.append(summary) - print(f"Added chunk {i + 1} summary to memory") + print( + f"Added chunk {i + 1} summary to memory, of length {len(summary)} characters" + ) memory_to_add = f"Source: {url}\n" f"Content summary part#{i + 1}: {summary}" @@ -94,7 +136,7 @@ def summarize_text( messages = [create_message(combined_summary, question)] return create_chat_completion( - model=CFG.fast_llm_model, + model=model, messages=messages, ) diff --git a/autogpt/prompt.py b/autogpt/prompt.py index 03c132ac..08754605 100644 --- a/autogpt/prompt.py +++ b/autogpt/prompt.py @@ -85,7 +85,6 @@ def get_prompt() -> str: {"code": "", "focus": ""}, ), ("Execute Python File", "execute_python_file", {"file": ""}), - ("Task Complete (Shutdown)", 
"task_complete", {"reason": ""}), ("Generate Image", "generate_image", {"prompt": ""}), ("Send Tweet", "send_tweet", {"text": ""}), ] diff --git a/autogpt/utils.py b/autogpt/utils.py index 0f52c060..e93d5ac7 100644 --- a/autogpt/utils.py +++ b/autogpt/utils.py @@ -3,6 +3,7 @@ import os import requests import yaml from colorama import Fore +from git import Repo def clean_input(prompt: str = ""): @@ -53,6 +54,15 @@ def get_bulletin_from_web() -> str: return "" +def get_current_git_branch() -> str: + try: + repo = Repo(search_parent_directories=True) + branch = repo.active_branch + return branch.name + except: + return "" + + def get_latest_bulletin() -> str: exists = os.path.exists("CURRENT_BULLETIN.md") current_bulletin = "" diff --git a/autogpt/workspace.py b/autogpt/workspace.py index 6fb0e311..724f8443 100644 --- a/autogpt/workspace.py +++ b/autogpt/workspace.py @@ -37,6 +37,7 @@ def safe_path_join(base: Path, *paths: str | Path) -> Path: Returns: Path: The joined path """ + base = base.resolve() joined_path = base.joinpath(*paths).resolve() if CFG.restrict_to_workspace and not joined_path.is_relative_to(base): diff --git a/azure.yaml.template b/azure.yaml.template index 74ca797b..ab6e9fb6 100644 --- a/azure.yaml.template +++ b/azure.yaml.template @@ -1,4 +1,4 @@ -azure_api_type: azure_ad +azure_api_type: azure azure_api_base: your-base-url-for-azure azure_api_version: api-version-for-azure azure_model_map: diff --git a/requirements.txt b/requirements.txt index b4245323..66c90c79 100644 --- a/requirements.txt +++ b/requirements.txt @@ -20,6 +20,8 @@ webdriver-manager jsonschema tweepy click +spacy>=3.0.0,<4.0.0 +en_core_web_sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.4.0/en_core_web_sm-3.4.0-py3-none-any.whl ##Dev coverage @@ -27,7 +29,6 @@ flake8 numpy pre-commit black -sourcery isort gitpython==3.1.31