mirror of https://github.com/aljazceru/Auto-GPT.git (synced 2026-01-07 00:04:26 +01:00)

Merge pull request #2599 from Significant-Gravitas/master
Master -> Test 0.2.2

.env.template (219 changed lines)
@@ -1,187 +1,190 @@
 ################################################################################
 ### AUTO-GPT - GENERAL SETTINGS
 ################################################################################
-# EXECUTE_LOCAL_COMMANDS - Allow local command execution (Example: False)
-EXECUTE_LOCAL_COMMANDS=False
-# RESTRICT_TO_WORKSPACE - Restrict file operations to workspace ./auto_gpt_workspace (Default: True)
-RESTRICT_TO_WORKSPACE=True
-# BROWSE_CHUNK_MAX_LENGTH - When browsing website, define the length of chunk stored in memory
-BROWSE_CHUNK_MAX_LENGTH=8192
-# USER_AGENT - Define the user-agent used by the requests library to browse website (string)
+## EXECUTE_LOCAL_COMMANDS - Allow local command execution (Default: False)
+## RESTRICT_TO_WORKSPACE - Restrict file operations to workspace ./auto_gpt_workspace (Default: True)
+# EXECUTE_LOCAL_COMMANDS=False
+# RESTRICT_TO_WORKSPACE=True
+
+## USER_AGENT - Define the user-agent used by the requests library to browse website (string)
 # USER_AGENT="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36"
-# AI_SETTINGS_FILE - Specifies which AI Settings file to use (defaults to ai_settings.yaml)
-AI_SETTINGS_FILE=ai_settings.yaml
+## AI_SETTINGS_FILE - Specifies which AI Settings file to use (defaults to ai_settings.yaml)
+# AI_SETTINGS_FILE=ai_settings.yaml

 ################################################################################
 ### LLM PROVIDER
 ################################################################################

 ### OPENAI
-# OPENAI_API_KEY - OpenAI API Key (Example: my-openai-api-key)
-# TEMPERATURE - Sets temperature in OpenAI (Default: 0)
-# USE_AZURE - Use Azure OpenAI or not (Default: False)
+## OPENAI_API_KEY - OpenAI API Key (Example: my-openai-api-key)
+## TEMPERATURE - Sets temperature in OpenAI (Default: 0)
+## USE_AZURE - Use Azure OpenAI or not (Default: False)
 OPENAI_API_KEY=your-openai-api-key
-TEMPERATURE=0
-USE_AZURE=False
+# TEMPERATURE=0
+# USE_AZURE=False

 ### AZURE
-# cleanup azure env as already moved to `azure.yaml.template`
+# moved to `azure.yaml.template`

 ################################################################################
 ### LLM MODELS
 ################################################################################

-# SMART_LLM_MODEL - Smart language model (Default: gpt-4)
-# FAST_LLM_MODEL - Fast language model (Default: gpt-3.5-turbo)
-SMART_LLM_MODEL=gpt-4
-FAST_LLM_MODEL=gpt-3.5-turbo
+## SMART_LLM_MODEL - Smart language model (Default: gpt-4)
+## FAST_LLM_MODEL - Fast language model (Default: gpt-3.5-turbo)
+# SMART_LLM_MODEL=gpt-4
+# FAST_LLM_MODEL=gpt-3.5-turbo

 ### LLM MODEL SETTINGS
-# FAST_TOKEN_LIMIT - Fast token limit for OpenAI (Default: 4000)
-# SMART_TOKEN_LIMIT - Smart token limit for OpenAI (Default: 8000)
-# When using --gpt3only this needs to be set to 4000.
-FAST_TOKEN_LIMIT=4000
-SMART_TOKEN_LIMIT=8000
+## FAST_TOKEN_LIMIT - Fast token limit for OpenAI (Default: 4000)
+## SMART_TOKEN_LIMIT - Smart token limit for OpenAI (Default: 8000)
+## When using --gpt3only this needs to be set to 4000.
+# FAST_TOKEN_LIMIT=4000
+# SMART_TOKEN_LIMIT=8000

 ################################################################################
 ### MEMORY
 ################################################################################

 ### MEMORY_BACKEND - Memory backend type
-# local - Default
-# pinecone - Pinecone (if configured)
-# redis - Redis (if configured)
-# milvus - Milvus (if configured)
-MEMORY_BACKEND=local
+## local - Default
+## pinecone - Pinecone (if configured)
+## redis - Redis (if configured)
+## milvus - Milvus (if configured)
+## MEMORY_INDEX - Name of index created in Memory backend (Default: auto-gpt)
+# MEMORY_BACKEND=local
+# MEMORY_INDEX=auto-gpt

 ### PINECONE
-# PINECONE_API_KEY - Pinecone API Key (Example: my-pinecone-api-key)
-# PINECONE_ENV - Pinecone environment (region) (Example: us-west-2)
-PINECONE_API_KEY=your-pinecone-api-key
-PINECONE_ENV=your-pinecone-region
+## PINECONE_API_KEY - Pinecone API Key (Example: my-pinecone-api-key)
+## PINECONE_ENV - Pinecone environment (region) (Example: us-west-2)
+# PINECONE_API_KEY=your-pinecone-api-key
+# PINECONE_ENV=your-pinecone-region

 ### REDIS
-# REDIS_HOST - Redis host (Default: localhost, use "redis" for docker-compose)
-# REDIS_PORT - Redis port (Default: 6379)
-# REDIS_PASSWORD - Redis password (Default: "")
-# WIPE_REDIS_ON_START - Wipes data / index on start (Default: False)
-# MEMORY_INDEX - Name of index created in Redis database (Default: auto-gpt)
-REDIS_HOST=localhost
-REDIS_PORT=6379
-REDIS_PASSWORD=
-WIPE_REDIS_ON_START=False
-MEMORY_INDEX=auto-gpt
+## REDIS_HOST - Redis host (Default: localhost, use "redis" for docker-compose)
+## REDIS_PORT - Redis port (Default: 6379)
+## REDIS_PASSWORD - Redis password (Default: "")
+## WIPE_REDIS_ON_START - Wipes data / index on start (Default: True)
+# REDIS_HOST=localhost
+# REDIS_PORT=6379
+# REDIS_PASSWORD=
+# WIPE_REDIS_ON_START=True

 ### WEAVIATE
-# MEMORY_BACKEND - Use 'weaviate' to use Weaviate vector storage
-# WEAVIATE_HOST - Weaviate host IP
-# WEAVIATE_PORT - Weaviate host port
-# WEAVIATE_PROTOCOL - Weaviate host protocol (e.g. 'http')
-# USE_WEAVIATE_EMBEDDED - Whether to use Embedded Weaviate
-# WEAVIATE_EMBEDDED_PATH - File system path where to persist data when running Embedded Weaviate
-# WEAVIATE_USERNAME - Weaviate username
-# WEAVIATE_PASSWORD - Weaviate password
-# WEAVIATE_API_KEY - Weaviate API key if using API-key-based authentication
-# MEMORY_INDEX - Name of index to create in Weaviate
-WEAVIATE_HOST="127.0.0.1"
-WEAVIATE_PORT=8080
-WEAVIATE_PROTOCOL="http"
-USE_WEAVIATE_EMBEDDED=False
-WEAVIATE_EMBEDDED_PATH="/home/me/.local/share/weaviate"
-WEAVIATE_USERNAME=
-WEAVIATE_PASSWORD=
-WEAVIATE_API_KEY=
-MEMORY_INDEX=AutoGpt
+## MEMORY_BACKEND - Use 'weaviate' to use Weaviate vector storage
+## WEAVIATE_HOST - Weaviate host IP
+## WEAVIATE_PORT - Weaviate host port
+## WEAVIATE_PROTOCOL - Weaviate host protocol (e.g. 'http')
+## USE_WEAVIATE_EMBEDDED - Whether to use Embedded Weaviate
+## WEAVIATE_EMBEDDED_PATH - File system path where to persist data when running Embedded Weaviate
+## WEAVIATE_USERNAME - Weaviate username
+## WEAVIATE_PASSWORD - Weaviate password
+## WEAVIATE_API_KEY - Weaviate API key if using API-key-based authentication
+# WEAVIATE_HOST="127.0.0.1"
+# WEAVIATE_PORT=8080
+# WEAVIATE_PROTOCOL="http"
+# USE_WEAVIATE_EMBEDDED=False
+# WEAVIATE_EMBEDDED_PATH="/home/me/.local/share/weaviate"
+# WEAVIATE_USERNAME=
+# WEAVIATE_PASSWORD=
+# WEAVIATE_API_KEY=

 ### MILVUS
-# MILVUS_ADDR - Milvus remote address (e.g. localhost:19530)
-# MILVUS_COLLECTION - Milvus collection,
-# change it if you want to start a new memory and retain the old memory.
-MILVUS_ADDR=your-milvus-cluster-host-port
-MILVUS_COLLECTION=autogpt
+## MILVUS_ADDR - Milvus remote address (e.g. localhost:19530)
+## MILVUS_COLLECTION - Milvus collection,
+## change it if you want to start a new memory and retain the old memory.
+# MILVUS_ADDR=your-milvus-cluster-host-port
+# MILVUS_COLLECTION=autogpt

 ################################################################################
 ### IMAGE GENERATION PROVIDER
 ################################################################################

 ### OPEN AI
-# IMAGE_PROVIDER - Image provider (Example: dalle)
-IMAGE_PROVIDER=dalle
-# IMAGE_SIZE - Image size (Example: 256)
-# DALLE: 256, 512, 1024
-IMAGE_SIZE=256
+## IMAGE_PROVIDER - Image provider (Example: dalle)
+## IMAGE_SIZE - Image size (Example: 256)
+## DALLE: 256, 512, 1024
+# IMAGE_PROVIDER=dalle
+# IMAGE_SIZE=256

 ### HUGGINGFACE
-# HUGGINGFACE_IMAGE_MODEL - Text-to-image model from Huggingface (Default: CompVis/stable-diffusion-v1-4)
-HUGGINGFACE_IMAGE_MODEL=CompVis/stable-diffusion-v1-4
-# HUGGINGFACE_API_TOKEN - HuggingFace API token (Example: my-huggingface-api-token)
-HUGGINGFACE_API_TOKEN=your-huggingface-api-token
+## HUGGINGFACE_IMAGE_MODEL - Text-to-image model from Huggingface (Default: CompVis/stable-diffusion-v1-4)
+## HUGGINGFACE_API_TOKEN - HuggingFace API token (Example: my-huggingface-api-token)
+# HUGGINGFACE_IMAGE_MODEL=CompVis/stable-diffusion-v1-4
+# HUGGINGFACE_API_TOKEN=your-huggingface-api-token

 ### STABLE DIFFUSION WEBUI
-# SD_WEBUI_URL - Stable diffusion webui API URL (Example: http://127.0.0.1:7860)
-SD_WEBUI_URL=http://127.0.0.1:7860
-# SD_WEBUI_AUTH - Stable diffusion webui username:password pair (Example: username:password)
-SD_WEBUI_AUTH=
+## SD_WEBUI_AUTH - Stable diffusion webui username:password pair (Example: username:password)
+## SD_WEBUI_URL - Stable diffusion webui API URL (Example: http://127.0.0.1:7860)
+# SD_WEBUI_AUTH=
+# SD_WEBUI_URL=http://127.0.0.1:7860

 ################################################################################
 ### AUDIO TO TEXT PROVIDER
 ################################################################################

 ### HUGGINGFACE
-HUGGINGFACE_AUDIO_TO_TEXT_MODEL=facebook/wav2vec2-base-960h
+# HUGGINGFACE_AUDIO_TO_TEXT_MODEL=facebook/wav2vec2-base-960h

 ################################################################################
 ### GIT Provider for repository actions
 ################################################################################

 ### GITHUB
-# GITHUB_API_KEY - Github API key / PAT (Example: github_pat_123)
-# GITHUB_USERNAME - Github username
-GITHUB_API_KEY=github_pat_123
-GITHUB_USERNAME=your-github-username
+## GITHUB_API_KEY - Github API key / PAT (Example: github_pat_123)
+## GITHUB_USERNAME - Github username
+# GITHUB_API_KEY=github_pat_123
+# GITHUB_USERNAME=your-github-username

 ################################################################################
 ### WEB BROWSING
 ################################################################################

 ### BROWSER
-# USE_WEB_BROWSER - Sets the web-browser drivers to use with selenium (defaults to chrome).
-# HEADLESS_BROWSER - Whether to run the browser in headless mode (defaults to True)
-# Note: set this to either 'chrome', 'firefox', or 'safari' depending on your current browser
-# USE_WEB_BROWSER=chrome
+## HEADLESS_BROWSER - Whether to run the browser in headless mode (default: True)
+## USE_WEB_BROWSER - Sets the web-browser driver to use with selenium (default: chrome).
+## Note: set this to either 'chrome', 'firefox', or 'safari' depending on your current browser
+# HEADLESS_BROWSER=True
+# USE_WEB_BROWSER=chrome
+## BROWSE_CHUNK_MAX_LENGTH - When browsing website, define the length of chunks to summarize (in number of tokens, excluding the response; 75% of FAST_TOKEN_LIMIT is usually wise)
+# BROWSE_CHUNK_MAX_LENGTH=3000
+## BROWSE_SPACY_LANGUAGE_MODEL is used to split sentences. Install additional languages via pip, and set the model name here. Example for Chinese: python -m spacy download zh_core_web_sm
+# BROWSE_SPACY_LANGUAGE_MODEL=en_core_web_sm

 ### GOOGLE
-# GOOGLE_API_KEY - Google API key (Example: my-google-api-key)
-# CUSTOM_SEARCH_ENGINE_ID - Custom search engine ID (Example: my-custom-search-engine-id)
-GOOGLE_API_KEY=your-google-api-key
-CUSTOM_SEARCH_ENGINE_ID=your-custom-search-engine-id
+## GOOGLE_API_KEY - Google API key (Example: my-google-api-key)
+## CUSTOM_SEARCH_ENGINE_ID - Custom search engine ID (Example: my-custom-search-engine-id)
+# GOOGLE_API_KEY=your-google-api-key
+# CUSTOM_SEARCH_ENGINE_ID=your-custom-search-engine-id

 ################################################################################
 ### TTS PROVIDER
 ################################################################################

 ### MAC OS
-# USE_MAC_OS_TTS - Use Mac OS TTS or not (Default: False)
-USE_MAC_OS_TTS=False
+## USE_MAC_OS_TTS - Use Mac OS TTS or not (Default: False)
+# USE_MAC_OS_TTS=False

 ### STREAMELEMENTS
-# USE_BRIAN_TTS - Use Brian TTS or not (Default: False)
-USE_BRIAN_TTS=False
+## USE_BRIAN_TTS - Use Brian TTS or not (Default: False)
+# USE_BRIAN_TTS=False

 ### ELEVENLABS
-# ELEVENLABS_API_KEY - Eleven Labs API key (Example: my-elevenlabs-api-key)
-# ELEVENLABS_VOICE_1_ID - Eleven Labs voice 1 ID (Example: my-voice-id-1)
-# ELEVENLABS_VOICE_2_ID - Eleven Labs voice 2 ID (Example: my-voice-id-2)
-ELEVENLABS_API_KEY=your-elevenlabs-api-key
-ELEVENLABS_VOICE_1_ID=your-voice-id-1
-ELEVENLABS_VOICE_2_ID=your-voice-id-2
+## ELEVENLABS_API_KEY - Eleven Labs API key (Example: my-elevenlabs-api-key)
+## ELEVENLABS_VOICE_1_ID - Eleven Labs voice 1 ID (Example: my-voice-id-1)
+## ELEVENLABS_VOICE_2_ID - Eleven Labs voice 2 ID (Example: my-voice-id-2)
+# ELEVENLABS_API_KEY=your-elevenlabs-api-key
+# ELEVENLABS_VOICE_1_ID=your-voice-id-1
+# ELEVENLABS_VOICE_2_ID=your-voice-id-2

 ################################################################################
 ### TWITTER API
 ################################################################################

-TW_CONSUMER_KEY=
-TW_CONSUMER_SECRET=
-TW_ACCESS_TOKEN=
-TW_ACCESS_TOKEN_SECRET=
+# TW_CONSUMER_KEY=
+# TW_CONSUMER_SECRET=
+# TW_ACCESS_TOKEN=
+# TW_ACCESS_TOKEN_SECRET=
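The template now follows one convention throughout: `##` lines document a setting, and each setting ships commented out at its default, so a fresh install needs only `OPENAI_API_KEY` filled in. A minimal sketch of how such a file is typically consumed, assuming python-dotenv (already a dependency of the project); the fallback values below mirror the documented defaults, and the loader itself is illustrative, not Auto-GPT's actual Config class:

import os

from dotenv import load_dotenv  # python-dotenv

load_dotenv()  # reads .env into the process environment

# unset variables fall back to the defaults documented in the template
execute_local_commands = os.getenv("EXECUTE_LOCAL_COMMANDS", "False") == "True"
browse_chunk_max_length = int(os.getenv("BROWSE_CHUNK_MAX_LENGTH", "3000"))
smart_llm_model = os.getenv("SMART_LLM_MODEL", "gpt-4")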
@@ -70,6 +70,7 @@ def main(
     """
     # Put imports inside function to avoid importing everything when starting the CLI
     import logging
+    import sys

     from colorama import Fore

@@ -79,7 +80,7 @@ def main(
     from autogpt.logs import logger
     from autogpt.memory import get_memory
     from autogpt.prompt import construct_prompt
-    from autogpt.utils import get_latest_bulletin
+    from autogpt.utils import get_current_git_branch, get_latest_bulletin

     if ctx.invoked_subcommand is None:
         cfg = Config()
@@ -105,6 +106,23 @@ def main(
         motd = get_latest_bulletin()
         if motd:
             logger.typewriter_log("NEWS: ", Fore.GREEN, motd)
+        git_branch = get_current_git_branch()
+        if git_branch and git_branch != "stable":
+            logger.typewriter_log(
+                "WARNING: ",
+                Fore.RED,
+                f"You are running on `{git_branch}` branch "
+                "- this is not a supported branch.",
+            )
+        if sys.version_info < (3, 10):
+            logger.typewriter_log(
+                "WARNING: ",
+                Fore.RED,
+                "You are running on an older version of Python. "
+                "Some people have observed problems with certain "
+                "parts of Auto-GPT with this version. "
+                "Please consider upgrading to Python 3.10 or higher.",
+            )
         system_prompt = construct_prompt()
         # print(prompt)
         # Initialize variables
@@ -31,10 +31,13 @@ class Config(metaclass=Singleton):
         self.smart_llm_model = os.getenv("SMART_LLM_MODEL", "gpt-4")
         self.fast_token_limit = int(os.getenv("FAST_TOKEN_LIMIT", 4000))
         self.smart_token_limit = int(os.getenv("SMART_TOKEN_LIMIT", 8000))
-        self.browse_chunk_max_length = int(os.getenv("BROWSE_CHUNK_MAX_LENGTH", 8192))
+        self.browse_chunk_max_length = int(os.getenv("BROWSE_CHUNK_MAX_LENGTH", 3000))
+        self.browse_spacy_language_model = os.getenv(
+            "BROWSE_SPACY_LANGUAGE_MODEL", "en_core_web_sm"
+        )

         self.openai_api_key = os.getenv("OPENAI_API_KEY")
-        self.temperature = float(os.getenv("TEMPERATURE", "1"))
+        self.temperature = float(os.getenv("TEMPERATURE", "0"))
         self.use_azure = os.getenv("USE_AZURE") == "True"
         self.execute_local_commands = (
             os.getenv("EXECUTE_LOCAL_COMMANDS", "False") == "True"
@@ -145,7 +148,7 @@ class Config(metaclass=Singleton):
         else:
             return ""

-    AZURE_CONFIG_FILE = os.path.join(os.path.dirname(__file__), "..", "azure.yaml")
+    AZURE_CONFIG_FILE = os.path.join(os.path.dirname(__file__), "../..", "azure.yaml")

     def load_azure_config(self, config_file: str = AZURE_CONFIG_FILE) -> None:
         """
@@ -168,7 +171,7 @@ class Config(metaclass=Singleton):
         self.openai_api_version = (
             config_params.get("azure_api_version") or "2023-03-15-preview"
         )
-        self.azure_model_to_deployment_id_map = config_params.get("azure_model_map", [])
+        self.azure_model_to_deployment_id_map = config_params.get("azure_model_map", {})

     def set_continuous_mode(self, value: bool) -> None:
         """Set the continuous mode value."""
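Beyond the new browsing defaults (`BROWSE_CHUNK_MAX_LENGTH` dropping to 3000 and `TEMPERATURE` defaulting to 0), the last hunk fixes the fallback type for `azure_model_map`: the value is consumed as a mapping from model name to Azure deployment ID, so the only type-correct default is `{}`. A sketch of the intended shape, with deployment IDs invented for illustration:

# shape of azure_model_map once loaded from azure.yaml (hypothetical IDs)
azure_model_to_deployment_id_map = {
    "gpt-3.5-turbo": "my-gpt35-deployment",
    "gpt-4": "my-gpt4-deployment",
}

# dict-style lookups like this would raise AttributeError on the old [] default
deployment_id = azure_model_to_deployment_id_map.get("gpt-4", "")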
@@ -1,8 +1,10 @@
 """Text processing functions"""
 from typing import Dict, Generator, Optional

+import spacy
 from selenium.webdriver.remote.webdriver import WebDriver

+from autogpt import token_counter
 from autogpt.config import Config
 from autogpt.llm_utils import create_chat_completion
 from autogpt.memory import get_memory
@@ -11,7 +13,12 @@ CFG = Config()
 MEMORY = get_memory(CFG)


-def split_text(text: str, max_length: int = 8192) -> Generator[str, None, None]:
+def split_text(
+    text: str,
+    max_length: int = CFG.browse_chunk_max_length,
+    model: str = CFG.fast_llm_model,
+    question: str = "",
+) -> Generator[str, None, None]:
     """Split text into chunks of a maximum length

     Args:
@@ -24,21 +31,46 @@ def split_text(text: str, max_length: int = 8192) -> Generator[str, None, None]:
     Raises:
         ValueError: If the text is longer than the maximum length
     """
-    paragraphs = text.split("\n")
-    current_length = 0
+    flatened_paragraphs = " ".join(text.split("\n"))
+    nlp = spacy.load(CFG.browse_spacy_language_model)
+    nlp.add_pipe("sentencizer")
+    doc = nlp(flatened_paragraphs)
+    sentences = [sent.text.strip() for sent in doc.sents]

     current_chunk = []

-    for paragraph in paragraphs:
-        if current_length + len(paragraph) + 1 <= max_length:
-            current_chunk.append(paragraph)
-            current_length += len(paragraph) + 1
+    for sentence in sentences:
+        message_with_additional_sentence = [
+            create_message(" ".join(current_chunk) + " " + sentence, question)
+        ]
+
+        expected_token_usage = (
+            token_usage_of_chunk(messages=message_with_additional_sentence, model=model)
+            + 1
+        )
+        if expected_token_usage <= max_length:
+            current_chunk.append(sentence)
         else:
-            yield "\n".join(current_chunk)
-            current_chunk = [paragraph]
-            current_length = len(paragraph) + 1
+            yield " ".join(current_chunk)
+            current_chunk = [sentence]
+            message_this_sentence_only = [
+                create_message(" ".join(current_chunk), question)
+            ]
+            expected_token_usage = (
+                token_usage_of_chunk(messages=message_this_sentence_only, model=model)
+                + 1
+            )
+            if expected_token_usage > max_length:
+                raise ValueError(
+                    f"Sentence is too long in webpage: {expected_token_usage} tokens."
+                )

     if current_chunk:
-        yield "\n".join(current_chunk)
+        yield " ".join(current_chunk)
+
+
+def token_usage_of_chunk(messages, model):
+    return token_counter.count_message_tokens(messages, model)


 def summarize_text(
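The rewrite replaces fixed-size paragraph splitting with sentence-aware chunking: spaCy's rule-based sentencizer segments the flattened page text, and sentences are greedily packed into chunks whose projected token usage stays under the budget. A simplified, self-contained sketch of the same technique, with a character budget standing in for the token counting Auto-GPT does via `token_counter.count_message_tokens`:

from typing import Generator

import spacy


def sentence_chunks(text: str, max_chars: int = 3000) -> Generator[str, None, None]:
    nlp = spacy.blank("en")        # blank pipeline: no model download required
    nlp.add_pipe("sentencizer")    # rule-based sentence boundary detection
    doc = nlp(" ".join(text.split("\n")))  # flatten newlines, as split_text does

    chunk, length = [], 0
    for sent in doc.sents:
        sentence = sent.text.strip()
        if chunk and length + len(sentence) + 1 > max_chars:
            yield " ".join(chunk)  # budget exceeded: emit and start a new chunk
            chunk, length = [], 0
        chunk.append(sentence)
        length += len(sentence) + 1
    if chunk:
        yield " ".join(chunk)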
@@ -58,11 +90,16 @@ def summarize_text(
     if not text:
         return "Error: No text to summarize"

+    model = CFG.fast_llm_model
     text_length = len(text)
     print(f"Text length: {text_length} characters")

     summaries = []
-    chunks = list(split_text(text))
+    chunks = list(
+        split_text(
+            text, max_length=CFG.browse_chunk_max_length, model=model, question=question
+        ),
+    )
     scroll_ratio = 1 / len(chunks)

     for i, chunk in enumerate(chunks):
@@ -74,15 +111,20 @@ def summarize_text(

         MEMORY.add(memory_to_add)

-        print(f"Summarizing chunk {i + 1} / {len(chunks)}")
         messages = [create_message(chunk, question)]
+        tokens_for_chunk = token_counter.count_message_tokens(messages, model)
+        print(
+            f"Summarizing chunk {i + 1} / {len(chunks)} of length {len(chunk)} characters, or {tokens_for_chunk} tokens"
+        )

         summary = create_chat_completion(
-            model=CFG.fast_llm_model,
+            model=model,
             messages=messages,
         )
         summaries.append(summary)
-        print(f"Added chunk {i + 1} summary to memory")
+        print(
+            f"Added chunk {i + 1} summary to memory, of length {len(summary)} characters"
+        )

         memory_to_add = f"Source: {url}\n" f"Content summary part#{i + 1}: {summary}"

@@ -94,7 +136,7 @@ def summarize_text(
     messages = [create_message(combined_summary, question)]

     return create_chat_completion(
-        model=CFG.fast_llm_model,
+        model=model,
         messages=messages,
     )
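The per-chunk log now reports projected token usage through `token_counter.count_message_tokens`. For readers outside the codebase, a rough equivalent with tiktoken; the 4-tokens-per-message framing overhead is the commonly cited approximation for gpt-3.5-turbo-style chat formats, and this helper is a sketch, not Auto-GPT's implementation:

import tiktoken


def count_message_tokens(messages, model="gpt-3.5-turbo"):
    """Approximate token usage of a chat message list."""
    encoding = tiktoken.encoding_for_model(model)
    tokens = 0
    for message in messages:
        tokens += 4  # rough per-message framing overhead
        for value in message.values():
            tokens += len(encoding.encode(str(value)))
    return tokens + 2  # priming for the assistant's reply


print(count_message_tokens([{"role": "user", "content": "Summarize this page."}]))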
@@ -85,7 +85,6 @@ def get_prompt() -> str:
             {"code": "<full_code_string>", "focus": "<list_of_focus_areas>"},
         ),
         ("Execute Python File", "execute_python_file", {"file": "<file>"}),
         ("Task Complete (Shutdown)", "task_complete", {"reason": "<reason>"}),
         ("Generate Image", "generate_image", {"prompt": "<prompt>"}),
-        ("Send Tweet", "send_tweet", {"text": "<text>"}),
     ]
@@ -3,6 +3,7 @@ import os
 import requests
 import yaml
 from colorama import Fore
+from git import Repo


 def clean_input(prompt: str = ""):
@@ -53,6 +54,15 @@ def get_bulletin_from_web() -> str:
         return ""


+def get_current_git_branch() -> str:
+    try:
+        repo = Repo(search_parent_directories=True)
+        branch = repo.active_branch
+        return branch.name
+    except:
+        return ""
+
+
 def get_latest_bulletin() -> str:
     exists = os.path.exists("CURRENT_BULLETIN.md")
     current_bulletin = ""
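The new `get_current_git_branch` helper swallows every failure with a bare `except:`, which also hides unrelated bugs. A narrower variant under the same gitpython dependency would catch only the failures that actually occur here: `InvalidGitRepositoryError` when no repository is found, and `TypeError`, which gitpython raises from `active_branch` on a detached HEAD.

from git import Repo
from git.exc import InvalidGitRepositoryError


def get_current_git_branch() -> str:
    try:
        return Repo(search_parent_directories=True).active_branch.name
    except (InvalidGitRepositoryError, TypeError):
        # not a git checkout, or detached HEAD
        return ""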
@@ -37,6 +37,7 @@ def safe_path_join(base: Path, *paths: str | Path) -> Path:
     Returns:
         Path: The joined path
     """
+    base = base.resolve()
     joined_path = base.joinpath(*paths).resolve()

     if CFG.restrict_to_workspace and not joined_path.is_relative_to(base):
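Resolving `base` before the containment check matters: `joined_path` is already resolved, so if the workspace path itself contains symlinks or `..` segments, the comparison against an unresolved `base` can wrongly reject legitimate paths or miss traversals. A self-contained sketch of the guard (the error message wording is illustrative):

from pathlib import Path


def safe_path_join(base: Path, *paths: str) -> Path:
    base = base.resolve()                   # canonicalize symlinks and ".."
    joined = base.joinpath(*paths).resolve()
    if not joined.is_relative_to(base):     # Path.is_relative_to: Python 3.9+
        raise ValueError(f"{joined} is outside the workspace {base}")
    return joined


print(safe_path_join(Path("./auto_gpt_workspace"), "notes.txt"))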
@@ -1,4 +1,4 @@
-azure_api_type: azure_ad
+azure_api_type: azure
 azure_api_base: your-base-url-for-azure
 azure_api_version: api-version-for-azure
 azure_model_map:
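`azure` is the api_type for key-based Azure OpenAI access, while `azure_ad` applies only to Azure Active Directory token auth, so it is the safer template default. A filled-in example of the template under that assumption; the endpoint and the deployment-map entries are placeholders, shaped as the dict that `load_azure_config` now expects:

azure_api_type: azure
azure_api_base: https://my-resource.openai.azure.com
azure_api_version: 2023-03-15-preview
azure_model_map:
  gpt-3.5-turbo: my-gpt35-deployment  # model name -> Azure deployment ID (illustrative)
  gpt-4: my-gpt4-deployment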
@@ -20,6 +20,8 @@ webdriver-manager
 jsonschema
 tweepy
 click
+spacy>=3.0.0,<4.0.0
+en_core_web_sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.4.0/en_core_web_sm-3.4.0-py3-none-any.whl

 ##Dev
 coverage
@@ -27,7 +29,6 @@ flake8
 numpy
 pre-commit
 black
 sourcery
 isort
-gitpython==3.1.31
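The pinned `en_core_web_sm` wheel installs the default sentence-splitting model alongside spacy itself; other languages are installed as the template notes (`python -m spacy download zh_core_web_sm` for Chinese, for example). A defensive load-or-download sketch (a hypothetical helper, not part of the codebase):

import spacy


def load_spacy_model(name: str = "en_core_web_sm"):
    try:
        return spacy.load(name)
    except OSError:
        # model package missing: fetch it, like `python -m spacy download <name>`
        from spacy.cli import download

        download(name)
        return spacy.load(name)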