From c785352ed2c6f1744bdeac5410ea93199bade937 Mon Sep 17 00:00:00 2001
From: Maiko Bossuyt
Date: Wed, 12 Apr 2023 16:23:09 +0200
Subject: [PATCH 01/33] Update main.py

clean trailing whitespace

---
 scripts/main.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/main.py b/scripts/main.py
index 15af0c38..3dcedb5f 100644
--- a/scripts/main.py
+++ b/scripts/main.py
@@ -293,7 +293,7 @@ def parse_arguments():
     if args.gpt3only:
         print_to_console("GPT3.5 Only Mode: ", Fore.GREEN, "ENABLED")
         cfg.set_smart_llm_model(cfg.fast_llm_model)
-    
+
     if args.gpt4only:
         print_to_console("GPT4 Only Mode: ", Fore.GREEN, "ENABLED")
         cfg.set_fast_llm_model(cfg.smart_llm_model)

From c986e8713512aad9f06c074b5e7fdfa31ade2df7 Mon Sep 17 00:00:00 2001
From: Maiko Bossuyt
Date: Wed, 12 Apr 2023 16:36:27 +0200
Subject: [PATCH 02/33] Edit config class to manage browse_website command
 chunk size and summary size

I added two new config parameters:
- browse_chunk_max_length: defines the max_length of a chunk being sent to
  memory and to FAST_LLM_MODEL for summarizing
- browse_summary_max_token: defines the max_token value passed to the model
  used for summary creation. Changing this can help with complex subjects,
  allowing the agent to be more verbose in its attempts to summarize each
  chunk and the chunks' summaries.

I've also edited the way the user_agent is handled.

---
 .env.template     |  2 ++
 scripts/config.py | 14 +++++++++++++-
 2 files changed, 15 insertions(+), 1 deletion(-)

diff --git a/.env.template b/.env.template
index 6fbc8424..0953fac9 100644
--- a/.env.template
+++ b/.env.template
@@ -15,3 +15,5 @@ OPENAI_AZURE_DEPLOYMENT_ID=deployment-id-for-azure
 IMAGE_PROVIDER=dalle
 HUGGINGFACE_API_TOKEN=
 USE_MAC_OS_TTS=False
+BROWSE_CHUNK_MAX_LENGTH=4000
+BROWSE_SUMMARY_MAX_TOKEN=300
\ No newline at end of file

diff --git a/scripts/config.py b/scripts/config.py
index a280e6cc..1eeeb72f 100644
--- a/scripts/config.py
+++ b/scripts/config.py
@@ -41,6 +41,8 @@ class Config(metaclass=Singleton):
         self.smart_llm_model = os.getenv("SMART_LLM_MODEL", "gpt-4")
         self.fast_token_limit = int(os.getenv("FAST_TOKEN_LIMIT", 4000))
         self.smart_token_limit = int(os.getenv("SMART_TOKEN_LIMIT", 8000))
+        self.browse_chunk_max_length = int(os.getenv("BROWSE_CHUNK_MAX_LENGTH", 8000))
+        self.browse_summary_max_token = int(os.getenv("BROWSE_SUMMARY_MAX_TOKEN", 300))
 
         self.openai_api_key = os.getenv("OPENAI_API_KEY")
         self.use_azure = False
@@ -71,7 +73,8 @@ class Config(metaclass=Singleton):
 
         # User agent headers to use when browsing web
         # Some websites might just completely deny request with an error code if no user agent was found.
-        self.user_agent_header = {"User-Agent":"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36"}
+        self.user_agent = os.getenv("USER_AGENT", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36")
+
         self.redis_host = os.getenv("REDIS_HOST", "localhost")
         self.redis_port = os.getenv("REDIS_PORT", "6379")
         self.redis_password = os.getenv("REDIS_PASSWORD", "")
@@ -80,6 +83,7 @@ class Config(metaclass=Singleton):
         # Note that indexes must be created on db 0 in redis, this is not configurable.
         self.memory_backend = os.getenv("MEMORY_BACKEND", 'local')
+
         # Initialize the OpenAI API client
         openai.api_key = self.openai_api_key
@@ -107,6 +111,14 @@ class Config(metaclass=Singleton):
         """Set the smart token limit value."""
         self.smart_token_limit = value
 
+    def set_browse_chunk_max_length(self, value: int):
+        """Set the browse_website command chunk max length value."""
+        self.browse_chunk_max_length = value
+
+    def set_browse_summary_max_token(self, value: int):
+        """Set the browse_website command summary max token value."""
+        self.browse_summary_max_token = value
+
     def set_openai_api_key(self, value: str):
         """Set the OpenAI API key value."""
         self.openai_api_key = value

From b20c0117c5732e73005ee9fc12380078d5ea442c Mon Sep 17 00:00:00 2001
From: Maiko Bossuyt
Date: Wed, 12 Apr 2023 16:38:49 +0200
Subject: [PATCH 03/33] Add memory management to browse.py

- Change the way the User-Agent header is handled when making requests to
  browse websites
- Add chunks to memory before and after summarization. We do not save the
  "summary of summaries", as this wasn't performing well and caused noise
  when the "question" couldn't be answered.
- Use the newly added config parameters for max_length and max_token

---
 scripts/browse.py | 30 ++++++++++++++++++++++++------
 1 file changed, 24 insertions(+), 6 deletions(-)

diff --git a/scripts/browse.py b/scripts/browse.py
index b0c745ef..5f4aafe6 100644
--- a/scripts/browse.py
+++ b/scripts/browse.py
@@ -1,9 +1,14 @@
 import requests
 from bs4 import BeautifulSoup
+from memory import get_memory
 from config import Config
 from llm_utils import create_chat_completion
 
 cfg = Config()
+memory = get_memory(cfg)
+
+session = requests.Session()
+session.headers.update({'User-Agent': cfg.user_agent})
 
 # Define and check for local file address prefixes
 def check_local_file_access(url):
@@ -21,7 +26,7 @@ def scrape_text(url):
         return "Error: Access to local files is restricted"
 
     try:
-        response = requests.get(url, headers=cfg.user_agent_header)
+        response = session.get(url)
     except requests.exceptions.RequestException as e:
         return "Error: " + str(e)
 
@@ -60,7 +65,7 @@ def format_hyperlinks(hyperlinks):
 
 def scrape_links(url):
     """Scrape links from a webpage"""
-    response = requests.get(url, headers=cfg.user_agent_header)
+    response = session.get(url)
 
     # Check if the response contains an HTTP error
     if response.status_code >= 400:
@@ -76,7 +81,7 @@ def scrape_links(url):
 
     return format_hyperlinks(hyperlinks)
 
 
-def split_text(text, max_length=8192):
+def split_text(text, max_length=cfg.browse_chunk_max_length):
     """Split text into chunks of a maximum length"""
     paragraphs = text.split("\n")
     current_length = 0
@@ -102,7 +107,7 @@ def create_message(chunk, question):
         "content": f"\"\"\"{chunk}\"\"\" Using the above text, please answer the following question: \"{question}\" -- if the question cannot be answered using the text, please summarize the text."
     }
 
-def summarize_text(text, question):
+def summarize_text(url, text, question):
     """Summarize text using the LLM model"""
     if not text:
         return "Error: No text to summarize"
@@ -114,15 +119,28 @@ def summarize_text(text, question):
     chunks = list(split_text(text))
 
     for i, chunk in enumerate(chunks):
+        print(f"Adding chunk {i + 1} / {len(chunks)} to memory")
+
+        memory_to_add = f"Source: {url}\n" \
+                        f"Raw content part#{i + 1}: {chunk}"
+
+        memory.add(memory_to_add)
+
         print(f"Summarizing chunk {i + 1} / {len(chunks)}")
         messages = [create_message(chunk, question)]
 
         summary = create_chat_completion(
             model=cfg.fast_llm_model,
             messages=messages,
-            max_tokens=300,
+            max_tokens=cfg.browse_summary_max_token,
         )
         summaries.append(summary)
+        print(f"Added chunk {i + 1} summary to memory")
+
+        memory_to_add = f"Source: {url}\n" \
+                        f"Content summary part#{i + 1}: {summary}"
+
+        memory.add(memory_to_add)
 
     print(f"Summarized {len(chunks)} chunks.")
 
@@ -132,7 +150,7 @@ def summarize_text(text, question):
     final_summary = create_chat_completion(
         model=cfg.fast_llm_model,
         messages=messages,
-        max_tokens=300,
+        max_tokens=cfg.browse_summary_max_token,
     )
 
     return final_summary

From a615e570616146ba51336b4160c2eff225479769 Mon Sep 17 00:00:00 2001
From: Maiko Bossuyt
Date: Wed, 12 Apr 2023 16:42:14 +0200
Subject: [PATCH 04/33] Add the url variable in the get_text_summary function
 to pass it to the memory

By sending the url along when calling browse.summarize_text, we can then
store it alongside the chunk in memory.

---
 scripts/commands.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/commands.py b/scripts/commands.py
index 92d46ae1..90d7a6f3 100644
--- a/scripts/commands.py
+++ b/scripts/commands.py
@@ -183,7 +183,7 @@ def browse_website(url, question):
 
 def get_text_summary(url, question):
     """Return the results of a google search"""
     text = browse.scrape_text(url)
-    summary = browse.summarize_text(text, question)
+    summary = browse.summarize_text(url, text, question)
     return """ "Result" : """ + summary

From 5bb551db95fe1eb6765c61fa28bf384d8252cdad Mon Sep 17 00:00:00 2001
From: Maiko Bossuyt
Date: Wed, 12 Apr 2023 18:00:17 +0200
Subject: [PATCH 05/33] Revert "Update main.py"

This reverts commit c785352ed2c6f1744bdeac5410ea93199bade937.

---
 scripts/main.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/main.py b/scripts/main.py
index 3dcedb5f..15af0c38 100644
--- a/scripts/main.py
+++ b/scripts/main.py
@@ -293,7 +293,7 @@ def parse_arguments():
     if args.gpt3only:
         print_to_console("GPT3.5 Only Mode: ", Fore.GREEN, "ENABLED")
         cfg.set_smart_llm_model(cfg.fast_llm_model)
-
+    
     if args.gpt4only:
         print_to_console("GPT4 Only Mode: ", Fore.GREEN, "ENABLED")
         cfg.set_fast_llm_model(cfg.smart_llm_model)

From 8baa0769b154f3742cdc75e07404952de02e0669 Mon Sep 17 00:00:00 2001
From: Maiko Bossuyt
Date: Wed, 12 Apr 2023 18:03:59 +0200
Subject: [PATCH 06/33] Update config.py

---
 scripts/config.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/scripts/config.py b/scripts/config.py
index 1eeeb72f..9c4e4572 100644
--- a/scripts/config.py
+++ b/scripts/config.py
@@ -83,7 +83,6 @@ class Config(metaclass=Singleton):
         # Note that indexes must be created on db 0 in redis, this is not configurable.
         self.memory_backend = os.getenv("MEMORY_BACKEND", 'local')
-
         # Initialize the OpenAI API client
         openai.api_key = self.openai_api_key

From 0dddc94bdac94707062b1863f3c5a72d113432ca Mon Sep 17 00:00:00 2001
From: Maiko Bossuyt
Date: Wed, 12 Apr 2023 19:13:04 +0200
Subject: [PATCH 07/33] Add file ingestion method in file_operations.py

Add the following functions to ingest data into memory before an Auto-GPT
run:
- split_file: given content, split it into chunks of max_length with (or
  without) a specified overlap
- ingest_file: read a file, use split_file to split it into chunks and load
  each chunk into memory
- ingest_directory: ingest all files in a directory into memory

---
 scripts/file_operations.py | 75 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 75 insertions(+)

diff --git a/scripts/file_operations.py b/scripts/file_operations.py
index c6066ef9..3493c2bf 100644
--- a/scripts/file_operations.py
+++ b/scripts/file_operations.py
@@ -1,5 +1,10 @@
 import os
 import os.path
+from config import Config
+from memory import get_memory
+
+cfg = Config()
+memory = get_memory(cfg)
 
 # Set a dedicated folder for file I/O
 working_directory = "auto_gpt_workspace"
@@ -20,6 +25,30 @@ def safe_join(base, *paths):
     return norm_new_path
 
 
+def split_file(content, max_length=4000, overlap=0):
+    """
+    Split text into chunks of a specified maximum length with a specified overlap
+    between chunks.
+
+    :param content: The input text to be split into chunks
+    :param max_length: The maximum length of each chunk, default is 4000 (about 1k tokens)
+    :param overlap: The number of overlapping characters between chunks, default is no overlap
+    :return: A generator yielding chunks of text
+    """
+    start = 0
+    content_length = len(content)
+
+    while start < content_length:
+        end = start + max_length
+        chunk = content[start:end]
+        yield chunk
+        start += max_length - overlap
+        if start + max_length - overlap >= content_length:
+            break
+        if end + overlap > content_length:
+            start = content_length - max_length
+
+
 def read_file(filename):
     """Read a file and return the contents"""
     try:
@@ -31,6 +60,52 @@ def read_file(filename):
         return "Error: " + str(e)
 
 
+def ingest_file(filename, memory, max_length=4000, overlap=200):
+    """
+    Ingest a file by reading its content, splitting it into chunks with a specified
+    maximum length and overlap, and adding the chunks to the memory storage.
+
+    :param filename: The name of the file to ingest
+    :param memory: An object with an add() method to store the chunks in memory
+    :param max_length: The maximum length of each chunk, default is 4000
+    :param overlap: The number of overlapping characters between chunks, default is 200
+    """
+    try:
+        print(f"Working with file {filename}")
+        content = read_file(filename)
+        content_length = len(content)
+        print(f"File length: {content_length} characters")
+
+        chunks = list(split_file(content, max_length=max_length, overlap=overlap))
+
+        num_chunks = len(chunks)
+        for i, chunk in enumerate(chunks):
+            print(f"Ingesting chunk {i + 1} / {num_chunks} into memory")
+            memory_to_add = f"Filename: {filename}\n" \
+                            f"Content part#{i + 1}/{num_chunks}: {chunk}"
+
+            memory.add(memory_to_add)
+
+        print(f"Done ingesting {num_chunks} chunks from {filename}.")
+    except Exception as e:
+        print(f"Error while ingesting file '{filename}': {str(e)}")
+
+
+def ingest_directory(directory, memory):
+    """
+    Ingest all files in a directory by calling the ingest_file function for each file.
+
+    :param directory: The directory containing the files to ingest
+    :param memory: An object with an add() method to store the chunks in memory
+    """
+    try:
+        files = search_files(directory)
+        for file in files:
+            ingest_file(file, memory)
+    except Exception as e:
+        print(f"Error while ingesting directory '{directory}': {str(e)}")
+
+
 def write_to_file(filename, text):
     """Write text to a file"""
     try:

From 7975c184a55a477e884e1920ed87dc67ca4b4261 Mon Sep 17 00:00:00 2001
From: Maiko Bossuyt
Date: Wed, 12 Apr 2023 19:46:39 +0200
Subject: [PATCH 08/33] Update .gitignore

add new log file to gitignore

---
 .gitignore | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/.gitignore b/.gitignore
index aa0dceaa..fc496609 100644
--- a/.gitignore
+++ b/.gitignore
@@ -18,4 +18,5 @@ log.txt
 # Coverage reports
 .coverage
 coverage.xml
-htmlcov/
\ No newline at end of file
+htmlcov/
+log-ingestion.txt

From c91117616f7b5e16743208b8649ce4335077915b Mon Sep 17 00:00:00 2001
From: Maiko Bossuyt
Date: Wed, 12 Apr 2023 19:46:58 +0200
Subject: [PATCH 09/33] Update file_operations.py

revert changes in imports as we don't need them

---
 scripts/file_operations.py | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/scripts/file_operations.py b/scripts/file_operations.py
index 3493c2bf..8e807bba 100644
--- a/scripts/file_operations.py
+++ b/scripts/file_operations.py
@@ -1,10 +1,5 @@
 import os
 import os.path
-from config import Config
-from memory import get_memory
-
-cfg = Config()
-memory = get_memory(cfg)
 
 # Set a dedicated folder for file I/O
 working_directory = "auto_gpt_workspace"

From 8faa6ef949bf7fbbb8bd875a66bfd4fd231ecebc Mon Sep 17 00:00:00 2001
From: Maiko Bossuyt
Date: Wed, 12 Apr 2023 19:47:51 +0200
Subject: [PATCH 10/33] Create data_ingestion.py

This script is used when we want to seed Auto-GPT's memory with one or
multiple documents. The documents are read, split into chunks and stored in
memory.

---
 scripts/data_ingestion.py | 52 +++++++++++++++++++++++++++++++++
 1 file changed, 52 insertions(+)
 create mode 100644 scripts/data_ingestion.py

diff --git a/scripts/data_ingestion.py b/scripts/data_ingestion.py
new file mode 100644
index 00000000..3f6d1322
--- /dev/null
+++ b/scripts/data_ingestion.py
@@ -0,0 +1,52 @@
+import argparse
+import logging
+from config import Config
+from memory import get_memory
+from file_operations import ingest_file, ingest_directory
+
+cfg = Config()
+
+def configure_logging():
+    logging.basicConfig(filename='log-ingestion.txt',
+                        filemode='a',
+                        format='%(asctime)s,%(msecs)d %(name)s %(levelname)s %(message)s',
+                        datefmt='%H:%M:%S',
+                        level=logging.DEBUG)
+    return logging.getLogger('AutoGPT-Ingestion')
+
+
+def main():
+    logger = configure_logging()
+
+    parser = argparse.ArgumentParser(description="Ingest a file or a directory with multiple files into memory. Make sure to set your .env before running this script.")
+    group = parser.add_mutually_exclusive_group(required=True)
+    group.add_argument("--file", type=str, help="The file to ingest.")
+    group.add_argument("--dir", type=str, help="The directory containing the files to ingest.")
+    parser.add_argument("--init", action='store_true', help="Init the memory and wipe its content", default=False)
+    args = parser.parse_args()
+
+
+    # Initialize memory
+    memory = get_memory(cfg, init=args.init)
+    print('Using memory of type: ' + memory.__class__.__name__)
+
+    if args.file:
+        try:
+            ingest_file(args.file, memory)
+            print(f"File '{args.file}' ingested successfully.")
+        except Exception as e:
+            logger.error(f"Error while ingesting file '{args.file}': {str(e)}")
+            print(f"Error while ingesting file '{args.file}': {str(e)}")
+    elif args.dir:
+        try:
+            ingest_directory(args.dir, memory)
+            print(f"Directory '{args.dir}' ingested successfully.")
+        except Exception as e:
+            logger.error(f"Error while ingesting directory '{args.dir}': {str(e)}")
+            print(f"Error while ingesting directory '{args.dir}': {str(e)}")
+    else:
+        print("Please provide either a file path (--file) or a directory name (--dir) inside the auto_gpt_workspace directory as input.")
+
+
+if __name__ == "__main__":
+    main()

From 4465486ea39b0bc65715e48a1c7861a565b5126f Mon Sep 17 00:00:00 2001
From: Maiko Bossuyt
Date: Wed, 12 Apr 2023 20:18:59 +0200
Subject: [PATCH 11/33] Update file_operations.py

move the ingest_directory function into the data_ingestion script

---
 scripts/file_operations.py | 15 ---------------
 1 file changed, 15 deletions(-)

diff --git a/scripts/file_operations.py b/scripts/file_operations.py
index 8e807bba..e664fcc9 100644
--- a/scripts/file_operations.py
+++ b/scripts/file_operations.py
@@ -86,21 +86,6 @@ def ingest_file(filename, memory, max_length=4000, overlap=200):
         print(f"Error while ingesting file '{filename}': {str(e)}")
 
 
-def ingest_directory(directory, memory):
-    """
-    Ingest all files in a directory by calling the ingest_file function for each file.
-
-    :param directory: The directory containing the files to ingest
-    :param memory: An object with an add() method to store the chunks in memory
-    """
-    try:
-        files = search_files(directory)
-        for file in files:
-            ingest_file(file, memory)
-    except Exception as e:
-        print(f"Error while ingesting directory '{directory}': {str(e)}")
-
-
 def write_to_file(filename, text):
     """Write text to a file"""
     try:

From 280647ff387bc29127b8403c7dd46f2c94d6a0b8 Mon Sep 17 00:00:00 2001
From: Maiko Bossuyt
Date: Wed, 12 Apr 2023 20:19:36 +0200
Subject: [PATCH 12/33] Update data_ingestion.py

move the ingest_directory function into the data_ingestion script
add memory initialisation argument
add overlap argument
add chunk max_length argument

---
 scripts/data_ingestion.py | 26 ++++++++++++++++++++++----
 1 file changed, 22 insertions(+), 4 deletions(-)

diff --git a/scripts/data_ingestion.py b/scripts/data_ingestion.py
index 3f6d1322..32811166 100644
--- a/scripts/data_ingestion.py
+++ b/scripts/data_ingestion.py
@@ -2,7 +2,7 @@ import argparse
 import logging
 from config import Config
 from memory import get_memory
-from file_operations import ingest_file, ingest_directory
+from file_operations import ingest_file, search_files
 
 cfg = Config()
 
@@ -15,6 +15,21 @@ def configure_logging():
     return logging.getLogger('AutoGPT-Ingestion')
 
 
+def ingest_directory(directory, memory, args):
+    """
+    Ingest all files in a directory by calling the ingest_file function for each file.
+
+    :param directory: The directory containing the files to ingest
+    :param memory: An object with an add() method to store the chunks in memory
+    """
+    try:
+        files = search_files(directory)
+        for file in files:
+            ingest_file(file, memory, args.max_length, args.overlap)
+    except Exception as e:
+        print(f"Error while ingesting directory '{directory}': {str(e)}")
+
+
 def main():
     logger = configure_logging()
 
@@ -22,7 +37,10 @@ def main():
     group = parser.add_mutually_exclusive_group(required=True)
     group.add_argument("--file", type=str, help="The file to ingest.")
     group.add_argument("--dir", type=str, help="The directory containing the files to ingest.")
-    parser.add_argument("--init", action='store_true', help="Init the memory and wipe its content", default=False)
+    parser.add_argument("--init", action='store_true', help="Init the memory and wipe its content (default: False)", default=False)
+    parser.add_argument("--overlap", type=int, help="The overlap size between chunks when ingesting files (default: 200)", default=200)
+    parser.add_argument("--max_length", type=int, help="The max_length of each chunk when ingesting files (default: 4000)", default=4000)
+
     args = parser.parse_args()
 
 

From 65cc4f833f56000ae3928cccc3c9821fece53958 Mon Sep 17 00:00:00 2001
From: Maiko Bossuyt
Date: Wed, 12 Apr 2023 20:47:46 +0200
Subject: [PATCH 13/33] Add Memory Pre-Seeding information to readme.md

Add the documentation for memory pre-seeding

---
 README.md | 47 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 47 insertions(+)

diff --git a/README.md b/README.md
index 2900daa9..6262467d 100644
--- a/README.md
+++ b/README.md
@@ -41,6 +41,7 @@ Your support is greatly appreciated
 - [🗣️ Speech Mode](#️-speech-mode)
 - [🔍 Google API Keys Configuration](#-google-api-keys-configuration)
   - [Setting up environment variables](#setting-up-environment-variables)
+- [🧠 Memory pre-seeding](#-memory-pre-seeding)
 - [💀 Continuous Mode ⚠️](#-continuous-mode-️)
 - [GPT3.5 ONLY Mode](#gpt35-only-mode)
 - [🖼 Image Generation](#image-generation)
@@ -245,6 +246,52 @@ To switch to either, change the `MEMORY_BACKEND` env variable to the value that
 
 1. View memory usage by using the `--debug` flag :)
 
+
+## 🧠 Memory pre-seeding
+
+```
+# python scripts/data_ingestion.py -h
+usage: data_ingestion.py [-h] (--file FILE | --dir DIR) [--init] [--overlap OVERLAP] [--max_length MAX_LENGTH]
+
+Ingest a file or a directory with multiple files into memory. Make sure to set your .env before running this script.
+
+options:
+  -h, --help               show this help message and exit
+  --file FILE              The file to ingest.
+  --dir DIR                The directory containing the files to ingest.
+  --init                   Init the memory and wipe its content (default: False)
+  --overlap OVERLAP        The overlap size between chunks when ingesting files (default: 200)
+  --max_length MAX_LENGTH  The max_length of each chunk when ingesting files (default: 4000)
+
+# python scripts/data_ingestion.py --dir seed_data --init --overlap 200 --max_length 1000
+```
+
+This script, located at scripts/data_ingestion.py, allows you to ingest files into memory and pre-seed it before running Auto-GPT.
+
+Memory pre-seeding is a technique that involves ingesting relevant documents or data into the AI's memory so that it can use this information to generate more informed and accurate responses.
+
+To pre-seed the memory, the content of each document is split into chunks of a specified maximum length with a specified overlap between chunks, and then each chunk is added to the memory backend set in the .env file. When the AI is prompted to recall information, it can then access those pre-seeded memories to generate more informed and accurate responses.
+
+This technique is particularly useful when working with large amounts of data or when there is specific information that the AI needs to be able to access quickly.
+By pre-seeding the memory, the AI can retrieve and use this information more efficiently, saving time and API calls, and improving the accuracy of its responses.
+
+You could for example download the documentation of an API, a GitHub repository, etc. and ingest it into memory before running Auto-GPT.
+
+⚠️ If you use Redis as your memory, make sure to run Auto-GPT with WIPE_REDIS_ON_START set to False in your .env file.
+
+⚠️ For other memory backends, we currently forcefully wipe the memory when starting Auto-GPT. To ingest data with those memory backends, you can call the data_ingestion.py script anytime during an Auto-GPT run.
+
+Memories will be available to the AI immediately as they are ingested, even if ingested while Auto-GPT is running.
+
+In the example above, the script initializes the memory and ingests all files within the seed_data directory into memory, with an overlap between chunks of 200 and a maximum chunk length of 1000.
+Note that you can also use the --file argument to ingest a single file into memory and that the script will only ingest files within the auto_gpt_workspace directory.
+
+You can adjust the max_length and overlap parameters to fine-tune the way the documents are presented to the AI when it "recalls" that memory:
+
+- Adjusting the overlap value allows the AI to access more contextual information from each chunk when recalling information, but will result in more chunks being created and therefore increase memory backend usage and OpenAI API requests.
+- Reducing the max_length value will create more chunks, which can save prompt tokens by allowing for more message history in the context, but will also increase the number of memory entries and OpenAI API requests.
+- Increasing the max_length value will provide the AI with more contextual information from each chunk, reducing the number of chunks created and saving on OpenAI API requests. However, this may also use more prompt tokens and decrease the overall context available to the AI.
+
 ## 💀 Continuous Mode ⚠️
 Run the AI **without** user authorisation, 100% automated.

From 2e0b44ae05fce7795f662a81c765eeeeae32a768 Mon Sep 17 00:00:00 2001
From: Maiko Bossuyt
Date: Wed, 12 Apr 2023 22:46:49 +0200
Subject: [PATCH 14/33] fix chunk creation

the last chunk wasn't correctly created; this commit fixes that issue.
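For illustration (hypothetical values, not taken from this diff): with
max_length=4000, overlap=0 and a 9,000-character input, the old loop
yielded content[0:4000] and content[4000:8000] and then hit the early
break, so content[8000:9000] was never produced. The new logic notices
when the next window would run past the end of the content, re-anchors
the final chunk at content_length - max_length (yielding
content[5000:9000] in this example) and only then breaks, so the tail
of the input is always covered.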
---
 scripts/file_operations.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/scripts/file_operations.py b/scripts/file_operations.py
index e664fcc9..f2a2b072 100644
--- a/scripts/file_operations.py
+++ b/scripts/file_operations.py
@@ -38,11 +38,12 @@ def split_file(content, max_length=4000, overlap=0):
         chunk = content[start:end]
         yield chunk
         start += max_length - overlap
-        if start + max_length - overlap >= content_length:
-            break
-        if end + overlap > content_length:
+        if start + max_length > content_length:
             start = content_length - max_length
-
+            end = content_length
+            chunk = content[start:end]
+            yield chunk
+            break
 
 def read_file(filename):
     """Read a file and return the contents"""

From 4e914e5ec1a4f7d39cba04cc2ebc0ba7f0055423 Mon Sep 17 00:00:00 2001
From: Maiko Bossuyt
Date: Wed, 12 Apr 2023 22:51:52 +0200
Subject: [PATCH 15/33] Revert "Update .gitignore"

This reverts commit 7975c184a55a477e884e1920ed87dc67ca4b4261.

---
 .gitignore | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/.gitignore b/.gitignore
index fc496609..aa0dceaa 100644
--- a/.gitignore
+++ b/.gitignore
@@ -18,5 +18,4 @@ log.txt
 # Coverage reports
 .coverage
 coverage.xml
-htmlcov/
-log-ingestion.txt
+htmlcov/
\ No newline at end of file

From 2f1181f9a12bbbbf55b8f2224ecc645d22c5d90d Mon Sep 17 00:00:00 2001
From: Maiko Bossuyt
Date: Wed, 12 Apr 2023 22:52:37 +0200
Subject: [PATCH 16/33] Update .gitignore

---
 .gitignore | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.gitignore b/.gitignore
index cfa3b08b..403417eb 100644
--- a/.gitignore
+++ b/.gitignore
@@ -14,6 +14,7 @@ ai_settings.yaml
 .idea/*
 auto-gpt.json
 log.txt
+log-ingestion.txt
 
 # Coverage reports
 .coverage

From 36d455c20e52aa1e09766a01c880f7914c5c24b7 Mon Sep 17 00:00:00 2001
From: Maiko Bossuyt
Date: Wed, 12 Apr 2023 23:31:26 +0200
Subject: [PATCH 17/33] split_file() rework

rework the split_file function to make it simpler and use only one yield,
while providing an overlap at the start and end of each chunk

---
 scripts/file_operations.py | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

diff --git a/scripts/file_operations.py b/scripts/file_operations.py
index db4702c5..c12774b9 100644
--- a/scripts/file_operations.py
+++ b/scripts/file_operations.py
@@ -35,15 +35,12 @@ def split_file(content, max_length=4000, overlap=0):
 
     while start < content_length:
         end = start + max_length
-        chunk = content[start:end]
+        if end + overlap < content_length:
+            chunk = content[start:end+overlap]
+        else:
+            chunk = content[start:content_length]
         yield chunk
         start += max_length - overlap
-        if start + max_length > content_length:
-            start = content_length - max_length
-            end = content_length
-            chunk = content[start:end]
-            yield chunk
-            break
 
 def read_file(filename):
     """Read a file and return the contents"""

From 1c64a9d24508333d92cfdb26d38a90c4bd543dc6 Mon Sep 17 00:00:00 2001
From: Maiko Bossuyt
Date: Wed, 12 Apr 2023 23:33:14 +0200
Subject: [PATCH 18/33] Update .env.template

---
 .env.template | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.env.template b/.env.template
index 0aec7b10..f1e66ea5 100644
--- a/.env.template
+++ b/.env.template
@@ -19,4 +19,4 @@ HUGGINGFACE_API_TOKEN=
 USE_MAC_OS_TTS=False
 BROWSE_CHUNK_MAX_LENGTH=4000
 BROWSE_SUMMARY_MAX_TOKEN=300
-MEMORY_BACKEND=local
\ No newline at end of file
+MEMORY_BACKEND=local

From 428caa9bef83e93a6f97a1341a03a0f41b71dec0 Mon Sep 17 00:00:00 2001
From: Eesa Hamza
Date: Thu, 13 Apr 2023 12:57:57 +0300
Subject: [PATCH 19/33] Added flags and implemented skip-reprompt

---
 scripts/config.py |  1 +
 scripts/main.py   | 14 ++++++++++++--
 2 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/scripts/config.py b/scripts/config.py
index ebf1b08b..fd370a72 100644
--- a/scripts/config.py
+++ b/scripts/config.py
@@ -37,6 +37,7 @@ class Config(metaclass=Singleton):
         self.debug_mode = False
         self.continuous_mode = False
         self.speak_mode = False
+        self.skip_reprompt = False
 
         self.fast_llm_model = os.getenv("FAST_LLM_MODEL", "gpt-3.5-turbo")
         self.smart_llm_model = os.getenv("SMART_LLM_MODEL", "gpt-4")
diff --git a/scripts/main.py b/scripts/main.py
index 81f560b2..f81b09a7 100644
--- a/scripts/main.py
+++ b/scripts/main.py
@@ -183,7 +183,11 @@ def load_variables(config_file="config.yaml"):
 def construct_prompt():
     """Construct the prompt for the AI to respond to"""
     config = AIConfig.load()
-    if config.ai_name:
+    if cfg.skip_reprompt and config.ai_name:
+        logger.typewriter_log("Name :", Fore.GREEN, config.ai_name)
+        logger.typewriter_log("Role :", Fore.GREEN, config.ai_role)
+        logger.typewriter_log("Goals:", Fore.GREEN, config.ai_goals)
+    elif config.ai_name:
         logger.typewriter_log(
             f"Welcome back! ",
             Fore.GREEN,
@@ -270,12 +274,14 @@ def parse_arguments():
     cfg.set_speak_mode(False)
 
     parser = argparse.ArgumentParser(description='Process arguments.')
-    parser.add_argument('--continuous', action='store_true', help='Enable Continuous Mode')
+    parser.add_argument('--continuous', '-c', action='store_true', help='Enable Continuous Mode')
     parser.add_argument('--speak', action='store_true', help='Enable Speak Mode')
     parser.add_argument('--debug', action='store_true', help='Enable Debug Mode')
    parser.add_argument('--gpt3only', action='store_true', help='Enable GPT3.5 Only Mode')
     parser.add_argument('--gpt4only', action='store_true', help='Enable GPT4 Only Mode')
     parser.add_argument('--use-memory', '-m', dest="memory_type", help='Defines which Memory backend to use')
+    parser.add_argument('--skip-reprompt', '-y', dest='skip_reprompt', action='store_true', help='Skips the re-prompting messages at the beginning of the script')
+    parser.add_argument('--ai-settings', '-C', dest='ai_settings_file', help="Specifies which ai_settings.yaml file to use, will also automatically skip the re-prompt.")
     args = parser.parse_args()
 
     if args.debug:
@@ -315,6 +321,10 @@ def parse_arguments():
         else:
             cfg.memory_backend = chosen
 
+    if args.skip_reprompt:
+        logger.typewriter_log("Skip Re-prompt: ", Fore.GREEN, "ENABLED")
+        cfg.skip_reprompt = True
+
 
 # TODO: fill in llm values here
 check_openai_api_key()

From 0f6fba7d65302591f2c77a41483953df43d12d2b Mon Sep 17 00:00:00 2001
From: Eesa Hamza
Date: Thu, 13 Apr 2023 14:02:42 +0300
Subject: [PATCH 20/33] Implemented the '--ai-settings' flag

---
 scripts/config.py |  1 +
 scripts/main.py   | 16 +++++++++++++++-
 scripts/utils.py  | 14 ++++++++++++++
 3 files changed, 30 insertions(+), 1 deletion(-)

diff --git a/scripts/config.py b/scripts/config.py
index fd370a72..ad968fb2 100644
--- a/scripts/config.py
+++ b/scripts/config.py
@@ -39,6 +39,7 @@ class Config(metaclass=Singleton):
         self.speak_mode = False
         self.skip_reprompt = False
 
+        self.ai_settings_file = os.getenv("AI_SETTINGS_FILE", "ai_settings.yaml")
         self.fast_llm_model = os.getenv("FAST_LLM_MODEL", "gpt-3.5-turbo")
         self.smart_llm_model = os.getenv("SMART_LLM_MODEL", "gpt-4")
         self.fast_token_limit = int(os.getenv("FAST_TOKEN_LIMIT", 4000))
diff --git a/scripts/main.py b/scripts/main.py
index f81b09a7..07d2bbd2 100644
--- a/scripts/main.py
+++ b/scripts/main.py
@@ -182,7 +182,7 @@ def load_variables(config_file="config.yaml"):
 
 def construct_prompt():
     """Construct the prompt for the AI to respond to"""
-    config = AIConfig.load()
+    config = AIConfig.load(cfg.ai_settings_file)
     if cfg.skip_reprompt and config.ai_name:
         logger.typewriter_log("Name :", Fore.GREEN, config.ai_name)
         logger.typewriter_log("Role :", Fore.GREEN, config.ai_role)
@@ -324,7 +324,21 @@ def parse_arguments():
     if args.skip_reprompt:
         logger.typewriter_log("Skip Re-prompt: ", Fore.GREEN, "ENABLED")
         cfg.skip_reprompt = True
+
+    if args.ai_settings_file:
+        file = args.ai_settings_file
+
+        # Validate file
+        (validated, message) = utils.validate_yaml_file(file)
+        if not validated:
+            logger.typewriter_log("FAILED FILE VALIDATION", Fore.RED, message)
+            exit(1)
+
+        logger.typewriter_log("Using AI Settings File:", Fore.GREEN, file)
+        cfg.ai_settings_file = file
+        cfg.skip_reprompt = True
+
 
 # TODO: fill in llm values here
 check_openai_api_key()
diff --git a/scripts/utils.py b/scripts/utils.py
index 5039796f..bca8d4a8 100644
--- a/scripts/utils.py
+++ b/scripts/utils.py
@@ -1,3 +1,6 @@
+import yaml
+from colorama import Fore
+
 def clean_input(prompt: str=''):
     try:
         return input(prompt)
@@ -6,3 +9,14 @@ def clean_input(prompt: str=''):
         print("Quitting...")
         exit(0)
 
+
+def validate_yaml_file(file: str):
+    try:
+        with open(file) as file:
+            yaml.load(file, Loader=yaml.FullLoader)
+    except FileNotFoundError:
+        return (False, f"The file {Fore.CYAN}`{file}`{Fore.RESET} wasn't found")
+    except yaml.YAMLError as e:
+        return (False, f"There was an issue while trying to read your AI Settings file: {e}")
+
+    return (True, f"Successfully validated {Fore.CYAN}`{file}`{Fore.RESET}!")
\ No newline at end of file

From a10ffc1dbed88ce74f7ebb1dae0c90fb18bae9f6 Mon Sep 17 00:00:00 2001
From: Eesa Hamza
Date: Thu, 13 Apr 2023 14:26:16 +0300
Subject: [PATCH 21/33] Fixed error logging when choosing non-supported memory
 backend with '--use-memory'

---
 scripts/main.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/scripts/main.py b/scripts/main.py
index 07d2bbd2..59cb565e 100644
--- a/scripts/main.py
+++ b/scripts/main.py
@@ -316,8 +316,8 @@ def parse_arguments():
         supported_memory = get_supported_memory_backends()
         chosen = args.memory_type
         if not chosen in supported_memory:
-            print_to_console("ONLY THE FOLLOWING MEMORY BACKENDS ARE SUPPORTED: ", Fore.RED, f'{supported_memory}')
-            print_to_console(f"Defaulting to: ", Fore.YELLOW, cfg.memory_backend)
+            logger.typewriter_log("ONLY THE FOLLOWING MEMORY BACKENDS ARE SUPPORTED: ", Fore.RED, f'{supported_memory}')
+            logger.typewriter_log(f"Defaulting to: ", Fore.YELLOW, cfg.memory_backend)
         else:
             cfg.memory_backend = chosen

From ff094c7ecc58fad572dccbc8a376a75045d91733 Mon Sep 17 00:00:00 2001
From: Eesa Hamza
Date: Thu, 13 Apr 2023 15:09:24 +0300
Subject: [PATCH 22/33] Resolve Linter Issues

---
 scripts/main.py  | 5 ++---
 scripts/utils.py | 4 ++--
 2 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/scripts/main.py b/scripts/main.py
index 59cb565e..0674db47 100644
--- a/scripts/main.py
+++ b/scripts/main.py
@@ -327,19 +327,18 @@ def parse_arguments():
     if args.ai_settings_file:
         file = args.ai_settings_file
-
+
         # Validate file
         (validated, message) = utils.validate_yaml_file(file)
         if not validated:
             logger.typewriter_log("FAILED FILE VALIDATION", Fore.RED, message)
             exit(1)
-
+
         logger.typewriter_log("Using AI Settings File:", Fore.GREEN, file)
         cfg.ai_settings_file = file
         cfg.skip_reprompt = True
 
-
 # TODO: fill in llm values here
 check_openai_api_key()
 parse_arguments()
diff --git a/scripts/utils.py b/scripts/utils.py
index bca8d4a8..2b51c1fc 100644
--- a/scripts/utils.py
+++ b/scripts/utils.py
@@ -18,5 +18,5 @@ def validate_yaml_file(file: str):
         return (False, f"The file {Fore.CYAN}`{file}`{Fore.RESET} wasn't found")
     except yaml.YAMLError as e:
         return (False, f"There was an issue while trying to read your AI Settings file: {e}")
-
-    return (True, f"Successfully validated {Fore.CYAN}`{file}`{Fore.RESET}!")
\ No newline at end of file
+
+    return (True, f"Successfully validated {Fore.CYAN}`{file}`{Fore.RESET}!")

From 47b72df262b894752b7fd0324f42f71ffc70e38c Mon Sep 17 00:00:00 2001
From: Eesa Hamza
Date: Fri, 14 Apr 2023 01:20:43 +0300
Subject: [PATCH 23/33] Added 'AI_SETTINGS_FILE' to .env

---
 .env.template | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/.env.template b/.env.template
index 474b2727..c5cb54fd 100644
--- a/.env.template
+++ b/.env.template
@@ -3,6 +3,8 @@
 ################################################################################
 # EXECUTE_LOCAL_COMMANDS - Allow local command execution (Example: False)
 EXECUTE_LOCAL_COMMANDS=False
+# AI_SETTINGS_FILE - Specifies which AI Settings file to use (defaults to ai_settings.yaml)
+AI_SETTINGS_FILE=ai_settings.yaml
 
 ################################################################################
 ### LLM PROVIDER

From 05f6e9673f285ac40cf982a544dfa14750cf6af1 Mon Sep 17 00:00:00 2001
From: Eesa Hamza
Date: Fri, 14 Apr 2023 01:23:23 +0300
Subject: [PATCH 24/33] Resolve Linter Issues

---
 scripts/utils.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/scripts/utils.py b/scripts/utils.py
index 2b51c1fc..7521df29 100644
--- a/scripts/utils.py
+++ b/scripts/utils.py
@@ -1,6 +1,7 @@
 import yaml
 from colorama import Fore
 
+
 def clean_input(prompt: str=''):
     try:
         return input(prompt)

From 8472bbd4556999cdd62e4930ae3723f18b746ef4 Mon Sep 17 00:00:00 2001
From: Eesa Hamza
Date: Fri, 14 Apr 2023 01:34:30 +0300
Subject: [PATCH 25/33] Added 'Command Line Arguments' section to README

---
 README.md | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/README.md b/README.md
index d1121976..8d402e7d 100644
--- a/README.md
+++ b/README.md
@@ -134,6 +134,14 @@ To output debug logs:
 ```
 python scripts/main.py --debug
 ```
+### Command Line Arguments
+Here are some common arguments you can use when running Auto-GPT:
+> Replace anything in angled brackets (<>) with a value you want to specify
+* `python scripts/main.py --help` to see a list of all available command line arguments
+* `python scripts/main.py --ai-settings <filename>` to run Auto-GPT with a different AI Settings file.
+* `python scripts/main.py --use-memory <memory-backend>` to specify one of 3 memory backends: `local`, `redis` or `pinecone`
+
+> **NOTE**: There are shorthands for some of these flags, for example `-m` for `--use-memory`. Use `python scripts/main.py --help` for more information
 
 ## 🗣️ Speech Mode

From 25509f9d2541a6c92949984da5548b11903fc98b Mon Sep 17 00:00:00 2001
From: Maiko Bossuyt
Date: Fri, 14 Apr 2023 00:48:07 +0200
Subject: [PATCH 26/33] Update config.py

8192 is the current default

---
 scripts/config.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/config.py b/scripts/config.py
index 0150ca42..a856265a 100644
--- a/scripts/config.py
+++ b/scripts/config.py
@@ -43,7 +43,7 @@ class Config(metaclass=Singleton):
         self.smart_llm_model = os.getenv("SMART_LLM_MODEL", "gpt-4")
         self.fast_token_limit = int(os.getenv("FAST_TOKEN_LIMIT", 4000))
         self.smart_token_limit = int(os.getenv("SMART_TOKEN_LIMIT", 8000))
-        self.browse_chunk_max_length = int(os.getenv("BROWSE_CHUNK_MAX_LENGTH", 8000))
+        self.browse_chunk_max_length = int(os.getenv("BROWSE_CHUNK_MAX_LENGTH", 8192))
         self.browse_summary_max_token = int(os.getenv("BROWSE_SUMMARY_MAX_TOKEN", 300))
 
         self.openai_api_key = os.getenv("OPENAI_API_KEY")

From 6702a04f767702d1e57ddcec81f2481def19f8a7 Mon Sep 17 00:00:00 2001
From: Eesa Hamza
Date: Fri, 14 Apr 2023 01:50:13 +0300
Subject: [PATCH 27/33] Add 'no_memory' support for memory flag

---
 scripts/memory/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/memory/__init__.py b/scripts/memory/__init__.py
index a0afc874..9b53d8d2 100644
--- a/scripts/memory/__init__.py
+++ b/scripts/memory/__init__.py
@@ -3,7 +3,7 @@ from memory.no_memory import NoMemory
 
 # List of supported memory backends
 # Add a backend to this list if the import attempt is successful
-supported_memory = ['local']
+supported_memory = ['local', 'no_memory']
 
 try:
     from memory.redismem import RedisMemory

From 4f923ece60baee2c086c29610a05c4f130e43aa9 Mon Sep 17 00:00:00 2001
From: Eesa Hamza
Date: Fri, 14 Apr 2023 01:56:45 +0300
Subject: [PATCH 28/33] Added double_check logging to AI Settings validator,
 and updated README for 'no_memory'

---
 README.md       | 4 ++--
 scripts/main.py | 1 +
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index 8d402e7d..b55a80c5 100644
--- a/README.md
+++ b/README.md
@@ -137,9 +137,9 @@ python scripts/main.py --debug
 ### Command Line Arguments
 Here are some common arguments you can use when running Auto-GPT:
 > Replace anything in angled brackets (<>) with a value you want to specify
-* `python scripts/main.py --help` to see a list of all available command line arguments
+* `python scripts/main.py --help` to see a list of all available command line arguments.
 * `python scripts/main.py --ai-settings <filename>` to run Auto-GPT with a different AI Settings file.
-* `python scripts/main.py --use-memory <memory-backend>` to specify one of 3 memory backends: `local`, `redis` or `pinecone`
+* `python scripts/main.py --use-memory <memory-backend>` to specify one of 4 memory backends: `local`, `redis`, `pinecone` or `no_memory`.
 
 > **NOTE**: There are shorthands for some of these flags, for example `-m` for `--use-memory`. Use `python scripts/main.py --help` for more information
 
diff --git a/scripts/main.py b/scripts/main.py
index 400eb1f6..78ffe243 100644
--- a/scripts/main.py
+++ b/scripts/main.py
@@ -348,6 +348,7 @@ def parse_arguments():
         (validated, message) = utils.validate_yaml_file(file)
         if not validated:
             logger.typewriter_log("FAILED FILE VALIDATION", Fore.RED, message)
+            logger.double_check()
             exit(1)
 
         logger.typewriter_log("Using AI Settings File:", Fore.GREEN, file)

From e147788c72535779ed094a101c4739aa1e8bdb8c Mon Sep 17 00:00:00 2001
From: Maiko Bossuyt
Date: Fri, 14 Apr 2023 10:33:34 +0200
Subject: [PATCH 29/33] Update .env.template BROWSE_CHUNK_MAX_LENGTH default
 value

---
 .env.template | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.env.template b/.env.template
index 7ff03cab..733597d9 100644
--- a/.env.template
+++ b/.env.template
@@ -4,7 +4,7 @@
 # EXECUTE_LOCAL_COMMANDS - Allow local command execution (Example: False)
 EXECUTE_LOCAL_COMMANDS=False
 # BROWSE_CHUNK_MAX_LENGTH - When browsing website, define the length of chunk stored in memory
-BROWSE_CHUNK_MAX_LENGTH=4000
+BROWSE_CHUNK_MAX_LENGTH=8192
 # BROWSE_SUMMARY_MAX_TOKEN - Define the maximum length of the summary generated by GPT agent when browsing website
 BROWSE_SUMMARY_MAX_TOKEN=300
 # USER_AGENT - Define the user-agent used by the requests library to browse website (string)

From 6403bf112790b34fa122bdd519703e4b110f6875 Mon Sep 17 00:00:00 2001
From: Maiko Bossuyt
Date: Fri, 14 Apr 2023 10:35:30 +0200
Subject: [PATCH 30/33] Update data_ingestion.py

fixed linting

---
 scripts/data_ingestion.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/data_ingestion.py b/scripts/data_ingestion.py
index 32811166..9addc34b 100644
--- a/scripts/data_ingestion.py
+++ b/scripts/data_ingestion.py
@@ -6,6 +6,7 @@ from file_operations import ingest_file, search_files
 
 cfg = Config()
 
+
 def configure_logging():
     logging.basicConfig(filename='log-ingestion.txt',
                         filemode='a',
@@ -43,7 +44,6 @@ def main():
 
     args = parser.parse_args()
 
-
     # Initialize memory
     memory = get_memory(cfg, init=args.init)
     print('Using memory of type: ' + memory.__class__.__name__)

From c0462dbe7768d41ac205644987ad0fa9f14a5afc Mon Sep 17 00:00:00 2001
From: Maiko Bossuyt
Date: Fri, 14 Apr 2023 10:35:52 +0200
Subject: [PATCH 31/33] Update file_operations.py

fixed linting

---
 scripts/file_operations.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/scripts/file_operations.py b/scripts/file_operations.py
index ed5aa4ec..1a072561 100644
--- a/scripts/file_operations.py
+++ b/scripts/file_operations.py
@@ -42,6 +42,7 @@ def split_file(content, max_length=4000, overlap=0):
         yield chunk
         start += max_length - overlap
 
+
 def read_file(filename):
     """Read a file and return the contents"""
     try:

From a67818648ed722e4e50133e01c0c2f189dfec05c Mon Sep 17 00:00:00 2001
From: Maiko Bossuyt
Date: Fri, 14 Apr 2023 18:10:42 +0200
Subject: [PATCH 32/33] Update browse.py

linting

---
 scripts/browse.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/scripts/browse.py b/scripts/browse.py
index a4a41744..ef22de03 100644
--- a/scripts/browse.py
+++ b/scripts/browse.py
@@ -137,6 +137,7 @@ def create_message(chunk, question):
         "content": f"\"\"\"{chunk}\"\"\" Using the above text, please answer the following question: \"{question}\" -- if the question cannot be answered using the text, please summarize the text."
     }
 
+
 def summarize_text(url, text, question):
     """Summarize text using the LLM model"""
     if not text:

From 2ba0cb24dc84fae271d6466f00cc082cc8c44a4e Mon Sep 17 00:00:00 2001
From: Mike Kelly
Date: Fri, 14 Apr 2023 16:39:29 +0100
Subject: [PATCH 33/33] execute python via shell if already running in a
 container

---
 scripts/execute_code.py | 93 +++++++++++++++++++++++------------------
 1 file changed, 52 insertions(+), 41 deletions(-)

diff --git a/scripts/execute_code.py b/scripts/execute_code.py
index dbd62c22..45263d02 100644
--- a/scripts/execute_code.py
+++ b/scripts/execute_code.py
@@ -19,53 +19,60 @@ def execute_python_file(file):
     if not os.path.isfile(file_path):
         return f"Error: File '{file}' does not exist."
 
-    try:
-        client = docker.from_env()
-
-        image_name = 'python:3.10'
+    if we_are_running_in_a_docker_container():
+        result = subprocess.run(f'python {file_path}', capture_output=True, encoding="utf8", shell=True)
+        if result.returncode == 0:
+            return result.stdout
+        else:
+            return f"Error: {result.stderr}"
+    else:
         try:
-            client.images.get(image_name)
-            print(f"Image '{image_name}' found locally")
-        except docker.errors.ImageNotFound:
-            print(f"Image '{image_name}' not found locally, pulling from Docker Hub")
-            # Use the low-level API to stream the pull response
-            low_level_client = docker.APIClient()
-            for line in low_level_client.pull(image_name, stream=True, decode=True):
-                # Print the status and progress, if available
-                status = line.get('status')
-                progress = line.get('progress')
-                if status and progress:
-                    print(f"{status}: {progress}")
-                elif status:
-                    print(status)
+            client = docker.from_env()
 
+            image_name = 'python:3.10'
+            try:
+                client.images.get(image_name)
+                print(f"Image '{image_name}' found locally")
+            except docker.errors.ImageNotFound:
+                print(f"Image '{image_name}' not found locally, pulling from Docker Hub")
+                # Use the low-level API to stream the pull response
+                low_level_client = docker.APIClient()
+                for line in low_level_client.pull(image_name, stream=True, decode=True):
+                    # Print the status and progress, if available
+                    status = line.get('status')
+                    progress = line.get('progress')
+                    if status and progress:
+                        print(f"{status}: {progress}")
+                    elif status:
+                        print(status)
 
-        # You can replace 'python:3.8' with the desired Python image/version
-        # You can find available Python images on Docker Hub:
-        # https://hub.docker.com/_/python
-        container = client.containers.run(
-            image_name,
-            f'python {file}',
-            volumes={
-                os.path.abspath(WORKSPACE_FOLDER): {
-                    'bind': '/workspace',
-                    'mode': 'ro'}},
-            working_dir='/workspace',
-            stderr=True,
-            stdout=True,
-            detach=True,
-        )
+            # You can replace 'python:3.10' with the desired Python image/version
+            # You can find available Python images on Docker Hub:
+            # https://hub.docker.com/_/python
+            container = client.containers.run(
+                image_name,
+                f'python {file}',
+                volumes={
+                    os.path.abspath(WORKSPACE_FOLDER): {
+                        'bind': '/workspace',
+                        'mode': 'ro'}},
+                working_dir='/workspace',
+                stderr=True,
+                stdout=True,
+                detach=True,
+            )
 
-        output = container.wait()
-        logs = container.logs().decode('utf-8')
-        container.remove()
+            output = container.wait()
+            logs = container.logs().decode('utf-8')
+            container.remove()
 
-        # print(f"Execution complete. Output: {output}")
-        # print(f"Logs: {logs}")
+            # print(f"Execution complete. Output: {output}")
+            # print(f"Logs: {logs}")
 
-        return logs
+            return logs
 
-    except Exception as e:
-        return f"Error: {str(e)}"
+        except Exception as e:
+            return f"Error: {str(e)}"
 
 
 def execute_shell(command_line):
@@ -86,3 +93,7 @@ def execute_shell(command_line):
     os.chdir(current_dir)
 
     return output
+
+
+def we_are_running_in_a_docker_container():
+    return os.path.exists('/.dockerenv')