mirror of
https://github.com/aljazceru/Auto-GPT.git
synced 2025-12-17 22:14:28 +01:00
Merge branch 'master' of https://github.com/BillSchumacher/Auto-GPT into plugin-support
This commit is contained in:
96
data_ingestion.py
Normal file
96
data_ingestion.py
Normal file
@@ -0,0 +1,96 @@
|
||||
import argparse
|
||||
import logging
|
||||
|
||||
from autogpt.commands.file_operations import ingest_file, search_files
|
||||
from autogpt.config import Config
|
||||
from autogpt.memory import get_memory
|
||||
|
||||
# Module-level configuration instance; main() hands it to get_memory().
cfg = Config()
|
||||
|
||||
|
||||
def configure_logging():
    """
    Set up file logging for the ingestion script and return its logger.

    Calls logging.basicConfig so that records of level DEBUG and above are
    appended to "log-ingestion.txt" with a time-stamped format.

    :return: The logger named "AutoGPT-Ingestion"
    """
    log_settings = {
        "filename": "log-ingestion.txt",
        "filemode": "a",  # append, so earlier runs are kept
        "format": "%(asctime)s,%(msecs)d %(name)s %(levelname)s %(message)s",
        "datefmt": "%H:%M:%S",
        "level": logging.DEBUG,
    }
    logging.basicConfig(**log_settings)

    ingestion_logger = logging.getLogger("AutoGPT-Ingestion")
    return ingestion_logger
|
||||
|
||||
|
||||
def ingest_directory(directory, memory, args):
    """
    Ingest all files in a directory by calling the ingest_file function for each file.

    A failure while ingesting one file is reported on stdout and the remaining
    files are still processed. A failure while listing the directory is
    reported once. In both cases the error is printed rather than raised.

    :param directory: The directory containing the files to ingest
    :param memory: An object with an add() method to store the chunks in memory
    :param args: Parsed command-line options; its ``max_length`` and ``overlap``
        attributes are forwarded to ingest_file
    """
    try:
        for file in search_files(directory):
            # Handle errors per file so that one bad file does not abort the
            # ingestion of everything that comes after it.
            try:
                ingest_file(file, memory, args.max_length, args.overlap)
            except Exception as e:
                print(f"Error while ingesting file '{file}': {str(e)}")
    except Exception as e:
        # Listing the directory (or iterating its result) failed.
        print(f"Error while ingesting directory '{directory}': {str(e)}")
|
||||
|
||||
|
||||
def main() -> None:
    """
    Command-line entry point: ingest one file or one directory into memory.

    Configures logging, parses the command line, obtains the memory backend
    via get_memory (passing the --init flag through as ``init``) and then
    ingests either the file given with --file or the directory given with
    --dir. Ingestion errors are written to the log and echoed on stdout.
    """
    logger = configure_logging()
    args = _build_arg_parser().parse_args()

    # Initialize memory
    memory = get_memory(cfg, init=args.init)
    print(f"Using memory of type: {memory.__class__.__name__}")

    if args.file:
        try:
            ingest_file(args.file, memory, args.max_length, args.overlap)
            print(f"File '{args.file}' ingested successfully.")
        except Exception as exc:
            failure = f"Error while ingesting file '{args.file}': {exc!s}"
            logger.error(failure)
            print(failure)
        return

    if args.dir:
        try:
            ingest_directory(args.dir, memory, args)
            print(f"Directory '{args.dir}' ingested successfully.")
        except Exception as exc:
            failure = f"Error while ingesting directory '{args.dir}': {exc!s}"
            logger.error(failure)
            print(failure)
        return

    # Reached only when both options are falsy, e.g. an empty string value.
    print(
        "Please provide either a file path (--file) or a directory name (--dir)"
        " inside the auto_gpt_workspace directory as input."
    )


def _build_arg_parser():
    """Create the argument parser describing the ingestion script's options."""
    parser = argparse.ArgumentParser(
        description="Ingest a file or a directory with multiple files into memory. "
        "Make sure to set your .env before running this script."
    )

    # Exactly one input source has to be chosen.
    source = parser.add_mutually_exclusive_group(required=True)
    source.add_argument("--file", type=str, help="The file to ingest.")
    source.add_argument(
        "--dir", type=str, help="The directory containing the files to ingest."
    )

    parser.add_argument(
        "--init",
        action="store_true",
        help="Init the memory and wipe its content (default: False)",
        default=False,
    )
    parser.add_argument(
        "--overlap",
        type=int,
        help="The overlap size between chunks when ingesting files (default: 200)",
        default=200,
    )
    parser.add_argument(
        "--max_length",
        type=int,
        help="The max_length of each chunk when ingesting files (default: 4000)",
        default=4000,
    )
    return parser
|
||||
|
||||
|
||||
# Run the ingestion CLI only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user