mirror of
https://github.com/aljazceru/Auto-GPT.git
synced 2025-12-20 15:34:23 +01:00
Text file loaders (#3031)
* adding requiered packages for loading pdf, docx, md, tex files (preferably pure python packages) * adding text file utils providing function to load file based on extension && adding read_text_file command * adding test cases for text file loading (pdf file creation is hardcoded due to external package requierment for creation (a sample file can be added)) * formatting * changing command name from 'read_text_file' to 'parse_text_document' * fallback to txtParser if file extension is not known to read script and code files * adding extension respective parsers * adding binary file check function * adding file existance check && raising valueError for unsupported binary file formats * adding check file type (binary) in test_parsers for specific extensions && fixing mock pdf generation to include null bytes * adding .yml extension parser * removal of .doc parser * updating file loading commands names * updating test (removing .doc mock function) * fix: import sort * new cassette for mem A * feat: update Cassettes * feat: consolidate commands * feat: linting * feat: updates to cassettes --------- Co-authored-by: Reinier van der Leer <github@pwuts.nl> Co-authored-by: Nicholas Tindle <nick@ntindle.com> Co-authored-by: k-boikov <64261260+k-boikov@users.noreply.github.com>
This commit is contained in:
@@ -12,6 +12,7 @@ from colorama import Back, Fore
|
||||
from requests.adapters import HTTPAdapter, Retry
|
||||
|
||||
from autogpt.commands.command import command
|
||||
from autogpt.commands.file_operations_utils import read_textual_file
|
||||
from autogpt.config import Config
|
||||
from autogpt.logs import logger
|
||||
from autogpt.spinner import Spinner
|
||||
@@ -143,7 +144,7 @@ def split_file(
|
||||
start += max_length - overlap
|
||||
|
||||
|
||||
@command("read_file", "Read file", '"filename": "<filename>"')
|
||||
@command("read_file", "Read a file", '"filename": "<filename>"')
|
||||
def read_file(filename: str) -> str:
|
||||
"""Read a file and return the contents
|
||||
|
||||
@@ -154,12 +155,10 @@ def read_file(filename: str) -> str:
|
||||
str: The contents of the file
|
||||
"""
|
||||
try:
|
||||
charset_match = charset_normalizer.from_path(filename).best()
|
||||
encoding = charset_match.encoding
|
||||
logger.debug(f"Read file '{filename}' with encoding '{encoding}'")
|
||||
return str(charset_match)
|
||||
except Exception as err:
|
||||
return f"Error: {err}"
|
||||
content = read_textual_file(filename, logger)
|
||||
return content
|
||||
except Exception as e:
|
||||
return f"Error: {str(e)}"
|
||||
|
||||
|
||||
def ingest_file(
|
||||
|
||||
Reference in New Issue
Block a user