mirror of
https://github.com/aljazceru/Auto-GPT.git
synced 2026-02-19 13:14:26 +01:00
fix(agent/file_operations): Fix read_file command in GCS and S3 workspaces
- Update the `read_file` function in `file_operations.py` to pass the file's extension to the `decode_textual_file` function.
- Modify the `decode_textual_file` function in `file_operations_utils.py` to accept the file extension as an argument.
- Update the `content` property in the `FileContextItem` class in `context_item.py` to pass the file's extension to the `decode_textual_file` function.
- Update the `test_parsers` function in `test_text_file_parsers.py` to pass the file extension to the `decode_textual_file` function.
This commit is contained in:
@@ -150,7 +150,7 @@ def read_file(filename: str | Path, agent: Agent) -> str:
|
||||
str: The contents of the file
|
||||
"""
|
||||
file = agent.workspace.open_file(filename, binary=True)
|
||||
content = decode_textual_file(file, logger)
|
||||
content = decode_textual_file(file, os.path.splitext(filename)[1], logger)
|
||||
|
||||
# # TODO: invalidate/update memory when file is edited
|
||||
# file_memory = MemoryItem.from_text_file(content, str(filename), agent.config)
|
||||
|
||||
@@ -24,7 +24,10 @@ class ParserStrategy(ABC):
|
||||
class TXTParser(ParserStrategy):
|
||||
def read(self, file: BinaryIO) -> str:
|
||||
charset_match = charset_normalizer.from_bytes(file.read()).best()
|
||||
logger.debug(f"Reading '{file.name}' with encoding '{charset_match.encoding}'")
|
||||
logger.debug(
|
||||
f"Reading {getattr(file, 'name', 'file')} "
|
||||
f"with encoding '{charset_match.encoding}'"
|
||||
)
|
||||
return str(charset_match)
|
||||
|
||||
|
||||
@@ -95,7 +98,9 @@ class FileContext:
|
||||
self.parser = parser
|
||||
|
||||
def decode_file(self, file: BinaryIO) -> str:
|
||||
self.logger.debug(f"Reading file {file.name} with parser {self.parser}")
|
||||
self.logger.debug(
|
||||
f"Reading {getattr(file, 'name', 'file')} with parser {self.parser}"
|
||||
)
|
||||
return self.parser.read(file)
|
||||
|
||||
|
||||
@@ -133,15 +138,14 @@ def is_file_binary_fn(file: BinaryIO):
|
||||
return False
|
||||
|
||||
|
||||
def decode_textual_file(file: BinaryIO, logger: logging.Logger) -> str:
|
||||
def decode_textual_file(file: BinaryIO, ext: str, logger: logging.Logger) -> str:
|
||||
if not file.readable():
|
||||
raise ValueError(f"read_file failed: {file.name} is not a file")
|
||||
raise ValueError(f"{repr(file)} is not readable")
|
||||
|
||||
file_extension = os.path.splitext(file.name)[1].lower()
|
||||
parser = extension_to_parser.get(file_extension)
|
||||
parser = extension_to_parser.get(ext.lower())
|
||||
if not parser:
|
||||
if is_file_binary_fn(file):
|
||||
raise ValueError(f"Unsupported binary file format: {file_extension}")
|
||||
raise ValueError(f"Unsupported binary file format: {ext}")
|
||||
# fallback to txt file parser (to support script and code files loading)
|
||||
parser = TXTParser()
|
||||
file_context = FileContext(parser, logger)
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
import logging
|
||||
import os.path
|
||||
from abc import ABC, abstractmethod
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
@@ -56,8 +57,9 @@ class FileContextItem(BaseModel, ContextItem):
|
||||
|
||||
@property
|
||||
def content(self) -> str:
|
||||
# TODO: use workspace.open_file()
|
||||
with open(self.file_path, "rb") as file:
|
||||
return decode_textual_file(file, logger)
|
||||
return decode_textual_file(file, os.path.splitext(file.name)[1], logger)
|
||||
|
||||
|
||||
class FolderContextItem(BaseModel, ContextItem):
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
import json
|
||||
import logging
|
||||
import os.path
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
from xml.etree import ElementTree
|
||||
@@ -159,7 +160,7 @@ binary_files_extensions = [".pdf", ".docx"]
|
||||
def test_parsers(file_extension, c_file_creator):
|
||||
created_file_path = Path(c_file_creator())
|
||||
with open(created_file_path, "rb") as file:
|
||||
loaded_text = decode_textual_file(file, logger)
|
||||
loaded_text = decode_textual_file(file, os.path.splitext(file.name)[1], logger)
|
||||
|
||||
assert plain_text_str in loaded_text
|
||||
|
||||
|
||||
Reference in New Issue
Block a user