fix(agent/file_operations): Fix read_file command in GCS and S3 workspaces

- Update the `read_file` function in `file_operations.py` to pass the file's extension to the `decode_textual_file` function.
- Modify the `decode_textual_file` function in `file_operations_utils.py` to accept the file extension as an argument, so that it no longer derives the extension from `file.name`.
- Make the log and error messages in `file_operations_utils.py` work for file objects that have no `name` attribute.
- Update the `content` property in the `FileContextItem` class in `context_item.py` to pass the file's extension to the `decode_textual_file` function.
- Update the `test_parsers` function in `test_text_file_parsers.py` to pass the file extension to the `decode_textual_file` function.
2026-02-19 13:14:26 +01:00 · 2023-12-14 02:04:56 +01:00
parent 5978031f7a
commit e428130e4a
4 changed files with 17 additions and 10 deletions
--- a/autogpts/autogpt/autogpt/commands/file_operations.py
+++ b/autogpts/autogpt/autogpt/commands/file_operations.py
@@ -150,7 +150,7 @@ def read_file(filename: str | Path, agent: Agent) -> str:
        str: The contents of the file
    """
    file = agent.workspace.open_file(filename, binary=True)
-    content = decode_textual_file(file, logger)
+    content = decode_textual_file(file, os.path.splitext(filename)[1], logger)

    # # TODO: invalidate/update memory when file is edited
    # file_memory = MemoryItem.from_text_file(content, str(filename), agent.config)
--- a/autogpts/autogpt/autogpt/commands/file_operations_utils.py
+++ b/autogpts/autogpt/autogpt/commands/file_operations_utils.py
@@ -24,7 +24,10 @@ class ParserStrategy(ABC):
 class TXTParser(ParserStrategy):
    def read(self, file: BinaryIO) -> str:
        charset_match = charset_normalizer.from_bytes(file.read()).best()
-        logger.debug(f"Reading '{file.name}' with encoding '{charset_match.encoding}'")
+        logger.debug(
+            f"Reading {getattr(file, 'name', 'file')} "
+            f"with encoding '{charset_match.encoding}'"
+        )
        return str(charset_match)


@@ -95,7 +98,9 @@ class FileContext:
        self.parser = parser

    def decode_file(self, file: BinaryIO) -> str:
-        self.logger.debug(f"Reading file {file.name} with parser {self.parser}")
+        self.logger.debug(
+            f"Reading {getattr(file, 'name', 'file')} with parser {self.parser}"
+        )
        return self.parser.read(file)


@@ -133,15 +138,14 @@ def is_file_binary_fn(file: BinaryIO):
    return False


-def decode_textual_file(file: BinaryIO, logger: logging.Logger) -> str:
+def decode_textual_file(file: BinaryIO, ext: str, logger: logging.Logger) -> str:
    if not file.readable():
-        raise ValueError(f"read_file failed: {file.name} is not a file")
+        raise ValueError(f"{repr(file)} is not readable")

-    file_extension = os.path.splitext(file.name)[1].lower()
-    parser = extension_to_parser.get(file_extension)
+    parser = extension_to_parser.get(ext.lower())
    if not parser:
        if is_file_binary_fn(file):
-            raise ValueError(f"Unsupported binary file format: {file_extension}")
+            raise ValueError(f"Unsupported binary file format: {ext}")
        # fallback to txt file parser (to support script and code files loading)
        parser = TXTParser()
    file_context = FileContext(parser, logger)
--- a/autogpts/autogpt/autogpt/models/context_item.py
+++ b/autogpts/autogpt/autogpt/models/context_item.py
@@ -1,4 +1,5 @@
 import logging
+import os.path
 from abc import ABC, abstractmethod
 from pathlib import Path
 from typing import Optional
@@ -56,8 +57,9 @@ class FileContextItem(BaseModel, ContextItem):

    @property
    def content(self) -> str:
+        # TODO: use workspace.open_file()
        with open(self.file_path, "rb") as file:
-            return decode_textual_file(file, logger)
+            return decode_textual_file(file, os.path.splitext(file.name)[1], logger)


 class FolderContextItem(BaseModel, ContextItem):
--- a/autogpts/autogpt/tests/unit/test_text_file_parsers.py
+++ b/autogpts/autogpt/tests/unit/test_text_file_parsers.py
@@ -1,5 +1,6 @@
 import json
 import logging
+import os.path
 import tempfile
 from pathlib import Path
 from xml.etree import ElementTree
@@ -159,7 +160,7 @@ binary_files_extensions = [".pdf", ".docx"]
 def test_parsers(file_extension, c_file_creator):
    created_file_path = Path(c_file_creator())
    with open(created_file_path, "rb") as file:
-        loaded_text = decode_textual_file(file, logger)
+        loaded_text = decode_textual_file(file, os.path.splitext(file.name)[1], logger)

        assert plain_text_str in loaded_text