mirror of
https://github.com/aljazceru/Auto-GPT.git
synced 2025-12-17 14:04:27 +01:00
refactor(agent/file_operations): Refactor file opening/reading and parsing
- Update the signature of `FileWorkspace.open_file` and fix implementations in every workspace backend - Replace `open()` with `workspace.open_file` in the `read_file` command to use the workspace's file opening functionality - Fix the parametrization of the `test_text_file_parsers` test to correctly test text file parsers
This commit is contained in:
@@ -16,7 +16,7 @@ from autogpt.core.utils.json_schema import JSONSchema
|
|||||||
from autogpt.memory.vector import MemoryItemFactory, VectorMemory
|
from autogpt.memory.vector import MemoryItemFactory, VectorMemory
|
||||||
|
|
||||||
from .decorators import sanitize_path_arg
|
from .decorators import sanitize_path_arg
|
||||||
from .file_operations_utils import read_textual_file
|
from .file_operations_utils import decode_textual_file
|
||||||
|
|
||||||
COMMAND_CATEGORY = "file_operations"
|
COMMAND_CATEGORY = "file_operations"
|
||||||
COMMAND_CATEGORY_TITLE = "File Operations"
|
COMMAND_CATEGORY_TITLE = "File Operations"
|
||||||
@@ -140,8 +140,7 @@ def log_operation(
|
|||||||
)
|
)
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
@sanitize_path_arg("filename")
|
def read_file(filename: str | Path, agent: Agent) -> str:
|
||||||
def read_file(filename: Path, agent: Agent) -> str:
|
|
||||||
"""Read a file and return the contents
|
"""Read a file and return the contents
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
@@ -150,8 +149,8 @@ def read_file(filename: Path, agent: Agent) -> str:
|
|||||||
Returns:
|
Returns:
|
||||||
str: The contents of the file
|
str: The contents of the file
|
||||||
"""
|
"""
|
||||||
content = read_textual_file(filename, logger)
|
file = agent.workspace.open_file(filename, binary=True)
|
||||||
# TODO: content = agent.workspace.read_file(filename)
|
content = decode_textual_file(file, logger)
|
||||||
|
|
||||||
# # TODO: invalidate/update memory when file is edited
|
# # TODO: invalidate/update memory when file is edited
|
||||||
# file_memory = MemoryItem.from_text_file(content, str(filename), agent.config)
|
# file_memory = MemoryItem.from_text_file(content, str(filename), agent.config)
|
||||||
|
|||||||
@@ -1,11 +1,11 @@
|
|||||||
import json
|
import json
|
||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
from pathlib import Path
|
from abc import ABC, abstractmethod
|
||||||
|
from typing import BinaryIO
|
||||||
|
|
||||||
import charset_normalizer
|
import charset_normalizer
|
||||||
import docx
|
import docx
|
||||||
import markdown
|
|
||||||
import pypdf
|
import pypdf
|
||||||
import yaml
|
import yaml
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
@@ -14,23 +14,24 @@ from pylatexenc.latex2text import LatexNodes2Text
|
|||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
class ParserStrategy:
|
class ParserStrategy(ABC):
|
||||||
def read(self, file_path: Path) -> str:
|
@abstractmethod
|
||||||
raise NotImplementedError
|
def read(self, file: BinaryIO) -> str:
|
||||||
|
...
|
||||||
|
|
||||||
|
|
||||||
# Basic text file reading
|
# Basic text file reading
|
||||||
class TXTParser(ParserStrategy):
|
class TXTParser(ParserStrategy):
|
||||||
def read(self, file_path: Path) -> str:
|
def read(self, file: BinaryIO) -> str:
|
||||||
charset_match = charset_normalizer.from_path(file_path).best()
|
charset_match = charset_normalizer.from_bytes(file.read()).best()
|
||||||
logger.debug(f"Reading '{file_path}' with encoding '{charset_match.encoding}'")
|
logger.debug(f"Reading '{file.name}' with encoding '{charset_match.encoding}'")
|
||||||
return str(charset_match)
|
return str(charset_match)
|
||||||
|
|
||||||
|
|
||||||
# Reading text from binary file using pdf parser
|
# Reading text from binary file using pdf parser
|
||||||
class PDFParser(ParserStrategy):
|
class PDFParser(ParserStrategy):
|
||||||
def read(self, file_path: Path) -> str:
|
def read(self, file: BinaryIO) -> str:
|
||||||
parser = pypdf.PdfReader(file_path)
|
parser = pypdf.PdfReader(file)
|
||||||
text = ""
|
text = ""
|
||||||
for page_idx in range(len(parser.pages)):
|
for page_idx in range(len(parser.pages)):
|
||||||
text += parser.pages[page_idx].extract_text()
|
text += parser.pages[page_idx].extract_text()
|
||||||
@@ -39,8 +40,8 @@ class PDFParser(ParserStrategy):
|
|||||||
|
|
||||||
# Reading text from binary file using docs parser
|
# Reading text from binary file using docs parser
|
||||||
class DOCXParser(ParserStrategy):
|
class DOCXParser(ParserStrategy):
|
||||||
def read(self, file_path: Path) -> str:
|
def read(self, file: BinaryIO) -> str:
|
||||||
doc_file = docx.Document(file_path)
|
doc_file = docx.Document(file)
|
||||||
text = ""
|
text = ""
|
||||||
for para in doc_file.paragraphs:
|
for para in doc_file.paragraphs:
|
||||||
text += para.text
|
text += para.text
|
||||||
@@ -49,50 +50,37 @@ class DOCXParser(ParserStrategy):
|
|||||||
|
|
||||||
# Reading as dictionary and returning string format
|
# Reading as dictionary and returning string format
|
||||||
class JSONParser(ParserStrategy):
|
class JSONParser(ParserStrategy):
|
||||||
def read(self, file_path: Path) -> str:
|
def read(self, file: BinaryIO) -> str:
|
||||||
with open(file_path, "r") as f:
|
data = json.load(file)
|
||||||
data = json.load(f)
|
text = str(data)
|
||||||
text = str(data)
|
|
||||||
return text
|
return text
|
||||||
|
|
||||||
|
|
||||||
class XMLParser(ParserStrategy):
|
class XMLParser(ParserStrategy):
|
||||||
def read(self, file_path: Path) -> str:
|
def read(self, file: BinaryIO) -> str:
|
||||||
with open(file_path, "r") as f:
|
soup = BeautifulSoup(file, "xml")
|
||||||
soup = BeautifulSoup(f, "xml")
|
text = soup.get_text()
|
||||||
text = soup.get_text()
|
|
||||||
return text
|
return text
|
||||||
|
|
||||||
|
|
||||||
# Reading as dictionary and returning string format
|
# Reading as dictionary and returning string format
|
||||||
class YAMLParser(ParserStrategy):
|
class YAMLParser(ParserStrategy):
|
||||||
def read(self, file_path: Path) -> str:
|
def read(self, file: BinaryIO) -> str:
|
||||||
with open(file_path, "r") as f:
|
data = yaml.load(file, Loader=yaml.FullLoader)
|
||||||
data = yaml.load(f, Loader=yaml.FullLoader)
|
text = str(data)
|
||||||
text = str(data)
|
|
||||||
return text
|
return text
|
||||||
|
|
||||||
|
|
||||||
class HTMLParser(ParserStrategy):
|
class HTMLParser(ParserStrategy):
|
||||||
def read(self, file_path: Path) -> str:
|
def read(self, file: BinaryIO) -> str:
|
||||||
with open(file_path, "r") as f:
|
soup = BeautifulSoup(file, "html.parser")
|
||||||
soup = BeautifulSoup(f, "html.parser")
|
text = soup.get_text()
|
||||||
text = soup.get_text()
|
|
||||||
return text
|
|
||||||
|
|
||||||
|
|
||||||
class MarkdownParser(ParserStrategy):
|
|
||||||
def read(self, file_path: Path) -> str:
|
|
||||||
with open(file_path, "r") as f:
|
|
||||||
html = markdown.markdown(f.read())
|
|
||||||
text = "".join(BeautifulSoup(html, "html.parser").findAll(string=True))
|
|
||||||
return text
|
return text
|
||||||
|
|
||||||
|
|
||||||
class LaTeXParser(ParserStrategy):
|
class LaTeXParser(ParserStrategy):
|
||||||
def read(self, file_path: Path) -> str:
|
def read(self, file: BinaryIO) -> str:
|
||||||
with open(file_path, "r") as f:
|
latex = file.read().decode()
|
||||||
latex = f.read()
|
|
||||||
text = LatexNodes2Text().latex_to_text(latex)
|
text = LatexNodes2Text().latex_to_text(latex)
|
||||||
return text
|
return text
|
||||||
|
|
||||||
@@ -106,13 +94,15 @@ class FileContext:
|
|||||||
self.logger.debug(f"Setting Context Parser to {parser}")
|
self.logger.debug(f"Setting Context Parser to {parser}")
|
||||||
self.parser = parser
|
self.parser = parser
|
||||||
|
|
||||||
def read_file(self, file_path) -> str:
|
def decode_file(self, file: BinaryIO) -> str:
|
||||||
self.logger.debug(f"Reading file {file_path} with parser {self.parser}")
|
self.logger.debug(f"Reading file {file.name} with parser {self.parser}")
|
||||||
return self.parser.read(file_path)
|
return self.parser.read(file)
|
||||||
|
|
||||||
|
|
||||||
extension_to_parser = {
|
extension_to_parser = {
|
||||||
".txt": TXTParser(),
|
".txt": TXTParser(),
|
||||||
|
".md": TXTParser(),
|
||||||
|
".markdown": TXTParser(),
|
||||||
".csv": TXTParser(),
|
".csv": TXTParser(),
|
||||||
".pdf": PDFParser(),
|
".pdf": PDFParser(),
|
||||||
".docx": DOCXParser(),
|
".docx": DOCXParser(),
|
||||||
@@ -123,47 +113,36 @@ extension_to_parser = {
|
|||||||
".html": HTMLParser(),
|
".html": HTMLParser(),
|
||||||
".htm": HTMLParser(),
|
".htm": HTMLParser(),
|
||||||
".xhtml": HTMLParser(),
|
".xhtml": HTMLParser(),
|
||||||
".md": MarkdownParser(),
|
|
||||||
".markdown": MarkdownParser(),
|
|
||||||
".tex": LaTeXParser(),
|
".tex": LaTeXParser(),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
def is_file_binary_fn(file_path: Path):
|
def is_file_binary_fn(file: BinaryIO):
|
||||||
"""Given a file path load all its content and checks if the null bytes is present
|
"""Given a file path load all its content and checks if the null bytes is present
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
file_path (_type_): _description_
|
file (_type_): _description_
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
bool: is_binary
|
bool: is_binary
|
||||||
"""
|
"""
|
||||||
with open(file_path, "rb") as f:
|
file_data = file.read()
|
||||||
file_data = f.read()
|
file.seek(0)
|
||||||
if b"\x00" in file_data:
|
if b"\x00" in file_data:
|
||||||
return True
|
return True
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
|
||||||
def read_textual_file(file_path: Path, logger: logging.Logger) -> str:
|
def decode_textual_file(file: BinaryIO, logger: logging.Logger) -> str:
|
||||||
if not file_path.is_absolute():
|
if not file.readable():
|
||||||
raise ValueError("File path must be absolute")
|
raise ValueError(f"read_file failed: {file.name} is not a file")
|
||||||
|
|
||||||
if not file_path.is_file():
|
file_extension = os.path.splitext(file.name)[1].lower()
|
||||||
if not file_path.exists():
|
|
||||||
raise FileNotFoundError(
|
|
||||||
f"read_file {file_path} failed: no such file or directory"
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
raise ValueError(f"read_file failed: {file_path} is not a file")
|
|
||||||
|
|
||||||
is_binary = is_file_binary_fn(file_path)
|
|
||||||
file_extension = os.path.splitext(file_path)[1].lower()
|
|
||||||
parser = extension_to_parser.get(file_extension)
|
parser = extension_to_parser.get(file_extension)
|
||||||
if not parser:
|
if not parser:
|
||||||
if is_binary:
|
if is_file_binary_fn(file):
|
||||||
raise ValueError(f"Unsupported binary file format: {file_extension}")
|
raise ValueError(f"Unsupported binary file format: {file_extension}")
|
||||||
# fallback to txt file parser (to support script and code files loading)
|
# fallback to txt file parser (to support script and code files loading)
|
||||||
parser = TXTParser()
|
parser = TXTParser()
|
||||||
file_context = FileContext(parser, logger)
|
file_context = FileContext(parser, logger)
|
||||||
return file_context.read_file(file_path)
|
return file_context.decode_file(file)
|
||||||
|
|||||||
@@ -5,8 +5,9 @@ from __future__ import annotations
|
|||||||
|
|
||||||
import logging
|
import logging
|
||||||
from abc import ABC, abstractmethod
|
from abc import ABC, abstractmethod
|
||||||
|
from io import IOBase, TextIOBase
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Any, Callable, Literal, Optional, overload
|
from typing import IO, Any, BinaryIO, Callable, Literal, Optional, TextIO, overload
|
||||||
|
|
||||||
from autogpt.core.configuration.schema import SystemConfiguration
|
from autogpt.core.configuration.schema import SystemConfiguration
|
||||||
|
|
||||||
@@ -47,9 +48,23 @@ class FileWorkspace(ABC):
|
|||||||
doesn't exist yet. E.g. a folder on disk, or an S3 Bucket.
|
doesn't exist yet. E.g. a folder on disk, or an S3 Bucket.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
@overload
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
def open_file(self, path: str | Path, mode: str = "r"):
|
def open_file(
|
||||||
"""Open a file in the workspace."""
|
self, path: str | Path, binary: Literal[False] = False
|
||||||
|
) -> TextIO | TextIOBase:
|
||||||
|
"""Returns a readable text file-like object representing the file."""
|
||||||
|
|
||||||
|
@overload
|
||||||
|
@abstractmethod
|
||||||
|
def open_file(
|
||||||
|
self, path: str | Path, binary: Literal[True] = True
|
||||||
|
) -> BinaryIO | IOBase:
|
||||||
|
"""Returns a readable binary file-like object representing the file."""
|
||||||
|
|
||||||
|
@abstractmethod
|
||||||
|
def open_file(self, path: str | Path, binary: bool = False) -> IO | IOBase:
|
||||||
|
"""Returns a readable file-like object representing the file."""
|
||||||
|
|
||||||
@overload
|
@overload
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
|
|||||||
@@ -6,6 +6,7 @@ from __future__ import annotations
|
|||||||
|
|
||||||
import inspect
|
import inspect
|
||||||
import logging
|
import logging
|
||||||
|
from io import IOBase
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
from google.cloud import storage
|
from google.cloud import storage
|
||||||
@@ -40,7 +41,7 @@ class GCSFileWorkspace(FileWorkspace):
|
|||||||
return self._root
|
return self._root
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def restrict_to_root(self):
|
def restrict_to_root(self) -> bool:
|
||||||
"""Whether to restrict generated paths to the root."""
|
"""Whether to restrict generated paths to the root."""
|
||||||
return True
|
return True
|
||||||
|
|
||||||
@@ -50,26 +51,28 @@ class GCSFileWorkspace(FileWorkspace):
|
|||||||
def get_path(self, relative_path: str | Path) -> Path:
|
def get_path(self, relative_path: str | Path) -> Path:
|
||||||
return super().get_path(relative_path).relative_to("/")
|
return super().get_path(relative_path).relative_to("/")
|
||||||
|
|
||||||
def open_file(self, path: str | Path, mode: str = "r"):
|
def _get_blob(self, path: str | Path) -> storage.Blob:
|
||||||
"""Open a file in the workspace."""
|
|
||||||
path = self.get_path(path)
|
path = self.get_path(path)
|
||||||
blob = self._bucket.blob(str(path))
|
return self._bucket.blob(str(path))
|
||||||
return blob
|
|
||||||
|
def open_file(self, path: str | Path, binary: bool = False) -> IOBase:
|
||||||
|
"""Open a file in the workspace."""
|
||||||
|
blob = self._get_blob(path)
|
||||||
|
blob.reload() # pin revision number to prevent version mixing while reading
|
||||||
|
return blob.open("rb" if binary else "r")
|
||||||
|
|
||||||
def read_file(self, path: str | Path, binary: bool = False) -> str | bytes:
|
def read_file(self, path: str | Path, binary: bool = False) -> str | bytes:
|
||||||
"""Read a file in the workspace."""
|
"""Read a file in the workspace."""
|
||||||
blob = self.open_file(path, "r")
|
return self.open_file(path, binary).read()
|
||||||
file_content = (
|
|
||||||
blob.download_as_text() if not binary else blob.download_as_bytes()
|
|
||||||
)
|
|
||||||
return file_content
|
|
||||||
|
|
||||||
async def write_file(self, path: str | Path, content: str | bytes):
|
async def write_file(self, path: str | Path, content: str | bytes) -> None:
|
||||||
"""Write to a file in the workspace."""
|
"""Write to a file in the workspace."""
|
||||||
blob = self.open_file(path, "w")
|
blob = self._get_blob(path)
|
||||||
blob.upload_from_string(content) if isinstance(
|
|
||||||
content, str
|
if isinstance(content, str):
|
||||||
) else blob.upload_from_file(content)
|
blob.upload_from_string(content)
|
||||||
|
else:
|
||||||
|
blob.upload_from_file(content)
|
||||||
|
|
||||||
if self.on_write_file:
|
if self.on_write_file:
|
||||||
path = Path(path)
|
path = Path(path)
|
||||||
|
|||||||
@@ -6,6 +6,7 @@ from __future__ import annotations
|
|||||||
import inspect
|
import inspect
|
||||||
import logging
|
import logging
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
from typing import IO
|
||||||
|
|
||||||
from .base import FileWorkspace, FileWorkspaceConfiguration
|
from .base import FileWorkspace, FileWorkspaceConfiguration
|
||||||
|
|
||||||
@@ -26,26 +27,29 @@ class LocalFileWorkspace(FileWorkspace):
|
|||||||
return self._root
|
return self._root
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def restrict_to_root(self):
|
def restrict_to_root(self) -> bool:
|
||||||
"""Whether to restrict generated paths to the root."""
|
"""Whether to restrict generated paths to the root."""
|
||||||
return self._restrict_to_root
|
return self._restrict_to_root
|
||||||
|
|
||||||
def initialize(self) -> None:
|
def initialize(self) -> None:
|
||||||
self.root.mkdir(exist_ok=True, parents=True)
|
self.root.mkdir(exist_ok=True, parents=True)
|
||||||
|
|
||||||
def open_file(self, path: str | Path, mode: str = "r"):
|
def open_file(self, path: str | Path, binary: bool = False) -> IO:
|
||||||
"""Open a file in the workspace."""
|
"""Open a file in the workspace."""
|
||||||
full_path = self.get_path(path)
|
return self._open_file(path, "rb" if binary else "r")
|
||||||
return open(full_path, mode)
|
|
||||||
|
|
||||||
def read_file(self, path: str | Path, binary: bool = False):
|
def _open_file(self, path: str | Path, mode: str = "r") -> IO:
|
||||||
|
full_path = self.get_path(path)
|
||||||
|
return open(full_path, mode) # type: ignore
|
||||||
|
|
||||||
|
def read_file(self, path: str | Path, binary: bool = False) -> str | bytes:
|
||||||
"""Read a file in the workspace."""
|
"""Read a file in the workspace."""
|
||||||
with self.open_file(path, "rb" if binary else "r") as file:
|
with self._open_file(path, "rb" if binary else "r") as file:
|
||||||
return file.read()
|
return file.read()
|
||||||
|
|
||||||
async def write_file(self, path: str | Path, content: str | bytes):
|
async def write_file(self, path: str | Path, content: str | bytes) -> None:
|
||||||
"""Write to a file in the workspace."""
|
"""Write to a file in the workspace."""
|
||||||
with self.open_file(path, "wb" if type(content) is bytes else "w") as file:
|
with self._open_file(path, "wb" if type(content) is bytes else "w") as file:
|
||||||
file.write(content)
|
file.write(content)
|
||||||
|
|
||||||
if self.on_write_file:
|
if self.on_write_file:
|
||||||
@@ -61,7 +65,7 @@ class LocalFileWorkspace(FileWorkspace):
|
|||||||
path = self.get_path(path)
|
path = self.get_path(path)
|
||||||
return [file.relative_to(path) for file in path.rglob("*") if file.is_file()]
|
return [file.relative_to(path) for file in path.rglob("*") if file.is_file()]
|
||||||
|
|
||||||
def delete_file(self, path: str | Path):
|
def delete_file(self, path: str | Path) -> None:
|
||||||
"""Delete a file in the workspace."""
|
"""Delete a file in the workspace."""
|
||||||
full_path = self.get_path(path)
|
full_path = self.get_path(path)
|
||||||
full_path.unlink()
|
full_path.unlink()
|
||||||
|
|||||||
@@ -8,6 +8,7 @@ import contextlib
|
|||||||
import inspect
|
import inspect
|
||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
|
from io import IOBase, TextIOWrapper
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import TYPE_CHECKING, Optional
|
from typing import TYPE_CHECKING, Optional
|
||||||
|
|
||||||
@@ -74,22 +75,27 @@ class S3FileWorkspace(FileWorkspace):
|
|||||||
def get_path(self, relative_path: str | Path) -> Path:
|
def get_path(self, relative_path: str | Path) -> Path:
|
||||||
return super().get_path(relative_path).relative_to("/")
|
return super().get_path(relative_path).relative_to("/")
|
||||||
|
|
||||||
def open_file(self, path: str | Path, mode: str = "r"):
|
def _get_obj(self, path: str | Path) -> mypy_boto3_s3.service_resource.Object:
|
||||||
"""Open a file in the workspace."""
|
"""Get an S3 object."""
|
||||||
path = self.get_path(path)
|
path = self.get_path(path)
|
||||||
obj = self._bucket.Object(str(path))
|
obj = self._bucket.Object(str(path))
|
||||||
with contextlib.suppress(botocore.exceptions.ClientError):
|
with contextlib.suppress(botocore.exceptions.ClientError):
|
||||||
obj.load()
|
obj.load()
|
||||||
return obj
|
return obj
|
||||||
|
|
||||||
|
def open_file(self, path: str | Path, binary: bool = False) -> IOBase:
|
||||||
|
"""Open a file in the workspace."""
|
||||||
|
obj = self._get_obj(path)
|
||||||
|
return obj.get()["Body"] if not binary else TextIOWrapper(obj.get()["Body"])
|
||||||
|
|
||||||
def read_file(self, path: str | Path, binary: bool = False) -> str | bytes:
|
def read_file(self, path: str | Path, binary: bool = False) -> str | bytes:
|
||||||
"""Read a file in the workspace."""
|
"""Read a file in the workspace."""
|
||||||
file_content = self.open_file(path, "r").get()["Body"].read()
|
file_content = self.open_file(path, binary).read()
|
||||||
return file_content if binary else file_content.decode()
|
return file_content if binary else file_content.decode()
|
||||||
|
|
||||||
async def write_file(self, path: str | Path, content: str | bytes):
|
async def write_file(self, path: str | Path, content: str | bytes) -> None:
|
||||||
"""Write to a file in the workspace."""
|
"""Write to a file in the workspace."""
|
||||||
obj = self.open_file(path, "w")
|
obj = self._get_obj(path)
|
||||||
obj.put(Body=content)
|
obj.put(Body=content)
|
||||||
|
|
||||||
if self.on_write_file:
|
if self.on_write_file:
|
||||||
|
|||||||
@@ -5,7 +5,7 @@ from typing import Optional
|
|||||||
|
|
||||||
from pydantic import BaseModel, Field
|
from pydantic import BaseModel, Field
|
||||||
|
|
||||||
from autogpt.commands.file_operations_utils import read_textual_file
|
from autogpt.commands.file_operations_utils import decode_textual_file
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
@@ -56,7 +56,8 @@ class FileContextItem(BaseModel, ContextItem):
|
|||||||
|
|
||||||
@property
|
@property
|
||||||
def content(self) -> str:
|
def content(self) -> str:
|
||||||
return read_textual_file(self.file_path, logger)
|
with open(self.file_path, "rb") as file:
|
||||||
|
return decode_textual_file(file, logger)
|
||||||
|
|
||||||
|
|
||||||
class FolderContextItem(BaseModel, ContextItem):
|
class FolderContextItem(BaseModel, ContextItem):
|
||||||
|
|||||||
@@ -5,10 +5,14 @@ from pathlib import Path
|
|||||||
from xml.etree import ElementTree
|
from xml.etree import ElementTree
|
||||||
|
|
||||||
import docx
|
import docx
|
||||||
|
import pytest
|
||||||
import yaml
|
import yaml
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
|
|
||||||
from autogpt.commands.file_operations_utils import is_file_binary_fn, read_textual_file
|
from autogpt.commands.file_operations_utils import (
|
||||||
|
decode_textual_file,
|
||||||
|
is_file_binary_fn,
|
||||||
|
)
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
@@ -148,15 +152,18 @@ respective_file_creation_functions = {
|
|||||||
binary_files_extensions = [".pdf", ".docx"]
|
binary_files_extensions = [".pdf", ".docx"]
|
||||||
|
|
||||||
|
|
||||||
def test_parsers():
|
@pytest.mark.parametrize(
|
||||||
for (
|
"file_extension, c_file_creator",
|
||||||
file_extension,
|
respective_file_creation_functions.items(),
|
||||||
c_file_creator,
|
)
|
||||||
) in respective_file_creation_functions.items():
|
def test_parsers(file_extension, c_file_creator):
|
||||||
created_file_path = Path(c_file_creator())
|
created_file_path = Path(c_file_creator())
|
||||||
loaded_text = read_textual_file(created_file_path, logger)
|
with open(created_file_path, "rb") as file:
|
||||||
|
loaded_text = decode_textual_file(file, logger)
|
||||||
|
|
||||||
assert plain_text_str in loaded_text
|
assert plain_text_str in loaded_text
|
||||||
|
|
||||||
should_be_binary = file_extension in binary_files_extensions
|
should_be_binary = file_extension in binary_files_extensions
|
||||||
assert should_be_binary == is_file_binary_fn(created_file_path)
|
assert should_be_binary == is_file_binary_fn(file)
|
||||||
|
|
||||||
|
created_file_path.unlink() # cleanup
|
||||||
|
|||||||
Reference in New Issue
Block a user