mirror of
https://github.com/aljazceru/Auto-GPT.git
synced 2025-12-23 08:54:24 +01:00
Vector memory revamp (part 1: refactoring) (#4208)
Additional changes: * Improve typing * Modularize message history memory & fix/refactor lots of things * Fix summarization * Move memory relevance calculation to MemoryItem & improve test * Fix import warnings in web_selenium.py * Remove `memory_add` ghost command * Implement overlap in `split_text` * Move memory tests into subdirectory * Remove deprecated `get_ada_embedding()` and helpers * Fix used token calculation in `chat_with_ai` * Replace Message TypedDict by dataclass * Fix AgentManager singleton issues in tests --------- Co-authored-by: Auto-GPT-Bot <github-bot@agpt.co>
This commit is contained in:
committed by
GitHub
parent
10489e0df2
commit
bfbe613960
@@ -4,7 +4,7 @@ from __future__ import annotations
|
||||
import hashlib
|
||||
import os
|
||||
import os.path
|
||||
from typing import Dict, Generator, Literal, Tuple
|
||||
from typing import Generator, Literal
|
||||
|
||||
import charset_normalizer
|
||||
import requests
|
||||
@@ -15,6 +15,7 @@ from autogpt.commands.command import command
|
||||
from autogpt.commands.file_operations_utils import read_textual_file
|
||||
from autogpt.config import Config
|
||||
from autogpt.logs import logger
|
||||
from autogpt.memory.vector import MemoryItem, VectorMemory
|
||||
from autogpt.spinner import Spinner
|
||||
from autogpt.utils import readable_file_size
|
||||
|
||||
@@ -28,7 +29,9 @@ def text_checksum(text: str) -> str:
|
||||
return hashlib.md5(text.encode("utf-8")).hexdigest()
|
||||
|
||||
|
||||
def operations_from_log(log_path: str) -> Generator[Tuple[Operation, str, str | None]]:
|
||||
def operations_from_log(
|
||||
log_path: str,
|
||||
) -> Generator[tuple[Operation, str, str | None], None, None]:
|
||||
"""Parse the file operations log and return a tuple containing the log entries"""
|
||||
try:
|
||||
log = open(log_path, "r", encoding="utf-8")
|
||||
@@ -45,6 +48,7 @@ def operations_from_log(log_path: str) -> Generator[Tuple[Operation, str, str |
|
||||
try:
|
||||
path, checksum = (x.strip() for x in tail.rsplit(" #", maxsplit=1))
|
||||
except ValueError:
|
||||
logger.warn(f"File log entry lacks checksum: '{line}'")
|
||||
path, checksum = tail.strip(), None
|
||||
yield (operation, path, checksum)
|
||||
elif operation == "delete":
|
||||
@@ -53,7 +57,7 @@ def operations_from_log(log_path: str) -> Generator[Tuple[Operation, str, str |
|
||||
log.close()
|
||||
|
||||
|
||||
def file_operations_state(log_path: str) -> Dict:
|
||||
def file_operations_state(log_path: str) -> dict[str, str]:
|
||||
"""Iterates over the operations log and returns the expected state.
|
||||
|
||||
Parses a log file at CFG.file_logger_path to construct a dictionary that maps
|
||||
@@ -156,43 +160,41 @@ def read_file(filename: str) -> str:
|
||||
"""
|
||||
try:
|
||||
content = read_textual_file(filename, logger)
|
||||
|
||||
# TODO: invalidate/update memory when file is edited
|
||||
file_memory = MemoryItem.from_text_file(content, filename)
|
||||
if len(file_memory.chunks) > 1:
|
||||
return file_memory.summary
|
||||
|
||||
return content
|
||||
except Exception as e:
|
||||
return f"Error: {str(e)}"
|
||||
|
||||
|
||||
def ingest_file(
|
||||
filename: str, memory, max_length: int = 4000, overlap: int = 200
|
||||
filename: str,
|
||||
memory: VectorMemory,
|
||||
) -> None:
|
||||
"""
|
||||
Ingest a file by reading its content, splitting it into chunks with a specified
|
||||
maximum length and overlap, and adding the chunks to the memory storage.
|
||||
|
||||
:param filename: The name of the file to ingest
|
||||
:param memory: An object with an add() method to store the chunks in memory
|
||||
:param max_length: The maximum length of each chunk, default is 4000
|
||||
:param overlap: The number of overlapping characters between chunks, default is 200
|
||||
Args:
|
||||
filename: The name of the file to ingest
|
||||
memory: An object with an add() method to store the chunks in memory
|
||||
"""
|
||||
try:
|
||||
logger.info(f"Working with file {filename}")
|
||||
logger.info(f"Ingesting file {filename}")
|
||||
content = read_file(filename)
|
||||
content_length = len(content)
|
||||
logger.info(f"File length: {content_length} characters")
|
||||
|
||||
chunks = list(split_file(content, max_length=max_length, overlap=overlap))
|
||||
# TODO: differentiate between different types of files
|
||||
file_memory = MemoryItem.from_text_file(content, filename)
|
||||
logger.debug(f"Created memory: {file_memory.dump()}")
|
||||
memory.add(file_memory)
|
||||
|
||||
num_chunks = len(chunks)
|
||||
for i, chunk in enumerate(chunks):
|
||||
logger.info(f"Ingesting chunk {i + 1} / {num_chunks} into memory")
|
||||
memory_to_add = (
|
||||
f"Filename: {filename}\n" f"Content part#{i + 1}/{num_chunks}: {chunk}"
|
||||
)
|
||||
|
||||
memory.add(memory_to_add)
|
||||
|
||||
logger.info(f"Done ingesting {num_chunks} chunks from {filename}.")
|
||||
logger.info(f"Ingested {len(file_memory.e_chunks)} chunks from {filename}")
|
||||
except Exception as err:
|
||||
logger.info(f"Error while ingesting file '{filename}': {err}")
|
||||
logger.warn(f"Error while ingesting file '{filename}': {err}")
|
||||
|
||||
|
||||
@command("write_to_file", "Write to file", '"filename": "<filename>", "text": "<text>"')
|
||||
|
||||
Reference in New Issue
Block a user