diff --git a/autogpt/commands/file_operations.py b/autogpt/commands/file_operations.py index a0a61539..2ff14844 100644 --- a/autogpt/commands/file_operations.py +++ b/autogpt/commands/file_operations.py @@ -160,7 +160,7 @@ def ingest_file( # TODO: differentiate between different types of files file_memory = MemoryItem.from_text_file(content, filename) - logger.debug(f"Created memory: {file_memory.dump()}") + logger.debug(f"Created memory: {file_memory.dump(True)}") memory.add(file_memory) logger.info(f"Ingested {len(file_memory.e_chunks)} chunks from {filename}") diff --git a/autogpt/main.py b/autogpt/main.py index ab0a1533..ce6a983d 100644 --- a/autogpt/main.py +++ b/autogpt/main.py @@ -175,7 +175,8 @@ def run_auto_gpt( # Initialize memory and make sure it is empty. # this is particularly important for indexing and referencing pinecone memory - memory = get_memory(cfg, init=True) + memory = get_memory(cfg) + memory.clear() logger.typewriter_log( "Using memory of type:", Fore.GREEN, f"{memory.__class__.__name__}" ) diff --git a/autogpt/memory/vector/__init__.py b/autogpt/memory/vector/__init__.py index aaaf83fb..11c9d697 100644 --- a/autogpt/memory/vector/__init__.py +++ b/autogpt/memory/vector/__init__.py @@ -39,7 +39,7 @@ supported_memory = ["json_file", "no_memory"] # MilvusMemory = None -def get_memory(cfg: Config, init=False) -> VectorMemory: +def get_memory(cfg: Config) -> VectorMemory: memory = None match cfg.memory_backend: @@ -60,7 +60,7 @@ def get_memory(cfg: Config, init=False) -> VectorMemory: # ) # else: # memory = PineconeMemory(cfg) - # if init: + # if clear: # memory.clear() case "redis": diff --git a/autogpt/memory/vector/memory_item.py b/autogpt/memory/vector/memory_item.py index c57b87aa..539ccb61 100644 --- a/autogpt/memory/vector/memory_item.py +++ b/autogpt/memory/vector/memory_item.py @@ -109,21 +109,21 @@ class MemoryItem: # The result_message contains either user feedback # or the result of the command specified in ai_message - if 
ai_message["role"] != "assistant": - raise ValueError(f"Invalid role on 'ai_message': {ai_message['role']}") + if ai_message.role != "assistant": + raise ValueError(f"Invalid role on 'ai_message': {ai_message.role}") result = ( - result_message["content"] - if result_message["content"].startswith("Command") + result_message.content + if result_message.content.startswith("Command") else "None" ) user_input = ( - result_message["content"] - if result_message["content"].startswith("Human feedback") + result_message.content + if result_message.content.startswith("Human feedback") else "None" ) memory_content = ( - f"Assistant Reply: {ai_message['content']}" + f"Assistant Reply: {ai_message.content}" "\n\n" f"Result: {result}" "\n\n" @@ -145,11 +145,14 @@ class MemoryItem: question_for_summary=question, ) - def dump(self) -> str: - token_length = count_string_tokens(self.raw_content, Config().embedding_model) + def dump(self, calculate_length=False) -> str: + if calculate_length: + token_length = count_string_tokens( + self.raw_content, Config().embedding_model + ) return f""" =============== MemoryItem =============== -Length: {token_length} tokens in {len(self.e_chunks)} chunks +Size: {f'{token_length} tokens in ' if calculate_length else ''}{len(self.e_chunks)} chunks Metadata: {json.dumps(self.metadata, indent=2)} ---------------- SUMMARY ----------------- {self.summary} @@ -158,6 +161,31 @@ Metadata: {json.dumps(self.metadata, indent=2)} ========================================== """ + def __eq__(self, other: MemoryItem): + return ( + self.raw_content == other.raw_content + and self.chunks == other.chunks + and self.chunk_summaries == other.chunk_summaries + # Embeddings can either be list[float] or np.ndarray[float32], + # and for comparison they must be of the same type + and np.array_equal( + self.e_summary + if isinstance(self.e_summary, np.ndarray) + else np.array(self.e_summary, dtype=np.float32), + other.e_summary + if isinstance(other.e_summary, np.ndarray) 
+ else np.array(other.e_summary, dtype=np.float32), + ) + and np.array_equal( + self.e_chunks + if isinstance(self.e_chunks[0], np.ndarray) + else [np.array(c, dtype=np.float32) for c in self.e_chunks], + other.e_chunks + if isinstance(other.e_chunks[0], np.ndarray) + else [np.array(c, dtype=np.float32) for c in other.e_chunks], + ) + ) + @dataclasses.dataclass class MemoryItemRelevance: diff --git a/autogpt/memory/vector/providers/json_file.py b/autogpt/memory/vector/providers/json_file.py index 46446a9c..3ae7cd86 100644 --- a/autogpt/memory/vector/providers/json_file.py +++ b/autogpt/memory/vector/providers/json_file.py @@ -32,10 +32,17 @@ class JSONFileMemory(VectorMemoryProvider): workspace_path = Path(cfg.workspace_path) self.file_path = workspace_path / f"{cfg.memory_index}.json" self.file_path.touch() - logger.debug(f"Initialized {__name__} with index path {self.file_path}") + logger.debug( + f"Initialized {__class__.__name__} with index path {self.file_path}" + ) self.memories = [] - self.save_index() + try: + self.load_index() + logger.debug(f"Loaded {len(self.memories)} MemoryItems from file") + except Exception as e: + logger.warn(f"Could not load MemoryItems from file: {e}") + self.save_index() def __iter__(self) -> Iterator[MemoryItem]: return iter(self.memories) @@ -48,6 +55,7 @@ class JSONFileMemory(VectorMemoryProvider): def add(self, item: MemoryItem): self.memories.append(item) + logger.debug(f"Adding item to memory: {item.dump()}") self.save_index() return len(self.memories) @@ -62,6 +70,17 @@ class JSONFileMemory(VectorMemoryProvider): self.memories.clear() self.save_index() + def load_index(self): + """Loads all memories from the index file""" + if not self.file_path.is_file(): + logger.debug(f"Index file '{self.file_path}' does not exist") + return + with self.file_path.open("r") as f: + logger.debug(f"Loading memories from index file '{self.file_path}'") + json_index = orjson.loads(f.read()) + for memory_item_dict in json_index: + 
self.memories.append(MemoryItem(**memory_item_dict)) + def save_index(self): logger.debug(f"Saving memory index to file {self.file_path}") with self.file_path.open("wb") as f: diff --git a/data_ingestion.py b/data_ingestion.py index e2d98d1c..09d5328c 100644 --- a/data_ingestion.py +++ b/data_ingestion.py @@ -70,7 +70,9 @@ def main() -> None: args = parser.parse_args() # Initialize memory - memory = get_memory(cfg, init=args.init) + memory = get_memory(cfg) + if args.init: + memory.clear() logger.debug("Using memory of type: " + memory.__class__.__name__) if args.file: diff --git a/tests/conftest.py b/tests/conftest.py index 2342a3b0..671096fd 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -94,7 +94,8 @@ def agent(config: Config, workspace: Workspace) -> Agent: ai_config.command_registry = command_registry config.set_memory_backend("json_file") - memory_json_file = get_memory(config, init=True) + memory_json_file = get_memory(config) + memory_json_file.clear() system_prompt = ai_config.construct_full_prompt() diff --git a/tests/integration/agent_factory.py b/tests/integration/agent_factory.py index fff3867e..c9b99f50 100644 --- a/tests/integration/agent_factory.py +++ b/tests/integration/agent_factory.py @@ -28,7 +28,9 @@ def memory_json_file(agent_test_config: Config): was_memory_backend = agent_test_config.memory_backend agent_test_config.set_memory_backend("json_file") - yield get_memory(agent_test_config, init=True) + memory = get_memory(agent_test_config) + memory.clear() + yield memory agent_test_config.set_memory_backend(was_memory_backend) diff --git a/tests/integration/memory/test_json_file_memory.py b/tests/integration/memory/test_json_file_memory.py index 9134a069..41a3e174 100644 --- a/tests/integration/memory/test_json_file_memory.py +++ b/tests/integration/memory/test_json_file_memory.py @@ -34,7 +34,9 @@ def test_json_memory_init_with_backing_empty_file(config: Config, workspace: Wor assert index_file.read_text() == "[]" -def 
test_json_memory_init_with_backing_file(config: Config, workspace: Workspace): +def test_json_memory_init_with_backing_invalid_file( + config: Config, workspace: Workspace +): index_file = workspace.root / f"{config.memory_index}.json" index_file.touch() @@ -78,6 +80,24 @@ def test_json_memory_get(config: Config, memory_item: MemoryItem, mock_get_embed assert retrieved.memory_item == memory_item +def test_json_memory_load_index(config: Config, memory_item: MemoryItem): + index = JSONFileMemory(config) + index.add(memory_item) + + try: + assert index.file_path.exists(), "index was not saved to file" + assert len(index) == 1, f"index contains {len(index)} items instead of 1" + assert index.memories[0] == memory_item, "item in index != added mock item" + except AssertionError as e: + raise ValueError(f"Setting up for load_index test failed: {e}") + + index.memories = [] + index.load_index() + + assert len(index) == 1 + assert index.memories[0] == memory_item + + @pytest.mark.vcr @requires_api_key("OPENAI_API_KEY") def test_json_memory_get_relevant(config: Config, patched_api_requestor: None) -> None: