Feature/challenge memory management (#3425)
Co-authored-by: JS <38794445+jonathansheets517@users.noreply.github.com>
Co-authored-by: Richard Beales <rich@richbeales.net>
@@ -127,3 +127,22 @@ When you run Pytest locally:
- Or: The test might be poorly written. In that case, you can make suggestions to change the test.

In our CI pipeline, Pytest will use the cassettes and not call paid API providers, so we need your help to re-record the replays that your changes break.
### Community Challenges

Challenges are goals we need Auto-GPT to achieve.

To pick a challenge, go to the tests/integration/challenges folder and select the area you would like to work on.

Each challenge tracks two values, `level_currently_beaten` and `max_level` (see the sketch after this list):

- A challenge is new if `level_currently_beaten` is None.
- A challenge is in progress if `level_currently_beaten` is greater than or equal to 1.
- A challenge is beaten if `level_currently_beaten` equals `max_level`.
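As a minimal sketch of how the three states relate (the `challenge_status` helper is hypothetical and not part of this commit; the real values live as constants inside each challenge's test):

```python
from typing import Optional


def challenge_status(level_currently_beaten: Optional[int], max_level: int) -> str:
    """Classify a challenge from the highest level beaten so far."""
    if level_currently_beaten is None:
        return "new"  # no level has been beaten yet
    if level_currently_beaten >= max_level:
        return "beaten"  # the final level has been cleared
    return "in progress"  # at least level 1 is beaten


print(challenge_status(None, 3))  # new
print(challenge_status(1, 3))     # in progress
print(challenge_status(3, 3))     # beaten
```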
Here is an example of how to run memory challenge A and attempt to beat level 3 (`-s` lets you watch the agent's output live; `--level` is the custom pytest option registered in tests/integration/challenges/conftest.py, shown below):

```
pytest -s tests/integration/challenges/memory/test_memory_challenge_a.py --level=3
```
To beat a challenge, you're not allowed to change anything in the tests folder; you have to add code in the autogpt folder.

Challenges use cassettes. Cassettes allow us to replay your runs in our CI pipeline.

Don't hesitate to delete the cassettes associated with the challenge you're working on if you need to; otherwise the test will keep replaying the last recorded run.
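For background, the `@pytest.mark.vcr` marker used by the challenge test below comes from a VCR.py-based pytest plugin: depending on the record mode, a run either captures HTTP traffic into a YAML cassette or replays the stored responses without touching the network. A minimal sketch (hypothetical URL, and assuming such a plugin plus `requests` are installed):

```python
import pytest
import requests


@pytest.mark.vcr
def test_status_endpoint():
    # With no cassette yet (and a recording mode enabled), this request is
    # recorded; afterwards it is served from the cassette, offline and free.
    response = requests.get("https://api.example.com/v1/status")  # hypothetical
    assert response.status_code == 200
```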
Once you've beaten a new level of a challenge, please create a pull request and we will analyze how you changed Auto-GPT to beat the challenge.
@@ -3,7 +3,7 @@ import pytest
```diff
 from autogpt.agent import Agent
 from autogpt.commands.command import CommandRegistry
 from autogpt.config import AIConfig, Config
-from autogpt.memory import NoMemory, get_memory
+from autogpt.memory import LocalCache, NoMemory, get_memory
 from autogpt.prompts.prompt import DEFAULT_TRIGGERING_PROMPT
 from autogpt.workspace import Workspace
```
@@ -19,6 +19,16 @@ def agent_test_config(config: Config):
```diff
     config.set_temperature(was_temperature)
 
 
+@pytest.fixture
+def memory_local_cache(agent_test_config: Config):
+    was_memory_backend = agent_test_config.memory_backend
+
+    agent_test_config.set_memory_backend("local_cache")
+    yield get_memory(agent_test_config, init=True)
+
+    agent_test_config.set_memory_backend(was_memory_backend)
+
+
 @pytest.fixture
 def memory_none(agent_test_config: Config):
     was_memory_backend = agent_test_config.memory_backend
```
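The `memory_local_cache` fixture added above follows pytest's yield-fixture pattern: everything before `yield` is setup, everything after runs as teardown once the test finishes, which is how the previous memory backend gets restored. A standalone illustration of the pattern (all names hypothetical):

```python
import pytest

settings = {"memory_backend": "redis"}  # stand-in for the real Config


@pytest.fixture
def local_backend():
    saved = settings["memory_backend"]  # setup: remember the old value
    settings["memory_backend"] = "local_cache"
    yield settings["memory_backend"]  # the test body runs at this point
    settings["memory_backend"] = saved  # teardown: restore, even if the test failed


def test_backend_is_swapped(local_backend):
    assert settings["memory_backend"] == "local_cache"
```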
@@ -101,3 +111,38 @@ def writer_agent(agent_test_config, memory_none: NoMemory, workspace: Workspace)
```diff
     )
 
     return agent
+
+
+@pytest.fixture
+def memory_management_agent(
+    agent_test_config, memory_local_cache, workspace: Workspace
+):
+    command_registry = CommandRegistry()
+    command_registry.import_commands("autogpt.commands.file_operations")
+    command_registry.import_commands("autogpt.app")
+
+    ai_config = AIConfig(
+        ai_name="Follow-Instructions-GPT",
+        ai_role="an AI designed to read the instructions_1.txt file using the read_file method and follow the instructions in the file.",
+        ai_goals=[
+            "Use the command read_file to read the instructions_1.txt file",
+            "Follow the instructions in the instructions_1.txt file",
+        ],
+    )
+    ai_config.command_registry = command_registry
+
+    system_prompt = ai_config.construct_full_prompt()
+
+    agent = Agent(
+        ai_name="",
+        memory=memory_local_cache,
+        full_message_history=[],
+        command_registry=command_registry,
+        config=ai_config,
+        next_action_count=0,
+        system_prompt=system_prompt,
+        triggering_prompt=DEFAULT_TRIGGERING_PROMPT,
+        workspace_directory=workspace.root,
+    )
+
+    return agent
```
tests/integration/challenges/__init__.py (new, empty file)

tests/integration/challenges/conftest.py (new file, 17 lines):
@@ -0,0 +1,17 @@
```python
import pytest


def pytest_addoption(parser):
    parser.addoption(
        "--level", action="store", default=None, type=int, help="Specify test level"
    )


def pytest_configure(config):
    config.option.level = config.getoption("--level")


@pytest.fixture
def user_selected_level(request) -> int:
    # used for challenges in the goal oriented tests
    return request.config.option.level
```
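Putting the three pieces together: `pytest_addoption` registers the `--level` flag, `pytest_configure` copies its value onto `config.option`, and the `user_selected_level` fixture hands it to any test that asks. A hypothetical test showing the flow:

```python
def test_echo_level(user_selected_level: int) -> None:
    # Run as `pytest --level=2 ...` and user_selected_level is 2;
    # without the flag it is None (the default set in pytest_addoption).
    print(f"running with level: {user_selected_level}")
```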
tests/integration/challenges/memory/__init__.py (new, empty file)

(One file's diff, likely the recorded cassette, is suppressed because it is too large.)

tests/integration/challenges/memory/test_memory_challenge_a.py (new file, 79 lines):
@@ -0,0 +1,79 @@
```python
import pytest

from autogpt.agent import Agent
from autogpt.commands.file_operations import read_file, write_to_file
from tests.integration.agent_utils import run_interaction_loop
from tests.integration.challenges.utils import get_level_to_run
from tests.utils import requires_api_key


@pytest.mark.vcr
@requires_api_key("OPENAI_API_KEY")
def test_memory_challenge_a(
    memory_management_agent: Agent, user_selected_level: int
) -> None:
    """
    The agent reads a file containing a task_id. Then, it reads a series of other files.
    After reading 'n' files, the agent must write the task_id into a new file.

    Args:
        memory_management_agent (Agent)
        user_selected_level (int)
    """

    level_currently_beaten = 3
    max_level = 3
    num_files = get_level_to_run(user_selected_level, level_currently_beaten, max_level)

    task_id = "2314"
    create_instructions_files(memory_management_agent, num_files, task_id)

    try:
        run_interaction_loop(memory_management_agent, 40)
    # catch system exit exceptions
    except SystemExit:
        file_path = str(memory_management_agent.workspace.get_path("output.txt"))
        content = read_file(file_path)
        assert task_id in content, f"Expected the file to contain {task_id}"


def create_instructions_files(
    memory_management_agent: Agent,
    num_files: int,
    task_id: str,
    base_filename: str = "instructions_",
) -> None:
    """
    Creates a series of instructions files for the memory challenge.

    Args:
        memory_management_agent (Agent)
        num_files (int)
        task_id (str)
        base_filename (str, optional)
    """
    for i in range(1, num_files + 1):
        content = generate_content(i, task_id, base_filename, num_files)
        file_name = f"{base_filename}{i}.txt"
        file_path = str(memory_management_agent.workspace.get_path(file_name))
        write_to_file(file_path, content)


def generate_content(
    index: int, task_id: str, base_filename: str, num_files: int
) -> str:
    """
    Args:
        index: int
        task_id: str
        base_filename: str
        num_files: int

    Returns: str
    """
    if index == 1:
        return (
            f"This task_id is {task_id}\nRead the file {base_filename}{index + 1}.txt"
        )
    if index != num_files:
        return f"Read the file {base_filename}{index + 1}.txt"
    return "Write the task_id into the file output.txt\nShutdown"
```
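To make the challenge concrete, here is what `create_instructions_files` writes at level 3 (`num_files = 3`, `task_id = "2314"`), worked directly from `generate_content` above:

```
instructions_1.txt:  This task_id is 2314
                     Read the file instructions_2.txt
instructions_2.txt:  Read the file instructions_3.txt
instructions_3.txt:  Write the task_id into the file output.txt
                     Shutdown
```

Only the first file mentions the task_id, so the agent has to retain it across the intermediate reads before writing output.txt.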
tests/integration/challenges/utils.py (new file, 34 lines):
@@ -0,0 +1,34 @@
```python
from typing import Optional

import pytest


def get_level_to_run(
    user_selected_level: Optional[int],
    level_currently_beaten: Optional[int],
    max_level: int,
) -> int:
    """
    Determines the appropriate level to run for a challenge, based on the user-selected level, the level currently beaten, and the maximum level.

    Args:
        user_selected_level (int | None): The level selected by the user. If not provided, the level currently beaten is used.
        level_currently_beaten (int | None): The highest level beaten so far. If not provided, the test will be skipped.
        max_level (int): The maximum level allowed for the challenge.

    Returns:
        int: The level to run for the challenge.

    Raises:
        ValueError: If the user-selected level is greater than the maximum level allowed.
    """
    if user_selected_level is None:
        if level_currently_beaten is None:
            pytest.skip(
                "No one has beaten any levels so we cannot run the test in our pipeline"
            )
        # by default we run the level currently beaten.
        return level_currently_beaten
    if user_selected_level > max_level:
        raise ValueError(f"This challenge was not designed to go beyond {max_level}")
    return user_selected_level
```
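Worked examples of the three branches, using the constants from memory challenge A (`level_currently_beaten = 3`, `max_level = 3`):

```python
from tests.integration.challenges.utils import get_level_to_run

get_level_to_run(None, 3, 3)  # -> 3: no --level flag, so run the level currently beaten
get_level_to_run(2, 3, 3)  # -> 2: an explicit user choice within bounds is honored
get_level_to_run(5, 3, 3)  # raises ValueError: the challenge only goes up to level 3
get_level_to_run(None, None, 3)  # calls pytest.skip: nobody has beaten any level yet
```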
tests/integration/goal_oriented/__init__.py (new, empty file)