From 87776b28865b8dd292dc7a293c79437a232c10e6 Mon Sep 17 00:00:00 2001
From: merwanehamadi
Date: Tue, 30 May 2023 15:56:58 -0700
Subject: [PATCH] Make the information retrieval challenge harder while still passing (#4468)

---
 autogpt/main.py                               | 39 ++++++------
 .../information_retrieval/challenge_a.md      |  9 ++-
 tests/integration/agent_factory.py            | 63 +++++++++++--------
 .../challenge_decorator.py                    |  9 ++-
 .../integration/challenges/current_score.json |  2 +-
 .../test_information_retrieval_challenge_a.py | 14 +++--
 6 files changed, 82 insertions(+), 54 deletions(-)

diff --git a/autogpt/main.py b/autogpt/main.py
index 39bbf8b5..efc70aae 100644
--- a/autogpt/main.py
+++ b/autogpt/main.py
@@ -22,6 +22,21 @@ from autogpt.utils import (
 from autogpt.workspace import Workspace
 from scripts.install_plugin_deps import install_plugin_dependencies
 
+COMMAND_CATEGORIES = [
+    "autogpt.commands.analyze_code",
+    "autogpt.commands.audio_text",
+    "autogpt.commands.execute_code",
+    "autogpt.commands.file_operations",
+    "autogpt.commands.git_operations",
+    "autogpt.commands.google_search",
+    "autogpt.commands.image_gen",
+    "autogpt.commands.improve_code",
+    "autogpt.commands.web_selenium",
+    "autogpt.commands.write_tests",
+    "autogpt.app",
+    "autogpt.commands.task_statuses",
+]
+
 
 def run_auto_gpt(
     continuous: bool,
@@ -128,30 +143,18 @@ def run_auto_gpt(
 
     # Create a CommandRegistry instance and scan default folder
     command_registry = CommandRegistry()
-    command_categories = [
-        "autogpt.commands.analyze_code",
-        "autogpt.commands.audio_text",
-        "autogpt.commands.execute_code",
-        "autogpt.commands.file_operations",
-        "autogpt.commands.git_operations",
-        "autogpt.commands.google_search",
-        "autogpt.commands.image_gen",
-        "autogpt.commands.improve_code",
-        "autogpt.commands.web_selenium",
-        "autogpt.commands.write_tests",
-        "autogpt.app",
-        "autogpt.commands.task_statuses",
-    ]
     logger.debug(
         f"The following command categories are disabled: {cfg.disabled_command_categories}"
     )
-    command_categories = [
-        x for x in command_categories if x not in cfg.disabled_command_categories
+    enabled_command_categories = [
+        x for x in COMMAND_CATEGORIES if x not in cfg.disabled_command_categories
     ]
-    logger.debug(f"The following command categories are enabled: {command_categories}")
+    logger.debug(
+        f"The following command categories are enabled: {enabled_command_categories}"
+    )
 
-    for command_category in command_categories:
+    for command_category in enabled_command_categories:
         command_registry.import_commands(command_category)
 
     ai_name = ""
diff --git a/docs/challenges/information_retrieval/challenge_a.md b/docs/challenges/information_retrieval/challenge_a.md
index 51762fc4..de21066e 100644
--- a/docs/challenges/information_retrieval/challenge_a.md
+++ b/docs/challenges/information_retrieval/challenge_a.md
@@ -1,16 +1,19 @@
 # Information Retrieval Challenge A
 
-**Status**: Current level to beat: level 1
+**Status**: Current level to beat: level 2
 
 **Command to try**:
 
 ```
-pytest -s tests/integration/challenges/information_retrieval/test_information_retrieval_challenge_a.py
+pytest -s tests/integration/challenges/information_retrieval/test_information_retrieval_challenge_a.py --level=2
 ```
 
 ## Description
 
-The agent's goal is to find the revenue of Tesla in 2022.
+The agent's goal is to find the revenue of Tesla:
+- level 1 asks for the revenue of Tesla in 2022 and explicitly asks to search for 'tesla revenue 2022'
+- level 2 is identical but doesn't ask to search for 'tesla revenue 2022'
+- level 3 asks for Tesla's revenue by year since its creation. It should write the result in a file called output.txt.
 
diff --git a/tests/integration/agent_factory.py b/tests/integration/agent_factory.py
index 15044e9f..716545c2 100644
--- a/tests/integration/agent_factory.py
+++ b/tests/integration/agent_factory.py
@@ -3,6 +3,7 @@ import pytest
 from autogpt.agent import Agent
 from autogpt.commands.command import CommandRegistry
 from autogpt.config import AIConfig, Config
+from autogpt.main import COMMAND_CATEGORIES
 from autogpt.memory.vector import NoMemory, get_memory
 from autogpt.prompts.prompt import DEFAULT_TRIGGERING_PROMPT
 from autogpt.workspace import Workspace
@@ -140,36 +141,46 @@ def memory_management_agent(agent_test_config, memory_json_file, workspace: Work
 
 
 @pytest.fixture
-def get_company_revenue_agent(
+def information_retrieval_agents(
     agent_test_config, memory_json_file, workspace: Workspace
 ):
+    agents = []
     command_registry = CommandRegistry()
-    command_registry.import_commands("autogpt.commands.file_operations")
-    command_registry.import_commands("autogpt.commands.google_search")
-    command_registry.import_commands("autogpt.app")
-    command_registry.import_commands("autogpt.commands.task_statuses")
+    enabled_command_categories = [
+        x
+        for x in COMMAND_CATEGORIES
+        if x not in agent_test_config.disabled_command_categories
+    ]
 
-    ai_config = AIConfig(
-        ai_name="Information Retrieval Agent",
-        ai_role="an autonomous agent that specializes in retrieving information.",
-        ai_goals=[
-            "Search for 'tesla revenue 2022' and write the revenue of Tesla in 2022 to a file called output.txt. You should write the number without commas and you should not use signs like B for billion and M for million.",
-        ],
-    )
-    ai_config.command_registry = command_registry
-    system_prompt = ai_config.construct_full_prompt()
-    Config().set_continuous_mode(False)
-    agent = Agent(
-        ai_name="Get-CompanyRevenue",
-        memory=memory_json_file,
-        command_registry=command_registry,
-        config=ai_config,
-        next_action_count=0,
-        system_prompt=system_prompt,
-        triggering_prompt=DEFAULT_TRIGGERING_PROMPT,
-        workspace_directory=workspace.root,
-    )
-    return agent
+    for command_category in enabled_command_categories:
+        command_registry.import_commands(command_category)
+    ai_goals = [
+        "Write to a file called output.txt tesla's revenue in 2022 after searching for 'tesla revenue 2022'.",
+        "Write to a file called output.txt tesla's revenue in 2022.",
+        "Write to a file called output.txt tesla's revenue every year since its creation.",
+    ]
+    for ai_goal in ai_goals:
+        ai_config = AIConfig(
+            ai_name="Information Retrieval Agent",
+            ai_role="an autonomous agent that specializes in retrieving information.",
+            ai_goals=[ai_goal],
+        )
+        ai_config.command_registry = command_registry
+        system_prompt = ai_config.construct_full_prompt()
+        Config().set_continuous_mode(False)
+        agents.append(
+            Agent(
+                ai_name="Information Retrieval Agent",
+                memory=memory_json_file,
+                command_registry=command_registry,
+                config=ai_config,
+                next_action_count=0,
+                system_prompt=system_prompt,
+                triggering_prompt=DEFAULT_TRIGGERING_PROMPT,
+                workspace_directory=workspace.root,
+            )
+        )
+    return agents
 
 
 @pytest.fixture
diff --git a/tests/integration/challenges/challenge_decorator/challenge_decorator.py b/tests/integration/challenges/challenge_decorator/challenge_decorator.py
index 1ec43aec..fe12317e 100644
--- a/tests/integration/challenges/challenge_decorator/challenge_decorator.py
+++ b/tests/integration/challenges/challenge_decorator/challenge_decorator.py
@@ -1,4 +1,3 @@
-import contextlib
 import os
 from functools import wraps
 from typing import Any, Callable, Optional
@@ -23,6 +22,7 @@ def challenge(func: Callable[..., Any]) -> Callable[..., None]:
     @wraps(func)
     def wrapper(*args: Any, **kwargs: Any) -> None:
         run_remaining = MAX_LEVEL_TO_IMPROVE_ON if Challenge.BEAT_CHALLENGES else 1
+        original_error = None
         while run_remaining > 0:
             current_score, new_score, new_score_location = get_scores()
 
@@ -32,9 +32,12 @@ def challenge(func: Callable[..., Any]) -> Callable[..., None]:
             )
             if challenge.level_to_run is not None:
                 kwargs["level_to_run"] = challenge.level_to_run
-                with contextlib.suppress(AssertionError):
+                try:
                     func(*args, **kwargs)
                     challenge.succeeded = True
+                except AssertionError as err:
+                    original_error = err
+                    challenge.succeeded = False
             else:
                 challenge.skipped = True
             if os.environ.get("CI") == "true":
@@ -51,6 +54,8 @@ def challenge(func: Callable[..., Any]) -> Callable[..., None]:
             if Challenge.BEAT_CHALLENGES or challenge.is_new_challenge:
                 # xfail
                 pytest.xfail("Challenge failed")
+            if original_error:
+                raise original_error
             raise AssertionError("Challenge failed")
         run_remaining -= 1
 
diff --git a/tests/integration/challenges/current_score.json b/tests/integration/challenges/current_score.json
index 822c23c2..d2b0b30a 100644
--- a/tests/integration/challenges/current_score.json
+++ b/tests/integration/challenges/current_score.json
@@ -11,7 +11,7 @@
   },
   "information_retrieval": {
     "information_retrieval_challenge_a": {
-      "max_level": 1,
+      "max_level": 3,
       "max_level_beaten": 1
     },
     "information_retrieval_challenge_b": {
diff --git a/tests/integration/challenges/information_retrieval/test_information_retrieval_challenge_a.py b/tests/integration/challenges/information_retrieval/test_information_retrieval_challenge_a.py
index 7a9de8ab..6b970e8b 100644
--- a/tests/integration/challenges/information_retrieval/test_information_retrieval_challenge_a.py
+++ b/tests/integration/challenges/information_retrieval/test_information_retrieval_challenge_a.py
@@ -9,6 +9,7 @@ from tests.integration.challenges.utils import run_interaction_loop
 from tests.utils import requires_api_key
 
 CYCLE_COUNT = 3
+EXPECTED_REVENUES = [["81"], ["81"], ["81", "53", "24", "21", "11", "7", "4", "3", "2"]]
 
 from autogpt.agent import Agent
 
@@ -16,7 +17,7 @@ from autogpt.agent import Agent
 @requires_api_key("OPENAI_API_KEY")
 @challenge
 def test_information_retrieval_challenge_a(
-    get_company_revenue_agent: Agent,
+    information_retrieval_agents: Agent,
     monkeypatch: pytest.MonkeyPatch,
     patched_api_requestor: None,
     config: Config,
@@ -28,8 +29,13 @@ def test_information_retrieval_challenge_a(
     :param get_company_revenue_agent: The agent to test.
     :param monkeypatch: pytest's monkeypatch utility for modifying builtins.
     """
-    run_interaction_loop(monkeypatch, get_company_revenue_agent, CYCLE_COUNT)
+    information_retrieval_agent = information_retrieval_agents[level_to_run - 1]
+    run_interaction_loop(monkeypatch, information_retrieval_agent, CYCLE_COUNT)
 
-    file_path = str(get_company_revenue_agent.workspace.get_path("output.txt"))
+    file_path = str(information_retrieval_agent.workspace.get_path("output.txt"))
     content = read_file(file_path, config)
-    assert "81" in content, "Expected the file to contain 81"
+    expected_revenues = EXPECTED_REVENUES[level_to_run - 1]
+    for revenue in expected_revenues:
+        assert (
+            f"{revenue}." in content or f"{revenue}," in content
+        ), f"Expected the file to contain {revenue}"
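
Note on the level plumbing: the test receives `level_to_run` from the `challenge` decorator (`kwargs["level_to_run"] = challenge.level_to_run`), and the docs now invoke the challenge with a `--level=2` flag. The sketch below shows one minimal way such a flag could be exposed to the challenge machinery from a conftest; the `--level` option name comes from the docs change above, but the `pytest_addoption` wiring and the `user_selected_level` fixture name are illustrative assumptions, not the repository's actual implementation.

```python
# Hypothetical conftest.py sketch (not the repository's actual file): one minimal
# way the --level flag shown in challenge_a.md could reach the challenge decorator.
from typing import Optional

import pytest


def pytest_addoption(parser) -> None:
    # Registers the flag used in the docs: pytest ... --level=2
    parser.addoption(
        "--level",
        action="store",
        default=None,
        type=int,
        help="Challenge level to run; when omitted, fall back to current_score.json",
    )


@pytest.fixture
def user_selected_level(request) -> Optional[int]:
    # The challenge decorator could combine this value with the scores in
    # current_score.json before injecting kwargs["level_to_run"] into the test.
    return request.config.getoption("--level")
```

Returning `None` when the flag is omitted would leave level selection to the scores file, which is consistent with `max_level` / `max_level_beaten` in `current_score.json` driving what gets run in CI.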
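Note on the new assertions: instead of the old bare substring check for "81", each expected figure must now appear immediately followed by a period or comma, which matches phrasings such as "81.5 billion" or "81,462 million". A self-contained illustration with made-up file content (only the digit-then-'.'-or-',' rule comes from the diff):

```python
# Illustration of the check added in the test above, run against invented content.
EXPECTED_REVENUES = [["81"], ["81"], ["81", "53", "24", "21", "11", "7", "4", "3", "2"]]

level_to_run = 1  # level 1 only expects the 2022 figure
content = "Tesla's revenue in 2022 was approximately 81.5 billion dollars."  # made-up output.txt

for revenue in EXPECTED_REVENUES[level_to_run - 1]:
    assert (
        f"{revenue}." in content or f"{revenue}," in content
    ), f"Expected the file to contain {revenue}"
print("this output.txt would satisfy level", level_to_run)
```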