From 87776b28865b8dd292dc7a293c79437a232c10e6 Mon Sep 17 00:00:00 2001
From: merwanehamadi
Date: Tue, 30 May 2023 15:56:58 -0700
Subject: [PATCH] Make the information retrieval challenge harder while still passing (#4468)

---
 autogpt/main.py                               | 39 ++++++------
 .../information_retrieval/challenge_a.md      |  9 ++-
 tests/integration/agent_factory.py            | 63 +++++++++++--------
 .../challenge_decorator.py                    |  9 ++-
 .../integration/challenges/current_score.json |  2 +-
 .../test_information_retrieval_challenge_a.py | 14 +++--
 6 files changed, 82 insertions(+), 54 deletions(-)

diff --git a/autogpt/main.py b/autogpt/main.py
index 39bbf8b5..efc70aae 100644
--- a/autogpt/main.py
+++ b/autogpt/main.py
@@ -22,6 +22,21 @@ from autogpt.utils import (
 from autogpt.workspace import Workspace
 from scripts.install_plugin_deps import install_plugin_dependencies
 
+COMMAND_CATEGORIES = [
+    "autogpt.commands.analyze_code",
+    "autogpt.commands.audio_text",
+    "autogpt.commands.execute_code",
+    "autogpt.commands.file_operations",
+    "autogpt.commands.git_operations",
+    "autogpt.commands.google_search",
+    "autogpt.commands.image_gen",
+    "autogpt.commands.improve_code",
+    "autogpt.commands.web_selenium",
+    "autogpt.commands.write_tests",
+    "autogpt.app",
+    "autogpt.commands.task_statuses",
+]
+
 
 def run_auto_gpt(
     continuous: bool,
@@ -128,30 +143,18 @@ def run_auto_gpt(
 
     # Create a CommandRegistry instance and scan default folder
     command_registry = CommandRegistry()
-    command_categories = [
-        "autogpt.commands.analyze_code",
-        "autogpt.commands.audio_text",
-        "autogpt.commands.execute_code",
-        "autogpt.commands.file_operations",
-        "autogpt.commands.git_operations",
-        "autogpt.commands.google_search",
-        "autogpt.commands.image_gen",
-        "autogpt.commands.improve_code",
-        "autogpt.commands.web_selenium",
-        "autogpt.commands.write_tests",
-        "autogpt.app",
-        "autogpt.commands.task_statuses",
-    ]
     logger.debug(
         f"The following command categories are disabled: {cfg.disabled_command_categories}"
     )
-    command_categories = [
-        x for x in command_categories if x not in cfg.disabled_command_categories
+    enabled_command_categories = [
+        x for x in COMMAND_CATEGORIES if x not in cfg.disabled_command_categories
     ]
-    logger.debug(f"The following command categories are enabled: {command_categories}")
+    logger.debug(
+        f"The following command categories are enabled: {enabled_command_categories}"
+    )
 
-    for command_category in command_categories:
+    for command_category in enabled_command_categories:
         command_registry.import_commands(command_category)
 
     ai_name = ""
diff --git a/docs/challenges/information_retrieval/challenge_a.md b/docs/challenges/information_retrieval/challenge_a.md
index 51762fc4..de21066e 100644
--- a/docs/challenges/information_retrieval/challenge_a.md
+++ b/docs/challenges/information_retrieval/challenge_a.md
@@ -1,16 +1,19 @@
 # Information Retrieval Challenge A
 
-**Status**: Current level to beat: level 1
+**Status**: Current level to beat: level 2
 
 **Command to try**:
 
 ```
-pytest -s tests/integration/challenges/information_retrieval/test_information_retrieval_challenge_a.py
+pytest -s tests/integration/challenges/information_retrieval/test_information_retrieval_challenge_a.py --level=2
 ```
 
 ## Description
 
-The agent's goal is to find the revenue of Tesla in 2022.
+The agent's goal is to find the revenue of Tesla:
+- level 1 asks for the revenue of Tesla in 2022 and explicitly asks to search for 'tesla revenue 2022'
+- level 2 is identical but doesn't ask to search for 'tesla revenue 2022'
+- level 3 asks for Tesla's revenue by year since its creation. It should write the result in a file called output.txt.
 
diff --git a/tests/integration/agent_factory.py b/tests/integration/agent_factory.py
index 15044e9f..716545c2 100644
--- a/tests/integration/agent_factory.py
+++ b/tests/integration/agent_factory.py
@@ -3,6 +3,7 @@ import pytest
 from autogpt.agent import Agent
 from autogpt.commands.command import CommandRegistry
 from autogpt.config import AIConfig, Config
+from autogpt.main import COMMAND_CATEGORIES
 from autogpt.memory.vector import NoMemory, get_memory
 from autogpt.prompts.prompt import DEFAULT_TRIGGERING_PROMPT
 from autogpt.workspace import Workspace
@@ -140,36 +141,46 @@ def memory_management_agent(agent_test_config, memory_json_file, workspace: Work
 
 
 @pytest.fixture
-def get_company_revenue_agent(
+def information_retrieval_agents(
     agent_test_config, memory_json_file, workspace: Workspace
 ):
+    agents = []
     command_registry = CommandRegistry()
-    command_registry.import_commands("autogpt.commands.file_operations")
-    command_registry.import_commands("autogpt.commands.google_search")
-    command_registry.import_commands("autogpt.app")
-    command_registry.import_commands("autogpt.commands.task_statuses")
+    enabled_command_categories = [
+        x
+        for x in COMMAND_CATEGORIES
+        if x not in agent_test_config.disabled_command_categories
+    ]
 
-    ai_config = AIConfig(
-        ai_name="Information Retrieval Agent",
-        ai_role="an autonomous agent that specializes in retrieving information.",
-        ai_goals=[
-            "Search for 'tesla revenue 2022' and write the revenue of Tesla in 2022 to a file called output.txt. You should write the number without commas and you should not use signs like B for billion and M for million.",
-        ],
-    )
-    ai_config.command_registry = command_registry
-    system_prompt = ai_config.construct_full_prompt()
-    Config().set_continuous_mode(False)
-    agent = Agent(
-        ai_name="Get-CompanyRevenue",
-        memory=memory_json_file,
-        command_registry=command_registry,
-        config=ai_config,
-        next_action_count=0,
-        system_prompt=system_prompt,
-        triggering_prompt=DEFAULT_TRIGGERING_PROMPT,
-        workspace_directory=workspace.root,
-    )
-    return agent
+    for command_category in enabled_command_categories:
+        command_registry.import_commands(command_category)
+    ai_goals = [
+        "Write to a file called output.txt tesla's revenue in 2022 after searching for 'tesla revenue 2022'.",
+        "Write to a file called output.txt tesla's revenue in 2022.",
+        "Write to a file called output.txt tesla's revenue every year since its creation.",
+    ]
+    for ai_goal in ai_goals:
+        ai_config = AIConfig(
+            ai_name="Information Retrieval Agent",
+            ai_role="an autonomous agent that specializes in retrieving information.",
+            ai_goals=[ai_goal],
+        )
+        ai_config.command_registry = command_registry
+        system_prompt = ai_config.construct_full_prompt()
+        Config().set_continuous_mode(False)
+        agents.append(
+            Agent(
+                ai_name="Information Retrieval Agent",
+                memory=memory_json_file,
+                command_registry=command_registry,
+                config=ai_config,
+                next_action_count=0,
+                system_prompt=system_prompt,
+                triggering_prompt=DEFAULT_TRIGGERING_PROMPT,
+                workspace_directory=workspace.root,
+            )
+        )
+    return agents
 
 
 @pytest.fixture
diff --git a/tests/integration/challenges/challenge_decorator/challenge_decorator.py b/tests/integration/challenges/challenge_decorator/challenge_decorator.py
index 1ec43aec..fe12317e 100644
--- a/tests/integration/challenges/challenge_decorator/challenge_decorator.py
+++ b/tests/integration/challenges/challenge_decorator/challenge_decorator.py
@@ -1,4 +1,3 @@
-import contextlib
 import os
 from functools import wraps
 from typing import Any, Callable, Optional
@@ -23,6 +22,7 @@ def challenge(func: Callable[..., Any]) -> Callable[..., None]:
     @wraps(func)
     def wrapper(*args: Any, **kwargs: Any) -> None:
         run_remaining = MAX_LEVEL_TO_IMPROVE_ON if Challenge.BEAT_CHALLENGES else 1
+        original_error = None
         while run_remaining > 0:
             current_score, new_score, new_score_location = get_scores()
 
@@ -32,9 +32,12 @@ def challenge(func: Callable[..., Any]) -> Callable[..., None]:
             )
             if challenge.level_to_run is not None:
                 kwargs["level_to_run"] = challenge.level_to_run
-                with contextlib.suppress(AssertionError):
+                try:
                     func(*args, **kwargs)
                     challenge.succeeded = True
+                except AssertionError as err:
+                    original_error = err
+                    challenge.succeeded = False
             else:
                 challenge.skipped = True
             if os.environ.get("CI") == "true":
@@ -51,6 +54,8 @@ def challenge(func: Callable[..., Any]) -> Callable[..., None]:
             if Challenge.BEAT_CHALLENGES or challenge.is_new_challenge:
                 # xfail
                 pytest.xfail("Challenge failed")
+            if original_error:
+                raise original_error
             raise AssertionError("Challenge failed")
         run_remaining -= 1
 
diff --git a/tests/integration/challenges/current_score.json b/tests/integration/challenges/current_score.json
index 822c23c2..d2b0b30a 100644
--- a/tests/integration/challenges/current_score.json
+++ b/tests/integration/challenges/current_score.json
@@ -11,7 +11,7 @@
   },
   "information_retrieval": {
     "information_retrieval_challenge_a": {
-      "max_level": 1,
+      "max_level": 3,
       "max_level_beaten": 1
     },
     "information_retrieval_challenge_b": {
diff --git a/tests/integration/challenges/information_retrieval/test_information_retrieval_challenge_a.py b/tests/integration/challenges/information_retrieval/test_information_retrieval_challenge_a.py
index 7a9de8ab..6b970e8b 100644
--- a/tests/integration/challenges/information_retrieval/test_information_retrieval_challenge_a.py
+++ b/tests/integration/challenges/information_retrieval/test_information_retrieval_challenge_a.py
@@ -9,6 +9,7 @@ from tests.integration.challenges.utils import run_interaction_loop
 from tests.utils import requires_api_key
 
 CYCLE_COUNT = 3
+EXPECTED_REVENUES = [["81"], ["81"], ["81", "53", "24", "21", "11", "7", "4", "3", "2"]]
 
 from autogpt.agent import Agent
 
@@ -16,7 +17,7 @@ from autogpt.agent import Agent
 @requires_api_key("OPENAI_API_KEY")
 @challenge
 def test_information_retrieval_challenge_a(
-    get_company_revenue_agent: Agent,
+    information_retrieval_agents: Agent,
     monkeypatch: pytest.MonkeyPatch,
     patched_api_requestor: None,
     config: Config,
@@ -28,8 +29,13 @@ def test_information_retrieval_challenge_a(
     :param get_company_revenue_agent: The agent to test.
     :param monkeypatch: pytest's monkeypatch utility for modifying builtins.
     """
-    run_interaction_loop(monkeypatch, get_company_revenue_agent, CYCLE_COUNT)
+    information_retrieval_agent = information_retrieval_agents[level_to_run - 1]
+    run_interaction_loop(monkeypatch, information_retrieval_agent, CYCLE_COUNT)
 
-    file_path = str(get_company_revenue_agent.workspace.get_path("output.txt"))
+    file_path = str(information_retrieval_agent.workspace.get_path("output.txt"))
     content = read_file(file_path, config)
-    assert "81" in content, "Expected the file to contain 81"
+    expected_revenues = EXPECTED_REVENUES[level_to_run - 1]
+    for revenue in expected_revenues:
+        assert (
+            f"{revenue}." in content or f"{revenue}," in content
+        ), f"Expected the file to contain {revenue}"
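
Note on the level plumbing: the test receives `level_to_run` from the `challenge` decorator (`kwargs["level_to_run"] = challenge.level_to_run`), and the docs now invoke the challenge with a `--level=2` flag. The sketch below shows one minimal way such a flag could be exposed to the challenge machinery from a conftest; the `--level` option name comes from the docs change above, but the `pytest_addoption` wiring and the `user_selected_level` fixture name are illustrative assumptions, not the repository's actual implementation.

```python
# Hypothetical conftest.py sketch (not the repository's actual file): one minimal
# way the --level flag shown in challenge_a.md could reach the challenge decorator.
from typing import Optional

import pytest


def pytest_addoption(parser) -> None:
    # Registers the flag used in the docs: pytest ... --level=2
    parser.addoption(
        "--level",
        action="store",
        default=None,
        type=int,
        help="Challenge level to run; when omitted, fall back to current_score.json",
    )


@pytest.fixture
def user_selected_level(request) -> Optional[int]:
    # The challenge decorator could combine this value with the scores in
    # current_score.json before injecting kwargs["level_to_run"] into the test.
    return request.config.getoption("--level")
```

Returning `None` when the flag is omitted would leave level selection to the scores file, which is consistent with `max_level` / `max_level_beaten` in `current_score.json` driving what gets run in CI.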
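Note on the new assertions: instead of the old bare substring check for "81", each expected figure must now appear immediately followed by a period or comma, which matches phrasings such as "81.5 billion" or "81,462 million". A self-contained illustration with made-up file content (only the digit-then-'.'-or-',' rule comes from the diff):

```python
# Illustration of the check added in the test above, run against invented content.
EXPECTED_REVENUES = [["81"], ["81"], ["81", "53", "24", "21", "11", "7", "4", "3", "2"]]

level_to_run = 1  # level 1 only expects the 2022 figure
content = "Tesla's revenue in 2022 was approximately 81.5 billion dollars."  # made-up output.txt

for revenue in EXPECTED_REVENUES[level_to_run - 1]:
    assert (
        f"{revenue}." in content or f"{revenue}," in content
    ), f"Expected the file to contain {revenue}"
print("this output.txt would satisfy level", level_to_run)
```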