Make the information retrieval challenge harder while still passing (#4468)

merwanehamadi
2023-05-30 15:56:58 -07:00
committed by GitHub
parent 86b6231f70
commit 87776b2886
6 changed files with 82 additions and 54 deletions


@@ -22,6 +22,21 @@ from autogpt.utils import (
 from autogpt.workspace import Workspace
 from scripts.install_plugin_deps import install_plugin_dependencies

+COMMAND_CATEGORIES = [
+    "autogpt.commands.analyze_code",
+    "autogpt.commands.audio_text",
+    "autogpt.commands.execute_code",
+    "autogpt.commands.file_operations",
+    "autogpt.commands.git_operations",
+    "autogpt.commands.google_search",
+    "autogpt.commands.image_gen",
+    "autogpt.commands.improve_code",
+    "autogpt.commands.web_selenium",
+    "autogpt.commands.write_tests",
+    "autogpt.app",
+    "autogpt.commands.task_statuses",
+]
+
 def run_auto_gpt(
     continuous: bool,
@@ -128,30 +143,18 @@ def run_auto_gpt(
     # Create a CommandRegistry instance and scan default folder
     command_registry = CommandRegistry()
-    command_categories = [
-        "autogpt.commands.analyze_code",
-        "autogpt.commands.audio_text",
-        "autogpt.commands.execute_code",
-        "autogpt.commands.file_operations",
-        "autogpt.commands.git_operations",
-        "autogpt.commands.google_search",
-        "autogpt.commands.image_gen",
-        "autogpt.commands.improve_code",
-        "autogpt.commands.web_selenium",
-        "autogpt.commands.write_tests",
-        "autogpt.app",
-        "autogpt.commands.task_statuses",
-    ]
     logger.debug(
         f"The following command categories are disabled: {cfg.disabled_command_categories}"
     )
-    command_categories = [
-        x for x in command_categories if x not in cfg.disabled_command_categories
+    enabled_command_categories = [
+        x for x in COMMAND_CATEGORIES if x not in cfg.disabled_command_categories
     ]
-    logger.debug(f"The following command categories are enabled: {command_categories}")
+    logger.debug(
+        f"The following command categories are enabled: {enabled_command_categories}"
+    )

-    for command_category in command_categories:
+    for command_category in enabled_command_categories:
         command_registry.import_commands(command_category)

     ai_name = ""


@@ -1,16 +1,19 @@
 # Information Retrieval Challenge A

-**Status**: Current level to beat: level 1
+**Status**: Current level to beat: level 2

 **Command to try**:
 ```
-pytest -s tests/integration/challenges/information_retrieval/test_information_retrieval_challenge_a.py
+pytest -s tests/integration/challenges/information_retrieval/test_information_retrieval_challenge_a.py --level=2
 ```

 ## Description
-The agent's goal is to find the revenue of Tesla in 2022.
+The agent's goal is to find the revenue of Tesla:
+- level 1 asks for the revenue of Tesla in 2022 and explicitly asks to search for 'tesla revenue 2022'
+- level 2 is identical but doesn't ask to search for 'tesla revenue 2022'
+- level 3 asks for Tesla's revenue for every year since its creation
 It should write the result in a file called output.txt.
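
For reference, a minimal sketch (not part of the commit) of how the three levels line up with the agent goals and expected figures introduced by this change. The goal strings and revenue lists are copied from the fixture and test diffs below; the helper function is purely illustrative:

```python
# Level-indexed data copied from this commit's fixture and test changes.
LEVEL_GOALS = [
    "Write to a file called output.txt tesla's revenue in 2022 after searching for 'tesla revenue 2022'.",
    "Write to a file called output.txt tesla's revenue in 2022.",
    "Write to a file called output.txt tesla's revenue every year since its creation.",
]
EXPECTED_REVENUES = [["81"], ["81"], ["81", "53", "24", "21", "11", "7", "4", "3", "2"]]

def goal_and_expected(level_to_run: int) -> tuple[str, list[str]]:
    # Levels are 1-indexed on the command line (--level=2); the lists are 0-indexed.
    return LEVEL_GOALS[level_to_run - 1], EXPECTED_REVENUES[level_to_run - 1]
```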


@@ -3,6 +3,7 @@ import pytest
 from autogpt.agent import Agent
 from autogpt.commands.command import CommandRegistry
 from autogpt.config import AIConfig, Config
+from autogpt.main import COMMAND_CATEGORIES
 from autogpt.memory.vector import NoMemory, get_memory
 from autogpt.prompts.prompt import DEFAULT_TRIGGERING_PROMPT
 from autogpt.workspace import Workspace
@@ -140,36 +141,46 @@ def memory_management_agent(agent_test_config, memory_json_file, workspace: Work
 @pytest.fixture
-def get_company_revenue_agent(
+def information_retrieval_agents(
     agent_test_config, memory_json_file, workspace: Workspace
 ):
+    agents = []
     command_registry = CommandRegistry()
-    command_registry.import_commands("autogpt.commands.file_operations")
-    command_registry.import_commands("autogpt.commands.google_search")
-    command_registry.import_commands("autogpt.app")
-    command_registry.import_commands("autogpt.commands.task_statuses")
-
-    ai_config = AIConfig(
-        ai_name="Information Retrieval Agent",
-        ai_role="an autonomous agent that specializes in retrieving information.",
-        ai_goals=[
-            "Search for 'tesla revenue 2022' and write the revenue of Tesla in 2022 to a file called output.txt. You should write the number without commas and you should not use signs like B for billion and M for million.",
-        ],
-    )
-    ai_config.command_registry = command_registry
-    system_prompt = ai_config.construct_full_prompt()
-    Config().set_continuous_mode(False)
-    agent = Agent(
-        ai_name="Get-CompanyRevenue",
-        memory=memory_json_file,
-        command_registry=command_registry,
-        config=ai_config,
-        next_action_count=0,
-        system_prompt=system_prompt,
-        triggering_prompt=DEFAULT_TRIGGERING_PROMPT,
-        workspace_directory=workspace.root,
-    )
-    return agent
+    enabled_command_categories = [
+        x
+        for x in COMMAND_CATEGORIES
+        if x not in agent_test_config.disabled_command_categories
+    ]
+
+    for command_category in enabled_command_categories:
+        command_registry.import_commands(command_category)
+
+    ai_goals = [
+        "Write to a file called output.txt tesla's revenue in 2022 after searching for 'tesla revenue 2022'.",
+        "Write to a file called output.txt tesla's revenue in 2022.",
+        "Write to a file called output.txt tesla's revenue every year since its creation.",
+    ]
+    for ai_goal in ai_goals:
+        ai_config = AIConfig(
+            ai_name="Information Retrieval Agent",
+            ai_role="an autonomous agent that specializes in retrieving information.",
+            ai_goals=[ai_goal],
+        )
+        ai_config.command_registry = command_registry
+        system_prompt = ai_config.construct_full_prompt()
+        Config().set_continuous_mode(False)
+        agents.append(
+            Agent(
+                ai_name="Information Retrieval Agent",
+                memory=memory_json_file,
+                command_registry=command_registry,
+                config=ai_config,
+                next_action_count=0,
+                system_prompt=system_prompt,
+                triggering_prompt=DEFAULT_TRIGGERING_PROMPT,
+                workspace_directory=workspace.root,
+            )
+        )
+    return agents

 @pytest.fixture


@@ -1,4 +1,3 @@
-import contextlib
 import os
 from functools import wraps
 from typing import Any, Callable, Optional
@@ -23,6 +22,7 @@ def challenge(func: Callable[..., Any]) -> Callable[..., None]:
     @wraps(func)
     def wrapper(*args: Any, **kwargs: Any) -> None:
         run_remaining = MAX_LEVEL_TO_IMPROVE_ON if Challenge.BEAT_CHALLENGES else 1
+        original_error = None

         while run_remaining > 0:
             current_score, new_score, new_score_location = get_scores()
@@ -32,9 +32,12 @@ def challenge(func: Callable[..., Any]) -> Callable[..., None]:
             )
             if challenge.level_to_run is not None:
                 kwargs["level_to_run"] = challenge.level_to_run
-                with contextlib.suppress(AssertionError):
+                try:
                     func(*args, **kwargs)
                     challenge.succeeded = True
+                except AssertionError as err:
+                    original_error = err
+                    challenge.succeeded = False
             else:
                 challenge.skipped = True
             if os.environ.get("CI") == "true":
@@ -51,6 +54,8 @@ def challenge(func: Callable[..., Any]) -> Callable[..., None]:
                 if Challenge.BEAT_CHALLENGES or challenge.is_new_challenge:
                     # xfail
                     pytest.xfail("Challenge failed")
+                if original_error:
+                    raise original_error
                 raise AssertionError("Challenge failed")
             run_remaining -= 1
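
The hunks above change the decorator from silently suppressing `AssertionError` to capturing it and re-raising it after the retry loop, so a failed challenge reports the real assertion message instead of the generic "Challenge failed". A minimal standalone sketch of that pattern (simplified: the real wrapper also tracks scores, skipping, CI detection, and xfail, and `attempts` stands in for the `run_remaining` bookkeeping):

```python
from functools import wraps
from typing import Any, Callable

def retry_challenge(attempts: int) -> Callable[[Callable[..., Any]], Callable[..., None]]:
    def decorator(func: Callable[..., Any]) -> Callable[..., None]:
        @wraps(func)
        def wrapper(*args: Any, **kwargs: Any) -> None:
            original_error = None
            for _ in range(attempts):
                try:
                    func(*args, **kwargs)
                    return  # challenge beaten; nothing to report
                except AssertionError as err:
                    original_error = err  # keep the informative failure
            if original_error:
                raise original_error  # surface the real assertion message
        return wrapper
    return decorator
```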


@@ -11,7 +11,7 @@
     },
     "information_retrieval": {
         "information_retrieval_challenge_a": {
-            "max_level": 1,
+            "max_level": 3,
             "max_level_beaten": 1
         },
         "information_retrieval_challenge_b": {


@@ -9,6 +9,7 @@ from tests.integration.challenges.utils import run_interaction_loop
 from tests.utils import requires_api_key

 CYCLE_COUNT = 3
+EXPECTED_REVENUES = [["81"], ["81"], ["81", "53", "24", "21", "11", "7", "4", "3", "2"]]
 from autogpt.agent import Agent
@@ -16,7 +17,7 @@ from autogpt.agent import Agent
 @requires_api_key("OPENAI_API_KEY")
 @challenge
 def test_information_retrieval_challenge_a(
-    get_company_revenue_agent: Agent,
+    information_retrieval_agents: list[Agent],
     monkeypatch: pytest.MonkeyPatch,
     patched_api_requestor: None,
     config: Config,
@@ -28,8 +29,13 @@ def test_information_retrieval_challenge_a(
-    :param get_company_revenue_agent: The agent to test.
+    :param information_retrieval_agents: The agents to test, one per challenge level.
     :param monkeypatch: pytest's monkeypatch utility for modifying builtins.
     """
-    run_interaction_loop(monkeypatch, get_company_revenue_agent, CYCLE_COUNT)
+    information_retrieval_agent = information_retrieval_agents[level_to_run - 1]
+    run_interaction_loop(monkeypatch, information_retrieval_agent, CYCLE_COUNT)

-    file_path = str(get_company_revenue_agent.workspace.get_path("output.txt"))
+    file_path = str(information_retrieval_agent.workspace.get_path("output.txt"))
     content = read_file(file_path, config)
-    assert "81" in content, "Expected the file to contain 81"
+    expected_revenues = EXPECTED_REVENUES[level_to_run - 1]
+    for revenue in expected_revenues:
+        assert (
+            f"{revenue}." in content or f"{revenue}," in content
+        ), f"Expected the file to contain {revenue}"