mirror of
https://github.com/aljazceru/Auto-GPT.git
synced 2026-02-10 08:44:27 +01:00
Make the information retrieval challenge harder while still passing (#4468)
This commit is contained in:
@@ -22,6 +22,21 @@ from autogpt.utils import (
|
||||
from autogpt.workspace import Workspace
|
||||
from scripts.install_plugin_deps import install_plugin_dependencies
|
||||
|
||||
COMMAND_CATEGORIES = [
|
||||
"autogpt.commands.analyze_code",
|
||||
"autogpt.commands.audio_text",
|
||||
"autogpt.commands.execute_code",
|
||||
"autogpt.commands.file_operations",
|
||||
"autogpt.commands.git_operations",
|
||||
"autogpt.commands.google_search",
|
||||
"autogpt.commands.image_gen",
|
||||
"autogpt.commands.improve_code",
|
||||
"autogpt.commands.web_selenium",
|
||||
"autogpt.commands.write_tests",
|
||||
"autogpt.app",
|
||||
"autogpt.commands.task_statuses",
|
||||
]
|
||||
|
||||
|
||||
def run_auto_gpt(
|
||||
continuous: bool,
|
||||
@@ -128,30 +143,18 @@ def run_auto_gpt(
|
||||
# Create a CommandRegistry instance and scan default folder
|
||||
command_registry = CommandRegistry()
|
||||
|
||||
command_categories = [
|
||||
"autogpt.commands.analyze_code",
|
||||
"autogpt.commands.audio_text",
|
||||
"autogpt.commands.execute_code",
|
||||
"autogpt.commands.file_operations",
|
||||
"autogpt.commands.git_operations",
|
||||
"autogpt.commands.google_search",
|
||||
"autogpt.commands.image_gen",
|
||||
"autogpt.commands.improve_code",
|
||||
"autogpt.commands.web_selenium",
|
||||
"autogpt.commands.write_tests",
|
||||
"autogpt.app",
|
||||
"autogpt.commands.task_statuses",
|
||||
]
|
||||
logger.debug(
|
||||
f"The following command categories are disabled: {cfg.disabled_command_categories}"
|
||||
)
|
||||
command_categories = [
|
||||
x for x in command_categories if x not in cfg.disabled_command_categories
|
||||
enabled_command_categories = [
|
||||
x for x in COMMAND_CATEGORIES if x not in cfg.disabled_command_categories
|
||||
]
|
||||
|
||||
logger.debug(f"The following command categories are enabled: {command_categories}")
|
||||
logger.debug(
|
||||
f"The following command categories are enabled: {enabled_command_categories}"
|
||||
)
|
||||
|
||||
for command_category in command_categories:
|
||||
for command_category in enabled_command_categories:
|
||||
command_registry.import_commands(command_category)
|
||||
|
||||
ai_name = ""
|
||||
|
||||
@@ -1,16 +1,19 @@
|
||||
# Information Retrieval Challenge A
|
||||
|
||||
**Status**: Current level to beat: level 1
|
||||
**Status**: Current level to beat: level 2
|
||||
|
||||
**Command to try**:
|
||||
|
||||
```
|
||||
pytest -s tests/integration/challenges/information_retrieval/test_information_retrieval_challenge_a.py
|
||||
pytest -s tests/integration/challenges/information_retrieval/test_information_retrieval_challenge_a.py --level=2
|
||||
```
|
||||
|
||||
## Description
|
||||
|
||||
The agent's goal is to find the revenue of Tesla in 2022.
|
||||
The agent's goal is to find the revenue of Tesla:
|
||||
- level 1 asks the revenue of Tesla in 2022 and explicitly asks to search for 'tesla revenue 2022'
|
||||
- level 2 is identical but doesn't ask to search for 'tesla revenue 2022'
|
||||
- level 3 asks for tesla's revenue by year since its creation.
|
||||
|
||||
It should write the result in a file called output.txt.
|
||||
|
||||
|
||||
@@ -3,6 +3,7 @@ import pytest
|
||||
from autogpt.agent import Agent
|
||||
from autogpt.commands.command import CommandRegistry
|
||||
from autogpt.config import AIConfig, Config
|
||||
from autogpt.main import COMMAND_CATEGORIES
|
||||
from autogpt.memory.vector import NoMemory, get_memory
|
||||
from autogpt.prompts.prompt import DEFAULT_TRIGGERING_PROMPT
|
||||
from autogpt.workspace import Workspace
|
||||
@@ -140,36 +141,46 @@ def memory_management_agent(agent_test_config, memory_json_file, workspace: Work
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def get_company_revenue_agent(
|
||||
def information_retrieval_agents(
|
||||
agent_test_config, memory_json_file, workspace: Workspace
|
||||
):
|
||||
agents = []
|
||||
command_registry = CommandRegistry()
|
||||
command_registry.import_commands("autogpt.commands.file_operations")
|
||||
command_registry.import_commands("autogpt.commands.google_search")
|
||||
command_registry.import_commands("autogpt.app")
|
||||
command_registry.import_commands("autogpt.commands.task_statuses")
|
||||
enabled_command_categories = [
|
||||
x
|
||||
for x in COMMAND_CATEGORIES
|
||||
if x not in agent_test_config.disabled_command_categories
|
||||
]
|
||||
|
||||
ai_config = AIConfig(
|
||||
ai_name="Information Retrieval Agent",
|
||||
ai_role="an autonomous agent that specializes in retrieving information.",
|
||||
ai_goals=[
|
||||
"Search for 'tesla revenue 2022' and write the revenue of Tesla in 2022 to a file called output.txt. You should write the number without commas and you should not use signs like B for billion and M for million.",
|
||||
],
|
||||
)
|
||||
ai_config.command_registry = command_registry
|
||||
system_prompt = ai_config.construct_full_prompt()
|
||||
Config().set_continuous_mode(False)
|
||||
agent = Agent(
|
||||
ai_name="Get-CompanyRevenue",
|
||||
memory=memory_json_file,
|
||||
command_registry=command_registry,
|
||||
config=ai_config,
|
||||
next_action_count=0,
|
||||
system_prompt=system_prompt,
|
||||
triggering_prompt=DEFAULT_TRIGGERING_PROMPT,
|
||||
workspace_directory=workspace.root,
|
||||
)
|
||||
return agent
|
||||
for command_category in enabled_command_categories:
|
||||
command_registry.import_commands(command_category)
|
||||
ai_goals = [
|
||||
"Write to a file called output.txt tesla's revenue in 2022 after searching for 'tesla revenue 2022'.",
|
||||
"Write to a file called output.txt tesla's revenue in 2022.",
|
||||
"Write to a file called output.txt tesla's revenue every year since its creation.",
|
||||
]
|
||||
for ai_goal in ai_goals:
|
||||
ai_config = AIConfig(
|
||||
ai_name="Information Retrieval Agent",
|
||||
ai_role="an autonomous agent that specializes in retrieving information.",
|
||||
ai_goals=[ai_goal],
|
||||
)
|
||||
ai_config.command_registry = command_registry
|
||||
system_prompt = ai_config.construct_full_prompt()
|
||||
Config().set_continuous_mode(False)
|
||||
agents.append(
|
||||
Agent(
|
||||
ai_name="Information Retrieval Agent",
|
||||
memory=memory_json_file,
|
||||
command_registry=command_registry,
|
||||
config=ai_config,
|
||||
next_action_count=0,
|
||||
system_prompt=system_prompt,
|
||||
triggering_prompt=DEFAULT_TRIGGERING_PROMPT,
|
||||
workspace_directory=workspace.root,
|
||||
)
|
||||
)
|
||||
return agents
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
import contextlib
|
||||
import os
|
||||
from functools import wraps
|
||||
from typing import Any, Callable, Optional
|
||||
@@ -23,6 +22,7 @@ def challenge(func: Callable[..., Any]) -> Callable[..., None]:
|
||||
@wraps(func)
|
||||
def wrapper(*args: Any, **kwargs: Any) -> None:
|
||||
run_remaining = MAX_LEVEL_TO_IMPROVE_ON if Challenge.BEAT_CHALLENGES else 1
|
||||
original_error = None
|
||||
|
||||
while run_remaining > 0:
|
||||
current_score, new_score, new_score_location = get_scores()
|
||||
@@ -32,9 +32,12 @@ def challenge(func: Callable[..., Any]) -> Callable[..., None]:
|
||||
)
|
||||
if challenge.level_to_run is not None:
|
||||
kwargs["level_to_run"] = challenge.level_to_run
|
||||
with contextlib.suppress(AssertionError):
|
||||
try:
|
||||
func(*args, **kwargs)
|
||||
challenge.succeeded = True
|
||||
except AssertionError as err:
|
||||
original_error = err
|
||||
challenge.succeeded = False
|
||||
else:
|
||||
challenge.skipped = True
|
||||
if os.environ.get("CI") == "true":
|
||||
@@ -51,6 +54,8 @@ def challenge(func: Callable[..., Any]) -> Callable[..., None]:
|
||||
if Challenge.BEAT_CHALLENGES or challenge.is_new_challenge:
|
||||
# xfail
|
||||
pytest.xfail("Challenge failed")
|
||||
if original_error:
|
||||
raise original_error
|
||||
raise AssertionError("Challenge failed")
|
||||
run_remaining -= 1
|
||||
|
||||
|
||||
@@ -11,7 +11,7 @@
|
||||
},
|
||||
"information_retrieval": {
|
||||
"information_retrieval_challenge_a": {
|
||||
"max_level": 1,
|
||||
"max_level": 3,
|
||||
"max_level_beaten": 1
|
||||
},
|
||||
"information_retrieval_challenge_b": {
|
||||
|
||||
@@ -9,6 +9,7 @@ from tests.integration.challenges.utils import run_interaction_loop
|
||||
from tests.utils import requires_api_key
|
||||
|
||||
CYCLE_COUNT = 3
|
||||
EXPECTED_REVENUES = [["81"], ["81"], ["81", "53", "24", "21", "11", "7", "4", "3", "2"]]
|
||||
from autogpt.agent import Agent
|
||||
|
||||
|
||||
@@ -16,7 +17,7 @@ from autogpt.agent import Agent
|
||||
@requires_api_key("OPENAI_API_KEY")
|
||||
@challenge
|
||||
def test_information_retrieval_challenge_a(
|
||||
get_company_revenue_agent: Agent,
|
||||
information_retrieval_agents: Agent,
|
||||
monkeypatch: pytest.MonkeyPatch,
|
||||
patched_api_requestor: None,
|
||||
config: Config,
|
||||
@@ -28,8 +29,13 @@ def test_information_retrieval_challenge_a(
|
||||
:param get_company_revenue_agent: The agent to test.
|
||||
:param monkeypatch: pytest's monkeypatch utility for modifying builtins.
|
||||
"""
|
||||
run_interaction_loop(monkeypatch, get_company_revenue_agent, CYCLE_COUNT)
|
||||
information_retrieval_agent = information_retrieval_agents[level_to_run - 1]
|
||||
run_interaction_loop(monkeypatch, information_retrieval_agent, CYCLE_COUNT)
|
||||
|
||||
file_path = str(get_company_revenue_agent.workspace.get_path("output.txt"))
|
||||
file_path = str(information_retrieval_agent.workspace.get_path("output.txt"))
|
||||
content = read_file(file_path, config)
|
||||
assert "81" in content, "Expected the file to contain 81"
|
||||
expected_revenues = EXPECTED_REVENUES[level_to_run - 1]
|
||||
for revenue in expected_revenues:
|
||||
assert (
|
||||
f"{revenue}." in content or f"{revenue}," in content
|
||||
), f"Expected the file to contain {revenue}"
|
||||
|
||||
Reference in New Issue
Block a user