mirror of
https://github.com/aljazceru/Auto-GPT.git
synced 2026-01-31 11:54:30 +01:00
Information retrieval challenge (#4456)
* test: add information retrieval challenge b * test: get information retrieval challenge be working. * chore: clean up comments and imports. * chore: fix incorrect import * chore: clean up imports. * fix: add web_selenium cmd. resolve missing loop cycle * chore: remove commented code and unused imports. * fix (4261): use 2 cycles instead of 3 * chore: fix mypy formatting * chore: try 2 for mypy formatting * chore: resolve flake8 issues * chore: add docs * chore: resolve linting flake8 * chore: correct formatting to black * Update challenge_b.md * refactored challenge --------- Co-authored-by: PortlandKyGuy <kyleaaron1@gmail.com>
This commit is contained in:
22
docs/challenges/information_retrieval/challenge_b.md
Normal file
22
docs/challenges/information_retrieval/challenge_b.md
Normal file
@@ -0,0 +1,22 @@
|
||||
# Information Retrieval Challenge B
|
||||
|
||||
**Status**: Beaten
|
||||
|
||||
**Command to try**:
|
||||
|
||||
```
|
||||
pytest -s tests/integration/challenges/information_retrieval/test_information_retrieval_challenge_b.py
|
||||
```
|
||||
|
||||
## Description
|
||||
|
||||
The agent's goal is to find the names, affiliated universities, and discoveries of the individuals who won the Nobel Prize in Physics in 2010.
|
||||
|
||||
It should write the result in a file called 2010_nobel_prize_winners.txt.
|
||||
|
||||
The agent should be able to beat this test consistently (this is the hardest part).
|
||||
|
||||
## Objective
|
||||
|
||||
The objective of this challenge is to test the agent's ability to retrieve multiple pieces of related information in a consistent way.
|
||||
The agent should not use Google to perform the task, because it should already know the answer. This is why the task fails after 2 cycles (1 cycle to retrieve information, 1 cycle to write the file).
|
||||
@@ -28,6 +28,7 @@ nav:
|
||||
- Information retrieval:
|
||||
- Introduction: challenges/information_retrieval/introduction.md
|
||||
- Information Retrieval Challenge A: challenges/information_retrieval/challenge_a.md
|
||||
- Information Retrieval Challenge B: challenges/information_retrieval/challenge_b.md
|
||||
- Submit a Challenge: challenges/submit.md
|
||||
- Beat a Challenge: challenges/beat.md
|
||||
|
||||
|
||||
@@ -202,3 +202,36 @@ def kubernetes_agent(memory_json_file, workspace: Workspace):
|
||||
)
|
||||
|
||||
return agent
|
||||
|
||||
|
||||
@pytest.fixture
def get_nobel_prize_agent(agent_test_config, memory_json_file, workspace: Workspace):
    """Build the agent used by the 2010 physics Nobel Prize retrieval challenge.

    The agent is given the file-operation, app, and web_selenium command
    modules and a single goal: write the winners' names, affiliated
    university, and discovery to ``2010_nobel_prize_winners.txt``.

    :param agent_test_config: Requested as a fixture but not referenced in the
        body (presumably needed for its setup side effects -- confirm).
    :param memory_json_file: Memory backend handed to the agent.
    :param workspace: Workspace whose ``root`` becomes the agent's workspace
        directory.
    :return: The configured ``Agent`` instance.
    """
    agent_name = "Get-PhysicsNobelPrize"

    # Register every command module the agent is allowed to use.
    registry = CommandRegistry()
    for module_name in (
        "autogpt.commands.file_operations",
        "autogpt.app",
        "autogpt.commands.web_selenium",
    ):
        registry.import_commands(module_name)

    nobel_config = AIConfig(
        ai_name=agent_name,
        ai_role="An autonomous agent that specializes in physics history.",
        ai_goals=[
            "Write to file the winner's name(s), affiliated university, and discovery of the 2010 nobel prize in physics. Write your final answer to 2010_nobel_prize_winners.txt.",
        ],
    )
    # The registry is attached before the prompt is built, as in the original
    # statement order.
    nobel_config.command_registry = registry

    full_prompt = nobel_config.construct_full_prompt()
    Config().set_continuous_mode(False)

    return Agent(
        ai_name=agent_name,
        memory=memory_json_file,
        command_registry=registry,
        config=nobel_config,
        next_action_count=0,
        system_prompt=full_prompt,
        triggering_prompt=DEFAULT_TRIGGERING_PROMPT,
        workspace_directory=workspace.root,
    )
|
||||
|
||||
@@ -13,6 +13,10 @@
|
||||
"information_retrieval_challenge_a": {
|
||||
"max_level": 1,
|
||||
"max_level_beaten": 1
|
||||
},
|
||||
"information_retrieval_challenge_b": {
|
||||
"max_level": 1,
|
||||
"max_level_beaten": 1
|
||||
}
|
||||
},
|
||||
"kubernetes": {
|
||||
@@ -35,4 +39,4 @@
|
||||
"max_level_beaten": 1
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,51 @@
|
||||
import contextlib
|
||||
|
||||
import pytest
|
||||
|
||||
from autogpt.agent import Agent
|
||||
from autogpt.commands.file_operations import read_file
|
||||
from autogpt.config import Config
|
||||
from tests.integration.challenges.challenge_decorator.challenge_decorator import (
|
||||
challenge,
|
||||
)
|
||||
from tests.integration.challenges.utils import run_interaction_loop
|
||||
from tests.utils import requires_api_key
|
||||
|
||||
# Cycle count passed to run_interaction_loop for this challenge (presumably the
# maximum number of agent cycles before the loop is stopped -- confirm against
# tests.integration.challenges.utils).
# NOTE(review): docs/challenges/information_retrieval/challenge_b.md and the
# commit message describe the task as limited to 2 cycles, while this value is
# 3 -- confirm which is intended before changing either.
CYCLE_COUNT = 3
|
||||
|
||||
|
||||
@pytest.mark.vcr
@requires_api_key("OPENAI_API_KEY")
@challenge
def test_information_retrieval_challenge_b(
    get_nobel_prize_agent: Agent,
    monkeypatch: pytest.MonkeyPatch,
    patched_api_requestor: None,
    level_to_run: int,
    config: Config,
) -> None:
    """
    Run Information Retrieval Challenge B and verify the file the agent wrote.

    The agent is driven through ``run_interaction_loop`` with ``CYCLE_COUNT``;
    a ``SystemExit`` raised by the loop is suppressed. Afterwards
    ``2010_nobel_prize_winners.txt`` is read from the agent's workspace and
    must mention both laureates, their university, and their discovery.

    :param get_nobel_prize_agent: The agent under test.
    :param monkeypatch: pytest's monkeypatch utility, forwarded to the loop.
    :param patched_api_requestor: Fixture requested for its patching effect; unused in the body.
    :param level_to_run: The challenge level to run; unused in the body.
    :param config: The config object passed to ``read_file``.
    """
    agent = get_nobel_prize_agent

    with contextlib.suppress(SystemExit):
        run_interaction_loop(monkeypatch, agent, CYCLE_COUNT)

    output_path = agent.workspace.get_path("2010_nobel_prize_winners.txt")
    written = read_file(str(output_path), config)

    # Checked in the same order as the original individual assertions.
    for expected in (
        "Andre Geim",
        "Konstantin Novoselov",
        "University of Manchester",
        "graphene",
    ):
        assert expected in written, f"Expected the file to contain {expected}"
|
||||
Reference in New Issue
Block a user