mirror of
https://github.com/aljazceru/Auto-GPT.git
synced 2026-01-03 14:24:24 +01:00
Add information retrieval 3 (#54)
Co-authored-by: Silen Naihin <silen.naihin@gmail.com>
This commit is contained in:
21
agbenchmark/challenges/retrieval/r2/r2_data.json
Normal file
21
agbenchmark/challenges/retrieval/r2/r2_data.json
Normal file
@@ -0,0 +1,21 @@
|
||||
{
|
||||
"name": "basic_information_retrieval",
|
||||
"category": ["retrieval"],
|
||||
"task": "Write Tesla's revenue in 2022 into a .txt file. Use the US notation, with a precision rounded to the nearest million dollars (for instance, $31,578 million).",
|
||||
"dependencies": [],
|
||||
"ground": {
|
||||
"answer": "81,462",
|
||||
"should_contain": ["81,462"],
|
||||
"should_not_contain": [],
|
||||
"files": [".txt"]
|
||||
},
|
||||
"mock": {
|
||||
"mock_func": "basic_retrieval_2_mock",
|
||||
"mock_task": "Write Tesla's revenue in 2022 into a .txt file. Use the US notation, with a precision rounded to the nearest million dollars (for instance, $31,578 million)."
|
||||
},
|
||||
"info": {
|
||||
"difficulty": "basic",
|
||||
"description": "Tests ability to retrieve information.",
|
||||
"side_effects": ["tests if there is in fact an LLM attached"]
|
||||
}
|
||||
}
|
||||
29
agbenchmark/challenges/retrieval/r2/r2_test.py
Normal file
29
agbenchmark/challenges/retrieval/r2/r2_test.py
Normal file
@@ -0,0 +1,29 @@
|
||||
import os
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict
|
||||
|
||||
import pytest
|
||||
|
||||
from agbenchmark.challenges.retrieval.retrieval import RetrievalChallenge
|
||||
|
||||
|
||||
class TestRetrieval2(RetrievalChallenge):
    """Retrieval challenge configured by the ``r2_data.json`` file beside this module."""

    def get_file_path(self) -> str:  # all tests must implement this method
        """Return the path of ``r2_data.json`` in this module's directory."""
        module_dir = os.path.dirname(__file__)
        return os.path.join(module_dir, "r2_data.json")

    @pytest.mark.depends(on=["basic_write_file"])
    def test_method(self, config: Dict[str, Any]) -> None:
        """Set up the challenge, score every opened file, and require a score of 1.

        ``config["workspace"]`` is resolved relative to the current working
        directory. The test passes when at least one file's score equals 1.
        """
        self.setup_challenge(config)

        workspace_dir = Path.cwd() / config["workspace"]
        ground_truth = self.data.ground

        results = []
        for content in self.open_files(workspace_dir, ground_truth.files):
            result = self.scoring(content, ground_truth)
            print("Your score is:", result)
            results.append(result)

        assert 1 in results
|
||||
21
agbenchmark/challenges/retrieval/r3/r3_data.json
Normal file
21
agbenchmark/challenges/retrieval/r3/r3_data.json
Normal file
@@ -0,0 +1,21 @@
|
||||
{
|
||||
"name": "basic_information_retrieval",
|
||||
"category": ["retrieval"],
|
||||
"task": "Write Tesla's revenue every year since its creation into a .txt file. Use the US notation, with a precision rounded to the nearest million dollars (for instance, $31,578 million).",
|
||||
"dependencies": [],
|
||||
"ground": {
|
||||
"answer": "15 Millions\n112 Millions\n117 Millions\n204 Millions\n413 Millions\n2,014 Millions\n3,198 Millions\n4,046 Millions\n7,000 Millions\n11,759 Millions\n21,461 Millions\n24,578 Millions\n31,536 Millions\n53,823 Millions\n81,462 Millions",
|
||||
"should_contain": ["15", "112", "117", "204", "413", "2,014", "3,198", "4,046", "7,000", "11,759", "21,461", "24,578", "31,536", "53,823", "81,462"],
|
||||
"should_not_contain": [],
|
||||
"files": [".txt"]
|
||||
},
|
||||
"mock": {
|
||||
"mock_func": "basic_retrieval_3_mock",
|
||||
"mock_task": "Write Tesla's revenue every year since its creation into a .txt file. Use the US notation, with a precision rounded to the nearest million dollars (for instance, $31,578 million)."
|
||||
},
|
||||
"info": {
|
||||
"difficulty": "basic",
|
||||
"description": "Tests ability to retrieve information.",
|
||||
"side_effects": ["tests if there is in fact an LLM attached"]
|
||||
}
|
||||
}
|
||||
29
agbenchmark/challenges/retrieval/r3/r3_test.py
Normal file
29
agbenchmark/challenges/retrieval/r3/r3_test.py
Normal file
@@ -0,0 +1,29 @@
|
||||
import os
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict
|
||||
|
||||
import pytest
|
||||
|
||||
from agbenchmark.challenges.retrieval.retrieval import RetrievalChallenge
|
||||
|
||||
|
||||
class TestRetrieval3(RetrievalChallenge):
    """Retrieval challenge configured by the ``r3_data.json`` file beside this module."""

    def get_file_path(self) -> str:  # all tests must implement this method
        """Return the path of ``r3_data.json`` in this module's directory."""
        module_dir = os.path.dirname(__file__)
        return os.path.join(module_dir, "r3_data.json")

    @pytest.mark.depends(on=["basic_write_file"])
    def test_method(self, config: Dict[str, Any]) -> None:
        """Set up the challenge, score every opened file, and require a score of 1.

        ``config["workspace"]`` is resolved relative to the current working
        directory. The test passes when at least one file's score equals 1.
        """
        self.setup_challenge(config)

        workspace_dir = Path.cwd() / config["workspace"]
        ground_truth = self.data.ground

        results = []
        for content in self.open_files(workspace_dir, ground_truth.files):
            result = self.scoring(content, ground_truth)
            print("Your score is:", result)
            results.append(result)

        assert 1 in results
|
||||
@@ -33,3 +33,25 @@ def basic_retrieval_mock(task: str, workspace: str) -> None:
|
||||
"file_to_check.txt",
|
||||
"25.89",
|
||||
)
|
||||
|
||||
|
||||
def basic_retrieval_2_mock(task: str, workspace: str) -> None:
    """
    Mock for the second retrieval challenge.

    Hands the fixed answer "81,462" to Challenge.write_to_file so that it
    lands in file_to_check.txt inside `workspace`. The `task` argument is
    accepted but not used here.
    """
    output_name = "file_to_check.txt"
    answer = "81,462"
    Challenge.write_to_file(workspace, output_name, answer)
|
||||
|
||||
|
||||
def basic_retrieval_3_mock(task: str, workspace: str) -> None:
    """
    Mock for the third retrieval challenge.

    Hands a fixed, newline-separated list of yearly figures to
    Challenge.write_to_file so that it lands in file_to_check.txt inside
    `workspace`. The `task` argument is accepted but not used here.
    """
    output_name = "file_to_check.txt"
    # Adjacent literals concatenate into one string: one figure per line,
    # with no trailing newline after the last entry.
    yearly_figures = (
        "15 Millions\n"
        "112 Millions\n"
        "117 Millions\n"
        "204 Millions\n"
        "413 Millions\n"
        "2,014 Millions\n"
        "3,198 Millions\n"
        "4,046 Millions\n"
        "7,000 Millions\n"
        "11,759 Millions\n"
        "21,461 Millions\n"
        "24,578 Millions\n"
        "31,536 Millions\n"
        "53,823 Millions\n"
        "81,462 Millions"
    )
    Challenge.write_to_file(workspace, output_name, yearly_figures)
|
||||
|
||||
@@ -15,5 +15,15 @@
|
||||
"basic_write_file"
|
||||
],
|
||||
"test": "agbenchmark/tests/basic_abilities/read_file/read_file_test.py"
|
||||
},
|
||||
"TestRetrieval2": {
|
||||
"difficulty": "basic",
|
||||
"dependencies": [],
|
||||
"test": "agbenchmark/challenges/retrieval/r2/r2_test.py"
|
||||
},
|
||||
"TestRetrieval3": {
|
||||
"difficulty": "basic",
|
||||
"dependencies": [],
|
||||
"test": "agbenchmark/challenges/retrieval/r3/r3_test.py"
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user