basic challenges, more ChallengeData structure

This commit is contained in:
Silen Naihin
2023-06-24 09:42:36 -04:00
parent b6562f3420
commit a5073ab577
14 changed files with 163 additions and 38 deletions

View File

@@ -0,0 +1,15 @@
{
"category": ["basic"],
"task": "Write the string 'random string' before any existing text to the file called file_to_check.txt",
"ground": {
"answer": "random string: this is how we're doing",
"should_contain": ["random string: this is how we're doing"],
"files": ["file_to_check.txt"]
},
"mock_func": "basic_read_file_mock",
"info": {
"description": "This reads the file quickly",
"difficulty": "basic",
"side_effects": [""]
}
}

View File

@@ -0,0 +1,29 @@
import pytest
from agbenchmark.challenges.define_task_types import ChallengeData
from agbenchmark.Challenge import Challenge
import os
# The challenge definition lives in a JSON file in the same directory as this
# module; it is loaded once at import time and shared by the test class below.
_data_path = os.path.join(os.path.dirname(__file__), "r_file_data.json")
data = ChallengeData.deserialize(_data_path)
class TestReadFile(Challenge):
    """Testing if LLM can read a file.

    The challenge definition comes from the module-level ``data`` object.
    """

    # The ``server_response`` fixture receives (task, mock_func) through
    # indirect parametrization.
    # NOTE(review): ``server_response`` is not requested in the test signature,
    # so this relies on it being pulled in elsewhere (autouse, or a dependency
    # of ``workspace``) -- confirm against conftest.
    @pytest.mark.parametrize(
        "server_response",
        [(data.task, data.mock_func)],
        indirect=True,
    )
    @pytest.mark.basic
    def test_retrieval(self, workspace):
        """Open the first ground-truth file in ``workspace`` and score it.

        Args:
            workspace: fixture value passed straight to ``open_file``;
                presumably the directory the agent works in -- TODO confirm.
        """
        # Only the first file listed in the ground truth is checked.
        contents = self.open_file(workspace, data.ground.files[0])
        score = self.scoring(contents, data.ground)
        print("Your score is:", score)
        # Any truthy score counts as a pass.
        assert score, "scoring returned a falsy score for the challenge output"

View File

@@ -0,0 +1,16 @@
{
"category": ["basic"],
"task": "What is the capital of America?",
"ground": {
"answer": "Washington",
"should_contain": ["Washington"],
"should_not_contain": ["New York", "Los Angeles", "San Francisco"],
"files": ["file_to_check.txt"]
},
"mock_func": "basic_write_file_mock",
"info": {
"difficulty": "easy",
"description": "Tests the writing to file",
"side_effects": ["tests if there is in fact an LLM attached"]
}
}

View File

@@ -0,0 +1,27 @@
import pytest
from agbenchmark.challenges.define_task_types import ChallengeData
from agbenchmark.Challenge import Challenge
import os
# The challenge definition lives in a JSON file in the same directory as this
# module; it is loaded once at import time and shared by the test class below.
_data_path = os.path.join(os.path.dirname(__file__), "w_file_data.json")
data = ChallengeData.deserialize(_data_path)
class TestWriteFile(Challenge):
    """Testing if LLM can write to a file.

    The challenge definition comes from the module-level ``data`` object.
    """

    # The ``server_response`` fixture receives (task, mock_func) through
    # indirect parametrization.
    # NOTE(review): ``server_response`` is not requested in the test signature,
    # so this relies on it being pulled in elsewhere (autouse, or a dependency
    # of ``workspace``) -- confirm against conftest.
    @pytest.mark.parametrize(
        "server_response",
        [(data.task, data.mock_func)],
        indirect=True,
    )
    @pytest.mark.basic
    def test_retrieval(self, workspace):
        """Open the first ground-truth file in ``workspace`` and score it.

        Args:
            workspace: fixture value passed straight to ``open_file``;
                presumably the directory the agent works in -- TODO confirm.
        """
        # Only the first file listed in the ground truth is checked.
        contents = self.open_file(workspace, data.ground.files[0])
        score = self.scoring(contents, data.ground)
        print("Your score is:", score)
        # Any truthy score counts as a pass.
        assert score, "scoring returned a falsy score for the challenge output"