basic challenges, more ChallengeData structure

Silen Naihin
2023-06-24 09:42:36 -04:00
parent b6562f3420
commit a5073ab577
14 changed files with 163 additions and 38 deletions

View File

@@ -1,5 +1,6 @@
 import os
 from typing import Optional
 
+from agbenchmark.challenges.define_task_types import Ground
 
 class Challenge:
@@ -30,3 +31,24 @@ class Challenge:
             for filename in os.listdir(workspace)
             if os.path.isfile(os.path.join(workspace, filename))
         ]
+
+    def scoring(self, content: str, ground: Ground):
+        if ground.should_contain:
+            for should_contain_word in ground.should_contain:
+                if should_contain_word not in content:
+                    return 0.0
+                else:
+                    print(
+                        f"Word that should exist: {should_contain_word} exists in the content"
+                    )
+
+        if ground.should_not_contain:
+            for should_not_contain_word in ground.should_not_contain:
+                if should_not_contain_word in content:
+                    return 0.0
+                else:
+                    print(
+                        f"Word that should not exist: {should_not_contain_word} does not exist in the content"
+                    )
+
+        return 1.0
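
For context, a rough sketch of how the new scoring method behaves. This is illustrative only: challenge stands in for any Challenge instance, and the Ground values are made up to mirror the data files later in this commit.

ground = Ground(
    answer="Washington",
    should_contain=["Washington"],
    should_not_contain=["New York"],
    files=["file_to_check.txt"],
)

challenge.scoring("Washington, D.C.", ground)         # -> 1.0
challenge.scoring("New York and Washington", ground)  # -> 0.0 (forbidden word present)
challenge.scoring("no capital named here", ground)    # -> 0.0 (required word missing)

Note the method is all-or-nothing: the first missing required word, or the first forbidden word found, short-circuits to 0.0.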

View File

@@ -4,6 +4,12 @@ import json
 import os
 
 
+class Info(BaseModel):
+    difficulty: str
+    description: str
+    side_effects: List[str]
+
+
 class Ground(BaseModel):
     answer: str
     should_contain: Optional[List[str]]
@@ -11,20 +17,20 @@ class Ground(BaseModel):
     files: List[str]
 
 
-class Challenge(BaseModel):
-    category: str
+class ChallengeData(BaseModel):
+    category: List[str]
     task: str
     ground: Ground
-    difficulty: str
     mock_func: Optional[str] = None
+    info: Info
 
     def serialize(self, path: str) -> None:
        with open(path, "w") as file:
            file.write(self.json())
 
     @staticmethod
-    def deserialize(path: str) -> "Challenge":
+    def deserialize(path: str) -> "ChallengeData":
         print("Deserializing", path)
         with open(path, "r") as file:
             data = json.load(file)
-        return Challenge(**data)
+        return ChallengeData(**data)
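
A quick round-trip sketch of the renamed model, using field values taken from the data files below (the /tmp path is hypothetical):

data = ChallengeData(
    category=["basic"],
    task="What is the capital of America?",
    ground=Ground(
        answer="Washington",
        should_contain=["Washington"],
        should_not_contain=["New York", "Los Angeles", "San Francisco"],
        files=["file_to_check.txt"],
    ),
    mock_func="basic_write_file_mock",
    info=Info(
        difficulty="easy",
        description="Tests the writing to file",
        side_effects=["tests if there is in fact an LLM attached"],
    ),
)

data.serialize("/tmp/example.json")                    # writes self.json() to disk
same = ChallengeData.deserialize("/tmp/example.json")  # parses it back into the model
assert same.info.difficulty == "easy"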

View File

@@ -1,27 +1,7 @@
 from agbenchmark.Challenge import Challenge
-from agbenchmark.challenges.define_task_types import Ground
 
 
 class RetrievalChallenge(Challenge):
     """Challenge for information-retrieval"""
 
-    def scoring(self, content: str, ground: Ground):
-        if ground.should_contain:
-            for should_contain_word in ground.should_contain:
-                if should_contain_word not in content:
-                    return 0.0
-                else:
-                    print(
-                        f"Word that should exist: {should_contain_word} exists in the content"
-                    )
-
-        if ground.should_not_contain:
-            for should_not_contain_word in ground.should_not_contain:
-                if should_not_contain_word in content:
-                    return 0.0
-                else:
-                    print(
-                        f"Word that should not exist: {should_not_contain_word} does not exist in the content"
-                    )
-
-        return 1.0
+    pass
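
The body deleted here is the same scoring method added to the Challenge base class in the first file, so RetrievalChallenge now inherits it instead of carrying its own copy. A one-line sketch of what that means in Python:

# Method resolution now lands on the base class:
assert RetrievalChallenge.scoring is Challenge.scoring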

View File

@@ -1,5 +1,5 @@
 {
-    "category": "retrieval",
+    "category": ["basic"],
     "task": "What is the capital of America?",
     "ground": {
         "answer": "Washington",
@@ -7,6 +7,10 @@
         "should_not_contain": ["New York", "Los Angeles", "San Francisco"],
         "files": ["file_to_check.txt"]
     },
-    "difficulty": "easy",
-    "mock_func": "retrieval_1_mock"
+    "mock_func": "write_file_mock",
+    "info": {
+        "difficulty": "easy",
+        "description": "Tests the writing to file",
+        "side_effects": ["tests if there is in fact an LLM attached"]
+    }
 }

View File

@@ -1,9 +1,11 @@
 import pytest
 from agbenchmark.challenges.retrieval.Retrieval import RetrievalChallenge
-from agbenchmark.challenges.define_task_types import Challenge, Ground
+from agbenchmark.challenges.define_task_types import ChallengeData, Ground
 import os
 
-data = Challenge.deserialize(os.path.join(os.path.dirname(__file__), "r1_data.json"))
+data = ChallengeData.deserialize(
+    os.path.join(os.path.dirname(__file__), "r1_data.json")
+)
 
 
 class TestRetrieval1(RetrievalChallenge):

View File

@@ -0,0 +1,28 @@
+from agbenchmark.Challenge import Challenge
+from ..basic_gpt_agent import basic_gpt_agent
+
+
+def basic_read_file_mock(task: str, workspace: str):
+    """
+    This mock seeds a file, reads it back, and rewrites it with a prefix.
+    """
+
+    Challenge.write_to_file(workspace, "file_to_check.txt", "this is how we're doing")
+
+    file_contents = Challenge.open_file(workspace, "file_to_check.txt")
+
+    Challenge.write_to_file(
+        workspace, "file_to_check.txt", f"random string: {file_contents}"
+    )
+
+
+def basic_write_file_mock(task: str, workspace: str):
+    """
+    This mock writes to a file (creates one if it doesn't exist).
+    """
+
+    # Call the basic_gpt_agent to get a response.
+    response = basic_gpt_agent(task)
+
+    # Write the agent's response into the file.
+    Challenge.write_to_file(workspace, "file_to_check.txt", response)
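
Both mocks share the (task, workspace) signature the test harness expects. A hypothetical standalone run of the read mock, outside the harness (normally the workspace fixture supplies the directory):

import tempfile

with tempfile.TemporaryDirectory() as workspace:
    basic_read_file_mock("read the file", workspace)
    # file_to_check.txt now contains "random string: this is how we're doing"
    print(Challenge.open_file(workspace, "file_to_check.txt"))

Note that basic_read_file_mock never calls basic_gpt_agent, so it runs without an LLM attached; basic_write_file_mock does invoke the agent.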

View File

@@ -1,4 +1,3 @@
-from ..basic_gpt_agent import basic_gpt_agent
 from agbenchmark.Challenge import Challenge
@@ -6,8 +5,4 @@ from agbenchmark.Challenge import Challenge
 # Prerequisites here would be writing to a file (basic_abilities test).
 # Should also check if prerequisites exist in the regression file
 def retrieval_1_mock(task: str, workspace: str):
-    # Call the basic_gpt_agent to get a response.
-    response = basic_gpt_agent(task)
-
-    # Open the file in write mode.
-    Challenge.write_to_file(workspace, "file_to_check.txt", response)
+    pass

View File

@@ -0,0 +1,15 @@
+{
+    "category": ["basic"],
+    "task": "Write the string 'random string' before any existing text to the file called file_to_check.txt",
+    "ground": {
+        "answer": "random string: this is how we're doing",
+        "should_contain": ["random string: this is how we're doing"],
+        "files": ["file_to_check.txt"]
+    },
+    "mock_func": "basic_read_file_mock",
+    "info": {
+        "description": "This reads the file quickly",
+        "difficulty": "basic",
+        "side_effects": [""]
+    }
+}
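
This ground data lines up with what basic_read_file_mock leaves on disk: the mock seeds file_to_check.txt with "this is how we're doing", reads it back, and rewrites it as "random string: this is how we're doing", which is exactly the should_contain string, so scoring returns 1.0.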

View File

@@ -0,0 +1,29 @@
+import pytest
+from agbenchmark.challenges.define_task_types import ChallengeData
+from agbenchmark.Challenge import Challenge
+import os
+
+data = ChallengeData.deserialize(
+    os.path.join(os.path.dirname(__file__), "r_file_data.json")
+)
+
+
+class TestReadFile(Challenge):
+    """Testing if LLM can read a file"""
+
+    @pytest.mark.parametrize(
+        "server_response",
+        [(data.task, data.mock_func)],
+        indirect=True,
+    )
+    @pytest.mark.basic
+    def test_retrieval(self, workspace):  # depends on the workspace fixture
+        file = self.open_file(workspace, data.ground.files[0])
+        score = self.scoring(file, data.ground)
+        print("Your score is:", score)
+        assert score
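
indirect=True tells pytest to route the (data.task, data.mock_func) tuple into a server_response fixture rather than into the test function itself. That fixture lives in a conftest that is not part of this diff; a hypothetical sketch of the shape such a fixture takes:

# Hypothetical conftest.py sketch, not part of this commit:
import pytest


@pytest.fixture
def server_response(request):
    task, mock_func = request.param  # the tuple from @pytest.mark.parametrize
    # ...send the task to the agent, or run the named mock against the workspace...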

View File

@@ -0,0 +1,16 @@
+{
+    "category": ["basic"],
+    "task": "What is the capital of America?",
+    "ground": {
+        "answer": "Washington",
+        "should_contain": ["Washington"],
+        "should_not_contain": ["New York", "Los Angeles", "San Francisco"],
+        "files": ["file_to_check.txt"]
+    },
+    "mock_func": "basic_write_file_mock",
+    "info": {
+        "difficulty": "easy",
+        "description": "Tests the writing to file",
+        "side_effects": ["tests if there is in fact an LLM attached"]
+    }
+}

View File

@@ -0,0 +1,27 @@
+import pytest
+from agbenchmark.challenges.define_task_types import ChallengeData
+from agbenchmark.Challenge import Challenge
+import os
+
+data = ChallengeData.deserialize(
+    os.path.join(os.path.dirname(__file__), "w_file_data.json")
+)
+
+
+class TestWriteFile(Challenge):
+    """Testing if LLM can write to a file"""
+
+    @pytest.mark.parametrize(
+        "server_response",
+        [(data.task, data.mock_func)],
+        indirect=True,
+    )
+    @pytest.mark.basic
+    def test_retrieval(self, workspace):
+        file = self.open_file(workspace, data.ground.files[0])
+        score = self.scoring(file, data.ground)
+        print("Your score is:", score)
+        assert score

View File

@@ -28,7 +28,8 @@ testpaths = [
 ]
 markers = [
     "retrieval",
-    "regression"
+    "regression",
+    "basic"
 ]
 
 [tool.poetry.scripts]
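
With "basic" registered alongside the existing markers, the new tests tagged @pytest.mark.basic can be selected on their own with pytest -m basic, and pytest will not warn about an unknown marker.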