Merge pull request #34 from Significant-Gravitas/dsl

This commit is contained in:
Silen Naihin
2023-06-20 18:32:58 -04:00
committed by GitHub
6 changed files with 123 additions and 0 deletions

View File

@@ -0,0 +1,11 @@
import json
class Challenge:
    """Base wrapper around the raw JSON definition of a benchmark challenge.

    Subclasses read individual fields out of ``self.json_data``.
    """

    def __init__(self, json_data):
        # Parsed challenge definition (a dict produced by json.load).
        self.json_data = json_data

    @classmethod
    def from_json_file(cls, json_file):
        """Alternate constructor: build a challenge from a JSON file on disk.

        :param json_file: path to a JSON file holding the challenge definition
        :return: a new instance of ``cls`` wrapping the parsed data
        """
        # Context manager guarantees the file handle is closed promptly.
        with open(json_file) as f:
            json_data = json.load(f)
        return cls(json_data)

View File

@@ -0,0 +1,29 @@
from ..Challenge import Challenge
class RetrievelChallenge(Challenge):
    """Challenge for information-retrieval tasks.

    Scores an agent's answer by exact-substring ground truth: every
    ``should_contain`` string must appear in the answer and no
    ``should_not_contain`` string may appear.
    """

    def __init__(self, json_data):
        self.json_data = json_data
        # Guard: this subclass only understands information-retrieval data.
        assert self.json_data["category"] == "information-retrieval"

    @property
    def agent_input(self):
        """The query string to hand to the agent under test."""
        return self.json_data["query"]

    def scoring(self, content):
        """Return 1.0 if *content* satisfies the ground truth, else 0.0.

        :param content: the agent's answer text
        :return: a float score, either 0. or 1.
        """
        for required in self.json_data["ground"]["should_contain"]:
            if required not in content:
                return 0.
        for forbidden in self.json_data["ground"]["should_not_contain"]:
            if forbidden in content:
                return 0.
        return 1.

    def run(self, output_file):
        """Read the agent's answer from *output_file* and return its score.

        :param output_file: path to a text file containing the agent's answer
        :return: the score from :meth:`scoring`
        """
        # Use a context manager so the handle is closed deterministically
        # (the original left it open until garbage collection).
        with open(output_file) as f:
            output = f.read().strip()
        return self.scoring(output)

37
data/README.md Normal file
View File

@@ -0,0 +1,37 @@
# Challenges Data Schema of Benchmark
## General challenges
Input:
- **category** (str): information-retrieval
- **difficulty_level** (str): the difficulty of this query; choices from ["easy", "medium", "hard"]
## Information-retrieval challenges
Input:
- **category** (str): information-retrieval
- **query** (str): the question that needs to be solved.
- **ground** (dict): The ground truth.
- **answer** (str): The raw text of ground truth answer
- **should_contain** (list): the exact strings that are required in the final answer
- **should_not_contain** (list): the exact strings that should not be in the final answer
- **difficulty_level** (str): the difficulty of this query; choices from ["easy", "medium", "hard"]
Example:
```json
{
"category": "information-retrieval",
"query": "what is the capital of America",
"ground": {
"answer": "Washington",
"should_contain": ["Washington"],
"should_not_contain": ["New York", "Los Angeles", "San Francisco"]
},
"difficulty_level": "easy"
}
```
Output:
- **score** (float): scores range from [0, 1]

View File

@@ -0,0 +1,10 @@
{
"category": "information-retrieval",
"query": "what is the capital of America",
"ground": {
"answer": "Washington",
"should_contain": ["Washington"],
"should_not_contain": ["New York", "Los Angeles", "San Francisco"]
},
"difficulty_level": "easy"
}

View File

@@ -0,0 +1,10 @@
{
"category": "information-retrieval",
"query": "The Nobel Prize in Literature 2012",
"ground": {
"answer": "Mo Yan",
"should_contain": ["Mo Yan"],
"should_not_contain": ["Murakami Haruki"]
},
"difficulty_level": "easy"
}

View File

@@ -0,0 +1,26 @@
import json
import openai
from agbenchmark.benchmark.challenges.retrieval.r1_test import RetrievelChallenge
def basic_gpt_agent(challenge_file,
                    output_file="./basic_gpt_agent_retrieval_results.txt"):
    """Run a minimal one-shot GPT agent against a retrieval challenge.

    :param challenge_file: path to the challenge's JSON definition
    :param output_file: where the agent's raw answer is written before
        scoring (default preserves the original hard-coded path)
    :return: the challenge score in [0, 1]
    """
    challenge = RetrievelChallenge.from_json_file(challenge_file)
    # Single-turn completion: the challenge query is the only message.
    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo-0613",
        messages=[{"role": "user", "content": challenge.agent_input}])
    answer = response["choices"][0]["message"]["content"]
    # challenge.run() reads the answer back from disk, so persist it first.
    with open(output_file, "w") as f:
        f.write(answer)
    print("QUERY : ", challenge.agent_input)
    print("AGENT ANSWER: ", answer)
    score = challenge.run(output_file)
    print("AGENT SCORE : ", score)
    return score


if __name__ == "__main__":
    basic_gpt_agent("./data/retrieval/r1_test_data_1.json")