mirror of
https://github.com/aljazceru/Auto-GPT.git
synced 2026-01-03 14:24:24 +01:00
Merge pull request #34 from Significant-Gravitas/dsl
This commit is contained in:
@@ -0,0 +1,11 @@
|
||||
import json
|
||||
|
||||
class Challenge(object):
    """Base class for a benchmark challenge described by decoded JSON data.

    The decoded challenge definition is stored unchanged on ``json_data``;
    interpreting its fields is left to subclasses.
    """

    def __init__(self, json_data):
        """Store the already-decoded challenge definition.

        Args:
            json_data: The decoded JSON content describing the challenge.
        """
        self.json_data = json_data

    @classmethod
    def from_json_file(cls, json_file):
        """Create a challenge from a JSON file on disk.

        Args:
            json_file: Path of the JSON file to read.

        Returns:
            An instance of ``cls`` built from the decoded file content.
        """
        # JSON text is UTF-8; do not depend on the platform's locale encoding,
        # which would mis-decode non-ASCII challenge data on some systems.
        with open(json_file, encoding="utf-8") as f:
            json_data = json.load(f)
        return cls(json_data)
|
||||
@@ -0,0 +1,29 @@
|
||||
from ..Challenge import Challenge
|
||||
|
||||
|
||||
class RetrievelChallenge(Challenge):
    """Challenge for information-retrieval."""

    def __init__(self, json_data):
        """Store the challenge definition and check its category.

        Args:
            json_data: Decoded challenge definition. It must have a
                ``"category"`` entry equal to ``"information-retrieval"``.

        Raises:
            KeyError: If ``json_data`` has no ``"category"`` entry.
            AssertionError: If the category is not ``"information-retrieval"``.
        """
        super().__init__(json_data)
        # Raised explicitly (instead of using an ``assert`` statement) so the
        # check is still enforced when Python runs with ``-O``; the exception
        # type is kept so existing callers see the same error.
        if self.json_data["category"] != "information-retrieval":
            raise AssertionError(
                "RetrievelChallenge requires category 'information-retrieval'"
            )

    @property
    def agent_input(self):
        """The query string that is handed to the agent."""
        return self.json_data["query"]

    def scoring(self, content):
        """Score an agent answer against the ground truth.

        Args:
            content: The text of the agent's answer.

        Returns:
            ``1.0`` when ``content`` contains every ``should_contain`` string
            and none of the ``should_not_contain`` strings from
            ``json_data["ground"]``; ``0.0`` otherwise. Matching is a plain,
            case-sensitive substring test.
        """
        ground = self.json_data["ground"]
        if not all(word in content for word in ground["should_contain"]):
            return 0.0
        if any(word in content for word in ground["should_not_contain"]):
            return 0.0
        return 1.0

    def run(self, output_file):
        """Read the agent's output from a file and score it.

        Args:
            output_file: Path of the text file holding the agent's answer.

        Returns:
            The score computed by :meth:`scoring` for the stripped file content.
        """
        # Use a context manager so the file handle is always closed.
        with open(output_file) as f:
            output = f.read().strip()

        return self.scoring(output)
|
||||
37
data/README.md
Normal file
37
data/README.md
Normal file
@@ -0,0 +1,37 @@
|
||||
# Challenges Data Schema of Benchmark
|
||||
|
||||
## General challenges
|
||||
Input:
|
||||
- **category** (str): information-retrieval
|
||||
- **difficulty_level** (str): the difficulty of this query; one of ["easy", "medium", "hard"]
|
||||
|
||||
|
||||
|
||||
## Information-retrieval challenges
|
||||
Input:
|
||||
- **category** (str): information-retrieval
|
||||
- **query** (str): the question that needs to be solved.
|
||||
- **ground** (dict): The ground truth.
|
||||
- **answer** (str): The raw text of ground truth answer
|
||||
- **should_contain** (list): the exact strings that are required in the final answer
|
||||
- **should_not_contain** (list): the exact strings that should not be in the final answer
|
||||
- **difficulty_level** (str): the difficulty of this query; one of ["easy", "medium", "hard"]
|
||||
|
||||
Example:
|
||||
```python
|
||||
{
|
||||
"category": "information-retrieval",
|
||||
"query": "what is the capital of America",
|
||||
"ground": {
|
||||
"answer": "Washington",
|
||||
"should_contain": ["Washington"],
|
||||
"should_not_contain": ["New York", "Los Angeles", "San Francisco"]
|
||||
},
|
||||
"difficulty_level": "easy"
|
||||
}
|
||||
```
|
||||
|
||||
|
||||
|
||||
Output:
|
||||
- **score** (float): a score in the range [0, 1]
|
||||
10
data/retrieval/r1_test_data_0.json
Normal file
10
data/retrieval/r1_test_data_0.json
Normal file
@@ -0,0 +1,10 @@
|
||||
{
|
||||
"category": "information-retrieval",
|
||||
"query": "what is the capital of America",
|
||||
"ground": {
|
||||
"answer": "Washington",
|
||||
"should_contain": ["Washington"],
|
||||
"should_not_contain": ["New York", "Los Angeles", "San Francisco"]
|
||||
},
|
||||
"difficulty_level": "easy"
|
||||
}
|
||||
10
data/retrieval/r1_test_data_1.json
Normal file
10
data/retrieval/r1_test_data_1.json
Normal file
@@ -0,0 +1,10 @@
|
||||
{
|
||||
"category": "information-retrieval",
|
||||
"query": "The Nobel Prize in Literature 2012",
|
||||
"ground": {
|
||||
"answer": "Mo Yan",
|
||||
"should_contain": ["Mo Yan"],
|
||||
"should_not_contain": ["Murakami Haruki"]
|
||||
},
|
||||
"difficulty_level": "easy"
|
||||
}
|
||||
26
examples/basic_gpt_agent.py
Normal file
26
examples/basic_gpt_agent.py
Normal file
@@ -0,0 +1,26 @@
|
||||
import json
|
||||
import openai
|
||||
from agbenchmark.benchmark.challenges.retrieval.r1_test import RetrievelChallenge
|
||||
|
||||
|
||||
def basic_gpt_agent(
    challenge_file,
    model="gpt-3.5-turbo-0613",
    output_file="./basic_gpt_agent_retrieval_results.txt",
):
    """Answer a retrieval challenge with a single chat completion and score it.

    The challenge query is sent as one user message, the reply is written to
    ``output_file``, and the challenge then scores that file.

    Args:
        challenge_file: Path of the JSON file describing the challenge.
        model: Name of the chat model to query. Defaults to the model this
            example originally hard-coded.
        output_file: Path the agent's answer is written to before scoring.
            Defaults to the path this example originally hard-coded.

    Returns:
        The score returned by the challenge's ``run`` method.
    """
    challenge = RetrievelChallenge.from_json_file(challenge_file)
    query = challenge.agent_input

    response = openai.ChatCompletion.create(
        model=model,
        messages=[{"role": "user", "content": query}],
    )
    answer = response["choices"][0]["message"]["content"]

    # The challenge scores a file, so persist the answer before running it.
    with open(output_file, "w") as f:
        f.write(answer)

    print("QUERY : ", query)
    print("AGENT ANSWER: ", answer)

    score = challenge.run(output_file)

    print("AGENT SCORE : ", score)
    return score
|
||||
|
||||
if __name__ == "__main__":
    # Script entry point: run the example agent on a bundled sample challenge.
    basic_gpt_agent(challenge_file="./data/retrieval/r1_test_data_1.json")
|
||||
Reference in New Issue
Block a user