Merge pull request #34 from Significant-Gravitas/dsl

This commit is contained in:
Silen Naihin
2023-06-20 18:32:58 -04:00
committed by GitHub
6 changed files with 123 additions and 0 deletions

View File

@@ -0,0 +1,11 @@
import json
class Challenge:
    """Base wrapper around the raw JSON definition of a benchmark challenge.

    Subclasses read individual fields out of ``self.json_data``.
    """

    def __init__(self, json_data):
        # Parsed challenge definition (a dict produced by json.load).
        self.json_data = json_data

    @classmethod
    def from_json_file(cls, json_file):
        """Alternate constructor: build a challenge from a JSON file on disk.

        :param json_file: path to a JSON file holding the challenge definition
        :return: a new instance of ``cls`` wrapping the parsed data
        """
        # Context manager guarantees the file handle is closed promptly.
        with open(json_file) as f:
            json_data = json.load(f)
        return cls(json_data)

View File

@@ -0,0 +1,29 @@
from ..Challenge import Challenge
class RetrievelChallenge(Challenge):
    """Challenge for information-retrieval tasks.

    Scores an agent's answer by exact-substring ground truth: every
    ``should_contain`` string must appear in the answer and no
    ``should_not_contain`` string may appear.
    """

    def __init__(self, json_data):
        self.json_data = json_data
        # Guard: this subclass only understands information-retrieval data.
        assert self.json_data["category"] == "information-retrieval"

    @property
    def agent_input(self):
        """The query string to hand to the agent under test."""
        return self.json_data["query"]

    def scoring(self, content):
        """Return 1.0 if *content* satisfies the ground truth, else 0.0.

        :param content: the agent's answer text
        :return: a float score, either 0. or 1.
        """
        for required in self.json_data["ground"]["should_contain"]:
            if required not in content:
                return 0.
        for forbidden in self.json_data["ground"]["should_not_contain"]:
            if forbidden in content:
                return 0.
        return 1.

    def run(self, output_file):
        """Read the agent's answer from *output_file* and return its score.

        :param output_file: path to a text file containing the agent's answer
        :return: the score from :meth:`scoring`
        """
        # Use a context manager so the handle is closed deterministically
        # (the original left it open until garbage collection).
        with open(output_file) as f:
            output = f.read().strip()
        return self.scoring(output)

37
data/README.md Normal file
View File

@@ -0,0 +1,37 @@
# Challenges Data Schema of Benchmark
## General challenges
Input:
- **category** (str): information-retrieval
- **difficulty_level** (str): the difficulty of this query; choices from ["easy", "medium", "hard"]
## Information-retrieval challenges
Input:
- **category** (str): information-retrieval
- **query** (str): the question that needs to be solved.
- **ground** (dict): The ground truth.
- **answer** (str): The raw text of ground truth answer
- **should_contain** (list): the exact strings that are required in the final answer
- **should_not_contain** (list): the exact strings that should not be in the final answer
- **difficulty_level** (str): the difficulty of this query; choices from ["easy", "medium", "hard"]
Example:
```json
{
"category": "information-retrieval",
"query": "what is the capital of America",
"ground": {
"answer": "Washington",
"should_contain": ["Washington"],
"should_not_contain": ["New York", "Los Angeles", "San Francisco"]
},
"difficulty_level": "easy"
}
```
Output:
- **score** (float): scores range from [0, 1]

View File

@@ -0,0 +1,10 @@
{
"category": "information-retrieval",
"query": "what is the capital of America",
"ground": {
"answer": "Washington",
"should_contain": ["Washington"],
"should_not_contain": ["New York", "Los Angeles", "San Francisco"]
},
"difficulty_level": "easy"
}

View File

@@ -0,0 +1,10 @@
{
"category": "information-retrieval",
"query": "The Nobel Prize in Literature 2012",
"ground": {
"answer": "Mo Yan",
"should_contain": ["Mo Yan"],
"should_not_contain": ["Murakami Haruki"]
},
"difficulty_level": "easy"
}

View File

@@ -0,0 +1,26 @@
import json
import openai
from agbenchmark.benchmark.challenges.retrieval.r1_test import RetrievelChallenge
def basic_gpt_agent(challenge_file,
                    output_file="./basic_gpt_agent_retrieval_results.txt"):
    """Run a minimal one-shot GPT agent against a retrieval challenge.

    :param challenge_file: path to the challenge's JSON definition
    :param output_file: where the agent's raw answer is written before
        scoring (default preserves the original hard-coded path)
    :return: the challenge score in [0, 1]
    """
    challenge = RetrievelChallenge.from_json_file(challenge_file)
    # Single-turn completion: the challenge query is the only message.
    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo-0613",
        messages=[{"role": "user", "content": challenge.agent_input}])
    answer = response["choices"][0]["message"]["content"]
    # challenge.run() reads the answer back from disk, so persist it first.
    with open(output_file, "w") as f:
        f.write(answer)
    print("QUERY : ", challenge.agent_input)
    print("AGENT ANSWER: ", answer)
    score = challenge.run(output_file)
    print("AGENT SCORE : ", score)
    return score


if __name__ == "__main__":
    basic_gpt_agent("./data/retrieval/r1_test_data_1.json")