From 8c44b9eddf7c566d5e39f7e11149772b96e23a5f Mon Sep 17 00:00:00 2001
From: Silen Naihin <silen.naihin@gmail.com>
Date: Sat, 24 Jun 2023 09:42:36 -0400
Subject: [PATCH] basic challenges, more ChallengeData structure

---
 agbenchmark/Challenge.py                      | 22 ++++++++++++++
 agbenchmark/challenges/define_task_types.py   | 16 ++++++----
 agbenchmark/challenges/retrieval/Retrieval.py | 22 +-------------
 .../challenges/retrieval/r1/r1_data.json      | 10 +++++--
 .../challenges/retrieval/r1/r1_test.py        |  6 ++--
 agbenchmark/mocks/tests/basic_mocks.py        | 28 ++++++++++++++++++
 agbenchmark/mocks/tests/retrieval_mocks.py    |  7 +----
 .../read_file/r_file_data.json                | 15 ++++++++++
 .../read_file/read_file_test.py               | 29 +++++++++++++++++++
 .../tests/basic_abilities/read_file_test.py   |  0
 .../write_file/w_file_data.json               | 16 ++++++++++
 .../write_file/write_file_test.py             | 27 +++++++++++++++++
 .../tests/basic_abilities/write_file_test.py  |  0
 pyproject.toml                                |  3 +-
 14 files changed, 163 insertions(+), 38 deletions(-)
 create mode 100644 agbenchmark/tests/basic_abilities/read_file/r_file_data.json
 create mode 100644 agbenchmark/tests/basic_abilities/read_file/read_file_test.py
 delete mode 100644 agbenchmark/tests/basic_abilities/read_file_test.py
 create mode 100644 agbenchmark/tests/basic_abilities/write_file/w_file_data.json
 create mode 100644 agbenchmark/tests/basic_abilities/write_file/write_file_test.py
 delete mode 100644 agbenchmark/tests/basic_abilities/write_file_test.py

diff --git a/agbenchmark/Challenge.py b/agbenchmark/Challenge.py
index 20bf5585..9828a0e9 100644
--- a/agbenchmark/Challenge.py
+++ b/agbenchmark/Challenge.py
@@ -1,5 +1,6 @@
 import os
 from typing import Optional
+from agbenchmark.challenges.define_task_types import Ground
 
 
 class Challenge:
@@ -30,3 +31,24 @@ class Challenge:
             for filename in os.listdir(workspace)
             if os.path.isfile(os.path.join(workspace, filename))
         ]
+
+    def scoring(self, content: str, ground: Ground):
+        """Return 1.0 when content passes every check in ground, otherwise 0.0."""
+        # `or []` skips a check list that is None or empty.
+        for should_contain_word in ground.should_contain or []:
+            if should_contain_word not in content:
+                return 0.0
+
+            print(
+                f"Word that should exist: {should_contain_word} exists in the content"
+            )
+
+        for should_not_contain_word in ground.should_not_contain or []:
+            if should_not_contain_word in content:
+                return 0.0
+
+            print(
+                f"Word that should not exist: {should_not_contain_word} does not exist in the content"
+            )
+
+        return 1.0
diff --git a/agbenchmark/challenges/define_task_types.py b/agbenchmark/challenges/define_task_types.py
index f1a841b5..879a46af 100644
--- a/agbenchmark/challenges/define_task_types.py
+++ b/agbenchmark/challenges/define_task_types.py
@@ -4,6 +4,12 @@ import json
 import os
 
 
+class Info(BaseModel):
+    difficulty: str  # difficulty label; the bundled data files use "easy" and "basic"
+    description: str  # free-text summary of what the challenge tests
+    side_effects: List[str]  # free-text notes; presumably what else the challenge exercises
+
+
 class Ground(BaseModel):
     answer: str
     should_contain: Optional[List[str]]
@@ -11,20 +17,20 @@ class Ground(BaseModel):
     files: List[str]
 
 
-class Challenge(BaseModel):
-    category: str
+class ChallengeData(BaseModel):
+    category: List[str]
     task: str
     ground: Ground
-    difficulty: str
     mock_func: Optional[str] = None
+    info: Info
 
     def serialize(self, path: str) -> None:
         with open(path, "w") as file:
             file.write(self.json())
 
     @staticmethod
-    def deserialize(path: str) -> "Challenge":
+    def deserialize(path: str) -> "ChallengeData":
         print("Deserializing", path)
         with open(path, "r") as file:
             data = json.load(file)
-        return Challenge(**data)
+        return ChallengeData(**data)
diff --git a/agbenchmark/challenges/retrieval/Retrieval.py b/agbenchmark/challenges/retrieval/Retrieval.py
index 2db22ae4..9434d69c 100644
--- a/agbenchmark/challenges/retrieval/Retrieval.py
+++ b/agbenchmark/challenges/retrieval/Retrieval.py
@@ -1,27 +1,7 @@
 from agbenchmark.Challenge import Challenge
-from agbenchmark.challenges.define_task_types import Ground
 
 
 class RetrievalChallenge(Challenge):
     """Challenge for information-retrieval"""
 
-    def scoring(self, content: str, ground: Ground):
-        if ground.should_contain:
-            for should_contain_word in ground.should_contain:
-                if should_contain_word not in content:
-                    return 0.0
-                else:
-                    print(
-                        f"Word that should exist: {should_contain_word} exists in the content"
-                    )
-
-        if ground.should_not_contain:
-            for should_not_contain_word in ground.should_not_contain:
-                if should_not_contain_word in content:
-                    return 0.0
-                else:
-                    print(
-                        f"Word that should not exist: {should_not_contain_word} does not exist in the content"
-                    )
-
-        return 1.0
+    pass
diff --git a/agbenchmark/challenges/retrieval/r1/r1_data.json b/agbenchmark/challenges/retrieval/r1/r1_data.json
index c7cc3100..08b74d1b 100644
--- a/agbenchmark/challenges/retrieval/r1/r1_data.json
+++ b/agbenchmark/challenges/retrieval/r1/r1_data.json
@@ -1,5 +1,5 @@
 {
-  "category": "retrieval",
+  "category": ["retrieval"],
   "task": "What is the capital of America?",
   "ground": {
     "answer": "Washington",
@@ -7,6 +7,10 @@
     "should_not_contain": ["New York", "Los Angeles", "San Francisco"],
     "files": ["file_to_check.txt"]
   },
-  "difficulty": "easy",
-  "mock_func": "retrieval_1_mock"
+  "mock_func": "basic_write_file_mock",
+  "info": {
+    "difficulty": "easy",
+    "description": "Tests the writing to file",
+    "side_effects": ["tests if there is in fact an LLM attached"]
+  }
 }
diff --git a/agbenchmark/challenges/retrieval/r1/r1_test.py b/agbenchmark/challenges/retrieval/r1/r1_test.py
index e20c9f7b..d37c5e79 100644
--- a/agbenchmark/challenges/retrieval/r1/r1_test.py
+++ b/agbenchmark/challenges/retrieval/r1/r1_test.py
@@ -1,9 +1,11 @@
 import pytest
 from agbenchmark.challenges.retrieval.Retrieval import RetrievalChallenge
-from agbenchmark.challenges.define_task_types import Challenge, Ground
+from agbenchmark.challenges.define_task_types import ChallengeData, Ground
 import os
 
-data = Challenge.deserialize(os.path.join(os.path.dirname(__file__), "r1_data.json"))
+data = ChallengeData.deserialize(
+    os.path.join(os.path.dirname(__file__), "r1_data.json")
+)
 
 
 class TestRetrieval1(RetrievalChallenge):
diff --git a/agbenchmark/mocks/tests/basic_mocks.py b/agbenchmark/mocks/tests/basic_mocks.py
index e69de29b..eb7b9654 100644
--- a/agbenchmark/mocks/tests/basic_mocks.py
+++ b/agbenchmark/mocks/tests/basic_mocks.py
@@ -0,0 +1,28 @@
+from agbenchmark.Challenge import Challenge
+from ..basic_gpt_agent import basic_gpt_agent
+
+
+def basic_read_file_mock(task: str, workspace: str):
+    """
+    This mock seeds file_to_check.txt, reads it back, and rewrites it prefixed with "random string: ".
+    """
+
+    Challenge.write_to_file(workspace, "file_to_check.txt", "this is how we're doing")
+
+    file_contents = Challenge.open_file(workspace, "file_to_check.txt")
+
+    Challenge.write_to_file(
+        workspace, "file_to_check.txt", f"random string: {file_contents}"
+    )
+
+
+def basic_write_file_mock(task: str, workspace: str):
+    """
+    This mock sends the task to basic_gpt_agent and writes the response to file_to_check.txt
+    """
+
+    # Call the basic_gpt_agent to get a response.
+    response = basic_gpt_agent(task)
+
+    # Write the response to file_to_check.txt inside the workspace.
+    Challenge.write_to_file(workspace, "file_to_check.txt", response)
diff --git a/agbenchmark/mocks/tests/retrieval_mocks.py b/agbenchmark/mocks/tests/retrieval_mocks.py
index 23f4bde1..2481de06 100644
--- a/agbenchmark/mocks/tests/retrieval_mocks.py
+++ b/agbenchmark/mocks/tests/retrieval_mocks.py
@@ -1,4 +1,3 @@
-from ..basic_gpt_agent import basic_gpt_agent
 from agbenchmark.Challenge import Challenge
 
 
@@ -6,8 +5,4 @@ from agbenchmark.Challenge import Challenge
 # Prerequisites here would be writing to a file (basic_abilities test).
 # Should also check if prerequisites exists in regression file
 def retrieval_1_mock(task: str, workspace: str):
-    # Call the basic_gpt_agent to get a response.
-    response = basic_gpt_agent(task)
-
-    # Open the file in write mode.
-    Challenge.write_to_file(workspace, "file_to_check.txt", response)
+    pass
diff --git a/agbenchmark/tests/basic_abilities/read_file/r_file_data.json b/agbenchmark/tests/basic_abilities/read_file/r_file_data.json
new file mode 100644
index 00000000..55319ddf
--- /dev/null
+++ b/agbenchmark/tests/basic_abilities/read_file/r_file_data.json
@@ -0,0 +1,15 @@
+{
+  "category": ["basic"],
+  "task": "Write the string 'random string: ' before any existing text to the file called file_to_check.txt",
+  "ground": {
+    "answer": "random string: this is how we're doing",
+    "should_contain": ["random string: this is how we're doing"],
+    "files": ["file_to_check.txt"]
+  },
+  "mock_func": "basic_read_file_mock",
+  "info": {
+    "description": "Tests reading an existing file and writing its contents back with a prefix",
+    "difficulty": "basic",
+    "side_effects": []
+  }
+}
diff --git a/agbenchmark/tests/basic_abilities/read_file/read_file_test.py b/agbenchmark/tests/basic_abilities/read_file/read_file_test.py
new file mode 100644
index 00000000..610ccdab
--- /dev/null
+++ b/agbenchmark/tests/basic_abilities/read_file/read_file_test.py
@@ -0,0 +1,29 @@
+import pytest
+from agbenchmark.challenges.define_task_types import ChallengeData
+from agbenchmark.Challenge import Challenge
+import os
+
+data = ChallengeData.deserialize(
+    os.path.join(os.path.dirname(__file__), "r_file_data.json")
+)
+
+
+class TestReadFile(Challenge):
+    """Testing if LLM can read a file"""
+
+    @pytest.mark.parametrize(
+        "server_response",
+        [(data.task, data.mock_func)],
+        indirect=True,
+    )
+    @pytest.mark.basic
+    def test_read_file(
+        self, workspace
+    ):  # workspace: directory passed to open_file (presumably a fixture defined elsewhere)
+        file = self.open_file(workspace, data.ground.files[0])
+
+        score = self.scoring(file, data.ground)
+
+        print("Your score is:", score)
+
+        assert score
diff --git a/agbenchmark/tests/basic_abilities/read_file_test.py b/agbenchmark/tests/basic_abilities/read_file_test.py
deleted file mode 100644
index e69de29b..00000000
diff --git a/agbenchmark/tests/basic_abilities/write_file/w_file_data.json b/agbenchmark/tests/basic_abilities/write_file/w_file_data.json
new file mode 100644
index 00000000..4aaa1347
--- /dev/null
+++ b/agbenchmark/tests/basic_abilities/write_file/w_file_data.json
@@ -0,0 +1,16 @@
+{
+  "category": ["basic"],
+  "task": "What is the capital of America?",
+  "ground": {
+    "answer": "Washington",
+    "should_contain": ["Washington"],
+    "should_not_contain": ["New York", "Los Angeles", "San Francisco"],
+    "files": ["file_to_check.txt"]
+  },
+  "mock_func": "basic_write_file_mock",
+  "info": {
+    "difficulty": "easy",
+    "description": "Tests the writing to file",
+    "side_effects": ["tests if there is in fact an LLM attached"]
+  }
+}
diff --git a/agbenchmark/tests/basic_abilities/write_file/write_file_test.py b/agbenchmark/tests/basic_abilities/write_file/write_file_test.py
new file mode 100644
index 00000000..ccb10fe7
--- /dev/null
+++ b/agbenchmark/tests/basic_abilities/write_file/write_file_test.py
@@ -0,0 +1,27 @@
+import pytest
+from agbenchmark.challenges.define_task_types import ChallengeData
+from agbenchmark.Challenge import Challenge
+import os
+
+data = ChallengeData.deserialize(
+    os.path.join(os.path.dirname(__file__), "w_file_data.json")
+)
+
+
+class TestWriteFile(Challenge):
+    """Testing if LLM can write to a file"""
+
+    @pytest.mark.parametrize(
+        "server_response",
+        [(data.task, data.mock_func)],
+        indirect=True,
+    )
+    @pytest.mark.basic
+    def test_write_file(self, workspace):
+        file = self.open_file(workspace, data.ground.files[0])
+
+        score = self.scoring(file, data.ground)
+
+        print("Your score is:", score)
+
+        assert score
diff --git a/agbenchmark/tests/basic_abilities/write_file_test.py b/agbenchmark/tests/basic_abilities/write_file_test.py
deleted file mode 100644
index e69de29b..00000000
diff --git a/pyproject.toml b/pyproject.toml
index 5498381a..6f79e75c 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -28,7 +28,8 @@ testpaths = [
 ]
 markers = [
     "retrieval",
-    "regression"
+    "regression",
+    "basic"
 ]
 
 [tool.poetry.scripts]