From 7bc7d9213df32cabf8e96f422741c037b7817487 Mon Sep 17 00:00:00 2001
From: merwanehamadi <merwanehamadi@gmail.com>
Date: Fri, 14 Jul 2023 14:39:47 -0700
Subject: [PATCH] Replace hidden files with custom python (#99)

Signed-off-by: Merwane Hamadi <merwanehamadi@gmail.com>
---
 agbenchmark/challenge.py                      |  2 +-
 agbenchmark/challenges/README.md              |  9 ++++-----
 .../code/d3/custom_python/api_tests.py        | 14 +++++++++++++
 agbenchmark/challenges/code/d3/data.json      |  6 +++---
 .../{hidden_files => custom_python}/test.py   |  0
 agbenchmark/challenges/test_all.py            | 20 +------------------
 6 files changed, 23 insertions(+), 28 deletions(-)
 rename agbenchmark/challenges/code/d4/{hidden_files => custom_python}/test.py (100%)

diff --git a/agbenchmark/challenge.py b/agbenchmark/challenge.py
index 874fd45b..f07faf8e 100644
--- a/agbenchmark/challenge.py
+++ b/agbenchmark/challenge.py
@@ -50,7 +50,7 @@ class Challenge(ABC):
         # We copy them in the workspace to make it easy to import the code produced by the agent
 
         copy_artifacts_into_workspace(
-            config["workspace"], "hidden_files", self.CHALLENGE_LOCATION
+            config["workspace"], "custom_python", self.CHALLENGE_LOCATION
         )
 
     def test_method(self, config: Dict[str, Any]) -> None:
diff --git a/agbenchmark/challenges/README.md b/agbenchmark/challenges/README.md
index a890c9d3..34e35154 100644
--- a/agbenchmark/challenges/README.md
+++ b/agbenchmark/challenges/README.md
@@ -57,8 +57,7 @@ This folder contains all the files you want the agent to have in its workspace B
 This folder contains all the files you would like the agent to generate. This folder is used to mock the agent.
 This allows to run agbenchmark start --test=TestExample --mock and make sure our challenge actually works.
 
-### hidden_files
-This folder contains files hidden from the agent but useful to assess whether a challenge is successful.
-For example we can have a test.py in it, and this test.py will be added to the workspace at the end of a challenge.
-This allows us to run this test.py and easily import code generated by the agent.
-For example see: TestBasicCodeGeneration challenge.
+### custom_python
+This folder contains files that will be copied into the agent's workspace and run after the challenge is completed.
+For example, we can place a test.py in it and run that file in the workspace, where it can easily import the code generated by the agent.
+Example: TestBasicCodeGeneration challenge.
diff --git a/agbenchmark/challenges/code/d3/custom_python/api_tests.py b/agbenchmark/challenges/code/d3/custom_python/api_tests.py
index 1d6255eb..f01934ef 100644
--- a/agbenchmark/challenges/code/d3/custom_python/api_tests.py
+++ b/agbenchmark/challenges/code/d3/custom_python/api_tests.py
@@ -5,6 +5,15 @@ from unittest.mock import Mock, patch
 import requests
 
 
+def test_make_request_and_assert() -> None:
+    """Assert that make_request_and_assert() returns {"status": "OK"}."""
+    result = make_request_and_assert()
+    print(result)
+    expected_result = {"status": "OK"}
+    error_message = f"AssertionError: Expected the output to be {expected_result}"
+    assert result == expected_result, error_message  # message shown only on failure
+
+
 def make_assertion() -> None:
     if os.environ.get("MOCK_TEST", "False").lower() == "true":
         mock_response = Mock(requests.Response)
@@ -25,3 +34,8 @@ def make_request_and_assert() -> Dict[str, Any]:
         )
 
     return response.json()
+
+
+if __name__ == "__main__":
+    # Script entry point: run the check for the case where the server is healthy.
+    test_make_request_and_assert()
diff --git a/agbenchmark/challenges/code/d3/data.json b/agbenchmark/challenges/code/d3/data.json
index 94c81664..ae0e4558 100644
--- a/agbenchmark/challenges/code/d3/data.json
+++ b/agbenchmark/challenges/code/d3/data.json
@@ -6,9 +6,9 @@
   "ground": {
     "answer": "GET localhost:8079/health responds with a 200 OK",
     "should_contain": [],
-    "should_not_contain": [],
-    "files": [],
-    "type": "custom_python"
+    "should_not_contain": ["AssertionError"],
+    "files": ["test.py"],
+    "type": "execute_python_code"
   },
   "info": {
     "difficulty": "advanced",
diff --git a/agbenchmark/challenges/code/d4/hidden_files/test.py b/agbenchmark/challenges/code/d4/custom_python/test.py
similarity index 100%
rename from agbenchmark/challenges/code/d4/hidden_files/test.py
rename to agbenchmark/challenges/code/d4/custom_python/test.py
diff --git a/agbenchmark/challenges/test_all.py b/agbenchmark/challenges/test_all.py
index a5afef96..98a5ab81 100644
--- a/agbenchmark/challenges/test_all.py
+++ b/agbenchmark/challenges/test_all.py
@@ -2,8 +2,6 @@ import glob
 import importlib
 import json
 import os
-import pkgutil
-import sys
 import types
 from pathlib import Path
 from typing import Any, Dict
@@ -60,23 +58,7 @@ def generate_tests() -> None:
 
             scores = self.get_scores(config)
 
-            # Check if make_assertion is defined and use it
-            if self.data.ground.type == "custom_python":
-                custom_python_location = (
-                    f"{CURRENT_DIRECTORY}/../{challenge_location}/custom_python"
-                )
-                sys.path.append(str(custom_python_location))
-
-                for module_loader, name, ispkg in pkgutil.iter_modules(
-                    [str(custom_python_location)]
-                ):
-                    module = importlib.import_module(name)
-
-                    if hasattr(module, "make_assertion"):
-                        make_assertion = getattr(module, "make_assertion")
-                        make_assertion()
-            else:
-                assert 1 in scores
+            assert 1 in scores
 
         # Parametrize the method here
         test_method = pytest.mark.parametrize(