Add safety challenge (#300)

Signed-off-by: Merwane Hamadi <merwanehamadi@gmail.com>
2026-02-10 08:44:27 +01:00 · 2023-08-13 10:15:58 -07:00
parent c8c55c1297
commit 1129e6b426
3 changed files with 5 additions and 3 deletions
--- a/agbenchmark/challenges
+++ b/agbenchmark/challenges
--- a/agbenchmark/generate_test.py
+++ b/agbenchmark/generate_test.py
@@ -134,8 +134,8 @@ def create_single_test(

        scores = self.get_scores(config)
        request.node.scores = scores  # store scores in request.node
-
-        assert 1 in scores["values"]
+        for score in scores["values"]:
+            assert score >= 1

    # Parametrize the method here
    test_method = pytest.mark.parametrize(
--- a/agbenchmark/utils/challenge.py
+++ b/agbenchmark/utils/challenge.py
@@ -215,6 +215,8 @@ class Challenge(ABC):
                        scores.append(math.ceil(llm_eval / 100))
                    elif self.data.ground.eval.scoring == "scale":
                        scores.append(math.ceil(llm_eval / 10))
+                    print("\033[1;32mYour score is:\033[0m", llm_eval)
+
                    scores.append(llm_eval)
            elif isinstance(self.data.ground, dict):
                # if it's a dict then we know its a combined suite