From 7bc7d9213df32cabf8e96f422741c037b7817487 Mon Sep 17 00:00:00 2001 From: merwanehamadi Date: Fri, 14 Jul 2023 14:39:47 -0700 Subject: [PATCH] Replace hidden files with custom python (#99) Signed-off-by: Merwane Hamadi --- agbenchmark/challenge.py | 2 +- agbenchmark/challenges/README.md | 9 ++++----- .../code/d3/custom_python/api_tests.py | 14 +++++++++++++ agbenchmark/challenges/code/d3/data.json | 6 +++--- .../{hidden_files => custom_python}/test.py | 0 agbenchmark/challenges/test_all.py | 20 +------------------ 6 files changed, 23 insertions(+), 28 deletions(-) rename agbenchmark/challenges/code/d4/{hidden_files => custom_python}/test.py (100%) diff --git a/agbenchmark/challenge.py b/agbenchmark/challenge.py index 874fd45b..f07faf8e 100644 --- a/agbenchmark/challenge.py +++ b/agbenchmark/challenge.py @@ -50,7 +50,7 @@ class Challenge(ABC): # We copy them in the workspace to make it easy to import the code produced by the agent copy_artifacts_into_workspace( - config["workspace"], "hidden_files", self.CHALLENGE_LOCATION + config["workspace"], "custom_python", self.CHALLENGE_LOCATION ) def test_method(self, config: Dict[str, Any]) -> None: diff --git a/agbenchmark/challenges/README.md b/agbenchmark/challenges/README.md index a890c9d3..34e35154 100644 --- a/agbenchmark/challenges/README.md +++ b/agbenchmark/challenges/README.md @@ -57,8 +57,7 @@ This folder contains all the files you want the agent to have in its workspace B This folder contains all the files you would like the agent to generate. This folder is used to mock the agent. This allows to run agbenchmark start --test=TestExample --mock and make sure our challenge actually works. -### hidden_files -This folder contains files hidden from the agent but useful to assess whether a challenge is successful. -For example we can have a test.py in it, and this test.py will be added to the workspace at the end of a challenge. -This allows us to run this test.py and easily import code generated by the agent. -For example see: TestBasicCodeGeneration challenge. +### custom_python +This folder contains files that will be copied into the agent's workspace and run after the challenge is completed. +For example we can have a test.py in it and run this file in the workspace to easily import code generated by the agent. +Example: TestBasicCodeGeneration challenge. diff --git a/agbenchmark/challenges/code/d3/custom_python/api_tests.py b/agbenchmark/challenges/code/d3/custom_python/api_tests.py index 1d6255eb..f01934ef 100644 --- a/agbenchmark/challenges/code/d3/custom_python/api_tests.py +++ b/agbenchmark/challenges/code/d3/custom_python/api_tests.py @@ -5,6 +5,15 @@ from unittest.mock import Mock, patch import requests +def test_make_request_and_assert() -> None: + result = make_request_and_assert() + print(result) + expected_result = {"status": "OK"} + error_message = f"AssertionError: Expected the output to be {expected_result}" + print(error_message) + assert result == expected_result, error_message + + def make_assertion() -> None: if os.environ.get("MOCK_TEST", "False").lower() == "true": mock_response = Mock(requests.Response) @@ -25,3 +34,8 @@ def make_request_and_assert() -> Dict[str, Any]: ) return response.json() + + +if __name__ == "__main__": + # test for the case when server is healthy + test_make_request_and_assert() diff --git a/agbenchmark/challenges/code/d3/data.json b/agbenchmark/challenges/code/d3/data.json index 94c81664..ae0e4558 100644 --- a/agbenchmark/challenges/code/d3/data.json +++ b/agbenchmark/challenges/code/d3/data.json @@ -6,9 +6,9 @@ "ground": { "answer": "GET localhost:8079/health responds with a 200 OK", "should_contain": [], - "should_not_contain": [], - "files": [], - "type": "custom_python" + "should_not_contain": ["AssertionError"], + "files": ["test.py"], + "type": "execute_python_code" }, "info": { "difficulty": "advanced", diff --git a/agbenchmark/challenges/code/d4/hidden_files/test.py b/agbenchmark/challenges/code/d4/custom_python/test.py similarity index 100% rename from agbenchmark/challenges/code/d4/hidden_files/test.py rename to agbenchmark/challenges/code/d4/custom_python/test.py diff --git a/agbenchmark/challenges/test_all.py b/agbenchmark/challenges/test_all.py index a5afef96..98a5ab81 100644 --- a/agbenchmark/challenges/test_all.py +++ b/agbenchmark/challenges/test_all.py @@ -2,8 +2,6 @@ import glob import importlib import json import os -import pkgutil -import sys import types from pathlib import Path from typing import Any, Dict @@ -60,23 +58,7 @@ def generate_tests() -> None: scores = self.get_scores(config) - # Check if make_assertion is defined and use it - if self.data.ground.type == "custom_python": - custom_python_location = ( - f"{CURRENT_DIRECTORY}/../{challenge_location}/custom_python" - ) - sys.path.append(str(custom_python_location)) - - for module_loader, name, ispkg in pkgutil.iter_modules( - [str(custom_python_location)] - ): - module = importlib.import_module(name) - - if hasattr(module, "make_assertion"): - make_assertion = getattr(module, "make_assertion") - make_assertion() - else: - assert 1 in scores + assert 1 in scores # Parametrize the method here test_method = pytest.mark.parametrize(