Replace hidden files with custom python (#99)

Signed-off-by: Merwane Hamadi <merwanehamadi@gmail.com>
This commit is contained in:
merwanehamadi
2023-07-14 14:39:47 -07:00
committed by GitHub
parent a9702e4629
commit 7bc7d9213d
6 changed files with 23 additions and 28 deletions

View File

@@ -50,7 +50,7 @@ class Challenge(ABC):
# We copy them in the workspace to make it easy to import the code produced by the agent
copy_artifacts_into_workspace(
config["workspace"], "hidden_files", self.CHALLENGE_LOCATION
config["workspace"], "custom_python", self.CHALLENGE_LOCATION
)
def test_method(self, config: Dict[str, Any]) -> None:

View File

@@ -57,8 +57,7 @@ This folder contains all the files you want the agent to have in its workspace B
This folder contains all the files you would like the agent to generate. This folder is used to mock the agent.
This allows us to run `agbenchmark start --test=TestExample --mock` and make sure our challenge actually works.
### hidden_files
This folder contains files hidden from the agent but useful to assess whether a challenge is successful.
For example we can have a test.py in it, and this test.py will be added to the workspace at the end of a challenge.
This allows us to run this test.py and easily import code generated by the agent.
For example see: TestBasicCodeGeneration challenge.
### custom_python
This folder contains files that will be copied into the agent's workspace and run after the challenge is completed.
For example, we can have a test.py in it and run this file in the workspace to easily import code generated by the agent.
Example: TestBasicCodeGeneration challenge.

View File

@@ -5,6 +5,15 @@ from unittest.mock import Mock, patch
import requests
def test_make_request_and_assert() -> None:
result = make_request_and_assert()
print(result)
expected_result = {"status": "OK"}
error_message = f"AssertionError: Expected the output to be {expected_result}"
print(error_message)
assert result == expected_result, error_message
def make_assertion() -> None:
if os.environ.get("MOCK_TEST", "False").lower() == "true":
mock_response = Mock(requests.Response)
@@ -25,3 +34,8 @@ def make_request_and_assert() -> Dict[str, Any]:
)
return response.json()
if __name__ == "__main__":
# test for the case when server is healthy
test_make_request_and_assert()

View File

@@ -6,9 +6,9 @@
"ground": {
"answer": "GET localhost:8079/health responds with a 200 OK",
"should_contain": [],
"should_not_contain": [],
"files": [],
"type": "custom_python"
"should_not_contain": ["AssertionError"],
"files": ["test.py"],
"type": "execute_python_code"
},
"info": {
"difficulty": "advanced",

View File

@@ -2,8 +2,6 @@ import glob
import importlib
import json
import os
import pkgutil
import sys
import types
from pathlib import Path
from typing import Any, Dict
@@ -60,23 +58,7 @@ def generate_tests() -> None:
scores = self.get_scores(config)
# Check if make_assertion is defined and use it
if self.data.ground.type == "custom_python":
custom_python_location = (
f"{CURRENT_DIRECTORY}/../{challenge_location}/custom_python"
)
sys.path.append(str(custom_python_location))
for module_loader, name, ispkg in pkgutil.iter_modules(
[str(custom_python_location)]
):
module = importlib.import_module(name)
if hasattr(module, "make_assertion"):
make_assertion = getattr(module, "make_assertion")
make_assertion()
else:
assert 1 in scores
assert 1 in scores
# Parametrize the method here
test_method = pytest.mark.parametrize(