diff --git a/agbenchmark/challenges/retrieval/Retrieval.py b/agbenchmark/challenges/retrieval/Retrieval.py index 9434d69c..b8aa81ce 100644 --- a/agbenchmark/challenges/retrieval/Retrieval.py +++ b/agbenchmark/challenges/retrieval/Retrieval.py @@ -1,6 +1,8 @@ from agbenchmark.Challenge import Challenge +import pytest +@pytest.mark.retrieval class RetrievalChallenge(Challenge): """Challenge for information-retrieval""" diff --git a/agbenchmark/challenges/retrieval/r1/r1_data.json b/agbenchmark/challenges/retrieval/r1/r1_data.json index fe05b6d5..562d1c36 100644 --- a/agbenchmark/challenges/retrieval/r1/r1_data.json +++ b/agbenchmark/challenges/retrieval/r1/r1_data.json @@ -1,14 +1,14 @@ { "category": ["basic"], - "dependencies": ["test_write_file"], "task": "What is the capital of America?", + "dependencies": [], "ground": { "answer": "Washington", "should_contain": ["Washington"], "should_not_contain": ["New York", "Los Angeles", "San Francisco"], "files": ["file_to_check.txt"] }, - "mock_func": "write_file_mock", + "mock_func": "basic_write_file_mock", "info": { "difficulty": "easy", "description": "Tests the writing to file", diff --git a/agbenchmark/challenges/retrieval/r1/r1_test.py b/agbenchmark/challenges/retrieval/r1/r1_test.py index d37c5e79..5e6d6abf 100644 --- a/agbenchmark/challenges/retrieval/r1/r1_test.py +++ b/agbenchmark/challenges/retrieval/r1/r1_test.py @@ -2,6 +2,8 @@ import pytest from agbenchmark.challenges.retrieval.Retrieval import RetrievalChallenge from agbenchmark.challenges.define_task_types import ChallengeData, Ground import os +from pytest_dependency import depends + data = ChallengeData.deserialize( os.path.join(os.path.dirname(__file__), "r1_data.json") @@ -16,8 +18,8 @@ class TestRetrieval1(RetrievalChallenge): [(data.task, data.mock_func)], indirect=True, ) - @pytest.mark.retrieval - def test_retrieval(self, workspace): + def test_retrieval(self, request, workspace): + depends(request, data.dependencies) file = self.open_file(workspace, data.ground.files[0]) score = self.scoring(file, data.ground) diff --git a/agbenchmark/tests/basic_abilities/BasicChallenge.py b/agbenchmark/tests/basic_abilities/BasicChallenge.py index 56320740..0cada86c 100644 --- a/agbenchmark/tests/basic_abilities/BasicChallenge.py +++ b/agbenchmark/tests/basic_abilities/BasicChallenge.py @@ -2,6 +2,7 @@ import pytest from agbenchmark.Challenge import Challenge +@pytest.mark.run(order=1) @pytest.mark.basic class BasicChallenge(Challenge): pass diff --git a/agbenchmark/tests/basic_abilities/read_file/r_file_data.json b/agbenchmark/tests/basic_abilities/read_file/r_file_data.json index 8c5ef62d..4d04f33e 100644 --- a/agbenchmark/tests/basic_abilities/read_file/r_file_data.json +++ b/agbenchmark/tests/basic_abilities/read_file/r_file_data.json @@ -1,7 +1,9 @@ { "category": ["basic"], "task": "Write the string 'random string' before any existing text to the file called file_to_check.txt", - "dependencies": ["test_write_file"], + "dependencies": [ + "agbenchmark/tests/basic_abilities/write_file/write_file_test.py::TestWriteFile::test_write_file" + ], "ground": { "answer": "random string: this is how we're doing", "should_contain": ["random string: this is how we're doing"], diff --git a/agbenchmark/tests/basic_abilities/read_file/read_file_test.py b/agbenchmark/tests/basic_abilities/read_file/read_file_test.py index 03b2d6ca..ad08da4e 100644 --- a/agbenchmark/tests/basic_abilities/read_file/read_file_test.py +++ b/agbenchmark/tests/basic_abilities/read_file/read_file_test.py @@ -3,7 +3,6 @@ from agbenchmark.challenges.define_task_types import ChallengeData from agbenchmark.Challenge import Challenge from agbenchmark.tests.basic_abilities.BasicChallenge import BasicChallenge import os -from pytest_dependency import depends data = ChallengeData.deserialize( os.path.join(os.path.dirname(__file__), "r_file_data.json") @@ -26,9 +25,8 @@ class TestReadFile(BasicChallenge): [(data.task, data.mock_func)], indirect=True, ) - def test_read_file(self, request, workspace): - depends(request, data.dependencies) - + @pytest.mark.order(after=data.dependencies) + def test_read_file(self, workspace): file = self.open_file(workspace, data.ground.files[0]) score = self.scoring(file, data.ground) diff --git a/agbenchmark/tests/basic_abilities/write_file/write_file_test.py b/agbenchmark/tests/basic_abilities/write_file/write_file_test.py index b09162e3..4c94320e 100644 --- a/agbenchmark/tests/basic_abilities/write_file/write_file_test.py +++ b/agbenchmark/tests/basic_abilities/write_file/write_file_test.py @@ -16,7 +16,6 @@ class TestWriteFile(BasicChallenge): [(data.task, data.mock_func)], indirect=True, ) - @pytest.mark.dependency() def test_write_file(self, workspace): file = self.open_file(workspace, data.ground.files[0]) diff --git a/agbenchmark/tests/regression/regression_tests.txt b/agbenchmark/tests/regression/regression_tests.txt index b831003f..df27f312 100644 --- a/agbenchmark/tests/regression/regression_tests.txt +++ b/agbenchmark/tests/regression/regression_tests.txt @@ -1,3 +1,3 @@ -agbenchmark/tests/basic_abilities/read_file/read_file_test.py::TestReadFile::test_retrieval[server_response0] -agbenchmark/tests/basic_abilities/write_file/write_file_test.py::TestWriteFile::test_retrieval[server_response0] agbenchmark/tests/basic_abilities/write_file/write_file_test.py::TestWriteFile::test_write_file[server_response0] +agbenchmark/challenges/retrieval/r1/r1_test.py::TestRetrieval1::test_retrieval[server_response0] +agbenchmark/tests/basic_abilities/read_file/read_file_test.py::TestReadFile::test_read_file[server_response0] diff --git a/poetry.lock b/poetry.lock index f6f24c5f..4764bf49 100644 --- a/poetry.lock +++ b/poetry.lock @@ -608,6 +608,23 @@ files = [ [package.dependencies] pytest = ">=3.6.0" +[[package]] +name = "pytest-order" +version = "1.1.0" +description = "pytest plugin to run your tests in a specific order" +optional = false +python-versions = ">=3.6" +files = [ + {file = "pytest-order-1.1.0.tar.gz", hash = "sha256:139d25b30826b78eebb42722f747eab14c44b88059d7a71d4f79d14a057269a5"}, + {file = "pytest_order-1.1.0-py3-none-any.whl", hash = "sha256:3b3730969c97900fa5cd31ecff80847680ed56b2490954565c14949ba60d9371"}, +] + +[package.dependencies] +pytest = [ + {version = ">=5.0", markers = "python_version < \"3.10\""}, + {version = ">=6.2.4", markers = "python_version >= \"3.10\""}, +] + [[package]] name = "pytest-ordering" version = "0.6" @@ -793,4 +810,4 @@ multidict = ">=4.0" [metadata] lock-version = "2.0" python-versions = "^3.9" -content-hash = "65b68e43440faafbd2883edd6b10bc177ab334380e908c27c9f511703065f8e7" +content-hash = "64d22c864fe244497b7ebc81ead1be0b0570b14ee1ced323813d427672e17ff3" diff --git a/pyproject.toml b/pyproject.toml index faee61c2..fd2c5204 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -16,6 +16,7 @@ openai = "^0.27.8" pydantic = "^1.10.9" pytest-dependency = "^0.5.1" pytest-ordering = "^0.6" +pytest-order = "^1.1.0" [build-system] @@ -24,7 +25,7 @@ build-backend = "poetry.core.masonry.api" [tool.pytest.ini_options] minversion = "6.0" -addopts = "-ra -q" +addopts = "--order-dependencies" # -ra -q testpaths = [ "tests", "agbenchmark", ]