mirror of
https://github.com/aljazceru/Auto-GPT.git
synced 2025-12-18 14:34:23 +01:00
The previous approach (pytest-dependency / pytest-order) was not a solution; the solution is pytest-depends
This commit is contained in:
@@ -6,7 +6,7 @@ Input:
|
|||||||
|
|
||||||
- **category** (str[]): Category of the challenge such as 'retrieval', 'comprehension', etc. _This is not currently used, but it may be needed in the future._
|
- **category** (str[]): Category of the challenge such as 'retrieval', 'comprehension', etc. _This is not currently used, but it may be needed in the future._
|
||||||
- **task** (str): The task that the agent needs to solve.
|
- **task** (str): The task that the agent needs to solve.
|
||||||
- **dependencies** (str[]): The dependencies that the challenge needs to run.
|
- **dependencies** (str[]): The dependencies that the challenge needs to run. Each entry must be the full pytest node ID of the test function (e.g. `path/to/test_file.py::TestClass::test_name`).
|
||||||
- **ground** (dict): The ground truth.
|
- **ground** (dict): The ground truth.
|
||||||
- **answer** (str): The raw text of the ground truth answer.
|
- **answer** (str): The raw text of the ground truth answer.
|
||||||
- **should_contain** (list): The exact strings that are required in the final answer.
|
- **should_contain** (list): The exact strings that are required in the final answer.
|
||||||
@@ -23,18 +23,20 @@ Example:
|
|||||||
```python
|
```python
|
||||||
{
|
{
|
||||||
"category": ["basic"],
|
"category": ["basic"],
|
||||||
"task": "What is the capital of America?",
|
"task": "Write the string 'random string' before any existing text to the file called file_to_check.txt",
|
||||||
|
"dependencies": [
|
||||||
|
"agbenchmark/tests/basic_abilities/write_file/write_file_test.py::TestWriteFile::test_write_file"
|
||||||
|
],
|
||||||
"ground": {
|
"ground": {
|
||||||
"answer": "Washington",
|
"answer": "random string: this is how we're doing",
|
||||||
"should_contain": ["Washington"],
|
"should_contain": ["random string: this is how we're doing"],
|
||||||
"should_not_contain": ["New York", "Los Angeles", "San Francisco"],
|
|
||||||
"files": ["file_to_check.txt"]
|
"files": ["file_to_check.txt"]
|
||||||
},
|
},
|
||||||
"mock_func": "write_file_mock",
|
"mock_func": "basic_read_file_mock",
|
||||||
"info": {
|
"info": {
|
||||||
"difficulty": "easy",
|
"description": "This reads the file quickly",
|
||||||
"description": "Tests the writing to file",
|
"difficulty": "basic",
|
||||||
"side_effects": ["tests if there is in fact an LLM attached"]
|
"side_effects": [""]
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -2,7 +2,6 @@ import pytest
|
|||||||
from agbenchmark.challenges.retrieval.Retrieval import RetrievalChallenge
|
from agbenchmark.challenges.retrieval.Retrieval import RetrievalChallenge
|
||||||
from agbenchmark.challenges.define_task_types import ChallengeData, Ground
|
from agbenchmark.challenges.define_task_types import ChallengeData, Ground
|
||||||
import os
|
import os
|
||||||
from pytest_dependency import depends
|
|
||||||
|
|
||||||
|
|
||||||
data = ChallengeData.deserialize(
|
data = ChallengeData.deserialize(
|
||||||
@@ -19,7 +18,6 @@ class TestRetrieval1(RetrievalChallenge):
|
|||||||
indirect=True,
|
indirect=True,
|
||||||
)
|
)
|
||||||
def test_retrieval(self, request, workspace):
|
def test_retrieval(self, request, workspace):
|
||||||
depends(request, data.dependencies)
|
|
||||||
file = self.open_file(workspace, data.ground.files[0])
|
file = self.open_file(workspace, data.ground.files[0])
|
||||||
|
|
||||||
score = self.scoring(file, data.ground)
|
score = self.scoring(file, data.ground)
|
||||||
|
|||||||
@@ -2,7 +2,6 @@ import pytest
|
|||||||
from agbenchmark.Challenge import Challenge
|
from agbenchmark.Challenge import Challenge
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.run(order=1)
|
|
||||||
@pytest.mark.basic
|
@pytest.mark.basic
|
||||||
class BasicChallenge(Challenge):
|
class BasicChallenge(Challenge):
|
||||||
pass
|
pass
|
||||||
|
|||||||
@@ -1,9 +1,7 @@
|
|||||||
{
|
{
|
||||||
"category": ["basic"],
|
"category": ["basic"],
|
||||||
"task": "Write the string 'random string' before any existing text to the file called file_to_check.txt",
|
"task": "Write the string 'random string' before any existing text to the file called file_to_check.txt",
|
||||||
"dependencies": [
|
"dependencies": ["test_write_file"],
|
||||||
"agbenchmark/tests/basic_abilities/write_file/write_file_test.py::TestWriteFile::test_write_file"
|
|
||||||
],
|
|
||||||
"ground": {
|
"ground": {
|
||||||
"answer": "random string: this is how we're doing",
|
"answer": "random string: this is how we're doing",
|
||||||
"should_contain": ["random string: this is how we're doing"],
|
"should_contain": ["random string: this is how we're doing"],
|
||||||
|
|||||||
@@ -25,7 +25,7 @@ class TestReadFile(BasicChallenge):
|
|||||||
[(data.task, data.mock_func)],
|
[(data.task, data.mock_func)],
|
||||||
indirect=True,
|
indirect=True,
|
||||||
)
|
)
|
||||||
@pytest.mark.order(after=data.dependencies)
|
@pytest.mark.depends(on=data.dependencies)
|
||||||
def test_read_file(self, workspace):
|
def test_read_file(self, workspace):
|
||||||
file = self.open_file(workspace, data.ground.files[0])
|
file = self.open_file(workspace, data.ground.files[0])
|
||||||
|
|
||||||
|
|||||||
@@ -16,6 +16,7 @@ class TestWriteFile(BasicChallenge):
|
|||||||
[(data.task, data.mock_func)],
|
[(data.task, data.mock_func)],
|
||||||
indirect=True,
|
indirect=True,
|
||||||
)
|
)
|
||||||
|
@pytest.mark.depends(name="test_write_file")
|
||||||
def test_write_file(self, workspace):
|
def test_write_file(self, workspace):
|
||||||
file = self.open_file(workspace, data.ground.files[0])
|
file = self.open_file(workspace, data.ground.files[0])
|
||||||
|
|
||||||
|
|||||||
@@ -1,3 +1,3 @@
|
|||||||
agbenchmark/tests/basic_abilities/write_file/write_file_test.py::TestWriteFile::test_write_file[server_response0]
|
|
||||||
agbenchmark/challenges/retrieval/r1/r1_test.py::TestRetrieval1::test_retrieval[server_response0]
|
agbenchmark/challenges/retrieval/r1/r1_test.py::TestRetrieval1::test_retrieval[server_response0]
|
||||||
|
agbenchmark/tests/basic_abilities/write_file/write_file_test.py::TestWriteFile::test_write_file[server_response0]
|
||||||
agbenchmark/tests/basic_abilities/read_file/read_file_test.py::TestReadFile::test_read_file[server_response0]
|
agbenchmark/tests/basic_abilities/read_file/read_file_test.py::TestReadFile::test_read_file[server_response0]
|
||||||
|
|||||||
80
poetry.lock
generated
80
poetry.lock
generated
@@ -368,6 +368,20 @@ files = [
|
|||||||
{file = "frozenlist-1.3.3.tar.gz", hash = "sha256:58bcc55721e8a90b88332d6cd441261ebb22342e238296bb330968952fbb3a6a"},
|
{file = "frozenlist-1.3.3.tar.gz", hash = "sha256:58bcc55721e8a90b88332d6cd441261ebb22342e238296bb330968952fbb3a6a"},
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "future-fstrings"
|
||||||
|
version = "1.2.0"
|
||||||
|
description = "A backport of fstrings to python<3.6"
|
||||||
|
optional = false
|
||||||
|
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*"
|
||||||
|
files = [
|
||||||
|
{file = "future_fstrings-1.2.0-py2.py3-none-any.whl", hash = "sha256:90e49598b553d8746c4dc7d9442e0359d038c3039d802c91c0a55505da318c63"},
|
||||||
|
{file = "future_fstrings-1.2.0.tar.gz", hash = "sha256:6cf41cbe97c398ab5a81168ce0dbb8ad95862d3caf23c21e4430627b90844089"},
|
||||||
|
]
|
||||||
|
|
||||||
|
[package.extras]
|
||||||
|
rewrite = ["tokenize-rt (>=3)"]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "idna"
|
name = "idna"
|
||||||
version = "3.4"
|
version = "3.4"
|
||||||
@@ -473,6 +487,24 @@ files = [
|
|||||||
{file = "multidict-6.0.4.tar.gz", hash = "sha256:3666906492efb76453c0e7b97f2cf459b0682e7402c0489a95484965dbc1da49"},
|
{file = "multidict-6.0.4.tar.gz", hash = "sha256:3666906492efb76453c0e7b97f2cf459b0682e7402c0489a95484965dbc1da49"},
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "networkx"
|
||||||
|
version = "3.1"
|
||||||
|
description = "Python package for creating and manipulating graphs and networks"
|
||||||
|
optional = false
|
||||||
|
python-versions = ">=3.8"
|
||||||
|
files = [
|
||||||
|
{file = "networkx-3.1-py3-none-any.whl", hash = "sha256:4f33f68cb2afcf86f28a45f43efc27a9386b535d567d2127f8f61d51dec58d36"},
|
||||||
|
{file = "networkx-3.1.tar.gz", hash = "sha256:de346335408f84de0eada6ff9fafafff9bcda11f0a0dfaa931133debb146ab61"},
|
||||||
|
]
|
||||||
|
|
||||||
|
[package.extras]
|
||||||
|
default = ["matplotlib (>=3.4)", "numpy (>=1.20)", "pandas (>=1.3)", "scipy (>=1.8)"]
|
||||||
|
developer = ["mypy (>=1.1)", "pre-commit (>=3.2)"]
|
||||||
|
doc = ["nb2plots (>=0.6)", "numpydoc (>=1.5)", "pillow (>=9.4)", "pydata-sphinx-theme (>=0.13)", "sphinx (>=6.1)", "sphinx-gallery (>=0.12)", "texext (>=0.6.7)"]
|
||||||
|
extra = ["lxml (>=4.6)", "pydot (>=1.4.2)", "pygraphviz (>=1.10)", "sympy (>=1.10)"]
|
||||||
|
test = ["codecov (>=2.1)", "pytest (>=7.2)", "pytest-cov (>=4.0)"]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "openai"
|
name = "openai"
|
||||||
version = "0.27.8"
|
version = "0.27.8"
|
||||||
@@ -596,49 +628,21 @@ tomli = {version = ">=1.0.0", markers = "python_version < \"3.11\""}
|
|||||||
testing = ["argcomplete", "attrs (>=19.2.0)", "hypothesis (>=3.56)", "mock", "nose", "pygments (>=2.7.2)", "requests", "setuptools", "xmlschema"]
|
testing = ["argcomplete", "attrs (>=19.2.0)", "hypothesis (>=3.56)", "mock", "nose", "pygments (>=2.7.2)", "requests", "setuptools", "xmlschema"]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "pytest-dependency"
|
name = "pytest-depends"
|
||||||
version = "0.5.1"
|
version = "1.0.1"
|
||||||
description = "Manage dependencies of tests"
|
description = "Tests that depend on other tests"
|
||||||
optional = false
|
optional = false
|
||||||
python-versions = "*"
|
python-versions = "*"
|
||||||
files = [
|
files = [
|
||||||
{file = "pytest-dependency-0.5.1.tar.gz", hash = "sha256:c2a892906192663f85030a6ab91304e508e546cddfe557d692d61ec57a1d946b"},
|
{file = "pytest-depends-1.0.1.tar.gz", hash = "sha256:90a28e2b87b75b18abd128c94015248544acac20e4392e9921e5a86f93319dfe"},
|
||||||
|
{file = "pytest_depends-1.0.1-py3-none-any.whl", hash = "sha256:a1df072bcc93d77aca3f0946903f5fed8af2d9b0056db1dfc9ed5ac164ab0642"},
|
||||||
]
|
]
|
||||||
|
|
||||||
[package.dependencies]
|
[package.dependencies]
|
||||||
pytest = ">=3.6.0"
|
colorama = "*"
|
||||||
|
future-fstrings = "*"
|
||||||
[[package]]
|
networkx = "*"
|
||||||
name = "pytest-order"
|
pytest = ">=3"
|
||||||
version = "1.1.0"
|
|
||||||
description = "pytest plugin to run your tests in a specific order"
|
|
||||||
optional = false
|
|
||||||
python-versions = ">=3.6"
|
|
||||||
files = [
|
|
||||||
{file = "pytest-order-1.1.0.tar.gz", hash = "sha256:139d25b30826b78eebb42722f747eab14c44b88059d7a71d4f79d14a057269a5"},
|
|
||||||
{file = "pytest_order-1.1.0-py3-none-any.whl", hash = "sha256:3b3730969c97900fa5cd31ecff80847680ed56b2490954565c14949ba60d9371"},
|
|
||||||
]
|
|
||||||
|
|
||||||
[package.dependencies]
|
|
||||||
pytest = [
|
|
||||||
{version = ">=5.0", markers = "python_version < \"3.10\""},
|
|
||||||
{version = ">=6.2.4", markers = "python_version >= \"3.10\""},
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "pytest-ordering"
|
|
||||||
version = "0.6"
|
|
||||||
description = "pytest plugin to run your tests in a specific order"
|
|
||||||
optional = false
|
|
||||||
python-versions = "*"
|
|
||||||
files = [
|
|
||||||
{file = "pytest-ordering-0.6.tar.gz", hash = "sha256:561ad653626bb171da78e682f6d39ac33bb13b3e272d406cd555adb6b006bda6"},
|
|
||||||
{file = "pytest_ordering-0.6-py2-none-any.whl", hash = "sha256:27fba3fc265f5d0f8597e7557885662c1bdc1969497cd58aff6ed21c3b617de2"},
|
|
||||||
{file = "pytest_ordering-0.6-py3-none-any.whl", hash = "sha256:3f314a178dbeb6777509548727dc69edf22d6d9a2867bf2d310ab85c403380b6"},
|
|
||||||
]
|
|
||||||
|
|
||||||
[package.dependencies]
|
|
||||||
pytest = "*"
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "requests"
|
name = "requests"
|
||||||
@@ -810,4 +814,4 @@ multidict = ">=4.0"
|
|||||||
[metadata]
|
[metadata]
|
||||||
lock-version = "2.0"
|
lock-version = "2.0"
|
||||||
python-versions = "^3.9"
|
python-versions = "^3.9"
|
||||||
content-hash = "64d22c864fe244497b7ebc81ead1be0b0570b14ee1ced323813d427672e17ff3"
|
content-hash = "a03dfa9938e062bdf564b7678df9dc9277c7c8e504f14f98084c5a2d497a8f7c"
|
||||||
|
|||||||
@@ -14,9 +14,7 @@ click = "^8.1.3"
|
|||||||
requests = "^2.31.0"
|
requests = "^2.31.0"
|
||||||
openai = "^0.27.8"
|
openai = "^0.27.8"
|
||||||
pydantic = "^1.10.9"
|
pydantic = "^1.10.9"
|
||||||
pytest-dependency = "^0.5.1"
|
pytest-depends = "^1.0.1"
|
||||||
pytest-ordering = "^0.6"
|
|
||||||
pytest-order = "^1.1.0"
|
|
||||||
|
|
||||||
|
|
||||||
[build-system]
|
[build-system]
|
||||||
@@ -25,7 +23,7 @@ build-backend = "poetry.core.masonry.api"
|
|||||||
|
|
||||||
[tool.pytest.ini_options]
|
[tool.pytest.ini_options]
|
||||||
minversion = "6.0"
|
minversion = "6.0"
|
||||||
addopts = "--order-dependencies" # -ra -q
|
addopts = "-ra -q"
|
||||||
testpaths = [
|
testpaths = [
|
||||||
"tests", "agbenchmark",
|
"tests", "agbenchmark",
|
||||||
]
|
]
|
||||||
|
|||||||
Reference in New Issue
Block a user