diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 2d25e4ff..6fb348e9 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -66,3 +66,43 @@ jobs: cmd="poetry run autoflake --remove-all-unused-imports --recursive --ignore-init-module-imports --ignore-pass-after-docstring agbenchmark" $cmd --check || (echo "You have unused imports or pass statements, please run '${cmd} --in-place'" && exit 1) if: success() || failure() + + tests: + + runs-on: ubuntu-latest + env: + min-python-version: "3.10" + + steps: + - name: Checkout repository + uses: actions/checkout@v3 + with: + fetch-depth: 0 + ref: ${{ github.event.pull_request.head.ref }} + repository: ${{ github.event.pull_request.head.repo.full_name }} + + - name: Set up Python ${{ env.min-python-version }} + uses: actions/setup-python@v2 + with: + python-version: ${{ env.min-python-version }} + + - id: get_date + name: Get date + run: echo "date=$(date +'%Y-%m-%d')" >> $GITHUB_OUTPUT + + - name: Install Poetry + run: | + curl -sSL https://install.python-poetry.org | python - + + - name: Set up Poetry cache + uses: actions/cache@v2 + with: + path: | + ~/.cache/pypoetry + .venv + key: ${{ runner.os }}-poetry-${{ hashFiles('**/pyproject.toml') }}-${{ hashFiles('**/poetry.lock') }}-${{ steps.get_date.outputs.date }} + + - name: Install dependencies + run: | + poetry install + poetry run agbenchmark start --mock diff --git a/agbenchmark/challenges/retrieval/r1/r1_data.json b/agbenchmark/challenges/retrieval/r1/r1_data.json index 80c5e51e..44fce24e 100644 --- a/agbenchmark/challenges/retrieval/r1/r1_data.json +++ b/agbenchmark/challenges/retrieval/r1/r1_data.json @@ -1,21 +1,21 @@ { - "name": "retrieval1", - "category": ["basic"], - "task": "Print the the capital of America to a .txt file", + "name": "basic_information_retrieval", + "category": ["retrieval"], + "task": "Write the price of the book in this url 'http://books.toscrape.com/catalogue/meditations_33/index.html' to a .txt file.", "dependencies": [], "ground": { - "answer": "Washington", - "should_contain": ["Washington"], - "should_not_contain": ["New York", "Los Angeles", "San Francisco"], + "answer": "£25.89", + "should_contain": ["25.89"], + "should_not_contain": [], "files": [".txt"] }, "mock": { - "mock_func": "basic_write_file_mock", - "mock_task": "What is the capital of America?" + "mock_func": "basic_retrieval_mock", + "mock_task": "Write the price of the book in this url 'http://books.toscrape.com/catalogue/meditations_33/index.html' to a .txt file." }, "info": { "difficulty": "basic", - "description": "Tests the writing to file", + "description": "Tests ability to retrieve information from a website.", "side_effects": ["tests if there is in fact an LLM attached"] } } diff --git a/agbenchmark/challenges/retrieval/r1/r1_test.py b/agbenchmark/challenges/retrieval/r1/r1_test.py index 675ac8bd..91478496 100644 --- a/agbenchmark/challenges/retrieval/r1/r1_test.py +++ b/agbenchmark/challenges/retrieval/r1/r1_test.py @@ -1,18 +1,24 @@ import os +from pathlib import Path from typing import Any, Dict +import pytest + from agbenchmark.challenges.retrieval.retrieval import RetrievalChallenge -class TestRetrieval1(RetrievalChallenge): +class TestRetrieval(RetrievalChallenge): """The first information-retrieval challenge""" def get_file_path(self) -> str: # all tests must implement this method return os.path.join(os.path.dirname(__file__), "r1_data.json") + @pytest.mark.depends(on=["basic_write_file"]) def test_method(self, config: Dict[str, Any]) -> None: self.setup_challenge(config) - files_contents = self.open_files(config["workspace"], self.data.ground.files) + + workspace = Path(os.getcwd()) / config["workspace"] + files_contents = self.open_files(workspace, self.data.ground.files) scores = [] for file_content in files_contents: diff --git a/agbenchmark/mocks/tests/basic_mocks.py b/agbenchmark/mocks/tests/basic_mocks.py index c79a8e2d..07d8a6de 100644 --- a/agbenchmark/mocks/tests/basic_mocks.py +++ b/agbenchmark/mocks/tests/basic_mocks.py @@ -22,3 +22,14 @@ def basic_write_file_mock(task: str, workspace: str) -> None: "file_to_check.txt", "Washington DC is the capital of the United States of America", ) + + +def basic_retrieval_mock(task: str, workspace: str) -> None: + """ + This mock writes to a file (creates one if it doesn't exist) + """ + Challenge.write_to_file( + workspace, + "file_to_check.txt", + "25.89", + ) diff --git a/agbenchmark/mocks/workspace/file_to_check.txt b/agbenchmark/mocks/workspace/file_to_check.txt deleted file mode 100644 index 48dc8cff..00000000 --- a/agbenchmark/mocks/workspace/file_to_check.txt +++ /dev/null @@ -1 +0,0 @@ -Washington DC is the capital of the United States of America \ No newline at end of file diff --git a/agbenchmark/tests/basic_abilities/write_file/write_file_test.py b/agbenchmark/tests/basic_abilities/write_file/write_file_test.py index c59e03cc..966df7f2 100644 --- a/agbenchmark/tests/basic_abilities/write_file/write_file_test.py +++ b/agbenchmark/tests/basic_abilities/write_file/write_file_test.py @@ -1,6 +1,7 @@ import os from pathlib import Path from typing import Any, Dict + import pytest from agbenchmark.tests.basic_abilities.basic_challenge import BasicChallenge diff --git a/mypy.ini b/mypy.ini index 315ecae5..ceb13fcd 100644 --- a/mypy.ini +++ b/mypy.ini @@ -3,3 +3,16 @@ follow_imports = skip check_untyped_defs = True disallow_untyped_defs = True exclude = ^(agent/.*\.py)$ +ignore_missing_imports = True + +[mypy-agbenchmark.mocks.mock_manager.*] +ignore_errors = True + +[mypy-agbenchmark.tests.basic_abilities.basic_challenge.*] +ignore_errors = True + +[mypy-agbenchmark.mocks.tests.basic_mocks.*] +ignore_errors = True + +[mypy-agbenchmark.tests.regression.RegressionManager.*] +ignore_errors = True diff --git a/regression_tests.json b/regression_tests.json index e3633a2a..9b998d11 100644 --- a/regression_tests.json +++ b/regression_tests.json @@ -1,7 +1,19 @@ { + "TestRetrieval": { + "difficulty": "basic", + "dependencies": [], + "test": "agbenchmark/challenges/retrieval/r1/r1_test.py" + }, "TestWriteFile": { "difficulty": "basic", "dependencies": [], "test": "agbenchmark/tests/basic_abilities/write_file/write_file_test.py" + }, + "TestReadFile": { + "difficulty": "basic", + "dependencies": [ + "basic_write_file" + ], + "test": "agbenchmark/tests/basic_abilities/read_file/read_file_test.py" } } \ No newline at end of file