Add retrieval challenge test + run tests on CI pipeline (#51)

This commit is contained in:
merwanehamadi
2023-07-04 15:28:00 -07:00
committed by GitHub
parent e6e92e9952
commit f8e550773f
8 changed files with 94 additions and 12 deletions

View File

@@ -66,3 +66,43 @@ jobs:
cmd="poetry run autoflake --remove-all-unused-imports --recursive --ignore-init-module-imports --ignore-pass-after-docstring agbenchmark"
$cmd --check || (echo "You have unused imports or pass statements, please run '${cmd} --in-place'" && exit 1)
if: success() || failure()
tests:
  # CI job that installs the project with Poetry and runs the agbenchmark
  # suite in mock mode on the minimum supported Python version.
  runs-on: ubuntu-latest
  env:
    min-python-version: "3.10"
  steps:
    # Check out the pull request's head branch from the head repository
    # (which may be a fork), with full history (fetch-depth: 0).
    - name: Checkout repository
      uses: actions/checkout@v3
      with:
        fetch-depth: 0
        ref: ${{ github.event.pull_request.head.ref }}
        repository: ${{ github.event.pull_request.head.repo.full_name }}

    - name: Set up Python ${{ env.min-python-version }}
      uses: actions/setup-python@v2
      with:
        python-version: ${{ env.min-python-version }}

    # Expose today's date as a step output; it is part of the cache key
    # below, so the Poetry cache is rebuilt at most once per day.
    - id: get_date
      name: Get date
      run: echo "date=$(date +'%Y-%m-%d')" >> $GITHUB_OUTPUT

    - name: Install Poetry
      run: |
        curl -sSL https://install.python-poetry.org | python -

    # Cache key changes whenever pyproject.toml or poetry.lock changes,
    # or when the date rolls over.
    - name: Set up Poetry cache
      uses: actions/cache@v2
      with:
        path: |
          ~/.cache/pypoetry
          .venv
        key: ${{ runner.os }}-poetry-${{ hashFiles('**/pyproject.toml') }}-${{ hashFiles('**/poetry.lock') }}-${{ steps.get_date.outputs.date }}

    - name: Install dependencies
      run: |
        poetry install

    # Kept as its own step so a benchmark failure is reported as a test
    # failure rather than as a dependency-installation failure.
    - name: Run tests
      run: |
        poetry run agbenchmark start --mock

View File

@@ -1,21 +1,21 @@
{
"name": "retrieval1",
"category": ["basic"],
"task": "Print the the capital of America to a .txt file",
"name": "basic_information_retrieval",
"category": ["retrieval"],
"task": "Write the price of the book in this url 'http://books.toscrape.com/catalogue/meditations_33/index.html' to a .txt file.",
"dependencies": [],
"ground": {
"answer": "Washington",
"should_contain": ["Washington"],
"should_not_contain": ["New York", "Los Angeles", "San Francisco"],
"answer": "£25.89",
"should_contain": ["25.89"],
"should_not_contain": [],
"files": [".txt"]
},
"mock": {
"mock_func": "basic_write_file_mock",
"mock_task": "What is the capital of America?"
"mock_func": "basic_retrieval_mock",
"mock_task": "Write the price of the book in this url 'http://books.toscrape.com/catalogue/meditations_33/index.html' to a .txt file."
},
"info": {
"difficulty": "basic",
"description": "Tests the writing to file",
"description": "Tests ability to retrieve information from a website.",
"side_effects": ["tests if there is in fact an LLM attached"]
}
}

View File

@@ -1,18 +1,24 @@
import os
from pathlib import Path
from typing import Any, Dict
import pytest
from agbenchmark.challenges.retrieval.retrieval import RetrievalChallenge
class TestRetrieval1(RetrievalChallenge):
class TestRetrieval(RetrievalChallenge):
"""The first information-retrieval challenge"""
def get_file_path(self) -> str: # all tests must implement this method
return os.path.join(os.path.dirname(__file__), "r1_data.json")
@pytest.mark.depends(on=["basic_write_file"])
def test_method(self, config: Dict[str, Any]) -> None:
self.setup_challenge(config)
files_contents = self.open_files(config["workspace"], self.data.ground.files)
workspace = Path(os.getcwd()) / config["workspace"]
files_contents = self.open_files(workspace, self.data.ground.files)
scores = []
for file_content in files_contents:

View File

@@ -22,3 +22,14 @@ def basic_write_file_mock(task: str, workspace: str) -> None:
"file_to_check.txt",
"Washington DC is the capital of the United States of America",
)
def basic_retrieval_mock(task: str, workspace: str) -> None:
    """
    Mock for the basic retrieval challenge.

    Writes the fixed answer "25.89" to "file_to_check.txt" in the given
    workspace through Challenge.write_to_file. The task argument is
    accepted but not used.
    """
    mock_file_name = "file_to_check.txt"
    mock_answer = "25.89"
    Challenge.write_to_file(workspace, mock_file_name, mock_answer)

View File

@@ -1 +0,0 @@
Washington DC is the capital of the United States of America

View File

@@ -1,6 +1,7 @@
import os
from pathlib import Path
from typing import Any, Dict
import pytest
from agbenchmark.tests.basic_abilities.basic_challenge import BasicChallenge

View File

@@ -3,3 +3,16 @@ follow_imports = skip
check_untyped_defs = True
disallow_untyped_defs = True
exclude = ^(agent/.*\.py)$
ignore_missing_imports = True
[mypy-agbenchmark.mocks.mock_manager.*]
ignore_errors = True
[mypy-agbenchmark.tests.basic_abilities.basic_challenge.*]
ignore_errors = True
[mypy-agbenchmark.mocks.tests.basic_mocks.*]
ignore_errors = True
[mypy-agbenchmark.tests.regression.RegressionManager.*]
ignore_errors = True

View File

@@ -1,7 +1,19 @@
{
"TestRetrieval": {
"difficulty": "basic",
"dependencies": [],
"test": "agbenchmark/challenges/retrieval/r1/r1_test.py"
},
"TestWriteFile": {
"difficulty": "basic",
"dependencies": [],
"test": "agbenchmark/tests/basic_abilities/write_file/write_file_test.py"
},
"TestReadFile": {
"difficulty": "basic",
"dependencies": [
"basic_write_file"
],
"test": "agbenchmark/tests/basic_abilities/read_file/read_file_test.py"
}
}