mirror of
https://github.com/aljazceru/Auto-GPT.git
synced 2026-01-28 10:24:26 +01:00
Add retrieval challenge test + run tests on CI pipeline (#51)
This commit is contained in:
40
.github/workflows/ci.yml
vendored
40
.github/workflows/ci.yml
vendored
@@ -66,3 +66,43 @@ jobs:
|
||||
cmd="poetry run autoflake --remove-all-unused-imports --recursive --ignore-init-module-imports --ignore-pass-after-docstring agbenchmark"
|
||||
$cmd --check || (echo "You have unused imports or pass statements, please run '${cmd} --in-place'" && exit 1)
|
||||
if: success() || failure()
|
||||
|
||||
tests:
  # Runs the agbenchmark suite in mock mode on the Python version named by
  # `min-python-version`.
  runs-on: ubuntu-latest
  env:
    min-python-version: "3.10"

  steps:
    - name: Checkout repository
      uses: actions/checkout@v4
      with:
        fetch-depth: 0
        ref: ${{ github.event.pull_request.head.ref }}
        repository: ${{ github.event.pull_request.head.repo.full_name }}

    - name: Set up Python ${{ env.min-python-version }}
      uses: actions/setup-python@v5
      with:
        python-version: ${{ env.min-python-version }}

    # The date is part of the cache key below, so the cache is rebuilt daily.
    - id: get_date
      name: Get date
      run: echo "date=$(date +'%Y-%m-%d')" >> "$GITHUB_OUTPUT"

    - name: Install Poetry
      run: |
        curl -sSL https://install.python-poetry.org | python -

    - name: Set up Poetry cache
      uses: actions/cache@v4
      with:
        path: |
          ~/.cache/pypoetry
          .venv
        key: ${{ runner.os }}-poetry-${{ hashFiles('**/pyproject.toml') }}-${{ hashFiles('**/poetry.lock') }}-${{ steps.get_date.outputs.date }}

    - name: Install dependencies
      run: |
        poetry install

    # Kept separate from installation so a failing benchmark is reported as a
    # test failure rather than as a dependency-install failure.
    - name: Run tests
      run: |
        poetry run agbenchmark start --mock
|
||||
|
||||
@@ -1,21 +1,21 @@
|
||||
{
|
||||
"name": "retrieval1",
|
||||
"category": ["basic"],
|
||||
"task": "Print the the capital of America to a .txt file",
|
||||
"name": "basic_information_retrieval",
|
||||
"category": ["retrieval"],
|
||||
"task": "Write the price of the book in this url 'http://books.toscrape.com/catalogue/meditations_33/index.html' to a .txt file.",
|
||||
"dependencies": [],
|
||||
"ground": {
|
||||
"answer": "Washington",
|
||||
"should_contain": ["Washington"],
|
||||
"should_not_contain": ["New York", "Los Angeles", "San Francisco"],
|
||||
"answer": "£25.89",
|
||||
"should_contain": ["25.89"],
|
||||
"should_not_contain": [],
|
||||
"files": [".txt"]
|
||||
},
|
||||
"mock": {
|
||||
"mock_func": "basic_write_file_mock",
|
||||
"mock_task": "What is the capital of America?"
|
||||
"mock_func": "basic_retrieval_mock",
|
||||
"mock_task": "Write the price of the book in this url 'http://books.toscrape.com/catalogue/meditations_33/index.html' to a .txt file."
|
||||
},
|
||||
"info": {
|
||||
"difficulty": "basic",
|
||||
"description": "Tests the writing to file",
|
||||
"description": "Tests ability to retrieve information from a website.",
|
||||
"side_effects": ["tests if there is in fact an LLM attached"]
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,18 +1,24 @@
|
||||
import os
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict
|
||||
|
||||
import pytest
|
||||
|
||||
from agbenchmark.challenges.retrieval.retrieval import RetrievalChallenge
|
||||
|
||||
|
||||
class TestRetrieval1(RetrievalChallenge):
|
||||
class TestRetrieval(RetrievalChallenge):
|
||||
"""The first information-retrieval challenge"""
|
||||
|
||||
def get_file_path(self) -> str:  # all tests must implement this method
    """Return the path to "r1_data.json" in the directory of this module."""
    module_dir = os.path.dirname(__file__)
    data_file = os.path.join(module_dir, "r1_data.json")
    return data_file
|
||||
|
||||
@pytest.mark.depends(on=["basic_write_file"])
|
||||
def test_method(self, config: Dict[str, Any]) -> None:
|
||||
self.setup_challenge(config)
|
||||
files_contents = self.open_files(config["workspace"], self.data.ground.files)
|
||||
|
||||
workspace = Path(os.getcwd()) / config["workspace"]
|
||||
files_contents = self.open_files(workspace, self.data.ground.files)
|
||||
|
||||
scores = []
|
||||
for file_content in files_contents:
|
||||
|
||||
@@ -22,3 +22,14 @@ def basic_write_file_mock(task: str, workspace: str) -> None:
|
||||
"file_to_check.txt",
|
||||
"Washington DC is the capital of the United States of America",
|
||||
)
|
||||
|
||||
|
||||
def basic_retrieval_mock(task: str, workspace: str) -> None:
    """
    Mock for the basic information-retrieval challenge.

    Hands the workspace, the file name "file_to_check.txt" and the content
    "25.89" to Challenge.write_to_file. The `task` argument is accepted but
    not used by this mock.
    """
    output_file = "file_to_check.txt"
    mocked_answer = "25.89"
    Challenge.write_to_file(workspace, output_file, mocked_answer)
|
||||
|
||||
@@ -1 +0,0 @@
|
||||
Washington DC is the capital of the United States of America
|
||||
@@ -1,6 +1,7 @@
|
||||
import os
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict
|
||||
|
||||
import pytest
|
||||
|
||||
from agbenchmark.tests.basic_abilities.basic_challenge import BasicChallenge
|
||||
|
||||
13
mypy.ini
13
mypy.ini
@@ -3,3 +3,16 @@ follow_imports = skip
|
||||
check_untyped_defs = True
|
||||
disallow_untyped_defs = True
|
||||
exclude = ^(agent/.*\.py)$
|
||||
ignore_missing_imports = True
|
||||
|
||||
[mypy-agbenchmark.mocks.mock_manager.*]
|
||||
ignore_errors = True
|
||||
|
||||
[mypy-agbenchmark.tests.basic_abilities.basic_challenge.*]
|
||||
ignore_errors = True
|
||||
|
||||
[mypy-agbenchmark.mocks.tests.basic_mocks.*]
|
||||
ignore_errors = True
|
||||
|
||||
[mypy-agbenchmark.tests.regression.RegressionManager.*]
|
||||
ignore_errors = True
|
||||
|
||||
@@ -1,7 +1,19 @@
|
||||
{
|
||||
"TestRetrieval": {
|
||||
"difficulty": "basic",
|
||||
"dependencies": [],
|
||||
"test": "agbenchmark/challenges/retrieval/r1/r1_test.py"
|
||||
},
|
||||
"TestWriteFile": {
|
||||
"difficulty": "basic",
|
||||
"dependencies": [],
|
||||
"test": "agbenchmark/tests/basic_abilities/write_file/write_file_test.py"
|
||||
},
|
||||
"TestReadFile": {
|
||||
"difficulty": "basic",
|
||||
"dependencies": [
|
||||
"basic_write_file"
|
||||
],
|
||||
"test": "agbenchmark/tests/basic_abilities/read_file/read_file_test.py"
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user