Add retrieval challenge test + run tests on CI pipeline (#51)

2026-01-28 10:24:26 +01:00 · 2023-07-04 15:28:00 -07:00
parent e6e92e9952
commit f8e550773f
8 changed files with 94 additions and 12 deletions
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -66,3 +66,43 @@ jobs:
          cmd="poetry run autoflake --remove-all-unused-imports --recursive --ignore-init-module-imports --ignore-pass-after-docstring agbenchmark"
          $cmd --check || (echo "You have unused imports or pass statements, please run '${cmd} --in-place'" && exit 1)
        if: success() || failure()
+
+  tests:
+
+    runs-on: ubuntu-latest
+    env:
+      min-python-version: "3.10"
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v3
+        with:
+          fetch-depth: 0
+          ref: ${{ github.event.pull_request.head.ref }}
+          repository: ${{ github.event.pull_request.head.repo.full_name }}
+
+      - name: Set up Python ${{ env.min-python-version }}
+        uses: actions/setup-python@v2
+        with:
+          python-version: ${{ env.min-python-version }}
+
+      - id: get_date
+        name: Get date
+        run: echo "date=$(date +'%Y-%m-%d')" >> $GITHUB_OUTPUT
+
+      - name: Install Poetry
+        run: |
+          curl -sSL https://install.python-poetry.org | python -
+
+      - name: Set up Poetry cache
+        uses: actions/cache@v2
+        with:
+          path: |
+            ~/.cache/pypoetry
+            .venv
+          key: ${{ runner.os }}-poetry-${{ hashFiles('**/pyproject.toml') }}-${{ hashFiles('**/poetry.lock') }}-${{ steps.get_date.outputs.date }}
+
+      - name: Install dependencies
+        run: |
+          poetry install
+          poetry run agbenchmark start --mock
--- a/agbenchmark/challenges/retrieval/r1/r1_data.json
+++ b/agbenchmark/challenges/retrieval/r1/r1_data.json
@@ -1,21 +1,21 @@
 {
-  "name": "retrieval1",
-  "category": ["basic"],
-  "task": "Print the the capital of America to a .txt file",
+  "name": "basic_information_retrieval",
+  "category": ["retrieval"],
+  "task": "Write the price of the book in this url 'http://books.toscrape.com/catalogue/meditations_33/index.html' to a .txt file.",
  "dependencies": [],
  "ground": {
-    "answer": "Washington",
-    "should_contain": ["Washington"],
-    "should_not_contain": ["New York", "Los Angeles", "San Francisco"],
+    "answer": "£25.89",
+    "should_contain": ["25.89"],
+    "should_not_contain": [],
    "files": [".txt"]
  },
  "mock": {
-    "mock_func": "basic_write_file_mock",
-    "mock_task": "What is the capital of America?"
+    "mock_func": "basic_retrieval_mock",
+    "mock_task": "Write the price of the book in this url 'http://books.toscrape.com/catalogue/meditations_33/index.html' to a .txt file."
  },
  "info": {
    "difficulty": "basic",
-    "description": "Tests the writing to file",
+    "description": "Tests ability to retrieve information from a website.",
    "side_effects": ["tests if there is in fact an LLM attached"]
  }
 }
--- a/agbenchmark/challenges/retrieval/r1/r1_test.py
+++ b/agbenchmark/challenges/retrieval/r1/r1_test.py
@@ -1,18 +1,24 @@
 import os
+from pathlib import Path
 from typing import Any, Dict

+import pytest
+
 from agbenchmark.challenges.retrieval.retrieval import RetrievalChallenge


-class TestRetrieval1(RetrievalChallenge):
+class TestRetrieval(RetrievalChallenge):
    """The first information-retrieval challenge"""

    def get_file_path(self) -> str:  # all tests must implement this method
        return os.path.join(os.path.dirname(__file__), "r1_data.json")

+    @pytest.mark.depends(on=["basic_write_file"])
    def test_method(self, config: Dict[str, Any]) -> None:
        self.setup_challenge(config)
-        files_contents = self.open_files(config["workspace"], self.data.ground.files)
+
+        workspace = Path(os.getcwd()) / config["workspace"]
+        files_contents = self.open_files(workspace, self.data.ground.files)

        scores = []
        for file_content in files_contents:
--- a/agbenchmark/mocks/tests/basic_mocks.py
+++ b/agbenchmark/mocks/tests/basic_mocks.py
@@ -22,3 +22,14 @@ def basic_write_file_mock(task: str, workspace: str) -> None:
        "file_to_check.txt",
        "Washington DC is the capital of the United States of America",
    )
+
+
+def basic_retrieval_mock(task: str, workspace: str) -> None:
+    """
+    This mock writes the expected book price to a file (creates one if it doesn't exist)
+    """
+    Challenge.write_to_file(
+        workspace,
+        "file_to_check.txt",
+        "25.89",
+    )
--- a/agbenchmark/mocks/workspace/file_to_check.txt
+++ b/agbenchmark/mocks/workspace/file_to_check.txt
@@ -1 +0,0 @@
-Washington DC is the capital of the United States of America
--- a/agbenchmark/tests/basic_abilities/write_file/write_file_test.py
+++ b/agbenchmark/tests/basic_abilities/write_file/write_file_test.py
@@ -1,6 +1,7 @@
 import os
 from pathlib import Path
 from typing import Any, Dict
+
 import pytest

 from agbenchmark.tests.basic_abilities.basic_challenge import BasicChallenge
--- a/mypy.ini
+++ b/mypy.ini
@@ -3,3 +3,16 @@ follow_imports = skip
 check_untyped_defs = True
 disallow_untyped_defs = True
 exclude = ^(agent/.*\.py)$
+ignore_missing_imports = True
+
+[mypy-agbenchmark.mocks.mock_manager.*]
+ignore_errors = True
+
+[mypy-agbenchmark.tests.basic_abilities.basic_challenge.*]
+ignore_errors = True
+
+[mypy-agbenchmark.mocks.tests.basic_mocks.*]
+ignore_errors = True
+
+[mypy-agbenchmark.tests.regression.RegressionManager.*]
+ignore_errors = True
--- a/regression_tests.json
+++ b/regression_tests.json
@@ -1,7 +1,19 @@
 {
+    "TestRetrieval": {
+        "difficulty": "basic",
+        "dependencies": [],
+        "test": "agbenchmark/challenges/retrieval/r1/r1_test.py"
+    },
    "TestWriteFile": {
        "difficulty": "basic",
        "dependencies": [],
        "test": "agbenchmark/tests/basic_abilities/write_file/write_file_test.py"
+    },
+    "TestReadFile": {
+        "difficulty": "basic",
+        "dependencies": [
+            "basic_write_file"
+        ],
+        "test": "agbenchmark/tests/basic_abilities/read_file/read_file_test.py"
    }
 }
				`@@ -1 +0,0 @@`
				`Washington DC is the capital of the United States of America`