Create benchmarks.yml (#4647)

commit 097ce08908
parent c1ee8cb62e
Author: merwanehamadi
Date: 2023-06-10 15:11:24 -07:00
Committed by: GitHub

4 changed files with 80 additions and 4 deletions

.github/workflows/benchmarks.yml (new file)

@@ -0,0 +1,73 @@
name: Benchmarks

on:
  schedule:
    - cron: '0 8 * * *'  # runs daily at 08:00 UTC
  workflow_dispatch:

jobs:
  Benchmark:
    name: Benchmark - ${{ matrix.config.task-name }}
    runs-on: ubuntu-latest
    timeout-minutes: 30
    strategy:
      fail-fast: false
      matrix:
        config:
          - python-version: "3.10"
            task: "tests/challenges"
            task-name: "Mandatory Tasks"
          - python-version: "3.10"
            task: "--beat-challenges -ra tests/challenges"
            task-name: "Challenging Tasks"
    steps:
      - name: Checkout repository
        uses: actions/checkout@v3
        with:
          ref: master
      - name: Set up Python ${{ matrix.config.python-version }}
        uses: actions/setup-python@v2
        with:
          python-version: ${{ matrix.config.python-version }}
      - id: get_date
        name: Get date
        run: echo "date=$(date +'%Y-%m-%d')" >> $GITHUB_OUTPUT
      - name: Set up Python dependency cache
        uses: actions/cache@v3
        with:
          path: ~/.cache/pip
          # The date suffix expires the dependency cache once per day.
          key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }}-${{ steps.get_date.outputs.date }}
      - name: Install Python dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -r requirements.txt
      - name: Run pytest with coverage
        run: |
          rm -rf tests/Auto-GPT-test-cassettes
          pytest -n auto ${{ matrix.config.task }}
        env:
          CI: true
          PROXY: ${{ secrets.PROXY }}
          AGENT_MODE: ${{ secrets.AGENT_MODE }}
          AGENT_TYPE: ${{ secrets.AGENT_TYPE }}
          PLAIN_OUTPUT: True
      - name: Upload logs as artifact
        if: always()
        uses: actions/upload-artifact@v3
        with:
          name: test-logs-${{ matrix.config.task-name }}
          path: logs/
      - name: Upload cassettes as artifact
        if: always()
        uses: actions/upload-artifact@v3
        with:
          name: cassettes-${{ matrix.config.task-name }}
          path: tests/Auto-GPT-test-cassettes/


@@ -22,7 +22,7 @@ def challenge(func: Callable[..., Any]) -> Callable[..., None]:
     @wraps(func)
     def wrapper(*args: Any, **kwargs: Any) -> None:
         run_remaining = MAX_LEVEL_TO_IMPROVE_ON if Challenge.BEAT_CHALLENGES else 1
-        original_error = None
+        original_error: Optional[Exception] = None
         while run_remaining > 0:
             current_score, new_score, new_score_location = get_scores()
@@ -40,6 +40,9 @@ def challenge(func: Callable[..., Any]) -> Callable[..., None]:
                         f"{CHALLENGE_FAILED_MESSAGE}\n{err}"
                     )
                     challenge.succeeded = False
+                except Exception as err:
+                    original_error = err
+                    challenge.succeeded = False
             else:
                 challenge.skipped = True
             if os.environ.get("CI") == "true":
@@ -55,7 +58,7 @@ def challenge(func: Callable[..., Any]) -> Callable[..., None]:
             if not challenge.succeeded:
                 if Challenge.BEAT_CHALLENGES or challenge.is_new_challenge:
                     # xfail
-                    pytest.xfail("Challenge failed")
+                    pytest.xfail(str(original_error))
                 if original_error:
                     raise original_error
             run_remaining -= 1
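
Taken together, these decorator changes capture any exception raised inside a challenge (not just AssertionError) and surface it: when beating challenges, the run is marked xfail with the real error message instead of a generic "Challenge failed"; otherwise the original error is re-raised. Below is a minimal, self-contained sketch of that pattern, not the actual Auto-GPT decorator: MAX_ATTEMPTS, the module-level BEAT_CHALLENGES flag, and the simplified retry loop are illustrative stand-ins.

from functools import wraps
from typing import Any, Callable, Optional

import pytest

BEAT_CHALLENGES = False  # illustrative stand-in for Challenge.BEAT_CHALLENGES
MAX_ATTEMPTS = 3


def challenge(func: Callable[..., Any]) -> Callable[..., None]:
    @wraps(func)
    def wrapper(*args: Any, **kwargs: Any) -> None:
        attempts = MAX_ATTEMPTS if BEAT_CHALLENGES else 1
        original_error: Optional[Exception] = None
        succeeded = False

        for _ in range(attempts):
            try:
                func(*args, **kwargs)
                succeeded = True
                break
            except Exception as err:  # capture any failure, not only AssertionError
                original_error = err

        if not succeeded:
            if BEAT_CHALLENGES:
                # Report the real failure reason instead of a generic message.
                pytest.xfail(str(original_error))
            if original_error:
                raise original_error

    return wrapper

Under this pattern a failing challenge still appears in the report, but annotated with the underlying error message rather than a fixed string.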


@@ -16,7 +16,7 @@ MAX_LEVEL = 5
 OUTPUT_LOCATION = "output.txt"
-# @pytest.mark.vcr
+@pytest.mark.vcr
 @requires_api_key("OPENAI_API_KEY")
 @challenge
 def test_memory_challenge_d(
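
The last hunk re-enables the vcr marker on test_memory_challenge_d. With pytest-vcr / pytest-recording style semantics, the marker records a test's HTTP traffic into a cassette file and replays it on later runs, which fits the workflow above deleting tests/Auto-GPT-test-cassettes before the run and uploading the freshly recorded cassettes as an artifact. A rough illustration of the marker follows; the test name and URL are hypothetical, and the exact record/replay behaviour depends on the plugin and its configured record mode.

import pytest
import requests  # hypothetical HTTP call, purely for illustration


@pytest.mark.vcr  # records a cassette when allowed; otherwise replays it offline
def test_example_request():
    response = requests.get("https://example.com")
    assert response.status_code == 200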