From 097ce08908fa911bc0e282af54d605611c5092ae Mon Sep 17 00:00:00 2001 From: merwanehamadi Date: Sat, 10 Jun 2023 15:11:24 -0700 Subject: [PATCH] Create benchmarks.yml (#4647) --- .github/workflows/benchmarks.yml | 73 +++++++++++++++++++ tests/Auto-GPT-test-cassettes | 2 +- .../challenge_decorator.py | 7 +- .../memory/test_memory_challenge_d.py | 2 +- 4 files changed, 80 insertions(+), 4 deletions(-) create mode 100644 .github/workflows/benchmarks.yml diff --git a/.github/workflows/benchmarks.yml b/.github/workflows/benchmarks.yml new file mode 100644 index 00000000..e9f46d04 --- /dev/null +++ b/.github/workflows/benchmarks.yml @@ -0,0 +1,73 @@ +name: Benchmarks + +on: + schedule: + - cron: '0 8 * * *' + workflow_dispatch: + +jobs: + Benchmark: + name: Benchmark - ${{ matrix.config.task-name }} + runs-on: ubuntu-latest + timeout-minutes: 30 + strategy: + fail-fast: false + matrix: + config: + - python-version: "3.10" + task: "tests/challenges" + task-name: "Mandatory Tasks" + - python-version: "3.10" + task: "--beat-challenges -ra tests/challenges" + task-name: "Challenging Tasks" + + steps: + - name: Checkout repository + uses: actions/checkout@v3 + with: + ref: master + + - name: Set up Python ${{ matrix.config.python-version }} + uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.config.python-version }} + + - id: get_date + name: Get date + run: echo "date=$(date +'%Y-%m-%d')" >> $GITHUB_OUTPUT + + - name: Set up Python dependency cache + uses: actions/cache@v3 + with: + path: ~/.cache/pip + key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }}-${{ steps.get_date.outputs.date }} + + - name: Install Python dependencies + run: | + python -m pip install --upgrade pip + pip install -r requirements.txt + + - name: Run pytest with coverage + run: | + rm -rf tests/Auto-GPT-test-cassettes + pytest -n auto ${{ matrix.config.task }} + env: + CI: true + PROXY: ${{ secrets.PROXY }} + AGENT_MODE: ${{ secrets.AGENT_MODE }} + AGENT_TYPE: ${{ secrets.AGENT_TYPE }} + 
PLAIN_OUTPUT: True + + - name: Upload logs as artifact + if: always() + uses: actions/upload-artifact@v3 + with: + name: test-logs-${{ matrix.config.task-name }} + path: logs/ + + - name: Upload cassettes as artifact + if: always() + uses: actions/upload-artifact@v3 + with: + name: cassettes-${{ matrix.config.task-name }} + path: tests/Auto-GPT-test-cassettes/ diff --git a/tests/Auto-GPT-test-cassettes b/tests/Auto-GPT-test-cassettes index 5cf78f4a..c154c67a 160000 --- a/tests/Auto-GPT-test-cassettes +++ b/tests/Auto-GPT-test-cassettes @@ -1 +1 @@ -Subproject commit 5cf78f4af1158c636a0ca997365a7ad6d8343e26 +Subproject commit c154c67a58fceb534b6969bfbf92928c4ae54ea3 diff --git a/tests/challenges/challenge_decorator/challenge_decorator.py b/tests/challenges/challenge_decorator/challenge_decorator.py index 5ef7f19e..d887e21d 100644 --- a/tests/challenges/challenge_decorator/challenge_decorator.py +++ b/tests/challenges/challenge_decorator/challenge_decorator.py @@ -22,7 +22,7 @@ def challenge(func: Callable[..., Any]) -> Callable[..., None]: @wraps(func) def wrapper(*args: Any, **kwargs: Any) -> None: run_remaining = MAX_LEVEL_TO_IMPROVE_ON if Challenge.BEAT_CHALLENGES else 1 - original_error = None + original_error: Optional[Exception] = None while run_remaining > 0: current_score, new_score, new_score_location = get_scores() @@ -40,6 +40,9 @@ def challenge(func: Callable[..., Any]) -> Callable[..., None]: f"{CHALLENGE_FAILED_MESSAGE}\n{err}" ) challenge.succeeded = False + except Exception as err: + original_error = err + challenge.succeeded = False else: challenge.skipped = True if os.environ.get("CI") == "true": @@ -55,7 +58,7 @@ def challenge(func: Callable[..., Any]) -> Callable[..., None]: if not challenge.succeeded: if Challenge.BEAT_CHALLENGES or challenge.is_new_challenge: # xfail - pytest.xfail("Challenge failed") + pytest.xfail(str(original_error)) if original_error: raise original_error run_remaining -= 1 diff --git 
a/tests/challenges/memory/test_memory_challenge_d.py b/tests/challenges/memory/test_memory_challenge_d.py index 9e662e08..e788f65d 100644 --- a/tests/challenges/memory/test_memory_challenge_d.py +++ b/tests/challenges/memory/test_memory_challenge_d.py @@ -16,7 +16,7 @@ MAX_LEVEL = 5 OUTPUT_LOCATION = "output.txt" -# @pytest.mark.vcr +@pytest.mark.vcr @requires_api_key("OPENAI_API_KEY") @challenge def test_memory_challenge_d(