From 097ce08908fa911bc0e282af54d605611c5092ae Mon Sep 17 00:00:00 2001 From: merwanehamadi Date: Sat, 10 Jun 2023 15:11:24 -0700 Subject: [PATCH] Create benchmarks.yml (#4647) --- .github/workflows/benchmarks.yml | 73 +++++++++++++++++++ tests/Auto-GPT-test-cassettes | 2 +- .../challenge_decorator.py | 7 +- .../memory/test_memory_challenge_d.py | 2 +- 4 files changed, 80 insertions(+), 4 deletions(-) create mode 100644 .github/workflows/benchmarks.yml diff --git a/.github/workflows/benchmarks.yml b/.github/workflows/benchmarks.yml new file mode 100644 index 00000000..e9f46d04 --- /dev/null +++ b/.github/workflows/benchmarks.yml @@ -0,0 +1,73 @@ +name: Benchmarks + +on: + schedule: + - cron: '0 8 * * *' + workflow_dispatch: + +jobs: + Benchmark: + name: Benchmark - ${{ matrix.config.task-name }} + runs-on: ubuntu-latest + timeout-minutes: 30 + strategy: + fail-fast: false + matrix: + config: + - python-version: "3.10" + task: "tests/challenges" + task-name: "Mandatory Tasks" + - python-version: "3.10" + task: "--beat-challenges -ra tests/challenges" + task-name: "Challenging Tasks" + + steps: + - name: Checkout repository + uses: actions/checkout@v3 + with: + ref: master + + - name: Set up Python ${{ matrix.config.python-version }} + uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.config.python-version }} + + - id: get_date + name: Get date + run: echo "date=$(date +'%Y-%m-%d')" >> $GITHUB_OUTPUT + + - name: Set up Python dependency cache + uses: actions/cache@v3 + with: + path: ~/.cache/pip + key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }}-${{ steps.get_date.outputs.date }} + + - name: Install Python dependencies + run: | + python -m pip install --upgrade pip + pip install -r requirements.txt + + - name: Run pytest with coverage + run: | + rm -rf tests/Auto-GPT-test-cassettes + pytest -n auto ${{ matrix.config.task }} + env: + CI: true + PROXY: ${{ secrets.PROXY }} + AGENT_MODE: ${{ secrets.AGENT_MODE }} + AGENT_TYPE: ${{ secrets.AGENT_TYPE }} + 
PLAIN_OUTPUT: True + + - name: Upload logs as artifact + if: always() + uses: actions/upload-artifact@v3 + with: + name: test-logs-${{ matrix.config.task-name }} + path: logs/ + + - name: Upload cassettes as artifact + if: always() + uses: actions/upload-artifact@v3 + with: + name: cassettes-${{ matrix.config.task-name }} + path: tests/Auto-GPT-test-cassettes/ diff --git a/tests/Auto-GPT-test-cassettes b/tests/Auto-GPT-test-cassettes index 5cf78f4a..c154c67a 160000 --- a/tests/Auto-GPT-test-cassettes +++ b/tests/Auto-GPT-test-cassettes @@ -1 +1 @@ -Subproject commit 5cf78f4af1158c636a0ca997365a7ad6d8343e26 +Subproject commit c154c67a58fceb534b6969bfbf92928c4ae54ea3 diff --git a/tests/challenges/challenge_decorator/challenge_decorator.py b/tests/challenges/challenge_decorator/challenge_decorator.py index 5ef7f19e..d887e21d 100644 --- a/tests/challenges/challenge_decorator/challenge_decorator.py +++ b/tests/challenges/challenge_decorator/challenge_decorator.py @@ -22,7 +22,7 @@ def challenge(func: Callable[..., Any]) -> Callable[..., None]: @wraps(func) def wrapper(*args: Any, **kwargs: Any) -> None: run_remaining = MAX_LEVEL_TO_IMPROVE_ON if Challenge.BEAT_CHALLENGES else 1 - original_error = None + original_error: Optional[Exception] = None while run_remaining > 0: current_score, new_score, new_score_location = get_scores() @@ -40,6 +40,9 @@ def challenge(func: Callable[..., Any]) -> Callable[..., None]: f"{CHALLENGE_FAILED_MESSAGE}\n{err}" ) challenge.succeeded = False + except Exception as err: + original_error = err + challenge.succeeded = False else: challenge.skipped = True if os.environ.get("CI") == "true": @@ -55,7 +58,7 @@ def challenge(func: Callable[..., Any]) -> Callable[..., None]: if not challenge.succeeded: if Challenge.BEAT_CHALLENGES or challenge.is_new_challenge: # xfail - pytest.xfail("Challenge failed") + pytest.xfail(str(original_error)) if original_error: raise original_error run_remaining -= 1 diff --git 
a/tests/challenges/memory/test_memory_challenge_d.py b/tests/challenges/memory/test_memory_challenge_d.py index 9e662e08..e788f65d 100644 --- a/tests/challenges/memory/test_memory_challenge_d.py +++ b/tests/challenges/memory/test_memory_challenge_d.py @@ -16,7 +16,7 @@ MAX_LEVEL = 5 OUTPUT_LOCATION = "output.txt" -# @pytest.mark.vcr +@pytest.mark.vcr @requires_api_key("OPENAI_API_KEY") @challenge def test_memory_challenge_d(