Create benchmarks.yml (#4647)

commit 097ce08908
parent c1ee8cb62e
Author: merwanehamadi
Date: 2023-06-10 15:11:24 -07:00
Committed by: GitHub

4 changed files with 80 additions and 4 deletions

.github/workflows/benchmarks.yml (new file)

@@ -0,0 +1,73 @@
name: Benchmarks

on:
  schedule:
    - cron: '0 8 * * *'  # runs daily at 08:00 UTC
  workflow_dispatch:

jobs:
  Benchmark:
    name: Benchmark - ${{ matrix.config.task-name }}
    runs-on: ubuntu-latest
    timeout-minutes: 30
    strategy:
      fail-fast: false
      matrix:
        config:
          - python-version: "3.10"
            task: "tests/challenges"
            task-name: "Mandatory Tasks"
          - python-version: "3.10"
            task: "--beat-challenges -ra tests/challenges"
            task-name: "Challenging Tasks"
    steps:
      - name: Checkout repository
        uses: actions/checkout@v3
        with:
          ref: master
      - name: Set up Python ${{ matrix.config.python-version }}
        uses: actions/setup-python@v2
        with:
          python-version: ${{ matrix.config.python-version }}
      - id: get_date
        name: Get date
        run: echo "date=$(date +'%Y-%m-%d')" >> $GITHUB_OUTPUT
      - name: Set up Python dependency cache
        uses: actions/cache@v3
        with:
          path: ~/.cache/pip
          # The date suffix expires the dependency cache once per day.
          key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }}-${{ steps.get_date.outputs.date }}
      - name: Install Python dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -r requirements.txt
      - name: Run pytest with coverage
        run: |
          rm -rf tests/Auto-GPT-test-cassettes
          pytest -n auto ${{ matrix.config.task }}
        env:
          CI: true
          PROXY: ${{ secrets.PROXY }}
          AGENT_MODE: ${{ secrets.AGENT_MODE }}
          AGENT_TYPE: ${{ secrets.AGENT_TYPE }}
          PLAIN_OUTPUT: True
      - name: Upload logs as artifact
        if: always()
        uses: actions/upload-artifact@v3
        with:
          name: test-logs-${{ matrix.config.task-name }}
          path: logs/
      - name: Upload cassettes as artifact
        if: always()
        uses: actions/upload-artifact@v3
        with:
          name: cassettes-${{ matrix.config.task-name }}
          path: tests/Auto-GPT-test-cassettes/


@@ -22,7 +22,7 @@ def challenge(func: Callable[..., Any]) -> Callable[..., None]:
     @wraps(func)
     def wrapper(*args: Any, **kwargs: Any) -> None:
         run_remaining = MAX_LEVEL_TO_IMPROVE_ON if Challenge.BEAT_CHALLENGES else 1
-        original_error = None
+        original_error: Optional[Exception] = None
         while run_remaining > 0:
             current_score, new_score, new_score_location = get_scores()
@@ -40,6 +40,9 @@ def challenge(func: Callable[..., Any]) -> Callable[..., None]:
                         f"{CHALLENGE_FAILED_MESSAGE}\n{err}"
                     )
                     challenge.succeeded = False
+                except Exception as err:
+                    original_error = err
+                    challenge.succeeded = False
             else:
                 challenge.skipped = True
             if os.environ.get("CI") == "true":
@@ -55,7 +58,7 @@ def challenge(func: Callable[..., Any]) -> Callable[..., None]:
             if not challenge.succeeded:
                 if Challenge.BEAT_CHALLENGES or challenge.is_new_challenge:
                     # xfail
-                    pytest.xfail("Challenge failed")
+                    pytest.xfail(str(original_error))
                 if original_error:
                     raise original_error
             run_remaining -= 1
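
Taken together, these decorator changes capture any exception raised inside a challenge (not just AssertionError) and surface it: when beating challenges, the run is marked xfail with the real error message instead of a generic "Challenge failed"; otherwise the original error is re-raised. Below is a minimal, self-contained sketch of that pattern, not the actual Auto-GPT decorator: MAX_ATTEMPTS, the module-level BEAT_CHALLENGES flag, and the simplified retry loop are illustrative stand-ins.

from functools import wraps
from typing import Any, Callable, Optional

import pytest

BEAT_CHALLENGES = False  # illustrative stand-in for Challenge.BEAT_CHALLENGES
MAX_ATTEMPTS = 3


def challenge(func: Callable[..., Any]) -> Callable[..., None]:
    @wraps(func)
    def wrapper(*args: Any, **kwargs: Any) -> None:
        attempts = MAX_ATTEMPTS if BEAT_CHALLENGES else 1
        original_error: Optional[Exception] = None
        succeeded = False

        for _ in range(attempts):
            try:
                func(*args, **kwargs)
                succeeded = True
                break
            except Exception as err:  # capture any failure, not only AssertionError
                original_error = err

        if not succeeded:
            if BEAT_CHALLENGES:
                # Report the real failure reason instead of a generic message.
                pytest.xfail(str(original_error))
            if original_error:
                raise original_error

    return wrapper

Under this pattern a failing challenge still appears in the report, but annotated with the underlying error message rather than a fixed string.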


@@ -16,7 +16,7 @@ MAX_LEVEL = 5
 OUTPUT_LOCATION = "output.txt"
-# @pytest.mark.vcr
+@pytest.mark.vcr
 @requires_api_key("OPENAI_API_KEY")
 @challenge
 def test_memory_challenge_d(
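
The last hunk re-enables the vcr marker on test_memory_challenge_d. With pytest-vcr / pytest-recording style semantics, the marker records a test's HTTP traffic into a cassette file and replays it on later runs, which fits the workflow above deleting tests/Auto-GPT-test-cassettes before the run and uploading the freshly recorded cassettes as an artifact. A rough illustration of the marker follows; the test name and URL are hypothetical, and the exact record/replay behaviour depends on the plugin and its configured record mode.

import pytest
import requests  # hypothetical HTTP call, purely for illustration


@pytest.mark.vcr  # records a cassette when allowed; otherwise replays it offline
def test_example_request():
    response = requests.get("https://example.com")
    assert response.status_code == 200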