Create benchmarks.yml (#4647)
.github/workflows/benchmarks.yml (vendored, new file, 73 lines added)
@@ -0,0 +1,73 @@
name: Benchmarks

on:
  schedule:
    - cron: '0 8 * * *'
  workflow_dispatch:

jobs:
  Benchmark:
    name: Benchmark - ${{ matrix.config.task-name }}
    runs-on: ubuntu-latest
    timeout-minutes: 30
    strategy:
      fail-fast: false
      matrix:
        config:
          - python-version: "3.10"
            task: "tests/challenges"
            task-name: "Mandatory Tasks"
          - python-version: "3.10"
            task: "--beat-challenges -ra tests/challenges"
            task-name: "Challenging Tasks"

    steps:
      - name: Checkout repository
        uses: actions/checkout@v3
        with:
          ref: master

      - name: Set up Python ${{ matrix.config.python-version }}
        uses: actions/setup-python@v2
        with:
          python-version: ${{ matrix.config.python-version }}

      - id: get_date
        name: Get date
        run: echo "date=$(date +'%Y-%m-%d')" >> $GITHUB_OUTPUT

      - name: Set up Python dependency cache
        uses: actions/cache@v3
        with:
          path: ~/.cache/pip
          key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }}-${{ steps.get_date.outputs.date }}

      - name: Install Python dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -r requirements.txt

      - name: Run pytest with coverage
        run: |
          rm -rf tests/Auto-GPT-test-cassettes
          pytest -n auto ${{ matrix.config.task }}
        env:
          CI: true
          PROXY: ${{ secrets.PROXY }}
          AGENT_MODE: ${{ secrets.AGENT_MODE }}
          AGENT_TYPE: ${{ secrets.AGENT_TYPE }}
          PLAIN_OUTPUT: True

      - name: Upload logs as artifact
        if: always()
        uses: actions/upload-artifact@v3
        with:
          name: test-logs-${{ matrix.config.task-name }}
          path: logs/

      - name: Upload cassettes as artifact
        if: always()
        uses: actions/upload-artifact@v3
        with:
          name: cassettes-${{ matrix.config.task-name }}
          path: tests/Auto-GPT-test-cassettes/
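For context, the "Run pytest with coverage" step can be approximated locally. The sketch below is not part of the commit: the file name reproduce_benchmark.py is illustrative, it assumes pytest and pytest-xdist are installed, and it only mirrors the non-secret environment variables from the workflow's env block. The task string corresponds to one of the matrix.config.task values above.

# reproduce_benchmark.py: rough local approximation of the workflow's
# "Run pytest with coverage" step (assumes pytest and pytest-xdist are installed).
import os
import shlex
import subprocess
import sys


def run_benchmark(task: str = "tests/challenges") -> int:
    # Mirror the env block of the workflow step (secrets such as PROXY are omitted).
    env = dict(os.environ, CI="true", PLAIN_OUTPUT="True")
    # "-n auto" spreads tests across CPU cores via pytest-xdist, as in the workflow.
    cmd = [sys.executable, "-m", "pytest", "-n", "auto", *shlex.split(task)]
    return subprocess.call(cmd, env=env)


if __name__ == "__main__":
    sys.exit(run_benchmark(sys.argv[1] if len(sys.argv) > 1 else "tests/challenges"))

Running it with "--beat-challenges -ra tests/challenges" reproduces the "Challenging Tasks" matrix entry; the workflow additionally deletes tests/Auto-GPT-test-cassettes first so cassettes are re-recorded from scratch.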
Submodule tests/Auto-GPT-test-cassettes updated: 5cf78f4af1...c154c67a58
@@ -22,7 +22,7 @@ def challenge(func: Callable[..., Any]) -> Callable[..., None]:
     @wraps(func)
     def wrapper(*args: Any, **kwargs: Any) -> None:
         run_remaining = MAX_LEVEL_TO_IMPROVE_ON if Challenge.BEAT_CHALLENGES else 1
-        original_error = None
+        original_error: Optional[Exception] = None
 
         while run_remaining > 0:
             current_score, new_score, new_score_location = get_scores()
@@ -40,6 +40,9 @@ def challenge(func: Callable[..., Any]) -> Callable[..., None]:
                         f"{CHALLENGE_FAILED_MESSAGE}\n{err}"
                     )
                     challenge.succeeded = False
+                except Exception as err:
+                    original_error = err
+                    challenge.succeeded = False
             else:
                 challenge.skipped = True
             if os.environ.get("CI") == "true":
@@ -55,7 +58,7 @@ def challenge(func: Callable[..., Any]) -> Callable[..., None]:
             if not challenge.succeeded:
                 if Challenge.BEAT_CHALLENGES or challenge.is_new_challenge:
                     # xfail
-                    pytest.xfail("Challenge failed")
+                    pytest.xfail(str(original_error))
                 if original_error:
                     raise original_error
             run_remaining -= 1
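The decorator change above boils down to one pattern: remember the first real exception (original_error), catch any unexpected exception rather than only AssertionError, and surface the captured error through pytest.xfail() instead of a generic "Challenge failed" string. A minimal, self-contained sketch of that pattern follows; the retryable decorator and RUNS constant are illustrative stand-ins, not Auto-GPT's actual helpers.

from functools import wraps
from typing import Any, Callable, Optional

import pytest

RUNS = 3  # illustrative stand-in for MAX_LEVEL_TO_IMPROVE_ON


def retryable(xfail_on_failure: bool) -> Callable[[Callable[..., Any]], Callable[..., None]]:
    """Retry a test a few times; on overall failure, surface the first real exception."""

    def decorator(func: Callable[..., Any]) -> Callable[..., None]:
        @wraps(func)
        def wrapper(*args: Any, **kwargs: Any) -> None:
            original_error: Optional[Exception] = None
            for _ in range(RUNS):
                try:
                    func(*args, **kwargs)
                    return  # success: nothing to report
                except Exception as err:  # mirrors the new `except Exception` branch above
                    if original_error is None:
                        original_error = err  # keep the first failure, like original_error in the diff
            assert original_error is not None
            if xfail_on_failure:
                # the xfail message now names the real error, like pytest.xfail(str(original_error))
                pytest.xfail(str(original_error))
            raise original_error  # otherwise re-raise with the original traceback

        return wrapper

    return decorator

A test decorated with @retryable(xfail_on_failure=True) then shows the underlying error message in its xfail report after RUNS failed attempts, rather than an uninformative constant string.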
@@ -16,7 +16,7 @@ MAX_LEVEL = 5
 OUTPUT_LOCATION = "output.txt"
 
 
-# @pytest.mark.vcr
+@pytest.mark.vcr
 @requires_api_key("OPENAI_API_KEY")
 @challenge
 def test_memory_challenge_d(
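Re-enabling @pytest.mark.vcr on test_memory_challenge_d ties back to the cassette handling in the workflow: with the pytest-vcr plugin, HTTP traffic made inside a marked test is recorded into a cassette file and replayed on later runs, which is presumably why the workflow deletes and then uploads tests/Auto-GPT-test-cassettes. A minimal sketch of the marker in isolation (assumes pytest-vcr and requests are installed; the URL and test name are illustrative only):

import pytest
import requests


@pytest.mark.vcr
def test_example_request() -> None:
    # First run: the HTTP exchange is recorded into a cassette (by default a YAML
    # file under a cassettes/ directory next to this test module).
    # Later runs replay the cassette, so the test works offline and deterministically.
    response = requests.get("https://api.github.com")
    assert response.status_code == 200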