Auto-GPT/.github/workflows/benchmark-ci.yml

name: Benchmark CI

on:
  push:
    branches: [master, ci-test*]
    paths:
      - 'benchmark/**'
      - .github/workflows/benchmark-ci.yml
      - '!benchmark/reports/**'
  pull_request:
    branches: [stable, master, release-*]
    paths:
      - 'benchmark/**'
      - '!benchmark/reports/**'
      - .github/workflows/benchmark-ci.yml

jobs:
  lint:
    runs-on: ubuntu-latest
    env:
      min-python-version: '3.10'

    steps:
      - name: Checkout repository
        uses: actions/checkout@v3
        with:
          fetch-depth: 0
          ref: ${{ github.event.pull_request.head.ref }}
          repository: ${{ github.event.pull_request.head.repo.full_name }}

      - name: Set up Python ${{ env.min-python-version }}
        uses: actions/setup-python@v2
        with:
          python-version: ${{ env.min-python-version }}

      - id: get_date
        name: Get date
        working-directory: ./benchmark/
        run: echo "date=$(date +'%Y-%m-%d')" >> $GITHUB_OUTPUT

      - name: Install Poetry
        working-directory: ./benchmark/
        run: |
          curl -sSL https://install.python-poetry.org | python -

      - name: Install dependencies
        working-directory: ./benchmark/
        run: |
          export POETRY_VIRTUALENVS_IN_PROJECT=true
          poetry install -vvv

      - name: Lint with flake8
        working-directory: ./benchmark/
        run: poetry run flake8

      - name: Check black formatting
        working-directory: ./benchmark/
        run: poetry run  black . --exclude test.py --check
        if: success() || failure()

      - name: Check isort formatting
        working-directory: ./benchmark/
        run: poetry run  isort . --check
        if: success() || failure()

      - name: Check for unused imports and pass statements
        working-directory: ./benchmark/
        run: |
          cmd="poetry run autoflake --remove-all-unused-imports --recursive --ignore-init-module-imports --ignore-pass-after-docstring agbenchmark"
          $cmd --check || (echo "You have unused imports or pass statements, please run '${cmd} --in-place'" && exit 1)
        if: success() || failure()

  tests-agbenchmark:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        agent-name: [ forge ]
      fail-fast: false
    timeout-minutes: 20
    steps:
      - name: Checkout repository
        uses: actions/checkout@v3
        with:
          fetch-depth: 0
          ref: ${{ github.event.pull_request.head.ref }}
          repository: ${{ github.event.pull_request.head.repo.full_name }}
          submodules: true

      - name: Set up Python ${{ env.min-python-version }}
        uses: actions/setup-python@v2
        with:
          python-version: ${{ env.min-python-version }}

      - name: Install Poetry
        working-directory: ./autogpts/${{ matrix.agent-name }}/
        run: |
          curl -sSL https://install.python-poetry.org | python -

      - name: Run regression tests
        run: |
          ./run agent start ${{ matrix.agent-name }} --setup
          sleep 10
          cd autogpts/${{ matrix.agent-name }}
          set +e # Ignore non-zero exit codes and continue execution
          echo "Running the following command: poetry run agbenchmark --maintain --mock"

          poetry run agbenchmark --maintain --mock
          EXIT_CODE=$?
          set -e  # Stop ignoring non-zero exit codes
          # Check if the exit code was 5, and if so, exit with 0 instead
          if [ $EXIT_CODE -eq 5 ]; then
            echo "regression_tests.json is empty."
          fi

          echo "Running the following command: poetry run agbenchmark --mock"
          poetry run agbenchmark --mock

          echo "Running the following command: poetry run agbenchmark --mock --category=data"
          poetry run agbenchmark --mock --category=data

          echo "Running the following command: poetry run agbenchmark --mock --category=coding"
          poetry run agbenchmark --mock --category=coding

          echo "Running the following command: poetry run agbenchmark --test=WriteFile"
          poetry run agbenchmark --test=WriteFile
          cd ../../benchmark
          poetry install
          echo "Adding the BUILD_SKILL_TREE environment variable. This will attempt to add new elements in the skill tree. If new elements are added, the CI fails because they should have been pushed"
          export BUILD_SKILL_TREE=true

          poetry run agbenchmark --mock
          poetry run pytest -vv -s tests

          CHANGED=$(git diff --name-only | grep -E '(agbenchmark/challenges)|(../frontend/assets)') || echo "No diffs"
          if [ ! -z "$CHANGED" ]; then
            echo "There are unstaged changes please run agbenchmark and commit those changes since they are needed."
            echo "$CHANGED"
            exit 1
          else
            echo "No unstaged changes."
          fi
        env:
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}