Clean up & fix GitHub workflows (#6313)

* ci: Mitigate security issues in autogpt-ci.yml

  - Remove unnecessary pull_request_target paths and related variables and config
  - Set permissions for contents to read only

* ci: Simplify steps in autogpt-ci.yml workflow using GitHub CLI

  - Simplify step in 'autogpt-ci.yml' by using GitHub CLI instead of API for adding label and comment functionality
  - Replace curl command with 'gh issue edit' to add "behaviour change" label to the pull request
  - Replace gh api command with 'gh issue comment' to leave a comment about the changed behavior of AutoGPT in the pull request

* ci: Fix issues in workflows

  - Move environment variable definition to top level in benchmark-ci.yml (because the other job also needs it)
  - Removed invalid 'branches: [hackathon]' restriction in hackathon.yml workflow
  - Removed redundant 'ref' and 'repository' fields in the 'checkout' step of both workflows.

* ci: Delete legacy benchmarks.yml workflow

* ci: Add triggers for CI workflows

  - Add triggers to run CI workflows when they are edited.
  - Update the paths for the CI workflows in the trigger configuration.

* fix: Fix benchmark lint error

  - Removed unnecessary blank lines in report_types.py
  - Fixed string quotes in challenge.py to maintain consistency

* fix: Update task description in password generator data.json

  - Update task description in `data.json` file for the password generator challenge to clarify the input requirements and error handling.
  - This change is made in an attempt to make the Benchmark CI pass.

* fix: Fix PasswordGenerator challenge in CI

  - Fix the behavior of the reference password_generator.py to align with the task description
  - Use default password length 8 instead of a random length in the generate_password function
  - Retrieve the password length from the command line arguments if "--length" is provided, else set it to 8
2026-01-31 11:54:30 +01:00 · 2023-11-21 10:58:54 +01:00
parent 874000624d
commit b106a61352
11 changed files with 55 additions and 134 deletions
--- a/.github/workflows/autogpt-ci.yml
+++ b/.github/workflows/autogpt-ci.yml
@@ -4,16 +4,13 @@ on:
  push:
    branches: [ master, development, ci-test* ]
    paths:
+      - '.github/workflows/autogpt-ci.yml'
      - 'autogpts/autogpt/**'
      - '!autogpts/autogpt/tests/vcr_cassettes'
  pull_request:
    branches: [ master, development, release-* ]
    paths:
-      - 'autogpts/autogpt/**'
-      - '!autogpts/autogpt/tests/vcr_cassettes'
-  pull_request_target:
-    branches: [ master, development, release-*, ci-test* ]
-    paths:
+      - '.github/workflows/autogpt-ci.yml'
      - 'autogpts/autogpt/**'
      - '!autogpts/autogpt/tests/vcr_cassettes'

@@ -27,9 +24,6 @@ defaults:

 jobs:
  lint:
-    # eliminate duplicate runs
-    if: github.event_name == 'push' || (github.event.pull_request.head.repo.fork == (github.event_name == 'pull_request_target'))
-
    runs-on: ubuntu-latest
    env:
      min-python-version: "3.10"
@@ -39,8 +33,6 @@ jobs:
        uses: actions/checkout@v3
        with:
          fetch-depth: 0
-          ref: ${{ github.event.pull_request.head.ref }}
-          repository: ${{ github.event.pull_request.head.repo.full_name }}

      - name: Set up Python ${{ env.min-python-version }}
        uses: actions/setup-python@v4
@@ -83,17 +75,8 @@ jobs:
          poetry run $cmd --check || (echo "You have unused imports or pass statements, please run '${cmd} --in-place'" && exit 1)

  test:
-    # eliminate duplicate runs
-    if: github.event_name == 'push' || (github.event.pull_request.head.repo.fork == (github.event_name == 'pull_request_target'))
-
    permissions:
-      # Gives the action the necessary permissions for publishing new
-      # comments in pull requests.
-      pull-requests: write
-      # Gives the action the necessary permissions for pushing data to the
-      # python-coverage-comment-action branch, and for editing existing
-      # comments (to avoid publishing multiple comments in the same PR)
-      contents: write
+      contents: read
    runs-on: ubuntu-latest
    timeout-minutes: 30
    strategy:
@@ -105,8 +88,6 @@ jobs:
        uses: actions/checkout@v3
        with:
          fetch-depth: 0
-          ref: ${{ github.event.pull_request.head.ref }}
-          repository: ${{ github.event.pull_request.head.repo.full_name }}
          submodules: true

      - name: Configure git user Auto-GPT-Bot
@@ -116,9 +97,13 @@ jobs:

      - name: Checkout cassettes
        if: ${{ startsWith(github.event_name, 'pull_request') }}
+        env:
+          PR_BASE: ${{ github.event.pull_request.base.ref }}
+          PR_BRANCH: ${{ github.event.pull_request.head.ref }}
+          PR_AUTHOR: ${{ github.event.pull_request.user.login }}
        run: |
-          cassette_branch="${{ github.event.pull_request.user.login }}-${{ github.event.pull_request.head.ref }}"
-          cassette_base_branch="${{ github.event.pull_request.base.ref }}"
+          cassette_branch="${PR_AUTHOR}-${PR_BRANCH}"
+          cassette_base_branch="${PR_BASE}"
          cd tests/vcr_cassettes

          if ! git ls-remote --exit-code --heads origin $cassette_base_branch ; then
@@ -169,10 +154,7 @@ jobs:
            tests/unit tests/integration
        env:
          CI: true
-          PROXY: ${{ github.event_name == 'pull_request_target' && secrets.PROXY || '' }}
-          AGENT_MODE: ${{ github.event_name == 'pull_request_target' && secrets.AGENT_MODE || '' }}
-          AGENT_TYPE: ${{ github.event_name == 'pull_request_target' && secrets.AGENT_TYPE || '' }}
-          OPENAI_API_KEY: ${{ github.event_name != 'pull_request_target' && secrets.OPENAI_API_KEY || '' }}
+          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
          PLAIN_OUTPUT: True

      - name: Upload coverage reports to Codecov
@@ -198,11 +180,14 @@ jobs:
      - id: push_cassettes
        name: Push updated cassettes
        # For pull requests, push updated cassettes even when tests fail
-        if: github.event_name == 'push' || success() || failure()
+        if: github.event_name == 'push' || (! github.event.pull_request.head.repo.fork && (success() || failure()))
+        env:
+          PR_BRANCH: ${{ github.event.pull_request.head.ref }}
+          PR_AUTHOR: ${{ github.event.pull_request.user.login }}
        run: |
          if [ "${{ startsWith(github.event_name, 'pull_request') }}" = "true" ]; then
            is_pull_request=true
-            cassette_branch="${{ github.event.pull_request.user.login }}-${{ github.event.pull_request.head.ref }}"
+            cassette_branch="${PR_AUTHOR}-${PR_BRANCH}"
          else
            cassette_branch="${{ github.ref_name }}"
          fi
@@ -234,20 +219,15 @@ jobs:
      - name: Apply "behaviour change" label and comment on PR
        if: ${{ startsWith(github.event_name, 'pull_request') }}
        run: |
-          PR_NUMBER=${{ github.event.pull_request.number }}
-          TOKEN=${{ secrets.PAT_REVIEW }}
-          REPO=${{ github.repository }}
+          PR_NUMBER="${{ github.event.pull_request.number }}"
+          TOKEN="${{ secrets.PAT_REVIEW }}"
+          REPO="${{ github.repository }}"

          if [[ "${{ steps.push_cassettes.outputs.updated }}" == "true" ]]; then
            echo "Adding label and comment..."
-            curl -X POST \
-            -H "Authorization: Bearer $TOKEN" \
-            -H "Accept: application/vnd.github.v3+json" \
-            https://api.github.com/repos/$REPO/issues/$PR_NUMBER/labels \
-            -d '{"labels":["behaviour change"]}'
-
            echo $TOKEN | gh auth login --with-token
-            gh api repos/$REPO/issues/$PR_NUMBER/comments -X POST -F body="You changed AutoGPT's behaviour. The cassettes have been updated and will be merged to the submodule when this Pull Request gets merged."
+            gh issue edit $PR_NUMBER --add-label "behaviour change"
+            gh issue comment $PR_NUMBER --body "You changed AutoGPT's behaviour. The cassettes have been updated and will be merged to the submodule when this Pull Request gets merged."
          fi

      - name: Upload logs to artifact
--- a/.github/workflows/autogpt-docker-ci.yml
+++ b/.github/workflows/autogpt-docker-ci.yml
@@ -4,11 +4,13 @@ on:
  push:
    branches: [ master, development ]
    paths:
+      - '.github/workflows/autogpt-docker-ci.yml'
      - 'autogpts/autogpt/**'
      - '!autogpts/autogpt/tests/vcr_cassettes'
  pull_request:
    branches: [ master, development, release-* ]
    paths:
+      - '.github/workflows/autogpt-docker-ci.yml'
      - 'autogpts/autogpt/**'
      - '!autogpts/autogpt/tests/vcr_cassettes'

--- a/.github/workflows/autogpts-ci.yml
+++ b/.github/workflows/autogpts-ci.yml
@@ -1,4 +1,4 @@
-name: Valid AutoGPTs
+name: AutoGPTs smoke test CI

 on:
  workflow_dispatch:
@@ -6,8 +6,24 @@ on:
    - cron: '0 8 * * *'
  push:
    branches: [ master, development, ci-test* ]
+    paths:
+      - '.github/workflows/autogpts-ci.yml'
+      - 'autogpts/**'
+      - 'benchmark/**'
+      - 'run'
+      - 'cli.py'
+      - 'setup.py'
+      - '!**/*.md'
  pull_request:
    branches: [ master, development, release-* ]
+    paths:
+      - '.github/workflows/autogpts-ci.yml'
+      - 'autogpts/**'
+      - 'benchmark/**'
+      - 'run'
+      - 'cli.py'
+      - 'setup.py'
+      - '!**/*.md'

 jobs:
  run-tests:
@@ -17,13 +33,13 @@ jobs:
        agent-name: [ autogpt, forge ]
      fail-fast: false
    timeout-minutes: 20
+    env:
+      min-python-version: '3.10'
    steps:
      - name: Checkout repository
        uses: actions/checkout@v3
        with:
          fetch-depth: 0
-          ref: ${{ github.event.pull_request.head.ref }}
-          repository: ${{ github.event.pull_request.head.repo.full_name }}
          submodules: true

      - name: Set up Python ${{ env.min-python-version }}
--- a/.github/workflows/benchmark-ci.yml
+++ b/.github/workflows/benchmark-ci.yml
@@ -14,19 +14,18 @@ on:
      - '!benchmark/reports/**'
      - .github/workflows/benchmark-ci.yml

+env:
+  min-python-version: '3.10'
+
 jobs:
  lint:
    runs-on: ubuntu-latest
-    env:
-      min-python-version: '3.10'

    steps:
      - name: Checkout repository
        uses: actions/checkout@v3
        with:
          fetch-depth: 0
-          ref: ${{ github.event.pull_request.head.ref }}
-          repository: ${{ github.event.pull_request.head.repo.full_name }}

      - name: Set up Python ${{ env.min-python-version }}
        uses: actions/setup-python@v2
@@ -82,8 +81,6 @@ jobs:
        uses: actions/checkout@v3
        with:
          fetch-depth: 0
-          ref: ${{ github.event.pull_request.head.ref }}
-          repository: ${{ github.event.pull_request.head.repo.full_name }}
          submodules: true

      - name: Set up Python ${{ env.min-python-version }}
--- a/.github/workflows/benchmarks.yml
+++ b/.github/workflows/benchmarks.yml
@@ -1,73 +0,0 @@
-name: Benchmarks (legacy)
-
-on:
-  schedule:
-    - cron: '0 8 * * *'
-  workflow_dispatch:
-
-jobs:
-  Benchmark:
-    name: ${{ matrix.config.task-name }}
-    runs-on: ubuntu-latest
-    timeout-minutes: 30
-    strategy:
-      fail-fast: false
-      matrix:
-        config:
-          - python-version: "3.10"
-            task: "tests/challenges"
-            task-name: "Mandatory Tasks"
-          - python-version: "3.10"
-            task: "--beat-challenges -ra tests/challenges"
-            task-name: "Challenging Tasks"
-
-    steps:
-      - name: Checkout repository
-        uses: actions/checkout@v3
-        with:
-          ref: master
-
-      - name: Set up Python ${{ matrix.config.python-version }}
-        uses: actions/setup-python@v4
-        with:
-          python-version: ${{ matrix.config.python-version }}
-
-      - id: get_date
-        name: Get date
-        run: echo "date=$(date +'%Y-%m-%d')" >> $GITHUB_OUTPUT
-
-      - name: Set up Python dependency cache
-        uses: actions/cache@v3
-        with:
-          path: ~/.cache/pypoetry
-          key: ${{ runner.os }}-poetry-${{ hashFiles('autogpts/autogpt/pyproject.toml') }}-${{ steps.get_date.outputs.date }}
-
-      - name: Install Python dependencies
-        run: |
-          curl -sSL https://install.python-poetry.org | python3 -
-          poetry install
-
-      - name: Run pytest with coverage
-        run: |
-          rm -rf tests/vcr_cassettes
-          pytest -n auto --record-mode=all ${{ matrix.config.task }}
-        env:
-          CI: true
-          PROXY: ${{ secrets.PROXY }}
-          AGENT_MODE: ${{ secrets.AGENT_MODE }}
-          AGENT_TYPE: ${{ secrets.AGENT_TYPE }}
-          PLAIN_OUTPUT: True
-
-      - name: Upload logs as artifact
-        if: always()
-        uses: actions/upload-artifact@v3
-        with:
-          name: test-logs-${{ matrix.config.task-name }}
-          path: logs/
-
-      - name: Upload cassettes as artifact
-        if: always()
-        uses: actions/upload-artifact@v3
-        with:
-          name: cassettes-${{ matrix.config.task-name }}
-          path: tests/vcr_cassettes/
--- a/.github/workflows/hackathon.yml
+++ b/.github/workflows/hackathon.yml
@@ -2,7 +2,6 @@ name: Hackathon

 on:
  workflow_dispatch:
-    branches: [hackathon]
    inputs:
      agents:
        description: "Agents to run (comma-separated)"
@@ -92,8 +91,6 @@ jobs:
        uses: actions/checkout@v3
        with:
          fetch-depth: 0
-          ref: ${{ github.event.pull_request.head.ref }}
-          repository: ${{ github.event.pull_request.head.repo.full_name }}
          submodules: true

      - name: Set up Python ${{ env.min-python-version }}
@@ -133,4 +130,4 @@ jobs:
          WEAVIATE_URL: ${{ secrets.WEAVIATE_URL }}
          GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }}
          GOOGLE_CUSTOM_SEARCH_ENGINE_ID: ${{ secrets.GOOGLE_CUSTOM_SEARCH_ENGINE_ID }}
-          AGENT_NAME: ${{ matrix.agent-name }}
+          AGENT_NAME: ${{ matrix.agent-name }}
--- a/benchmark/agbenchmark/challenges/deprecated/code/1_password_generator/data.json
+++ b/benchmark/agbenchmark/challenges/deprecated/code/1_password_generator/data.json
@@ -24,5 +24,5 @@
        "side_effects": []
    },
    "name": "PasswordGenerator",
-    "task": "Create a random password generator. The password should have between 8 and 16 characters and should contain letters, numbers and symbols. The password should be printed to the console. The entry point will be a python file that can be run this way: python password_generator.py [--len x] where x is the length of the password. If no length is specified, the password should be 8 characters long. The password_generator can also be imported as a module and called as password = password_generator.generate_password(len=x). Any invalid input should raise a ValueError."
+    "task": "Create a random password generator. The password should have between 8 and 16 characters and should contain letters, numbers and symbols. The password should be printed to the console. The entry point will be a python file that can be run this way: python password_generator.py [--len x] where x is the length of the password. The password_generator can also be imported as a module and called as password = password_generator.generate_password(len=x). If no length is specified, the password should be 8 characters long. Any invalid input should raise a ValueError."
 }
--- a/benchmark/agbenchmark/challenges/verticals/code/2_password_generator/artifacts_out/password_generator.py
+++ b/benchmark/agbenchmark/challenges/verticals/code/2_password_generator/artifacts_out/password_generator.py
@@ -1,8 +1,9 @@
 import random
 import string
+import sys


-def generate_password(length: int) -> str:
+def generate_password(length: int = 8) -> str:
    if length < 8 or length > 16:
        raise ValueError("Password length must be between 8 and 16 characters.")

@@ -19,5 +20,8 @@ def generate_password(length: int) -> str:


 if __name__ == "__main__":
-    password_length = random.randint(8, 16)
+    password_length = (
+        int(sys.argv[sys.argv.index("--length") + 1])
+        if "--length" in sys.argv else 8
+    )
    print(generate_password(password_length))
--- a/benchmark/agbenchmark/challenges/verticals/code/2_password_generator/data.json
+++ b/benchmark/agbenchmark/challenges/verticals/code/2_password_generator/data.json
@@ -24,5 +24,5 @@
        "side_effects": []
    },
    "name": "PasswordGenerator",
-    "task": "Create a random password generator. The password should have between 8 and 16 characters and should contain at least one letter, number and symbol. The password should be printed to the console. The entry point will be a python file that can be run this way: python password_generator.py [--len x] where x is the length of the password. If no length is specified, the password should be 8 characters long. The password_generator can also be imported as a module and called as password = password_generator.generate_password(len=x). Any invalid input should raise a ValueError."
+    "task": "Create a random password generator. The password should have between 8 and 16 characters and should contain at least one letter, number and symbol. The password should be printed to the console. The entry point will be a python file that can be run this way: python password_generator.py [--length x] where x is the length of the password. If no length is specified, the password should be 8 characters long. The password_generator can also be imported as a module and called as password = password_generator.generate_password(length=x). Any invalid input should raise a ValueError."
 }
--- a/benchmark/agbenchmark/reports/processing/report_types.py
+++ b/benchmark/agbenchmark/reports/processing/report_types.py
@@ -54,7 +54,6 @@ class Test(BaseModelBenchmark):
    metadata: Any


-
 class ReportBase(BaseModelBenchmark):
    command: str
    completion_time: str | None
@@ -70,7 +69,6 @@ class Report(ReportBase):
    tests: Dict[str, Test]


-
 class ReportV2(Test, ReportBase):
    test_name: str
    run_id: str | None
--- a/benchmark/agbenchmark/utils/challenge.py
+++ b/benchmark/agbenchmark/utils/challenge.py
@@ -123,7 +123,7 @@ class Challenge(ABC):
        print("\033[1;34mScoring content:\033[0m", content)
        if ground.should_contain:
            for should_contain_word in ground.should_contain:
-                if not getattr(ground, 'case_sensitive', True):
+                if not getattr(ground, "case_sensitive", True):
                    should_contain_word = should_contain_word.lower()
                    content = content.lower()
                print_content = (
@@ -137,7 +137,7 @@ class Challenge(ABC):

        if ground.should_not_contain:
            for should_not_contain_word in ground.should_not_contain:
-                if not getattr(ground, 'case_sensitive', True):
+                if not getattr(ground, "case_sensitive", True):
                    should_not_contain_word = should_not_contain_word.lower()
                    content = content.lower()
                print_content = f"\033[1;34mWord that should not exist\033[0m - {should_not_contain_word}:"