diff --git a/.github/workflows/autogpt-ci.yml b/.github/workflows/autogpt-ci.yml index 375a433a..1701325d 100644 --- a/.github/workflows/autogpt-ci.yml +++ b/.github/workflows/autogpt-ci.yml @@ -4,16 +4,13 @@ on: push: branches: [ master, development, ci-test* ] paths: + - '.github/workflows/autogpt-ci.yml' - 'autogpts/autogpt/**' - '!autogpts/autogpt/tests/vcr_cassettes' pull_request: branches: [ master, development, release-* ] paths: - - 'autogpts/autogpt/**' - - '!autogpts/autogpt/tests/vcr_cassettes' - pull_request_target: - branches: [ master, development, release-*, ci-test* ] - paths: + - '.github/workflows/autogpt-ci.yml' - 'autogpts/autogpt/**' - '!autogpts/autogpt/tests/vcr_cassettes' @@ -27,9 +24,6 @@ defaults: jobs: lint: - # eliminate duplicate runs - if: github.event_name == 'push' || (github.event.pull_request.head.repo.fork == (github.event_name == 'pull_request_target')) - runs-on: ubuntu-latest env: min-python-version: "3.10" @@ -39,8 +33,6 @@ jobs: uses: actions/checkout@v3 with: fetch-depth: 0 - ref: ${{ github.event.pull_request.head.ref }} - repository: ${{ github.event.pull_request.head.repo.full_name }} - name: Set up Python ${{ env.min-python-version }} uses: actions/setup-python@v4 @@ -83,17 +75,8 @@ jobs: poetry run $cmd --check || (echo "You have unused imports or pass statements, please run '${cmd} --in-place'" && exit 1) test: - # eliminate duplicate runs - if: github.event_name == 'push' || (github.event.pull_request.head.repo.fork == (github.event_name == 'pull_request_target')) - permissions: - # Gives the action the necessary permissions for publishing new - # comments in pull requests. - pull-requests: write - # Gives the action the necessary permissions for pushing data to the - # python-coverage-comment-action branch, and for editing existing - # comments (to avoid publishing multiple comments in the same PR) - contents: write + contents: read runs-on: ubuntu-latest timeout-minutes: 30 strategy: @@ -105,8 +88,6 @@ jobs: uses: actions/checkout@v3 with: fetch-depth: 0 - ref: ${{ github.event.pull_request.head.ref }} - repository: ${{ github.event.pull_request.head.repo.full_name }} submodules: true - name: Configure git user Auto-GPT-Bot @@ -116,9 +97,13 @@ jobs: - name: Checkout cassettes if: ${{ startsWith(github.event_name, 'pull_request') }} + env: + PR_BASE: ${{ github.event.pull_request.base.ref }} + PR_BRANCH: ${{ github.event.pull_request.head.ref }} + PR_AUTHOR: ${{ github.event.pull_request.user.login }} run: | - cassette_branch="${{ github.event.pull_request.user.login }}-${{ github.event.pull_request.head.ref }}" - cassette_base_branch="${{ github.event.pull_request.base.ref }}" + cassette_branch="${PR_AUTHOR}-${PR_BRANCH}" + cassette_base_branch="${PR_BASE}" cd tests/vcr_cassettes if ! git ls-remote --exit-code --heads origin $cassette_base_branch ; then @@ -169,10 +154,7 @@ jobs: tests/unit tests/integration env: CI: true - PROXY: ${{ github.event_name == 'pull_request_target' && secrets.PROXY || '' }} - AGENT_MODE: ${{ github.event_name == 'pull_request_target' && secrets.AGENT_MODE || '' }} - AGENT_TYPE: ${{ github.event_name == 'pull_request_target' && secrets.AGENT_TYPE || '' }} - OPENAI_API_KEY: ${{ github.event_name != 'pull_request_target' && secrets.OPENAI_API_KEY || '' }} + OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} PLAIN_OUTPUT: True - name: Upload coverage reports to Codecov @@ -198,11 +180,14 @@ jobs: - id: push_cassettes name: Push updated cassettes # For pull requests, push updated cassettes even when tests fail - if: github.event_name == 'push' || success() || failure() + if: github.event_name == 'push' || (! github.event.pull_request.head.repo.fork && (success() || failure())) + env: + PR_BRANCH: ${{ github.event.pull_request.head.ref }} + PR_AUTHOR: ${{ github.event.pull_request.user.login }} run: | if [ "${{ startsWith(github.event_name, 'pull_request') }}" = "true" ]; then is_pull_request=true - cassette_branch="${{ github.event.pull_request.user.login }}-${{ github.event.pull_request.head.ref }}" + cassette_branch="${PR_AUTHOR}-${PR_BRANCH}" else cassette_branch="${{ github.ref_name }}" fi @@ -234,20 +219,15 @@ jobs: - name: Apply "behaviour change" label and comment on PR if: ${{ startsWith(github.event_name, 'pull_request') }} run: | - PR_NUMBER=${{ github.event.pull_request.number }} - TOKEN=${{ secrets.PAT_REVIEW }} - REPO=${{ github.repository }} + PR_NUMBER="${{ github.event.pull_request.number }}" + TOKEN="${{ secrets.PAT_REVIEW }}" + REPO="${{ github.repository }}" if [[ "${{ steps.push_cassettes.outputs.updated }}" == "true" ]]; then echo "Adding label and comment..." - curl -X POST \ - -H "Authorization: Bearer $TOKEN" \ - -H "Accept: application/vnd.github.v3+json" \ - https://api.github.com/repos/$REPO/issues/$PR_NUMBER/labels \ - -d '{"labels":["behaviour change"]}' - echo $TOKEN | gh auth login --with-token - gh api repos/$REPO/issues/$PR_NUMBER/comments -X POST -F body="You changed AutoGPT's behaviour. The cassettes have been updated and will be merged to the submodule when this Pull Request gets merged." + gh issue edit $PR_NUMBER --add-label "behaviour change" + gh issue comment $PR_NUMBER --body "You changed AutoGPT's behaviour. The cassettes have been updated and will be merged to the submodule when this Pull Request gets merged." fi - name: Upload logs to artifact diff --git a/.github/workflows/autogpt-docker-ci.yml b/.github/workflows/autogpt-docker-ci.yml index a6f336a7..f9b8143e 100644 --- a/.github/workflows/autogpt-docker-ci.yml +++ b/.github/workflows/autogpt-docker-ci.yml @@ -4,11 +4,13 @@ on: push: branches: [ master, development ] paths: + - '.github/workflows/autogpt-docker-ci.yml' - 'autogpts/autogpt/**' - '!autogpts/autogpt/tests/vcr_cassettes' pull_request: branches: [ master, development, release-* ] paths: + - '.github/workflows/autogpt-docker-ci.yml' - 'autogpts/autogpt/**' - '!autogpts/autogpt/tests/vcr_cassettes' diff --git a/.github/workflows/autogpts-ci.yml b/.github/workflows/autogpts-ci.yml index 4f89fb7c..0385470a 100644 --- a/.github/workflows/autogpts-ci.yml +++ b/.github/workflows/autogpts-ci.yml @@ -1,4 +1,4 @@ -name: Valid AutoGPTs +name: AutoGPTs smoke test CI on: workflow_dispatch: @@ -6,8 +6,24 @@ on: - cron: '0 8 * * *' push: branches: [ master, development, ci-test* ] + paths: + - '.github/workflows/autogpts-ci.yml' + - 'autogpts/**' + - 'benchmark/**' + - 'run' + - 'cli.py' + - 'setup.py' + - '!**/*.md' pull_request: branches: [ master, development, release-* ] + paths: + - '.github/workflows/autogpts-ci.yml' + - 'autogpts/**' + - 'benchmark/**' + - 'run' + - 'cli.py' + - 'setup.py' + - '!**/*.md' jobs: run-tests: @@ -17,13 +33,13 @@ jobs: agent-name: [ autogpt, forge ] fail-fast: false timeout-minutes: 20 + env: + min-python-version: '3.10' steps: - name: Checkout repository uses: actions/checkout@v3 with: fetch-depth: 0 - ref: ${{ github.event.pull_request.head.ref }} - repository: ${{ github.event.pull_request.head.repo.full_name }} submodules: true - name: Set up Python ${{ env.min-python-version }} diff --git a/.github/workflows/benchmark-ci.yml b/.github/workflows/benchmark-ci.yml index fd8020d2..5f95dd02 100644 --- a/.github/workflows/benchmark-ci.yml +++ b/.github/workflows/benchmark-ci.yml @@ -14,19 +14,18 @@ on: - '!benchmark/reports/**' - .github/workflows/benchmark-ci.yml +env: + min-python-version: '3.10' + jobs: lint: runs-on: ubuntu-latest - env: - min-python-version: '3.10' steps: - name: Checkout repository uses: actions/checkout@v3 with: fetch-depth: 0 - ref: ${{ github.event.pull_request.head.ref }} - repository: ${{ github.event.pull_request.head.repo.full_name }} - name: Set up Python ${{ env.min-python-version }} uses: actions/setup-python@v2 @@ -82,8 +81,6 @@ jobs: uses: actions/checkout@v3 with: fetch-depth: 0 - ref: ${{ github.event.pull_request.head.ref }} - repository: ${{ github.event.pull_request.head.repo.full_name }} submodules: true - name: Set up Python ${{ env.min-python-version }} diff --git a/.github/workflows/benchmarks.yml b/.github/workflows/benchmarks.yml deleted file mode 100644 index af2383a6..00000000 --- a/.github/workflows/benchmarks.yml +++ /dev/null @@ -1,73 +0,0 @@ -name: Benchmarks (legacy) - -on: - schedule: - - cron: '0 8 * * *' - workflow_dispatch: - -jobs: - Benchmark: - name: ${{ matrix.config.task-name }} - runs-on: ubuntu-latest - timeout-minutes: 30 - strategy: - fail-fast: false - matrix: - config: - - python-version: "3.10" - task: "tests/challenges" - task-name: "Mandatory Tasks" - - python-version: "3.10" - task: "--beat-challenges -ra tests/challenges" - task-name: "Challenging Tasks" - - steps: - - name: Checkout repository - uses: actions/checkout@v3 - with: - ref: master - - - name: Set up Python ${{ matrix.config.python-version }} - uses: actions/setup-python@v4 - with: - python-version: ${{ matrix.config.python-version }} - - - id: get_date - name: Get date - run: echo "date=$(date +'%Y-%m-%d')" >> $GITHUB_OUTPUT - - - name: Set up Python dependency cache - uses: actions/cache@v3 - with: - path: ~/.cache/pypoetry - key: ${{ runner.os }}-poetry-${{ hashFiles('autogpts/autogpt/pyproject.toml') }}-${{ steps.get_date.outputs.date }} - - - name: Install Python dependencies - run: | - curl -sSL https://install.python-poetry.org | python3 - - poetry install - - - name: Run pytest with coverage - run: | - rm -rf tests/vcr_cassettes - pytest -n auto --record-mode=all ${{ matrix.config.task }} - env: - CI: true - PROXY: ${{ secrets.PROXY }} - AGENT_MODE: ${{ secrets.AGENT_MODE }} - AGENT_TYPE: ${{ secrets.AGENT_TYPE }} - PLAIN_OUTPUT: True - - - name: Upload logs as artifact - if: always() - uses: actions/upload-artifact@v3 - with: - name: test-logs-${{ matrix.config.task-name }} - path: logs/ - - - name: Upload cassettes as artifact - if: always() - uses: actions/upload-artifact@v3 - with: - name: cassettes-${{ matrix.config.task-name }} - path: tests/vcr_cassettes/ diff --git a/.github/workflows/hackathon.yml b/.github/workflows/hackathon.yml index 9bd5d5f1..a8b592eb 100644 --- a/.github/workflows/hackathon.yml +++ b/.github/workflows/hackathon.yml @@ -2,7 +2,6 @@ name: Hackathon on: workflow_dispatch: - branches: [hackathon] inputs: agents: description: "Agents to run (comma-separated)" @@ -92,8 +91,6 @@ jobs: uses: actions/checkout@v3 with: fetch-depth: 0 - ref: ${{ github.event.pull_request.head.ref }} - repository: ${{ github.event.pull_request.head.repo.full_name }} submodules: true - name: Set up Python ${{ env.min-python-version }} @@ -133,4 +130,4 @@ jobs: WEAVIATE_URL: ${{ secrets.WEAVIATE_URL }} GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }} GOOGLE_CUSTOM_SEARCH_ENGINE_ID: ${{ secrets.GOOGLE_CUSTOM_SEARCH_ENGINE_ID }} - AGENT_NAME: ${{ matrix.agent-name }} \ No newline at end of file + AGENT_NAME: ${{ matrix.agent-name }} diff --git a/benchmark/agbenchmark/challenges/deprecated/code/1_password_generator/data.json b/benchmark/agbenchmark/challenges/deprecated/code/1_password_generator/data.json index 01dd0afc..0f30ea87 100644 --- a/benchmark/agbenchmark/challenges/deprecated/code/1_password_generator/data.json +++ b/benchmark/agbenchmark/challenges/deprecated/code/1_password_generator/data.json @@ -24,5 +24,5 @@ "side_effects": [] }, "name": "PasswordGenerator", - "task": "Create a random password generator. The password should have between 8 and 16 characters and should contain letters, numbers and symbols. The password should be printed to the console. The entry point will be a python file that can be run this way: python password_generator.py [--len x] where x is the length of the password. If no length is specified, the password should be 8 characters long. The password_generator can also be imported as a module and called as password = password_generator.generate_password(len=x). Any invalid input should raise a ValueError." + "task": "Create a random password generator. The password should have between 8 and 16 characters and should contain letters, numbers and symbols. The password should be printed to the console. The entry point will be a python file that can be run this way: python password_generator.py [--len x] where x is the length of the password. The password_generator can also be imported as a module and called as password = password_generator.generate_password(len=x). If no length is specified, the password should be 8 characters long. Any invalid input should raise a ValueError." } diff --git a/benchmark/agbenchmark/challenges/verticals/code/2_password_generator/artifacts_out/password_generator.py b/benchmark/agbenchmark/challenges/verticals/code/2_password_generator/artifacts_out/password_generator.py index 514ec43a..5797ebcb 100644 --- a/benchmark/agbenchmark/challenges/verticals/code/2_password_generator/artifacts_out/password_generator.py +++ b/benchmark/agbenchmark/challenges/verticals/code/2_password_generator/artifacts_out/password_generator.py @@ -1,8 +1,9 @@ import random import string +import sys -def generate_password(length: int) -> str: +def generate_password(length: int = 8) -> str: if length < 8 or length > 16: raise ValueError("Password length must be between 8 and 16 characters.") @@ -19,5 +20,8 @@ def generate_password(length: int) -> str: if __name__ == "__main__": - password_length = random.randint(8, 16) + password_length = ( + int(sys.argv[sys.argv.index("--length") + 1]) + if "--length" in sys.argv else 8 + ) print(generate_password(password_length)) diff --git a/benchmark/agbenchmark/challenges/verticals/code/2_password_generator/data.json b/benchmark/agbenchmark/challenges/verticals/code/2_password_generator/data.json index 8b2e1d91..14bbe453 100644 --- a/benchmark/agbenchmark/challenges/verticals/code/2_password_generator/data.json +++ b/benchmark/agbenchmark/challenges/verticals/code/2_password_generator/data.json @@ -24,5 +24,5 @@ "side_effects": [] }, "name": "PasswordGenerator", - "task": "Create a random password generator. The password should have between 8 and 16 characters and should contain at least one letter, number and symbol. The password should be printed to the console. The entry point will be a python file that can be run this way: python password_generator.py [--len x] where x is the length of the password. If no length is specified, the password should be 8 characters long. The password_generator can also be imported as a module and called as password = password_generator.generate_password(len=x). Any invalid input should raise a ValueError." + "task": "Create a random password generator. The password should have between 8 and 16 characters and should contain at least one letter, number and symbol. The password should be printed to the console. The entry point will be a python file that can be run this way: python password_generator.py [--length x] where x is the length of the password. If no length is specified, the password should be 8 characters long. The password_generator can also be imported as a module and called as password = password_generator.generate_password(length=x). Any invalid input should raise a ValueError." } diff --git a/benchmark/agbenchmark/reports/processing/report_types.py b/benchmark/agbenchmark/reports/processing/report_types.py index d2fc8dea..3ba9e6c6 100644 --- a/benchmark/agbenchmark/reports/processing/report_types.py +++ b/benchmark/agbenchmark/reports/processing/report_types.py @@ -54,7 +54,6 @@ class Test(BaseModelBenchmark): metadata: Any - class ReportBase(BaseModelBenchmark): command: str completion_time: str | None @@ -70,7 +69,6 @@ class Report(ReportBase): tests: Dict[str, Test] - class ReportV2(Test, ReportBase): test_name: str run_id: str | None diff --git a/benchmark/agbenchmark/utils/challenge.py b/benchmark/agbenchmark/utils/challenge.py index 20353f68..a32ab6cf 100644 --- a/benchmark/agbenchmark/utils/challenge.py +++ b/benchmark/agbenchmark/utils/challenge.py @@ -123,7 +123,7 @@ class Challenge(ABC): print("\033[1;34mScoring content:\033[0m", content) if ground.should_contain: for should_contain_word in ground.should_contain: - if not getattr(ground, 'case_sensitive', True): + if not getattr(ground, "case_sensitive", True): should_contain_word = should_contain_word.lower() content = content.lower() print_content = ( @@ -137,7 +137,7 @@ class Challenge(ABC): if ground.should_not_contain: for should_not_contain_word in ground.should_not_contain: - if not getattr(ground, 'case_sensitive', True): + if not getattr(ground, "case_sensitive", True): should_not_contain_word = should_not_contain_word.lower() content = content.lower() print_content = f"\033[1;34mWord that should not exist\033[0m - {should_not_contain_word}:"