Benchmark CI: pin every benchmarked agent to a recorded commit and publish
the reports it produced.

.github/workflows/benchmark-ci.yml
  * Read a "commit" field for the agent from agents_to_benchmark.json and
    check it out right after cloning. If the checkout fails, a message is
    printed and the run continues on whatever the clone left checked out.
  * Remove the REPORT_LOCATION environment entry.
  * Run the "Authenticate and Push to Branch" step also when the ref name
    starts with "ci-test-".
  * In that step, copy agent/<agent>/agbenchmark/reports/* into
    reports/<agent>/ before committing, then delete the agent clone. The
    destination is created with `mkdir -p` first and every expansion is
    quoted, so a missing reports/<agent>/ directory does not make `cp` fail.
  * Add AGENT_NAME (from matrix.agent-name) to the env block touched by the
    last hunk.

benchmark/agents_to_benchmark.json
  * Add a "commit" SHA next to "branch" for each of the nine agents.

Review notes:
  * The whitespace of this patch was reconstructed; if context lines do not
    match the target file, apply with `git apply --ignore-whitespace`.
  * The mkdir/cp line differs from the revision this patch was generated
    from, so the post-image blob id on the workflow's "index" line is stale.
    Regenerate the patch if that id matters to your tooling.

diff --git a/.github/workflows/benchmark-ci.yml b/.github/workflows/benchmark-ci.yml
index f7cdeac5..654b06b8 100644
--- a/.github/workflows/benchmark-ci.yml
+++ b/.github/workflows/benchmark-ci.yml
@@ -146,9 +146,12 @@ jobs:
           mkdir agent
           link=$(jq -r '.["'"$AGENT_NAME"'"].url' agents_to_benchmark.json)
           branch=$(jq -r '.["'"$AGENT_NAME"'"].branch' agents_to_benchmark.json)
+          commit=$(jq -r '.["'"$AGENT_NAME"'"].commit' agents_to_benchmark.json)
           cd agent
           git clone "$link" -b "$branch"
           cd $AGENT_NAME
+          git checkout "$commit" || echo "Commit not found, using latest commit on branch"
+
           prefix=""
           if [ "$AGENT_NAME" == "gpt-engineer" ]; then
             make install
@@ -259,7 +262,6 @@ jobs:
           REQUESTS_CA_BUNDLE: /etc/ssl/certs/ca-certificates.crt
           HELICONE_CACHE_ENABLED: false
           HELICONE_PROPERTY_AGENT: ${{ matrix.agent-name }}
-          REPORT_LOCATION: ${{ format('../../reports/{0}', matrix.agent-name) }}
           WOLFRAM_ALPHA_APPID: ${{ secrets.WOLFRAM_ALPHA_APPID }}
           SERPER_API_KEY: ${{ secrets.SERPER_API_KEY }}
           BING_SUBSCRIPTION_KEY: ${{ secrets.BING_SUBSCRIPTION_KEY }}
@@ -273,11 +275,12 @@ jobs:
 
       - name: Authenticate and Push to Branch
         working-directory: ./benchmark/
-        if: (success() || failure()) && (github.event_name == 'schedule' || github.event_name == 'workflow_dispatch')
+        if: (success() || failure()) && (github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' || startsWith(github.ref_name, 'ci-test-'))
         run: |
           git config --global user.email "github-bot@agpt.co"
           git config --global user.name "Auto-GPT-Bot"
-
+          mkdir -p "reports/$AGENT_NAME" && cp -rn "agent/$AGENT_NAME/agbenchmark/reports/"* "reports/$AGENT_NAME/"
+          rm -rf agent
           git add reports/* || echo "nothing to commit"
           commit_message="${{ matrix.agent-name }}-$(date +'%Y%m%d%H%M%S')"
           git commit -m "${commit_message}"
@@ -303,3 +306,4 @@ jobs:
         env:
           GDRIVE_BASE64: ${{ secrets.GDRIVE_BASE64 }}
           GITHUB_REF_NAME: ${{ github.ref_name }}
+          AGENT_NAME: ${{ matrix.agent-name }}
diff --git a/benchmark/agents_to_benchmark.json b/benchmark/agents_to_benchmark.json
index d91f178f..b35188fa 100644
--- a/benchmark/agents_to_benchmark.json
+++ b/benchmark/agents_to_benchmark.json
@@ -1,38 +1,47 @@
 {
   "Auto-GPT": {
     "url": "https://github.com/Significant-Gravitas/Auto-GPT",
-    "branch": "master"
+    "branch": "master",
+    "commit": "3a2d08fb415071cc94dd6fcee24cfbdd1fb487dd"
   },
   "gpt-engineer": {
     "url": "https://github.com/merwanehamadi/gpt-engineer.git",
-    "branch": "benchmark-integration"
+    "branch": "benchmark-integration",
+    "commit": "9bb81041ace9f09e8ea0e34e29f2e46bb9d46a36"
   },
   "mini-agi": {
     "url": "https://github.com/SilenNaihin/mini-agi.git",
-    "branch": "benchmark-integration"
+    "branch": "benchmark-integration",
+    "commit": "2fc70aa0032eec986dfb1020854a1b3b8aaf6780"
   },
   "smol-developer": {
     "url": "https://github.com/e2b-dev/smol-developer.git",
-    "branch": "benchmarks"
+    "branch": "benchmarks",
+    "commit": "a23d01369cea976e80b7889fdbf1096619471301"
   },
   "SuperAGI": {
     "url": "https://github.com/SilenNaihin/SuperAGI.git",
-    "branch": "benchmark-integration"
+    "branch": "benchmark-integration",
+    "commit": "48b2101374264b97dbdfc2c0bb0ae45e769e157d"
   },
   "BabyAGI": {
     "url": "https://github.com/SilenNaihin/babyagi.git",
-    "branch": "benchmark-integration"
+    "branch": "benchmark-integration",
+    "commit": "16f1b9519fea5543695203be0262a1b41c77cbba"
   },
   "beebot": {
     "url": "https://github.com/AutoPackAI/beebot.git",
-    "branch": "main"
+    "branch": "main",
+    "commit": "59d4e93c133612a0319d135bb0eb08bbcead9fa2"
   },
   "PolyGPT": {
     "url": "https://github.com/polywrap/PolyGPT.git",
-    "branch": "nerfzael-use-local-wrap-library"
+    "branch": "nerfzael-use-local-wrap-library",
+    "commit": "d621adf5f54cc0f9a6d191139fb67ac3d1436d7b"
   },
   "Turbo": {
     "url": "https://github.com/lc0rp/Auto-GPT-Turbo.git",
-    "branch": "main"
+    "branch": "main",
+    "commit": "8469e09ae204f2d5f41d489b217551544597ee14"
   }
 }