From a30cbcc2ce5071aa779a0d06b4d789c0167ff99f Mon Sep 17 00:00:00 2001 From: merwanehamadi Date: Mon, 2 Oct 2023 12:41:32 -0700 Subject: [PATCH] Fix benchmark ci (#5478) Fix benchmark CI Signed-off-by: Merwane Hamadi --- .github/workflows/benchmark-ci.yml | 8 +++----- benchmark/agbenchmark/__main__.py | 3 ++- benchmark/tests/test_benchmark_workflow.py | 4 ++-- 3 files changed, 7 insertions(+), 8 deletions(-) diff --git a/.github/workflows/benchmark-ci.yml b/.github/workflows/benchmark-ci.yml index b4d10e6f..14e8b8c1 100644 --- a/.github/workflows/benchmark-ci.yml +++ b/.github/workflows/benchmark-ci.yml @@ -97,10 +97,10 @@ jobs: curl -sSL https://install.python-poetry.org | python - - name: Run regression tests - working-directory: ./autogpts/${{ matrix.agent-name }}/ run: | - sh run & - sleep 20 + ./run agent start ${{ matrix.agent-name }} + sleep 10 + cd autogpts/${{ matrix.agent-name }} set +e # Ignore non-zero exit codes and continue execution echo "Running the following command: poetry run agbenchmark --maintain --mock" @@ -123,8 +123,6 @@ jobs: echo "Running the following command: poetry run agbenchmark --test=WriteFile" poetry run agbenchmark --test=WriteFile - sh run_benchmark serve & - sleep 10 cd ../../benchmark poetry install echo "Adding the BUILD_SKILL_TREE environment variable. This will attempt to add new elements in the skill tree. If new elements are added, the CI fails because they should have been pushed" diff --git a/benchmark/agbenchmark/__main__.py b/benchmark/agbenchmark/__main__.py index 8bf42f4f..76ca7529 100644 --- a/benchmark/agbenchmark/__main__.py +++ b/benchmark/agbenchmark/__main__.py @@ -9,12 +9,13 @@ from typing import Any, Optional import click import pytest import toml +from dotenv import load_dotenv from helicone.lock import HeliconeLockManager from agbenchmark.app import app from agbenchmark.reports.ReportManager import SingletonReportManager from agbenchmark.utils.data_types import AgentBenchmarkConfig -from dotenv import load_dotenv + load_dotenv() BENCHMARK_START_TIME_DT = datetime.now(timezone.utc) diff --git a/benchmark/tests/test_benchmark_workflow.py b/benchmark/tests/test_benchmark_workflow.py index 700d42a8..ca3eec88 100644 --- a/benchmark/tests/test_benchmark_workflow.py +++ b/benchmark/tests/test_benchmark_workflow.py @@ -12,14 +12,14 @@ import time "eval_id, input_text, expected_artifact_length, test_name, should_be_successful", [ ( - "f219f3d3-a41b-45a9-a3d0-389832086ee8", + "021c695a-6cc4-46c2-b93a-f3a9b0f4d123", "Write the word 'Washington' to a .txt file", 0, "WriteFile", True, ), ( - "021c695a-6cc4-46c2-b93a-f3a9b0f4d123", + "f219f3d3-a41b-45a9-a3d0-389832086ee8", "Read the file called file_to_read.txt and write its content to a file called output.txt", 1, "ReadFile",