Fix benchmark CI (#5478)

Fix benchmark CI

Signed-off-by: Merwane Hamadi <merwanehamadi@gmail.com>
This commit is contained in:
merwanehamadi
2023-10-02 12:41:32 -07:00
committed by GitHub
parent 163ab75379
commit a30cbcc2ce
3 changed files with 7 additions and 8 deletions

View File

@@ -97,10 +97,10 @@ jobs:
curl -sSL https://install.python-poetry.org | python - curl -sSL https://install.python-poetry.org | python -
- name: Run regression tests - name: Run regression tests
working-directory: ./autogpts/${{ matrix.agent-name }}/
run: | run: |
sh run & ./run agent start ${{ matrix.agent-name }}
sleep 20 sleep 10
cd autogpts/${{ matrix.agent-name }}
set +e # Ignore non-zero exit codes and continue execution set +e # Ignore non-zero exit codes and continue execution
echo "Running the following command: poetry run agbenchmark --maintain --mock" echo "Running the following command: poetry run agbenchmark --maintain --mock"
@@ -123,8 +123,6 @@ jobs:
echo "Running the following command: poetry run agbenchmark --test=WriteFile" echo "Running the following command: poetry run agbenchmark --test=WriteFile"
poetry run agbenchmark --test=WriteFile poetry run agbenchmark --test=WriteFile
sh run_benchmark serve &
sleep 10
cd ../../benchmark cd ../../benchmark
poetry install poetry install
echo "Adding the BUILD_SKILL_TREE environment variable. This will attempt to add new elements in the skill tree. If new elements are added, the CI fails because they should have been pushed" echo "Adding the BUILD_SKILL_TREE environment variable. This will attempt to add new elements in the skill tree. If new elements are added, the CI fails because they should have been pushed"

View File

@@ -9,12 +9,13 @@ from typing import Any, Optional
import click import click
import pytest import pytest
import toml import toml
from dotenv import load_dotenv
from helicone.lock import HeliconeLockManager from helicone.lock import HeliconeLockManager
from agbenchmark.app import app from agbenchmark.app import app
from agbenchmark.reports.ReportManager import SingletonReportManager from agbenchmark.reports.ReportManager import SingletonReportManager
from agbenchmark.utils.data_types import AgentBenchmarkConfig from agbenchmark.utils.data_types import AgentBenchmarkConfig
from dotenv import load_dotenv
load_dotenv() load_dotenv()
BENCHMARK_START_TIME_DT = datetime.now(timezone.utc) BENCHMARK_START_TIME_DT = datetime.now(timezone.utc)

View File

@@ -12,14 +12,14 @@ import time
"eval_id, input_text, expected_artifact_length, test_name, should_be_successful", "eval_id, input_text, expected_artifact_length, test_name, should_be_successful",
[ [
( (
"f219f3d3-a41b-45a9-a3d0-389832086ee8", "021c695a-6cc4-46c2-b93a-f3a9b0f4d123",
"Write the word 'Washington' to a .txt file", "Write the word 'Washington' to a .txt file",
0, 0,
"WriteFile", "WriteFile",
True, True,
), ),
( (
"021c695a-6cc4-46c2-b93a-f3a9b0f4d123", "f219f3d3-a41b-45a9-a3d0-389832086ee8",
"Read the file called file_to_read.txt and write its content to a file called output.txt", "Read the file called file_to_read.txt and write its content to a file called output.txt",
1, 1,
"ReadFile", "ReadFile",