From eeb68858d7472b462828e9ad7eb26453aa9f5c90 Mon Sep 17 00:00:00 2001 From: merwanehamadi Date: Tue, 1 Aug 2023 16:50:41 -0700 Subject: [PATCH] Only run mini-agi on tests (#232) --- .github/workflows/ci.yml | 99 +++++++++++---------- agbenchmark/conftest.py | 6 +- agbenchmark/generate_test.py | 1 + agbenchmark/reports/reports.py | 4 +- agbenchmark/utils/get_data_from_helicone.py | 25 +++--- agent/Auto-GPT | 2 +- agent/beebot | 2 +- 7 files changed, 74 insertions(+), 65 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index f6c1cc00..816117f1 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -65,8 +65,31 @@ jobs: cmd="poetry run autoflake --remove-all-unused-imports --recursive --ignore-init-module-imports --ignore-pass-after-docstring agbenchmark" $cmd --check || (echo "You have unused imports or pass statements, please run '${cmd} --in-place'" && exit 1) if: success() || failure() + matrix-setup: + runs-on: ubuntu-latest + outputs: + matrix: ${{ steps.set-matrix.outputs.matrix }} + env-name: ${{ steps.set-env-name.outputs.env-name }} + steps: + - id: set-matrix + run: | + if [ "${{ github.event_name }}" == "schedule" ] || [ "${{ github.event_name }}" == "workflow_dispatch" ]; then + echo "::set-output name=matrix::[ 'gpt-engineer', 'smol-developer', 'Auto-GPT', 'mini-agi', 'beebot', 'BabyAGI' ]" + else + echo "::set-output name=matrix::[ 'mini-agi']" + fi + - id: set-env-name + run: | + if [[ "${{ github.event_name }}" == "push" ]] || [[ "${{ github.event_name }}" == "pull_request" ]]; then + echo "::set-output name=env-name::testing" + else + echo "::set-output name=env-name::production" + fi tests: + environment: + name: '${{ needs.matrix-setup.outputs.env-name }}' + needs: matrix-setup env: GH_TOKEN: ${{ github.event_name == 'pull_request' && github.token || secrets.PAT }} min-python-version: '3.10' @@ -76,14 +99,13 @@ jobs: strategy: fail-fast: false matrix: - agent-name: - - 'gpt-engineer' - - 'smol-developer' - - 'Auto-GPT' - - 'mini-agi' - - 'beebot' - - 'BabyAGI' + agent-name: ${{fromJson(needs.matrix-setup.outputs.matrix)}} steps: + - name: Print Environment Name + run: | + echo "Matrix Setup Environment Name: ${{ needs.matrix-setup.outputs.env-name }}" + + - name: Checkout repository uses: actions/checkout@v3 with: @@ -150,47 +172,30 @@ jobs: fi pip install ../../dist/*.whl + + bash -c "$(curl -fsSL https://raw.githubusercontent.com/merwanehamadi/helicone/b7ab4bc53e51d8ab29fff19ce5986ab7720970c6/mitmproxy.sh)" -s start + set +e # Ignore non-zero exit codes and continue execution + ${prefix}agbenchmark start --maintain --mock + EXIT_CODE=$? + set -e # Stop ignoring non-zero exit codes - if [ "${GITHUB_EVENT_NAME}" == "pull_request" ]; then + # Check if the exit code was 5, and if so, exit with 0 instead + if [ $EXIT_CODE -eq 5 ]; then + echo "regression_tests.json is empty." + exit 0 + fi - set +e # Ignore non-zero exit codes and continue execution - ${prefix}agbenchmark start --maintain --mock - EXIT_CODE=$? - set -e # Stop ignoring non-zero exit codes - - # Check if the exit code was 5, and if so, exit with 0 instead - if [ $EXIT_CODE -eq 5 ] - then - echo "regression_tests.json is empty." - exit 0 - else - exit $EXIT_CODE - fi - - set +e # Ignore non-zero exit codes and continue execution - improve_cmd = ${prefix}agbenchmark start --improve --mock - EXIT_CODE=$? - set -e # Stop ignoring non-zero exit codes - - # Check if the exit code was 5, and if so, exit with 0 instead - if [ $EXIT_CODE -eq 5 ] - then - echo "regression_tests.json is empty." - exit 0 - else - exit $EXIT_CODE - fi - - ${prefix}agbenchmark start --mock - ${prefix}agbenchmark start --mock --category=retrieval - ${prefix}agbenchmark start --mock --category=interface - ${prefix}agbenchmark start --mock --category=code - ${prefix}agbenchmark start --mock --category=memory - ${prefix}agbenchmark start --mock --category=iterate - ${prefix}agbenchmark start --mock --suite TestReturnCode - ${prefix}agbenchmark start --mock --suite TestRevenueRetrieval + ${prefix}agbenchmark start --mock + ${prefix}agbenchmark start --mock --category=retrieval + ${prefix}agbenchmark start --mock --category=interface + ${prefix}agbenchmark start --mock --category=code + ${prefix}agbenchmark start --mock --category=memory + ${prefix}agbenchmark start --mock --category=iterate + ${prefix}agbenchmark start --mock --suite TestReturnCode + ${prefix}agbenchmark start --mock --suite TestRevenueRetrieval + if [ "${GITHUB_EVENT_NAME}" == "pull_request" ] || [ "${{ github.event_name }}" == "push" ]; then + ${prefix}agbenchmark start --test=TestWriteFile else - bash -c "$(curl -fsSL https://raw.githubusercontent.com/merwanehamadi/helicone/b7ab4bc53e51d8ab29fff19ce5986ab7720970c6/mitmproxy.sh)" -s start ${prefix}agbenchmark start || echo "This command will always return a non zero exit code unless all the challenges are solved." fi @@ -200,7 +205,7 @@ jobs: GITHUB_EVENT_NAME: ${{ github.event_name }} OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} AGENT_NAME: ${{ matrix.agent-name }} - PROMPT_USER: false # For mini-agi. TODO: Remove this once mini-agi follows the standards. + PROMPT_USER: false # For mini-agi. TODO: Remove this and put it in benchmarks.py HELICONE_API_KEY: ${{ secrets.HELICONE_API_KEY }} REQUESTS_CA_BUNDLE: /etc/ssl/certs/ca-certificates.crt HELICONE_CACHE_ENABLED: false @@ -295,7 +300,7 @@ jobs: path: reports/combined_charts/* - name: Authenticate and Push to Branch - if: (success() || failure()) && (github.event_name != 'pull_request') + if: (success() || failure()) && (github.event_name == 'schedule' || github.event_name == 'workflow_dispatch') run: | git config --global user.email "github-bot@agpt.co" git config --global user.name "Auto-GPT-Bot" diff --git a/agbenchmark/conftest.py b/agbenchmark/conftest.py index a9ebba7e..07731e33 100644 --- a/agbenchmark/conftest.py +++ b/agbenchmark/conftest.py @@ -19,7 +19,9 @@ from agbenchmark.reports.reports import ( from agbenchmark.start_benchmark import CONFIG_PATH, get_regression_data from agbenchmark.utils.data_types import SuiteConfig -GLOBAL_TIMEOUT = 1500 # The tests will stop after 25 minutes so we can send the reports. +GLOBAL_TIMEOUT = ( + 1500 # The tests will stop after 25 minutes so we can send the reports. +) def resolve_workspace(workspace: str) -> str: @@ -182,12 +184,14 @@ def timeout_monitor(start_time: int) -> None: pytest.exit("Test suite exceeded the global timeout", returncode=1) + def pytest_sessionstart(session: Any) -> None: start_time = time.time() t = threading.Thread(target=timeout_monitor, args=(start_time,)) t.daemon = True # Daemon threads are abruptly stopped at shutdown t.start() + def pytest_sessionfinish(session: Any) -> None: """Called at the end of the session to save regression tests and info""" diff --git a/agbenchmark/generate_test.py b/agbenchmark/generate_test.py index 180893f7..7586070b 100644 --- a/agbenchmark/generate_test.py +++ b/agbenchmark/generate_test.py @@ -57,6 +57,7 @@ def create_single_test( # Define test method within the dynamically created class def test_method(self, config: Dict[str, Any], request) -> None: # type: ignore from helicone.lock import HeliconeLockManager + HeliconeLockManager.write_custom_property("challenge", self.data.name) cutoff = self.data.cutoff or 60 diff --git a/agbenchmark/reports/reports.py b/agbenchmark/reports/reports.py index 53af7de8..19845e19 100644 --- a/agbenchmark/reports/reports.py +++ b/agbenchmark/reports/reports.py @@ -234,8 +234,10 @@ def finalize_reports(item: Any, challenge_data: dict[str, Any]) -> None: if run_time: cost = None if not MOCK_FLAG and os.environ.get("HELICONE_API_KEY"): + print("Getting cost from Helicone") cost = get_data_from_helicone(test_name) - + else: + print("Helicone not setup or mock flag set, not getting cost") info_details["metrics"]["cost"] = cost info_details["metrics"]["run_time"] = f"{str(round(run_time, 3))} seconds" diff --git a/agbenchmark/utils/get_data_from_helicone.py b/agbenchmark/utils/get_data_from_helicone.py index 32a4817d..0de17cd3 100644 --- a/agbenchmark/utils/get_data_from_helicone.py +++ b/agbenchmark/utils/get_data_from_helicone.py @@ -25,27 +25,24 @@ query ExampleQuery($properties: [PropertyFilter!]){ print(query) variables = { - "filters": [ + "properties": [ { - "property": { - "value": {"equals": os.environ.get("AGENT_NAME")}, - "name": "agent", - } + "value": {"equals": os.environ.get("AGENT_NAME")}, + "name": "agent", }, { - "property": { - "value": {"equals": BENCHMARK_START_TIME}, - "name": "benchmark_start_time", - } + "value": {"equals": BENCHMARK_START_TIME}, + "name": "benchmark_start_time", }, - {"property": {"value": {"equals": challenge}, "name": "challenge"}}, + {"value": {"equals": challenge}, "name": "challenge"}, ] } + print(json.dumps(variables, indent=4)) operation_name = "ExampleQuery" - data = None + data = {} response = None try: @@ -70,10 +67,10 @@ query ExampleQuery($properties: [PropertyFilter!]){ print(f"Other error occurred: {err}") return None - if data is None or data.get("data") is None: - print("Invalid response received from server: no data") - return None try: + if data is None or data.get("data") is None: + print("Invalid response received from server: no data") + return None return ( data.get("data", {}) .get("aggregatedHeliconeRequest", {}) diff --git a/agent/Auto-GPT b/agent/Auto-GPT index 7cd407b7..3a2d08fb 160000 --- a/agent/Auto-GPT +++ b/agent/Auto-GPT @@ -1 +1 @@ -Subproject commit 7cd407b7b4a9f4395761e772335e859e40e8c3d3 +Subproject commit 3a2d08fb415071cc94dd6fcee24cfbdd1fb487dd diff --git a/agent/beebot b/agent/beebot index 7409c065..8e4cd92c 160000 --- a/agent/beebot +++ b/agent/beebot @@ -1 +1 @@ -Subproject commit 7409c0650194162b92367caf5f36724ed5b147b5 +Subproject commit 8e4cd92c1d0c135ff241f9906c05c3ff895ada30