From eeb68858d7472b462828e9ad7eb26453aa9f5c90 Mon Sep 17 00:00:00 2001
From: merwanehamadi <merwanehamadi@gmail.com>
Date: Tue, 1 Aug 2023 16:50:41 -0700
Subject: [PATCH] Only run mini-agi on tests (#232)

---
 .github/workflows/ci.yml                    | 99 +++++++++++----------
 agbenchmark/conftest.py                     |  6 +-
 agbenchmark/generate_test.py                |  1 +
 agbenchmark/reports/reports.py              |  4 +-
 agbenchmark/utils/get_data_from_helicone.py | 25 +++---
 agent/Auto-GPT                              |  2 +-
 agent/beebot                                |  2 +-
 7 files changed, 74 insertions(+), 65 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index f6c1cc00..816117f1 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -65,8 +65,31 @@ jobs:
           cmd="poetry run autoflake --remove-all-unused-imports --recursive --ignore-init-module-imports --ignore-pass-after-docstring agbenchmark"
           $cmd --check || (echo "You have unused imports or pass statements, please run '${cmd} --in-place'" && exit 1)
         if: success() || failure()
+  matrix-setup:  # Computes the agent matrix and the deployment environment consumed by the tests job.
+    runs-on: ubuntu-latest
+    outputs:
+      matrix: ${{ steps.set-matrix.outputs.matrix }}
+      env-name: ${{ steps.set-env-name.outputs.env-name }}
+    steps:
+      - id: set-matrix  # Scheduled/manual runs cover every agent; all other events only mini-agi.
+        run: |
+          if [ "${{ github.event_name }}" == "schedule" ] || [ "${{ github.event_name }}" == "workflow_dispatch" ]; then
+            echo 'matrix=["gpt-engineer", "smol-developer", "Auto-GPT", "mini-agi", "beebot", "BabyAGI"]' >> "$GITHUB_OUTPUT"
+          else
+            echo 'matrix=["mini-agi"]' >> "$GITHUB_OUTPUT"
+          fi
+      - id: set-env-name  # push/pull_request select the "testing" environment, anything else "production".
+        run: |
+          if [[ "${{ github.event_name }}" == "push" ]] || [[ "${{ github.event_name }}" == "pull_request" ]]; then
+            echo "env-name=testing" >> "$GITHUB_OUTPUT"
+          else
+            echo "env-name=production" >> "$GITHUB_OUTPUT"
+          fi
 
   tests:
+    environment:
+      name: '${{ needs.matrix-setup.outputs.env-name }}'  # environment name chosen by the matrix-setup job
+    needs: matrix-setup  # makes the needs.matrix-setup.outputs.* values available to this job
     env:
       GH_TOKEN: ${{ github.event_name == 'pull_request' && github.token || secrets.PAT }}
       min-python-version: '3.10'
@@ -76,14 +99,13 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        agent-name:
-          - 'gpt-engineer'
-          - 'smol-developer'
-          - 'Auto-GPT'
-          - 'mini-agi'
-          - 'beebot'
-          - 'BabyAGI'
+        agent-name: ${{ fromJSON(needs.matrix-setup.outputs.matrix) }}  # JSON array of agent names from matrix-setup
     steps:
+      - name: Print Environment Name  # logs the environment name that matrix-setup selected
+        run: |
+          echo "Matrix Setup Environment Name: ${{ needs.matrix-setup.outputs.env-name }}"
+
+
       - name: Checkout repository
         uses: actions/checkout@v3
         with:
@@ -150,47 +172,30 @@ jobs:
           fi
 
           pip install ../../dist/*.whl
+          # Run the commit-pinned mitmproxy.sh with "start" before any benchmark command executes.
+          bash -c "$(curl -fsSL https://raw.githubusercontent.com/merwanehamadi/helicone/b7ab4bc53e51d8ab29fff19ce5986ab7720970c6/mitmproxy.sh)" -s start
+          set +e # Ignore non-zero exit codes and continue execution
+          ${prefix}agbenchmark start --maintain --mock
+          EXIT_CODE=$?
+          set -e  # Stop ignoring non-zero exit codes
 
-          if [ "${GITHUB_EVENT_NAME}" == "pull_request" ]; then
+          # Exit code 5 only means regression_tests.json is empty; report it and carry on.
+          if [ "$EXIT_CODE" -eq 5 ]; then
+            echo "regression_tests.json is empty."
+            # No `exit 0` here: it would end the step and skip every benchmark run below.
+          fi
 
-            set +e # Ignore non-zero exit codes and continue execution
-            ${prefix}agbenchmark start --maintain --mock
-            EXIT_CODE=$?
-            set -e  # Stop ignoring non-zero exit codes
-
-            # Check if the exit code was 5, and if so, exit with 0 instead
-            if [ $EXIT_CODE -eq 5 ]
-            then
-              echo "regression_tests.json is empty."
-              exit 0
-            else
-              exit $EXIT_CODE
-            fi
-
-            set +e # Ignore non-zero exit codes and continue execution
-            improve_cmd = ${prefix}agbenchmark start --improve --mock
-            EXIT_CODE=$?
-            set -e  # Stop ignoring non-zero exit codes
-
-            # Check if the exit code was 5, and if so, exit with 0 instead
-            if [ $EXIT_CODE -eq 5 ]
-            then
-              echo "regression_tests.json is empty."
-              exit 0
-            else
-              exit $EXIT_CODE
-            fi
-
-            ${prefix}agbenchmark start --mock
-            ${prefix}agbenchmark start --mock --category=retrieval
-            ${prefix}agbenchmark start --mock --category=interface
-            ${prefix}agbenchmark start --mock --category=code
-            ${prefix}agbenchmark start --mock --category=memory
-            ${prefix}agbenchmark start --mock --category=iterate
-            ${prefix}agbenchmark start --mock --suite TestReturnCode 
-            ${prefix}agbenchmark start --mock --suite TestRevenueRetrieval
+          ${prefix}agbenchmark start --mock  # mock runs: everything, then per category, then per suite
+          ${prefix}agbenchmark start --mock --category=retrieval
+          ${prefix}agbenchmark start --mock --category=interface
+          ${prefix}agbenchmark start --mock --category=code
+          ${prefix}agbenchmark start --mock --category=memory
+          ${prefix}agbenchmark start --mock --category=iterate
+          ${prefix}agbenchmark start --mock --suite TestReturnCode
+          ${prefix}agbenchmark start --mock --suite TestRevenueRetrieval
+          if [ "${GITHUB_EVENT_NAME}" == "pull_request" ] || [ "${GITHUB_EVENT_NAME}" == "push" ]; then
+            ${prefix}agbenchmark start --test=TestWriteFile  # PR/push: a single non-mock test only
           else
-            bash -c "$(curl -fsSL https://raw.githubusercontent.com/merwanehamadi/helicone/b7ab4bc53e51d8ab29fff19ce5986ab7720970c6/mitmproxy.sh)" -s start
             ${prefix}agbenchmark start || echo "This command will always return a non zero exit code unless all the challenges are solved."
           fi
 
@@ -200,7 +205,7 @@ jobs:
           GITHUB_EVENT_NAME: ${{ github.event_name }}
           OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
           AGENT_NAME: ${{ matrix.agent-name }}
-          PROMPT_USER: false # For mini-agi. TODO: Remove this once mini-agi follows the standards.
+          PROMPT_USER: false # For mini-agi. TODO: Remove this and put it in benchmarks.py
           HELICONE_API_KEY: ${{ secrets.HELICONE_API_KEY }}
           REQUESTS_CA_BUNDLE: /etc/ssl/certs/ca-certificates.crt
           HELICONE_CACHE_ENABLED: false
@@ -295,7 +300,7 @@ jobs:
           path: reports/combined_charts/*
 
       - name: Authenticate and Push to Branch
-        if: (success() || failure()) && (github.event_name != 'pull_request')
+        if: (success() || failure()) && (github.event_name == 'schedule' || github.event_name == 'workflow_dispatch')
         run: |
           git config --global user.email "github-bot@agpt.co"
           git config --global user.name "Auto-GPT-Bot"
diff --git a/agbenchmark/conftest.py b/agbenchmark/conftest.py
index a9ebba7e..07731e33 100644
--- a/agbenchmark/conftest.py
+++ b/agbenchmark/conftest.py
@@ -19,7 +19,9 @@ from agbenchmark.reports.reports import (
 from agbenchmark.start_benchmark import CONFIG_PATH, get_regression_data
 from agbenchmark.utils.data_types import SuiteConfig
 
-GLOBAL_TIMEOUT = 1500  # The tests will stop after 25 minutes so we can send the reports.
+# Global limit for the whole test session, in seconds (25 minutes); the tests
+# are stopped once it is exceeded so that the reports can still be sent.
+GLOBAL_TIMEOUT = 1500
 
 
 def resolve_workspace(workspace: str) -> str:
@@ -182,12 +184,14 @@ def timeout_monitor(start_time: int) -> None:
 
     pytest.exit("Test suite exceeded the global timeout", returncode=1)
 
+
 def pytest_sessionstart(session: Any) -> None:
     start_time = time.time()
     t = threading.Thread(target=timeout_monitor, args=(start_time,))
     t.daemon = True  # Daemon threads are abruptly stopped at shutdown
     t.start()
 
+
 def pytest_sessionfinish(session: Any) -> None:
     """Called at the end of the session to save regression tests and info"""
 
diff --git a/agbenchmark/generate_test.py b/agbenchmark/generate_test.py
index 180893f7..7586070b 100644
--- a/agbenchmark/generate_test.py
+++ b/agbenchmark/generate_test.py
@@ -57,6 +57,7 @@ def create_single_test(
     # Define test method within the dynamically created class
     def test_method(self, config: Dict[str, Any], request) -> None:  # type: ignore
         from helicone.lock import HeliconeLockManager
+
         HeliconeLockManager.write_custom_property("challenge", self.data.name)
 
         cutoff = self.data.cutoff or 60
diff --git a/agbenchmark/reports/reports.py b/agbenchmark/reports/reports.py
index 53af7de8..19845e19 100644
--- a/agbenchmark/reports/reports.py
+++ b/agbenchmark/reports/reports.py
@@ -234,8 +234,10 @@ def finalize_reports(item: Any, challenge_data: dict[str, Any]) -> None:
         if run_time:
             cost = None
             if not MOCK_FLAG and os.environ.get("HELICONE_API_KEY"):
+                print("Getting cost from Helicone")
                 cost = get_data_from_helicone(test_name)
-
+            else:
+                print("Helicone not setup or mock flag set, not getting cost")
             info_details["metrics"]["cost"] = cost
             info_details["metrics"]["run_time"] = f"{str(round(run_time, 3))} seconds"
 
diff --git a/agbenchmark/utils/get_data_from_helicone.py b/agbenchmark/utils/get_data_from_helicone.py
index 32a4817d..0de17cd3 100644
--- a/agbenchmark/utils/get_data_from_helicone.py
+++ b/agbenchmark/utils/get_data_from_helicone.py
@@ -25,27 +25,24 @@ query ExampleQuery($properties: [PropertyFilter!]){
     print(query)
 
     variables = {
-        "filters": [
+        "properties": [
             {
-                "property": {
-                    "value": {"equals": os.environ.get("AGENT_NAME")},
-                    "name": "agent",
-                }
+                "value": {"equals": os.environ.get("AGENT_NAME")},
+                "name": "agent",
             },
             {
-                "property": {
-                    "value": {"equals": BENCHMARK_START_TIME},
-                    "name": "benchmark_start_time",
-                }
+                "value": {"equals": BENCHMARK_START_TIME},
+                "name": "benchmark_start_time",
             },
-            {"property": {"value": {"equals": challenge}, "name": "challenge"}},
+            {"value": {"equals": challenge}, "name": "challenge"},
         ]
     }
+
     print(json.dumps(variables, indent=4))
 
     operation_name = "ExampleQuery"
 
-    data = None
+    data = {}
     response = None
 
     try:
@@ -70,10 +67,10 @@ query ExampleQuery($properties: [PropertyFilter!]){
         print(f"Other error occurred: {err}")
         return None
 
-    if data is None or data.get("data") is None:
-        print("Invalid response received from server: no data")
-        return None
     try:
+        if data is None or data.get("data") is None:
+            print("Invalid response received from server: no data")
+            return None
         return (
             data.get("data", {})
             .get("aggregatedHeliconeRequest", {})
diff --git a/agent/Auto-GPT b/agent/Auto-GPT
index 7cd407b7..3a2d08fb 160000
--- a/agent/Auto-GPT
+++ b/agent/Auto-GPT
@@ -1 +1 @@
-Subproject commit 7cd407b7b4a9f4395761e772335e859e40e8c3d3
+Subproject commit 3a2d08fb415071cc94dd6fcee24cfbdd1fb487dd
diff --git a/agent/beebot b/agent/beebot
index 7409c065..8e4cd92c 160000
--- a/agent/beebot
+++ b/agent/beebot
@@ -1 +1 @@
-Subproject commit 7409c0650194162b92367caf5f36724ed5b147b5
+Subproject commit 8e4cd92c1d0c135ff241f9906c05c3ff895ada30