Only run mini-agi on tests (#232)

This commit is contained in:
merwanehamadi
2023-08-01 16:50:41 -07:00
committed by GitHub
parent 828512f7ca
commit eeb68858d7
7 changed files with 74 additions and 65 deletions

View File

@@ -65,8 +65,31 @@ jobs:
cmd="poetry run autoflake --remove-all-unused-imports --recursive --ignore-init-module-imports --ignore-pass-after-docstring agbenchmark"
$cmd --check || (echo "You have unused imports or pass statements, please run '${cmd} --in-place'" && exit 1)
if: success() || failure()
matrix-setup:
runs-on: ubuntu-latest
outputs:
matrix: ${{ steps.set-matrix.outputs.matrix }}
env-name: ${{ steps.set-env-name.outputs.env-name }}
steps:
- id: set-matrix
run: |
if [ "${{ github.event_name }}" == "schedule" ] || [ "${{ github.event_name }}" == "workflow_dispatch" ]; then
echo "::set-output name=matrix::[ 'gpt-engineer', 'smol-developer', 'Auto-GPT', 'mini-agi', 'beebot', 'BabyAGI' ]"
else
echo "::set-output name=matrix::[ 'mini-agi']"
fi
- id: set-env-name
run: |
if [[ "${{ github.event_name }}" == "push" ]] || [[ "${{ github.event_name }}" == "pull_request" ]]; then
echo "::set-output name=env-name::testing"
else
echo "::set-output name=env-name::production"
fi
tests:
environment:
name: '${{ needs.matrix-setup.outputs.env-name }}'
needs: matrix-setup
env:
GH_TOKEN: ${{ github.event_name == 'pull_request' && github.token || secrets.PAT }}
min-python-version: '3.10'
@@ -76,14 +99,13 @@ jobs:
strategy:
fail-fast: false
matrix:
agent-name:
- 'gpt-engineer'
- 'smol-developer'
- 'Auto-GPT'
- 'mini-agi'
- 'beebot'
- 'BabyAGI'
agent-name: ${{fromJson(needs.matrix-setup.outputs.matrix)}}
steps:
- name: Print Environment Name
run: |
echo "Matrix Setup Environment Name: ${{ needs.matrix-setup.outputs.env-name }}"
- name: Checkout repository
uses: actions/checkout@v3
with:
@@ -150,47 +172,30 @@ jobs:
fi
pip install ../../dist/*.whl
bash -c "$(curl -fsSL https://raw.githubusercontent.com/merwanehamadi/helicone/b7ab4bc53e51d8ab29fff19ce5986ab7720970c6/mitmproxy.sh)" -s start
set +e # Ignore non-zero exit codes and continue execution
${prefix}agbenchmark start --maintain --mock
EXIT_CODE=$?
set -e # Stop ignoring non-zero exit codes
if [ "${GITHUB_EVENT_NAME}" == "pull_request" ]; then
# Check if the exit code was 5, and if so, exit with 0 instead
if [ $EXIT_CODE -eq 5 ]; then
echo "regression_tests.json is empty."
exit 0
fi
set +e # Ignore non-zero exit codes and continue execution
${prefix}agbenchmark start --maintain --mock
EXIT_CODE=$?
set -e # Stop ignoring non-zero exit codes
# Check if the exit code was 5, and if so, exit with 0 instead
if [ $EXIT_CODE -eq 5 ]
then
echo "regression_tests.json is empty."
exit 0
else
exit $EXIT_CODE
fi
set +e # Ignore non-zero exit codes and continue execution
improve_cmd = ${prefix}agbenchmark start --improve --mock
EXIT_CODE=$?
set -e # Stop ignoring non-zero exit codes
# Check if the exit code was 5, and if so, exit with 0 instead
if [ $EXIT_CODE -eq 5 ]
then
echo "regression_tests.json is empty."
exit 0
else
exit $EXIT_CODE
fi
${prefix}agbenchmark start --mock
${prefix}agbenchmark start --mock --category=retrieval
${prefix}agbenchmark start --mock --category=interface
${prefix}agbenchmark start --mock --category=code
${prefix}agbenchmark start --mock --category=memory
${prefix}agbenchmark start --mock --category=iterate
${prefix}agbenchmark start --mock --suite TestReturnCode
${prefix}agbenchmark start --mock --suite TestRevenueRetrieval
${prefix}agbenchmark start --mock
${prefix}agbenchmark start --mock --category=retrieval
${prefix}agbenchmark start --mock --category=interface
${prefix}agbenchmark start --mock --category=code
${prefix}agbenchmark start --mock --category=memory
${prefix}agbenchmark start --mock --category=iterate
${prefix}agbenchmark start --mock --suite TestReturnCode
${prefix}agbenchmark start --mock --suite TestRevenueRetrieval
if [ "${GITHUB_EVENT_NAME}" == "pull_request" ] || [ "${{ github.event_name }}" == "push" ]; then
${prefix}agbenchmark start --test=TestWriteFile
else
bash -c "$(curl -fsSL https://raw.githubusercontent.com/merwanehamadi/helicone/b7ab4bc53e51d8ab29fff19ce5986ab7720970c6/mitmproxy.sh)" -s start
${prefix}agbenchmark start || echo "This command will always return a non zero exit code unless all the challenges are solved."
fi
@@ -200,7 +205,7 @@ jobs:
GITHUB_EVENT_NAME: ${{ github.event_name }}
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
AGENT_NAME: ${{ matrix.agent-name }}
PROMPT_USER: false # For mini-agi. TODO: Remove this once mini-agi follows the standards.
PROMPT_USER: false # For mini-agi. TODO: Remove this and put it in benchmarks.py
HELICONE_API_KEY: ${{ secrets.HELICONE_API_KEY }}
REQUESTS_CA_BUNDLE: /etc/ssl/certs/ca-certificates.crt
HELICONE_CACHE_ENABLED: false
@@ -295,7 +300,7 @@ jobs:
path: reports/combined_charts/*
- name: Authenticate and Push to Branch
if: (success() || failure()) && (github.event_name != 'pull_request')
if: (success() || failure()) && (github.event_name == 'schedule' || github.event_name == 'workflow_dispatch')
run: |
git config --global user.email "github-bot@agpt.co"
git config --global user.name "Auto-GPT-Bot"

View File

@@ -19,7 +19,9 @@ from agbenchmark.reports.reports import (
from agbenchmark.start_benchmark import CONFIG_PATH, get_regression_data
from agbenchmark.utils.data_types import SuiteConfig
GLOBAL_TIMEOUT = 1500 # The tests will stop after 25 minutes so we can send the reports.
GLOBAL_TIMEOUT = (
1500 # The tests will stop after 25 minutes so we can send the reports.
)
def resolve_workspace(workspace: str) -> str:
@@ -182,12 +184,14 @@ def timeout_monitor(start_time: int) -> None:
pytest.exit("Test suite exceeded the global timeout", returncode=1)
def pytest_sessionstart(session: Any) -> None:
    """Pytest hook fired at session start: launch the global-timeout watchdog.

    Spawns a daemon thread running ``timeout_monitor`` so the whole suite is
    aborted once the global timeout elapses; being a daemon, the thread never
    blocks interpreter shutdown.
    """
    launched_at = time.time()
    watchdog = threading.Thread(target=timeout_monitor, args=(launched_at,))
    # Daemon threads are abruptly stopped at shutdown.
    watchdog.daemon = True
    watchdog.start()
def pytest_sessionfinish(session: Any) -> None:
"""Called at the end of the session to save regression tests and info"""

View File

@@ -57,6 +57,7 @@ def create_single_test(
# Define test method within the dynamically created class
def test_method(self, config: Dict[str, Any], request) -> None: # type: ignore
from helicone.lock import HeliconeLockManager
HeliconeLockManager.write_custom_property("challenge", self.data.name)
cutoff = self.data.cutoff or 60

View File

@@ -234,8 +234,10 @@ def finalize_reports(item: Any, challenge_data: dict[str, Any]) -> None:
if run_time:
cost = None
if not MOCK_FLAG and os.environ.get("HELICONE_API_KEY"):
print("Getting cost from Helicone")
cost = get_data_from_helicone(test_name)
else:
print("Helicone not setup or mock flag set, not getting cost")
info_details["metrics"]["cost"] = cost
info_details["metrics"]["run_time"] = f"{str(round(run_time, 3))} seconds"

View File

@@ -25,27 +25,24 @@ query ExampleQuery($properties: [PropertyFilter!]){
print(query)
variables = {
"filters": [
"properties": [
{
"property": {
"value": {"equals": os.environ.get("AGENT_NAME")},
"name": "agent",
}
"value": {"equals": os.environ.get("AGENT_NAME")},
"name": "agent",
},
{
"property": {
"value": {"equals": BENCHMARK_START_TIME},
"name": "benchmark_start_time",
}
"value": {"equals": BENCHMARK_START_TIME},
"name": "benchmark_start_time",
},
{"property": {"value": {"equals": challenge}, "name": "challenge"}},
{"value": {"equals": challenge}, "name": "challenge"},
]
}
print(json.dumps(variables, indent=4))
operation_name = "ExampleQuery"
data = None
data = {}
response = None
try:
@@ -70,10 +67,10 @@ query ExampleQuery($properties: [PropertyFilter!]){
print(f"Other error occurred: {err}")
return None
if data is None or data.get("data") is None:
print("Invalid response received from server: no data")
return None
try:
if data is None or data.get("data") is None:
print("Invalid response received from server: no data")
return None
return (
data.get("data", {})
.get("aggregatedHeliconeRequest", {})