mirror of
https://github.com/aljazceru/Auto-GPT.git
synced 2026-02-23 07:04:24 +01:00
Only run mini-agi on tests (#232)
This commit is contained in:
99
.github/workflows/ci.yml
vendored
99
.github/workflows/ci.yml
vendored
@@ -65,8 +65,31 @@ jobs:
|
||||
cmd="poetry run autoflake --remove-all-unused-imports --recursive --ignore-init-module-imports --ignore-pass-after-docstring agbenchmark"
|
||||
$cmd --check || (echo "You have unused imports or pass statements, please run '${cmd} --in-place'" && exit 1)
|
||||
if: success() || failure()
|
||||
matrix-setup:
  # Computes two values consumed by the `tests` job:
  #   * matrix   - JSON array of agent names to benchmark
  #   * env-name - deployment environment the tests run in
  runs-on: ubuntu-latest
  outputs:
    matrix: ${{ steps.set-matrix.outputs.matrix }}
    env-name: ${{ steps.set-env-name.outputs.env-name }}
  steps:
    - id: set-matrix
      # Scheduled and manually dispatched runs cover every agent; any other
      # event only runs mini-agi.
      # Outputs are written to $GITHUB_OUTPUT because the `::set-output`
      # workflow command is deprecated. The array is strict JSON (double
      # quotes) so `fromJson` does not depend on lenient parsing.
      run: |
        if [[ "${{ github.event_name }}" == "schedule" ]] || [[ "${{ github.event_name }}" == "workflow_dispatch" ]]; then
          echo 'matrix=["gpt-engineer", "smol-developer", "Auto-GPT", "mini-agi", "beebot", "BabyAGI"]' >> "$GITHUB_OUTPUT"
        else
          echo 'matrix=["mini-agi"]' >> "$GITHUB_OUTPUT"
        fi
    - id: set-env-name
      # Pushes and pull requests use the "testing" environment; every other
      # event uses "production".
      run: |
        if [[ "${{ github.event_name }}" == "push" ]] || [[ "${{ github.event_name }}" == "pull_request" ]]; then
          echo "env-name=testing" >> "$GITHUB_OUTPUT"
        else
          echo "env-name=production" >> "$GITHUB_OUTPUT"
        fi
|
||||
|
||||
tests:
|
||||
environment:
|
||||
name: '${{ needs.matrix-setup.outputs.env-name }}'
|
||||
needs: matrix-setup
|
||||
env:
|
||||
GH_TOKEN: ${{ github.event_name == 'pull_request' && github.token || secrets.PAT }}
|
||||
min-python-version: '3.10'
|
||||
@@ -76,14 +99,13 @@ jobs:
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
agent-name:
|
||||
- 'gpt-engineer'
|
||||
- 'smol-developer'
|
||||
- 'Auto-GPT'
|
||||
- 'mini-agi'
|
||||
- 'beebot'
|
||||
- 'BabyAGI'
|
||||
agent-name: ${{fromJson(needs.matrix-setup.outputs.matrix)}}
|
||||
steps:
|
||||
- name: Print Environment Name
|
||||
run: |
|
||||
echo "Matrix Setup Environment Name: ${{ needs.matrix-setup.outputs.env-name }}"
|
||||
|
||||
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v3
|
||||
with:
|
||||
@@ -150,47 +172,30 @@ jobs:
|
||||
fi
|
||||
|
||||
pip install ../../dist/*.whl
|
||||
|
||||
bash -c "$(curl -fsSL https://raw.githubusercontent.com/merwanehamadi/helicone/b7ab4bc53e51d8ab29fff19ce5986ab7720970c6/mitmproxy.sh)" -s start
|
||||
set +e # Ignore non-zero exit codes and continue execution
|
||||
${prefix}agbenchmark start --maintain --mock
|
||||
EXIT_CODE=$?
|
||||
set -e # Stop ignoring non-zero exit codes
|
||||
|
||||
if [ "${GITHUB_EVENT_NAME}" == "pull_request" ]; then
|
||||
# Check if the exit code was 5, and if so, exit with 0 instead
|
||||
if [ $EXIT_CODE -eq 5 ]; then
|
||||
echo "regression_tests.json is empty."
|
||||
exit 0
|
||||
fi
|
||||
|
||||
set +e # Ignore non-zero exit codes and continue execution
|
||||
${prefix}agbenchmark start --maintain --mock
|
||||
EXIT_CODE=$?
|
||||
set -e # Stop ignoring non-zero exit codes
|
||||
|
||||
# Check if the exit code was 5, and if so, exit with 0 instead
|
||||
if [ $EXIT_CODE -eq 5 ]
|
||||
then
|
||||
echo "regression_tests.json is empty."
|
||||
exit 0
|
||||
else
|
||||
exit $EXIT_CODE
|
||||
fi
|
||||
|
||||
set +e # Ignore non-zero exit codes and continue execution
|
||||
improve_cmd = ${prefix}agbenchmark start --improve --mock
|
||||
EXIT_CODE=$?
|
||||
set -e # Stop ignoring non-zero exit codes
|
||||
|
||||
# Check if the exit code was 5, and if so, exit with 0 instead
|
||||
if [ $EXIT_CODE -eq 5 ]
|
||||
then
|
||||
echo "regression_tests.json is empty."
|
||||
exit 0
|
||||
else
|
||||
exit $EXIT_CODE
|
||||
fi
|
||||
|
||||
${prefix}agbenchmark start --mock
|
||||
${prefix}agbenchmark start --mock --category=retrieval
|
||||
${prefix}agbenchmark start --mock --category=interface
|
||||
${prefix}agbenchmark start --mock --category=code
|
||||
${prefix}agbenchmark start --mock --category=memory
|
||||
${prefix}agbenchmark start --mock --category=iterate
|
||||
${prefix}agbenchmark start --mock --suite TestReturnCode
|
||||
${prefix}agbenchmark start --mock --suite TestRevenueRetrieval
|
||||
${prefix}agbenchmark start --mock
|
||||
${prefix}agbenchmark start --mock --category=retrieval
|
||||
${prefix}agbenchmark start --mock --category=interface
|
||||
${prefix}agbenchmark start --mock --category=code
|
||||
${prefix}agbenchmark start --mock --category=memory
|
||||
${prefix}agbenchmark start --mock --category=iterate
|
||||
${prefix}agbenchmark start --mock --suite TestReturnCode
|
||||
${prefix}agbenchmark start --mock --suite TestRevenueRetrieval
|
||||
if [ "${GITHUB_EVENT_NAME}" == "pull_request" ] || [ "${{ github.event_name }}" == "push" ]; then
|
||||
${prefix}agbenchmark start --test=TestWriteFile
|
||||
else
|
||||
bash -c "$(curl -fsSL https://raw.githubusercontent.com/merwanehamadi/helicone/b7ab4bc53e51d8ab29fff19ce5986ab7720970c6/mitmproxy.sh)" -s start
|
||||
${prefix}agbenchmark start || echo "This command will always return a non zero exit code unless all the challenges are solved."
|
||||
fi
|
||||
|
||||
@@ -200,7 +205,7 @@ jobs:
|
||||
GITHUB_EVENT_NAME: ${{ github.event_name }}
|
||||
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
|
||||
AGENT_NAME: ${{ matrix.agent-name }}
|
||||
PROMPT_USER: false # For mini-agi. TODO: Remove this once mini-agi follows the standards.
|
||||
PROMPT_USER: false # For mini-agi. TODO: Remove this and put it in benchmarks.py
|
||||
HELICONE_API_KEY: ${{ secrets.HELICONE_API_KEY }}
|
||||
REQUESTS_CA_BUNDLE: /etc/ssl/certs/ca-certificates.crt
|
||||
HELICONE_CACHE_ENABLED: false
|
||||
@@ -295,7 +300,7 @@ jobs:
|
||||
path: reports/combined_charts/*
|
||||
|
||||
- name: Authenticate and Push to Branch
|
||||
if: (success() || failure()) && (github.event_name != 'pull_request')
|
||||
if: (success() || failure()) && (github.event_name == 'schedule' || github.event_name == 'workflow_dispatch')
|
||||
run: |
|
||||
git config --global user.email "github-bot@agpt.co"
|
||||
git config --global user.name "Auto-GPT-Bot"
|
||||
|
||||
@@ -19,7 +19,9 @@ from agbenchmark.reports.reports import (
|
||||
from agbenchmark.start_benchmark import CONFIG_PATH, get_regression_data
|
||||
from agbenchmark.utils.data_types import SuiteConfig
|
||||
|
||||
GLOBAL_TIMEOUT = 1500 # The tests will stop after 25 minutes so we can send the reports.
|
||||
GLOBAL_TIMEOUT = (
|
||||
1500 # The tests will stop after 25 minutes so we can send the reports.
|
||||
)
|
||||
|
||||
|
||||
def resolve_workspace(workspace: str) -> str:
|
||||
@@ -182,12 +184,14 @@ def timeout_monitor(start_time: int) -> None:
|
||||
|
||||
pytest.exit("Test suite exceeded the global timeout", returncode=1)
|
||||
|
||||
|
||||
def pytest_sessionstart(session: Any) -> None:
    """Launch the background timeout watcher when the pytest session begins.

    Records the current wall-clock time and hands it to ``timeout_monitor``,
    which runs in its own thread for the rest of the session.

    Args:
        session: The pytest session object (not used here).
    """
    # Daemon threads are abruptly stopped at shutdown, so the watcher never
    # keeps the interpreter alive after the test run has finished.
    threading.Thread(
        target=timeout_monitor,
        args=(time.time(),),
        daemon=True,
    ).start()
|
||||
|
||||
|
||||
def pytest_sessionfinish(session: Any) -> None:
|
||||
"""Called at the end of the session to save regression tests and info"""
|
||||
|
||||
|
||||
@@ -57,6 +57,7 @@ def create_single_test(
|
||||
# Define test method within the dynamically created class
|
||||
def test_method(self, config: Dict[str, Any], request) -> None: # type: ignore
|
||||
from helicone.lock import HeliconeLockManager
|
||||
|
||||
HeliconeLockManager.write_custom_property("challenge", self.data.name)
|
||||
|
||||
cutoff = self.data.cutoff or 60
|
||||
|
||||
@@ -234,8 +234,10 @@ def finalize_reports(item: Any, challenge_data: dict[str, Any]) -> None:
|
||||
if run_time:
|
||||
cost = None
|
||||
if not MOCK_FLAG and os.environ.get("HELICONE_API_KEY"):
|
||||
print("Getting cost from Helicone")
|
||||
cost = get_data_from_helicone(test_name)
|
||||
|
||||
else:
|
||||
print("Helicone not setup or mock flag set, not getting cost")
|
||||
info_details["metrics"]["cost"] = cost
|
||||
info_details["metrics"]["run_time"] = f"{str(round(run_time, 3))} seconds"
|
||||
|
||||
|
||||
@@ -25,27 +25,24 @@ query ExampleQuery($properties: [PropertyFilter!]){
|
||||
print(query)
|
||||
|
||||
variables = {
|
||||
"filters": [
|
||||
"properties": [
|
||||
{
|
||||
"property": {
|
||||
"value": {"equals": os.environ.get("AGENT_NAME")},
|
||||
"name": "agent",
|
||||
}
|
||||
"value": {"equals": os.environ.get("AGENT_NAME")},
|
||||
"name": "agent",
|
||||
},
|
||||
{
|
||||
"property": {
|
||||
"value": {"equals": BENCHMARK_START_TIME},
|
||||
"name": "benchmark_start_time",
|
||||
}
|
||||
"value": {"equals": BENCHMARK_START_TIME},
|
||||
"name": "benchmark_start_time",
|
||||
},
|
||||
{"property": {"value": {"equals": challenge}, "name": "challenge"}},
|
||||
{"value": {"equals": challenge}, "name": "challenge"},
|
||||
]
|
||||
}
|
||||
|
||||
print(json.dumps(variables, indent=4))
|
||||
|
||||
operation_name = "ExampleQuery"
|
||||
|
||||
data = None
|
||||
data = {}
|
||||
response = None
|
||||
|
||||
try:
|
||||
@@ -70,10 +67,10 @@ query ExampleQuery($properties: [PropertyFilter!]){
|
||||
print(f"Other error occurred: {err}")
|
||||
return None
|
||||
|
||||
if data is None or data.get("data") is None:
|
||||
print("Invalid response received from server: no data")
|
||||
return None
|
||||
try:
|
||||
if data is None or data.get("data") is None:
|
||||
print("Invalid response received from server: no data")
|
||||
return None
|
||||
return (
|
||||
data.get("data", {})
|
||||
.get("aggregatedHeliconeRequest", {})
|
||||
|
||||
Submodule agent/Auto-GPT updated: 7cd407b7b4...3a2d08fb41
Submodule agent/beebot updated: 7409c06501...8e4cd92c1d
Reference in New Issue
Block a user