From 0799be7e28bf4805e5cd2c9296c142b31f9501a4 Mon Sep 17 00:00:00 2001 From: merwanehamadi Date: Mon, 10 Jul 2023 21:54:25 -0700 Subject: [PATCH] Fix tests ci (#82) --- .github/workflows/ci.yml | 7 +------ agbenchmark/agent_interface.py | 8 +++++--- agbenchmark/challenges/test_all.py | 32 +++++++++++++----------------- agent/Auto-GPT | 2 +- agent/gpt-engineer | 2 +- agent/smol-developer | 2 +- poetry.lock | 29 ++++++++++++++++++++++++++- pyproject.toml | 1 + 8 files changed, 52 insertions(+), 31 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index cac1dedb..9df4173b 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -75,6 +75,7 @@ jobs: tests: name: ${{ matrix.agent-name }} runs-on: ubuntu-latest + timeout-minutes: 10 env: min-python-version: "3.10" strategy: @@ -156,16 +157,10 @@ jobs: if [ "${GITHUB_EVENT_NAME}" == "schedule" ] || [ "${GITHUB_EVENT_NAME}" == "workflow_dispatch" ]; then agbenchmark start --maintain else - exit 0 agbenchmark start --maintain --mock agbenchmark start --improve --mock agbenchmark start --mock agbenchmark start --mock --category=retrieval - agbenchmark start --mock --category=regression - agbenchmark start --mock --category=interface - agbenchmark start --mock --category=code - agbenchmark start --mock --category=memory - agbenchmark start --mock --category=memory --category=code fi env: GITHUB_EVENT_NAME: ${{ github.event_name }} diff --git a/agbenchmark/agent_interface.py b/agbenchmark/agent_interface.py index d058ad4c..713451f0 100644 --- a/agbenchmark/agent_interface.py +++ b/agbenchmark/agent_interface.py @@ -3,11 +3,12 @@ import shutil import subprocess import sys import time -from pathlib import Path from typing import Any, Dict from dotenv import load_dotenv +from agbenchmark.start_benchmark import CURRENT_DIRECTORY + load_dotenv() mock_test_str = os.getenv("MOCK_TEST") @@ -76,8 +77,9 @@ def copy_artifacts_into_workspace( workspace: str, artifact_folder_name: str, challenge_dir_path: str ) -> None: # this file is at agbenchmark\agent_interface.py - script_dir = Path(__file__).resolve().parent.parent - source_dir = os.path.join(script_dir, challenge_dir_path, artifact_folder_name) + source_dir = os.path.join( + CURRENT_DIRECTORY, "..", challenge_dir_path, artifact_folder_name + ) # Check if source_dir exists, if not then return immediately. if not os.path.exists(source_dir): diff --git a/agbenchmark/challenges/test_all.py b/agbenchmark/challenges/test_all.py index f8bb2347..00a6ed63 100644 --- a/agbenchmark/challenges/test_all.py +++ b/agbenchmark/challenges/test_all.py @@ -49,19 +49,6 @@ def generate_tests() -> None: class_name = data.get("name", "") challenge_location = get_test_path(json_file) - if data["ground"]["type"] == "custom_python": - custom_python_location = ( - f"{CURRENT_DIRECTORY}/../{challenge_location}/custom_python" - ) - sys.path.append(str(custom_python_location)) - - for module_loader, name, ispkg in pkgutil.iter_modules( - [str(custom_python_location)] - ): - module = importlib.import_module(name) - - if hasattr(module, "make_assertion"): - make_assertion = getattr(module, "make_assertion") # Define test class dynamically challenge_class = types.new_class(class_name, (Challenge,)) @@ -75,11 +62,20 @@ def generate_tests() -> None: scores = self.get_scores(config) # Check if make_assertion is defined and use it - if "make_assertion" in locals(): - try: - make_assertion() - except AssertionError as error: - print(error) # Or handle this in another way + if self.data.ground.type == "custom_python": + custom_python_location = ( + f"{CURRENT_DIRECTORY}/../{challenge_location}/custom_python" + ) + sys.path.append(str(custom_python_location)) + + for (module_loader, name, ispkg) in pkgutil.iter_modules( + [str(custom_python_location)] + ): + module = importlib.import_module(name) + + if hasattr(module, "make_assertion"): + make_assertion = getattr(module, "make_assertion") + make_assertion() else: assert 1 in scores diff --git a/agent/Auto-GPT b/agent/Auto-GPT index dc2a7699..ade8e6f8 160000 --- a/agent/Auto-GPT +++ b/agent/Auto-GPT @@ -1 +1 @@ -Subproject commit dc2a76990c75fafacbeaa76eb2e27d48de44cadd +Subproject commit ade8e6f8142a937160596a987ab96808b583f9e3 diff --git a/agent/gpt-engineer b/agent/gpt-engineer index cde9be3e..538bcba6 160000 --- a/agent/gpt-engineer +++ b/agent/gpt-engineer @@ -1 +1 @@ -Subproject commit cde9be3e73212b3d8366a4ed149a18122bfe2333 +Subproject commit 538bcba6efbb7cda7f6a355a8c8420bbbdb52f25 diff --git a/agent/smol-developer b/agent/smol-developer index c52b14b1..150981f7 160000 --- a/agent/smol-developer +++ b/agent/smol-developer @@ -1 +1 @@ -Subproject commit c52b14b1d5b1b74d886f08d9914e7f43437f609d +Subproject commit 150981f77f19777bf5aa76cb3a74869e4a8a8a05 diff --git a/poetry.lock b/poetry.lock index 5526da16..ad72f5e1 100644 --- a/poetry.lock +++ b/poetry.lock @@ -729,6 +729,21 @@ files = [ {file = "pathspec-0.11.1.tar.gz", hash = "sha256:2798de800fa92780e33acca925945e9a19a133b715067cf165b8866c15a31687"}, ] +[[package]] +name = "pexpect" +version = "4.8.0" +description = "Pexpect allows easy control of interactive console applications." +category = "main" +optional = false +python-versions = "*" +files = [ + {file = "pexpect-4.8.0-py2.py3-none-any.whl", hash = "sha256:0b48a55dcb3c05f3329815901ea4fc1537514d6ba867a152b581d69ae3710937"}, + {file = "pexpect-4.8.0.tar.gz", hash = "sha256:fc65a43959d153d0114afe13997d439c22823a27cefceb5ff35c2178c6784c0c"}, +] + +[package.dependencies] +ptyprocess = ">=0.5" + [[package]] name = "platformdirs" version = "3.8.0" @@ -761,6 +776,18 @@ files = [ dev = ["pre-commit", "tox"] testing = ["pytest", "pytest-benchmark"] +[[package]] +name = "ptyprocess" +version = "0.7.0" +description = "Run a subprocess in a pseudo terminal" +category = "main" +optional = false +python-versions = "*" +files = [ + {file = "ptyprocess-0.7.0-py2.py3-none-any.whl", hash = "sha256:4b41f3967fce3af57cc7e94b888626c18bf37a083e3651ca8feeb66d492fef35"}, + {file = "ptyprocess-0.7.0.tar.gz", hash = "sha256:5c5d0a3b48ceee0b48485e0c26037c0acd7d29765ca3fbb5cb3831d347423220"}, +] + [[package]] name = "pycodestyle" version = "2.7.0" @@ -1109,4 +1136,4 @@ multidict = ">=4.0" [metadata] lock-version = "2.0" python-versions = "^3.10" -content-hash = "81b84bbe08d4a09fb6a4f99c7fb018e0c0fcd879fa368c388b0af20c7c9a3f31" +content-hash = "09871e879785f0a7d5c31a61553cd2df08d88324a864b9c56b8e97d95893157f" diff --git a/pyproject.toml b/pyproject.toml index 1a96a51d..b0526ab5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -17,6 +17,7 @@ pytest-depends = "^1.0.1" python-dotenv = "^0.21.0" click = "^8.1.3" types-requests = "^2.31.0.1" +pexpect = "^4.8.0" [tool.poetry.group.dev.dependencies] flake8 = "^3.9.2"