diff --git a/autogpts/forge/run b/autogpts/forge/run index 6de613b8..8fa77196 100755 --- a/autogpts/forge/run +++ b/autogpts/forge/run @@ -8,4 +8,5 @@ if [ ! -f .env ]; then echo "Please add your api keys to the .env file." fi poetry run python -m forge & -poetry run agbenchmark serve & + +agbenchmark serve & #voluntarily not using poetry run so that it runs in editable mode diff --git a/benchmark/agbenchmark/__main__.py b/benchmark/agbenchmark/__main__.py index 494fae6c..bff606f9 100644 --- a/benchmark/agbenchmark/__main__.py +++ b/benchmark/agbenchmark/__main__.py @@ -114,8 +114,8 @@ def run_benchmark( no_dep: bool = False, nc: bool = False, keep_answers: bool = False, - category: Optional[list[str]] = None, - skip_category: Optional[list[str]] = None, + category: Optional[tuple[str]] = None, + skip_category: Optional[tuple[str]] = None, test: Optional[str] = None, cutoff: Optional[int] = None, server: bool = False, @@ -157,7 +157,6 @@ def run_benchmark( if test: print("Running specific test:", test) - pytest_args.extend(["-k", test, "--test"]) else: # Categories that are used in the challenges categories = get_unique_categories() diff --git a/benchmark/agbenchmark/app.py b/benchmark/agbenchmark/app.py index 8bc52613..0485528b 100644 --- a/benchmark/agbenchmark/app.py +++ b/benchmark/agbenchmark/app.py @@ -54,7 +54,18 @@ app.add_middleware( def run_single_test(body: CreateReportRequest) -> Any: from agbenchmark.__main__ import run_benchmark - run_benchmark(category=[body.category], mock=body.mock) + # it's a hack because other parts of the code are using sys.argv + sys.argv = [sys.argv[0]] + sys.argv.append("start") + if body.category: + sys.argv.append(f"--category={body.category}") + for body_test in body.tests: + sys.argv.append(f"--test={body_test}") + categories = None + if body.category: + categories = tuple([body.category]) + + run_benchmark(category=categories, mock=body.mock, test=tuple(body.tests)) import json from pathlib import Path @@ -95,6 +106,8 @@ from fastapi import FastAPI, Request, Response @app.get("/updates") def get_updates(request: Request) -> Any: + from agbenchmark.__main__ import UPDATES_JSON_PATH + try: # Read data from the "update.json" file (provide the correct file path) with open(UPDATES_JSON_PATH, "r") as file: diff --git a/benchmark/agbenchmark/generate_test.py b/benchmark/agbenchmark/generate_test.py index 3018726c..bf2c3be2 100644 --- a/benchmark/agbenchmark/generate_test.py +++ b/benchmark/agbenchmark/generate_test.py @@ -192,8 +192,12 @@ def generate_tests() -> None: # sourcery skip: invert-any-all continue # --test flag, only run the test if it's the exact one specified - test_flag = "--test" in commands - if test_flag and data["name"] not in commands: + tests = [] + for command in commands: + if command.startswith("--test="): + tests.append(command.split("=")[1]) + + if tests and data["name"] not in tests: continue # --maintain and --improve flag