diff --git a/.github/workflows/autogpts-ci.yml b/.github/workflows/autogpts-ci.yml
index cdd89ba3..1711ac32 100644
--- a/.github/workflows/autogpts-ci.yml
+++ b/.github/workflows/autogpts-ci.yml
@@ -42,8 +42,8 @@ jobs:
         run: |
           sh run
           URL=http://127.0.0.1:8000 bash -c "$(curl -fsSL https://raw.githubusercontent.com/AI-Engineers-Foundation/agent-protocol/main/testing_suite/test.sh)"
-          poetry run agbenchmark start --mock
-          poetry run agbenchmark start --test=TestWriteFile
+          poetry run agbenchmark --mock
+          poetry run agbenchmark --test=TestWriteFile
         env:
           OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
           AGENT_NAME: ${{ matrix.agent-name }}
diff --git a/.github/workflows/benchmark-ci.yml b/.github/workflows/benchmark-ci.yml
index 6b59af19..a6f4c31a 100644
--- a/.github/workflows/benchmark-ci.yml
+++ b/.github/workflows/benchmark-ci.yml
@@ -212,8 +212,8 @@ jobs:
           cd ../..
           if [ "${GITHUB_EVENT_NAME}" == "pull_request" ] || [ "${{ github.event_name }}" == "push" ]; then
             set +e # Ignore non-zero exit codes and continue execution
-            echo "Running the following command: ${prefix}agbenchmark start --maintain --mock"
-            ${prefix}agbenchmark start --maintain --mock
+            echo "Running the following command: ${prefix}agbenchmark --maintain --mock"
+            ${prefix}agbenchmark --maintain --mock
             EXIT_CODE=$?
             set -e # Stop ignoring non-zero exit codes
             # Check if the exit code was 5, and if so, exit with 0 instead
@@ -221,36 +221,36 @@
               echo "regression_tests.json is empty."
             fi
 
-            echo "Running the following command: ${prefix}agbenchmark start --mock"
-            ${prefix}agbenchmark start --mock
+            echo "Running the following command: ${prefix}agbenchmark --mock"
+            ${prefix}agbenchmark --mock
 
-            echo "Running the following command: ${prefix}agbenchmark start --mock --category=retrieval"
-            ${prefix}agbenchmark start --mock --category=retrieval
+            echo "Running the following command: ${prefix}agbenchmark --mock --category=retrieval"
+            ${prefix}agbenchmark --mock --category=retrieval
 
-            echo "Running the following command: ${prefix}agbenchmark start --mock --category=interface"
-            ${prefix}agbenchmark start --mock --category=interface
+            echo "Running the following command: ${prefix}agbenchmark --mock --category=interface"
+            ${prefix}agbenchmark --mock --category=interface
 
-            echo "Running the following command: ${prefix}agbenchmark start --mock --category=code"
-            ${prefix}agbenchmark start --mock --category=code
+            echo "Running the following command: ${prefix}agbenchmark --mock --category=code"
+            ${prefix}agbenchmark --mock --category=code
 
-            echo "Running the following command: ${prefix}agbenchmark start --mock --category=memory"
-            ${prefix}agbenchmark start --mock --category=memory
+            echo "Running the following command: ${prefix}agbenchmark --mock --category=memory"
+            ${prefix}agbenchmark --mock --category=memory
 
-            echo "Running the following command: ${prefix}agbenchmark start --mock --suite TestRevenueRetrieval"
-            ${prefix}agbenchmark start --mock --suite TestRevenueRetrieval
+            echo "Running the following command: ${prefix}agbenchmark --mock --suite TestRevenueRetrieval"
+            ${prefix}agbenchmark --mock --suite TestRevenueRetrieval
 
-            echo "Running the following command: ${prefix}agbenchmark start --test=TestWriteFile"
-            ${prefix}agbenchmark start --test=TestWriteFile
+            echo "Running the following command: ${prefix}agbenchmark --test=TestWriteFile"
+            ${prefix}agbenchmark --test=TestWriteFile
 
             poetry install
             poetry run uvicorn server:app --reload &
             sleep 5
             export AGENT_NAME=mini-agi
-            echo "poetry run agbenchmark start --mock --api_mode --host=http://localhost:8000"
-            poetry run agbenchmark start --mock --api_mode --host=http://localhost:8000
+            echo "poetry run agbenchmark --mock --api_mode --host=http://localhost:8000"
+            poetry run agbenchmark --mock --api_mode --host=http://localhost:8000
           else
-            echo "${prefix}agbenchmark start"
-            ${prefix}agbenchmark start || echo "This command will always return a non zero exit code unless all the challenges are solved."
+            echo "${prefix}agbenchmark"
+            ${prefix}agbenchmark || echo "This command will always return a non zero exit code unless all the challenges are solved."
          fi
          cd ../..
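The workflow above special-cases an exit code of 5 because `agbenchmark` returns whatever `pytest.main()` returns (visible in the `__main__.py` hunk further down), and pytest reserves exit status 5 for "no tests collected". A minimal sketch of that convention, assuming only stock pytest rather than anything from this PR:

```python
# Hedged sketch, not code from this diff: pytest exposes its exit
# statuses as an IntEnum, and NO_TESTS_COLLECTED is the status 5 that
# the workflow converts to 0 so an empty regression run still passes.
import pytest

print(int(pytest.ExitCode.NO_TESTS_COLLECTED))  # prints 5
```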
diff --git a/autogpts/forge/README.md b/autogpts/forge/README.md
index 0170e923..455a89e1 100644
--- a/autogpts/forge/README.md
+++ b/autogpts/forge/README.md
@@ -38,7 +38,7 @@ Initial setup:
 
 ### 📊 **Benchmarking**
 
-To run the benchmark, use the `agbenchmark start` command. Here are some options you can use with this command:
+To run the benchmark, use the `agbenchmark` command. Here are some options you can use with this command:
 
 - `--backend`: If it's being run from the cli
 - `-c, --category TEXT`: Specific category to run
@@ -55,12 +55,12 @@ To run the benchmark, use the `agbenchmark start` command. Here are some options
 - `--help`: Show this message and exit.
 
 For example, if you want to run a specific test, you can use the `--test` option like this:
-`agbenchmark start --test your_test_name`
+`agbenchmark --test your_test_name`
 
 If you want to run the benchmark without dependencies, you can use the `--no_dep` option like this:
-`agbenchmark start --no_dep`
+`agbenchmark --no_dep`
 
 You can combine multiple options as well. For example, to run a specific test without dependencies, you can do:
-`agbenchmark start --test your_test_name --no_dep`
+`agbenchmark --test your_test_name --no_dep`
 
 Remember to replace `your_test_name` with the name of the test you want to run.
diff --git a/autogpts/forge/run_benchmark.sh b/autogpts/forge/run_benchmark.sh
index f1fc0ae0..5364d232 100755
--- a/autogpts/forge/run_benchmark.sh
+++ b/autogpts/forge/run_benchmark.sh
@@ -3,8 +3,8 @@
 kill $(lsof -t -i :8000)
 
 poetry install
-poetry run pip3 uninstall agbenchmark --yes
+poetry run pip3 uninstall agbenchmark --yes
 poetry run pip3 install -e ../../benchmark
 poetry run python3 -m forge &
 export PYTHONPATH=$PYTHONPATH:../../benchmark/agbenchmark
-poetry run python3 -m agbenchmark start "$@"
+poetry run python3 -m agbenchmark "$@"
diff --git a/benchmark/agbenchmark/README.md b/benchmark/agbenchmark/README.md
index 9bf7c0c7..7bc493a6 100644
--- a/benchmark/agbenchmark/README.md
+++ b/benchmark/agbenchmark/README.md
@@ -2,7 +2,7 @@
 
 1. `pip install auto-gpt-benchmarks`
 2. Add boilerplate code to run and kill agent
-3. `agbenchmark start`
+3. `agbenchmark`
    - `--category challenge_category` to run tests in a specific category
    - `--mock` to only run mock tests if they exist for each test
    - `--noreg` to skip any tests that have passed in the past. When you run without this flag and a previously passing challenge fails, it will no longer count as a regression test
@@ -22,7 +22,7 @@
 5. `cp .env_example .env`
 6. `git submodule update --init --remote --recursive`
 7. `uvicorn server:app --reload`
-8. `agbenchmark start --mock`
+8. `agbenchmark --mock`
    Keep config the same and watch the logs :)
 
 ### To run with mini-agi
@@ -31,7 +31,7 @@
 
 2. `pip install -r requirements.txt`
 3. `cp .env_example .env`, set `PROMPT_USER=false` and add your `OPENAI_API_KEY=`. Set `MODEL="gpt-3.5-turbo"` if you don't have access to `gpt-4` yet. Also make sure you have Python 3.10+ installed
 4. Set `AGENT_NAME=mini-agi` in the `.env` file, along with where you want your `REPORT_LOCATION` to be
-5. Make sure to follow the commands above, and remove the mock flag: `agbenchmark start`
+5. Make sure to follow the commands above, and remove the mock flag: `agbenchmark`
 
 - To add requirements: `poetry add requirement`.
@@ -61,7 +61,7 @@
 https://github.com/Significant-Gravitas/Auto-GPT-Benchmarks/pull/48/files
 
 ## How do I run an agent in different environments?
 
-**To just use it as the benchmark for your agent**, `pip install` the package and run `agbenchmark start`.
+**To just use it as the benchmark for your agent**, `pip install` the package and run `agbenchmark`.
 
 **For internal Auto-GPT CI runs**, specify the `AGENT_NAME` you want to use and set the `HOME_ENV`. Ex. `AGENT_NAME=mini-agi`
diff --git a/benchmark/agbenchmark/__main__.py b/benchmark/agbenchmark/__main__.py
index bff606f9..e132acd1 100644
--- a/benchmark/agbenchmark/__main__.py
+++ b/benchmark/agbenchmark/__main__.py
@@ -217,12 +217,7 @@ def run_benchmark(
     return pytest.main(pytest_args)
 
 
-@click.group()
-def cli() -> None:
-    pass
-
-
-@cli.command()
+@click.group(invoke_without_command=True)
 @click.option("--backend", is_flag=True, help="If it's being run from the cli")
 @click.option("-c", "--category", multiple=True, help="Specific category to run")
 @click.option(
@@ -248,7 +243,7 @@ def cli() -> None:
 @click.option("--nc", is_flag=True, help="Run without cutoff")
 @click.option("--keep-answers", is_flag=True, help="Keep answers")
 @click.option("--cutoff", help="Set or override tests cutoff (seconds)")
-def start(
+def cli(
     maintain: bool,
     improve: bool,
     explore: bool,
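The `__main__.py` hunk above is what removes the `start` subcommand: instead of a bare `@click.group()` whose `start` command did the work, the group itself now runs via `invoke_without_command=True`. A minimal sketch of that Click pattern, with a single illustrative `--mock` option standing in for the command's real option list:

```python
# Minimal sketch of the pattern adopted above (illustrative, trimmed to
# one option). With invoke_without_command=True, Click invokes the group
# callback even when no subcommand is named, so `agbenchmark --mock`
# now does what `agbenchmark start --mock` did before.
import click


@click.group(invoke_without_command=True)
@click.option("--mock", is_flag=True, help="Run only mock tests")
def cli(mock: bool) -> None:
    click.echo(f"running benchmark (mock={mock})")


if __name__ == "__main__":
    cli()
```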
diff --git a/benchmark/agbenchmark/challenges/CHALLENGE.md b/benchmark/agbenchmark/challenges/CHALLENGE.md
index c6b71f30..203289cb 100644
--- a/benchmark/agbenchmark/challenges/CHALLENGE.md
+++ b/benchmark/agbenchmark/challenges/CHALLENGE.md
@@ -76,7 +76,7 @@ This folder contains all the files you want the agent to have in its workspace B
 ### artifacts_out
 
 This folder contains all the files you would like the agent to generate. This folder is used to mock the agent.
-This allows you to run agbenchmark start --test=TestExample --mock and make sure our challenge actually works.
+This allows you to run agbenchmark --test=TestExample --mock and make sure our challenge actually works.
 
 ### custom_python
diff --git a/docs/content/challenges/introduction.md b/docs/content/challenges/introduction.md
index 256a8238..ab9ed5f7 100644
--- a/docs/content/challenges/introduction.md
+++ b/docs/content/challenges/introduction.md
@@ -27,7 +27,7 @@ We look forward to your contributions and the exciting solutions that the commun
 We're slowly transitioning to agbenchmark. agbenchmark is a simpler way to improve Auto-GPT. Simply run:
 
 ```
-  agbenchmark start
+  agbenchmark
 ```
 
 and beat as many challenges as possible.