mirror of
https://github.com/aljazceru/Auto-GPT.git
synced 2025-12-19 15:04:26 +01:00
Remove start from agbenchmark (#5241)
Signed-off-by: Merwane Hamadi <merwanehamadi@gmail.com>
This commit is contained in:
4
.github/workflows/autogpts-ci.yml
vendored
4
.github/workflows/autogpts-ci.yml
vendored
@@ -42,8 +42,8 @@ jobs:
|
|||||||
run: |
|
run: |
|
||||||
sh run
|
sh run
|
||||||
URL=http://127.0.0.1:8000 bash -c "$(curl -fsSL https://raw.githubusercontent.com/AI-Engineers-Foundation/agent-protocol/main/testing_suite/test.sh)"
|
URL=http://127.0.0.1:8000 bash -c "$(curl -fsSL https://raw.githubusercontent.com/AI-Engineers-Foundation/agent-protocol/main/testing_suite/test.sh)"
|
||||||
poetry run agbenchmark start --mock
|
poetry run agbenchmark --mock
|
||||||
poetry run agbenchmark start --test=TestWriteFile
|
poetry run agbenchmark --test=TestWriteFile
|
||||||
env:
|
env:
|
||||||
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
|
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
|
||||||
AGENT_NAME: ${{ matrix.agent-name }}
|
AGENT_NAME: ${{ matrix.agent-name }}
|
||||||
|
|||||||
40
.github/workflows/benchmark-ci.yml
vendored
40
.github/workflows/benchmark-ci.yml
vendored
@@ -212,8 +212,8 @@ jobs:
|
|||||||
cd ../..
|
cd ../..
|
||||||
if [ "${GITHUB_EVENT_NAME}" == "pull_request" ] || [ "${{ github.event_name }}" == "push" ]; then
|
if [ "${GITHUB_EVENT_NAME}" == "pull_request" ] || [ "${{ github.event_name }}" == "push" ]; then
|
||||||
set +e # Ignore non-zero exit codes and continue execution
|
set +e # Ignore non-zero exit codes and continue execution
|
||||||
echo "Running the following command: ${prefix}agbenchmark start --maintain --mock"
|
echo "Running the following command: ${prefix}agbenchmark --maintain --mock"
|
||||||
${prefix}agbenchmark start --maintain --mock
|
${prefix}agbenchmark --maintain --mock
|
||||||
EXIT_CODE=$?
|
EXIT_CODE=$?
|
||||||
set -e # Stop ignoring non-zero exit codes
|
set -e # Stop ignoring non-zero exit codes
|
||||||
# Check if the exit code was 5, and if so, exit with 0 instead
|
# Check if the exit code was 5, and if so, exit with 0 instead
|
||||||
@@ -221,36 +221,36 @@ jobs:
|
|||||||
echo "regression_tests.json is empty."
|
echo "regression_tests.json is empty."
|
||||||
fi
|
fi
|
||||||
|
|
||||||
echo "Running the following command: ${prefix}agbenchmark start --mock"
|
echo "Running the following command: ${prefix}agbenchmark --mock"
|
||||||
${prefix}agbenchmark start --mock
|
${prefix}agbenchmark --mock
|
||||||
|
|
||||||
echo "Running the following command: ${prefix}agbenchmark start --mock --category=retrieval"
|
echo "Running the following command: ${prefix}agbenchmark --mock --category=retrieval"
|
||||||
${prefix}agbenchmark start --mock --category=retrieval
|
${prefix}agbenchmark --mock --category=retrieval
|
||||||
|
|
||||||
echo "Running the following command: ${prefix}agbenchmark start --mock --category=interface"
|
echo "Running the following command: ${prefix}agbenchmark --mock --category=interface"
|
||||||
${prefix}agbenchmark start --mock --category=interface
|
${prefix}agbenchmark --mock --category=interface
|
||||||
|
|
||||||
echo "Running the following command: ${prefix}agbenchmark start --mock --category=code"
|
echo "Running the following command: ${prefix}agbenchmark --mock --category=code"
|
||||||
${prefix}agbenchmark start --mock --category=code
|
${prefix}agbenchmark --mock --category=code
|
||||||
|
|
||||||
echo "Running the following command: ${prefix}agbenchmark start --mock --category=memory"
|
echo "Running the following command: ${prefix}agbenchmark --mock --category=memory"
|
||||||
${prefix}agbenchmark start --mock --category=memory
|
${prefix}agbenchmark --mock --category=memory
|
||||||
|
|
||||||
echo "Running the following command: ${prefix}agbenchmark start --mock --suite TestRevenueRetrieval"
|
echo "Running the following command: ${prefix}agbenchmark --mock --suite TestRevenueRetrieval"
|
||||||
${prefix}agbenchmark start --mock --suite TestRevenueRetrieval
|
${prefix}agbenchmark --mock --suite TestRevenueRetrieval
|
||||||
|
|
||||||
echo "Running the following command: ${prefix}agbenchmark start --test=TestWriteFile"
|
echo "Running the following command: ${prefix}agbenchmark --test=TestWriteFile"
|
||||||
${prefix}agbenchmark start --test=TestWriteFile
|
${prefix}agbenchmark --test=TestWriteFile
|
||||||
|
|
||||||
poetry install
|
poetry install
|
||||||
poetry run uvicorn server:app --reload &
|
poetry run uvicorn server:app --reload &
|
||||||
sleep 5
|
sleep 5
|
||||||
export AGENT_NAME=mini-agi
|
export AGENT_NAME=mini-agi
|
||||||
echo "poetry run agbenchmark start --mock --api_mode --host=http://localhost:8000"
|
echo "poetry run agbenchmark --mock --api_mode --host=http://localhost:8000"
|
||||||
poetry run agbenchmark start --mock --api_mode --host=http://localhost:8000
|
poetry run agbenchmark --mock --api_mode --host=http://localhost:8000
|
||||||
else
|
else
|
||||||
echo "${prefix}agbenchmark start"
|
echo "${prefix}agbenchmark"
|
||||||
${prefix}agbenchmark start || echo "This command will always return a non zero exit code unless all the challenges are solved."
|
${prefix}agbenchmark || echo "This command will always return a non zero exit code unless all the challenges are solved."
|
||||||
fi
|
fi
|
||||||
|
|
||||||
cd ../..
|
cd ../..
|
||||||
|
|||||||
@@ -38,7 +38,7 @@ Intial setup:
|
|||||||
### 📊 **Benchmarking**
|
### 📊 **Benchmarking**
|
||||||
|
|
||||||
|
|
||||||
To run the benchmark, use the `agbenchmark start` command. Here are some options you can use with this command:
|
To run the benchmark, use the `agbenchmark` command. Here are some options you can use with this command:
|
||||||
|
|
||||||
- `--backend`: If it's being run from the cli
|
- `--backend`: If it's being run from the cli
|
||||||
- `-c, --category TEXT`: Specific category to run
|
- `-c, --category TEXT`: Specific category to run
|
||||||
@@ -55,12 +55,12 @@ To run the benchmark, use the `agbenchmark start` command. Here are some options
|
|||||||
- `--help`: Show this message and exit.
|
- `--help`: Show this message and exit.
|
||||||
|
|
||||||
For example, if you want to run a specific test, you can use the `--test` option like this:
|
For example, if you want to run a specific test, you can use the `--test` option like this:
|
||||||
`agbenchmark start --test your_test_name`
|
`agbenchmark --test your_test_name`
|
||||||
|
|
||||||
If you want to run the benchmark without dependencies, you can use the `--no_dep` option like this:
|
If you want to run the benchmark without dependencies, you can use the `--no_dep` option like this:
|
||||||
`agbenchmark start --no_dep`
|
`agbenchmark --no_dep`
|
||||||
|
|
||||||
You can combine multiple options as well. For example, to run a specific test without dependencies, you can do:
|
You can combine multiple options as well. For example, to run a specific test without dependencies, you can do:
|
||||||
`agbenchmark start --test your_test_name --no_dep`
|
`agbenchmark --test your_test_name --no_dep`
|
||||||
|
|
||||||
Remember to replace `your_test_name` with the name of the test you want to run.
|
Remember to replace `your_test_name` with the name of the test you want to run.
|
||||||
|
|||||||
@@ -7,4 +7,4 @@ poetry run pip3 uninstall agbenchmark --yes
|
|||||||
poetry run pip3 install -e ../../benchmark
|
poetry run pip3 install -e ../../benchmark
|
||||||
poetry run python3 -m forge &
|
poetry run python3 -m forge &
|
||||||
export PYTHONPATH=$PYTHONPATH:../../benchmark/agbenchmark
|
export PYTHONPATH=$PYTHONPATH:../../benchmark/agbenchmark
|
||||||
poetry run python3 -m agbenchmark start "$@"
|
poetry run python3 -m agbenchmark "$@"
|
||||||
|
|||||||
@@ -2,7 +2,7 @@
|
|||||||
|
|
||||||
1. `pip install auto-gpt-benchmarks`
|
1. `pip install auto-gpt-benchmarks`
|
||||||
2. Add boilerplate code to run and kill agent
|
2. Add boilerplate code to run and kill agent
|
||||||
3. `agbenchmark start`
|
3. `agbenchmark`
|
||||||
- `--category challenge_category` to run tests in a specific category
|
- `--category challenge_category` to run tests in a specific category
|
||||||
- `--mock` to only run mock tests if they exist for each test
|
- `--mock` to only run mock tests if they exist for each test
|
||||||
- `--noreg` to skip any tests that have passed in the past. When you run without this flag and a challenge that previously passed now fails, it will no longer be counted as a regression test
|
- `--noreg` to skip any tests that have passed in the past. When you run without this flag and a challenge that previously passed now fails, it will no longer be counted as a regression test
|
||||||
@@ -22,7 +22,7 @@
|
|||||||
5. `cp .env_example .env`
|
5. `cp .env_example .env`
|
||||||
6. `git submodule update --init --remote --recursive`
|
6. `git submodule update --init --remote --recursive`
|
||||||
7. `uvicorn server:app --reload`
|
7. `uvicorn server:app --reload`
|
||||||
8. `agbenchmark start --mock`
|
8. `agbenchmark --mock`
|
||||||
Keep config the same and watch the logs :)
|
Keep config the same and watch the logs :)
|
||||||
|
|
||||||
### To run with mini-agi
|
### To run with mini-agi
|
||||||
@@ -31,7 +31,7 @@
|
|||||||
2. `pip install -r requirements.txt`
|
2. `pip install -r requirements.txt`
|
||||||
3. `cp .env_example .env`, set `PROMPT_USER=false` and add your `OPENAI_API_KEY=`. Set `MODEL="gpt-3.5-turbo"` if you don't have access to `gpt-4` yet. Also make sure you have Python 3.10^ installed
|
3. `cp .env_example .env`, set `PROMPT_USER=false` and add your `OPENAI_API_KEY=`. Set `MODEL="gpt-3.5-turbo"` if you don't have access to `gpt-4` yet. Also make sure you have Python 3.10^ installed
|
||||||
4. set `AGENT_NAME=mini-agi` in `.env` file and where you want your `REPORT_LOCATION` to be
|
4. set `AGENT_NAME=mini-agi` in `.env` file and where you want your `REPORT_LOCATION` to be
|
||||||
5. Make sure to follow the commands above, and remove mock flag `agbenchmark start`
|
5. Make sure to follow the commands above, and remove mock flag `agbenchmark`
|
||||||
|
|
||||||
- To add requirements `poetry add requirement`.
|
- To add requirements `poetry add requirement`.
|
||||||
|
|
||||||
@@ -61,7 +61,7 @@ https://github.com/Significant-Gravitas/Auto-GPT-Benchmarks/pull/48/files
|
|||||||
|
|
||||||
## How do I run agent in different environments?
|
## How do I run agent in different environments?
|
||||||
|
|
||||||
**To just use as the benchmark for your agent**. `pip install` the package and run `agbenchmark start`
|
**To just use as the benchmark for your agent**. `pip install` the package and run `agbenchmark`
|
||||||
|
|
||||||
**For internal Auto-GPT ci runs**, specify the `AGENT_NAME` you want to use and set the `HOME_ENV`.
|
**For internal Auto-GPT ci runs**, specify the `AGENT_NAME` you want to use and set the `HOME_ENV`.
|
||||||
Ex. `AGENT_NAME=mini-agi`
|
Ex. `AGENT_NAME=mini-agi`
|
||||||
|
|||||||
@@ -217,12 +217,7 @@ def run_benchmark(
|
|||||||
return pytest.main(pytest_args)
|
return pytest.main(pytest_args)
|
||||||
|
|
||||||
|
|
||||||
@click.group()
|
@click.group(invoke_without_command=True)
|
||||||
def cli() -> None:
|
|
||||||
pass
|
|
||||||
|
|
||||||
|
|
||||||
@cli.command()
|
|
||||||
@click.option("--backend", is_flag=True, help="If it's being run from the cli")
|
@click.option("--backend", is_flag=True, help="If it's being run from the cli")
|
||||||
@click.option("-c", "--category", multiple=True, help="Specific category to run")
|
@click.option("-c", "--category", multiple=True, help="Specific category to run")
|
||||||
@click.option(
|
@click.option(
|
||||||
@@ -248,7 +243,7 @@ def cli() -> None:
|
|||||||
@click.option("--nc", is_flag=True, help="Run without cutoff")
|
@click.option("--nc", is_flag=True, help="Run without cutoff")
|
||||||
@click.option("--keep-answers", is_flag=True, help="Keep answers")
|
@click.option("--keep-answers", is_flag=True, help="Keep answers")
|
||||||
@click.option("--cutoff", help="Set or override tests cutoff (seconds)")
|
@click.option("--cutoff", help="Set or override tests cutoff (seconds)")
|
||||||
def start(
|
def cli(
|
||||||
maintain: bool,
|
maintain: bool,
|
||||||
improve: bool,
|
improve: bool,
|
||||||
explore: bool,
|
explore: bool,
|
||||||
|
|||||||
@@ -76,7 +76,7 @@ This folder contains all the files you want the agent to have in its workspace B
|
|||||||
### artifacts_out
|
### artifacts_out
|
||||||
|
|
||||||
This folder contains all the files you would like the agent to generate. This folder is used to mock the agent.
|
This folder contains all the files you would like the agent to generate. This folder is used to mock the agent.
|
||||||
This allows you to run agbenchmark start --test=TestExample --mock and make sure our challenge actually works.
|
This allows you to run agbenchmark --test=TestExample --mock and make sure our challenge actually works.
|
||||||
|
|
||||||
### custom_python
|
### custom_python
|
||||||
|
|
||||||
|
|||||||
@@ -27,7 +27,7 @@ We look forward to your contributions and the exciting solutions that the commun
|
|||||||
We're slowly transitioning to agbenchmark. agbenchmark is a simpler way to improve Auto-GPT. Simply run:
|
We're slowly transitioning to agbenchmark. agbenchmark is a simpler way to improve Auto-GPT. Simply run:
|
||||||
|
|
||||||
```
|
```
|
||||||
agbenchmark start
|
agbenchmark
|
||||||
```
|
```
|
||||||
|
|
||||||
and beat as many challenges as possible.
|
and beat as many challenges as possible.
|
||||||
|
|||||||
Reference in New Issue
Block a user