mirror of
https://github.com/aljazceru/Auto-GPT.git
synced 2026-02-23 07:04:24 +01:00
Merge branch 'master' into feat/monitor
This commit is contained in:
20
.github/workflows/autogpt-ci.yml
vendored
20
.github/workflows/autogpt-ci.yml
vendored
@@ -4,13 +4,21 @@ on:
|
||||
push:
|
||||
branches: [ master, ci-test* ]
|
||||
paths:
|
||||
- 'autogpts/autogpt'
|
||||
- '!autogpts/autogpt/tests/Auto-GPT-test-cassettes'
|
||||
- 'autogpts/autogpt/**'
|
||||
- '!autogpts/autogpt/tests/vcr_cassettes'
|
||||
- '!autogpts/autogpt/tests/challenges/current_score.json'
|
||||
pull_request:
|
||||
branches: [ stable, master, release-* ]
|
||||
paths:
|
||||
- 'autogpts/autogpt/**'
|
||||
- '!autogpts/autogpt/tests/vcr_cassettes'
|
||||
- '!autogpts/autogpt/tests/challenges/current_score.json'
|
||||
pull_request_target:
|
||||
branches: [ master, release-*, ci-test* ]
|
||||
paths:
|
||||
- 'autogpts/autogpt/**'
|
||||
- '!autogpts/autogpt/tests/vcr_cassettes'
|
||||
- '!autogpts/autogpt/tests/challenges/current_score.json'
|
||||
|
||||
concurrency:
|
||||
group: ${{ format('autogpt-ci-{0}', github.head_ref && format('{0}-{1}', github.event_name, github.event.pull_request.number) || github.sha) }}
|
||||
@@ -114,7 +122,7 @@ jobs:
|
||||
run: |
|
||||
cassette_branch="${{ github.event.pull_request.user.login }}-${{ github.event.pull_request.head.ref }}"
|
||||
cassette_base_branch="${{ github.event.pull_request.base.ref }}"
|
||||
cd tests/Auto-GPT-test-cassettes
|
||||
cd tests/vcr_cassettes
|
||||
|
||||
if ! git ls-remote --exit-code --heads origin $cassette_base_branch ; then
|
||||
cassette_base_branch="master"
|
||||
@@ -184,7 +192,7 @@ jobs:
|
||||
git config "$config_key" \
|
||||
"Authorization: Basic $base64_pat"
|
||||
|
||||
cd tests/Auto-GPT-test-cassettes
|
||||
cd tests/vcr_cassettes
|
||||
git config "$config_key" \
|
||||
"Authorization: Basic $base64_pat"
|
||||
|
||||
@@ -215,7 +223,7 @@ jobs:
|
||||
cassette_branch="${{ github.ref_name }}"
|
||||
fi
|
||||
|
||||
cd tests/Auto-GPT-test-cassettes
|
||||
cd tests/vcr_cassettes
|
||||
# Commit & push changes to cassettes if any
|
||||
if ! git diff --quiet; then
|
||||
git add .
|
||||
@@ -223,7 +231,7 @@ jobs:
|
||||
git push origin HEAD:$cassette_branch
|
||||
if [ ! $is_pull_request ]; then
|
||||
cd ../..
|
||||
git add tests/Auto-GPT-test-cassettes
|
||||
git add tests/vcr_cassettes
|
||||
git commit -m "Update cassette submodule"
|
||||
git push origin HEAD:$cassette_branch
|
||||
fi
|
||||
|
||||
8
.github/workflows/autogpt-docker-ci.yml
vendored
8
.github/workflows/autogpt-docker-ci.yml
vendored
@@ -4,11 +4,15 @@ on:
|
||||
push:
|
||||
branches: [ master ]
|
||||
paths:
|
||||
- 'autogpts/autogpt'
|
||||
- '!autogpts/autogpt/tests/Auto-GPT-test-cassettes'
|
||||
- 'autogpts/autogpt/**'
|
||||
- '!autogpts/autogpt/tests/vcr_cassettes'
|
||||
- '!autogpts/autogpt/tests/challenges/current_score.json'
|
||||
pull_request:
|
||||
branches: [ master, release-*, stable ]
|
||||
paths:
|
||||
- 'autogpts/autogpt/**'
|
||||
- '!autogpts/autogpt/tests/vcr_cassettes'
|
||||
- '!autogpts/autogpt/tests/challenges/current_score.json'
|
||||
|
||||
concurrency:
|
||||
group: ${{ format('autogpt-docker-ci-{0}', github.head_ref && format('pr-{0}', github.event.pull_request.number) || github.sha) }}
|
||||
|
||||
21
.github/workflows/benchmark-ci.yml
vendored
21
.github/workflows/benchmark-ci.yml
vendored
@@ -7,7 +7,7 @@ on:
|
||||
agents:
|
||||
description: 'Agents to run (comma-separated)'
|
||||
required: false
|
||||
default: 'gpt-engineer,smol-developer,Auto-GPT,mini-agi,beebot,BabyAGI,PolyGPT,Turbo' # Default agents if none are specified
|
||||
default: 'gpt-engineer,smol-developer,Auto-GPT,mini-agi,beebot,babyagi,PolyGPT,Auto-GPT-Turbo' # Default agents if none are specified
|
||||
schedule:
|
||||
- cron: '0 8 * * *'
|
||||
push:
|
||||
@@ -17,6 +17,9 @@ on:
|
||||
- '!benchmark/reports/**'
|
||||
pull_request:
|
||||
branches: [stable, master, release-*]
|
||||
paths:
|
||||
- 'benchmark/**'
|
||||
- '!benchmark/reports/**'
|
||||
|
||||
jobs:
|
||||
lint:
|
||||
@@ -83,7 +86,7 @@ jobs:
|
||||
run: |
|
||||
if [ "${{ github.event_name }}" == "schedule" ]; then
|
||||
echo "::set-output name=env-name::production"
|
||||
echo "::set-output name=matrix::[ 'gpt-engineer', 'smol-developer', 'Auto-GPT', 'mini-agi', 'beebot', 'BabyAGI', 'PolyGPT', 'Turbo' ]"
|
||||
echo "::set-output name=matrix::[ 'gpt-engineer', 'smol-developer', 'Auto-GPT', 'mini-agi', 'beebot', 'babyagi', 'PolyGPT', 'Auto-GPT-Turbo' ]"
|
||||
elif [ "${{ github.event_name }}" == "workflow_dispatch" ]; then
|
||||
IFS=',' read -ra matrix_array <<< "${{ github.event.inputs.agents }}"
|
||||
matrix_string="[ \"$(echo "${matrix_array[@]}" | sed 's/ /", "/g')\" ]"
|
||||
@@ -146,9 +149,12 @@ jobs:
|
||||
mkdir agent
|
||||
link=$(jq -r '.["'"$AGENT_NAME"'"].url' agents_to_benchmark.json)
|
||||
branch=$(jq -r '.["'"$AGENT_NAME"'"].branch' agents_to_benchmark.json)
|
||||
commit=$(jq -r '.["'"$AGENT_NAME"'"].commit' agents_to_benchmark.json)
|
||||
cd agent
|
||||
git clone "$link" -b "$branch"
|
||||
cd $AGENT_NAME
|
||||
git checkout "$commit" || echo "Commit not found, using latest commit on branch"
|
||||
|
||||
prefix=""
|
||||
if [ "$AGENT_NAME" == "gpt-engineer" ]; then
|
||||
make install
|
||||
@@ -167,7 +173,7 @@ jobs:
|
||||
python -m venv venv
|
||||
source venv/bin/activate
|
||||
pip install -r requirements.txt
|
||||
elif [ "$AGENT_NAME" == "BabyAGI" ]; then
|
||||
elif [ "$AGENT_NAME" == "babyagi" ]; then
|
||||
python -m venv venv
|
||||
source venv/bin/activate
|
||||
pip install -r requirements.txt
|
||||
@@ -188,7 +194,7 @@ jobs:
|
||||
nvm install && nvm use
|
||||
yarn install
|
||||
export NODE_TLS_REJECT_UNAUTHORIZED=0
|
||||
elif [ "$AGENT_NAME" == "Turbo" ]; then
|
||||
elif [ "$AGENT_NAME" == "Auto-GPT-Turbo" ]; then
|
||||
python -m venv venv
|
||||
source venv/bin/activate
|
||||
pip install -r requirements.txt
|
||||
@@ -259,7 +265,6 @@ jobs:
|
||||
REQUESTS_CA_BUNDLE: /etc/ssl/certs/ca-certificates.crt
|
||||
HELICONE_CACHE_ENABLED: false
|
||||
HELICONE_PROPERTY_AGENT: ${{ matrix.agent-name }}
|
||||
REPORT_LOCATION: ${{ format('../../reports/{0}', matrix.agent-name) }}
|
||||
WOLFRAM_ALPHA_APPID: ${{ secrets.WOLFRAM_ALPHA_APPID }}
|
||||
SERPER_API_KEY: ${{ secrets.SERPER_API_KEY }}
|
||||
BING_SUBSCRIPTION_KEY: ${{ secrets.BING_SUBSCRIPTION_KEY }}
|
||||
@@ -273,11 +278,12 @@ jobs:
|
||||
|
||||
- name: Authenticate and Push to Branch
|
||||
working-directory: ./benchmark/
|
||||
if: (success() || failure()) && (github.event_name == 'schedule' || github.event_name == 'workflow_dispatch')
|
||||
if: (success() || failure()) && (github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' || startsWith(github.ref_name, 'ci-test-'))
|
||||
run: |
|
||||
git config --global user.email "github-bot@agpt.co"
|
||||
git config --global user.name "Auto-GPT-Bot"
|
||||
|
||||
cp -rn agent/$AGENT_NAME/agbenchmark/reports/* reports/$AGENT_NAME/
|
||||
rm -rf agent
|
||||
git add reports/* || echo "nothing to commit"
|
||||
commit_message="${{ matrix.agent-name }}-$(date +'%Y%m%d%H%M%S')"
|
||||
git commit -m "${commit_message}"
|
||||
@@ -303,3 +309,4 @@ jobs:
|
||||
env:
|
||||
GDRIVE_BASE64: ${{ secrets.GDRIVE_BASE64 }}
|
||||
GITHUB_REF_NAME: ${{ github.ref_name }}
|
||||
AGENT_NAME: ${{ matrix.agent-name }}
|
||||
|
||||
4
.github/workflows/benchmarks.yml
vendored
4
.github/workflows/benchmarks.yml
vendored
@@ -49,7 +49,7 @@ jobs:
|
||||
|
||||
- name: Run pytest with coverage
|
||||
run: |
|
||||
rm -rf tests/Auto-GPT-test-cassettes
|
||||
rm -rf tests/vcr_cassettes
|
||||
pytest -n auto --record-mode=all ${{ matrix.config.task }}
|
||||
env:
|
||||
CI: true
|
||||
@@ -70,4 +70,4 @@ jobs:
|
||||
uses: actions/upload-artifact@v3
|
||||
with:
|
||||
name: cassettes-${{ matrix.config.task-name }}
|
||||
path: tests/Auto-GPT-test-cassettes/
|
||||
path: tests/vcr_cassettes/
|
||||
|
||||
103
.github/workflows/forge-ci.yml
vendored
Normal file
103
.github/workflows/forge-ci.yml
vendored
Normal file
@@ -0,0 +1,103 @@
|
||||
name: Forge CI
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
branches: [master]
|
||||
schedule:
|
||||
- cron: '0 8 * * *'
|
||||
push:
|
||||
branches: [master, ci-test*]
|
||||
paths:
|
||||
- 'forge/**'
|
||||
- '.github/workflows/forge-ci.yml'
|
||||
pull_request:
|
||||
branches: [stable, master, release-*]
|
||||
paths:
|
||||
- 'forge/**'
|
||||
- '.github/workflows/forge-ci.yml'
|
||||
|
||||
jobs:
|
||||
lint:
|
||||
runs-on: ubuntu-latest
|
||||
env:
|
||||
min-python-version: '3.10'
|
||||
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v3
|
||||
with:
|
||||
fetch-depth: 0
|
||||
ref: ${{ github.event.pull_request.head.ref }}
|
||||
repository: ${{ github.event.pull_request.head.repo.full_name }}
|
||||
submodules: true
|
||||
|
||||
- name: Set up Python ${{ env.min-python-version }}
|
||||
uses: actions/setup-python@v2
|
||||
with:
|
||||
python-version: ${{ env.min-python-version }}
|
||||
|
||||
- name: Install Poetry
|
||||
working-directory: ./forge/
|
||||
run: |
|
||||
curl -sSL https://install.python-poetry.org | python -
|
||||
|
||||
- name: Install dependencies
|
||||
working-directory: ./forge/
|
||||
run: |
|
||||
poetry install
|
||||
|
||||
- name: Lint with flake8
|
||||
working-directory: ./forge/
|
||||
run: poetry run flake8
|
||||
|
||||
- name: Check black formatting
|
||||
working-directory: ./forge/
|
||||
run: poetry run black . --exclude test.py --check
|
||||
if: success() || failure()
|
||||
|
||||
- name: Check isort formatting
|
||||
working-directory: ./forge/
|
||||
run: poetry run isort . --check
|
||||
if: success() || failure()
|
||||
|
||||
agent-protocol-test:
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 20
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v3
|
||||
with:
|
||||
fetch-depth: 0
|
||||
ref: ${{ github.event.pull_request.head.ref }}
|
||||
repository: ${{ github.event.pull_request.head.repo.full_name }}
|
||||
submodules: true
|
||||
|
||||
- name: Set up Python ${{ env.min-python-version }}
|
||||
uses: actions/setup-python@v2
|
||||
with:
|
||||
python-version: ${{ env.min-python-version }}
|
||||
|
||||
- name: Install Poetry
|
||||
working-directory: ./forge/
|
||||
run: |
|
||||
curl -sSL https://install.python-poetry.org | python -
|
||||
|
||||
- name: Install dependencies
|
||||
working-directory: ./forge/
|
||||
run: |
|
||||
poetry install
|
||||
|
||||
- name: Run regression tests
|
||||
working-directory: ./forge/
|
||||
run: |
|
||||
cp .env.example .env
|
||||
poetry run python -m autogpt &
|
||||
URL=http://127.0.0.1:8000 bash -c "$(curl -fsSL https://raw.githubusercontent.com/AI-Engineers-Foundation/agent-protocol/main/testing_suite/test.sh)"
|
||||
env:
|
||||
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
|
||||
AGENT_NAME: ${{ matrix.agent-name }}
|
||||
HELICONE_API_KEY: ${{ secrets.HELICONE_API_KEY }}
|
||||
REQUESTS_CA_BUNDLE: /etc/ssl/certs/ca-certificates.crt
|
||||
HELICONE_CACHE_ENABLED: false
|
||||
HELICONE_PROPERTY_AGENT: ${{ matrix.agent-name }}
|
||||
REPORT_LOCATION: ${{ format('../../reports/{0}', matrix.agent-name) }}
|
||||
3
.github/workflows/pr-label.yml
vendored
3
.github/workflows/pr-label.yml
vendored
@@ -5,8 +5,9 @@ on:
|
||||
push:
|
||||
branches: [ master, release-* ]
|
||||
paths-ignore:
|
||||
- 'autogpts/autogpt/tests/Auto-GPT-test-cassettes'
|
||||
- 'autogpts/autogpt/tests/vcr_cassettes'
|
||||
- 'autogpts/autogpt/tests/challenges/current_score.json'
|
||||
- 'benchmark/reports/**'
|
||||
# So that the `dirtyLabel` is removed if conflicts are resolved
|
||||
# We recommend `pull_request_target` so that github secrets are available.
|
||||
# In `pull_request` we wouldn't be able to change labels of fork PRs
|
||||
|
||||
4
.gitignore
vendored
4
.gitignore
vendored
@@ -31,8 +31,6 @@ __pycache__/
|
||||
build/
|
||||
develop-eggs/
|
||||
dist/
|
||||
/plugins/
|
||||
plugins_config.yaml
|
||||
downloads/
|
||||
eggs/
|
||||
.eggs/
|
||||
@@ -166,4 +164,4 @@ agbenchmark/reports/
|
||||
|
||||
# Nodejs
|
||||
package-lock.json
|
||||
package.json
|
||||
package.json
|
||||
|
||||
3
.gitmodules
vendored
Normal file
3
.gitmodules
vendored
Normal file
@@ -0,0 +1,3 @@
|
||||
[submodule "autogpts/autogpt/tests/vcr_cassettes"]
|
||||
path = autogpts/autogpt/tests/vcr_cassettes
|
||||
url = https://github.com/Significant-Gravitas/Auto-GPT-test-cassettes
|
||||
21
Makefile
Normal file
21
Makefile
Normal file
@@ -0,0 +1,21 @@
|
||||
install:
|
||||
@echo "Installing dependencies..."
|
||||
@command -v poetry >/dev/null 2>&1 || { echo >&2 "Poetry not found, installing..."; curl -sSL https://install.python-poetry.org | python3 - ; }
|
||||
poetry install
|
||||
|
||||
list_agents:
|
||||
@echo "Listing all agents in autogpts..."
|
||||
@for agent in $$(ls autogpts); do \
|
||||
echo \\t$$agent; \
|
||||
done
|
||||
@echo \\t"forge"
|
||||
|
||||
|
||||
benchmark_%:
|
||||
@echo "Running benchmark for $*"
|
||||
poetry run sh -c 'export PYTHONPATH=$$PYTHONPATH:./benchmark:./autogpts/$*; echo $$PYTHONPATH; python -m benchmark start --agent-config autogpts/$*/benchmark_config.json'
|
||||
|
||||
|
||||
run:
|
||||
python main.py
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
# Auto-GPT: An Autonomous GPT-4 Experiment
|
||||
[](https://agpt.co)
|
||||
[](https://github.com/Significant-Gravitas/Auto-GPT/actions/workflows/ci.yml)
|
||||
[](https://github.com/Significant-Gravitas/Auto-GPT/actions/workflows/autogpt-ci.yml)
|
||||
[](https://discord.gg/autogpt)
|
||||
[](https://github.com/Significant-Gravitas/Auto-GPT/stargazers)
|
||||
[](https://twitter.com/SigGravitas)
|
||||
|
||||
2
autogpts/autogpt/.gitattributes
vendored
2
autogpts/autogpt/.gitattributes
vendored
@@ -1,5 +1,5 @@
|
||||
# Exclude VCR cassettes from stats
|
||||
tests/Auto-GPT-test-cassettes/**/**.y*ml linguist-generated
|
||||
tests/vcr_cassettes/**/**.y*ml linguist-generated
|
||||
|
||||
# Mark documentation as such
|
||||
docs/**.md linguist-documentation
|
||||
|
||||
7
autogpts/autogpt/.gitignore
vendored
7
autogpts/autogpt/.gitignore
vendored
@@ -31,7 +31,7 @@ __pycache__/
|
||||
build/
|
||||
develop-eggs/
|
||||
dist/
|
||||
/plugins/
|
||||
/plugins/*
|
||||
plugins_config.yaml
|
||||
downloads/
|
||||
eggs/
|
||||
@@ -166,4 +166,7 @@ agbenchmark/reports/
|
||||
|
||||
# Nodejs
|
||||
package-lock.json
|
||||
package.json
|
||||
package.json
|
||||
|
||||
# Keep
|
||||
!.keep
|
||||
|
||||
4
autogpts/autogpt/.gitmodules
vendored
4
autogpts/autogpt/.gitmodules
vendored
@@ -1,4 +0,0 @@
|
||||
[submodule "tests/Auto-GPT-test-cassettes"]
|
||||
path = tests/Auto-GPT-test-cassettes
|
||||
url = https://github.com/Significant-Gravitas/Auto-GPT-test-cassettes
|
||||
branch = master
|
||||
@@ -1,6 +1,6 @@
|
||||
# Auto-GPT: An Autonomous GPT-4 Experiment
|
||||
[](https://agpt.co)
|
||||
[](https://github.com/Significant-Gravitas/Auto-GPT/actions/workflows/ci.yml)
|
||||
[](https://github.com/Significant-Gravitas/Auto-GPT/actions/workflows/autogpt-ci.yml)
|
||||
[](https://discord.gg/autogpt)
|
||||
[](https://github.com/Significant-Gravitas/Auto-GPT/stargazers)
|
||||
[](https://twitter.com/SigGravitas)
|
||||
|
||||
@@ -26,12 +26,11 @@ from autogpt.commands import COMMAND_CATEGORIES
|
||||
from autogpt.config import AIConfig, Config, ConfigBuilder, check_openai_api_key
|
||||
from autogpt.llm.api_manager import ApiManager
|
||||
from autogpt.logs.config import configure_chat_plugins, configure_logging
|
||||
from autogpt.logs.helpers import print_attribute
|
||||
from autogpt.logs.helpers import print_attribute, speak
|
||||
from autogpt.memory.vector import get_memory
|
||||
from autogpt.models.command_registry import CommandRegistry
|
||||
from autogpt.plugins import scan_plugins
|
||||
from autogpt.prompts.prompt import DEFAULT_TRIGGERING_PROMPT
|
||||
from autogpt.speech import say_text
|
||||
from autogpt.workspace import Workspace
|
||||
from scripts.install_plugin_deps import install_plugin_dependencies
|
||||
|
||||
@@ -366,7 +365,7 @@ def update_user(
|
||||
print_assistant_thoughts(ai_config.ai_name, assistant_reply_dict, config)
|
||||
|
||||
if config.speak_mode:
|
||||
say_text(f"I want to execute {command_name}", config)
|
||||
speak(f"I want to execute {command_name}")
|
||||
|
||||
# First log new-line so user can differentiate sections better in console
|
||||
print()
|
||||
@@ -531,8 +530,6 @@ def print_assistant_thoughts(
|
||||
assistant_reply_json_valid: dict,
|
||||
config: Config,
|
||||
) -> None:
|
||||
from autogpt.speech import say_text
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
assistant_thoughts_reasoning = None
|
||||
@@ -577,7 +574,7 @@ def print_assistant_thoughts(
|
||||
# Speak the assistant's thoughts
|
||||
if assistant_thoughts_speak:
|
||||
if config.speak_mode:
|
||||
say_text(assistant_thoughts_speak, config)
|
||||
speak(assistant_thoughts_speak)
|
||||
else:
|
||||
print_attribute("SPEAK", assistant_thoughts_speak, title_color=Fore.YELLOW)
|
||||
|
||||
|
||||
@@ -216,7 +216,7 @@ def generate_aiconfig_automatic(user_prompt: str, config: Config) -> AIConfig:
|
||||
# Call LLM with the string as user input
|
||||
output = create_chat_completion(
|
||||
ChatSequence.for_model(
|
||||
config.fast_llm,
|
||||
config.smart_llm,
|
||||
[
|
||||
Message("system", system_prompt),
|
||||
Message("user", prompt_ai_config_automatic),
|
||||
|
||||
@@ -61,7 +61,7 @@ def clean_input(config: Config, prompt: str = ""):
|
||||
def get_bulletin_from_web():
|
||||
try:
|
||||
response = requests.get(
|
||||
"https://raw.githubusercontent.com/Significant-Gravitas/Auto-GPT/master/BULLETIN.md"
|
||||
"https://raw.githubusercontent.com/Significant-Gravitas/Auto-GPT/master/autogpts/autogpt/BULLETIN.md"
|
||||
)
|
||||
if response.status_code == 200:
|
||||
return response.text
|
||||
|
||||
@@ -30,7 +30,7 @@ def command(
|
||||
) -> Callable[..., CommandOutput]:
|
||||
"""The command decorator is used to create Command objects from ordinary functions."""
|
||||
|
||||
def decorator(func: Callable[..., CommandOutput]) -> Command:
|
||||
def decorator(func: Callable[..., CommandOutput]):
|
||||
typed_parameters = [
|
||||
CommandParameter(
|
||||
name=param_name,
|
||||
@@ -55,8 +55,7 @@ def command(
|
||||
def wrapper(*args, **kwargs) -> Any:
|
||||
return func(*args, **kwargs)
|
||||
|
||||
wrapper.command = cmd
|
||||
|
||||
setattr(wrapper, "command", cmd)
|
||||
setattr(wrapper, AUTO_GPT_COMMAND_IDENTIFIER, True)
|
||||
|
||||
return wrapper
|
||||
|
||||
@@ -79,14 +79,22 @@ def execute_python_code(code: str, agent: Agent) -> str:
|
||||
"description": "The name of te file to execute",
|
||||
"required": True,
|
||||
},
|
||||
"args": {
|
||||
"type": "list[str]",
|
||||
"description": "The (command line) arguments to pass to the script",
|
||||
"required": False,
|
||||
},
|
||||
},
|
||||
)
|
||||
@sanitize_path_arg("filename")
|
||||
def execute_python_file(filename: Path, agent: Agent) -> str:
|
||||
def execute_python_file(
|
||||
filename: Path, agent: Agent, args: list[str] | str = []
|
||||
) -> str:
|
||||
"""Execute a Python file in a Docker container and return the output
|
||||
|
||||
Args:
|
||||
filename (Path): The name of the file to execute
|
||||
args (list, optional): The arguments with which to run the python script
|
||||
|
||||
Returns:
|
||||
str: The output of the file
|
||||
@@ -95,6 +103,9 @@ def execute_python_file(filename: Path, agent: Agent) -> str:
|
||||
f"Executing python file '{filename}' in working directory '{agent.config.workspace_path}'"
|
||||
)
|
||||
|
||||
if isinstance(args, str):
|
||||
args = args.split() # Convert space-separated string to a list
|
||||
|
||||
if not str(filename).endswith(".py"):
|
||||
raise InvalidArgumentError("Invalid file type. Only .py files are allowed.")
|
||||
|
||||
@@ -110,7 +121,7 @@ def execute_python_file(filename: Path, agent: Agent) -> str:
|
||||
f"Auto-GPT is running in a Docker container; executing {file_path} directly..."
|
||||
)
|
||||
result = subprocess.run(
|
||||
["python", "-B", str(file_path)],
|
||||
["python", "-B", str(file_path)] + args,
|
||||
capture_output=True,
|
||||
encoding="utf8",
|
||||
cwd=str(agent.workspace.root),
|
||||
@@ -152,7 +163,7 @@ def execute_python_file(filename: Path, agent: Agent) -> str:
|
||||
"python",
|
||||
"-B",
|
||||
file_path.relative_to(agent.workspace.root).as_posix(),
|
||||
],
|
||||
] + args,
|
||||
volumes={
|
||||
str(agent.workspace.root): {
|
||||
"bind": "/workspace",
|
||||
|
||||
@@ -27,6 +27,7 @@ DEBUG_LOG_FORMAT = (
|
||||
" %(title)s%(message)s"
|
||||
)
|
||||
|
||||
SPEECH_OUTPUT_LOGGER = "VOICE"
|
||||
USER_FRIENDLY_OUTPUT_LOGGER = "USER_FRIENDLY_OUTPUT"
|
||||
|
||||
_chat_plugins: list[AutoGPTPluginTemplate] = []
|
||||
@@ -96,6 +97,11 @@ def configure_logging(config: Config, log_dir: Path = LOG_DIR) -> None:
|
||||
user_friendly_output_logger.addHandler(stderr)
|
||||
user_friendly_output_logger.propagate = False
|
||||
|
||||
speech_output_logger = logging.getLogger(SPEECH_OUTPUT_LOGGER)
|
||||
speech_output_logger.setLevel(logging.INFO)
|
||||
speech_output_logger.addHandler(TTSHandler(config))
|
||||
speech_output_logger.propagate = False
|
||||
|
||||
# JSON logger with better formatting
|
||||
json_logger = logging.getLogger("JSON_LOGGER")
|
||||
json_logger.setLevel(logging.DEBUG)
|
||||
|
||||
@@ -8,7 +8,7 @@ import time
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from autogpt.logs.utils import remove_color_codes
|
||||
from autogpt.speech.say import say_text
|
||||
from autogpt.speech import TextToSpeechProvider
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from autogpt.config import Config
|
||||
@@ -53,6 +53,7 @@ class TTSHandler(logging.Handler):
|
||||
def __init__(self, config: Config):
|
||||
super().__init__()
|
||||
self.config = config
|
||||
self.tts_provider = TextToSpeechProvider(config)
|
||||
|
||||
def format(self, record: logging.LogRecord) -> str:
|
||||
if getattr(record, "title", ""):
|
||||
@@ -67,7 +68,7 @@ class TTSHandler(logging.Handler):
|
||||
return
|
||||
|
||||
message = self.format(record)
|
||||
say_text(message, self.config)
|
||||
self.tts_provider.say(message)
|
||||
|
||||
|
||||
class JsonFileHandler(logging.FileHandler):
|
||||
|
||||
@@ -3,7 +3,7 @@ from typing import Any, Optional
|
||||
|
||||
from colorama import Fore
|
||||
|
||||
from .config import USER_FRIENDLY_OUTPUT_LOGGER, _chat_plugins
|
||||
from .config import SPEECH_OUTPUT_LOGGER, USER_FRIENDLY_OUTPUT_LOGGER, _chat_plugins
|
||||
|
||||
|
||||
def user_friendly_output(
|
||||
@@ -65,3 +65,7 @@ def request_user_double_check(additionalText: Optional[str] = None) -> None:
|
||||
title="DOUBLE CHECK CONFIGURATION",
|
||||
preserve_message_color=True,
|
||||
)
|
||||
|
||||
|
||||
def speak(message: str, level: int = logging.INFO) -> None:
|
||||
logging.getLogger(SPEECH_OUTPUT_LOGGER).log(level, message)
|
||||
|
||||
@@ -14,9 +14,3 @@ class Singleton(abc.ABCMeta, type):
|
||||
if cls not in cls._instances:
|
||||
cls._instances[cls] = super(Singleton, cls).__call__(*args, **kwargs)
|
||||
return cls._instances[cls]
|
||||
|
||||
|
||||
class AbstractSingleton(abc.ABC, metaclass=Singleton):
|
||||
"""
|
||||
Abstract singleton class for ensuring only one instance of a class.
|
||||
"""
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
"""This module contains the speech recognition and speech synthesis functions."""
|
||||
from autogpt.speech.say import say_text
|
||||
from autogpt.speech.say import TextToSpeechProvider
|
||||
|
||||
__all__ = ["say_text"]
|
||||
__all__ = ["TextToSpeechProvider"]
|
||||
|
||||
@@ -9,10 +9,8 @@ from typing import TYPE_CHECKING
|
||||
if TYPE_CHECKING:
|
||||
from autogpt.config import Config
|
||||
|
||||
from autogpt.singleton import AbstractSingleton
|
||||
|
||||
|
||||
class VoiceBase(AbstractSingleton):
|
||||
class VoiceBase:
|
||||
"""
|
||||
Base class for all voice classes.
|
||||
"""
|
||||
|
||||
@@ -19,32 +19,37 @@ _QUEUE_SEMAPHORE = Semaphore(
|
||||
) # The amount of sounds to queue before blocking the main thread
|
||||
|
||||
|
||||
def say_text(text: str, config: Config, voice_index: int = 0) -> None:
|
||||
"""Speak the given text using the given voice index"""
|
||||
default_voice_engine, voice_engine = _get_voice_engine(config)
|
||||
class TextToSpeechProvider:
|
||||
def __init__(self, config: Config):
|
||||
self._config = config
|
||||
self._default_voice_engine, self._voice_engine = self._get_voice_engine(config)
|
||||
|
||||
def speak() -> None:
|
||||
success = voice_engine.say(text, voice_index)
|
||||
if not success:
|
||||
default_voice_engine.say(text)
|
||||
def say(self, text, voice_index: int = 0) -> None:
|
||||
def _speak() -> None:
|
||||
success = self._voice_engine.say(text, voice_index)
|
||||
if not success:
|
||||
self._default_voice_engine.say(text, voice_index)
|
||||
_QUEUE_SEMAPHORE.release()
|
||||
|
||||
_QUEUE_SEMAPHORE.release()
|
||||
if self._config.speak_mode:
|
||||
_QUEUE_SEMAPHORE.acquire(True)
|
||||
thread = threading.Thread(target=_speak)
|
||||
thread.start()
|
||||
|
||||
_QUEUE_SEMAPHORE.acquire(True)
|
||||
thread = threading.Thread(target=speak)
|
||||
thread.start()
|
||||
def __repr__(self):
|
||||
return f"{self.__class__.__name__}(enabled={self._config.speak_mode}, provider={self._voice_engine.__class__.__name__})"
|
||||
|
||||
@staticmethod
|
||||
def _get_voice_engine(config: Config) -> tuple[VoiceBase, VoiceBase]:
|
||||
"""Get the voice engine to use for the given configuration"""
|
||||
tts_provider = config.text_to_speech_provider
|
||||
if tts_provider == "elevenlabs":
|
||||
voice_engine = ElevenLabsSpeech(config)
|
||||
elif tts_provider == "macos":
|
||||
voice_engine = MacOSTTS(config)
|
||||
elif tts_provider == "streamelements":
|
||||
voice_engine = StreamElementsSpeech(config)
|
||||
else:
|
||||
voice_engine = GTTSVoice(config)
|
||||
|
||||
def _get_voice_engine(config: Config) -> tuple[VoiceBase, VoiceBase]:
|
||||
"""Get the voice engine to use for the given configuration"""
|
||||
tts_provider = config.text_to_speech_provider
|
||||
if tts_provider == "elevenlabs":
|
||||
voice_engine = ElevenLabsSpeech(config)
|
||||
elif tts_provider == "macos":
|
||||
voice_engine = MacOSTTS(config)
|
||||
elif tts_provider == "streamelements":
|
||||
voice_engine = StreamElementsSpeech(config)
|
||||
else:
|
||||
voice_engine = GTTSVoice(config)
|
||||
|
||||
return GTTSVoice(config), voice_engine
|
||||
return GTTSVoice(config), voice_engine
|
||||
|
||||
@@ -2,6 +2,7 @@ import os
|
||||
import random
|
||||
import string
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
@@ -21,12 +22,22 @@ def random_code(random_string) -> str:
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def python_test_file(config: Config, random_code: str) -> str:
|
||||
def python_test_file(config: Config, random_code: str):
|
||||
temp_file = tempfile.NamedTemporaryFile(dir=config.workspace_path, suffix=".py")
|
||||
temp_file.write(str.encode(random_code))
|
||||
temp_file.flush()
|
||||
|
||||
yield temp_file.name
|
||||
yield Path(temp_file.name)
|
||||
temp_file.close()
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def python_test_args_file(config: Config):
|
||||
temp_file = tempfile.NamedTemporaryFile(dir=config.workspace_path, suffix=".py")
|
||||
temp_file.write(str.encode("import sys\nprint(sys.argv[1], sys.argv[2])"))
|
||||
temp_file.flush()
|
||||
|
||||
yield Path(temp_file.name)
|
||||
temp_file.close()
|
||||
|
||||
|
||||
@@ -35,34 +46,23 @@ def random_string():
|
||||
return "".join(random.choice(string.ascii_lowercase) for _ in range(10))
|
||||
|
||||
|
||||
def test_execute_python_file(python_test_file: str, random_string: str, agent: Agent):
|
||||
def test_execute_python_file(python_test_file: Path, random_string: str, agent: Agent):
|
||||
result: str = sut.execute_python_file(python_test_file, agent=agent)
|
||||
assert result.replace("\r", "") == f"Hello {random_string}!\n"
|
||||
|
||||
|
||||
def test_execute_python_code(random_code: str, random_string: str, agent: Agent):
|
||||
ai_name = agent.ai_config.ai_name
|
||||
|
||||
result: str = sut.execute_python_code(random_code, "test_code", agent=agent)
|
||||
assert result.replace("\r", "") == f"Hello {random_string}!\n"
|
||||
|
||||
# Check that the code is stored
|
||||
destination = os.path.join(
|
||||
agent.config.workspace_path, ai_name, "executed_code", "test_code.py"
|
||||
)
|
||||
with open(destination) as f:
|
||||
assert f.read() == random_code
|
||||
|
||||
|
||||
def test_execute_python_code_disallows_name_arg_path_traversal(
|
||||
random_code: str, agent: Agent
|
||||
def test_execute_python_file_args(
|
||||
python_test_args_file: Path, random_string: str, agent: Agent
|
||||
):
|
||||
with pytest.raises(AccessDeniedError, match="path traversal"):
|
||||
sut.execute_python_code(random_code, name="../../test_code", agent=agent)
|
||||
random_args = [random_string] * 2
|
||||
random_args_string = " ".join(random_args)
|
||||
result = sut.execute_python_file(python_test_args_file, agent=agent, random_args)
|
||||
assert result == f"{random_args_string}\n"
|
||||
|
||||
# Check that the code is not stored in parent directory
|
||||
dst_with_traversal = agent.workspace.get_path("test_code.py")
|
||||
assert not dst_with_traversal.is_file(), "Path traversal by filename not prevented"
|
||||
|
||||
def test_execute_python_code(random_code: str, random_string: str, agent: Agent):
|
||||
result: str = sut.execute_python_code(random_code, agent=agent)
|
||||
assert result.replace("\r", "") == f"Hello {random_string}!\n"
|
||||
|
||||
|
||||
def test_execute_python_code_overwrites_file(random_code: str, agent: Agent):
|
||||
@@ -75,7 +75,7 @@ def test_execute_python_code_overwrites_file(random_code: str, agent: Agent):
|
||||
with open(destination, "w+") as f:
|
||||
f.write("This will be overwritten")
|
||||
|
||||
sut.execute_python_code(random_code, "test_code.py", agent=agent)
|
||||
sut.execute_python_code(random_code, agent=agent)
|
||||
|
||||
# Check that the file is updated with the new code
|
||||
with open(destination) as f:
|
||||
|
||||
@@ -21,7 +21,7 @@ def test_initial_values(config: Config) -> None:
|
||||
assert config.debug_mode == False
|
||||
assert config.continuous_mode == False
|
||||
assert config.speak_mode == False
|
||||
assert config.fast_llm == "gpt-3.5-turbo"
|
||||
assert config.fast_llm == "gpt-3.5-turbo-16k"
|
||||
assert config.smart_llm == "gpt-4-0314"
|
||||
|
||||
|
||||
|
||||
@@ -245,10 +245,9 @@ def test_write_file_succeeds_if_content_different(
|
||||
test_file_with_content_path: Path, agent: Agent
|
||||
):
|
||||
new_content = "This is different content.\n"
|
||||
result = file_ops.write_to_file(
|
||||
file_ops.write_to_file(
|
||||
str(test_file_with_content_path), new_content, agent=agent
|
||||
)
|
||||
assert result == "File written to successfully."
|
||||
|
||||
|
||||
def test_append_to_file(test_nested_file: Path, agent: Agent):
|
||||
@@ -301,7 +300,7 @@ def test_list_files(workspace: Workspace, test_directory: Path, agent: Agent):
|
||||
with open(os.path.join(test_directory, file_a.name), "w") as f:
|
||||
f.write("This is file A in the subdirectory.")
|
||||
|
||||
files = file_ops.list_files(str(workspace.root), agent=agent)
|
||||
files = file_ops.list_folder(str(workspace.root), agent=agent)
|
||||
assert file_a.name in files
|
||||
assert file_b.name in files
|
||||
assert os.path.join(Path(test_directory).name, file_a.name) in files
|
||||
@@ -314,5 +313,5 @@ def test_list_files(workspace: Workspace, test_directory: Path, agent: Agent):
|
||||
|
||||
# Case 2: Search for a file that does not exist and make sure we don't throw
|
||||
non_existent_file = "non_existent_file.txt"
|
||||
files = file_ops.list_files("", agent=agent)
|
||||
files = file_ops.list_folder("", agent=agent)
|
||||
assert non_existent_file not in files
|
||||
|
||||
@@ -86,7 +86,7 @@ def test_get_bulletin_from_web_success(mock_get):
|
||||
|
||||
assert expected_content in bulletin
|
||||
mock_get.assert_called_with(
|
||||
"https://raw.githubusercontent.com/Significant-Gravitas/Auto-GPT/master/BULLETIN.md"
|
||||
"https://raw.githubusercontent.com/Significant-Gravitas/Auto-GPT/master/autogpts/autogpt/BULLETIN.md"
|
||||
)
|
||||
|
||||
|
||||
|
||||
@@ -47,7 +47,7 @@ def get_base_vcr_config(request):
|
||||
@pytest.fixture()
|
||||
def vcr_cassette_dir(request):
|
||||
test_name = os.path.splitext(request.node.name)[0]
|
||||
return os.path.join("tests/Auto-GPT-test-cassettes", test_name)
|
||||
return os.path.join("tests/vcr_cassettes", test_name)
|
||||
|
||||
|
||||
def patch_api_base(requestor: openai.api_requestor.APIRequestor):
|
||||
|
||||
1
autogpts/autogpt/tests/vcr_cassettes
Submodule
1
autogpts/autogpt/tests/vcr_cassettes
Submodule
Submodule autogpts/autogpt/tests/vcr_cassettes added at 1896d8ac12
@@ -1,38 +1,47 @@
|
||||
{
|
||||
"Auto-GPT": {
|
||||
"url": "https://github.com/Significant-Gravitas/Auto-GPT",
|
||||
"branch": "master"
|
||||
"branch": "master",
|
||||
"commit": "3a2d08fb415071cc94dd6fcee24cfbdd1fb487dd"
|
||||
},
|
||||
"gpt-engineer": {
|
||||
"url": "https://github.com/merwanehamadi/gpt-engineer.git",
|
||||
"branch": "benchmark-integration"
|
||||
"branch": "benchmark-integration",
|
||||
"commit": "9bb81041ace9f09e8ea0e34e29f2e46bb9d46a36"
|
||||
},
|
||||
"mini-agi": {
|
||||
"url": "https://github.com/SilenNaihin/mini-agi.git",
|
||||
"branch": "benchmark-integration"
|
||||
"branch": "benchmark-integration",
|
||||
"commit": "2fc70aa0032eec986dfb1020854a1b3b8aaf6780"
|
||||
},
|
||||
"smol-developer": {
|
||||
"url": "https://github.com/e2b-dev/smol-developer.git",
|
||||
"branch": "benchmarks"
|
||||
"branch": "benchmarks",
|
||||
"commit": "a23d01369cea976e80b7889fdbf1096619471301"
|
||||
},
|
||||
"SuperAGI": {
|
||||
"url": "https://github.com/SilenNaihin/SuperAGI.git",
|
||||
"branch": "benchmark-integration"
|
||||
"branch": "benchmark-integration",
|
||||
"commit": "48b2101374264b97dbdfc2c0bb0ae45e769e157d"
|
||||
},
|
||||
"BabyAGI": {
|
||||
"babyagi": {
|
||||
"url": "https://github.com/SilenNaihin/babyagi.git",
|
||||
"branch": "benchmark-integration"
|
||||
"branch": "benchmark-integration",
|
||||
"commit": "16f1b9519fea5543695203be0262a1b41c77cbba"
|
||||
},
|
||||
"beebot": {
|
||||
"url": "https://github.com/AutoPackAI/beebot.git",
|
||||
"branch": "main"
|
||||
"branch": "main",
|
||||
"commit": "59d4e93c133612a0319d135bb0eb08bbcead9fa2"
|
||||
},
|
||||
"PolyGPT": {
|
||||
"url": "https://github.com/polywrap/PolyGPT.git",
|
||||
"branch": "nerfzael-use-local-wrap-library"
|
||||
"branch": "nerfzael-use-local-wrap-library",
|
||||
"commit": "d621adf5f54cc0f9a6d191139fb67ac3d1436d7b"
|
||||
},
|
||||
"Turbo": {
|
||||
"Auto-GPT-Turbo": {
|
||||
"url": "https://github.com/lc0rp/Auto-GPT-Turbo.git",
|
||||
"branch": "main"
|
||||
"branch": "main",
|
||||
"commit": "8469e09ae204f2d5f41d489b217551544597ee14"
|
||||
}
|
||||
}
|
||||
|
||||
@@ -9,11 +9,10 @@ from typing import Any
|
||||
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
|
||||
from agbenchmark.utils.utils import find_absolute_benchmark_path
|
||||
from fastapi import FastAPI, Query
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
|
||||
from agbenchmark.utils.utils import find_absolute_benchmark_path
|
||||
|
||||
app = FastAPI()
|
||||
|
||||
origins = ["http://localhost:3000"]
|
||||
|
||||
5
benchmark/benchmark/__init__.py
Normal file
5
benchmark/benchmark/__init__.py
Normal file
@@ -0,0 +1,5 @@
|
||||
# import pydevd_pycharm
|
||||
|
||||
# pydevd_pycharm.settrace(
|
||||
# "localhost", port=9739, stdoutToServer=True, stderrToServer=True
|
||||
# )
|
||||
285
benchmark/benchmark/__main__.py
Normal file
285
benchmark/benchmark/__main__.py
Normal file
@@ -0,0 +1,285 @@
|
||||
import glob
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
from typing import Any, Optional
|
||||
|
||||
import click
|
||||
import pytest
|
||||
import toml
|
||||
from helicone.lock import HeliconeLockManager
|
||||
|
||||
from benchmark.utils.data_types import AgentBenchmarkConfig
|
||||
|
||||
# UTC timestamp captured once, when this module is first imported, rendered
# with an explicit "+00:00" offset.
BENCHMARK_START_TIME = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%S+00:00")

# Only record the start time with Helicone when an API key is configured.
if os.environ.get("HELICONE_API_KEY"):
    HeliconeLockManager.write_custom_property(
        "benchmark_start_time", BENCHMARK_START_TIME
    )

# The optional categories are read from a JSON file that lives in the
# "challenges" directory next to this module.
with (
    Path(__file__).resolve().parent / "challenges" / "optional_categories.json"
).open() as f:
    OPTIONAL_CATEGORIES = json.load(f)["optional_categories"]
|
||||
|
||||
|
||||
def get_unique_categories() -> set[str]:
    """Collect every category named by the challenge ``data.json`` files.

    Searches the ``challenges`` directory next to this file (recursively) for
    files named ``data.json``, reads the ``"category"`` entry of each one and
    merges the values into a single set. A file without a ``"category"`` entry
    contributes nothing.

    Files that cannot be opened, cannot be read, or do not contain valid JSON
    are reported on stdout and skipped, so one bad file does not abort the
    whole scan.

    Returns:
        The set of unique category names found.
    """
    categories: set[str] = set()

    # Anchor the search at this file's directory, not the current working
    # directory, so the result does not depend on where the tool is run from.
    this_dir = os.path.dirname(os.path.abspath(__file__))
    glob_path = os.path.join(this_dir, "./challenges/**/data.json")

    for data_file in glob.glob(glob_path, recursive=True):
        # The try wraps open() as well as the parse, so a file that cannot be
        # opened is reported and skipped instead of raising out of the loop.
        try:
            with open(data_file, "r") as f:
                data = json.load(f)
        except json.JSONDecodeError:
            print(f"Error: {data_file} is not a valid JSON file.")
            continue
        except IOError:
            print(f"IOError: file could not be read: {data_file}")
            continue

        categories.update(data.get("category", []))

    return categories
|
||||
|
||||
|
||||
def run_benchmark(
    agent_benchmark_config_path: str,
    maintain: bool = False,
    improve: bool = False,
    explore: bool = False,
    mock: bool = False,
    no_dep: bool = False,
    nc: bool = False,
    category: Optional[list[str]] = None,
    skip_category: Optional[list[str]] = None,
    test: Optional[str] = None,
    suite: Optional[str] = None,
    cutoff: Optional[int] = None,
    server: bool = False,
) -> int:
    """Start the benchmark tests. If a category flag is provided, run the categories with that mark.

    Loads the agent's benchmark configuration, validates the combination of
    options, translates them into pytest command-line arguments and runs
    pytest on the directory containing this file.

    Args:
        agent_benchmark_config_path: Path to the agent's benchmark config JSON
            file. It is opened here and also forwarded to pytest through
            ``--agent_config_path``.
        maintain: Append ``--maintain`` (regression tests only).
        improve: Append ``--improve`` (non-regression tests only).
        explore: Append ``--explore`` (never-beaten challenges only).
        mock: Append ``--mock``.
        no_dep: Append ``--no_dep``.
        nc: Append ``--nc`` (no cutoff). Cannot be combined with ``cutoff``.
        category: Categories to run; each must be a known challenge category.
        skip_category: Categories to exclude from the run.
        test: Name of a single test to select with ``-k``. Cannot be combined
            with category, suite or maintain/improve/explore options.
        suite: Name of a suite to run. Cannot be combined with category or
            maintain/improve/explore options.
        cutoff: Cutoff override in seconds. Only the ``--cutoff`` flag is
            appended here; the value itself is not put on the pytest command
            line by this function.
        server: Accepted for interface compatibility; not used by this function.

    Returns:
        ``1`` when the configuration file is not valid JSON or the options
        conflict, otherwise whatever ``pytest.main`` returns.

    Raises:
        AssertionError: If ``api_mode`` is set without ``host`` in the config,
            if an unknown category is requested, or if skipping leaves no
            category to run.
    """
    # Check if configuration file exists and is not empty
    try:
        with open(agent_benchmark_config_path, "r") as f:
            agent_benchmark_config = AgentBenchmarkConfig(**json.load(f))
            agent_benchmark_config.agent_benchmark_config_path = (
                agent_benchmark_config_path
            )
    except json.JSONDecodeError:
        print("Error: benchmark_config.json is not a valid JSON file.")
        return 1

    # The three modes are mutually exclusive: reject any two of them together,
    # not only the case where all three are set (the mode dispatch further
    # down would otherwise silently ignore all but the first one).
    if sum(map(bool, (maintain, improve, explore))) > 1:
        print(
            "Error: You can't use --maintain, --improve or --explore at the same time. Please choose one."
        )
        return 1

    if test and (category or skip_category or maintain or improve or suite or explore):
        print(
            "Error: If you're running a specific test make sure no other options are selected. Please just pass the --test."
        )
        return 1

    # TODO: test and ensure that this functionality works before removing
    # change elif suite below if removing
    if suite and (category or skip_category or maintain or improve or explore):
        print(
            "Error: If you're running a specific suite make sure no other options are selected. Please just pass the --suite."
        )
        return 1

    assert not (
        agent_benchmark_config.api_mode and not agent_benchmark_config.host
    ), "Error: host needs to be added to the config if api_mode is set to True."

    print("Current configuration:")
    for key, value in vars(agent_benchmark_config).items():
        print(f"{key}: {value}")

    pytest_args = ["-vs"]
    pytest_args.extend(["--agent_config_path", agent_benchmark_config_path])
    if test:
        print("Running specific test:", test)
        pytest_args.extend(["-k", test, "--test"])
    elif suite:
        print("Running specific suite:", suite)
        pytest_args.extend(["--suite"])
    else:
        # Categories that are used in the challenges
        categories = get_unique_categories()

        if category:
            invalid_categories = set(category) - categories
            assert (
                not invalid_categories
            ), f"Invalid categories: {invalid_categories}. Valid categories are: {categories}"

            categories_to_run = set(category)
            if skip_category:
                categories_to_run = categories_to_run.difference(set(skip_category))
                assert categories_to_run, "Error: You can't skip all categories"
            # Sorted so the marker expression is the same on every run.
            pytest_args.extend(
                ["-m", " or ".join(sorted(categories_to_run)), "--category"]
            )
            print("Running tests of category:", categories_to_run)
        elif skip_category:
            categories_to_run = categories - set(skip_category)
            assert categories_to_run, "Error: You can't skip all categories"
            pytest_args.extend(
                ["-m", " or ".join(sorted(categories_to_run)), "--category"]
            )
            print("Running tests of category:", categories_to_run)
        else:
            print("Running all categories")

        if maintain:
            print("Running only regression tests")
            pytest_args.append("--maintain")
        elif improve:
            print("Running only non-regression tests")
            pytest_args.append("--improve")
        elif explore:
            print("Only attempt challenges that have never been beaten")
            pytest_args.append("--explore")

    if mock:
        pytest_args.append("--mock")

    if no_dep:
        pytest_args.append("--no_dep")

    if nc and cutoff:
        print(
            "Error: You can't use both --nc and --cutoff at the same time. Please choose one."
        )
        return 1

    if nc:
        pytest_args.append("--nc")
    if cutoff:
        pytest_args.append("--cutoff")
        print(f"Setting cuttoff override to {cutoff} seconds.")

    # pytest collects from the directory that contains this file.
    current_dir = Path(__file__).resolve().parent
    print(f"Current directory: {current_dir}")
    pytest_args.extend((str(current_dir), "--cache-clear"))
    return pytest.main(pytest_args)
|
||||
|
||||
|
||||
@click.group()
def cli() -> None:
    # Root command group: it does no work itself and only serves as the
    # attachment point for the sub-commands registered with @cli.command().
    return None
|
||||
|
||||
|
||||
@cli.command()
@click.option("--backend", is_flag=True, help="If it's being run from the cli")
@click.option("-c", "--category", multiple=True, help="Specific category to run")
@click.option(
    "-s",
    "--skip-category",
    multiple=True,
    help="Skips preventing the tests from this category from running",
)
@click.option("--test", help="Specific test to run")
@click.option("--maintain", is_flag=True, help="Runs only regression tests")
@click.option("--improve", is_flag=True, help="Run only non-regression tests")
@click.option(
    "--explore",
    is_flag=True,
    help="Only attempt challenges that have never been beaten",
)
@click.option("--mock", is_flag=True, help="Run with mock")
@click.option("--suite", help="Run a suite of related tests")
@click.option(
    "--no_dep",
    is_flag=True,
    help="Run without dependencies (can be useful for a suite run)",
)
@click.option("--nc", is_flag=True, help="Run without cutoff")
@click.option("--cutoff", help="Set or override tests cutoff (seconds)")
@click.option(
    "--agent-config",
    type=click.Path(exists=True),
    help="Path to the agent benchmark_config.json file,",
    required=True,
)
def start(
    maintain: bool,
    improve: bool,
    explore: bool,
    mock: bool,
    no_dep: bool,
    nc: bool,
    agent_config: click.Path,
    category: Optional[list[str]] = None,
    skip_category: Optional[list[str]] = None,
    test: Optional[str] = None,
    suite: Optional[str] = None,
    cutoff: Optional[int] = None,
    backend: Optional[bool] = False,
) -> Any:
    """Run the benchmark and exit with its status code."""
    # All parameters are filled in by the click options above and forwarded
    # unchanged to run_benchmark(). With --backend, everything printed during
    # the run is written to backend/backend_stdout.txt instead of the console.
    # The function never returns normally: it always ends in sys.exit().
    assert (
        "benchmark_config.json" in agent_config
    ), "benchmark_config.json must be provided"

    # One shared argument set for both branches, so they cannot drift apart.
    run_kwargs = dict(
        agent_benchmark_config_path=agent_config,
        maintain=maintain,
        improve=improve,
        explore=explore,
        mock=mock,
        no_dep=no_dep,
        nc=nc,
        category=category,
        skip_category=skip_category,
        test=test,
        suite=suite,
        cutoff=cutoff,
    )

    if backend:
        original_stdout = sys.stdout  # Save the original standard output
        with open("backend/backend_stdout.txt", "w") as f:
            sys.stdout = f
            try:
                exit_code = run_benchmark(**run_kwargs)
            finally:
                # Restore stdout even when the run raises; otherwise
                # sys.stdout would be left pointing at the closed file.
                sys.stdout = original_stdout
    else:
        exit_code = run_benchmark(**run_kwargs)

    sys.exit(exit_code)
|
||||
|
||||
|
||||
@cli.command()
def version():
    """Print the version of the benchmark tool."""
    # The version is read from the [tool.poetry] table of the pyproject.toml
    # located one directory above this file.
    pyproject_path = Path(__file__).resolve().parent / ".." / "pyproject.toml"
    poetry_table = toml.load(pyproject_path)["tool"]["poetry"]
    print(f"Benchmark Tool Version {poetry_table['version']}")
|
||||
|
||||
|
||||
# Run the click command group only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    cli()
|
||||
@@ -5,8 +5,8 @@ from typing import Any, Dict, Optional
|
||||
|
||||
from agent_protocol_client import AgentApi, ApiClient, Configuration, TaskRequestBody
|
||||
|
||||
from agbenchmark.agent_interface import get_list_of_file_paths
|
||||
from agbenchmark.utils.data_types import ChallengeData
|
||||
from benchmark.agent_interface import get_list_of_file_paths
|
||||
from benchmark.utils.data_types import ChallengeData
|
||||
|
||||
|
||||
async def run_api_agent(
|
||||
@@ -51,7 +51,6 @@ async def run_api_agent(
|
||||
|
||||
artifacts = await api_instance.list_agent_task_artifacts(task_id=task_id)
|
||||
for artifact in artifacts:
|
||||
|
||||
if artifact.relative_path:
|
||||
folder_path = os.path.join(config["workspace"], artifact.relative_path)
|
||||
else:
|
||||
@@ -12,8 +12,6 @@ from typing import Any, List
|
||||
import psutil
|
||||
from dotenv import load_dotenv
|
||||
|
||||
import agbenchmark.start_benchmark
|
||||
|
||||
load_dotenv()
|
||||
|
||||
helicone_graphql_logs = os.getenv("HELICONE_GRAPHQL_LOGS")
|
||||
@@ -77,7 +75,7 @@ def run_windows_env(process: Any, start_time: float, timeout: float) -> None:
|
||||
def run_agent(task: str, timeout: int) -> None:
|
||||
"""Calling to get a response"""
|
||||
|
||||
entry_path = "agbenchmark.benchmarks"
|
||||
entry_path = "benchmark.benchmarks"
|
||||
|
||||
print(f"Running '{entry_path}' with timeout {timeout}")
|
||||
|
||||
@@ -87,7 +85,7 @@ def run_agent(task: str, timeout: int) -> None:
|
||||
stdout=subprocess.PIPE,
|
||||
stderr=subprocess.STDOUT,
|
||||
universal_newlines=True,
|
||||
cwd=agbenchmark.start_benchmark.HOME_DIRECTORY,
|
||||
cwd=benchmark.start_benchmark.HOME_DIRECTORY,
|
||||
bufsize=1,
|
||||
)
|
||||
|
||||
@@ -109,7 +107,7 @@ def get_list_of_file_paths(
|
||||
) -> List[str]:
|
||||
# this file is at agbenchmark\agent_interface.py
|
||||
source_dir = os.path.join(
|
||||
agbenchmark.start_benchmark.CURRENT_DIRECTORY,
|
||||
benchmark.start_benchmark.CURRENT_DIRECTORY,
|
||||
"..",
|
||||
challenge_dir_path,
|
||||
artifact_folder_name,
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user