"""
|
|
This is the main evaluation file. In it you can specify the following:
|
|
|
|
1. The number of threads to use for evaluation. This is set to 1 by default.And will remain that way until we can spin
|
|
up containers on command
|
|
2. The timeout for each thread. This is set to 60 seconds by default. This is the amount of time each thread will run
|
|
for before it is killed when evaluating an agent
|
|
3. The path to the AutoGPT code. This is a required parameter as we do not know where your code lives.
|
|
4. The evals you would like to run. The options here are any OpenAI eval, or any of the evals defined in this repository
|
|
|
|
|
|
What this file does is it parses the params given and then runs the evals with OpenAI's evals framework.
|
|
"""

import argparse
import os
import sys
from datetime import datetime
from pathlib import Path
from typing import Optional

import yaml


def parse_args() -> argparse.Namespace:
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "eval", type=str, help="Name of an eval. See registry.")
    parser.add_argument(
        "--completion-fn",
        type=str,
        dest="completion_fn",
        default="auto_gpt_completion_fn",
        help="One or more CompletionFn URLs, separated by commas (,). "
             "A CompletionFn can either be the name of a model available in the OpenAI API or a key in the registry "
             "(see evals/registry/completion_fns).",
    )
    parser.add_argument(
        "--timeout",
        type=int,
        default=300,
        help="The timeout for each thread",
    )
    parser.add_argument(
        "--auto-gpt-path",
        type=str,
        default=None,
        help="The path to the AutoGPT code. This updates auto_gpt_completion_fn.yaml in completion fns. "
             "So you only need to set this once.",
    )
    parser.add_argument("--extra_eval_params", type=str, default="")
    parser.add_argument("--max_samples", type=int, default=None)
    parser.add_argument(
        "--cache", action=argparse.BooleanOptionalAction, default=True)
    parser.add_argument(
        "--visible", action=argparse.BooleanOptionalAction, default=None)
    parser.add_argument("--seed", type=int, default=20220722)
    parser.add_argument("--user", type=str, default="")
    parser.add_argument("--record_path", type=str, default=None)
    parser.add_argument(
        "--log_to_file", type=str, default=None,  # default=str(
        #     Path(__file__).parent.parent / "data" / "log" / "log.txt"
        # ), help="Log to a file instead of stdout"
    )
    parser.add_argument(
        "--debug", action=argparse.BooleanOptionalAction, default=False)
    parser.add_argument(
        "--local-run", action=argparse.BooleanOptionalAction, default=True)
    parser.add_argument(
        "--dry-run", action=argparse.BooleanOptionalAction, default=False)
    parser.add_argument("--dry-run-logging",
                        action=argparse.BooleanOptionalAction, default=True)
    return parser.parse_args()


def update_yaml_with_auto_gpt_path(yaml_path: str, auto_gpt_path: Optional[str]) -> Path:
    """
    If there is a given auto_gpt_path, we update the yaml file to include it.
    If we don't have one, we get the path from the yaml.
    If none exists in the yaml and we don't have a path, we raise an exception.

    :param yaml_path: The path to the yaml file
    :param auto_gpt_path: The path to the AutoGPT code
    :return: The path to the AutoGPT code
    """
    with open(yaml_path, "r") as f:
        yaml_data = yaml.safe_load(f)
    if yaml_data["auto_gpt_completion_fn"]["args"]["auto_gpt_path"] is None and auto_gpt_path is None:
        raise Exception(
            "You must specify an auto_gpt_path in the yaml file or pass it in as a parameter")
    if auto_gpt_path is None:
        auto_gpt_path = yaml_data["auto_gpt_completion_fn"]["args"]["auto_gpt_path"]
    if auto_gpt_path is not None:
        yaml_data["auto_gpt_completion_fn"]["args"]["auto_gpt_path"] = auto_gpt_path
        with open(yaml_path, "w") as f:
            yaml.safe_dump(yaml_data, f)

    return Path(auto_gpt_path).absolute()


def load_env_file(env_path: Path):
    if not env_path.exists():
        raise FileNotFoundError('You must set the OpenAI key in the AutoGPT env file. '
                                'We need your api keys to start the AutoGPT agent and use OpenAI evals')
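    # The .env file is expected to contain a line of the form below; the key value is only a placeholder:
    #   OPENAI_API_KEY=sk-...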
    with open(env_path, "r") as f:
        # Find the OPENAI_API_KEY line, split it on the equals sign, and export it so OpenAI evals can use it.
        for line in f.readlines():
            if line.startswith("OPENAI_API_KEY"):
                os.environ["OPENAI_API_KEY"] = line.split("=")[1].strip()
                break


if __name__ == "__main__":
    args = parse_args()
    # Do not run in multiprocessing mode. We do not use this right now, as it disables OpenAI's timeouts :(
    # os.environ["EVALS_SEQUENTIAL"] = "1"
    os.environ["EVALS_THREAD_TIMEOUT"] = str(args.timeout)
    os.environ["EVALS_THREADS"] = str(1)

    # Update the yaml file with the auto_gpt_path
    autogpt_path = update_yaml_with_auto_gpt_path(
        str(Path(__file__).parent / "completion_fns" /
            "auto_gpt_completion_fn.yaml"),
        args.auto_gpt_path
    )

    # Add the benchmarks path to the system path so we can import auto_gpt_benchmarking
    sys.path.append(str(Path(__file__).parent.parent.absolute()))

    # Load all of the environment variables in the auto-gpt path/.env file
    load_env_file(Path(autogpt_path) / ".env")

    # Obviously, a top level import would be better. This allows us to set the API key with the env file, as it gets
    # set in the evaluator. We can't set it before the import because the import will fail without an API key.
    from auto_gpt_benchmarking.Evaluator import Evaluator, OAIRunArgs

    if args.record_path is None:
        args.record_path = str(Path(
            __file__).parent.parent / "data" / f"eval-{args.eval}-{datetime.now().strftime('%Y%m%d-%H%M%S')}.jsonl")

    run_args = OAIRunArgs(
        completion_fn=args.completion_fn,
        eval=args.eval,
        extra_eval_params=args.extra_eval_params,
        max_samples=args.max_samples,
        cache=args.cache,
        visible=args.visible,
        seed=args.seed,
        user=args.user,
        record_path=args.record_path,
        log_to_file=args.log_to_file,
        debug=args.debug,
        local_run=args.local_run,
        dry_run=args.dry_run,
        dry_run_logging=args.dry_run_logging)

    # Run the evals
    evaluator = Evaluator(
        run_args
    )
    evaluator.run()