mirror of
https://github.com/aljazceru/Auto-GPT.git
synced 2025-12-17 22:14:28 +01:00
Ability to run by categories (#5229)
* Ability to run by categories Signed-off-by: Merwane Hamadi <merwanehamadi@gmail.com> * always use Path.cwd() Signed-off-by: Merwane Hamadi <merwanehamadi@gmail.com> --------- Signed-off-by: Merwane Hamadi <merwanehamadi@gmail.com>
This commit is contained in:
@@ -10,8 +10,11 @@ from typing import Any, Dict, Optional
|
||||
|
||||
import pytest
|
||||
|
||||
from agbenchmark.__main__ import CHALLENGES_ALREADY_BEATEN, UPDATES_JSON_PATH
|
||||
from agbenchmark.agent_api_interface import append_updates_file
|
||||
from agbenchmark.utils.challenge import Challenge
|
||||
from agbenchmark.utils.data_types import AgentBenchmarkConfig, ChallengeData
|
||||
from agent_protocol_client.models.step import Step
|
||||
|
||||
DATA_CATEGORY = {}
|
||||
|
||||
@@ -48,7 +51,7 @@ def create_single_test(
|
||||
test_name = self.data.name
|
||||
|
||||
try:
|
||||
with open("challenges_already_beaten.json", "r") as f:
|
||||
with open(CHALLENGES_ALREADY_BEATEN, "r") as f:
|
||||
challenges_beaten_in_the_past = json.load(f)
|
||||
except:
|
||||
challenges_beaten_in_the_past = {}
|
||||
@@ -82,7 +85,24 @@ def create_single_test(
|
||||
)
|
||||
del scores["answers"] # remove answers from scores
|
||||
request.node.scores = scores # store scores in request.node
|
||||
assert 1 in scores["values"]
|
||||
is_score_100 = 1 in scores["values"]
|
||||
|
||||
evaluation = "Correct!" if is_score_100 else "Incorrect."
|
||||
eval_step = Step(
|
||||
input=evaluation,
|
||||
additional_input=None,
|
||||
task_id="irrelevant, this step is a hack",
|
||||
step_id="irrelevant, this step is a hack",
|
||||
name="",
|
||||
status="created",
|
||||
output=None,
|
||||
additional_output=None,
|
||||
artifacts=[],
|
||||
is_last=True,
|
||||
)
|
||||
await append_updates_file(eval_step)
|
||||
|
||||
assert is_score_100
|
||||
|
||||
# Parametrize the method here
|
||||
test_method = pytest.mark.parametrize(
|
||||
@@ -194,4 +214,18 @@ def challenge_should_be_ignored(json_file):
|
||||
return "challenges/deprecated" in json_file or "challenges/library" in json_file
|
||||
|
||||
|
||||
def initialize_updates_file():
    """Reset the file at ``UPDATES_JSON_PATH`` so it holds an empty JSON list.

    The file is written with ``[]`` whether or not it already exists; only
    the message printed afterwards differs between the two cases.
    """
    # Record the pre-existing state first, because the write below creates
    # the file and would otherwise make the two cases indistinguishable.
    already_present = os.path.exists(UPDATES_JSON_PATH)

    # Mode "w" truncates an existing file and creates a missing one.
    with open(UPDATES_JSON_PATH, "w") as updates_file:
        json.dump([], updates_file, indent=2)

    if already_present:
        print("Initialized updates.json by overwriting with an empty array")
    else:
        print("Created updates.json and initialized it with an empty array")
|
||||
|
||||
|
||||
# Reset the updates file to an empty list first, then call generate_tests()
# (generate_tests is defined outside this excerpt).
initialize_updates_file()
|
||||
generate_tests()
|
||||
|
||||
Reference in New Issue
Block a user