file naming when --test (#164)

This commit is contained in:
Silen Naihin
2023-07-17 11:24:16 -04:00
committed by GitHub
parent dffc1dfd51
commit 8aa6452cc4
11 changed files with 315 additions and 72 deletions

View File

@@ -1,40 +1,72 @@
{
"mini-agi": {
"TestBasicMemory": [true, true, true],
"TestBasicRetrieval": [true, true, true],
"TestCreateSimpleWebServer": [false, false, false],
"TestDebugSimpleTypoWithGuidance": [
false,
false,
false,
false,
false,
false
],
"TestDebugSimpleTypoWithoutGuidance": [false, false, false],
"TestReadFile": [true, true, true, true],
"TestRememberMultipleIds": [true, true, true],
"TestRememberMultipleIdsWithNoise": [true, true, true],
"TestRememberMultiplePhrasesWithNoise": [true, true, true],
"TestRetrieval2": [true, true, true],
"TestRetrieval3": [true, true, true],
"TestSearch": [true, true, true, true],
"TestWriteFile": [
true,
true,
true,
false,
false,
false,
false,
true,
false,
true,
false,
false,
false,
false,
true
]
}
}
"mini-agi": {
"TestBasicMemory": [
true,
true,
true
],
"TestBasicRetrieval": [
true,
true,
true
],
"TestCreateSimpleWebServer": [
false,
false,
false
],
"TestDebugSimpleTypoWithGuidance": [
false,
false,
false
],
"TestDebugSimpleTypoWithoutGuidance": [
false,
false,
false
],
"TestReadFile": [
true,
true,
true,
true,
true
],
"TestRememberMultipleIds": [
true,
true,
true
],
"TestRememberMultipleIdsWithNoise": [
true,
true,
true
],
"TestRememberMultiplePhrasesWithNoise": [
true,
true,
true
],
"TestRetrieval2": [
true,
true,
true
],
"TestRetrieval3": [
true,
true,
true
],
"TestSearch": [
true,
true,
true,
true
],
"TestWriteFile": [
true,
true,
true
]
}
}

View File

@@ -0,0 +1,36 @@
{
"TestWriteFile": {
"data_path": "agbenchmark/challenges/interface/write_file",
"is_regression": true,
"metrics": {
"difficulty": "interface",
"success": true,
"non_mock_success_%": 100.0,
"run_time": "0.009 seconds"
}
},
"additional": {
"model": "gpt-3.5-turbo"
},
"command": "agbenchmark start --test TestWriteFile",
"completion_time": "2023-07-17-09:54",
"config": {
"workspace": "${os.path.join(Path.home(), 'miniagi')}"
},
"metrics": {
"run_time": "22.36 seconds",
"highest_difficulty": "interface: 1"
},
"tests": {
"TestWriteFile": {
"data_path": "agbenchmark/challenges/interface/write_file",
"is_regression": false,
"metrics": {
"difficulty": "interface",
"success": true,
"success_%": 40.0,
"run_time": "22.169 seconds"
}
}
}
}

View File

@@ -0,0 +1,27 @@
{
"command": "agbenchmark start --test TestWriteFile",
"completion_time": "2023-07-15-22:13",
"metrics": {
"run_time": "12.4 seconds",
"highest_difficulty": "interface: 1"
},
"tests": {
"TestWriteFile": {
"data_path": "agbenchmark/challenges/interface/write_file",
"is_regression": false,
"metrics": {
"difficulty": "interface",
"success": true,
"success_%": 50.0,
"run_time": "12.127 seconds"
}
}
},
"config": {
"workspace": "${os.path.join(Path.home(), 'miniagi')}",
"entry_path": "agbenchmark.benchmarks"
},
"additional": {
"model": "gpt-4"
}
}

View File

@@ -0,0 +1,27 @@
{
"command": "agbenchmark start --test TestReadFile",
"completion_time": "2023-07-17-10:12",
"metrics": {
"run_time": "65.27 seconds",
"highest_difficulty": "interface: 1"
},
"tests": {
"TestReadFile": {
"data_path": "agbenchmark/challenges/interface/read_file",
"is_regression": true,
"metrics": {
"difficulty": "interface",
"success": true,
"success_%": 100.0,
"run_time": "65.074 seconds"
}
}
},
"config": {
"workspace": "${os.path.join(Path.home(), 'miniagi')}"
},
"additional": {
"model": "gpt-4",
"reached_termination_time": true
}
}

View File

@@ -0,0 +1,27 @@
{
"command": "agbenchmark start --test TestReadFile",
"completion_time": "2023-07-15-22:13",
"metrics": {
"run_time": "31.2 seconds",
"highest_difficulty": "interface: 1"
},
"tests": {
"TestReadFile": {
"data_path": "agbenchmark/challenges/interface/read_file",
"is_regression": true,
"metrics": {
"difficulty": "interface",
"success": true,
"success_%": 100.0,
"run_time": "30.903 seconds"
}
}
},
"config": {
"workspace": "${os.path.join(Path.home(), 'miniagi')}",
"entry_path": "agbenchmark.benchmarks"
},
"additional": {
"model": "gpt-4"
}
}

View File

@@ -0,0 +1,27 @@
{
"command": "agbenchmark start --test TestSearch",
"completion_time": "2023-07-15-22:14",
"metrics": {
"run_time": "16.88 seconds",
"highest_difficulty": "interface: 1"
},
"tests": {
"TestSearch": {
"data_path": "agbenchmark/challenges/interface/search",
"is_regression": true,
"metrics": {
"difficulty": "interface",
"success": true,
"success_%": 100.0,
"run_time": "16.572 seconds"
}
}
},
"config": {
"workspace": "${os.path.join(Path.home(), 'miniagi')}",
"entry_path": "agbenchmark.benchmarks"
},
"additional": {
"model": "gpt-4"
}
}

View File

@@ -0,0 +1,28 @@
{
"command": "agbenchmark start --test TestDebugSimpleTypoWithGuidance",
"completion_time": "2023-07-15-22:16",
"metrics": {
"run_time": "45.92 seconds",
"highest_difficulty": ": 0"
},
"tests": {
"TestDebugSimpleTypoWithGuidance": {
"data_path": "agbenchmark/challenges/code/d1",
"is_regression": false,
"metrics": {
"difficulty": "basic",
"success": false,
"fail_reason": "assert 1 in [0.0]",
"success_%": 0.0,
"run_time": "45.599 seconds"
}
}
},
"config": {
"workspace": "${os.path.join(Path.home(), 'miniagi')}",
"entry_path": "agbenchmark.benchmarks"
},
"additional": {
"model": "gpt-4"
}
}

View File

@@ -0,0 +1,28 @@
{
"command": "agbenchmark start --test TestDebugSimpleTypoWithGuidance",
"completion_time": "2023-07-15-22:15",
"metrics": {
"run_time": "32.99 seconds",
"highest_difficulty": ": 0"
},
"tests": {
"TestDebugSimpleTypoWithGuidance": {
"data_path": "agbenchmark/challenges/code/d1",
"is_regression": false,
"metrics": {
"difficulty": "basic",
"success": false,
"fail_reason": "assert 1 in [0.0]",
"success_%": 0.0,
"run_time": "32.582 seconds"
}
}
},
"config": {
"workspace": "${os.path.join(Path.home(), 'miniagi')}",
"entry_path": "agbenchmark.benchmarks"
},
"additional": {
"model": "gpt-4"
}
}

View File

@@ -1,23 +0,0 @@
{
"command": "agbenchmark start --test TestWriteFile",
"completion_time": "2023-07-16-13:07",
"metrics": {
"run_time": "13.91 seconds",
"highest_difficulty": "interface: 1"
},
"tests": {
"TestWriteFile": {
"data_path": "agbenchmark/challenges/interface/write_file",
"is_regression": false,
"metrics": {
"difficulty": "interface",
"success": true,
"success_%": 30.0,
"run_time": "13.684 seconds"
}
}
},
"config": {
"workspace": "${os.path.join(Path.home(), 'miniagi')}"
}
}

View File

@@ -1,7 +1,9 @@
# radar charts, logs, helper functions for tests, anything else relevant.
import glob
import math
import os
import re
import sys
from datetime import datetime
from pathlib import Path
from typing import Any
@@ -17,17 +19,49 @@ HOME_ENV = os.getenv("HOME_ENV")
def calculate_info_test_path(reports_path: Path) -> str:
    """Return the path (as a string) for the next report file in *reports_path*.

    The directory is created if it does not exist yet. The file name depends
    on how the process was invoked (``sys.argv``):

    * Without ``--test`` the default scheme is used:
      ``file<N>_<MM-DD-HH-MM>.json`` where ``N`` is the number of ``*.json``
      files already in the directory plus one.
    * With ``--test <name>`` the scheme is ``<prefix>_<name>.json`` for the
      first report of that test, where ``prefix`` is one more than the highest
      numeric prefix among existing reports, and
      ``<prefix>.<k>_<name>.json`` for subsequent re-runs of the same test.

    Args:
        reports_path: Directory that holds the JSON reports.

    Returns:
        The full path of the report file to write, as a string. The file
        itself is not created.

    Raises:
        ValueError: If ``--test`` is the last command line argument, i.e. no
            test name follows it.
    """
    command = sys.argv

    if not reports_path.exists():
        reports_path.mkdir(parents=True, exist_ok=True)

    # Escape the directory so glob metacharacters in the path ("[", "*", "?")
    # are taken literally; only the "*.json" part is a pattern.
    json_files = glob.glob(os.path.join(glob.escape(str(reports_path)), "*.json"))

    # Default naming scheme
    timestamp = datetime.now().strftime("%m-%d-%H-%M")
    run_name = f"file{len(json_files) + 1}_{timestamp}.json"

    if "--test" in command:
        try:
            test_arg = command[command.index("--test") + 1]  # Argument after --test
        except IndexError:
            raise ValueError("Expected an argument after --test") from None

        # "<prefix>_<name>" or "<prefix>.<rerun>_<name>". Files following the
        # default "file<N>_<timestamp>" scheme do not match and are ignored
        # instead of crashing the numeric parsing.
        stem_pattern = re.compile(r"(\d+)(?:\.(\d+))?_(.+)")

        max_prefix = 0  # highest prefix used by any test so far
        test_prefix = None  # prefix already assigned to this test, if any
        max_rerun = 0  # highest re-run index already used by this test
        for json_file in json_files:
            match = stem_pattern.fullmatch(Path(json_file).stem)
            if match is None:
                continue
            prefix = int(match.group(1))
            max_prefix = max(max_prefix, prefix)
            # Compare the whole test name: a substring check would confuse
            # e.g. "TestRememberMultipleIds" with "...IdsWithNoise".
            if match.group(3) == test_arg:
                if test_prefix is None or prefix < test_prefix:
                    test_prefix = prefix
                max_rerun = max(max_rerun, int(match.group(2) or 0))

        if test_prefix is None:
            # First report for this test: take the next free prefix.
            run_name = f"{max_prefix + 1}_{test_arg}.json"
        else:
            # Re-run: keep the prefix and bump the re-run index past the
            # highest one in use so an existing report is never overwritten.
            run_name = f"{test_prefix}.{max_rerun + 1}_{test_arg}.json"

    return str(reports_path / run_name)
def replace_backslash(value: Any) -> Any: