mirror of
https://github.com/aljazceru/gpt-engineer.git
synced 2025-12-17 12:45:26 +01:00
Add benchmark report (#463)
* Add benchmark report * Update scripts/benchmark.py --------- Co-authored-by: Artem Moskvin <artemm@spotify.com> Co-authored-by: Anton Osika <anton.osika@gmail.com>
This commit is contained in:
7
.gitignore
vendored
7
.gitignore
vendored
@@ -49,3 +49,10 @@ scratchpad
|
|||||||
# Ignore GPT Engineer files
|
# Ignore GPT Engineer files
|
||||||
projects
|
projects
|
||||||
!projects/example
|
!projects/example
|
||||||
|
|
||||||
|
# Pyenv
|
||||||
|
.python-version
|
||||||
|
|
||||||
|
# Benchmark files
|
||||||
|
benchmark
|
||||||
|
!benchmark/*/prompt
|
||||||
|
|||||||
@@ -8,6 +8,7 @@ repos:
|
|||||||
rev: v1.3.0
|
rev: v1.3.0
|
||||||
hooks:
|
hooks:
|
||||||
- id: mypy
|
- id: mypy
|
||||||
|
additional_dependencies: [types-tabulate==0.9.0.2]
|
||||||
|
|
||||||
- repo: https://github.com/psf/black
|
- repo: https://github.com/psf/black
|
||||||
rev: 23.3.0
|
rev: 23.3.0
|
||||||
|
|||||||
@@ -19,6 +19,7 @@ dependencies = [
|
|||||||
'typer >= 0.3.2',
|
'typer >= 0.3.2',
|
||||||
'rudder-sdk-python == 2.0.2',
|
'rudder-sdk-python == 2.0.2',
|
||||||
'dataclasses-json == 0.5.7',
|
'dataclasses-json == 0.5.7',
|
||||||
|
'tabulate == 0.9.0',
|
||||||
]
|
]
|
||||||
|
|
||||||
classifiers = [
|
classifiers = [
|
||||||
|
|||||||
@@ -1,13 +1,16 @@
|
|||||||
# list all folders in benchmark folder
|
# list all folders in benchmark folder
|
||||||
# for each folder, run the benchmark
|
# for each folder, run the benchmark
|
||||||
import contextlib
|
import contextlib
|
||||||
|
import json
|
||||||
import os
|
import os
|
||||||
import subprocess
|
import subprocess
|
||||||
|
|
||||||
|
from datetime import datetime
|
||||||
from itertools import islice
|
from itertools import islice
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Iterable, Union
|
from typing import Iterable, Union
|
||||||
|
|
||||||
|
from tabulate import tabulate
|
||||||
from typer import run
|
from typer import run
|
||||||
|
|
||||||
|
|
||||||
@@ -71,6 +74,75 @@ def main(
|
|||||||
],
|
],
|
||||||
)
|
)
|
||||||
|
|
||||||
|
generate_report(benchmarks, path)
|
||||||
|
|
||||||
|
|
||||||
|
def generate_report(benchmarks, benchmark_path):
    """Print a Markdown summary of the benchmark reviews and optionally save it.

    For every benchmark, the JSON file ``memory/review`` inside its folder is
    read and turned into one table row: the folder name, the ``ran`` /
    ``works`` / ``perfect`` flags rendered as emoji, and the ``comments``
    text. The table is printed, and after an interactive confirmation it is
    inserted into ``RESULTS.md`` under a level-2 heading carrying today's
    date (``YYYY-MM-DD``).

    Args:
        benchmarks: Iterable of 3-item tuples whose first item is the
            benchmark folder (a ``pathlib.Path``-like object). The other two
            items are ignored here.
        benchmark_path: Directory that contains ``RESULTS.md``; must support
            the ``/`` operator.

    Raises:
        FileNotFoundError: If a benchmark folder has no ``memory/review``.
        json.JSONDecodeError: If a review file does not contain valid JSON.
    """
    headers = ["Benchmark", "Ran", "Works", "Perfect", "Notes"]
    rows = []
    for bench_folder, _, _ in benchmarks:
        review_path = bench_folder / "memory" / "review"
        # JSON is UTF-8 by specification, so do not rely on the locale's
        # default encoding when reading it.
        with open(review_path, encoding="utf-8") as f:
            review = json.load(f)
        rows.append(
            [
                bench_folder.name,
                # Missing keys yield None, which to_emoji renders as a cross.
                to_emoji(review.get("ran", None)),
                to_emoji(review.get("works", None)),
                to_emoji(review.get("perfect", None)),
                review.get("comments", None),
            ]
        )

    table: str = tabulate(rows, headers, tablefmt="pipe")
    print("\nBenchmark report:\n")
    print(table)
    print()

    append_to_results = ask_yes_no("Append report to the results file?")
    if append_to_results:
        results_path = benchmark_path / "RESULTS.md"
        current_date = datetime.now().strftime("%Y-%m-%d")
        insert_markdown_section(results_path, current_date, table, 2)
|
||||||
|
|
||||||
|
|
||||||
|
def to_emoji(value: bool) -> str:
    """Map a review flag to a status emoji.

    Returns the check-mark emoji (U+2705) for a truthy ``value`` and the
    cross-mark emoji (U+274C) for anything falsy, including ``None``.
    """
    if value:
        return "\U00002705"
    return "\U0000274C"
|
||||||
|
|
||||||
|
|
||||||
|
def insert_markdown_section(file_path, section_title, section_text, level):
    """Insert a new Markdown section before the first heading of ``level``.

    The file is read, a block of the form ``<hashes> <title>``, blank line,
    ``<text>``, blank line is placed directly in front of the first existing
    heading that has exactly ``level`` leading ``#`` characters, and the file
    is written back. If no such heading exists, a message is printed and the
    file is left untouched.

    Args:
        file_path: Path of the Markdown file to modify in place.
        section_title: Text of the new heading (without ``#`` characters).
        section_text: Body placed below the new heading.
        level: Heading level, i.e. the number of ``#`` characters.
    """
    # The report table contains emoji, so read and write explicitly as UTF-8
    # rather than depending on the platform's default encoding.
    with open(file_path, "r", encoding="utf-8") as file:
        lines = file.readlines()

    header_prefix = "#" * level
    new_section = f"{header_prefix} {section_title}\n\n{section_text}\n\n"

    # Find the first heading of exactly the requested level. A line with more
    # '#' characters is a deeper heading and must not be matched.
    deeper_prefix = header_prefix + "#"
    line_number = -1
    for i, line in enumerate(lines):
        if line.startswith(header_prefix) and not line.startswith(deeper_prefix):
            line_number = i
            break

    if line_number == -1:
        print(
            f"Markdown file was of unexpected format. No section of level {level} found. Did not write results."
        )
        return

    lines.insert(line_number, new_section)

    # Write the file
    with open(file_path, "w", encoding="utf-8") as file:
        file.writelines(lines)
|
||||||
|
|
||||||
|
|
||||||
|
def ask_yes_no(question: str) -> bool:
    """Prompt on the console until the user answers ``y`` or ``n``.

    The reply is lower-cased and stripped of surrounding whitespace before
    it is compared. Any other reply prints a hint and asks again.

    Args:
        question: Text shown to the user; `` (y/n): `` is appended to it.

    Returns:
        ``True`` for ``y`` and ``False`` for ``n``.
    """
    answers = {"y": True, "n": False}
    prompt = question + " (y/n): "
    while True:
        reply = input(prompt).lower().strip()
        if reply in answers:
            return answers[reply]
        print("Please enter either 'y' or 'n'.")
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
run(main)
|
run(main)
|
||||||
|
|||||||
Reference in New Issue
Block a user