diff --git a/.gitignore b/.gitignore
index 3aedcd0..548bbb6 100644
--- a/.gitignore
+++ b/.gitignore
@@ -49,3 +49,10 @@ scratchpad
 # Ignore GPT Engineer files
 projects
 !projects/example
+
+# Pyenv
+.python-version
+
+# Benchmark files
+benchmark
+!benchmark/*/prompt
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 1d94807..2bc8970 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -8,6 +8,7 @@ repos:
     rev: v1.3.0
     hooks:
       - id: mypy
+        additional_dependencies: [types-tabulate==0.9.0.2]
 
   - repo: https://github.com/psf/black
     rev: 23.3.0
diff --git a/pyproject.toml b/pyproject.toml
index e03cfbc..d6a00f0 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -19,6 +19,7 @@ dependencies = [
     'typer >= 0.3.2',
     'rudder-sdk-python == 2.0.2',
     'dataclasses-json == 0.5.7',
+    'tabulate == 0.9.0',
 ]
 
 classifiers = [
diff --git a/scripts/benchmark.py b/scripts/benchmark.py
index 366ab31..a923f03 100644
--- a/scripts/benchmark.py
+++ b/scripts/benchmark.py
@@ -1,13 +1,16 @@
 # list all folders in benchmark folder
 # for each folder, run the benchmark
 import contextlib
+import json
 import os
 import subprocess
 
+from datetime import datetime
 from itertools import islice
 from pathlib import Path
 from typing import Iterable, Union
 
+from tabulate import tabulate
 from typer import run
 
 
@@ -71,6 +74,75 @@ def main(
         ],
     )
 
+    generate_report(benchmarks, path)
+
+
+def generate_report(benchmarks, benchmark_path):
+    headers = ["Benchmark", "Ran", "Works", "Perfect", "Notes"]
+    rows = []
+    for bench_folder, _, _ in benchmarks:
+        memory = bench_folder / "memory"
+        with open(memory / "review") as f:
+            review = json.loads(f.read())
+        rows.append(
+            [
+                bench_folder.name,
+                to_emoji(review.get("ran", None)),
+                to_emoji(review.get("works", None)),
+                to_emoji(review.get("perfect", None)),
+                review.get("comments", None),
+            ]
+        )
+    table: str = tabulate(rows, headers, tablefmt="pipe")
+    print("\nBenchmark report:\n")
+    print(table)
+    print()
+    append_to_results = ask_yes_no("Append report to the results file?")
+    if append_to_results:
+        results_path = benchmark_path / "RESULTS.md"
+        current_date = datetime.now().strftime("%Y-%m-%d")
+        insert_markdown_section(results_path, current_date, table, 2)
+
+
+def to_emoji(value: bool) -> str:
+    return "\U00002705" if value else "\U0000274C"
+
+
+def insert_markdown_section(file_path, section_title, section_text, level):
+    with open(file_path, "r") as file:
+        lines = file.readlines()
+
+    header_prefix = "#" * level
+    new_section = f"{header_prefix} {section_title}\n\n{section_text}\n\n"
+
+    # Find the first section with the specified level
+    line_number = -1
+    for i, line in enumerate(lines):
+        if line.startswith(header_prefix):
+            line_number = i
+            break
+
+    if line_number != -1:
+        lines.insert(line_number, new_section)
+    else:
+        print(f"Markdown file was of unexpected format. No section of level {level} found. Did not write results.")
+        return
+
+    # Write the file
+    with open(file_path, "w") as file:
+        file.writelines(lines)
+
+
+def ask_yes_no(question: str) -> bool:
+    while True:
+        response = input(question + " (y/n): ").lower().strip()
+        if response == "y":
+            return True
+        elif response == "n":
+            return False
+        else:
+            print("Please enter either 'y' or 'n'.")
+
 
 if __name__ == "__main__":
     run(main)
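
Note on the review data consumed above: `generate_report` expects each benchmark folder to contain a `memory/review` file holding a JSON object; the diff reads the keys `ran`, `works`, `perfect`, and `comments` from it. A minimal sketch of the flow, with hypothetical sample values and benchmark name (only the keys the diff actually touches are shown):

```python
import json

from tabulate import tabulate

# Hypothetical contents of <bench_folder>/memory/review.
review = json.loads(
    '{"ran": true, "works": true, "perfect": false, "comments": "minor styling issue"}'
)


def to_emoji(value: bool) -> str:
    # Same mapping as in the diff: green check for truthy, red cross otherwise.
    return "\U00002705" if value else "\U0000274C"


headers = ["Benchmark", "Ran", "Works", "Perfect", "Notes"]
rows = [
    [
        "currency_converter",  # hypothetical benchmark folder name
        to_emoji(review.get("ran")),
        to_emoji(review.get("works")),
        to_emoji(review.get("perfect")),
        review.get("comments"),
    ]
]

# tablefmt="pipe" emits a GitHub-flavored markdown table, which is why the
# same string can be appended to RESULTS.md unchanged.
print(tabulate(rows, headers, tablefmt="pipe"))
```

Expected output, roughly:

```
| Benchmark          | Ran   | Works   | Perfect   | Notes               |
|:-------------------|:------|:--------|:----------|:--------------------|
| currency_converter | ✅    | ✅      | ❌        | minor styling issue |
```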
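
One behavioral detail of `insert_markdown_section` worth calling out: the new dated section is inserted *before* the first existing header of the given level, so RESULTS.md stays newest-first; if no header of that level exists, the function prints a warning and writes nothing. A quick sanity check, assuming the function from the diff is in scope (the path and file contents are made up):

```python
from pathlib import Path

# Throwaway results file with one existing level-2 section.
sample = Path("/tmp/RESULTS.md")
sample.write_text("# Results\n\n## 2023-06-19\n\nold table\n")

# Insert a newer section; it lands above "## 2023-06-19" because the
# function inserts at the index of the first line starting with "##".
# The top-level "# Results" title is not matched by startswith("##"),
# so it stays put.
insert_markdown_section(sample, "2023-06-20", "new table", 2)
print(sample.read_text())
```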