diff --git a/.gitignore b/.gitignore
index 3aedcd0..548bbb6 100644
--- a/.gitignore
+++ b/.gitignore
@@ -49,3 +49,10 @@ scratchpad
 # Ignore GPT Engineer files
 projects
 !projects/example
+
+# Pyenv
+.python-version
+
+# Benchmark files
+benchmark
+!benchmark/*/prompt
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 1d94807..2bc8970 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -8,6 +8,7 @@ repos:
     rev: v1.3.0
     hooks:
       - id: mypy
+        additional_dependencies: [types-tabulate==0.9.0.2]
 
   - repo: https://github.com/psf/black
     rev: 23.3.0
diff --git a/pyproject.toml b/pyproject.toml
index e03cfbc..d6a00f0 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -19,6 +19,7 @@ dependencies = [
     'typer >= 0.3.2',
     'rudder-sdk-python == 2.0.2',
     'dataclasses-json == 0.5.7',
+    'tabulate == 0.9.0',
 ]
 
 classifiers = [
diff --git a/scripts/benchmark.py b/scripts/benchmark.py
index 366ab31..a923f03 100644
--- a/scripts/benchmark.py
+++ b/scripts/benchmark.py
@@ -1,13 +1,16 @@
 # list all folders in benchmark folder
 # for each folder, run the benchmark
 import contextlib
+import json
 import os
 import subprocess
 
+from datetime import datetime
 from itertools import islice
 from pathlib import Path
 from typing import Iterable, Union
 
+from tabulate import tabulate
 from typer import run
 
 
@@ -71,6 +74,75 @@ def main(
         ],
     )
 
+    generate_report(benchmarks, path)
+
+
+def generate_report(benchmarks, benchmark_path):
+    headers = ["Benchmark", "Ran", "Works", "Perfect", "Notes"]
+    rows = []
+    for bench_folder, _, _ in benchmarks:
+        memory = bench_folder / "memory"
+        with open(memory / "review") as f:
+            review = json.loads(f.read())
+        rows.append(
+            [
+                bench_folder.name,
+                to_emoji(review.get("ran", None)),
+                to_emoji(review.get("works", None)),
+                to_emoji(review.get("perfect", None)),
+                review.get("comments", None),
+            ]
+        )
+    table: str = tabulate(rows, headers, tablefmt="pipe")
+    print("\nBenchmark report:\n")
+    print(table)
+    print()
+    append_to_results = ask_yes_no("Append report to the results file?")
+    if append_to_results:
+        results_path = benchmark_path / "RESULTS.md"
+        current_date = datetime.now().strftime("%Y-%m-%d")
+        insert_markdown_section(results_path, current_date, table, 2)
+
+
+def to_emoji(value: bool) -> str:
+    return "\U00002705" if value else "\U0000274C"
+
+
+def insert_markdown_section(file_path, section_title, section_text, level):
+    with open(file_path, "r") as file:
+        lines = file.readlines()
+
+    header_prefix = "#" * level
+    new_section = f"{header_prefix} {section_title}\n\n{section_text}\n\n"
+
+    # Find the first section with the specified level
+    line_number = -1
+    for i, line in enumerate(lines):
+        if line.startswith(header_prefix):
+            line_number = i
+            break
+
+    if line_number != -1:
+        lines.insert(line_number, new_section)
+    else:
+        print(f"Markdown file was of unexpected format. No section of level {level} found. Did not write results.")
+        return
+
+    # Write the file
+    with open(file_path, "w") as file:
+        file.writelines(lines)
+
+
+def ask_yes_no(question: str) -> bool:
+    while True:
+        response = input(question + " (y/n): ").lower().strip()
+        if response == "y":
+            return True
+        elif response == "n":
+            return False
+        else:
+            print("Please enter either 'y' or 'n'.")
+
 
 if __name__ == "__main__":
     run(main)
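
Note on the review data consumed above: `generate_report` expects each benchmark folder to contain a `memory/review` file holding a JSON object; the diff reads the keys `ran`, `works`, `perfect`, and `comments` from it. A minimal sketch of the flow, with hypothetical sample values and benchmark name (only the keys the diff actually touches are shown):

```python
import json

from tabulate import tabulate

# Hypothetical contents of <bench_folder>/memory/review.
review = json.loads(
    '{"ran": true, "works": true, "perfect": false, "comments": "minor styling issue"}'
)


def to_emoji(value: bool) -> str:
    # Same mapping as in the diff: green check for truthy, red cross otherwise.
    return "\U00002705" if value else "\U0000274C"


headers = ["Benchmark", "Ran", "Works", "Perfect", "Notes"]
rows = [
    [
        "currency_converter",  # hypothetical benchmark folder name
        to_emoji(review.get("ran")),
        to_emoji(review.get("works")),
        to_emoji(review.get("perfect")),
        review.get("comments"),
    ]
]

# tablefmt="pipe" emits a GitHub-flavored markdown table, which is why the
# same string can be appended to RESULTS.md unchanged.
print(tabulate(rows, headers, tablefmt="pipe"))
```

Expected output, roughly:

```
| Benchmark          | Ran   | Works   | Perfect   | Notes               |
|:-------------------|:------|:--------|:----------|:--------------------|
| currency_converter | ✅    | ✅      | ❌        | minor styling issue |
```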
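
One behavioral detail of `insert_markdown_section` worth calling out: the new dated section is inserted *before* the first existing header of the given level, so RESULTS.md stays newest-first; if no header of that level exists, the function prints a warning and writes nothing. A quick sanity check, assuming the function from the diff is in scope (the path and file contents are made up):

```python
from pathlib import Path

# Throwaway results file with one existing level-2 section.
sample = Path("/tmp/RESULTS.md")
sample.write_text("# Results\n\n## 2023-06-19\n\nold table\n")

# Insert a newer section; it lands above "## 2023-06-19" because the
# function inserts at the index of the first line starting with "##".
# The top-level "# Results" title is not matched by startswith("##"),
# so it stays put.
insert_markdown_section(sample, "2023-06-20", "new table", 2)
print(sample.read_text())
```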