diff --git a/gpt_engineer/steps.py b/gpt_engineer/steps.py
index 897087f..791a4ea 100644
--- a/gpt_engineer/steps.py
+++ b/gpt_engineer/steps.py
@@ -143,9 +143,9 @@ def execute_entrypoint(ai, dbs):
     print()
     print(command)
     print()
-    print('If yes, type "yes". If no, press enter.')
+    print('If yes, press enter. Otherwise, type "no"')
     print()
-    if input().lower() != "yes":
+    if input() != "":
         print("Ok, not executing the code.")
         return []
     print("Executing the code...")
diff --git a/scripts/benchmark.py b/scripts/benchmark.py
index 8e267a3..587030f 100644
--- a/scripts/benchmark.py
+++ b/scripts/benchmark.py
@@ -16,31 +16,55 @@ from itertools import islice
 def main(
     n_benchmarks: int | None = None,
 ):
-    processes = []
-    files = []
-    path = Path('benchmark')
+    path = Path("benchmark")  # every directory directly under this path is run as a benchmark
+
+    folders = path.iterdir()  # lazy iterator; also yields plain files, skipped by isdir() below
 
     if n_benchmarks:
-        benchmarks = islice(path.iterdir(), n_benchmarks)
+        folders = islice(folders, n_benchmarks)  # caps entries before the isdir() filter, so fewer may run
 
-    for folder in benchmarks:
-        if os.path.isdir(folder):
-            print('Running benchmark for {}'.format(folder))
+    benchmarks = []  # one (folder, process, log_file) tuple per launched benchmark
+    for bench_folder in folders:
+        if os.path.isdir(bench_folder):
+            print("Running benchmark for {}".format(bench_folder))
 
-            log_path = folder / 'log.txt'
-            log_file = open(log_path, 'w')
-            processes.append(subprocess.Popen(['python', '-m', 'gpt_engineer.main', folder], stdout=log_file, stderr=log_file, bufsize=0))
-            files.append(log_file)
+            log_path = bench_folder / "log.txt"
+            log_file = open(log_path, "w")  # kept open for the child; closed after process.wait() below
+            process = subprocess.Popen(  # returns immediately, so all benchmarks run concurrently
+                [
+                    "python",
+                    "-u",  # unbuffered child stdout/stderr, so the log can be followed with tail -f
+                    "-m",
+                    "gpt_engineer.main",
+                    bench_folder,
+                    "--steps-config",
+                    "benchmark",
+                ],
+                stdout=log_file,
+                stderr=log_file,  # stdout and stderr are interleaved in the same log file
+                bufsize=0,
+            )
+            benchmarks.append((bench_folder, process, log_file))
 
-            print('You can stream the log file by running: tail -f {}'.format(log_path))
+            print("You can stream the log file by running: tail -f {}".format(log_path))
 
-    for process, file in zip(processes, files):
+    for bench_folder, process, file in benchmarks:  # launch order; each wait() blocks until that child exits
         process.wait()
-        print('process finished with code', process.returncode)
         file.close()
 
+        print("process", bench_folder.name, "finished with code", process.returncode)
+        print('Running it. Original benchmark prompt:')
+        print()
+        with open(bench_folder / "main_prompt") as f:  # assumes each benchmark folder has a main_prompt file — TODO confirm
+            print(f.read())
+        print()
 
-if __name__ == '__main__':
+        try:
+            subprocess.run(  # blocks until the child exits; no stdio redirection is requested here
+                ['python', "-m", "gpt_engineer.main", bench_folder, "--steps-config", "execute_only"],
+            )
+        except KeyboardInterrupt:
+            pass  # Ctrl-C aborts only this run; the loop continues with the next benchmark
+
+if __name__ == "__main__":  # only when executed as a script, not on import
     run(main)
-            
-