Update benchmarking: run with the "benchmark" steps config and unbuffered logs, then execute each generated project

2025-12-17 20:55:09 +01:00 · 2023-06-18 15:16:20 +02:00
parent a038117efa
commit 2e4baf7fe1
2 changed files with 38 additions and 19 deletions
--- a/gpt_engineer/steps.py
+++ b/gpt_engineer/steps.py
@@ -142,9 +142,9 @@ def execute_entrypoint(ai, dbs):
    print()
    print(command)
    print()
-    print('If yes, type "yes". If no, press enter.')
+    print('If yes, press enter. Otherwise, type "no"')
    print()
-    if input().lower() != "yes":
+    if input() != "":
        print("Ok, not executing the code.")
        return []
    print("Executing the code...")
--- a/scripts/benchmark.py
+++ b/scripts/benchmark.py
@@ -13,36 +13,55 @@ from typer import run
 def main(
    n_benchmarks: int | None = None,
 ):
    processes = []
    files = []
    path = Path("benchmark")
    folders = path.iterdir()
    if n_benchmarks:
-        benchmarks = islice(path.iterdir(), n_benchmarks)
+        folders = islice(folders, n_benchmarks)
-    for folder in benchmarks:
+    benchmarks = []
-        if os.path.isdir(folder):
+    for bench_folder in folders:
-            print("Running benchmark for {}".format(folder))
+        if os.path.isdir(bench_folder):
            print("Running benchmark for {}".format(bench_folder))
-            log_path = folder / "log.txt"
+            log_path = bench_folder / "log.txt"
            log_file = open(log_path, "w")
-            processes.append(
+            process = subprocess.Popen(
-                subprocess.Popen(
+                [
-                    ["python", "-m", "gpt_engineer.main", folder],
+                    "python",
                    "-u", # Unbuffered output
                    "-m",
                    "gpt_engineer.main",
                    bench_folder,
                    "--steps-config",
                    "benchmark",
                ],
                stdout=log_file,
                stderr=log_file,
                bufsize=0,
            )
-            )
+            benchmarks.append((bench_folder, process, log_file))
            files.append(log_file)
            print("You can stream the log file by running: tail -f {}".format(log_path))
-    for process, file in zip(processes, files):
+    for bench_folder, process, file in benchmarks:
        process.wait()
        print("process finished with code", process.returncode)
        file.close()
        print("process", bench_folder.name, "finished with code", process.returncode)
        print('Running it. Original benchmark prompt:')
        print()
        with open(bench_folder / "main_prompt") as f:
            print(f.read())
        print()
        try:
            subprocess.run(
                ['python', "-m", "gpt_engineer.main", bench_folder, "--steps-config", "execute_only"],
            )
        except KeyboardInterrupt:
            pass
 if __name__ == "__main__":
    run(main)