From 7f16abecb348b0137bc8eb31cd672f2dae25cf9c Mon Sep 17 00:00:00 2001
From: Anton Osika <anton.osika@gmail.com>
Date: Sun, 25 Jun 2023 15:53:34 +0200
Subject: [PATCH] Make sure benchmark runs and be more strict about collecting
 feedback (#397)

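* Update benchmark script to run the new `evaluate` steps config (execute entrypoint, then human review) instead of `execute_only`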

* Bump version to 0.0.7
---
 gpt_engineer/learning.py | 6 +++---
 gpt_engineer/steps.py    | 2 ++
 pyproject.toml           | 2 +-
 scripts/benchmark.py     | 2 +-
 4 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/gpt_engineer/learning.py b/gpt_engineer/learning.py
index 6536e00..becb011 100644
--- a/gpt_engineer/learning.py
+++ b/gpt_engineer/learning.py
@@ -59,7 +59,7 @@ def human_input() -> Review:
     print()
 
     ran = input("Did the generated code run at all? " + TERM_CHOICES)
-    while ran not in ("y", "n", "u", ""):
+    while ran not in ("y", "n", "u"):
         ran = input("Invalid input. Please enter y, n, or u: ")
 
     perfect = ""
@@ -69,12 +69,12 @@ def human_input() -> Review:
         perfect = input(
             "Did the generated code do everything you wanted? " + TERM_CHOICES
         )
-        while perfect not in ("y", "n", "u", ""):
+        while perfect not in ("y", "n", "u"):
             perfect = input("Invalid input. Please enter y, n, or u: ")
 
         if perfect != "y":
             useful = input("Did the generated code do anything useful? " + TERM_CHOICES)
-            while useful not in ("y", "n", "u", ""):
+            while useful not in ("y", "n", "u"):
                 useful = input("Invalid input. Please enter y, n, or u: ")
 
     comments = ""
diff --git a/gpt_engineer/steps.py b/gpt_engineer/steps.py
index 9e26b71..7a012f4 100644
--- a/gpt_engineer/steps.py
+++ b/gpt_engineer/steps.py
@@ -273,6 +273,7 @@ class Config(str, Enum):
     CLARIFY = "clarify"
     RESPEC = "respec"
     EXECUTE_ONLY = "execute_only"
+    EVALUATE = "evaluate"
     USE_FEEDBACK = "use_feedback"
 
 
@@ -323,6 +324,7 @@ STEPS = {
     ],
     Config.USE_FEEDBACK: [use_feedback, gen_entrypoint, execute_entrypoint, human_review],
     Config.EXECUTE_ONLY: [execute_entrypoint],
+    Config.EVALUATE: [execute_entrypoint, human_review],
 }
 
 # Future steps that can be added:
diff --git a/pyproject.toml b/pyproject.toml
index 8c7b17f..3023883 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -3,7 +3,7 @@ requires = ["setuptools", "wheel"]
 
 [project]
 name = "gpt-engineer"
-version = "0.0.6"
+version = "0.0.7"
 description = "Specify what you want it to build, the AI asks for clarification, and then builds it."
 readme = "README.md"
 requires-python = ">=3"
diff --git a/scripts/benchmark.py b/scripts/benchmark.py
index 518ec76..ef3434f 100644
--- a/scripts/benchmark.py
+++ b/scripts/benchmark.py
@@ -68,7 +68,7 @@ def main(
                     "gpt_engineer.main",
                     bench_folder,
                     "--steps",
-                    "execute_only",
+                    "evaluate",
                 ],
             )