Make sure benchmark runs and be more strict about collecting feedback (#397)

* Update benchmark script
* Bump version
2025-12-17 04:35:17 +01:00 · 2023-06-25 15:53:34 +02:00
parent b09f0e9193
commit 7f16abecb3
4 changed files with 7 additions and 5 deletions
--- a/gpt_engineer/steps.py
+++ b/gpt_engineer/steps.py
@@ -273,6 +273,7 @@ class Config(str, Enum):
    CLARIFY = "clarify"
    RESPEC = "respec"
    EXECUTE_ONLY = "execute_only"
+    EVALUATE = "evaluate"
    USE_FEEDBACK = "use_feedback"


@@ -323,6 +324,7 @@ STEPS = {
    ],
    Config.USE_FEEDBACK: [use_feedback, gen_entrypoint, execute_entrypoint, human_review],
    Config.EXECUTE_ONLY: [execute_entrypoint],
+    Config.EVALUATE: [execute_entrypoint, human_review],
 }

 # Future steps that can be added: