From 7f16abecb348b0137bc8eb31cd672f2dae25cf9c Mon Sep 17 00:00:00 2001 From: Anton Osika Date: Sun, 25 Jun 2023 15:53:34 +0200 Subject: [PATCH] Make sure benchmark runs and be more strict about collecting feedback (#397) * Update benchmark script * Bump version --- gpt_engineer/learning.py | 6 +++--- gpt_engineer/steps.py | 2 ++ pyproject.toml | 2 +- scripts/benchmark.py | 2 +- 4 files changed, 7 insertions(+), 5 deletions(-) diff --git a/gpt_engineer/learning.py b/gpt_engineer/learning.py index 6536e00..becb011 100644 --- a/gpt_engineer/learning.py +++ b/gpt_engineer/learning.py @@ -59,7 +59,7 @@ def human_input() -> Review: print() ran = input("Did the generated code run at all? " + TERM_CHOICES) - while ran not in ("y", "n", "u", ""): + while ran not in ("y", "n", "u"): ran = input("Invalid input. Please enter y, n, or u: ") perfect = "" @@ -69,12 +69,12 @@ def human_input() -> Review: perfect = input( "Did the generated code do everything you wanted? " + TERM_CHOICES ) - while perfect not in ("y", "n", "u", ""): + while perfect not in ("y", "n", "u"): perfect = input("Invalid input. Please enter y, n, or u: ") if perfect != "y": useful = input("Did the generated code do anything useful? " + TERM_CHOICES) - while useful not in ("y", "n", "u", ""): + while useful not in ("y", "n", "u"): useful = input("Invalid input. Please enter y, n, or u: ") comments = "" diff --git a/gpt_engineer/steps.py b/gpt_engineer/steps.py index 9e26b71..7a012f4 100644 --- a/gpt_engineer/steps.py +++ b/gpt_engineer/steps.py @@ -273,6 +273,7 @@ class Config(str, Enum): CLARIFY = "clarify" RESPEC = "respec" EXECUTE_ONLY = "execute_only" + EVALUATE = "evaluate" USE_FEEDBACK = "use_feedback" @@ -323,6 +324,7 @@ STEPS = { ], Config.USE_FEEDBACK: [use_feedback, gen_entrypoint, execute_entrypoint, human_review], Config.EXECUTE_ONLY: [execute_entrypoint], + Config.EVALUATE: [execute_entrypoint, human_review], } # Future steps that can be added: diff --git a/pyproject.toml b/pyproject.toml index 8c7b17f..3023883 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -3,7 +3,7 @@ requires = ["setuptools", "wheel"] [project] name = "gpt-engineer" -version = "0.0.6" +version = "0.0.7" description = "Specify what you want it to build, the AI asks for clarification, and then builds it." readme = "README.md" requires-python = ">=3" diff --git a/scripts/benchmark.py b/scripts/benchmark.py index 518ec76..ef3434f 100644 --- a/scripts/benchmark.py +++ b/scripts/benchmark.py @@ -68,7 +68,7 @@ def main( "gpt_engineer.main", bench_folder, "--steps", - "execute_only", + "evaluate", ], )