Make sure the benchmark runs and be stricter about collecting feedback (#397)

* Update benchmark script

* Bump version
This commit is contained in:
Anton Osika
2023-06-25 15:53:34 +02:00
committed by GitHub
parent b09f0e9193
commit 7f16abecb3
4 changed files with 7 additions and 5 deletions

View File

@@ -59,7 +59,7 @@ def human_input() -> Review:
print() print()
ran = input("Did the generated code run at all? " + TERM_CHOICES) ran = input("Did the generated code run at all? " + TERM_CHOICES)
while ran not in ("y", "n", "u", ""): while ran not in ("y", "n", "u"):
ran = input("Invalid input. Please enter y, n, or u: ") ran = input("Invalid input. Please enter y, n, or u: ")
perfect = "" perfect = ""
@@ -69,12 +69,12 @@ def human_input() -> Review:
perfect = input( perfect = input(
"Did the generated code do everything you wanted? " + TERM_CHOICES "Did the generated code do everything you wanted? " + TERM_CHOICES
) )
while perfect not in ("y", "n", "u", ""): while perfect not in ("y", "n", "u"):
perfect = input("Invalid input. Please enter y, n, or u: ") perfect = input("Invalid input. Please enter y, n, or u: ")
if perfect != "y": if perfect != "y":
useful = input("Did the generated code do anything useful? " + TERM_CHOICES) useful = input("Did the generated code do anything useful? " + TERM_CHOICES)
while useful not in ("y", "n", "u", ""): while useful not in ("y", "n", "u"):
useful = input("Invalid input. Please enter y, n, or u: ") useful = input("Invalid input. Please enter y, n, or u: ")
comments = "" comments = ""

View File

@@ -273,6 +273,7 @@ class Config(str, Enum):
CLARIFY = "clarify" CLARIFY = "clarify"
RESPEC = "respec" RESPEC = "respec"
EXECUTE_ONLY = "execute_only" EXECUTE_ONLY = "execute_only"
EVALUATE = "evaluate"
USE_FEEDBACK = "use_feedback" USE_FEEDBACK = "use_feedback"
@@ -323,6 +324,7 @@ STEPS = {
], ],
Config.USE_FEEDBACK: [use_feedback, gen_entrypoint, execute_entrypoint, human_review], Config.USE_FEEDBACK: [use_feedback, gen_entrypoint, execute_entrypoint, human_review],
Config.EXECUTE_ONLY: [execute_entrypoint], Config.EXECUTE_ONLY: [execute_entrypoint],
Config.EVALUATE: [execute_entrypoint, human_review],
} }
# Future steps that can be added: # Future steps that can be added:

View File

@@ -3,7 +3,7 @@ requires = ["setuptools", "wheel"]
[project] [project]
name = "gpt-engineer" name = "gpt-engineer"
version = "0.0.6" version = "0.0.7"
description = "Specify what you want it to build, the AI asks for clarification, and then builds it." description = "Specify what you want it to build, the AI asks for clarification, and then builds it."
readme = "README.md" readme = "README.md"
requires-python = ">=3" requires-python = ">=3"

View File

@@ -68,7 +68,7 @@ def main(
"gpt_engineer.main", "gpt_engineer.main",
bench_folder, bench_folder,
"--steps", "--steps",
"execute_only", "evaluate",
], ],
) )