Make sure benchmark runs and be more strict about collecting feedback (#397)

* Update benchmark script

* Bump version
Anton Osika, 2023-06-25 15:53:34 +02:00 (committed by GitHub)
parent b09f0e9193
commit 7f16abecb3
4 changed files with 7 additions and 5 deletions


@@ -59,7 +59,7 @@ def human_input() -> Review:
     print()
     ran = input("Did the generated code run at all? " + TERM_CHOICES)
-    while ran not in ("y", "n", "u", ""):
+    while ran not in ("y", "n", "u"):
         ran = input("Invalid input. Please enter y, n, or u: ")
     perfect = ""

@@ -69,12 +69,12 @@ def human_input() -> Review:
         perfect = input(
             "Did the generated code do everything you wanted? " + TERM_CHOICES
         )
-        while perfect not in ("y", "n", "u", ""):
+        while perfect not in ("y", "n", "u"):
             perfect = input("Invalid input. Please enter y, n, or u: ")
         if perfect != "y":
             useful = input("Did the generated code do anything useful? " + TERM_CHOICES)
-            while useful not in ("y", "n", "u", ""):
+            while useful not in ("y", "n", "u"):
                 useful = input("Invalid input. Please enter y, n, or u: ")
     comments = ""

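Dropping "" from the accepted answers is what makes feedback collection stricter: pressing Enter alone is now re-prompted instead of silently passing. All three prompts share the same validate-and-reprompt pattern; a minimal sketch of that stricter loop, using a hypothetical ask() helper that is not part of this PR:

def ask(question: str, choices=("y", "n", "u")) -> str:
    """Prompt until the user gives one of the allowed one-letter answers."""
    answer = input(question + " (y/n/u): ")
    while answer not in choices:  # "" is rejected, so Enter alone no longer counts
        answer = input("Invalid input. Please enter y, n, or u: ")
    return answer

ran = ask("Did the generated code run at all?")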

@@ -273,6 +273,7 @@ class Config(str, Enum):
     CLARIFY = "clarify"
     RESPEC = "respec"
     EXECUTE_ONLY = "execute_only"
+    EVALUATE = "evaluate"
     USE_FEEDBACK = "use_feedback"

@@ -323,6 +324,7 @@ STEPS = {
     ],
     Config.USE_FEEDBACK: [use_feedback, gen_entrypoint, execute_entrypoint, human_review],
     Config.EXECUTE_ONLY: [execute_entrypoint],
+    Config.EVALUATE: [execute_entrypoint, human_review],
 }
 # Future steps that can be added:

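Each Config value names a pipeline: STEPS maps it to an ordered list of step functions that the CLI runs in sequence, so the new "evaluate" config is simply "run the generated code, then immediately collect human feedback". A self-contained sketch of that dispatch, with placeholder step bodies standing in for the real steps (which take AI and database handles):

from enum import Enum

class Config(str, Enum):
    EXECUTE_ONLY = "execute_only"
    EVALUATE = "evaluate"

def execute_entrypoint():
    print("running the generated code")  # placeholder body

def human_review():
    print("collecting y/n/u feedback")  # placeholder body

STEPS = {
    Config.EXECUTE_ONLY: [execute_entrypoint],
    # evaluate = execute, then force a feedback prompt
    Config.EVALUATE: [execute_entrypoint, human_review],
}

for step in STEPS[Config("evaluate")]:
    step()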

@@ -3,7 +3,7 @@ requires = ["setuptools", "wheel"]
 [project]
 name = "gpt-engineer"
-version = "0.0.6"
+version = "0.0.7"
 description = "Specify what you want it to build, the AI asks for clarification, and then builds it."
 readme = "README.md"
 requires-python = ">=3"

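The version string in pyproject.toml is what installers and runtime metadata see. Assuming the package is installed from this pyproject.toml, the bump can be verified with the standard library:

from importlib.metadata import version

assert version("gpt-engineer") == "0.0.7"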

@@ -68,7 +68,7 @@ def main(
             "gpt_engineer.main",
             bench_folder,
             "--steps",
-            "execute_only",
+            "evaluate",
         ],