mirror of
https://github.com/aljazceru/gpt-engineer.git
synced 2025-12-17 12:45:26 +01:00
Make sure benchmark runs and be more strict about collecting feedback (#397)
* Update benchmark script * Bump version
This commit is contained in:
@@ -59,7 +59,7 @@ def human_input() -> Review:
|
|||||||
print()
|
print()
|
||||||
|
|
||||||
ran = input("Did the generated code run at all? " + TERM_CHOICES)
|
ran = input("Did the generated code run at all? " + TERM_CHOICES)
|
||||||
while ran not in ("y", "n", "u", ""):
|
while ran not in ("y", "n", "u"):
|
||||||
ran = input("Invalid input. Please enter y, n, or u: ")
|
ran = input("Invalid input. Please enter y, n, or u: ")
|
||||||
|
|
||||||
perfect = ""
|
perfect = ""
|
||||||
@@ -69,12 +69,12 @@ def human_input() -> Review:
|
|||||||
perfect = input(
|
perfect = input(
|
||||||
"Did the generated code do everything you wanted? " + TERM_CHOICES
|
"Did the generated code do everything you wanted? " + TERM_CHOICES
|
||||||
)
|
)
|
||||||
while perfect not in ("y", "n", "u", ""):
|
while perfect not in ("y", "n", "u"):
|
||||||
perfect = input("Invalid input. Please enter y, n, or u: ")
|
perfect = input("Invalid input. Please enter y, n, or u: ")
|
||||||
|
|
||||||
if perfect != "y":
|
if perfect != "y":
|
||||||
useful = input("Did the generated code do anything useful? " + TERM_CHOICES)
|
useful = input("Did the generated code do anything useful? " + TERM_CHOICES)
|
||||||
while useful not in ("y", "n", "u", ""):
|
while useful not in ("y", "n", "u"):
|
||||||
useful = input("Invalid input. Please enter y, n, or u: ")
|
useful = input("Invalid input. Please enter y, n, or u: ")
|
||||||
|
|
||||||
comments = ""
|
comments = ""
|
||||||
|
|||||||
@@ -273,6 +273,7 @@ class Config(str, Enum):
|
|||||||
CLARIFY = "clarify"
|
CLARIFY = "clarify"
|
||||||
RESPEC = "respec"
|
RESPEC = "respec"
|
||||||
EXECUTE_ONLY = "execute_only"
|
EXECUTE_ONLY = "execute_only"
|
||||||
|
EVALUATE = "evaluate"
|
||||||
USE_FEEDBACK = "use_feedback"
|
USE_FEEDBACK = "use_feedback"
|
||||||
|
|
||||||
|
|
||||||
@@ -323,6 +324,7 @@ STEPS = {
|
|||||||
],
|
],
|
||||||
Config.USE_FEEDBACK: [use_feedback, gen_entrypoint, execute_entrypoint, human_review],
|
Config.USE_FEEDBACK: [use_feedback, gen_entrypoint, execute_entrypoint, human_review],
|
||||||
Config.EXECUTE_ONLY: [execute_entrypoint],
|
Config.EXECUTE_ONLY: [execute_entrypoint],
|
||||||
|
Config.EVALUATE: [execute_entrypoint, human_review],
|
||||||
}
|
}
|
||||||
|
|
||||||
# Future steps that can be added:
|
# Future steps that can be added:
|
||||||
|
|||||||
@@ -3,7 +3,7 @@ requires = ["setuptools", "wheel"]
|
|||||||
|
|
||||||
[project]
|
[project]
|
||||||
name = "gpt-engineer"
|
name = "gpt-engineer"
|
||||||
version = "0.0.6"
|
version = "0.0.7"
|
||||||
description = "Specify what you want it to build, the AI asks for clarification, and then builds it."
|
description = "Specify what you want it to build, the AI asks for clarification, and then builds it."
|
||||||
readme = "README.md"
|
readme = "README.md"
|
||||||
requires-python = ">=3"
|
requires-python = ">=3"
|
||||||
|
|||||||
@@ -68,7 +68,7 @@ def main(
|
|||||||
"gpt_engineer.main",
|
"gpt_engineer.main",
|
||||||
bench_folder,
|
bench_folder,
|
||||||
"--steps",
|
"--steps",
|
||||||
"execute_only",
|
"evaluate",
|
||||||
],
|
],
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user