First step in making gpt-engineer learn. Rename main_prompt -> prompt (#381)

* First step in collecting learnings

* Rename prompts

* remove requirements, use pip install -e . instead

* Add requirements

* Fix tests
This commit is contained in:
Anton Osika
2023-06-24 17:53:04 +02:00
committed by GitHub
parent bc6371bdd0
commit f159dc45f2
24 changed files with 196 additions and 38 deletions

85
gpt_engineer/collect.py Normal file
View File

@@ -0,0 +1,85 @@
import hashlib
import json
import os
import random
import tempfile
from dataclasses import dataclass
from pathlib import Path
from dataclasses_json import dataclass_json
from gpt_engineer import steps
from gpt_engineer.db import DBs
from gpt_engineer.steps import Step
@dataclass_json
@dataclass
class Learning:
    """A single telemetry record describing one gpt-engineer run.

    Serialized with dataclasses_json (``to_dict``) and shipped to the
    analytics backend by ``send_learnings``; populated by
    ``extract_learning``.
    """

    # Name of the LLM the run was executed with.
    model: str
    # Sampling temperature used for the run.
    temperature: float
    # JSON-encoded list of the names of the step functions that were run.
    steps: str
    # SHA-256 hex digest of the steps.py source, pinning the exact steps code.
    steps_file_hash: str
    # Contents of the user's `prompt` input file.
    prompt: str
    # Contents of the user's `feedback` input file, or None if absent.
    feedback: str | None
    # Pseudonymous per-machine session id (see get_session).
    session: str
    # Schema version of this record.
    version: str = "0.1"
def steps_file_hash():
    """Return the SHA-256 hex digest of the gpt_engineer.steps source file.

    The hash identifies exactly which version of the steps code produced a
    Learning record; it is bookkeeping, not cryptography, hence
    ``usedforsecurity=False``.
    """
    # Read with an explicit encoding so the digest does not depend on the
    # platform's locale default.
    with open(steps.__file__, "r", encoding="utf-8") as f:
        content = f.read()
    return hashlib.sha256(
        content.encode("utf-8"), usedforsecurity=False
    ).hexdigest()
def extract_learning(
    model: str, temperature: float, steps: list[Step], dbs: DBs
) -> Learning:
    """Assemble a Learning record from this run's configuration and inputs."""
    executed_step_names = json.dumps([step.__name__ for step in steps])
    return Learning(
        prompt=dbs.input["prompt"],
        model=model,
        temperature=temperature,
        steps=executed_step_names,
        steps_file_hash=steps_file_hash(),
        feedback=dbs.input.get("feedback"),
        session=get_session(),
    )
def send_learnings(learning: Learning):
    """Upload one Learning record to the RudderStack analytics backend.

    The rudderstack SDK is imported lazily so the dependency is only
    required when telemetry is actually sent.
    """
    import rudderstack.analytics as rudder_analytics

    rudder_analytics.dataPlaneUrl = "https://gptengineerezm.dataplane.rudderstack.com"
    rudder_analytics.write_key = "2Re4kqwL61GDp7S8ewe6K5dbogG"

    event_properties = learning.to_dict()  # type: ignore
    rudder_analytics.track(
        user_id=learning.session,
        event="learning",
        properties=event_properties,
    )
def get_session():
    """Return a pseudonymous per-machine session id for analytics.

    The id is persisted in the system temp directory so repeated runs on the
    same machine report the same session. If the temp file cannot be read or
    written, a fresh "ephemeral_" id is returned instead.
    """
    path = Path(tempfile.gettempdir()) / "gpt_engineer_user_id.txt"

    try:
        if path.exists():
            user_id = path.read_text(encoding="utf-8")
        else:
            # Not an actual UUID: a random 32-bit integer is enough to
            # distinguish analytics sessions.
            user_id = str(random.randint(0, 2**32))
            path.write_text(user_id, encoding="utf-8")
        return user_id
    except IOError:
        # Temp dir unavailable or unwritable: fall back to a one-off id.
        return "ephemeral_" + str(random.randint(0, 2**32))
def collect_learnings(model: str, temperature: float, steps: list[Step], dbs: DBs):
    """Extract and send a Learning record for this run unless the user opted out.

    Opt-out is controlled by the COLLECT_LEARNINGS_OPT_OUT environment
    variable; "true" (any casing) or "1" disables collection.
    """
    # Case-insensitive so "True"/"TRUE" also opt out, not just "true".
    opt_out = os.environ.get("COLLECT_LEARNINGS_OPT_OUT", "")
    if opt_out.lower() in ("true", "1"):
        print("COLLECT_LEARNINGS_OPT_OUT is set to true, not collecting learning")
        return
    learnings = extract_learning(model, temperature, steps, dbs)
    send_learnings(learnings)

View File

@@ -22,6 +22,12 @@ class DB:
with full_path.open("r", encoding="utf-8") as f:
return f.read()
def get(self, key, default=None):
    """Return the stored value for *key*, or *default* if it is missing."""
    try:
        value = self[key]
    except KeyError:
        return default
    return value
def __setitem__(self, key, val):
full_path = self.path / key
full_path.parent.mkdir(parents=True, exist_ok=True)

View File

@@ -8,6 +8,7 @@ import typer
from gpt_engineer import steps
from gpt_engineer.ai import AI
from gpt_engineer.collect import collect_learnings
from gpt_engineer.db import DB, DBs
from gpt_engineer.steps import STEPS
@@ -56,10 +57,13 @@ def main(
preprompts=DB(Path(__file__).parent / "preprompts"),
)
for step in STEPS[steps_config]:
steps = STEPS[steps_config]
for step in steps:
messages = step(ai, dbs)
dbs.logs[step.__name__] = json.dumps(messages)
collect_learnings(model, temperature, steps, dbs)
if __name__ == "__main__":
app()

View File

@@ -5,6 +5,8 @@ import subprocess
from enum import Enum
from typing import Callable, List, TypeVar
from termcolor import colored
from gpt_engineer.ai import AI
from gpt_engineer.chat_to_files import to_files
from gpt_engineer.db import DBs
@@ -19,12 +21,24 @@ def setup_sys_prompt(dbs):
Step = TypeVar("Step", bound=Callable[[AI, DBs], List[dict]])
def get_prompt(dbs):
"""While we migrate we have this fallback getter"""
assert (
"prompt" in dbs.input or "main_prompt" in dbs.input
), "Please put your prompt in the file `prompt` in the project directory"
if "prompt" not in dbs.input:
print(
colored("Please put the prompt in the file `prompt`, not `main_prompt", "red")
)
print()
return dbs.input.get("prompt", dbs.input["main_prompt"])
def simple_gen(ai: AI, dbs: DBs):
"""Run the AI on the main prompt and save the results"""
messages = ai.start(
setup_sys_prompt(dbs),
dbs.input["main_prompt"],
)
messages = ai.start(setup_sys_prompt(dbs), get_prompt(dbs))
to_files(messages[-1]["content"], dbs.workspace)
return messages
@@ -34,22 +48,31 @@ def clarify(ai: AI, dbs: DBs):
Ask the user if they want to clarify anything and save the results to the workspace
"""
messages = [ai.fsystem(dbs.preprompts["qa"])]
user = dbs.input["main_prompt"]
user_input = get_prompt(dbs)
while True:
messages = ai.next(messages, user)
messages = ai.next(messages, user_input)
if messages[-1]["content"].strip().lower().startswith("no"):
print(" Nothing more to clarify.")
print("Nothing more to clarify.")
break
print()
user = input('(answer in text, or "c" to move on)\n')
user_input = input('(answer in text, or "c" to move on)\n')
print()
if not user or user == "c":
break
if not user_input or user_input == "c":
print("(letting gpt-engineer make its own assumptions)")
print()
messages = ai.next(
messages,
ai.fuser(
"Make your own assumptions and state them explicitly before starting"
),
)
print()
return messages
user += (
user_input += (
"\n\n"
"Is anything else unclear? If yes, only answer in the form:\n"
"{remaining unclear areas} remaining questions.\n"
@@ -68,7 +91,7 @@ def gen_spec(ai: AI, dbs: DBs):
"""
messages = [
ai.fsystem(setup_sys_prompt(dbs)),
ai.fsystem(f"Instructions: {dbs.input['main_prompt']}"),
ai.fsystem(f"Instructions: {dbs.input['prompt']}"),
]
messages = ai.next(messages, dbs.preprompts["spec"])
@@ -105,7 +128,7 @@ def gen_unit_tests(ai: AI, dbs: DBs):
"""
messages = [
ai.fsystem(setup_sys_prompt(dbs)),
ai.fuser(f"Instructions: {dbs.input['main_prompt']}"),
ai.fuser(f"Instructions: {dbs.input['prompt']}"),
ai.fuser(f"Specification:\n\n{dbs.memory['specification']}"),
]
@@ -136,7 +159,7 @@ def gen_code(ai: AI, dbs: DBs):
messages = [
ai.fsystem(setup_sys_prompt(dbs)),
ai.fuser(f"Instructions: {dbs.input['main_prompt']}"),
ai.fuser(f"Instructions: {dbs.input['prompt']}"),
ai.fuser(f"Specification:\n\n{dbs.memory['specification']}"),
ai.fuser(f"Unit tests:\n\n{dbs.memory['unit_tests']}"),
]
@@ -200,7 +223,7 @@ def gen_entrypoint(ai, dbs):
def use_feedback(ai: AI, dbs: DBs):
messages = [
ai.fsystem(setup_sys_prompt(dbs)),
ai.fuser(f"Instructions: {dbs.input['main_prompt']}"),
ai.fuser(f"Instructions: {dbs.input['prompt']}"),
ai.fassistant(dbs.workspace["all_output.txt"]),
ai.fsystem(dbs.preprompts["use_feedback"]),
]
@@ -213,7 +236,7 @@ def fix_code(ai: AI, dbs: DBs):
code_output = json.loads(dbs.logs[gen_code.__name__])[-1]["content"]
messages = [
ai.fsystem(setup_sys_prompt(dbs)),
ai.fuser(f"Instructions: {dbs.input['main_prompt']}"),
ai.fuser(f"Instructions: {dbs.input['prompt']}"),
ai.fuser(code_output),
ai.fsystem(dbs.preprompts["fix_code"]),
]