Simplify and clarify

Anton Osika
2023-06-03 23:19:19 +02:00
parent 6a31d875c7
commit de046d39e9
6 changed files with 66 additions and 28 deletions

.gitignore (vendored): 4 changes

@@ -1,3 +1 @@
-input
-memory
-TODO.md
+archive

README.md

@@ -1,9 +1,36 @@
-# gpt-engineer
+# GPT Engineer
 Specify what you want it to build, the AI asks for clarification, and then builds it.
-How to use:
-- Install requirements.txt `pip install -r requirements.txt`
-- Copy the example folder `cp example -r my-new-project`
-- Edit the file main_prompt in my-new-project
-- run `python main.py my-new-prompt`
-- Check the results in my-new-project/workspace
+Even if it is complex.
+
+## Philosophy
+The philosophy of this project is that it should be
+- Simple to get value
+- Flexible and simple to add your own "AI steps" (see `steps.py`)
+- Incrementally build towards a user experience of:
+  - high-level prompting
+  - giving feedback to the AI that it will remember over time
+- Fast handovers back and forth between AI and human
+- No databases; all computation is "resumable" and persisted to the filesystem
+
+## Usage
+**Install**:
+- `pip install -r requirements.txt`
+
+**Run**:
+- Create a new empty folder with a `main_prompt` file (or copy the example folder: `cp -r example my-new-project`)
+- Fill in the `main_prompt` in your new folder
+- Run `python main.py my-new-project`
+
+**Results**:
+- Check the generated files in `my-new-project/workspace_clarified`
+
+## Features
+Allows you to specify the "identity" of the AI agent by editing the files in the `identity` folder.
+This, and reusing snippets in the `main_prompt`, is currently how you make the agent remember things between sessions.
+Each step in `steps.py` will have its communication history with GPT-4 stored in the `logs` folder (see `main.py`).
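
Editor's note: the generated files end up in the workspace via `to_files` (used in `steps.py` below), whose implementation is not part of this commit. A minimal sketch of what such a parser might look like; the function name, and the assumption that the model precedes each fenced code block with the target filename, are inventions for illustration:

```python
import re
from pathlib import Path

def to_files_sketch(chat: str, workspace: Path) -> None:
    # Assumes the model emits, for every file, a filename on its own
    # line followed by a fenced code block holding the file contents.
    workspace.mkdir(parents=True, exist_ok=True)
    for file_name, content in re.findall(
        r"(\S+)\n\s*`{3}[^\n]*\n(.*?)`{3}", chat, re.DOTALL
    ):
        path = workspace / file_name
        path.parent.mkdir(parents=True, exist_ok=True)  # allow nested paths
        path.write_text(content)
```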

TODO.md (new file): 13 changes

@@ -0,0 +1,13 @@
+### Next up
+
+We have noticed that, for complex projects, the model is "lazy" and does not fully implement all the files.
+
+Hence, we want to let the LLM generate a new prompt for a "sub-engineer" that goes through each file, takes the other relevant files as context, and rewrites the file (see the sketch below).
+
+### More things to try
+- Allow for human edits in the code (and added comments), and store those edits as diffs to use as "feedback" in the future
+- Add a step that generates tests
+- Fix code based on failing tests
+- Allow rerunning the entire run from scratch, but "replay" the human edits by adding their diffs to the prompt and asking the LLM to apply them in the new code
+- Keep a repository of examples of human feedback that can be reused globally
+- Allow for fine-grained, per-project configuration so that a project can be regenerated from scratch while applying all the human diffs that came after the initial AI generation step (which diffs go with which steps, etc.)
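
Editor's note: none of the above exists yet. For illustration only, a rough sketch of the "sub-engineer" loop in the spirit of the existing steps; the `keys()` helper on `DB` and the prompt wording are inventions:

```python
# Hypothetical sketch: AI and DBs as defined in this repo are assumed.
from ai import AI
from db import DBs

def sub_engineer(ai: AI, dbs: DBs):
    '''Revisit each generated file with the whole workspace as
    context and ask the model to rewrite it (not implemented).'''
    messages = []
    file_names = list(dbs.workspace.keys())  # assumes DB exposes keys()
    for file_name in file_names:
        context = '\n\n'.join(
            f'{name}:\n{dbs.workspace[name]}' for name in file_names
        )
        messages = ai.start(
            dbs.identity['setup'],
            f'Here is the current code base:\n\n{context}\n\n'
            f'Rewrite {file_name} so it is complete and consistent with '
            'the other files. Return only the new file contents.',
        )
        dbs.workspace[file_name] = messages[-1]['content']
    return messages
```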

ai.py: 3 changes

@@ -20,12 +20,13 @@ class AI:
     def fuser(self, msg):
         return {"role": "user", "content": msg}
 
-    def next(self, messages, prompt=None):
+    def next(self, messages: list[dict[str, str]], prompt=None):
         if prompt:
             messages = messages + [{"role": "user", "content": prompt}]
 
         response = openai.ChatCompletion.create(
             messages=messages,
+            stream=True,
             **self.kwargs
         )
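
Editor's note: with `stream=True` hardcoded, `openai.ChatCompletion.create` returns an iterator of delta chunks instead of a single response object, so the rest of `next` (cut off by this hunk) has to stitch the pieces together. A sketch of that consumption, assuming the pre-1.0 `openai` chunk format:

```python
def consume_stream(response, messages: list[dict[str, str]]):
    '''Accumulate streamed chunks into one assistant message
    (sketch of what the remainder of AI.next presumably does).'''
    parts = []
    for chunk in response:
        delta = chunk['choices'][0]['delta']  # pre-1.0 openai chunk shape
        token = delta.get('content', '')      # first chunk may carry only the role
        print(token, end='', flush=True)      # echo progress to the terminal
        parts.append(token)
    print()
    return messages + [{'role': 'assistant', 'content': ''.join(parts)}]
```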

main.py: 13 changes

@@ -16,28 +16,23 @@ app = typer.Typer()
 
 @app.command()
 def chat(
     project_path: str = typer.Argument(None, help="path"),
-    run_prefix: str = typer.Option("", help="run prefix"),
+    run_prefix: str = typer.Option("", help="run prefix, if you want to run multiple variants of the same project and later compare them"),
     model: str = "gpt-4",
     temperature: float = 0.1,
     max_tokens: int = 4096,
-    n: int = 1,
-    stream: bool = True,
 ):
     if project_path is None:
         project_path = str(pathlib.Path(__file__).parent / "example")
 
     input_path = project_path
-    memory_path = pathlib.Path(project_path) / "memory"
+    memory_path = pathlib.Path(project_path) / (run_prefix + "memory")
     workspace_path = pathlib.Path(project_path) / (run_prefix + "workspace")
 
     ai = AI(
         model=model,
         temperature=temperature,
         max_tokens=max_tokens,
-        n=n,
-        stream=stream,
-        stop=None,
     )
 
     dbs = DBs(
@@ -51,8 +46,8 @@ def chat(
     for step in STEPS:
         messages = step(ai, dbs)
-        dbs.logs[run_prefix + step.__name__] = json.dumps(messages)
+        dbs.logs[step.__name__] = json.dumps(messages)
 
 if __name__ == "__main__":
     app()
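
Editor's note: `memory_path` now carries the `run_prefix`, so everything stored under it (logs included) is already namespaced per run, which is why the prefix could be dropped from the log keys. The commit does not show `db.py`, but given `dbs.logs[step.__name__] = json.dumps(messages)` and the "no databases" philosophy, `DB` is presumably a thin dict-like wrapper around a folder. A minimal sketch of that shape; field names beyond those visible in the diffs are guesses:

```python
from dataclasses import dataclass
from pathlib import Path

class DB:
    '''Dict-like view of a folder: keys are file names, values are file
    contents (sketch consistent with how main.py and steps.py use it).'''
    def __init__(self, path):
        self.path = Path(path).absolute()
        self.path.mkdir(parents=True, exist_ok=True)

    def __getitem__(self, key):
        return (self.path / key).read_text()

    def __setitem__(self, key, val):
        (self.path / key).write_text(val)

@dataclass
class DBs:
    memory: DB      # assumed field; main.py builds a memory_path
    logs: DB
    identity: DB
    input: DB
    workspace: DB
```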

steps.py

@@ -11,12 +11,14 @@ from db import DB
 
 def setup_sys_prompt(dbs):
     return dbs.identity['setup'] + '\nUseful to know:\n' + dbs.identity['philosophy']
 
-def setup(ai: AI, dbs: DBs):
+def run(ai: AI, dbs: DBs):
+    '''Run the AI on the main prompt and save the results'''
     messages = ai.start(setup_sys_prompt(dbs), dbs.input['main_prompt'])
     to_files(messages[-1]['content'], dbs.workspace)
     return messages
 
 def clarify(ai: AI, dbs: DBs):
+    '''Ask the user if they want to clarify anything and save the results to the workspace'''
     messages = [ai.fsystem(dbs.identity['qa'])]
     user = dbs.input['main_prompt']
 
     while True:
@@ -26,12 +28,13 @@ def clarify(ai: AI, dbs: DBs):
             break
 
         print()
-        user = input('Answer: ')
+        user = input('(answer in text, or "q" to move on)\n')
         if not user or user == 'q':
             break
 
-        user += '\n\nIs anything else unclear? If everything is sufficiently clear to write the code, just answer "no".'
+        user += '\n\nIs anything else unclear? If yes, only answer in the form: {remaining unclear areas} remaining questions. {Next question}\nIf everything is sufficiently clear, only answer "no".'
+    print()
 
     return messages
 
 def run_clarified(ai: AI, dbs: DBs):
@@ -49,12 +52,13 @@ def run_clarified(ai: AI, dbs: DBs):
     return messages
 
-# STEPS: List[Callable[[AI, DBs], List]] = [
 STEPS=[
-    # setup,
     clarify,
     run_clarified
-    # improve_files,
-    # run_tests,
-    # ImproveBasedOnHumanComments
 ]
+
+# Future steps that can be added:
+# improve_files,
+# add_tests,
+# run_tests_and_fix_files,
+# improve_based_on_in_file_feedback_comments
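
Editor's note: every step is just a callable taking `(AI, DBs)` and returning the message list, so adding one of the future steps means writing a function with that signature and appending it to `STEPS`. A hypothetical sketch of `add_tests` under that contract, reusing the helpers from `steps.py` above; the prompt wording and the workspace key are invented:

```python
def add_tests(ai: AI, dbs: DBs):
    '''Hypothetical future step (not part of this commit): ask the
    model to write tests for the code it just generated.'''
    messages = ai.start(
        setup_sys_prompt(dbs),
        'Write unit tests for the following code base:\n\n'
        + dbs.workspace['all_output.txt'],  # assumed key, for illustration
    )
    to_files(messages[-1]['content'], dbs.workspace)
    return messages

# STEPS = [clarify, run_clarified, add_tests]
```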