Merge branch 'main' of github.com:AntonOsika/gpt-engineer

* 'main' of github.com:AntonOsika/gpt-engineer:
  Mark test as failed because it requires OpenAI API access currently
  `black`
  Create test_ai.py
  fix to_files
  execute_workspace -> gen_entrypoint; execute_entrypoint
  Ignore my-new-project/
  Added CODE_OF_CONDUCT.md to the .github directory (#147)
  make pre commit pass in the whole codebase (#149)
  Create ci.yaml
  Fix linting
  Add support for directory paths in filenames and improve code splitting
    - Enforce an explicit markdown code block format
    - Add a token to split the output to clearly detect when the code blocks start
    - Save all non-code output to a `README.md` file
    - Update RegEx to extract and strip text more reliably and clean up the output
    - Update the identify prompts appropriately
  Enhance philosophy to include supporting documents
    - Create instructions for running/compiling the project
    - Create any package manager files
  Generate instructions for all platforms
    - Update prompt to create instructions for all 3 major OS platforms
    - Fix small typo
  Add support for directory creation and binary files
    - Use the `Path` module instead of `os`
    - Add ability to create any amount of missing directories for a given file
    - Add ability to save both text and binary files to save images (or other file types) later
  Add cleanup & move `projects` to their own directory
    - Add optional argument to clean and delete the working directories of the project before running the prompt
    - Add `.gitignore` entry to ignore all possible projects
    - Update readme
2025-12-17 20:55:09 +01:00 · 2023-06-18 15:13:34 +02:00
parent 4a212d968d d3d1c9e5aa
commit e90ac46fc8
17 changed files with 355 additions and 117 deletions
--- a/gpt_engineer/ai.py
+++ b/gpt_engineer/ai.py
@@ -8,10 +8,12 @@ class AI:
        try:
            openai.Model.retrieve("gpt-4")
        except openai.error.InvalidRequestError:
-            print("Model gpt-4 not available for provided api key reverting "
-                  "to gpt-3.5.turbo. Sign up for the gpt-4 wait list here: "
-                  "https://openai.com/waitlist/gpt-4-api")
-            self.kwargs['model'] = "gpt-3.5-turbo"
+            print(
+                "Model gpt-4 not available for provided api key reverting "
+                "to gpt-3.5.turbo. Sign up for the gpt-4 wait list here: "
+                "https://openai.com/waitlist/gpt-4-api"
+            )
+            self.kwargs["model"] = "gpt-3.5-turbo"

    def start(self, system, user):
        messages = [
@@ -26,10 +28,10 @@ class AI:

    def fuser(self, msg):
        return {"role": "user", "content": msg}
+
    def fassistant(self, msg):
        return {"role": "assistant", "content": msg}

-
    def next(self, messages: list[dict[str, str]], prompt=None):
        if prompt:
            messages = messages + [{"role": "user", "content": prompt}]
--- a/gpt_engineer/chat_to_files.py
+++ b/gpt_engineer/chat_to_files.py
@@ -1,27 +1,42 @@
 import re
-from typing import List, Tuple
-from gpt_engineer.db import DB


-def parse_chat(chat) -> List[Tuple[str, str]]:
-    # Get all ``` blocks
-    regex = r"```(.*?)```"
+def parse_chat(chat):  # -> List[Tuple[str, str]]:
+    # Split the chat into sections by the "*CODEBLOCKSBELOW*" token
+    split_chat = chat.split("*CODEBLOCKSBELOW*")

-    matches = re.finditer(regex, chat, re.DOTALL)
+    # Check if the "*CODEBLOCKSBELOW*" token was found
+    is_token_found = len(split_chat) > 1
+
+    # If the "*CODEBLOCKSBELOW*" token is found, use the first part as README
+    # and second part as code blocks. Otherwise, treat README as optional and
+    # proceed with empty README and the entire chat as code blocks
+    readme = split_chat[0].strip() if is_token_found else "No readme"
+    code_blocks = split_chat[1] if is_token_found else chat
+
+    # Get all ``` blocks and preceding filenames
+    regex = r"(\S+?)\n```\S+\n(.+?)```"
+    matches = re.finditer(regex, code_blocks, re.DOTALL)

    files = []
    for match in matches:
-        path = match.group(1).split("\n")[0]
+        # Strip the filename of any non-allowed characters and convert / to \
+        path = re.sub(r'[<>"|?*]', "", match.group(1))
+
        # Get the code
-        code = match.group(1).split("\n")[1:]
-        code = "\n".join(code)
+        code = match.group(2)
+
        # Add the file to the list
        files.append((path, code))

+    # Add README to the list
+    files.append(("README.txt", readme))
+
+    # Return the files
    return files


-def to_files(chat: str, workspace: DB):
+def to_files(chat, workspace):
    workspace["all_output.txt"] = chat

    files = parse_chat(chat)
--- a/gpt_engineer/db.py
+++ b/gpt_engineer/db.py
@@ -1,33 +1,51 @@
 from dataclasses import dataclass
-import os
 from pathlib import Path


+# This class represents a simple database that stores its data as files in a directory.
+# It supports both text and binary files, and can handle directory structures.
 class DB:
-    """A simple key-value store, where keys are filenames and values are file contents."""
-
    def __init__(self, path):
+        # Convert the path string to a Path object and get its absolute path.
        self.path = Path(path).absolute()
-        os.makedirs(self.path, exist_ok=True)
+
+        # Create the directory if it doesn't exist.
+        self.path.mkdir(parents=True, exist_ok=True)

    def __getitem__(self, key):
-        with open(self.path / key, encoding='utf-8') as f:
-            return f.read()
+        # Combine the database directory with the provided file path.
+        full_path = self.path / key
+
+        # Check if the file exists before trying to open it.
+        if full_path.is_file():
+            # Open the file in text mode and return its content.
+            with full_path.open("r") as f:
+                return f.read()
+        else:
+            # If the file doesn't exist, raise an error.
+            raise FileNotFoundError(f"No such file: '{full_path}'")

    def __setitem__(self, key, val):
-        Path(self.path / key).absolute().parent.mkdir(parents=True, exist_ok=True)
+        # Combine the database directory with the provided file path.
+        full_path = self.path / key

-        with open(self.path / key, 'w', encoding='utf-8') as f:
-            f.write(val)
+        # Create the directory tree if it doesn't exist.
+        full_path.parent.mkdir(parents=True, exist_ok=True)

-    def __contains__(self, key):
-        return (self.path / key).exists()
+        # Write the data to the file. If val is a string, it's written as text.
+        # If val is bytes, it's written as binary data.
+        if isinstance(val, str):
+            full_path.write_text(val)
+        elif isinstance(val, bytes):
+            full_path.write_bytes(val)
+        else:
+            # If val is neither a string nor bytes, raise an error.
+            raise TypeError("val must be either a str or bytes")


+# dataclass for all dbs:
@dataclass
 class DBs:
-    """A dataclass for all dbs"""
-
    memory: DB
    logs: DB
    identity: DB
--- a/gpt_engineer/main.py
+++ b/gpt_engineer/main.py
@@ -1,32 +1,41 @@
-import os
 import json
+import os
 import pathlib
+import shutil
+
 import typer

-from gpt_engineer.chat_to_files import to_files
 from gpt_engineer.ai import AI
-from gpt_engineer.steps import STEPS
 from gpt_engineer.db import DB, DBs
-
+from gpt_engineer.steps import STEPS

 app = typer.Typer()


@app.command()
 def chat(
-    project_path: str = typer.Argument(str(pathlib.Path(os.path.curdir) / "example"), help="path"),
+    project_path: str = typer.Argument("example", help="path"),
+    delete_existing: str = typer.Argument(None, help="delete existing files"),
    run_prefix: str = typer.Option(
        "",
-        help="run prefix, if you want to run multiple variants of the same project and later compare them",
+        help=(
+            "run prefix, if you want to run multiple variants of the same project and "
+            "later compare them",
+        ),
    ),
    model: str = "gpt-4",
    temperature: float = 0.1,
    steps_config: str = "default",
 ):
    app_dir = pathlib.Path(os.path.curdir)
-    input_path = project_path
-    memory_path = pathlib.Path(project_path) / (run_prefix + "memory")
-    workspace_path = pathlib.Path(project_path) / (run_prefix + "workspace")
+    input_path = pathlib.Path(app_dir / "projects" / project_path)
+    memory_path = input_path / (run_prefix + "memory")
+    workspace_path = input_path / (run_prefix + "workspace")
+
+    if delete_existing == "true":
+        # Delete files and subdirectories in paths
+        shutil.rmtree(memory_path, ignore_errors=True)
+        shutil.rmtree(workspace_path, ignore_errors=True)

    ai = AI(
        model=model,
@@ -45,5 +54,6 @@ def chat(
        messages = step(ai, dbs)
        dbs.logs[step.__name__] = json.dumps(messages)

+
 if __name__ == "__main__":
    app()
--- a/gpt_engineer/steps.py
+++ b/gpt_engineer/steps.py
@@ -2,9 +2,8 @@ import json
 import subprocess

 from gpt_engineer.ai import AI
-from gpt_engineer.chat_to_files import to_files
+from gpt_engineer.chat_to_files import parse_chat, to_files
 from gpt_engineer.db import DBs
-from gpt_engineer.chat_to_files import parse_chat


 def setup_sys_prompt(dbs):
@@ -54,7 +53,8 @@ def clarify(ai: AI, dbs: DBs):

 def gen_spec(ai: AI, dbs: DBs):
    """
-    Generate a spec from the main prompt + clarifications and save the results to the workspace
+    Generate a spec from the main prompt + clarifications and save the results to
+    the workspace
    """
    messages = [
        ai.fsystem(setup_sys_prompt(dbs)),
@@ -67,6 +67,7 @@ def gen_spec(ai: AI, dbs: DBs):

    return messages

+
 def respec(ai: AI, dbs: DBs):
    messages = dbs.logs[gen_spec.__name__]
    messages += [ai.fsystem(dbs.identity["respec"])]
@@ -75,10 +76,13 @@ def respec(ai: AI, dbs: DBs):
    messages = ai.next(
        messages,
        (
-            'Based on the conversation so far, please reiterate the specification for the program. '
-            'If there are things that can be improved, please incorporate the improvements. '
-            "If you are satisfied with the specification, just write out the specification word by word again."
-        )
+            "Based on the conversation so far, please reiterate the specification for "
+            "the program. "
+            "If there are things that can be improved, please incorporate the "
+            "improvements. "
+            "If you are satisfied with the specification, just write out the "
+            "specification word by word again."
+        ),
    )

    dbs.memory["specification"] = messages[-1]["content"]
@@ -116,6 +120,7 @@ def gen_clarified_code(ai: AI, dbs: DBs):
    to_files(messages[-1]["content"], dbs.workspace)
    return messages

+
 def gen_code(ai: AI, dbs: DBs):
    # get the messages from previous step

@@ -130,12 +135,6 @@ def gen_code(ai: AI, dbs: DBs):
    return messages


-def execute_workspace(ai: AI, dbs: DBs):
-    messages = gen_entrypoint(ai, dbs)
-    execute_entrypoint(ai, dbs)
-    return messages
-
-
 def execute_entrypoint(ai, dbs):
    command = dbs.workspace["run.sh"]

@@ -157,8 +156,10 @@ def execute_entrypoint(ai, dbs):
 def gen_entrypoint(ai, dbs):
    messages = ai.start(
        system=(
-            f"You will get information about a codebase that is currently on disk in the current folder.\n"
-            "From this you will answer with one code block that includes all the necessary macos terminal commands to "
+            "You will get information about a codebase that is currently on disk in "
+            f"the folder {dbs.workspace.path}.\n"
+            "From this you will answer with code blocks that includes all the necessary "
+            "Windows, MacOS, and Linux terminal commands to "
            "a) install dependencies "
            "b) run all necessary parts of the codebase (in parallell if necessary).\n"
            "Do not install globally. Do not use sudo.\n"
@@ -170,11 +171,16 @@ def gen_entrypoint(ai, dbs):

    blocks = parse_chat(messages[-1]["content"])
    for lang, _ in blocks:
-        assert lang in ["", "bash", "sh"], "Generated entrypoint command that was not bash"
+        assert lang in [
+            "",
+            "bash",
+            "sh",
+        ], "Generated entrypoint command that was not bash"

    dbs.workspace["run.sh"] = "\n".join(block for lang, block in blocks)
    return messages

+
 def use_feedback(ai: AI, dbs: DBs):
    messages = [
        ai.fsystem(setup_sys_prompt(dbs)),
@@ -182,18 +188,25 @@ def use_feedback(ai: AI, dbs: DBs):
        ai.fassistant(dbs.workspace["all_output.txt"]),
        ai.fsystem(dbs.identity["use_feedback"]),
    ]
-    messages = ai.next(messages, dbs.memory['feedback'])
+    messages = ai.next(messages, dbs.memory["feedback"])
    to_files(messages[-1]["content"], dbs.workspace)
    return messages


 # Different configs of what steps to run
 STEPS = {
-    "default": [gen_spec, gen_unit_tests, gen_code, execute_workspace],
+    "default": [gen_spec, gen_unit_tests, gen_code, gen_entrypoint, execute_entrypoint],
    "benchmark": [gen_spec, gen_unit_tests, gen_code, gen_entrypoint],
-    "simple": [simple_gen, execute_workspace],
-    "clarify": [clarify, gen_clarified_code, execute_workspace],
-    "respec": [gen_spec, respec, gen_unit_tests, gen_code, execute_workspace],
+    "simple": [simple_gen, gen_entrypoint, execute_entrypoint],
+    "clarify": [clarify, gen_clarified_code, gen_entrypoint, execute_entrypoint],
+    "respec": [
+        gen_spec,
+        respec,
+        gen_unit_tests,
+        gen_code,
+        gen_entrypoint,
+        execute_entrypoint,
+    ],
    "execute_only": [execute_entrypoint],
    "use_feedback": [use_feedback],
 }