Add support for directory paths in filenames and improve code splitting

- Enforce an explicit markdown code block format
- Add a token to split the output to clearly detect when the code blocks start
- Save all non-code output to a `README.md` file
- Update RegEx to extract and strip text more reliably and clean up the output
- Update the identify prompts appropriately
2025-12-17 12:45:26 +01:00 · 2023-06-18 10:06:36 +02:00
parent e29e7bec2f
commit e7df947b98
3 changed files with 48 additions and 29 deletions
--- a/gpt_engineer/chat_to_files.py
+++ b/gpt_engineer/chat_to_files.py
@@ -1,28 +1,41 @@
 import re
-from typing import List, Tuple
-from gpt_engineer.db import DB

+def parse_chat(chat):# -> List[Tuple[str, str]]:
+    # Split the chat into sections by the '*CODEBLOCKSBELOW*' token
+    split_chat = chat.split('*CODEBLOCKSBELOW*')

-def parse_chat(chat) -> List[Tuple[str, str]]:
-    # Get all ``` blocks
-    regex = r"```(.*?)```"
+    # Check if the '*CODEBLOCKSBELOW*' token was found
+    is_token_found = len(split_chat) > 1

-    matches = re.finditer(regex, chat, re.DOTALL)
+    # If the '*CODEBLOCKSBELOW*' token is found, use the first part as README and second part as code blocks.
+    # Otherwise, treat README as optional: use the placeholder text 'No readme' and parse the entire chat as code blocks
+    readme = split_chat[0].strip() if is_token_found else 'No readme'
+    code_blocks = split_chat[1] if is_token_found else chat
+
+    # Get all ``` blocks and preceding filenames
+    regex = r"\[(.*?)\]\s*```.*?\n(.*?)```"
+    matches = re.finditer(regex, code_blocks, re.DOTALL)

    files = []
    for match in matches:
-        path = match.group(1).split("\n")[0]
+        # Strip characters that are not allowed in filenames (< > " | ? *); path separators are left unchanged
+        path = re.sub(r'[<>"|?*]', '', match.group(1))
+
        # Get the code
-        code = match.group(1).split("\n")[1:]
-        code = "\n".join(code)
+        code = match.group(2)
+
        # Add the file to the list
        files.append((path, code))

+    # Add README to the list
+    files.append(('README.txt', readme))
+
+    # Return the files
    return files


-def to_files(chat: str, workspace: DB):
-    workspace["all_output.txt"] = chat
+def to_files(chat, workspace):
+    workspace['all_output.txt'] = chat

    files = parse_chat(chat)
    for file_name, file_content in files: