Add support for directory paths in filenames and improve code splitting

- Enforce an explicit markdown code block format
- Add a token to split the output to clearly detect when the code blocks start
- Save all non-code output to a `README.md` file
- Update RegEx to extract and strip text more reliably and clean up the output
- Update the identity prompts appropriately
This commit is contained in:
Enzo Martin
2023-06-18 10:06:36 +02:00
parent e29e7bec2f
commit e7df947b98
3 changed files with 48 additions and 29 deletions

View File

@@ -1,28 +1,41 @@
import re
from typing import List, Tuple
from gpt_engineer.db import DB
def parse_chat(chat: str) -> list:
    """Split a chat response into ``(path, content)`` file entries.

    The text before the first ``*CODEBLOCKSBELOW*`` token is treated as the
    README; everything after it is scanned for code blocks of the form::

        [path/to/file.ext]
        ```lang
        ...code...
        ```

    If the token is absent, the whole chat is scanned for code blocks and the
    README content falls back to the placeholder ``'No readme'``.

    Args:
        chat: Raw chat text to parse.

    Returns:
        A list of ``(path, content)`` tuples, one per code block in order of
        appearance, followed by a final ``('README.txt', readme)`` entry.
    """
    token = '*CODEBLOCKSBELOW*'

    # partition() splits on the FIRST token only, so a repeated token does not
    # silently discard the code blocks that follow it.
    head, found, tail = chat.partition(token)
    if found:
        readme = head.strip()
        code_blocks = tail
    else:
        readme = 'No readme'
        code_blocks = chat

    # A bracketed filename on a single line, then a fenced block. The filename
    # group excludes ']' and newlines so that an unrelated '[' earlier in the
    # text cannot swallow several lines into the path. The optional language
    # tag after the opening fence is discarded.
    regex = r"\[([^\]\n]+)\]\s*```[^\n]*\n(.*?)```"

    files = []
    for match in re.finditer(regex, code_blocks, re.DOTALL):
        # Drop characters that are not allowed in file names and trim
        # surrounding whitespace. Path separators are left untouched so
        # directory paths are preserved.
        path = re.sub(r'[<>"|?*]', '', match.group(1)).strip()
        if not path:
            # Nothing usable is left to name the file with.
            continue

        files.append((path, match.group(2)))

    # NOTE(review): the commit message mentions `README.md`, but the code
    # stores the non-code output under 'README.txt' -- confirm intended name.
    files.append(('README.txt', readme))

    return files
def to_files(chat: str, workspace: DB):
workspace["all_output.txt"] = chat
def to_files(chat, workspace):
workspace['all_output.txt'] = chat
files = parse_chat(chat)
for file_name, file_content in files: