Fix the errors with parsing

2025-12-17 12:45:26 +01:00 · 2023-06-18 22:34:25 +02:00
parent 89d9b6e356
commit 8180f0346c
7 changed files with 26 additions and 33 deletions
--- a/gpt_engineer/ai.py
+++ b/gpt_engineer/ai.py
@@ -55,6 +55,7 @@ class AI:
            msg = delta.get("content", "")
            print(msg, end="")
            chat.append(msg)
        print()
        messages = messages + [{"role": "assistant", "content": "".join(chat)}]
        logger.debug(f"Chat completion finished: {messages}")
        return messages
--- a/gpt_engineer/chat_to_files.py
+++ b/gpt_engineer/chat_to_files.py
@@ -2,21 +2,9 @@ import re
 def parse_chat(chat):  # -> List[Tuple[str, str]]:
    # Split the chat into sections by the "*CODEBLOCKSBELOW*" token
    split_chat = chat.split("*CODEBLOCKSBELOW*")
    # Check if the "*CODEBLOCKSBELOW*" token was found
    is_token_found = len(split_chat) > 1
    # If the "*CODEBLOCKSBELOW*" token is found, use the first part as README
    # and second part as code blocks. Otherwise, treat README as optional and
    # proceed with empty README and the entire chat as code blocks
    readme = split_chat[0].strip() if is_token_found else "No readme"
    code_blocks = split_chat[1] if is_token_found else chat
    # Get all ``` blocks and preceding filenames
    regex = r"(\S+?)\n```\S+\n(.+?)```"
-    matches = re.finditer(regex, code_blocks, re.DOTALL)
+    matches = re.finditer(regex, chat, re.DOTALL)
    files = []
    for match in matches:
@@ -29,8 +17,9 @@ def parse_chat(chat):  # -> List[Tuple[str, str]]:
        # Add the file to the list
        files.append((path, code))
-    # Add README to the list
+    # Get all the text before the first ``` block
-    files.append(("README.txt", readme))
+    readme = chat.split("```")[0]
    files.append(("README.md", readme))
    # Return the files
    return files
--- a/gpt_engineer/steps.py
+++ b/gpt_engineer/steps.py
@@ -1,8 +1,9 @@
 import json
 import re
 import subprocess
 from gpt_engineer.ai import AI
-from gpt_engineer.chat_to_files import parse_chat, to_files
+from gpt_engineer.chat_to_files import to_files
 from gpt_engineer.db import DBs
@@ -169,15 +170,9 @@ def gen_entrypoint(ai, dbs):
    )
    print()
-    blocks = parse_chat(messages[-1]["content"])
+    regex = r"```\S*\n(.+?)```"
-    for lang, _ in blocks:
+    matches = re.finditer(regex, messages[-1]["content"], re.DOTALL)
-        assert lang in [
+    dbs.workspace["run.sh"] = "\n".join(match.group(1) for match in matches)
            "",
            "bash",
            "sh",
        ], "Generated entrypoint command that was not bash"
    dbs.workspace["run.sh"] = "\n".join(block for lang, block in blocks)
    return messages
--- a/identity/generate
+++ b/identity/generate
@@ -5,15 +5,15 @@ Make sure that every detail of the architecture is, in the end, implemented as c
 Think step by step and reason yourself to the right decisions to make sure we get it right.
 You will first lay out the names of the core classes, functions, methods that will be necessary, as well as a quick comment on their purpose.
-Then you will output the content of each file, with syntax below, including ALL code.
+Then you will output the content of each file including ALL code.
 Each file must strictly follow a markdown code block format, where the following tokens must be replaced such that
 [FILENAME] is the lowercase file name including the file extension,
 [LANG] is the markup code block language for the code's language, and [CODE] is the code:
 Syntax:
 [FILENAME]
 ```[LANG]
 [CODE]
 ```
 Where [FILENAME] is the lowercase file name including the file extension,
 [LANG] is the language for the code's language, and [CODE] is the code:
 You will start with the "entrypoint" file, then go to the ones that are imported by that file, and so on.
 Please note that the code should be fully functional. No placeholders.
--- a/identity/philosophy
+++ b/identity/philosophy
@@ -1,6 +1,11 @@
 You almost always put different classes in different files.
 For Python, you always create an appropriate requirements.txt file.
 For NodeJS, you always create an appropriate package.json file.
 You always add a comment briefly describing the purpose of the function definition.
 You try to add comments explaining very complex bits of logic.
 You always follow the best practices for the requested languages in terms of describing the code written as a defined
 package/project.
 Python toolbelt preferences:
 - pytest
--- a/identity/use_qa
+++ b/identity/use_qa
@@ -3,15 +3,16 @@ Please now remember the steps:
 Think step by step and reason yourself to the right decisions to make sure we get it right.
 First lay out the names of the core classes, functions, methods that will be necessary, as well as a quick comment on their purpose.
-Then you will output the content of each file, with syntax below, including ALL code.
+Then you will output the content of each file including ALL code.
 Each file must strictly follow a markdown code block format, where the following tokens must be replaced such that
 [FILENAME] is the lowercase file name including the file extension,
 [LANG] is the markup code block language for the code's language, and [CODE] is the code:
 Syntax:
 [FILENAME]
 ```[LANG]
 [CODE]
 ```
 Where [FILENAME] is the lowercase file name including the file extension,
 [LANG] is the language for the code's language, and [CODE] is the code:
 Please note that the code should be fully functional. No placeholders.
--- a/scripts/benchmark.py
+++ b/scripts/benchmark.py
@@ -44,7 +44,9 @@ def main(
            )
            benchmarks.append((bench_folder, process, log_file))
-            print("You can stream the log file by running: tail -f {}".format(log_path))
+            print("You can stream the log file by running:")
            print("tail -f {}".format(log_path))
            print()
    for bench_folder, process, file in benchmarks:
        process.wait()