Fix the errors with parsing

This commit is contained in:
Anton Osika
2023-06-18 22:34:25 +02:00
parent 89d9b6e356
commit 8180f0346c
7 changed files with 26 additions and 33 deletions

View File

@@ -55,6 +55,7 @@ class AI:
msg = delta.get("content", "")
print(msg, end="")
chat.append(msg)
print()
messages = messages + [{"role": "assistant", "content": "".join(chat)}]
logger.debug(f"Chat completion finished: {messages}")
return messages

View File

@@ -2,21 +2,9 @@ import re
def parse_chat(chat): # -> List[Tuple[str, str]]:
# Split the chat into sections by the "*CODEBLOCKSBELOW*" token
split_chat = chat.split("*CODEBLOCKSBELOW*")
# Check if the "*CODEBLOCKSBELOW*" token was found
is_token_found = len(split_chat) > 1
# If the "*CODEBLOCKSBELOW*" token is found, use the first part as README
# and second part as code blocks. Otherwise, treat README as optional and
# proceed with empty README and the entire chat as code blocks
readme = split_chat[0].strip() if is_token_found else "No readme"
code_blocks = split_chat[1] if is_token_found else chat
# Get all ``` blocks and preceding filenames
regex = r"(\S+?)\n```\S+\n(.+?)```"
matches = re.finditer(regex, code_blocks, re.DOTALL)
matches = re.finditer(regex, chat, re.DOTALL)
files = []
for match in matches:
@@ -29,8 +17,9 @@ def parse_chat(chat): # -> List[Tuple[str, str]]:
# Add the file to the list
files.append((path, code))
# Add README to the list
files.append(("README.txt", readme))
# Get all the text before the first ``` block
readme = chat.split("```")[0]
files.append(("README.md", readme))
# Return the files
return files

View File

@@ -1,8 +1,9 @@
import json
import re
import subprocess
from gpt_engineer.ai import AI
from gpt_engineer.chat_to_files import parse_chat, to_files
from gpt_engineer.chat_to_files import to_files
from gpt_engineer.db import DBs
@@ -169,15 +170,9 @@ def gen_entrypoint(ai, dbs):
)
print()
blocks = parse_chat(messages[-1]["content"])
for lang, _ in blocks:
assert lang in [
"",
"bash",
"sh",
], "Generated entrypoint command that was not bash"
dbs.workspace["run.sh"] = "\n".join(block for lang, block in blocks)
regex = r"```\S*\n(.+?)```"
matches = re.finditer(regex, messages[-1]["content"], re.DOTALL)
dbs.workspace["run.sh"] = "\n".join(match.group(1) for match in matches)
return messages

View File

@@ -5,15 +5,15 @@ Make sure that every detail of the architecture is, in the end, implemented as c
Think step by step and reason yourself to the right decisions to make sure we get it right.
You will first lay out the names of the core classes, functions, methods that will be necessary, as well as a quick comment on their purpose.
Then you will output the content of each file, with syntax below, including ALL code.
Then you will output the content of each file including ALL code.
Each file must strictly follow a markdown code block format, where the following tokens must be replaced such that
[FILENAME] is the lowercase file name including the file extension,
[LANG] is the markdown code block language identifier for the code's language, and [CODE] is the code:
Syntax:
[FILENAME]
```[LANG]
[CODE]
```
Where [FILENAME] is the lowercase file name including the file extension,
[LANG] is the language for the code's language, and [CODE] is the code:
You will start with the "entrypoint" file, then go to the ones that are imported by that file, and so on.
Please note that the code should be fully functional. No placeholders.

View File

@@ -1,6 +1,11 @@
You almost always put different classes in different files.
For Python, you always create an appropriate requirements.txt file.
For NodeJS, you always create an appropriate package.json file.
You always add a comment briefly describing the purpose of the function definition.
You try to add comments explaining very complex bits of logic.
You always follow the best practices for the requested languages in terms of describing the code written as a defined
package/project.
Python toolbelt preferences:
- pytest

View File

@@ -3,15 +3,16 @@ Please now remember the steps:
Think step by step and reason yourself to the right decisions to make sure we get it right.
First lay out the names of the core classes, functions, methods that will be necessary, as well as a quick comment on their purpose.
Then you will output the content of each file, with syntax below, including ALL code.
Then you will output the content of each file including ALL code.
Each file must strictly follow a markdown code block format, where the following tokens must be replaced such that
[FILENAME] is the lowercase file name including the file extension,
[LANG] is the markdown code block language identifier for the code's language, and [CODE] is the code:
Syntax:
[FILENAME]
```[LANG]
[CODE]
```
Where [FILENAME] is the lowercase file name including the file extension,
[LANG] is the language for the code's language, and [CODE] is the code:
Please note that the code should be fully functional. No placeholders.

View File

@@ -44,7 +44,9 @@ def main(
)
benchmarks.append((bench_folder, process, log_file))
print("You can stream the log file by running: tail -f {}".format(log_path))
print("You can stream the log file by running:")
print("tail -f {}".format(log_path))
print()
for bench_folder, process, file in benchmarks:
process.wait()