mirror of
https://github.com/aljazceru/gpt-engineer.git
synced 2025-12-17 20:55:09 +01:00
Revert "Improved chat parsing with no AI logic (#120)"
This reverts commit 8facedd8d1.
This commit is contained in:
@@ -2,82 +2,21 @@ import re
|
||||
from typing import List, Tuple
|
||||
from gpt_engineer.db import DB
|
||||
|
||||
# Number of lines within the code block to consider for filename discovery
|
||||
N_CODELINES_FOR_FILENAME_TA = 5
|
||||
|
||||
# Default path to use if no filename is found
|
||||
DEFAULT_PATH = 'unknown.txt'
|
||||
def parse_chat(chat) -> List[Tuple[str, str]]:
|
||||
# Get all ``` blocks
|
||||
regex = r"```(.*?)```"
|
||||
|
||||
matches = re.finditer(regex, chat, re.DOTALL)
|
||||
|
||||
def parse_chat(chat: str, verbose: bool = False) -> List[Tuple[str, str]]:
|
||||
'''
|
||||
Parses a chat message and returns a list of tuples containing
|
||||
the file path and the code content for each file.
|
||||
'''
|
||||
code_regex = r'```(.*?)```'
|
||||
filename_regex = r'\b[\w-]+\.[\w]{1,6}\b'
|
||||
|
||||
# Get all ``` (code) blocks
|
||||
code_matches = re.finditer(code_regex, chat, re.DOTALL)
|
||||
|
||||
prev_code_y_end = 0
|
||||
files = []
|
||||
for match in code_matches:
|
||||
lines = match.group(1).split('\n')
|
||||
code_y_start = match.start()
|
||||
code_y_end = match.end()
|
||||
|
||||
# Now, we need to get the filename associated with this code block.
|
||||
# We will look for the filename somewhere near the code block start.
|
||||
#
|
||||
# This "somewhere near" is referred to as the "filename_ta", to
|
||||
# resemble a sort-of target area (ta).
|
||||
#
|
||||
# The target area includes the text preceding the code block that
|
||||
# does not belong to previous code blocks ("no_code").
|
||||
# Additionally, as sometimes the filename is defined within
|
||||
# the code block itself, we will also include the first few lines
|
||||
# of the code block in the filename_ta.
|
||||
#
|
||||
# Example:
|
||||
# ```python
|
||||
# # File: entrypoint.py
|
||||
# import pygame
|
||||
# ```
|
||||
#
|
||||
# The number of lines to consider within the code block is set by
|
||||
# the constant 'N_CODELINES_FOR_FILENAME_TA'.
|
||||
#
|
||||
# Get the "preceding" text, which is located between codeblocks
|
||||
no_code = chat[prev_code_y_end:code_y_start].strip()
|
||||
within_code = '\n'.join(lines[:N_CODELINES_FOR_FILENAME_TA])
|
||||
filename_ta = no_code + '\n' + within_code
|
||||
|
||||
# The path is the first filename-like match found in the target area
|
||||
filename = re.search(filename_regex, filename_ta)
|
||||
path = filename.group(0) if filename else DEFAULT_PATH
|
||||
|
||||
# Visualize the filename_ta if verbose
|
||||
if verbose:
|
||||
print('-' * 20)
|
||||
print(f'Path: {path}')
|
||||
print('-' * 20)
|
||||
print(filename_ta)
|
||||
print('-' * 20)
|
||||
|
||||
# Check that it's not a false positive
|
||||
#
|
||||
# For instance, the match with ```main.py``` should not be considered.
|
||||
# ```main.py```
|
||||
# ```python
|
||||
# ...
|
||||
# ```
|
||||
if not re.fullmatch(filename_regex, '\n'.join(lines)):
|
||||
# Update the previous code block end
|
||||
prev_code_y_end = code_y_end
|
||||
|
||||
# File and code have been matched, add them to the list
|
||||
files.append((path, '\n'.join(lines[1:])))
|
||||
for match in matches:
|
||||
path = match.group(1).split("\n")[0]
|
||||
# Get the code
|
||||
code = match.group(1).split("\n")[1:]
|
||||
code = "\n".join(code)
|
||||
# Add the file to the list
|
||||
files.append((path, code))
|
||||
|
||||
return files
|
||||
|
||||
|
||||
Reference in New Issue
Block a user