Revert "Improved chat parsing with no AI logic (#120)"

This reverts commit 8facedd8d1.
2025-12-17 20:55:09 +01:00 · 2023-06-18 14:25:49 +02:00
parent 8facedd8d1
commit c999f7c2c8
2 changed files with 11 additions and 201 deletions
--- a/gpt_engineer/chat_to_files.py
+++ b/gpt_engineer/chat_to_files.py
@@ -2,82 +2,21 @@ import re
 from typing import List, Tuple
 from gpt_engineer.db import DB

-# Amount of lines within the code block to consider for filename discovery
-N_CODELINES_FOR_FILENAME_TA = 5

-# Default path to use if no filename is found
-DEFAULT_PATH = 'unknown.txt'
+def parse_chat(chat) -> List[Tuple[str, str]]:
+    # Get all ``` blocks
+    regex = r"```(.*?)```"

+    matches = re.finditer(regex, chat, re.DOTALL)

-def parse_chat(chat: str, verbose: bool = False) -> List[Tuple[str, str]]:
-    '''
-    Parses a chat message and returns a list of tuples containing
-    the file path and the code content for each file.
-    '''
-    code_regex = r'```(.*?)```'
-    filename_regex = r'\b[\w-]+\.[\w]{1,6}\b'
-
-    # Get all ``` (code) blocks
-    code_matches = re.finditer(code_regex, chat, re.DOTALL)
-    
-    prev_code_y_end = 0
    files = []
-    for match in code_matches:
-        lines = match.group(1).split('\n')
-        code_y_start = match.start()
-        code_y_end = match.end()
-
-        # Now, we need to get the filename associated with this code block.
-        # We will look for the filename somewhere near the code block start.
-        #
-        # This "somewhere near" is referred to as the "filename_ta", to
-        # resemble a sort-of target area (ta).
-        #
-        # The target area includes the text preceding the code block that
-        # does not belong to previous code blocks ("no_code").
-        # Additionally, as sometimes the filename is defined within
-        # the code block itself, we will also include the first few lines
-        # of the code block in the filename_ta.
-        #
-        # Example:
-        # ```python
-        # # File: entrypoint.py
-        # import pygame
-        # ```
-        #
-        # The amount of lines to consider within the code block is set by
-        # the constant 'N_CODELINES_FOR_FILENAME_TA'.
-        #
-        # Get the "preceding" text, which is located between codeblocks
-        no_code = chat[prev_code_y_end:code_y_start].strip()
-        within_code = '\n'.join(lines[:N_CODELINES_FOR_FILENAME_TA])
-        filename_ta = no_code + '\n' + within_code
-        
-        # The path is the filename itself which we greedily match
-        filename = re.search(filename_regex, filename_ta)
-        path = filename.group(0) if filename else DEFAULT_PATH
-
-        # Visualize the filename_ta if verbose
-        if verbose:
-            print('-' * 20)
-            print(f'Path: {path}')
-            print('-' * 20)
-            print(filename_ta)
-            print('-' * 20)
-        
-        # Check if its not a false positive
-        #
-        # For instance, the match with ```main.py``` should not be considered.
-        # ```main.py```
-        # ```python
-        # ...
-        # ```
-        if not re.fullmatch(filename_regex, '\n'.join(lines)):
-            # Update the previous code block end
-            prev_code_y_end = code_y_end
-
-            # File and code have been matched, add them to the list
-            files.append((path, '\n'.join(lines[1:])))
+    for match in matches:
+        path = match.group(1).split("\n")[0]
+        # Get the code
+        code = match.group(1).split("\n")[1:]
+        code = "\n".join(code)
+        # Add the file to the list
+        files.append((path, code))

    return files