mirror of
https://github.com/aljazceru/gpt-engineer.git
synced 2025-12-18 21:25:11 +01:00
Revert "Improved chat parsing with no AI logic (#120)"
This reverts commit 8facedd8d1.
This commit is contained in:
@@ -2,82 +2,21 @@ import re
|
|||||||
from typing import List, Tuple
|
from typing import List, Tuple
|
||||||
from gpt_engineer.db import DB
|
from gpt_engineer.db import DB
|
||||||
|
|
||||||
# Amount of lines within the code block to consider for filename discovery
|
|
||||||
N_CODELINES_FOR_FILENAME_TA = 5
|
|
||||||
|
|
||||||
# Default path to use if no filename is found
|
def parse_chat(chat) -> List[Tuple[str, str]]:
|
||||||
DEFAULT_PATH = 'unknown.txt'
|
# Get all ``` blocks
|
||||||
|
regex = r"```(.*?)```"
|
||||||
|
|
||||||
|
matches = re.finditer(regex, chat, re.DOTALL)
|
||||||
|
|
||||||
def parse_chat(chat: str, verbose: bool = False) -> List[Tuple[str, str]]:
|
|
||||||
'''
|
|
||||||
Parses a chat message and returns a list of tuples containing
|
|
||||||
the file path and the code content for each file.
|
|
||||||
'''
|
|
||||||
code_regex = r'```(.*?)```'
|
|
||||||
filename_regex = r'\b[\w-]+\.[\w]{1,6}\b'
|
|
||||||
|
|
||||||
# Get all ``` (code) blocks
|
|
||||||
code_matches = re.finditer(code_regex, chat, re.DOTALL)
|
|
||||||
|
|
||||||
prev_code_y_end = 0
|
|
||||||
files = []
|
files = []
|
||||||
for match in code_matches:
|
for match in matches:
|
||||||
lines = match.group(1).split('\n')
|
path = match.group(1).split("\n")[0]
|
||||||
code_y_start = match.start()
|
# Get the code
|
||||||
code_y_end = match.end()
|
code = match.group(1).split("\n")[1:]
|
||||||
|
code = "\n".join(code)
|
||||||
# Now, we need to get the filename associated with this code block.
|
# Add the file to the list
|
||||||
# We will look for the filename somewhere near the code block start.
|
files.append((path, code))
|
||||||
#
|
|
||||||
# This "somewhere near" is referred to as the "filename_ta", to
|
|
||||||
# resemble a sort-of target area (ta).
|
|
||||||
#
|
|
||||||
# The target area includes the text preceding the code block that
|
|
||||||
# does not belong to previous code blocks ("no_code").
|
|
||||||
# Additionally, as sometimes the filename is defined within
|
|
||||||
# the code block itself, we will also include the first few lines
|
|
||||||
# of the code block in the filename_ta.
|
|
||||||
#
|
|
||||||
# Example:
|
|
||||||
# ```python
|
|
||||||
# # File: entrypoint.py
|
|
||||||
# import pygame
|
|
||||||
# ```
|
|
||||||
#
|
|
||||||
# The amount of lines to consider within the code block is set by
|
|
||||||
# the constant 'N_CODELINES_FOR_FILENAME_TA'.
|
|
||||||
#
|
|
||||||
# Get the "preceding" text, which is located between codeblocks
|
|
||||||
no_code = chat[prev_code_y_end:code_y_start].strip()
|
|
||||||
within_code = '\n'.join(lines[:N_CODELINES_FOR_FILENAME_TA])
|
|
||||||
filename_ta = no_code + '\n' + within_code
|
|
||||||
|
|
||||||
# The path is the filename itself which we greedily match
|
|
||||||
filename = re.search(filename_regex, filename_ta)
|
|
||||||
path = filename.group(0) if filename else DEFAULT_PATH
|
|
||||||
|
|
||||||
# Visualize the filename_ta if verbose
|
|
||||||
if verbose:
|
|
||||||
print('-' * 20)
|
|
||||||
print(f'Path: {path}')
|
|
||||||
print('-' * 20)
|
|
||||||
print(filename_ta)
|
|
||||||
print('-' * 20)
|
|
||||||
|
|
||||||
# Check if it's not a false positive
|
|
||||||
#
|
|
||||||
# For instance, the match with ```main.py``` should not be considered.
|
|
||||||
# ```main.py```
|
|
||||||
# ```python
|
|
||||||
# ...
|
|
||||||
# ```
|
|
||||||
if not re.fullmatch(filename_regex, '\n'.join(lines)):
|
|
||||||
# Update the previous code block end
|
|
||||||
prev_code_y_end = code_y_end
|
|
||||||
|
|
||||||
# File and code have been matched, add them to the list
|
|
||||||
files.append((path, '\n'.join(lines[1:])))
|
|
||||||
|
|
||||||
return files
|
return files
|
||||||
|
|
||||||
|
|||||||
@@ -1,129 +0,0 @@
|
|||||||
import unittest
|
|
||||||
from gpt_engineer.chat_to_files import parse_chat
|
|
||||||
|
|
||||||
CODE_FORMATS = '''
|
|
||||||
(1)
|
|
||||||
File: main.py
|
|
||||||
|
|
||||||
```python
|
|
||||||
import pygame
|
|
||||||
````
|
|
||||||
|
|
||||||
(2)
|
|
||||||
entry.py
|
|
||||||
```python
|
|
||||||
import pygame
|
|
||||||
```
|
|
||||||
|
|
||||||
(3)
|
|
||||||
```python
|
|
||||||
# File: rickroll.py
|
|
||||||
import pygame
|
|
||||||
```
|
|
||||||
|
|
||||||
(4)
|
|
||||||
```python
|
|
||||||
|
|
||||||
# File: engineer.py
|
|
||||||
import pygame
|
|
||||||
```
|
|
||||||
|
|
||||||
(5)
|
|
||||||
```adastra.py
|
|
||||||
import pygame
|
|
||||||
```
|
|
||||||
|
|
||||||
(6)
|
|
||||||
```python bird.py
|
|
||||||
import pygame
|
|
||||||
```
|
|
||||||
|
|
||||||
(7)
|
|
||||||
```obstacle.py python
|
|
||||||
import pygame
|
|
||||||
```
|
|
||||||
|
|
||||||
(8)
|
|
||||||
```major1.py````
|
|
||||||
```python
|
|
||||||
import pygame
|
|
||||||
```
|
|
||||||
|
|
||||||
(9)
|
|
||||||
```major2.py````
|
|
||||||
```python
|
|
||||||
import pygame
|
|
||||||
```
|
|
||||||
|
|
||||||
(10)
|
|
||||||
```js
|
|
||||||
// File: bruh.js
|
|
||||||
const a = 1;
|
|
||||||
```
|
|
||||||
|
|
||||||
(11)
|
|
||||||
```swag.tsx
|
|
||||||
// File: swag.tsx
|
|
||||||
const a: number = 1;
|
|
||||||
```
|
|
||||||
|
|
||||||
(12)
|
|
||||||
```gmoita.ts
|
|
||||||
// File: gmoita.tsx
|
|
||||||
const a: number = 1;
|
|
||||||
```
|
|
||||||
|
|
||||||
(13)
|
|
||||||
** file1.py **
|
|
||||||
```python
|
|
||||||
import pygame
|
|
||||||
```
|
|
||||||
|
|
||||||
(14)
|
|
||||||
**file2.py**
|
|
||||||
```python
|
|
||||||
import pygame
|
|
||||||
```
|
|
||||||
|
|
||||||
(15)
|
|
||||||
#### `gm.py`
|
|
||||||
```python
|
|
||||||
import pygame
|
|
||||||
```
|
|
||||||
'''
|
|
||||||
|
|
||||||
class TestChatParsing(unittest.TestCase):
|
|
||||||
|
|
||||||
def setUp(self):
|
|
||||||
self._expected_filenames = (
|
|
||||||
'main.py',
|
|
||||||
'entry.py',
|
|
||||||
'rickroll.py',
|
|
||||||
'engineer.py',
|
|
||||||
'adastra.py',
|
|
||||||
'bird.py',
|
|
||||||
'obstacle.py',
|
|
||||||
'major1.py',
|
|
||||||
'major2.py',
|
|
||||||
'bruh.js',
|
|
||||||
'swag.tsx',
|
|
||||||
'gmoita.ts',
|
|
||||||
'file1.py',
|
|
||||||
'file2.py',
|
|
||||||
'gm.py',
|
|
||||||
)
|
|
||||||
self.chat = CODE_FORMATS
|
|
||||||
|
|
||||||
def test_parsing(self):
|
|
||||||
codefiles = parse_chat(self.chat)
|
|
||||||
|
|
||||||
self.assertEqual(len(codefiles), len(self._expected_filenames))
|
|
||||||
for i, cf in enumerate(codefiles):
|
|
||||||
filename, content = cf
|
|
||||||
|
|
||||||
self.assertEqual(filename, self._expected_filenames[i])
|
|
||||||
self.assertNotEqual(content, '')
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
|
||||||
unittest.main()
|
|
||||||
|
|
||||||
Reference in New Issue
Block a user