split_file() rework

Rework the split_file function to make it simpler, with only a single yield, while providing an overlap at the start and end of each chunk.
This commit is contained in:
Maiko Bossuyt
2023-04-12 23:31:26 +02:00
parent 2f1181f9a1
commit 36d455c20e

View File

@@ -35,15 +35,12 @@ def split_file(content, max_length=4000, overlap=0):
while start < content_length: while start < content_length:
end = start + max_length end = start + max_length
chunk = content[start:end] if end + overlap < content_length:
chunk = content[start:end+overlap]
else:
chunk = content[start:content_length]
yield chunk yield chunk
start += max_length - overlap start += max_length - overlap
if start + max_length > content_length:
start = content_length - max_length
end = content_length
chunk = content[start:end]
yield chunk
break
def read_file(filename): def read_file(filename):
"""Read a file and return the contents""" """Read a file and return the contents"""