From 36d455c20e52aa1e09766a01c880f7914c5c24b7 Mon Sep 17 00:00:00 2001 From: Maiko Bossuyt Date: Wed, 12 Apr 2023 23:31:26 +0200 Subject: [PATCH] split_file() rework rework the split_file function to make it simple and only have one yield while providing an overlap at the start and end of each chunk --- scripts/file_operations.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/scripts/file_operations.py b/scripts/file_operations.py index db4702c5..c12774b9 100644 --- a/scripts/file_operations.py +++ b/scripts/file_operations.py @@ -35,15 +35,12 @@ def split_file(content, max_length=4000, overlap=0): while start < content_length: end = start + max_length - chunk = content[start:end] + if end + overlap < content_length: + chunk = content[start:end+overlap] + else: + chunk = content[start:content_length] yield chunk start += max_length - overlap - if start + max_length > content_length: - start = content_length - max_length - end = content_length - chunk = content[start:end] - yield chunk - break def read_file(filename): """Read a file and return the contents"""