From 8b3270f694477e800fc8ee504c7b3feb349d099d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Florian=20Ho=CC=88nicke?= <hoenicke.florian@gmail.com>
Date: Wed, 24 May 2023 01:17:28 +0200
Subject: [PATCH] =?UTF-8?q?=E2=9B=B0=20fix:=20prevent=20large=20string=20c?=
 =?UTF-8?q?ontent=20in=20error=20message?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 dev_gpt/apis/jina_cloud.py    |  7 +++++--
 dev_gpt/utils/string_tools.py |  9 ++++++++-
 test/unit/test_strings.py     | 14 +++++++++++++-
 3 files changed, 26 insertions(+), 4 deletions(-)

diff --git a/dev_gpt/apis/jina_cloud.py b/dev_gpt/apis/jina_cloud.py
index ac0953b..fc204f3 100644
--- a/dev_gpt/apis/jina_cloud.py
+++ b/dev_gpt/apis/jina_cloud.py
@@ -18,7 +18,7 @@ from jina import Flow
 
 from dev_gpt.constants import DEMO_TOKEN
 from dev_gpt.utils.io import suppress_stdout, is_docker_running
-from dev_gpt.utils.string_tools import print_colored
+from dev_gpt.utils.string_tools import print_colored, clean_large_words
 
 
 def wait_until_app_is_ready(url):
@@ -309,6 +309,7 @@ def clean_color_codes(response):
     response = re.sub(r'\x1b\[[0-9;]*m', '', response)
     return response
 
+
 def process_error_message(error_message):
     lines = error_message.split('\n')
 
@@ -330,10 +331,12 @@ def process_error_message(error_message):
 
     response = clean_color_codes(response)
 
+    # shorten very long base64-like sequences so that they do not bloat the error message.
+    response = clean_large_words(response)
+
     # the following code tests the case that the docker file is corrupted and can not be parsed
     # the method above will not return a relevant error message in this case
     # but the last line of the error message will start with "error"
-
     last_line = lines[-1]
     if not response and last_line.startswith('error: '):
         return last_line
diff --git a/dev_gpt/utils/string_tools.py b/dev_gpt/utils/string_tools.py
index de4ac5c..ba759a9 100644
--- a/dev_gpt/utils/string_tools.py
+++ b/dev_gpt/utils/string_tools.py
@@ -1,6 +1,7 @@
 import os
 import platform
 import string
+import re
 
 if platform.system() == "Windows":
     os.system("color")
@@ -39,4 +40,10 @@ def get_template_parameters(formatted_string):
         if field_name is not None:
             parameters.append(field_name)
 
-    return parameters
\ No newline at end of file
+    return parameters
+
+def clean_large_words(text):
+    """Shorten long base64-like words (240+ characters) to their first and last 20 characters joined by '...'."""
+    pattern = r'\b([a-zA-Z0-9+/]{20})([a-zA-Z0-9+/]{200,})([a-zA-Z0-9+/]{20})\b'
+    cleaned_text = re.sub(pattern, r'\1...\3', text)
+    return cleaned_text
\ No newline at end of file
diff --git a/test/unit/test_strings.py b/test/unit/test_strings.py
index 8a77211..eabdfdd 100644
--- a/test/unit/test_strings.py
+++ b/test/unit/test_strings.py
@@ -1,4 +1,5 @@
 from dev_gpt.apis.jina_cloud import clean_color_codes
+from dev_gpt.utils.string_tools import clean_large_words
 
 
 def test_clean_color_codes():
@@ -8,4 +9,15 @@ def test_clean_color_codes():
     color = f"{bold_start}{color_start}test{reset}"
     cleaned = clean_color_codes(color)
     print('with color codes:', color)
-    print('without color codes:', cleaned)
\ No newline at end of file
+    print('without color codes:', cleaned)
+
+
+def test_clean_large_words():
+    assert clean_large_words(
+        '''test 2VAzLpbBUDBInhtN5ToJZAXL8L6F4J+Xr/L/42vs2r+9Pb0E3Y1ZLy7E3GsYRzAqQ037iKABMHL9VDoAaBAuAGgQLgBoEC4AaBAuAGgQLgB\
+oEC4AaBAuAGgQLgBoEC4AaBAuAGgQLgBoEC4AaBAuAGgQLgBoEC4AaBAuAGgQLgBoEC4AaBAuAGgQLgBoEC4AaBAuAGgQLgBoEC4AaBAuAG\
+gQLgBoEC4AaBAuAGgQLgBoEC4AaBAuAGgQLgBoEC4AaBAuAGgQLgBoEC4AaBAuAGgQLgBoEC4AaBAuAGgQLgBoEC4AaBAuAGgQLgBoEC4Aa\
+BAuAGgQLgBoEC4AaBAuAGgQLgBoEC4AaBAuAGgQLgBoEC4AaBAuAGgQLgBoEC4AaBAuAGgQLgBoEC4AaBAuAGgQLgBoEC4AaBAuAGgQLgBo test'''
+    ) == 'test 2VAzLpbBUDBInhtN5ToJ...LgBoEC4AaBAuAGgQLgBo test'
+
+    assert clean_large_words('2VAzLpbBUDBInhtN5ToJZAXL8L6F4J+Xr/L/4') == '2VAzLpbBUDBInhtN5ToJZAXL8L6F4J+Xr/L/4'