From 8b3270f694477e800fc8ee504c7b3feb349d099d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Florian=20Ho=CC=88nicke?= Date: Wed, 24 May 2023 01:17:28 +0200 Subject: [PATCH] =?UTF-8?q?=E2=9B=B0=20fix:=20prevent=20large=20string=20c?= =?UTF-8?q?ontent=20in=20error=20message?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- dev_gpt/apis/jina_cloud.py | 7 +++++-- dev_gpt/utils/string_tools.py | 9 ++++++++- test/unit/test_strings.py | 14 +++++++++++++- 3 files changed, 26 insertions(+), 4 deletions(-) diff --git a/dev_gpt/apis/jina_cloud.py b/dev_gpt/apis/jina_cloud.py index ac0953b..fc204f3 100644 --- a/dev_gpt/apis/jina_cloud.py +++ b/dev_gpt/apis/jina_cloud.py @@ -18,7 +18,7 @@ from jina import Flow from dev_gpt.constants import DEMO_TOKEN from dev_gpt.utils.io import suppress_stdout, is_docker_running -from dev_gpt.utils.string_tools import print_colored +from dev_gpt.utils.string_tools import print_colored, clean_large_words def wait_until_app_is_ready(url): @@ -309,6 +309,7 @@ def clean_color_codes(response): response = re.sub(r'\x1b\[[0-9;]*m', '', response) return response + def process_error_message(error_message): lines = error_message.split('\n') @@ -330,10 +331,12 @@ def process_error_message(error_message): response = clean_color_codes(response) + # the following code makes sure that the error message is cleaned from irrelevant sequences of e.g. base64 strings. + response = clean_large_words(response) + # the following code tests the case that the docker file is corrupted and can not be parsed # the method above will not return a relevant error message in this case # but the last line of the error message will start with "error" - last_line = lines[-1] if not response and last_line.startswith('error: '): return last_line diff --git a/dev_gpt/utils/string_tools.py b/dev_gpt/utils/string_tools.py index de4ac5c..ba759a9 100644 --- a/dev_gpt/utils/string_tools.py +++ b/dev_gpt/utils/string_tools.py @@ -1,6 +1,7 @@ import os import platform import string +import re if platform.system() == "Windows": os.system("color") @@ -39,4 +40,10 @@ def get_template_parameters(formatted_string): if field_name is not None: parameters.append(field_name) - return parameters \ No newline at end of file + return parameters + +def clean_large_words(text): + """Large words like base64 strings are returned by omitting the middle part of the word.""" + pattern = r'\b([a-zA-Z0-9+/]{20})([a-zA-Z0-9+/]{200,})([a-zA-Z0-9+/]{20})\b' + cleaned_text = re.sub(pattern, r'\1...\3', text) + return cleaned_text \ No newline at end of file diff --git a/test/unit/test_strings.py b/test/unit/test_strings.py index 8a77211..eabdfdd 100644 --- a/test/unit/test_strings.py +++ b/test/unit/test_strings.py @@ -1,4 +1,5 @@ from dev_gpt.apis.jina_cloud import clean_color_codes +from dev_gpt.utils.string_tools import clean_large_words def test_clean_color_codes(): @@ -8,4 +9,15 @@ def test_clean_color_codes(): color = f"{bold_start}{color_start}test{reset}" cleaned = clean_color_codes(color) print('with color codes:', color) - print('without color codes:', cleaned) \ No newline at end of file + print('without color codes:', cleaned) + + +def test_clean_large_words(): + assert clean_large_words( + '''test 2VAzLpbBUDBInhtN5ToJZAXL8L6F4J+Xr/L/42vs2r+9Pb0E3Y1ZLy7E3GsYRzAqQ037iKABMHL9VDoAaBAuAGgQLgBoEC4AaBAuAGgQLgB\ +oEC4AaBAuAGgQLgBoEC4AaBAuAGgQLgBoEC4AaBAuAGgQLgBoEC4AaBAuAGgQLgBoEC4AaBAuAGgQLgBoEC4AaBAuAGgQLgBoEC4AaBAuAG\ +gQLgBoEC4AaBAuAGgQLgBoEC4AaBAuAGgQLgBoEC4AaBAuAGgQLgBoEC4AaBAuAGgQLgBoEC4AaBAuAGgQLgBoEC4AaBAuAGgQLgBoEC4Aa\ +BAuAGgQLgBoEC4AaBAuAGgQLgBoEC4AaBAuAGgQLgBoEC4AaBAuAGgQLgBoEC4AaBAuAGgQLgBoEC4AaBAuAGgQLgBoEC4AaBAuAGgQLgBo test''' + ) == 'test 2VAzLpbBUDBInhtN5ToJ...LgBoEC4AaBAuAGgQLgBo test' + + assert clean_large_words('2VAzLpbBUDBInhtN5ToJZAXL8L6F4J+Xr/L/4') == '2VAzLpbBUDBInhtN5ToJZAXL8L6F4J+Xr/L/4'