From 412c0a940307b67280c5cf738c38a74b43fdff17 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Florian=20Ho=CC=88nicke?=
Date: Wed, 3 May 2023 00:01:02 +0200
Subject: [PATCH] =?UTF-8?q?=F0=9F=A7=AA4=EF=B8=8F=E2=83=A3=20test:=20level?=
 =?UTF-8?q?=204=20gpt=20fix?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .github/workflows/ci.yml                 | 28 +++++++-
 src/constants.py                         |  2 -
 src/options/generate/generator.py        | 90 +++++++++++++++---------
 test/integration/__init__.py             |  0
 test/{ => integration}/test_generator.py | 10 +--
 test/test_api.py                         | 15 ----
 test/unit/__init__.py                    |  0
 test/unit/test_api.py                    | 34 +++++++++
 test/{ => unit}/test_strings.py          |  0
 9 files changed, 122 insertions(+), 57 deletions(-)
 create mode 100644 test/integration/__init__.py
 rename test/{ => integration}/test_generator.py (96%)
 delete mode 100644 test/test_api.py
 create mode 100644 test/unit/__init__.py
 create mode 100644 test/unit/test_api.py
 rename test/{ => unit}/test_strings.py (100%)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 97be0f4..a56cc2c 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -27,9 +27,35 @@ jobs:
       - name: Test
         id: test
         run: |
-          pytest -vs test/test_generator.py::test_generation_level_${{ matrix.group }}
+          pytest -vs test/integration/test_generator.py::test_generation_level_${{ matrix.group }}
         timeout-minutes: 15
         env:
           OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
           SCENEX_API_KEY: ${{ secrets.SCENEX_API_KEY }}
           WHISPER_API_KEY: ${{ secrets.WHISPER_API_KEY }}
+
+  test_unit:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v2
+      - name: Set up Python 3.8
+        uses: actions/setup-python@v2
+        with:
+          python-version: 3.8
+      - name: Prepare environment
+        run: |
+          python -m pip install --upgrade pip
+          python -m pip install wheel
+          pip install --no-cache-dir ".[full,test]"
+          pip install pytest
+          pip install pytest-split
+      - name: Test
+        id: test
+        run: |
+          pytest -vs test/unit
+        timeout-minutes: 15
+        env:
+          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
+          SCENEX_API_KEY: ${{ secrets.SCENEX_API_KEY }}
+          WHISPER_API_KEY: ${{ secrets.WHISPER_API_KEY }}
+
diff --git a/src/constants.py b/src/constants.py
index 43f6b8b..3119882 100644
--- a/src/constants.py
+++ b/src/constants.py
@@ -39,10 +39,8 @@ MAX_DEBUGGING_ITERATIONS = 10
 DEMO_TOKEN = '45372338e04f5a41af949024db929d46'

 BLACKLISTED_PACKAGES = [
-    # 'Pyrender',
     'Trimesh', 'moderngl', 'pyopengl', 'pyglet', 'pythreejs', 'panda3d',  # because they need a screen,
     'tika',  # because it needs java
-    'fastapi', 'uvicorn', 'starlette',
 ]
 UNNECESSARY_PACKAGES = [
     'fastapi', 'uvicorn', 'starlette'  # because the wrappers are used instead
diff --git a/src/options/generate/generator.py b/src/options/generate/generator.py
index 9dcadfe..f59a1a6 100644
--- a/src/options/generate/generator.py
+++ b/src/options/generate/generator.py
@@ -4,7 +4,6 @@ import random
 import re
 import shutil
 from typing import Callable
-
 from typing import List, Text, Optional

 from langchain import PromptTemplate
@@ -19,16 +18,15 @@ from src.constants import FILE_AND_TAG_PAIRS, NUM_IMPLEMENTATION_STRATEGIES, MAX
     BLACKLISTED_PACKAGES, EXECUTOR_FILE_NAME, TEST_EXECUTOR_FILE_NAME, TEST_EXECUTOR_FILE_TAG, \
     REQUIREMENTS_FILE_NAME, REQUIREMENTS_FILE_TAG, DOCKER_FILE_NAME, IMPLEMENTATION_FILE_NAME, \
     IMPLEMENTATION_FILE_TAG, LANGUAGE_PACKAGES, UNNECESSARY_PACKAGES
-from src.options.generate.templates_system import system_task_iteration, system_task_introduction, system_test_iteration
+from src.options.generate.templates_system import system_task_iteration, system_task_introduction, system_test_iteration
 from src.options.generate.templates_user import template_generate_microservice_name, \
     template_generate_possible_packages, \
     template_solve_code_issue, \
     template_solve_pip_dependency_issue, template_is_dependency_issue, template_generate_playground, \
     template_generate_function, template_generate_test, template_generate_requirements, \
     template_chain_of_thought, template_summarize_error, \
-    template_generate_apt_get_install, template_solve_apt_get_dependency_issue, template_pm_task_iteration, \
+    template_solve_apt_get_dependency_issue, template_pm_task_iteration, \
     template_pm_test_iteration
-
 from src.options.generate.ui import get_random_employee
 from src.utils.io import persist_file, get_all_microservice_files_with_content, get_microservice_path
 from src.utils.string_tools import print_colored
@@ -39,6 +37,7 @@ class TaskSpecification:
     task: Optional[Text]
     test: Optional[Text]

+
 class Generator:
     def __init__(self, task_description, path, model='gpt-4'):
         self.gpt_session = gpt.GPTSession(task_description, model=model)
@@ -46,7 +45,7 @@ class Generator:
         self.microservice_root_path = path

     def extract_content_from_result(self, plain_text, file_name, match_single_block=False, can_contain_code_block=True):
-        optional_line_break = '\n' if can_contain_code_block else '' # the \n at the end makes sure that ``` within the generated code is not matched because it is not right before a line break
+        optional_line_break = '\n' if can_contain_code_block else ''  # the \n at the end makes sure that ``` within the generated code is not matched because it is not right before a line break
         pattern = fr"\*?\*?{file_name}\*?\*?\n```(?:\w+\n)?([\s\S]*?){optional_line_break}```"
         match = re.search(pattern, plain_text, re.MULTILINE)
         if match:
@@ -80,10 +79,11 @@ metas:
         def _default_parse_result_fn(x):
             _parsed_results = {}
             for _file_name in files_names:
-                _content = self.extract_content_from_result(x, _file_name, match_single_block=len(files_names)==1)
+                _content = self.extract_content_from_result(x, _file_name, match_single_block=len(files_names) == 1)
                 if _content != '':
                     _parsed_results[_file_name] = _content
             return _parsed_results
+
         return _default_parse_result_fn

     def generate_and_persist_file(
@@ -114,7 +114,9 @@ metas:
             parse_result_fn = self.get_default_parse_result_fn(file_name_s)

         print_colored('', f'\n\n############# {section_title} #############', 'blue')
-        system_introduction_message = _GPTConversation._create_system_message(self.microservice_specification.task, self.microservice_specification.test, system_definition_examples)
+        system_introduction_message = _GPTConversation._create_system_message(self.microservice_specification.task,
+                                                                              self.microservice_specification.test,
+                                                                              system_definition_examples)
         conversation = self.gpt_session.get_conversation(messages=[system_introduction_message])
         template_kwargs = {k: v for k, v in template_kwargs.items() if k in template.input_variables}
         if 'file_name' in template.input_variables and len(file_name_s) == 1:
@@ -126,7 +128,8 @@ metas:
         )
         content = parse_result_fn(content_raw)
         if content == {}:
-            content_raw = conversation.chat('You must add the content' + (f' for {file_name_s[0]}' if len(file_name_s) == 1 else ''))
+            content_raw = conversation.chat(
+                'You must add the content' + (f' for {file_name_s[0]}' if len(file_name_s) == 1 else ''))
             content = parse_result_fn(content_raw)
         for _file_name, _file_content in content.items():
             persist_file(_file_content, os.path.join(destination_folder, _file_name))
@@ -138,12 +141,14 @@ metas:
             packages,
             num_approach,
     ):
-        MICROSERVICE_FOLDER_v1 = get_microservice_path(self.microservice_root_path, microservice_name, packages, num_approach, 1)
+        MICROSERVICE_FOLDER_v1 = get_microservice_path(self.microservice_root_path, microservice_name, packages,
+                                                       num_approach, 1)
         os.makedirs(MICROSERVICE_FOLDER_v1)

         with open(os.path.join(os.path.dirname(__file__), 'static_files', 'microservice', 'microservice.py'), 'r') as f:
             microservice_executor_boilerplate = f.read()
-        microservice_executor_code = microservice_executor_boilerplate.replace('class GPTDeployExecutor(Executor):', f'class {microservice_name}(Executor):')
+        microservice_executor_code = microservice_executor_boilerplate.replace('class GPTDeployExecutor(Executor):',
+                                                                               f'class {microservice_name}(Executor):')
         persist_file(microservice_executor_code, os.path.join(MICROSERVICE_FOLDER_v1, EXECUTOR_FILE_NAME))

         with open(os.path.join(os.path.dirname(__file__), 'static_files', 'microservice', 'apis.py'), 'r') as f:
@@ -202,14 +207,14 @@ metas:
         # })
         # )

-
-        with open(os.path.join(os.path.dirname(__file__), 'static_files', 'microservice', 'Dockerfile'), 'r', encoding='utf-8') as f:
+        with open(os.path.join(os.path.dirname(__file__), 'static_files', 'microservice', 'Dockerfile'), 'r',
+                  encoding='utf-8') as f:
             docker_file_template_lines = f.readlines()
-        docker_file_template_lines = [line for line in docker_file_template_lines if not line.startswith('RUN apt-get update')]
+        docker_file_template_lines = [line for line in docker_file_template_lines if
+                                      not line.startswith('RUN apt-get update')]
         docker_file_content = '\n'.join(docker_file_template_lines)
         persist_file(docker_file_content, os.path.join(MICROSERVICE_FOLDER_v1, 'Dockerfile'))

-
         self.write_config_yml(microservice_name, MICROSERVICE_FOLDER_v1)

         print('\nFirst version of the microservice generated. Start iterating on it to make the tests pass...')
@@ -224,13 +229,15 @@ metas:
         packages = ' '.join(json.loads(json_string)['packages'])

         docker_file_template = self.read_docker_template()
-        return {DOCKER_FILE_NAME: docker_file_template.replace('{{apt_get_packages}}', '{apt_get_packages}').format(apt_get_packages=packages)}
+        return {DOCKER_FILE_NAME: docker_file_template.replace('{{apt_get_packages}}', '{apt_get_packages}').format(
+            apt_get_packages=packages)}

     def parse_result_fn_requirements(self, content_raw: str):
         content_parsed = self.extract_content_from_result(content_raw, 'requirements.txt', match_single_block=True)
         lines = content_parsed.split('\n')
-        lines = [line for line in lines if not any([pkg in line for pkg in ['jina', 'docarray', 'openai', 'pytest', 'gpt_3_5_turbo']])]
+        lines = [line for line in lines if
+                 not any([pkg in line for pkg in ['jina', 'docarray', 'openai', 'pytest', 'gpt_3_5_turbo']])]
         content_modified = f'''jina==3.15.1.dev14
 docarray==0.21.0
 openai==0.27.5
@@ -292,8 +299,10 @@ pytest
         for i in range(1, MAX_DEBUGGING_ITERATIONS):
             print('Debugging iteration', i)
             print('Trying to debug the microservice. Might take a while...')
-            previous_microservice_path = get_microservice_path(self.microservice_root_path, microservice_name, packages, num_approach, i)
-            next_microservice_path = get_microservice_path(self.microservice_root_path, microservice_name, packages, num_approach, i + 1)
+            previous_microservice_path = get_microservice_path(self.microservice_root_path, microservice_name, packages,
+                                                               num_approach, i)
+            next_microservice_path = get_microservice_path(self.microservice_root_path, microservice_name, packages,
+                                                           num_approach, i + 1)
             log_hubble = push_executor(previous_microservice_path)
             error = process_error_message(log_hubble)
             if error:
@@ -357,7 +366,8 @@ pytest
                 summarized_error=summarized_error,
                 task_description=self.microservice_specification.task,
                 test_description=self.microservice_specification.test,
-                all_files_string=self.files_to_string({key: val for key, val in file_name_to_content.items() if key != EXECUTOR_FILE_NAME}),
+                all_files_string=self.files_to_string(
+                    {key: val for key, val in file_name_to_content.items() if key != EXECUTOR_FILE_NAME}),
             )

     class MaxDebugTimeReachedException(BaseException):
@@ -368,15 +378,19 @@ pytest

     def is_dependency_issue(self, summarized_error, dock_req_string: str, package_manager: str):
         # a few heuristics to quickly jump ahead
-        if any([error_message in summarized_error for error_message in ['AttributeError', 'NameError', 'AssertionError']]):
+        if any([error_message in summarized_error for error_message in
+                ['AttributeError', 'NameError', 'AssertionError']]):
             return False
-        if package_manager.lower() == 'pip' and any([em in summarized_error for em in ['ModuleNotFoundError', 'ImportError']]):
+        if package_manager.lower() == 'pip' and any(
+                [em in summarized_error for em in ['ModuleNotFoundError', 'ImportError']]):
             return True

         print_colored('', f'Is it a {package_manager} dependency issue?', 'blue')
         conversation = self.gpt_session.get_conversation()
         answer_raw = conversation.chat(
-            template_is_dependency_issue.format(summarized_error=summarized_error, all_files_string=dock_req_string).replace('PACKAGE_MANAGER', package_manager)
+            template_is_dependency_issue.format(summarized_error=summarized_error,
+                                                all_files_string=dock_req_string).replace('PACKAGE_MANAGER',
+                                                                                          package_manager)
         )
         answer_json_string = self.extract_content_from_result(answer_raw, 'response.json', match_single_block=True, )
         answer = json.loads(answer_json_string)['dependency_installation_failure']
@@ -403,7 +417,8 @@ pytest
             description=self.microservice_specification.task
         )['strategies.json']
         packages_list = [[pkg.strip().lower() for pkg in packages] for packages in json.loads(packages_json_string)]
-        packages_list = [[self.replace_with_gpt_3_5_turbo_if_possible(pkg) for pkg in packages] for packages in packages_list]
+        packages_list = [[self.replace_with_gpt_3_5_turbo_if_possible(pkg) for pkg in packages] for packages in
+                         packages_list]
         packages_list = self.filter_packages_list(packages_list)
         packages_list = packages_list[:NUM_IMPLEMENTATION_STRATEGIES]

@@ -497,11 +512,14 @@ Test scenario:
 {self.microservice_specification.test}
 ''')

-    def refine_requirements(self, pm, messages, refinement_type, custom_suffix, template_pm_iteration, micro_service_initial_description=None):
+    def refine_requirements(self, pm, messages, refinement_type, custom_suffix, template_pm_iteration,
+                            micro_service_initial_description=None):
         user_input = self.microservice_specification.task
         num_parsing_tries = 0
         while True:
-            conversation = self.gpt_session.get_conversation(messages, print_stream=os.environ['VERBOSE'].lower() == 'true', print_costs=False)
+            conversation = self.gpt_session.get_conversation(messages,
+                                                             print_stream=os.environ['VERBOSE'].lower() == 'true',
+                                                             print_costs=False)
             agent_response_raw = conversation.chat(
                 template_pm_iteration.format(
                     custom_suffix=custom_suffix,
@@ -510,7 +528,8 @@ Test scenario:
                 role='user'
             )
             messages.append(HumanMessage(content=user_input))
-            agent_question = self.extract_content_from_result(agent_response_raw, 'prompt.json', can_contain_code_block=False)
+            agent_question = self.extract_content_from_result(agent_response_raw, 'prompt.json',
+                                                              can_contain_code_block=False)
             final = self.extract_content_from_result(agent_response_raw, 'final.json', can_contain_code_block=False)
             if final:
                 messages.append(AIMessage(content=final))
@@ -525,8 +544,8 @@ Test scenario:
                     raise self.TaskRefinementException()
                 num_parsing_tries += 1
                 messages.append(AIMessage(content=agent_response_raw))
-                messages.append(SystemMessage(content='You did not put your answer into the right format using *** and ```.'))
-
+                messages.append(
+                    SystemMessage(content='You did not put your answer into the right format using *** and ```.'))

     @staticmethod
     def get_user_input(employee, prompt_to_user):
@@ -547,16 +566,19 @@ Test scenario:
         # filter out complete package lists
         packages_list = [
             packages for packages in packages_list if all([
-                pkg == 'gpt_3_5_turbo'
-                or (
-                        is_package_on_pypi(pkg)  # all packages must be on pypi or it is gpt_3_5_turbo
-                        and pkg not in BLACKLISTED_PACKAGES  # no package is allowed to be blacklisted
-                )
+                pkg not in BLACKLISTED_PACKAGES  # no package is allowed to be blacklisted
                 for pkg in packages
             ])
         ]
         # filter out single packages
         packages_list = [
-            [package for package in packages if package not in UNNECESSARY_PACKAGES] for packages in packages_list
+            [
+                package for package in packages
+                if (package not in UNNECESSARY_PACKAGES)
+                and (  # all packages must be on pypi or it is gpt_3_5_turbo
+                        is_package_on_pypi(package)
+                        or package == 'gpt_3_5_turbo'
+                )
+            ] for packages in packages_list
         ]
         return packages_list
diff --git a/test/integration/__init__.py b/test/integration/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/test/test_generator.py b/test/integration/test_generator.py
similarity index 96%
rename from test/test_generator.py
rename to test/integration/test_generator.py
index 7f7b022..fec9658 100644
--- a/test/test_generator.py
+++ b/test/integration/test_generator.py
@@ -20,7 +20,7 @@ def test_generation_level_0(tmpdir):
     os.environ['VERBOSE'] = 'true'
     generator = Generator(
         "The microservice is very simple, it does not take anything as input and only outputs the word 'test'",
-        str(tmpdir) + 'microservice',
+        str(tmpdir),
         'gpt-3.5-turbo'
     )
     assert generator.generate() == 0
@@ -44,7 +44,7 @@ def test_generation_level_1(tmpdir):

 Example tweet:
 \'When your coworker microwaves fish in the break room... AGAIN. 🐟🤢 But hey, at least SOMEONE's enjoying their lunch. #officelife\'''',
-        str(tmpdir) + 'microservice',
+        str(tmpdir),
         'gpt-3.5-turbo'
     )
     assert generator.generate() == 0
@@ -63,7 +63,7 @@ def test_generation_level_2(tmpdir):
     os.environ['VERBOSE'] = 'true'
     generator = Generator(
         "The input is a PDF like https://www.africau.edu/images/default/sample.pdf and the output the summarized text (50 words).",
-        str(tmpdir) + 'microservice',
+        str(tmpdir),
         'gpt-3.5-turbo'
     )
     assert generator.generate() == 0
@@ -123,7 +123,7 @@ print('This is the text from the audio file:', response.json()['text'])
 4. Return the the audio file as base64 encoded binary.
 Example input file: https://www.signalogic.com/melp/EngSamples/Orig/ENG_M.wav
 ''',
-        str(tmpdir) + 'microservice',
+        str(tmpdir),
         'gpt-4'
     )
     assert generator.generate() == 0
@@ -163,7 +163,7 @@ The joke is the put on the image.
 The output is the image with the joke on it.
 Example input image: https://upload.wikimedia.org/wikipedia/commons/thumb/4/47/PNG_transparency_demonstration_1.png/560px-PNG_transparency_demonstration_1.png
 ''',
-        str(tmpdir) + 'microservice',
+        str(tmpdir),
         'gpt-3.5-turbo'
     )
     assert generator.generate() == 0
diff --git a/test/test_api.py b/test/test_api.py
deleted file mode 100644
index 4a5e9e6..0000000
--- a/test/test_api.py
+++ /dev/null
@@ -1,15 +0,0 @@
-from src.apis.jina_cloud import is_executor_in_hub
-from src.apis.pypi import is_package_on_pypi
-
-
-def test_is_microservice_in_hub():
-    assert is_executor_in_hub('reoihoflsnvoiawejeruhvflsfk') is False
-    assert is_executor_in_hub('CLIPImageEncoder') is True
-
-def test_is_package_on_pypi():
-    assert is_package_on_pypi('jina') is True
-    assert is_package_on_pypi('jina', '0.9.25') is True
-    assert is_package_on_pypi('jina', '10.10.10') is False
-    assert is_package_on_pypi('jina-jina-jina') is False
-    assert is_package_on_pypi('jina-jina-jina', '0.9.25') is False
-    assert is_package_on_pypi('jina-jina-jina', '10.10.10') is False
diff --git a/test/unit/__init__.py b/test/unit/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/test/unit/test_api.py b/test/unit/test_api.py
new file mode 100644
index 0000000..5202a89
--- /dev/null
+++ b/test/unit/test_api.py
@@ -0,0 +1,34 @@
+from src.apis.jina_cloud import is_executor_in_hub
+from src.apis.pypi import is_package_on_pypi
+from src.options.generate.generator import Generator
+
+
+def test_is_microservice_in_hub():
+    assert is_executor_in_hub('reoihoflsnvoiawejeruhvflsfk') is False
+    assert is_executor_in_hub('CLIPImageEncoder') is True
+
+
+def test_is_package_on_pypi():
+    assert is_package_on_pypi('jina') is True
+    assert is_package_on_pypi('jina', '0.9.25') is True
+    assert is_package_on_pypi('jina', '10.10.10') is False
+    assert is_package_on_pypi('jina-jina-jina') is False
+    assert is_package_on_pypi('jina-jina-jina', '0.9.25') is False
+    assert is_package_on_pypi('jina-jina-jina', '10.10.10') is False
+
+
+def test_filter_packages_list():
+    filtered_list = Generator.filter_packages_list([
+        ["gpt_3_5_turbo", "requests", "base64", "gtts", "pydub"],
+        ["requests", "base64", "gtts", "pydub"],
+        ["gpt_3_5_turbo", "requests", "base64", "gtts"],
+        ["gpt_3_5_turbo", "requests", "base64", "pydub"],
+        ["requests", "base64", "gtts"]
+    ])
+    assert filtered_list == [
+        ["gpt_3_5_turbo", "requests", "gtts", "pydub"],
+        ["requests", "gtts", "pydub"],
+        ["gpt_3_5_turbo", "requests", "gtts"],
+        ["gpt_3_5_turbo", "requests", "pydub"],
+        ["requests", "gtts"]
+    ]
diff --git a/test/test_strings.py b/test/unit/test_strings.py
similarity index 100%
rename from test/test_strings.py
rename to test/unit/test_strings.py