diff --git a/main.py b/main.py index 718a695..873d0b4 100644 --- a/main.py +++ b/main.py @@ -6,18 +6,11 @@ from jina import Client from src import gpt, jina_cloud from src.constants import TAG_TO_FILE_NAME, EXECUTOR_FOLDER from src.prompt_examples import executor_example, docarray_example -from src.prompt_tasks import general_guidelines, executor_name_task, executor_file_task, requirements_file_task, \ +from src.prompt_tasks import general_guidelines, executor_file_task, requirements_file_task, \ test_executor_file_task, docker_file_task from src.utils.io import recreate_folder from src.utils.string import find_between, clean_content -input_executor_description = "Write an executor that takes image bytes as input (document.blob within a DocumentArray) and use BytesIO to convert it to PIL and detects ocr " \ - "and returns the texts as output (as DocumentArray). " - -input_test_description = 'The test downloads the image ' \ - 'https://upload.wikimedia.org/wikipedia/commons/thumb/a/aa/Onlineocr.png/640px-Onlineocr.png ' \ - ' loads it as bytes, takes it as input to the executor and asserts that the output is "Hello World".' - def extract_content_from_result(plain_text, tag): content = find_between(plain_text, f'$$$start_{tag}$$$', f'$$$end_{tag}$$$') @@ -35,17 +28,17 @@ def extract_and_write(plain_text): def write_config_yml(executor_name): config_content = f''' - jtype: {executor_name} - py_modules: - - executor.py - metas: - name: {executor_name} +jtype: {executor_name} +py_modules: + - executor.py +metas: + name: {executor_name} ''' with open('executor/config.yml', 'w') as f: f.write(config_content) -def main(): +def main(executor_name, input_executor_description, input_test_description): recreate_folder(EXECUTOR_FOLDER) system_definition = ( "You are a principal engineer working at Jina - an open source company." @@ -57,9 +50,8 @@ def main(): user_query = ( input_executor_description + general_guidelines - + executor_name_task() + executor_file_task() - + test_executor_file_task() + + test_executor_file_task(executor_name) + requirements_file_task() + docker_file_task() + input_test_description @@ -68,8 +60,6 @@ def main(): plain_text = gpt.get_response(system_definition, user_query) extract_and_write(plain_text) - executor_name = extract_content_from_result(plain_text, 'executor_name') - write_config_yml(executor_name) jina_cloud.push_executor() @@ -85,4 +75,17 @@ def main(): if __name__ == '__main__': - main() + main( + executor_name='MyBelovedOcrExecutor', + input_executor_description=( + "Write an executor that takes image bytes as input (document.blob within a DocumentArray) " + # "and use BytesIO to convert it to PIL " \ + "and detects ocr " + "and returns the texts as output (as DocumentArray). " + ), + + input_test_description='The test downloads the image ' \ + 'https://miro.medium.com/v2/resize:fit:1024/0*4ty0Adbdg4dsVBo3.png ' \ + ' loads it as bytes, takes it as input to the executor and asserts that the output is "> Hello, world!_".', + + ) diff --git a/requirements.txt b/requirements.txt index 3bebdb7..692df68 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1 +1,4 @@ -jina[perf]==3.14.2.dev18 \ No newline at end of file +jina[perf]==3.14.2.dev18 +openai +ptest +jcloud \ No newline at end of file diff --git a/src/jina_cloud.py b/src/jina_cloud.py index 53747c1..3bc5565 100644 --- a/src/jina_cloud.py +++ b/src/jina_cloud.py @@ -1,42 +1,53 @@ import os +import hubble from jcloud.flow import CloudFlow +from jina import Flow def push_executor(): cmd = 'jina hub push executor/. --verbose' os.system(cmd) +def get_user_name(): + client = hubble.Client(max_retries=None, jsonify=True) + response = client.get_user_info() + return response['data']['name'] + def deploy_flow(executor_name): - flow = f''' - jtype: Flow - with: - monitoring: true - env: - JINA_LOG_LEVEL: DEBUG +jtype: Flow +with: + monitoring: true + env: + JINA_LOG_LEVEL: DEBUG +jcloud: + version: '3.14.2.dev18' + labels: + team: microchain +gateway: + jcloud: + expose: true +executors: + - name: {executor_name.lower()} + uses: jinaai+docker://{get_user_name()}/{executor_name}:latest + env: + JINA_LOG_LEVEL: DEBUG jcloud: - version: '3.14.2.dev18' - labels: - team: now - gateway: - jcloud: - expose: true - executors: - - name: {executor_name.lower()} - uses: jinaai+docker://team-now-prod/{executor_name} - env: - JINA_LOG_LEVEL: DEBUG - jcloud: - expose: true - resources: - instance: C4 - capacity: spot - replicas: 1 + expose: true + resources: + instance: C4 + capacity: spot + replicas: 1 ''' full_flow_path = os.path.join('executor', 'flow.yml') with open(full_flow_path, 'w') as f: f.write(flow) + # try local first + flow = Flow.load_config(full_flow_path) + with flow: + pass + return CloudFlow(path=full_flow_path).__enter__().endpoints['gateway'] \ No newline at end of file diff --git a/src/prompt_examples.py b/src/prompt_examples.py index bdb42d4..81ee1f4 100644 --- a/src/prompt_examples.py +++ b/src/prompt_examples.py @@ -1,8 +1,9 @@ executor_example = "Here is an example of how an executor can be defined. It always starts with a comment:" ''' + # this executor takes ... as input and returns ... as output # it processes each document in the following way: ... -from jina import Executor, requests, DocumentArray, Document, Deployment +from jina import Executor, requests, DocumentArray, Document class MyExecutor(Executor): def __init__(self, **kwargs): super().__init__() @@ -15,8 +16,47 @@ class MyExecutor(Executor): ''' "An executor gets a DocumentArray as input and returns a DocumentArray as output." -docarray_example = "Here is an example of how a DocumentArray can be defined:" +docarray_example = ( + "A DocumentArray is a python class that can be seen as a list of Documents. " + "A Document is a python class that represents a single document. " + "Here is the protobuf definition of a Document: " ''' + +message DocumentProto { + // A hexdigest that represents a unique document ID + string id = 1; + + oneof content { + // the raw binary content of this document, which often represents the original document when comes into jina + bytes blob = 2; + + // the ndarray of the image/audio/video document + NdArrayProto tensor = 3; + + // a text document + string text = 4; + } + + // a uri of the document could be: a local file path, a remote url starts with http or https or data URI scheme + string uri = 5; + + // list of the sub-documents of this document (recursive structure) + repeated DocumentProto chunks = 6; + + // the matched documents on the same level (recursive structure) + repeated DocumentProto matches = 7; + + // the embedding of this document + NdArrayProto embedding = 8; + + // a structured data value, consisting of field which map to dynamically typed values. + google.protobuf.Struct tags = 9; + +} +''' + "Here is an example of how a DocumentArray can be defined: " +''' + from jina import DocumentArray, Document d1 = Document(text='hello') @@ -26,8 +66,12 @@ d4 = Document( uri='https://docs.docarray.org', tags={'foo': 'bar'}, ) - +d5 = Document() +d5.tensor = np.ones((2,4)) +d6 = Document() +d6.blob = b'RIFF\x00\x00\x00\x00WAVEfmt \x10\x00...' docs = DocumentArray([ d1, d2, d3, d4 ]) -''' \ No newline at end of file +''' +) \ No newline at end of file diff --git a/src/prompt_tasks.py b/src/prompt_tasks.py index f0a2a4f..1a69b19 100644 --- a/src/prompt_tasks.py +++ b/src/prompt_tasks.py @@ -9,37 +9,40 @@ general_guidelines = ( ) -def _task(task, tag_name, file_name=None): - return task + f"{f'The code will go into {file_name}. ' if tag_name else ''}. Wrap the code in the string $$$start_{tag_name}$$$...$$$end_{tag_name}$$$ " +def _task(task, tag_name, file_name): + return task + f"The code will go into {file_name}. Wrap the code in the string $$$start_{tag_name}$$$...$$$end_{tag_name}$$$ " -def executor_name_task(): - return _task("Write the executor name. " - "The executor name only consists of lower case and upper case letters.", 'executor_name' - ) - def executor_file_task(): - return _task("Write the executor code.", 'executor', EXECUTOR_FILE_NAME) + return _task("Write the executor code. ", 'executor', EXECUTOR_FILE_NAME) def requirements_file_task(): - return _task("Write the content of the requirements.txt file. Make sure to include pytest.", 'requirements', + return _task("Write the content of the requirements.txt file. " + "Make sure to include pytest. " + "All versions are fixed. " , 'requirements', REQUIREMENTS_FILE_NAME) -def test_executor_file_task(): +def test_executor_file_task(executor_name): return _task( "Write a small unit test for the executor. " "Start the test with an extensive comment about the test case. " - "Never do relative imports.", 'test_executor', TEST_EXECUTOR_FILE_NAME) + "Use the following import to import the executor: " + f"from executor import {executor_name}", + 'test_executor', + TEST_EXECUTOR_FILE_NAME + ) def docker_file_task(): return _task( "Write the Dockerfile that defines the environment with all necessary dependencies that the executor uses. " "The Dockerfile runs the test during the build process. " - "It is important to make sure that all libs are installed that are required by the python packages." + "It is important to make sure that all libs are installed that are required by the python packages. " + "Usually libraries are installed with apt-get. " + "Add the config.yml file to the Dockerfile. " "The base image of the Dockerfile is FROM jinaai/jina:3.14.2-dev18-py310-standard. " 'The entrypoint is ENTRYPOINT ["jina", "executor", "--uses", "config.yml"] ' "The Dockerfile runs the test during the build process. "