refactor: adjust structure

This commit is contained in:
Florian Hönicke
2023-03-18 20:05:52 +01:00
parent d84cc8ee04
commit 47aa456e88
10 changed files with 221 additions and 131 deletions

163
main.py
View File

@@ -1,101 +1,39 @@
import os import os
import shutil
import openai
from docarray import DocumentArray, Document from docarray import DocumentArray, Document
from jcloud.flow import CloudFlow
from jina import Client from jina import Client
from prompt_examples import executor_example, docarray_example from src import gpt, jina_cloud
from src.constants import TAG_TO_FILE_NAME, EXECUTOR_FOLDER
openai.api_key = os.environ['OPENAI_API_KEY'] from src.prompt_examples import executor_example, docarray_example
from src.prompt_tasks import general_guidelines, executor_name_task, executor_file_task, requirements_file_task, \
test_executor_file_task, docker_file_task
from src.utils.io import recreate_folder
from src.utils.string import find_between, clean_content
input_executor_description = "Write an executor that takes image bytes as input (document.blob within a DocumentArray) and use BytesIO to convert it to PIL and detects ocr " \ input_executor_description = "Write an executor that takes image bytes as input (document.blob within a DocumentArray) and use BytesIO to convert it to PIL and detects ocr " \
"and returns the texts as output (as DocumentArray). " "and returns the texts as output (as DocumentArray). "
input_test_description = 'The test downloads the image ' \ input_test_description = 'The test downloads the image ' \
'https://upload.wikimedia.org/wikipedia/commons/thumb/a/aa/Onlineocr.png/640px-Onlineocr.png ' \ 'https://upload.wikimedia.org/wikipedia/commons/thumb/a/aa/Onlineocr.png/640px-Onlineocr.png ' \
' loads it as bytes, takes it as input to the executor and asserts that the output is "Double Rhyme".' ' loads it as bytes, takes it as input to the executor and asserts that the output is "Hello World".'
response = openai.ChatCompletion.create(
temperature=0,
model="gpt-4",
messages=[
{
"role": "system",
"content": "You are a principal engineer working at Jina - an open source company."
"Using the Jina framework, users can define executors."
+ executor_example
+ docarray_example
},
{
"role": "user",
"content":
input_executor_description
+ "The code you write is production ready. Every file starts with a 5 sentence comment of what the code is doing before the first import. Start from top-level and then fully implement all methods."
"First, write the executor name. (wrap the code in the string $$$start_executor_name$$$...$$$end_executor_name$$$) "
"The executor name only consists of lower case and upper case letters. "
"Then, write the executor code. (executor.py) (wrap the code in the string $$$start_executor$$$ ... $$$end_executor$$$)"
"In addition write the content of the requirements.txt file. Make sure to include pytest. (wrap the code in the string $$$start_requirements$$$ ... $$$end_requirements$$$)"
"Then write a small unit test for the executor (test_executor.py). Start the test with an extensive comment about the test case. "
"Never do relative imports."
"(wrap the code in the string $$$start_test_executor$$$ ... $$$end_test_executor$$$)"
"Comments can only be written between tags."
# "the snipped should take the local file wolf.obj as input and save the output as png files. "
+ input_test_description
+ "Finally write the Dockerfile that defines the environment with all necessary dependencies that the executor uses. "
'First start with comments that give an executor-specific description the Dockerfile. '
"It is important to make sure that all libs are installed that are required by the python packages. "
"The base image of the Dockerfile is FROM jinaai/jina:3.14.2-dev18-py310-standard. "
'The entrypoint is ENTRYPOINT ["jina", "executor", "--uses", "config.yml"] '
"The Dockerfile runs the test during the build process. "
"(wrap the code in the string $$$start_dockerfile$$$ ... $$$end_dockerfile$$$)"
},
]
)
plain_text = response['choices'][0]['message']['content']
print(plain_text)
def find_between(input_string, start, end): def extract_content_from_result(plain_text, tag):
try:
start_index = input_string.index(start) + len(start)
end_index = input_string.index(end, start_index)
return input_string[start_index:end_index]
except ValueError:
raise ValueError(f'Could not find {start} and {end} in {input_string}')
def clean_content(content):
return content.replace('```', '').strip()
executor_name = find_between(plain_text, f'$$$start_executor_name$$$', f'$$$end_executor_name$$$').replace('#', '').strip()
# delete folder and recreate it
def recreate_folder(folder_path):
# Check if the folder exists
if os.path.exists(folder_path) and os.path.isdir(folder_path):
# Delete the folder if it exists
shutil.rmtree(folder_path)
# Create the folder
os.makedirs(folder_path)
folder = 'executor'
recreate_folder(folder)
for tag, file_name in [['executor', f'executor.py'], ['requirements', 'requirements.txt'], ['test_executor', 'test_executor.py'], ['dockerfile', 'Dockerfile']]:
content = find_between(plain_text, f'$$$start_{tag}$$$', f'$$$end_{tag}$$$') content = find_between(plain_text, f'$$$start_{tag}$$$', f'$$$end_{tag}$$$')
clean = clean_content(content) clean = clean_content(content)
full_path = os.path.join(folder, file_name) return clean
def extract_and_write(plain_text):
for tag, file_name in TAG_TO_FILE_NAME.items():
clean = extract_content_from_result(plain_text, tag)
full_path = os.path.join(EXECUTOR_FOLDER, file_name)
with open(full_path, 'w') as f: with open(full_path, 'w') as f:
f.write(clean) f.write(clean)
def write_config_yml(executor_name):
config_content = f''' config_content = f'''
jtype: {executor_name} jtype: {executor_name}
py_modules: py_modules:
@@ -106,40 +44,38 @@ metas:
with open('executor/config.yml', 'w') as f: with open('executor/config.yml', 'w') as f:
f.write(config_content) f.write(config_content)
cmd = 'jina hub push executor/. --verbose'
os.system(cmd)
flow = f''' def main():
jtype: Flow recreate_folder(EXECUTOR_FOLDER)
with: system_definition = (
monitoring: true "You are a principal engineer working at Jina - an open source company."
env: "Using the Jina framework, users can define executors."
JINA_LOG_LEVEL: DEBUG + executor_example
jcloud: + docarray_example
version: '3.14.2.dev18' )
labels:
team: now user_query = (
gateway: input_executor_description
jcloud: + general_guidelines
expose: true + executor_name_task()
executors: + executor_file_task()
- name: {executor_name.lower()} + test_executor_file_task()
uses: jinaai+docker://team-now-prod/{executor_name} + requirements_file_task()
env: + docker_file_task()
JINA_LOG_LEVEL: DEBUG + input_test_description
jcloud: )
expose: true
resources: plain_text = gpt.get_response(system_definition, user_query)
instance: C4 extract_and_write(plain_text)
capacity: spot
replicas: 1 executor_name = extract_content_from_result(plain_text, 'executor_name')
'''
full_flow_path = os.path.join('executor', 'flow.yml') write_config_yml(executor_name)
with open(full_flow_path, 'w') as f:
f.write(flow) jina_cloud.push_executor()
host = jina_cloud.deploy_flow(executor_name)
cloud_flow = CloudFlow(path=full_flow_path).__enter__()
host = cloud_flow.endpoints['gateway']
client = Client(host=host) client = Client(host=host)
d = Document(uri='data/txt.png') d = Document(uri='data/txt.png')
@@ -147,5 +83,6 @@ d.load_uri_to_blob()
response = client.post('/index', inputs=DocumentArray([d])) response = client.post('/index', inputs=DocumentArray([d]))
response[0].summary() response[0].summary()
# "Write an executor using open3d that takes 3d models in obj format (within a DocumentArray) as input and returns 3 2d renderings for each 3d model from unique random angles as output (as DocumentArray). Each document of the output DocumentArray has 3 chunks. Each chunk is one of the 2d renderings as png. "
if __name__ == '__main__':
main()

18
src/constants.py Normal file
View File

@@ -0,0 +1,18 @@
# Folder that the generated artifacts are written into.
EXECUTOR_FOLDER = 'executor'

# One (tag, file name) pair per generated artifact. The tag is the identifier
# used in the $$$start_<tag>$$$ ... $$$end_<tag>$$$ markers of the model
# response; the file name is where the extracted content is stored.
EXECUTOR_FILE_TAG, EXECUTOR_FILE_NAME = 'executor', 'executor.py'
TEST_EXECUTOR_FILE_TAG, TEST_EXECUTOR_FILE_NAME = 'test_executor', 'test_executor.py'
REQUIREMENTS_FILE_TAG, REQUIREMENTS_FILE_NAME = 'requirements', 'requirements.txt'
DOCKER_FILE_TAG, DOCKER_FILE_NAME = 'dockerfile', 'Dockerfile'

# Lookup from response tag to target file name (insertion order is kept:
# executor, test, requirements, Dockerfile).
TAG_TO_FILE_NAME = {
    EXECUTOR_FILE_TAG: EXECUTOR_FILE_NAME,
    TEST_EXECUTOR_FILE_TAG: TEST_EXECUTOR_FILE_NAME,
    REQUIREMENTS_FILE_TAG: REQUIREMENTS_FILE_NAME,
    DOCKER_FILE_TAG: DOCKER_FILE_NAME,
}

27
src/gpt.py Normal file
View File

@@ -0,0 +1,27 @@
import os
import openai
# Configure the OpenAI client at import time. The subscript lookup raises
# KeyError if the OPENAI_API_KEY environment variable is not set.
openai.api_key = os.environ['OPENAI_API_KEY']
def get_response(system_definition, user_query):
    """Send one system message and one user message to the chat model.

    Args:
        system_definition: Text used as the content of the "system" message.
        user_query: Text used as the content of the "user" message.

    Returns:
        The message content of the first choice in the response. The same
        text is also printed to stdout.
    """
    conversation = [
        {"role": "system", "content": system_definition},
        {"role": "user", "content": user_query},
    ]
    completion = openai.ChatCompletion.create(
        temperature=0,
        model="gpt-4",
        messages=conversation,
    )
    answer = completion['choices'][0]['message']['content']
    print(answer)
    return answer

42
src/jina_cloud.py Normal file
View File

@@ -0,0 +1,42 @@
import os
from jcloud.flow import CloudFlow
def push_executor():
    """Run ``jina hub push`` on the local ``executor/`` folder.

    The command is executed through the shell; its exit status is not
    inspected or returned.
    """
    os.system('jina hub push executor/. --verbose')
def deploy_flow(executor_name):
    """Write a Flow definition for ``executor_name`` and start it with ``CloudFlow``.

    The Flow YAML is rendered from the template below, written to
    ``executor/flow.yml`` and handed to ``CloudFlow``.

    Args:
        executor_name: Name of the executor. It is used verbatim in the
            ``uses`` URI and lower-cased for the executor's ``name`` entry.

    Returns:
        The ``'gateway'`` entry of the entered CloudFlow's ``endpoints``.
    """
    # Flow template; the only substituted values are the executor name and
    # its lower-cased form.
    flow = f'''
jtype: Flow
with:
monitoring: true
env:
JINA_LOG_LEVEL: DEBUG
jcloud:
version: '3.14.2.dev18'
labels:
team: now
gateway:
jcloud:
expose: true
executors:
- name: {executor_name.lower()}
uses: jinaai+docker://team-now-prod/{executor_name}
env:
JINA_LOG_LEVEL: DEBUG
jcloud:
expose: true
resources:
instance: C4
capacity: spot
replicas: 1
'''
    # The definition is stored in the hard-coded 'executor' folder.
    full_flow_path = os.path.join('executor', 'flow.yml')
    with open(full_flow_path, 'w') as f:
        f.write(flow)
    # The context manager is entered by hand and never exited in this
    # function. NOTE(review): presumably so the deployment outlives this
    # call - confirm that something else is responsible for tearing it down.
    return CloudFlow(path=full_flow_path).__enter__().endpoints['gateway']

47
src/prompt_tasks.py Normal file
View File

@@ -0,0 +1,47 @@
from src.constants import EXECUTOR_FILE_NAME, REQUIREMENTS_FILE_NAME, TEST_EXECUTOR_FILE_NAME, DOCKER_FILE_NAME, \
DOCKER_FILE_TAG
# Shared instructions about code quality and comment placement that apply to
# every generated file. Sentences are separated by a single space; the text
# deliberately has no trailing space after the final sentence.
general_guidelines = " ".join((
    "The code you write is production ready.",
    "Every file starts with comments describing what the code is doing before the first import.",
    "Comments can only be written between tags.",
    "Start from top-level and then fully implement all methods.",
))
def _task(task, tag_name, file_name=None):
return task + f"{f'The code will go into {file_name}. ' if tag_name else ''}. Wrap the code in the string $$$start_{tag_name}$$$...$$$end_{tag_name}$$$ "
def executor_name_task():
    """Return the prompt fragment asking for the executor name.

    The answer is requested under the ``executor_name`` tag; no target file
    is passed to ``_task``.
    """
    instruction = (
        "Write the executor name. "
        "The executor name only consists of lower case and upper case letters."
    )
    return _task(instruction, 'executor_name')
def executor_file_task():
    """Return the prompt fragment asking for the executor source code.

    The answer is requested under the ``executor`` tag and associated with
    ``EXECUTOR_FILE_NAME``.
    """
    instruction = "Write the executor code."
    return _task(instruction, 'executor', EXECUTOR_FILE_NAME)
def requirements_file_task():
    """Return the prompt fragment asking for the requirements file content.

    The answer is requested under the ``requirements`` tag and associated
    with ``REQUIREMENTS_FILE_NAME``.
    """
    instruction = "Write the content of the requirements.txt file. Make sure to include pytest."
    return _task(instruction, 'requirements', REQUIREMENTS_FILE_NAME)
def test_executor_file_task():
    """Return the prompt fragment asking for a unit test of the executor.

    The answer is requested under the ``test_executor`` tag and associated
    with ``TEST_EXECUTOR_FILE_NAME``.
    """
    instruction = (
        "Write a small unit test for the executor. "
        "Start the test with an extensive comment about the test case. "
        "Never do relative imports."
    )
    return _task(instruction, 'test_executor', TEST_EXECUTOR_FILE_NAME)
def docker_file_task():
    """Return the prompt fragment asking for the executor's Dockerfile.

    The instruction fixes the base image and the entrypoint and requires the
    test to run during the image build. The answer is requested under
    ``DOCKER_FILE_TAG`` and associated with ``DOCKER_FILE_NAME``.
    """
    # Each sentence appears exactly once and ends with a space so that the
    # implicitly concatenated literals do not run into each other.
    instruction = (
        "Write the Dockerfile that defines the environment with all necessary dependencies that the executor uses. "
        "The Dockerfile runs the test during the build process. "
        "It is important to make sure that all libs are installed that are required by the python packages. "
        "The base image of the Dockerfile is FROM jinaai/jina:3.14.2-dev18-py310-standard. "
        'The entrypoint is ENTRYPOINT ["jina", "executor", "--uses", "config.yml"] '
    )
    return _task(instruction, DOCKER_FILE_TAG, DOCKER_FILE_NAME)

0
src/utils/__init__.py Normal file
View File

8
src/utils/io.py Normal file
View File

@@ -0,0 +1,8 @@
import os
import shutil
def recreate_folder(folder_path):
    """Leave ``folder_path`` as a freshly created, empty directory.

    An existing directory at that path is removed together with its
    contents before the directory is created again.

    Args:
        folder_path: Path of the directory to (re)create.
    """
    # isdir() is already False for paths that do not exist, so a separate
    # existence check is not needed.
    if os.path.isdir(folder_path):
        shutil.rmtree(folder_path)
    os.makedirs(folder_path)

11
src/utils/string.py Normal file
View File

@@ -0,0 +1,11 @@
def find_between(input_string, start, end):
    """Return the text between the first ``start`` and the next ``end``.

    Args:
        input_string: Text to search.
        start: Marker that precedes the wanted text.
        end: Marker that follows the wanted text; it is searched for only
            after the end of ``start``.

    Returns:
        The substring strictly between the two markers.

    Raises:
        ValueError: If either marker cannot be found.
    """
    try:
        begin = input_string.index(start) + len(start)
        stop = input_string.index(end, begin)
    except ValueError:
        raise ValueError(f'Could not find {start} and {end} in {input_string}')
    return input_string[begin:stop]
def clean_content(content):
    """Strip Markdown code-fence markup and surrounding whitespace.

    A line that consists only of a fence plus an optional info string (for
    example "```python") is removed as a whole, so the language tag does not
    end up as the first line of the result. Any remaining triple backticks
    are then deleted wherever they occur.

    Args:
        content: Raw text, possibly wrapped in fenced code blocks.

    Returns:
        The text without fence markup, stripped of leading and trailing
        whitespace.
    """
    import re  # local import: this module has no top-level import block

    # Blank out fence-only lines, including their info string. Lines with
    # other content after the backticks are left for the plain replace below.
    without_fence_lines = re.sub(r'(?m)^[ \t]*```[\w+.-]*[ \t]*\r?$', '', content)
    return without_fence_lines.replace('```', '').strip()