mirror of
https://github.com/aljazceru/dev-gpt.git
synced 2025-12-23 16:44:20 +01:00
feat: stable
This commit is contained in:
160
micro_chain.py
160
micro_chain.py
@@ -1,10 +1,9 @@
|
||||
import random
|
||||
|
||||
from main import extract_content_from_result, write_config_yml, get_all_executor_files_with_content, files_to_string
|
||||
|
||||
from src import gpt, jina_cloud
|
||||
from src.constants import FILE_AND_TAG_PAIRS
|
||||
from src.jina_cloud import build_docker
|
||||
from src.jina_cloud import push_executor, process_error_message
|
||||
from src.prompt_tasks import general_guidelines, executor_file_task, chain_of_thought_creation, test_executor_file_task, \
|
||||
chain_of_thought_optimization, requirements_file_task, docker_file_task, not_allowed
|
||||
from src.utils.io import recreate_folder, persist_file
|
||||
@@ -15,35 +14,33 @@ def wrap_content_in_code_block(executor_content, file_name, tag):
|
||||
return f'**{file_name}**\n```{tag}\n{executor_content}\n```\n\n'
|
||||
|
||||
|
||||
|
||||
|
||||
def create_executor(
|
||||
executor_description,
|
||||
input_modality,
|
||||
output_modality,
|
||||
test_scenario,
|
||||
executor_name
|
||||
executor_name,
|
||||
is_chain_of_thought=False,
|
||||
):
|
||||
input_doc_field = 'text' if input_modality == 'text' else 'blob'
|
||||
output_doc_field = 'text' if output_modality == 'text' else 'blob'
|
||||
# random integer at the end of the executor name to avoid name clashes
|
||||
|
||||
recreate_folder('executor')
|
||||
EXECUTOR_FOLDER_v1 = 'executor/v1'
|
||||
recreate_folder(EXECUTOR_FOLDER_v1)
|
||||
recreate_folder('flow')
|
||||
|
||||
|
||||
|
||||
print_colored('', '############# Executor #############', 'red')
|
||||
user_query = (
|
||||
general_guidelines()
|
||||
+ executor_file_task(executor_name, executor_description, input_modality, input_doc_field,
|
||||
output_modality, output_doc_field)
|
||||
+ executor_file_task(executor_name, executor_description, test_scenario)
|
||||
+ chain_of_thought_creation()
|
||||
)
|
||||
conversation = gpt.Conversation()
|
||||
conversation.query(user_query)
|
||||
executor_content_raw = conversation.query(f"General rules: " + not_allowed() + chain_of_thought_optimization('python', 'executor.py'))
|
||||
executor_content_raw = conversation.query(user_query)
|
||||
if is_chain_of_thought:
|
||||
executor_content_raw = conversation.query(
|
||||
f"General rules: " + not_allowed() + chain_of_thought_optimization('python', 'executor.py'))
|
||||
executor_content = extract_content_from_result(executor_content_raw, 'executor.py')
|
||||
|
||||
persist_file(executor_content, EXECUTOR_FOLDER_v1 + '/executor.py')
|
||||
|
||||
print_colored('', '############# Test Executor #############', 'red')
|
||||
@@ -53,12 +50,13 @@ def create_executor(
|
||||
+ test_executor_file_task(executor_name, test_scenario)
|
||||
)
|
||||
conversation = gpt.Conversation()
|
||||
conversation.query(user_query)
|
||||
test_executor_content_raw = conversation.query(
|
||||
f"General rules: " + not_allowed() +
|
||||
chain_of_thought_optimization('python', 'test_executor.py')
|
||||
+ "Don't add any additional tests. "
|
||||
)
|
||||
test_executor_content_raw = conversation.query(user_query)
|
||||
if is_chain_of_thought:
|
||||
test_executor_content_raw = conversation.query(
|
||||
f"General rules: " + not_allowed() +
|
||||
chain_of_thought_optimization('python', 'test_executor.py')
|
||||
+ "Don't add any additional tests. "
|
||||
)
|
||||
test_executor_content = extract_content_from_result(test_executor_content_raw, 'test_executor.py')
|
||||
persist_file(test_executor_content, EXECUTOR_FOLDER_v1 + '/test_executor.py')
|
||||
|
||||
@@ -70,8 +68,10 @@ def create_executor(
|
||||
+ requirements_file_task()
|
||||
)
|
||||
conversation = gpt.Conversation()
|
||||
conversation.query(user_query)
|
||||
requirements_content_raw = conversation.query(chain_of_thought_optimization('', 'requirements.txt') + "Keep the same version of jina ")
|
||||
requirements_content_raw = conversation.query(user_query)
|
||||
if is_chain_of_thought:
|
||||
requirements_content_raw = conversation.query(
|
||||
chain_of_thought_optimization('', 'requirements.txt') + "Keep the same version of jina ")
|
||||
|
||||
requirements_content = extract_content_from_result(requirements_content_raw, 'requirements.txt')
|
||||
persist_file(requirements_content, EXECUTOR_FOLDER_v1 + '/requirements.txt')
|
||||
@@ -85,13 +85,16 @@ def create_executor(
|
||||
+ docker_file_task()
|
||||
)
|
||||
conversation = gpt.Conversation()
|
||||
conversation.query(user_query)
|
||||
dockerfile_content_raw = conversation.query(f"General rules: " + not_allowed() + chain_of_thought_optimization('dockerfile', 'Dockerfile'))
|
||||
dockerfile_content_raw = conversation.query(user_query)
|
||||
if is_chain_of_thought:
|
||||
dockerfile_content_raw = conversation.query(
|
||||
f"General rules: " + not_allowed() + chain_of_thought_optimization('dockerfile', 'Dockerfile'))
|
||||
dockerfile_content = extract_content_from_result(dockerfile_content_raw, 'Dockerfile')
|
||||
persist_file(dockerfile_content, EXECUTOR_FOLDER_v1 + '/Dockerfile')
|
||||
|
||||
write_config_yml(executor_name, EXECUTOR_FOLDER_v1)
|
||||
|
||||
|
||||
def create_playground(executor_name, executor_path, host):
|
||||
print_colored('', '############# Playground #############', 'red')
|
||||
|
||||
@@ -112,33 +115,40 @@ print(response[0].text) # can also be blob in case of image/audio..., this shoul
|
||||
)
|
||||
conversation = gpt.Conversation()
|
||||
conversation.query(user_query)
|
||||
playground_content_raw = conversation.query(f"General rules: " + not_allowed() + chain_of_thought_optimization('python', 'playground.py'))
|
||||
playground_content_raw = conversation.query(
|
||||
f"General rules: " + not_allowed() + chain_of_thought_optimization('python', 'playground.py'))
|
||||
playground_content = extract_content_from_result(playground_content_raw, 'playground.py')
|
||||
persist_file(playground_content, f'{executor_path}/playground.py')
|
||||
|
||||
def debug_executor():
|
||||
for i in range(1, 20):
|
||||
|
||||
error = build_docker(f'executor/v{i}')
|
||||
def debug_executor():
|
||||
MAX_DEBUGGING_ITERATIONS = 20
|
||||
error_before = ''
|
||||
for i in range(1, MAX_DEBUGGING_ITERATIONS):
|
||||
# error_docker = build_docker(f'executor/v{i}')
|
||||
log_hubble = push_executor(f'executor/v{i}')
|
||||
error = process_error_message(log_hubble)
|
||||
if error:
|
||||
recreate_folder(f'executor/v{i + 1}')
|
||||
file_name_to_content = get_all_executor_files_with_content(f'executor/v{i}')
|
||||
all_files_string = files_to_string(file_name_to_content)
|
||||
user_query = (
|
||||
f"General rules: " + not_allowed()
|
||||
+ 'Here are all the files I use:\n'
|
||||
+ all_files_string
|
||||
+ 'I got the following error:\n'
|
||||
+ error + '\n'
|
||||
+ 'Think quickly about possible reasons. '
|
||||
'Then output the files that need change. '
|
||||
"Don't output files that don't need change. "
|
||||
"If you output a file, then write the complete file. "
|
||||
"Use the exact same syntax to wrap the code:\n"
|
||||
f"**...**\n"
|
||||
f"```...\n"
|
||||
f"...code...\n"
|
||||
f"```\n\n"
|
||||
f"General rules: " + not_allowed()
|
||||
+ 'Here are all the files I use:\n'
|
||||
+ all_files_string
|
||||
+ (('This is an error that is already fixed before:\n'
|
||||
+ error_before) if error_before else '')
|
||||
+ '\n\nNow, I get the following error:\n'
|
||||
+ error + '\n'
|
||||
+ 'Think quickly about possible reasons. '
|
||||
'Then output the files that need change. '
|
||||
"Don't output files that don't need change. "
|
||||
"If you output a file, then write the complete file. "
|
||||
"Use the exact same syntax to wrap the code:\n"
|
||||
f"**...**\n"
|
||||
f"```...\n"
|
||||
f"...code...\n"
|
||||
f"```\n\n"
|
||||
)
|
||||
conversation = gpt.Conversation()
|
||||
returned_files_raw = conversation.query(user_query)
|
||||
@@ -149,8 +159,12 @@ def debug_executor():
|
||||
|
||||
for file_name, content in file_name_to_content.items():
|
||||
persist_file(content, f'executor/v{i + 1}/{file_name}')
|
||||
error_before = error
|
||||
|
||||
else:
|
||||
break
|
||||
if i == MAX_DEBUGGING_ITERATIONS - 1:
|
||||
raise Exception('Could not debug the executor.')
|
||||
return f'executor/v{i}'
|
||||
|
||||
|
||||
@@ -161,31 +175,48 @@ def main(
|
||||
test_scenario,
|
||||
):
|
||||
executor_name = f'MicroChainExecutor{random.randint(0, 1000_000)}'
|
||||
create_executor(executor_description, input_modality, output_modality, test_scenario, executor_name)
|
||||
create_executor(executor_description, test_scenario, executor_name)
|
||||
# executor_name = 'MicroChainExecutor790050'
|
||||
executor_path = debug_executor()
|
||||
print('Executor can be built locally, now we will push it to the cloud.')
|
||||
jina_cloud.push_executor(executor_path)
|
||||
# print('Executor can be built locally, now we will push it to the cloud.')
|
||||
# jina_cloud.push_executor(executor_path)
|
||||
print('Deploy a jina flow')
|
||||
host = jina_cloud.deploy_flow(executor_name, 'flow')
|
||||
print(f'Flow is deployed create the playground for {host}')
|
||||
executor_name = 'MicroChainExecutor48442'
|
||||
executor_path = 'executor/v2'
|
||||
host = 'grpcs://mybelovedocrflow-24a412bc63.wolf.jina.ai'
|
||||
create_playground(executor_name, executor_path, host)
|
||||
print(
|
||||
'Executor name:', executor_name, '\n',
|
||||
'Executor path:', executor_path, '\n',
|
||||
'Host:', host, '\n',
|
||||
'Playground:', f'streamlit run {executor_path}/playground.py', '\n',
|
||||
)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
# ######## Level 1 task #########
|
||||
main(
|
||||
executor_description="The executor takes a pdf file as input, parses it and returns the text.",
|
||||
input_modality='pdf',
|
||||
output_modality='text',
|
||||
test_scenario='Takes https://www2.deloitte.com/content/dam/Deloitte/de/Documents/about-deloitte/Deloitte-Unternehmensgeschichte.pdf and returns a string that is at least 100 characters long',
|
||||
)
|
||||
# money prompt: $0.56
|
||||
# money generation: $0.22
|
||||
# total money: $0.78
|
||||
# main(
|
||||
# executor_description="The executor takes a pdf file as input, parses it and returns the text.",
|
||||
# input_modality='pdf',
|
||||
# output_modality='text',
|
||||
# test_scenario='Takes https://www2.deloitte.com/content/dam/Deloitte/de/Documents/about-deloitte/Deloitte-Unternehmensgeschichte.pdf and returns a string that is at least 100 characters long',
|
||||
# )
|
||||
|
||||
|
||||
main(
|
||||
executor_description="The executor takes a url of a website as input and returns the logo of the website as an image.",
|
||||
input_modality='url',
|
||||
output_modality='image',
|
||||
test_scenario='Takes https://jina.ai/ as input and returns an svg image of the logo.',
|
||||
)
|
||||
|
||||
# # # ######## Level 1 task #########
|
||||
# main(
|
||||
# executor_description="The executor takes a pdf file as input, parses it and returns the text.",
|
||||
# input_modality='pdf',
|
||||
# output_modality='text',
|
||||
# test_scenario='Takes https://www2.deloitte.com/content/dam/Deloitte/de/Documents/about-deloitte/Deloitte-Unternehmensgeschichte.pdf and returns a string that is at least 100 characters long',
|
||||
# )
|
||||
|
||||
# ######## Level 2 task #########
|
||||
# main(
|
||||
# executor_description="OCR detector",
|
||||
@@ -194,13 +225,12 @@ if __name__ == '__main__':
|
||||
# test_scenario='Takes https://miro.medium.com/v2/resize:fit:1024/0*4ty0Adbdg4dsVBo3.png as input and returns a string that contains "Hello, world"',
|
||||
# )
|
||||
|
||||
|
||||
# ######## Level 3 task #########
|
||||
# main(
|
||||
# executor_description="The executor takes an mp3 file as input and returns bpm and pitch in the tags.",
|
||||
# executor_description="The executor takes an mp3 file as input and returns bpm and pitch in a json.",
|
||||
# input_modality='audio',
|
||||
# output_modality='tags',
|
||||
# test_scenario='Takes https://miro.medium.com/v2/resize:fit:1024/0*4ty0Adbdg4dsVBo3.png as input and returns a string that contains "Hello, world"',
|
||||
# output_modality='json',
|
||||
# test_scenario='Takes https://miro.medium.com/v2/resize:fit:1024/0*4ty0Adbdg4dsVBo3.png as input and returns a json with bpm and pitch',
|
||||
# )
|
||||
|
||||
######### Level 4 task #########
|
||||
@@ -212,3 +242,11 @@ if __name__ == '__main__':
|
||||
# test_scenario='Test that 3d object from https://raw.githubusercontent.com/polygonjs/polygonjs-assets/master/models/wolf.obj '
|
||||
# 'is put in and out comes a 2d rendering of it',
|
||||
# )
|
||||
|
||||
# ######## Level 8 task #########
|
||||
# main(
|
||||
# executor_description="The executor takes an image as input and returns a list of bounding boxes of all animals in the image.",
|
||||
# input_modality='blob',
|
||||
# output_modality='json',
|
||||
# test_scenario='Take the image from https://thumbs.dreamstime.com/b/dog-professor-red-bow-tie-glasses-white-background-isolated-dog-professor-glasses-197036807.jpg as input and assert that the list contains at least one bounding box. ',
|
||||
# )
|
||||
|
||||
@@ -1,16 +1,59 @@
|
||||
import hashlib
|
||||
import json
|
||||
import os
|
||||
import subprocess
|
||||
import re
|
||||
from argparse import Namespace
|
||||
from pathlib import Path
|
||||
|
||||
import hubble
|
||||
from hubble.executor.helper import upload_file, archive_package, get_request_header
|
||||
from jcloud.flow import CloudFlow
|
||||
from jina import Flow
|
||||
|
||||
|
||||
|
||||
def push_executor(dir_path):
    """Push the executor folder to Jina Hub and return the failure log, if any.

    The folder is pushed with the ``jina hub push`` CLI, then archived and
    uploaded to the Hubble ``executor.push`` endpoint. The endpoint's response
    body is read line by line, one JSON document per line.

    :param dir_path: path of the executor folder (string or ``Path``).
    :return: ``''`` when the response does not contain
        ``'exited on non-zero code'``; otherwise the string payloads of the
        response lines that precede the first line containing
        ``'exit code:'``, joined with newlines.
    """
    # NOTE(review): the CLI push and the HTTP upload below both publish the
    # executor; presumably only one of them is intended -- confirm.
    cmd = f'jina hub push {dir_path}/. --verbose --replay'
    os.system(cmd)
    dir_path = Path(dir_path)

    # The md5 of the archive is sent along as the 'md5sum' form field.
    content = archive_package(dir_path).getvalue()
    md5_digest = hashlib.md5(content).hexdigest()

    form_data = {
        'public': 'True',
        'private': 'False',
        'verbose': 'True',
        'md5sum': md5_digest,
    }
    req_header = get_request_header()
    resp = upload_file(
        'https://api.hubble.jina.ai/v2/rpc/executor.push',
        'filename',
        content,
        dict_data=form_data,
        headers=req_header,
        stream=False,
        method='post',
    )
    json_lines_str = resp.content.decode('utf-8')
    if 'exited on non-zero code' not in json_lines_str:
        return ''

    responses = []
    for json_line in json_lines_str.splitlines():
        if 'exit code:' in json_line:
            break
        if not json_line.strip():
            # Blank lines are not JSON documents; json.loads would raise.
            continue
        try:
            d = json.loads(json_line)
        except json.JSONDecodeError:
            # Keep non-JSON output verbatim instead of losing the whole log.
            responses.append(json_line)
            continue
        # Only dicts can carry a 'payload' key; guarding on the type avoids a
        # TypeError for JSON lists, numbers or strings.
        if isinstance(d, dict) and isinstance(d.get('payload'), str):
            responses.append(d['payload'])
        elif isinstance(d, str):
            responses.append(d)
    return '\n'.join(responses)
|
||||
|
||||
|
||||
def get_user_name():
|
||||
client = hubble.Client(max_retries=None, jsonify=True)
|
||||
@@ -51,10 +94,10 @@ executors:
|
||||
with open(full_flow_path, 'w') as f:
|
||||
f.write(flow)
|
||||
|
||||
print('try local execution')
|
||||
flow = Flow.load_config(full_flow_path)
|
||||
with flow:
|
||||
pass
|
||||
# print('try local execution')
|
||||
# flow = Flow.load_config(full_flow_path)
|
||||
# with flow:
|
||||
# pass
|
||||
print('deploy flow on jcloud')
|
||||
return deploy_on_jcloud(flow_yaml=full_flow_path)
|
||||
|
||||
@@ -78,22 +121,24 @@ def update_client_line_in_file(file_path, host):
|
||||
file.write(replaced_content)
|
||||
|
||||
|
||||
def process_error_message(error_message):
    """Trim a build log down to the part that describes the failure.

    The log is scanned for step markers such as ``#11 [7/8]`` at the start of
    a line. Everything from the last marker onwards is kept, and at most the
    final 25 lines of that tail are returned.

    If the log contains no step marker, the last 25 lines of the whole log are
    returned instead of an empty string, so that callers which test the result
    for truthiness do not mistake an unrecognised failure for success.

    :param error_message: raw log as a single string (may be empty or None).
    :return: the trimmed log excerpt; ``''`` only when the input is empty.
    """
    max_lines = 25
    if not error_message:
        return ''

    lines = error_message.split('\n')
    # Pattern to match lines like "#11 [7/8]"
    step_marker = re.compile(r"^#\d+ \[[ \d]+/[ \d]+\]")

    # Walk backwards: only the last step marker matters.
    start = next(
        (
            index
            for index in range(len(lines) - 1, -1, -1)
            if step_marker.match(lines[index])
        ),
        0,  # no marker found: fall back to the whole log
    )
    return '\n'.join(lines[start:][-max_lines:])
|
||||
|
||||
def build_docker(path):
|
||||
def process_error_message(error_message):
|
||||
lines = error_message.split('\n')
|
||||
relevant_lines = []
|
||||
|
||||
pattern = re.compile(r"^#\d+ \[[ \d]+/[ \d]+\]") # Pattern to match lines like "#11 [7/8]"
|
||||
last_matching_line_index = None
|
||||
|
||||
for index, line in enumerate(lines):
|
||||
if pattern.match(line):
|
||||
last_matching_line_index = index
|
||||
|
||||
if last_matching_line_index is not None:
|
||||
relevant_lines = lines[last_matching_line_index:]
|
||||
|
||||
return '\n'.join(relevant_lines)
|
||||
|
||||
# The command to build the Docker image
|
||||
cmd = f"docker build -t micromagic {path}"
|
||||
|
||||
@@ -27,11 +27,12 @@ def _task(task, tag_name, file_name):
|
||||
)
|
||||
|
||||
|
||||
def executor_file_task(executor_name, executor_description, input_modality, input_doc_field,
|
||||
def executor_file_task(executor_name, executor_description, test_scenario, input_modality, input_doc_field,
|
||||
output_modality, output_doc_field):
|
||||
return _task(f'''
|
||||
Write the executor called '{executor_name}'.
|
||||
It matches the following description: '{executor_description}'.
|
||||
It will be tested with the following scenario: '{test_scenario}'.
|
||||
It gets a DocumentArray as input where each document has the input modality '{input_modality}' and can be accessed via document.{input_doc_field}.
|
||||
It returns a DocumentArray as output where each document has the output modality '{output_modality}' that is stored in document.{output_doc_field}.
|
||||
Have in mind that d.uri is never a path to a local file. It is always a url.
|
||||
|
||||
Reference in New Issue
Block a user