Revert "♻ refactor: rename package"

This reverts commit 55796828dd.
Author: Florian Hönicke
Date: 2023-04-24 23:20:09 +02:00
Commit: 0339b24353 (parent 9ee674817f)
33 changed files with 1940 additions and 7 deletions

gptdeploy.py

@@ -1,4 +1,4 @@
-from gptdeploy import main
+from src import main
 if __name__ == "__main__":
     main()


@@ -7,7 +7,7 @@
 set -ex
-INIT_FILE='gptdeploy/__init__.py'
+INIT_FILE='src/__init__.py'
 VER_TAG='__version__ = '
 RELEASENOTE='./node_modules/.bin/git-release-notes'

setup.py

@@ -8,7 +8,7 @@ def read_requirements():
     try:
-        libinfo_py = path.join('gptdeploy', '__init__.py')
+        libinfo_py = path.join('src', '__init__.py')
         libinfo_content = open(libinfo_py, 'r', encoding='utf8').readlines()
         version_line = [l.strip() for l in libinfo_content if l.startswith('__version__')][
             0
@@ -33,7 +33,7 @@ setup(
     scripts=['gptdeploy.py'],
     entry_points={
         'console_scripts': [
-            'gptdeploy = gptdeploy:main',
+            'gptdeploy = src:main',
         ],
     },

src/__init__.py Normal file

@@ -0,0 +1,3 @@
__version__ = '0.18.34'
from src.cli import main

src/apis/__init__.py Normal file (empty)

src/apis/gpt.py Normal file

@@ -0,0 +1,149 @@
import os
from time import sleep
from typing import List, Any
import openai
from langchain import PromptTemplate
from langchain.callbacks import CallbackManager
from langchain.chat_models import ChatOpenAI
from openai.error import RateLimitError
from langchain.schema import HumanMessage, SystemMessage, BaseMessage
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from requests.exceptions import ConnectionError
from urllib3.exceptions import InvalidChunkLength
from src.constants import PRICING_GPT4_PROMPT, PRICING_GPT4_GENERATION, PRICING_GPT3_5_TURBO_PROMPT, \
PRICING_GPT3_5_TURBO_GENERATION, CHARS_PER_TOKEN
from src.options.generate.templates_system import template_system_message_base, executor_example, docarray_example, \
client_example, gpt_example
from src.utils.string_tools import print_colored
def configure_openai_api_key():
if 'OPENAI_API_KEY' not in os.environ:
print_colored('You need to set OPENAI_API_KEY in your environment.', '''
Run:
gptdeploy configure --key <your_openai_api_key>
If you have updated it already, please restart your terminal.
''', 'red')
exit(1)
openai.api_key = os.environ['OPENAI_API_KEY']
class GPTSession:
def __init__(self, task_description, test_description, model: str = 'gpt-4', ):
self.task_description = task_description
self.test_description = test_description
if model == 'gpt-4' and self.is_gpt4_available():
self.pricing_prompt = PRICING_GPT4_PROMPT
self.pricing_generation = PRICING_GPT4_GENERATION
else:
if model == 'gpt-4':
print_colored('GPT version info', 'GPT-4 is not available. Using GPT-3.5-turbo instead.', 'yellow')
model = 'gpt-3.5-turbo'
self.pricing_prompt = PRICING_GPT3_5_TURBO_PROMPT
self.pricing_generation = PRICING_GPT3_5_TURBO_GENERATION
self.model_name = model
self.chars_prompt_so_far = 0
self.chars_generation_so_far = 0
def get_conversation(self, system_definition_examples: List[str] = ['gpt', 'executor', 'docarray', 'client']):
return _GPTConversation(
self.model_name, self.cost_callback, self.task_description, self.test_description, system_definition_examples
)
@staticmethod
def is_gpt4_available():
try:
for i in range(5):
try:
openai.ChatCompletion.create(
model="gpt-4",
messages=[{
"role": 'system',
"content": 'you respond nothing'
}]
)
break
except RateLimitError:
sleep(1)
continue
return True
except openai.error.InvalidRequestError:
return False
def cost_callback(self, chars_prompt, chars_generation):
self.chars_prompt_so_far += chars_prompt
self.chars_generation_so_far += chars_generation
print('\n')
money_prompt = self._calculate_money_spent(self.chars_prompt_so_far, self.pricing_prompt)
money_generation = self._calculate_money_spent(self.chars_generation_so_far, self.pricing_generation)
print('Total money spent so far on openai.com:', f'${money_prompt + money_generation:.3f}')
print('\n')
@staticmethod
def _calculate_money_spent(num_chars, price):
return round(num_chars / CHARS_PER_TOKEN * price / 1000, 3)
class AssistantStreamingStdOutCallbackHandler(StreamingStdOutCallbackHandler):
def on_llm_new_token(self, token: str, **kwargs: Any) -> None:
"""Run on new LLM token. Only available when streaming is enabled."""
print_colored('', token, 'green', end='')
class _GPTConversation:
def __init__(self, model: str, cost_callback, task_description, test_description, system_definition_examples: List[str] = ['executor', 'docarray', 'client']):
self._chat = ChatOpenAI(
model_name=model,
streaming=True,
callback_manager=CallbackManager([AssistantStreamingStdOutCallbackHandler()]),
verbose=True,
temperature=0,
)
self.cost_callback = cost_callback
self.messages: List[BaseMessage] = []
self.system_message = self._create_system_message(task_description, test_description, system_definition_examples)
if os.environ['VERBOSE'].lower() == 'true':
print_colored('system', self.system_message.content, 'magenta')
def chat(self, prompt: str):
chat_message = HumanMessage(content=prompt)
self.messages.append(chat_message)
if os.environ['VERBOSE'].lower() == 'true':
print_colored('user', prompt, 'blue')
print_colored('assistant', '', 'green', end='')
for i in range(10):
try:
response = self._chat([self.system_message] + self.messages)
break
except (ConnectionError, InvalidChunkLength) as e:
print('There was a connection error. Retrying...')
if i == 9:
raise e
sleep(10)
if os.environ['VERBOSE'].lower() == 'true':
print()
self.cost_callback(sum([len(m.content) for m in self.messages]), len(response.content))
self.messages.append(response)
return response.content
@staticmethod
def _create_system_message(task_description, test_description, system_definition_examples: List[str] = []) -> SystemMessage:
system_message = PromptTemplate.from_template(template_system_message_base).format(
task_description=task_description,
test_description=test_description,
)
if 'gpt' in system_definition_examples:
system_message += f'\n{gpt_example}'
if 'executor' in system_definition_examples:
system_message += f'\n{executor_example}'
if 'docarray' in system_definition_examples:
system_message += f'\n{docarray_example}'
if 'client' in system_definition_examples:
system_message += f'\n{client_example}'
return SystemMessage(content=system_message)
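For orientation, a minimal, hypothetical usage sketch (not part of this commit) of the session API above; both description strings are invented and a valid OPENAI_API_KEY is assumed:

```python
# Hedged usage sketch of GPTSession/_GPTConversation from src/apis/gpt.py.
# The task/test strings are placeholders; real calls hit the OpenAI API.
import os
from src.apis.gpt import GPTSession, configure_openai_api_key

os.environ['VERBOSE'] = 'false'  # _GPTConversation reads this flag
configure_openai_api_key()       # exits with a hint if OPENAI_API_KEY is unset
session = GPTSession(
    task_description='Convert markdown to HTML',
    test_description='Given "# hi", the output contains "<h1>hi</h1>"',
    model='gpt-4',  # falls back to gpt-3.5-turbo if GPT-4 is not available
)
conversation = session.get_conversation()
answer = conversation.chat('Which packages would you use?')  # streams tokens to stdout
```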

src/apis/jina_cloud.py Normal file

@@ -0,0 +1,323 @@
import hashlib
import json
import os
import re
import subprocess
import threading
import time
import webbrowser
from pathlib import Path
from typing import Dict
import click
import hubble
import requests
from hubble.executor.helper import upload_file, archive_package, get_full_version
from jcloud.flow import CloudFlow
from jina import Flow
from src.constants import DEMO_TOKEN
from src.utils.io import suppress_stdout, is_docker_running
from src.utils.string_tools import print_colored
def wait_until_app_is_ready(url):
is_app_ready = False
while not is_app_ready:
try:
response = requests.get(url)
print('waiting for app to be ready...')
if response.status_code == 200:
is_app_ready = True
except requests.exceptions.RequestException:
pass
time.sleep(0.5)
def open_streamlit_app(host: str):
url = f"{host}/playground"
wait_until_app_is_ready(url)
webbrowser.open(url, new=2)
def redirect_callback(href):
print(
f'You need to log in to Jina first to use GPTDeploy\n'
f'Please open this link if it does not open automatically in your browser: {href}'
)
webbrowser.open(href, new=0, autoraise=True)
def jina_auth_login():
try:
hubble.Client(jsonify=True).get_user_info(log_error=False)
except hubble.AuthenticationRequiredError:
print('You need to log in to Jina first to use GPTDeploy')
print_colored('', '''
If you just created an account, it can happen that the login callback is not working.
In this case, please cancel this run, rerun your gptdeploy command, and log in to your account again.
''', 'green'
)
hubble.login(prompt='login', redirect_callback=redirect_callback)
def push_executor(dir_path):
for i in range(3):
try:
return _push_executor(dir_path)
except Exception as e:
if i == 2:
raise e
print(f'connection error - retrying in 5 seconds...')
time.sleep(5)
def get_request_header() -> Dict:
"""Return the header of request with an authorization token.
:return: request header
"""
metas, envs = get_full_version()
headers = {
**{f'jinameta-{k}': str(v) for k, v in metas.items()},
**envs,
}
headers['Authorization'] = f'token {DEMO_TOKEN}'
return headers
def _push_executor(dir_path):
dir_path = Path(dir_path)
md5_hash = hashlib.md5()
bytesio = archive_package(dir_path)
content = bytesio.getvalue()
md5_hash.update(content)
md5_digest = md5_hash.hexdigest()
form_data = {
'public': 'True',
'private': 'False',
'verbose': 'True',
'buildEnv': f'{{"OPENAI_API_KEY": "{os.environ["OPENAI_API_KEY"]}"}}',
'md5sum': md5_digest,
}
with suppress_stdout():
headers = get_request_header()
resp = upload_file(
'https://api.hubble.jina.ai/v2/rpc/executor.push',
'filename',
content,
dict_data=form_data,
headers=headers,
stream=False,
method='post',
)
json_lines_str = resp.content.decode('utf-8')
if 'AuthenticationRequiredWithBearerChallengeError' in json_lines_str:
raise Exception('The executor is not authorized to be pushed to Jina Cloud.')
if 'exited on non-zero code' not in json_lines_str:
return ''
responses = []
for json_line in json_lines_str.splitlines():
if 'exit code:' in json_line:
break
d = json.loads(json_line)
if 'payload' in d and type(d['payload']) == str:
responses.append(d['payload'])
elif type(d) == str:
responses.append(d)
return '\n'.join(responses)
def get_user_name(token=None):
client = hubble.Client(max_retries=None, jsonify=True, token=token)
response = client.get_user_info()
return response['data']['name']
def _deploy_on_jcloud(flow_yaml):
cloud_flow = CloudFlow(path=flow_yaml)
return cloud_flow.__enter__().endpoints['gateway']
def deploy_on_jcloud(executor_name, microservice_path):
print('Deploy a jina flow')
full_flow_path = create_flow_yaml(microservice_path, executor_name, use_docker=True, use_custom_gateway=True)
for i in range(3):
try:
host = _deploy_on_jcloud(flow_yaml=full_flow_path)
break
except Exception as e:
print(f'Could not deploy on Jina Cloud. Trying again in 5 seconds. Error: {e}')
time.sleep(5)
except SystemExit as e:
raise SystemExit(f'''
Looks like your free credits ran out.
Please add payment information to your account and try again.
Visit https://cloud.jina.ai/
''') from e
if i == 2:
raise Exception('''
Could not deploy on Jina Cloud.
This can happen when the microservice is buggy, if it requires too much memory or if the Jina Cloud is overloaded.
Please try again later.
'''
)
print(f'''
Your Microservice is deployed at {host} and the playground is available at {host}/playground
We will now open the playground in your browser.
''')
open_streamlit_app(host)
return host
def run_streamlit_app(app_path):
subprocess.run(['streamlit', 'run', app_path, '--server.address', '0.0.0.0', '--server.port', '8081'])
def run_locally(executor_name, microservice_version_path):
if is_docker_running():
use_docker = True
else:
click.echo('''
Docker daemon doesn\'t seem to be running (possible reasons: incorrect docker installation, docker command isn\'t in system path, insufficient permissions, docker is running but unresponsive).
It might be important to run your microservice within a docker container.
Your machine might not have all the dependencies installed.
You have 3 options:
a) start the docker daemon
b) run gptdeploy deploy... to deploy your microservice on Jina Cloud. All dependencies will be installed there.
c) try to run your microservice locally without docker. It is worth a try but might fail.
'''
)
user_input = click.prompt('Do you want to run your microservice locally without docker? (Y/n)', type=str, default='y')
if user_input.lower() != 'y':
exit(1)
use_docker = False
print('Run a jina flow locally')
full_flow_path = create_flow_yaml(microservice_version_path, executor_name, use_docker, False)
flow = Flow.load_config(full_flow_path)
with flow:
print(f'''
Your microservice started locally.
We now start the playground for you.
''')
app_path = os.path.join(microservice_version_path, 'gateway', "app.py")
# Run the Streamlit app in a separate thread
streamlit_thread = threading.Thread(target=run_streamlit_app, args=(app_path,))
streamlit_thread.start()
# Open the Streamlit app in the user's default web browser
open_streamlit_app(host='http://localhost:8081')
flow.block()
def create_flow_yaml(dest_folder, executor_name, use_docker, use_custom_gateway):
if use_docker:
prefix = 'jinaai+docker'
else:
prefix = 'jinaai'
flow = f'''jtype: Flow
with:
port: 8080
protocol: http
jcloud:
version: 3.15.1.dev14
labels:
creator: microchain
name: gptdeploy
gateway:
{f"uses: {prefix}://{get_user_name(DEMO_TOKEN)}/Gateway{executor_name}:latest" if use_custom_gateway else ""}
{"" if use_docker else "install-requirements: True"}
executors:
- name: {executor_name.lower()}
uses: {prefix}://{get_user_name(DEMO_TOKEN)}/{executor_name}:latest
{"" if use_docker else "install-requirements: True"}
env:
OPENAI_API_KEY: {os.environ['OPENAI_API_KEY']}
jcloud:
resources:
instance: C2
capacity: spot
'''
full_flow_path = os.path.join(dest_folder,
'flow.yml')
with open(full_flow_path, 'w', encoding='utf-8') as f:
f.write(flow)
return full_flow_path
def replace_client_line(file_content: str, replacement: str) -> str:
lines = file_content.split('\n')
for index, line in enumerate(lines):
if 'Client(' in line:
lines[index] = replacement
break
return '\n'.join(lines)
def update_client_line_in_file(file_path, host):
with open(file_path, 'r', encoding='utf-8') as file:
content = file.read()
replaced_content = replace_client_line(content, f"client = Client(host='{host}')")
with open(file_path, 'w', encoding='utf-8') as file:
file.write(replaced_content)
def shorten_logs(relevant_lines):
# handle duplicate error messages
for index, line in enumerate(relevant_lines):
if '--- Captured stderr call ----' in line:
relevant_lines = relevant_lines[:index]
# filter pip install logs
relevant_lines = [line for line in relevant_lines if ' Requirement already satisfied: ' not in line]
# filter version not found logs
for index, line in enumerate(relevant_lines):
if 'ERROR: Could not find a version that satisfies the requirement ' in line:
start_and_end = line[:150] + '...' + line[-150:]
relevant_lines[index] = start_and_end
return relevant_lines
def clean_color_codes(response):
response = re.sub(r'\x1b\[[0-9;]*m', '', response)
return response
def process_error_message(error_message):
lines = error_message.split('\n')
relevant_lines = []
pattern = re.compile(r"^#\d+ \[[ \d]+/[ \d]+\]") # Pattern to match lines like "#11 [7/8]"
last_matching_line_index = None
for index, line in enumerate(lines):
if pattern.match(line):
last_matching_line_index = index
if last_matching_line_index is not None:
relevant_lines = lines[last_matching_line_index:]
relevant_lines = shorten_logs(relevant_lines)
response = '\n'.join(relevant_lines[-100:]).strip()
response = clean_color_codes(response)
# the following code tests the case that the docker file is corrupted and can not be parsed
# the method above will not return a relevant error message in this case
# but the last line of the error message will start with "error"
last_line = lines[-1]
if not response and last_line.startswith('error: '):
return last_line
return response
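The client-rewrite helper above is self-contained enough to illustrate in isolation; a sketch with invented file content and host:

```python
# Sketch of replace_client_line from src/apis/jina_cloud.py: only the first
# line containing 'Client(' is swapped. Content and host are placeholders.
from src.apis.jina_cloud import replace_client_line

content = "from jina import Client\nclient = Client(host='http://localhost:8080')\nprint('ready')"
updated = replace_client_line(content, "client = Client(host='https://example-flow.jina.ai')")
print(updated)
```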

src/cli.py Normal file

@@ -0,0 +1,103 @@
from src import env # noqa: F401 to make sure certain environment variables are set
import functools
import os
import click
from src.apis.gpt import configure_openai_api_key
from src.apis.jina_cloud import jina_auth_login
from src.options.configure.key_handling import set_api_key
def openai_api_key_needed(func):
@functools.wraps(func)
def wrapper(*args, **kwargs):
configure_openai_api_key()
return func(*args, **kwargs)
return wrapper
def exception_interceptor(func):
@functools.wraps(func)
def wrapper(*args, **kwargs):
try:
return func(*args, **kwargs)
except Exception as e:
raise type(e)(f'''
{str(e)}
😱😱😱 Sorry for this experience.
Could you please report an issue about this on our github repo? We'll try to fix it asap.
https://github.com/jina-ai/gptdeploy/issues/new
''') from e
return wrapper
def path_param(func):
@click.option('--path', required=True, help='Path to the generated microservice.')
@functools.wraps(func)
def wrapper(*args, **kwargs):
path = os.path.expanduser(kwargs['path'])
path = os.path.abspath(path)
kwargs['path'] = path
return func(*args, **kwargs)
return wrapper
@click.group(invoke_without_command=True)
@click.pass_context
@exception_interceptor
def main(ctx):
if ctx.invoked_subcommand is None:
click.echo(ctx.get_help())
@main.command()
@click.option('--description', required=True, help='Description of the microservice.')
@click.option('--test', required=True, help='Test scenario for the microservice.')
@click.option('--model', default='gpt-4', help='GPT model to use (default: gpt-4).')
@click.option('--verbose', default=False, is_flag=True, help='Verbose mode.') # only for development
@path_param
@openai_api_key_needed # must sit below @main.command() so the key check actually runs
def generate(
description,
test,
model,
verbose,
path,
):
os.environ['VERBOSE'] = str(verbose)
path = os.path.expanduser(path)
path = os.path.abspath(path)
if os.path.exists(path):
if os.listdir(path):
click.echo(f"Error: The path {path} you provided via --path is not empty. Please choose a directory that does not exist or is empty.")
return
from src.options.generate.generator import Generator
generator = Generator(description, test, model=model)
generator.generate(path)
@main.command()
@path_param
@openai_api_key_needed
def run(path):
from src.options.run import Runner
path = os.path.expanduser(path)
path = os.path.abspath(path)
Runner().run(path)
@main.command()
@path_param
@openai_api_key_needed
def deploy(path):
jina_auth_login()
from src.options.deploy.deployer import Deployer
path = os.path.expanduser(path)
path = os.path.abspath(path)
Deployer().deploy(path)
@main.command()
@click.option('--key', required=True, help='Your OpenAI API key.')
def configure(key):
set_api_key(key)
if __name__ == '__main__':
main()
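A hedged smoke-test sketch for the command group above, using click's test runner; all option values are invented, and a real invocation would call out to OpenAI:

```python
# Hypothetical: drive the CLI defined in src/cli.py via click.testing.CliRunner.
from click.testing import CliRunner
from src.cli import main

runner = CliRunner()
result = runner.invoke(main, [
    'generate',
    '--description', 'Extract the title of a PDF given by URL',
    '--test', 'https://example.com/doc.pdf returns a non-empty title',
    '--path', '/tmp/microservice',
])
print(result.exit_code, result.output)
```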

src/constants.py Normal file

@@ -0,0 +1,44 @@
EXECUTOR_FILE_NAME = 'microservice.py'
TEST_EXECUTOR_FILE_NAME = 'test_microservice.py'
REQUIREMENTS_FILE_NAME = 'requirements.txt'
DOCKER_FILE_NAME = 'Dockerfile'
CLIENT_FILE_NAME = 'client.py'
STREAMLIT_FILE_NAME = 'streamlit.py'
EXECUTOR_FILE_TAG = 'python'
TEST_EXECUTOR_FILE_TAG = 'python'
REQUIREMENTS_FILE_TAG = ''
DOCKER_FILE_TAG = 'dockerfile'
CLIENT_FILE_TAG = 'python'
STREAMLIT_FILE_TAG = 'python'
FILE_AND_TAG_PAIRS = [
(EXECUTOR_FILE_NAME, EXECUTOR_FILE_TAG),
(TEST_EXECUTOR_FILE_NAME, TEST_EXECUTOR_FILE_TAG),
(REQUIREMENTS_FILE_NAME, REQUIREMENTS_FILE_TAG),
(DOCKER_FILE_NAME, DOCKER_FILE_TAG),
(CLIENT_FILE_NAME, CLIENT_FILE_TAG),
(STREAMLIT_FILE_NAME, STREAMLIT_FILE_TAG)
]
FLOW_URL_PLACEHOLDER = 'jcloud.jina.ai'
PRICING_GPT4_PROMPT = 0.03
PRICING_GPT4_GENERATION = 0.06
PRICING_GPT3_5_TURBO_PROMPT = 0.002
PRICING_GPT3_5_TURBO_GENERATION = 0.002
CHARS_PER_TOKEN = 3.4
NUM_IMPLEMENTATION_STRATEGIES = 5
MAX_DEBUGGING_ITERATIONS = 10
DEMO_TOKEN = '45372338e04f5a41af949024db929d46'
PROBLEMATIC_PACKAGES = [
# 'Pyrender', 'Trimesh',
'ModernGL', 'PyOpenGL', 'Pyglet', 'pythreejs', 'panda3d', # because they need a screen,
]
UNNECESSARY_PACKAGES = ['FastAPI']
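A worked example of how the pricing constants above feed GPTSession._calculate_money_spent (illustrative numbers only):

```python
# round(num_chars / CHARS_PER_TOKEN * price / 1000, 3): characters are converted
# to approximate tokens, and prices are per 1,000 tokens.
from src.constants import CHARS_PER_TOKEN, PRICING_GPT4_PROMPT

num_chars = 34_000
tokens = num_chars / CHARS_PER_TOKEN                  # = 10,000 tokens at 3.4 chars/token
print(round(tokens * PRICING_GPT4_PROMPT / 1000, 3))  # -> 0.3 dollars of GPT-4 prompt budget
```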

src/env.py Normal file

@@ -0,0 +1,5 @@
import os
os.environ['PYTHONIOENCODING'] = 'utf-8'
os.environ['PYTHONLEGACYWINDOWSSTDIO'] = 'utf-8'
os.environ['PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION'] = 'python' # because of protobuf issues on Windows

src/options/__init__.py Normal file

@@ -0,0 +1,55 @@
import os
def listdir_no_hidden(path):
"""
List all non-hidden files and directories in the specified path.
:param path: str
The path to the directory you want to list files and directories from.
:return: list
A list of filenames and directory names that are not hidden.
"""
return [entry for entry in os.listdir(path) if not entry.startswith('.')]
def get_latest_folder(path, max_fn=max):
return max_fn([os.path.join(path, f) for f in listdir_no_hidden(path) if os.path.isdir(os.path.join(path, f))])
def version_max_fn(path_list):
version_list = [int(os.path.split(path)[-1].replace('v', '')) for path in path_list]
max_version = max(version_list)
max_index = version_list.index(max_version)
return path_list[max_index]
def get_latest_version_path(microservice_path):
executor_name_path = get_latest_folder(microservice_path)
latest_approach_path = get_latest_folder(executor_name_path)
latest_version_path = get_latest_folder(latest_approach_path, max_fn=version_max_fn)
return latest_version_path
def get_executor_name(microservice_path):
latest_folder = get_latest_folder(microservice_path)
return os.path.split(latest_folder)[-1]
def validate_folder_is_correct(microservice_path):
if not os.path.exists(microservice_path):
raise ValueError(f'Path {microservice_path} does not exist')
if not os.path.isdir(microservice_path):
raise ValueError(f'Path {microservice_path} is not a directory')
if len(listdir_no_hidden(microservice_path)) == 0:
raise ValueError(f'Path {microservice_path} is empty. Please generate a microservice first. Type `gptdeploy generate` for further instructions.')
if len(listdir_no_hidden(microservice_path)) > 1:
raise ValueError(f'Path {microservice_path} needs to contain only one folder. Please make sure that you only have one microservice in this folder.')
latest_version_path = get_latest_version_path(microservice_path)
required_files = [
'gateway/app.py',
'requirements.txt',
'Dockerfile',
'config.yml',
'microservice.py',
'test_microservice.py',
]
for file_name in required_files:
if not os.path.exists(os.path.join(latest_version_path, file_name)):
raise ValueError(f'Path {latest_version_path} needs to contain a file named {file_name}')
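To make the traversal helpers above concrete, a sketch of the directory layout they assume; all folder names below are invented:

```python
# Assumed layout (placeholders):
# microservice_path/
#   PDFTitleExecutor1234/      <- exactly one executor folder is allowed
#     <approach folder>/       <- get_latest_folder() picks the max entry
#       v1/ v2/ ... v10/       <- version_max_fn() compares numerically, so v10 beats v9
from src.options import get_executor_name, get_latest_version_path

print(get_executor_name('/tmp/microservice'))        # -> 'PDFTitleExecutor1234'
print(get_latest_version_path('/tmp/microservice'))  # -> '/tmp/microservice/.../v10'
```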

src/options/configure/__init__.py Normal file

src/options/configure/key_handling.py Normal file

@@ -0,0 +1,140 @@
import os
import platform
import re
import subprocess
import click
try:
import psutil
except ImportError:
psutil = None
def get_shell():
if psutil is None:
return None
shell_names = ["bash", "zsh", "sh", "fish", "csh", "tcsh", "ksh", "dash"]
# Check the SHELL environment variable first
shell_env = os.environ.get('SHELL')
if shell_env:
shell_name = os.path.basename(shell_env)
if shell_name in shell_names:
return shell_name
# Fallback to traversing the process tree
try:
p = psutil.Process(os.getpid())
# Traverse the process tree
while p.parent():
p = p.parent()
if p.name() in shell_names:
return p.name()
return None
except Exception as e:
click.echo(f"Error detecting shell: {e}")
return None
def get_shell_config(key):
return {
"bash": {"config_file": "~/.bashrc", "export_line": f"export OPENAI_API_KEY={key}"},
"zsh": {"config_file": "~/.zshrc", "export_line": f"export OPENAI_API_KEY={key}"},
"sh": {"config_file": "~/.profile", "export_line": f"export OPENAI_API_KEY={key}"},
"fish": {
"config_file": "~/.config/fish/config.fish",
"export_line": f"set -gx OPENAI_API_KEY {key}",
},
"csh": {"config_file": "~/.cshrc", "export_line": f"setenv OPENAI_API_KEY {key}"},
"tcsh": {"config_file": "~/.tcshrc", "export_line": f"setenv OPENAI_API_KEY {key}"},
"ksh": {"config_file": "~/.kshrc", "export_line": f"export OPENAI_API_KEY={key}"},
"dash": {"config_file": "~/.profile", "export_line": f"export OPENAI_API_KEY={key}"}
}
def set_env_variable(shell, key):
shell_config = get_shell_config(key)
if shell not in shell_config:
click.echo("Sorry, your shell is not supported. Please add the key OPENAI_API_KEY manually.")
return
config_file = os.path.expanduser(shell_config[shell]["config_file"])
try:
with open(config_file, "r", encoding='utf-8') as file:
content = file.read()
export_line = shell_config[shell]['export_line']
# Update the existing API key if it exists, otherwise append it to the config file
if f"OPENAI_API_KEY" in content:
content = re.sub(r'OPENAI_API_KEY=.*', f'OPENAI_API_KEY={key}', content, flags=re.MULTILINE)
with open(config_file, "w", encoding='utf-8') as file:
file.write(content)
else:
with open(config_file, "a", encoding='utf-8') as file:
file.write(f"\n{export_line}\n")
click.echo(f'''
✅ Success, OPENAI_API_KEY has been set in {config_file}.
Please restart your shell to apply the changes or run:
source {config_file}
'''
)
except FileNotFoundError:
click.echo(f"Error: {config_file} not found. Please set the environment variable manually.")
def set_api_key(key):
system_platform = platform.system().lower()
if system_platform == "windows":
set_env_variable_command = f'setx OPENAI_API_KEY "{key}"'
subprocess.call(set_env_variable_command, shell=True)
click.echo('''
✅ Success, OPENAI_API_KEY has been set.
Please restart your Command Prompt to apply the changes.
'''
)
elif system_platform in ["linux", "darwin"]:
if "OPENAI_API_KEY" in os.environ or is_key_set_in_config_file(key):
if not click.confirm("OPENAI_API_KEY is already set. Do you want to overwrite it?"):
click.echo("Aborted.")
return
shell = get_shell()
if shell is None:
click.echo(
"Error: Unable to detect your shell or psutil is not available. Please set the environment variable manually.")
return
set_env_variable(shell, key)
else:
click.echo("Sorry, this platform is not supported.")
def is_key_set_in_config_file(key):
shell = get_shell()
if shell is None:
return False
shell_config = get_shell_config(key)
config_file = os.path.expanduser(shell_config[shell]["config_file"])
try:
with open(config_file, "r", encoding='utf-8') as file:
content = file.read()
if f"OPENAI_API_KEY" in content:
return True
except FileNotFoundError:
pass
return False
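A minimal sketch of the lookup table above; the key is a placeholder, and the module path follows the import in src/cli.py:

```python
# Sketch: per-shell export lines produced by get_shell_config.
from src.options.configure.key_handling import get_shell_config

cfg = get_shell_config('sk-placeholder')
print(cfg['zsh']['export_line'])   # export OPENAI_API_KEY=sk-placeholder
print(cfg['fish']['export_line'])  # set -gx OPENAI_API_KEY sk-placeholder
```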

src/options/deploy/__init__.py Normal file

src/options/deploy/deployer.py Normal file

@@ -0,0 +1,10 @@
from src.apis.jina_cloud import deploy_on_jcloud
from src.options import validate_folder_is_correct, get_executor_name, get_latest_version_path
class Deployer:
def deploy(self, microservice_path):
validate_folder_is_correct(microservice_path)
executor_name = get_executor_name(microservice_path)
latest_version_path = get_latest_version_path(microservice_path)
deploy_on_jcloud(executor_name, latest_version_path)

src/options/generate/__init__.py Normal file

src/options/generate/generator.py Normal file

@@ -0,0 +1,313 @@
import os
import random
import re
import shutil
from typing import List
from src.apis import gpt
from src.apis.jina_cloud import process_error_message, push_executor
from src.constants import FILE_AND_TAG_PAIRS, NUM_IMPLEMENTATION_STRATEGIES, MAX_DEBUGGING_ITERATIONS, \
PROBLEMATIC_PACKAGES, EXECUTOR_FILE_NAME, EXECUTOR_FILE_TAG, TEST_EXECUTOR_FILE_NAME, TEST_EXECUTOR_FILE_TAG, \
REQUIREMENTS_FILE_NAME, REQUIREMENTS_FILE_TAG, DOCKER_FILE_NAME, DOCKER_FILE_TAG, UNNECESSARY_PACKAGES
from src.options.generate.templates_user import template_generate_microservice_name, \
template_generate_possible_packages, \
template_solve_code_issue, \
template_solve_dependency_issue, template_is_dependency_issue, template_generate_playground, \
template_generate_executor, template_generate_test, template_generate_requirements, template_generate_dockerfile, \
template_chain_of_thought, template_summarize_error, template_generate_possible_packages_output_format_string
from src.utils.io import persist_file, get_all_microservice_files_with_content, get_microservice_path
from src.utils.string_tools import print_colored
class Generator:
def __init__(self, task_description, test_description, model='gpt-4'):
self.gpt_session = gpt.GPTSession(task_description, test_description, model=model)
self.task_description = task_description
self.test_description = test_description
def extract_content_from_result(self, plain_text, file_name, match_single_block=False):
pattern = fr"^\*\*{file_name}\*\*\n```(?:\w+\n)?([\s\S]*?)\n```" # the \n at the end makes sure that ``` within the generated code is not matched
match = re.search(pattern, plain_text, re.MULTILINE)
if match:
return match.group(1).strip()
elif match_single_block:
# Check for a single code block
single_code_block_pattern = r"^```(?:\w+\n)?([\s\S]*?)```"
single_code_block_match = re.findall(single_code_block_pattern, plain_text, re.MULTILINE)
if len(single_code_block_match) == 1:
return single_code_block_match[0].strip()
return ''
def write_config_yml(self, class_name, dest_folder, python_file='microservice.py'):
config_content = f'''jtype: {class_name}
py_modules:
- {python_file}
metas:
name: {class_name}
'''
with open(os.path.join(dest_folder, 'config.yml'), 'w', encoding='utf-8') as f:
f.write(config_content)
def files_to_string(self, file_name_to_content, restrict_keys=None):
all_microservice_files_string = ''
for file_name, tag in FILE_AND_TAG_PAIRS:
if file_name in file_name_to_content and (not restrict_keys or file_name in restrict_keys):
all_microservice_files_string += f'**{file_name}**\n```{tag}\n{file_name_to_content[file_name]}\n```\n\n'
return all_microservice_files_string.strip()
def generate_and_persist_file(self, section_title, template, destination_folder=None, file_name=None, system_definition_examples: List[str] = ['gpt', 'executor', 'docarray', 'client'], **template_kwargs):
print_colored('', f'\n\n############# {section_title} #############', 'blue')
conversation = self.gpt_session.get_conversation(system_definition_examples=system_definition_examples)
template_kwargs = {k: v for k, v in template_kwargs.items() if k in template.input_variables}
content_raw = conversation.chat(
template.format(
file_name=file_name,
**template_kwargs
)
)
content = self.extract_content_from_result(content_raw, file_name, match_single_block=True)
if content == '':
content_raw = conversation.chat(f'You must add the content for {file_name}.')
content = self.extract_content_from_result(
content_raw, file_name, match_single_block=True
)
if destination_folder:
persist_file(content, os.path.join(destination_folder, file_name))
return content
def generate_microservice(
self,
path,
microservice_name,
packages,
num_approach,
):
MICROSERVICE_FOLDER_v1 = get_microservice_path(path, microservice_name, packages, num_approach, 1)
os.makedirs(MICROSERVICE_FOLDER_v1)
microservice_content = self.generate_and_persist_file(
'Microservice',
template_generate_executor,
MICROSERVICE_FOLDER_v1,
microservice_name=microservice_name,
microservice_description=self.task_description,
test_description=self.test_description,
packages=packages,
file_name_purpose=EXECUTOR_FILE_NAME,
tag_name=EXECUTOR_FILE_TAG,
file_name=EXECUTOR_FILE_NAME,
)
test_microservice_content = self.generate_and_persist_file(
'Test Microservice',
template_generate_test,
MICROSERVICE_FOLDER_v1,
code_files_wrapped=self.files_to_string({'microservice.py': microservice_content}),
microservice_name=microservice_name,
microservice_description=self.task_description,
test_description=self.test_description,
file_name_purpose=TEST_EXECUTOR_FILE_NAME,
tag_name=TEST_EXECUTOR_FILE_TAG,
file_name=TEST_EXECUTOR_FILE_NAME,
)
requirements_content = self.generate_and_persist_file(
'Requirements',
template_generate_requirements,
MICROSERVICE_FOLDER_v1,
code_files_wrapped=self.files_to_string({
'microservice.py': microservice_content,
'test_microservice.py': test_microservice_content,
}),
file_name_purpose=REQUIREMENTS_FILE_NAME,
file_name=REQUIREMENTS_FILE_NAME,
tag_name=REQUIREMENTS_FILE_TAG,
)
self.generate_and_persist_file(
'Dockerfile',
template_generate_dockerfile,
MICROSERVICE_FOLDER_v1,
code_files_wrapped=self.files_to_string({
'microservice.py': microservice_content,
'test_microservice.py': test_microservice_content,
'requirements.txt': requirements_content,
}),
file_name_purpose=DOCKER_FILE_NAME,
file_name=DOCKER_FILE_NAME,
tag_name=DOCKER_FILE_TAG,
)
self.write_config_yml(microservice_name, MICROSERVICE_FOLDER_v1)
print('\nFirst version of the microservice generated. Start iterating on it to make the tests pass...')
def generate_playground(self, microservice_name, microservice_path):
print_colored('', '\n\n############# Playground #############', 'blue')
file_name_to_content = get_all_microservice_files_with_content(microservice_path)
conversation = self.gpt_session.get_conversation([])
conversation.chat(
template_generate_playground.format(
code_files_wrapped=self.files_to_string(file_name_to_content, ['microservice.py', 'test_microservice.py']),
microservice_name=microservice_name,
)
)
playground_content_raw = conversation.chat(
template_chain_of_thought.format(
file_name_purpose='app.py/the playground',
file_name='app.py',
tag_name='python',
)
)
playground_content = self.extract_content_from_result(playground_content_raw, 'app.py', match_single_block=True)
if playground_content == '':
content_raw = conversation.chat('You must add the app.py code. You must not output any other code.')
playground_content = self.extract_content_from_result(
content_raw, 'app.py', match_single_block=True
)
gateway_path = os.path.join(microservice_path, 'gateway')
shutil.copytree(os.path.join(os.path.dirname(__file__), 'static_files', 'gateway'), gateway_path)
persist_file(playground_content, os.path.join(gateway_path, 'app.py'))
# fill-in name of microservice
gateway_name = f'Gateway{microservice_name}'
custom_gateway_path = os.path.join(gateway_path, 'custom_gateway.py')
with open(custom_gateway_path, 'r', encoding='utf-8') as f:
custom_gateway_content = f.read()
custom_gateway_content = custom_gateway_content.replace(
'class CustomGateway(CompositeGateway):',
f'class {gateway_name}(CompositeGateway):'
)
with open(custom_gateway_path, 'w', encoding='utf-8') as f:
f.write(custom_gateway_content)
# write config.yml
self.write_config_yml(gateway_name, gateway_path, 'custom_gateway.py')
# push the gateway
print('Final step...')
hubble_log = push_executor(gateway_path)
def debug_microservice(self, path, microservice_name, num_approach, packages):
for i in range(1, MAX_DEBUGGING_ITERATIONS):
print('Debugging iteration', i)
print('Trying to debug the microservice. Might take a while...')
previous_microservice_path = get_microservice_path(path, microservice_name, packages, num_approach, i)
next_microservice_path = get_microservice_path(path, microservice_name, packages, num_approach, i + 1)
log_hubble = push_executor(previous_microservice_path)
error = process_error_message(log_hubble)
if error:
print('An error occurred during the build process. Feeding the error back to the assistant...')
self.do_debug_iteration(error, next_microservice_path, previous_microservice_path)
if i == MAX_DEBUGGING_ITERATIONS - 1:
raise self.MaxDebugTimeReachedException('Could not debug the microservice.')
else:
print('Successfully built the microservice.')
break
return get_microservice_path(path, microservice_name, packages, num_approach, i)
def do_debug_iteration(self, error, next_microservice_path, previous_microservice_path):
os.makedirs(next_microservice_path)
file_name_to_content = get_all_microservice_files_with_content(previous_microservice_path)
summarized_error = self.summarize_error(error)
is_dependency_issue = self.is_dependency_issue(error, file_name_to_content['Dockerfile'])
if is_dependency_issue:
all_files_string = self.files_to_string({
key: val for key, val in file_name_to_content.items() if
key in ['requirements.txt', 'Dockerfile']
})
user_query = template_solve_dependency_issue.format(
summarized_error=summarized_error, all_files_string=all_files_string,
)
else:
user_query = template_solve_code_issue.format(
task_description=self.task_description, test_description=self.test_description,
summarized_error=summarized_error, all_files_string=self.files_to_string(file_name_to_content),
)
conversation = self.gpt_session.get_conversation()
returned_files_raw = conversation.chat(user_query)
for file_name, tag in FILE_AND_TAG_PAIRS:
updated_file = self.extract_content_from_result(returned_files_raw, file_name)
if updated_file and (not is_dependency_issue or file_name in ['requirements.txt', 'Dockerfile']):
file_name_to_content[file_name] = updated_file
print(f'Updated {file_name}')
for file_name, content in file_name_to_content.items():
persist_file(content, os.path.join(next_microservice_path, file_name))
class MaxDebugTimeReachedException(BaseException):
pass
def is_dependency_issue(self, error, docker_file: str):
# a few heuristics to quickly jump ahead
if any([error_message in error for error_message in ['AttributeError', 'NameError', 'AssertionError']]):
return False
print_colored('', 'Is it a dependency issue?', 'blue')
conversation = self.gpt_session.get_conversation([])
answer = conversation.chat(template_is_dependency_issue.format(error=error, docker_file=docker_file))
return 'yes' in answer.lower()
def generate_microservice_name(self, description):
print_colored('', '\n\n############# What should be the name of the Microservice? #############', 'blue')
conversation = self.gpt_session.get_conversation()
name_raw = conversation.chat(template_generate_microservice_name.format(description=description))
name = self.extract_content_from_result(name_raw, 'name.txt')
return name
def get_possible_packages(self):
print_colored('', '\n\n############# What packages to use? #############', 'blue')
packages_csv_string = self.generate_and_persist_file(
'packages to use',
template_generate_possible_packages,
None,
file_name='packages.csv',
system_definition_examples=['gpt'],
description=self.task_description
)
packages_list = [[pkg.strip() for pkg in packages_string.split(',')] for packages_string in packages_csv_string.split('\n')]
packages_list = packages_list[:NUM_IMPLEMENTATION_STRATEGIES]
return packages_list
def generate(self, microservice_path):
generated_name = self.generate_microservice_name(self.task_description)
microservice_name = f'{generated_name}{random.randint(0, 10_000_000)}'
packages_list = self.get_possible_packages()
packages_list = [
packages for packages in packages_list if len(set(packages).intersection(set(PROBLEMATIC_PACKAGES))) == 0
]
packages_list = [
[package for package in packages if package not in UNNECESSARY_PACKAGES] for packages in packages_list
]
for num_approach, packages in enumerate(packages_list):
try:
self.generate_microservice(microservice_path, microservice_name, packages, num_approach)
final_version_path = self.debug_microservice(
microservice_path, microservice_name, num_approach, packages
)
self.generate_playground(microservice_name, final_version_path)
except self.MaxDebugTimeReachedException:
print('Could not debug the Microservice with the approach:', packages)
if num_approach == len(packages_list) - 1:
print_colored('',
f'Could not debug the Microservice with any of the approaches: {packages}. Giving up.',
'red')
continue
print(f'''
You can now run or deploy your microservice:
gptdeploy run --path {microservice_path}
gptdeploy deploy --path {microservice_path}
'''
)
break
def summarize_error(self, error):
conversation = self.gpt_session.get_conversation([])
error_summary = conversation.chat(template_summarize_error.format(error=error))
return error_summary
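The reply format that extract_content_from_result parses can be shown standalone; the assistant reply below is invented:

```python
# Sketch of the **<file_name>** + fenced-code contract expected from the assistant.
import re

reply = "**microservice.py**\n```python\nprint('hello')\n```"
pattern = r"^\*\*microservice\.py\*\*\n```(?:\w+\n)?([\s\S]*?)\n```"
print(re.search(pattern, reply, re.MULTILINE).group(1))  # -> print('hello')
```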

src/options/generate/static_files/gateway/Dockerfile Normal file

@@ -0,0 +1,14 @@
FROM jinaai/jina:3.15.1-dev14-py39-standard
RUN apt-get update && apt-get install --no-install-recommends -y git pip nginx && rm -rf /var/lib/apt/lists/*
## install requirements for the gateway
COPY requirements.txt .
RUN pip install --compile -r requirements.txt
# setup the workspace
COPY . /workdir/
WORKDIR /workdir
ENTRYPOINT ["jina", "gateway", "--uses", "config.yml"]

src/options/generate/static_files/gateway/app_config.toml Normal file

@@ -0,0 +1,4 @@
[server]
baseUrlPath = "/playground"
headless = true

src/options/generate/static_files/gateway/custom_gateway.py Normal file

@@ -0,0 +1,153 @@
import os
import shutil
import subprocess
from time import sleep
from typing import List, Tuple
import streamlit.web.bootstrap
from jina import Gateway
from jina.serve.runtimes.gateway.composite import CompositeGateway
from streamlit.file_util import get_streamlit_file_path
from streamlit.web.server import Server as StreamlitServer
cur_dir = os.path.dirname(__file__)
def cmd(command, std_output=False, wait=True):
if isinstance(command, str):
command = command.split()
if not std_output:
process = subprocess.Popen(
command, stdout=subprocess.PIPE, stderr=subprocess.PIPE
)
else:
process = subprocess.Popen(command)
if wait:
output, error = process.communicate()
return output, error
class PlaygroundGateway(Gateway):
def __init__(self, **kwargs):
super().__init__(**kwargs)
self.streamlit_script = 'app.py'
# copy playground/config.toml to streamlit config.toml
streamlit_config_toml_src = os.path.join(cur_dir, 'app_config.toml')
streamlit_config_toml_dest = get_streamlit_file_path("config.toml")
# create streamlit_config_toml_dest if it doesn't exist
os.makedirs(os.path.dirname(streamlit_config_toml_dest), exist_ok=True)
shutil.copyfile(streamlit_config_toml_src, streamlit_config_toml_dest)
async def setup_server(self):
streamlit.web.bootstrap._fix_sys_path(self.streamlit_script)
streamlit.web.bootstrap._fix_matplotlib_crash()
streamlit.web.bootstrap._fix_tornado_crash()
streamlit.web.bootstrap._fix_sys_argv(self.streamlit_script, ())
streamlit.web.bootstrap._fix_pydeck_mapbox_api_warning()
streamlit_cmd = f'streamlit run {self.streamlit_script}'
self.streamlit_server = StreamlitServer(
os.path.join(cur_dir, self.streamlit_script), streamlit_cmd
)
async def run_server(self):
await self.streamlit_server.start()
streamlit.web.bootstrap._on_server_start(self.streamlit_server)
streamlit.web.bootstrap._set_up_signal_handler(self.streamlit_server)
async def shutdown(self):
self.streamlit_server.stop()
class CustomGateway(CompositeGateway):
"""The CustomGateway assumes that the gateway has been started with http on port 8080.
This is the port on which the nginx process listens. After nginx has been started,
it will start the playground on port 8501 and the actual HTTP gateway will start on port 8082.
Nginx is configured to route the requests in the following way:
- /playground -> playground on port 8501
- / -> HTTP gateway on port 8082
"""
def __init__(self, **kwargs):
# need to update port to 8082, as nginx will listen on 8080
http_idx = 0
http_port = kwargs['runtime_args']['port'][http_idx]
if kwargs['runtime_args']['port'][http_idx] != 8080:
raise ValueError(
f'Please let the http port ({http_port}) be 8080 so that nginx can work'
)
kwargs['runtime_args']['port'][http_idx] = 8082
super().__init__(**kwargs)
# remove potential clashing arguments from kwargs
kwargs.pop("port", None)
kwargs.pop("protocol", None)
# note order is important
self._add_gateway(
PlaygroundGateway,
8501,
**kwargs,
)
self.setup_nginx()
self.nginx_was_shutdown = False
async def shutdown(self):
await super().shutdown()
if not self.nginx_was_shutdown:
self.shutdown_nginx()
self.nginx_was_shutdown = True
def setup_nginx(self):
command = [
'nginx',
'-c',
os.path.join(cur_dir, '', 'nginx.conf'),
]
output, error = self._run_nginx_command(command)
self.logger.info('Nginx started')
self.logger.info(f'nginx output: {output}')
self.logger.info(f'nginx error: {error}')
def shutdown_nginx(self):
command = ['nginx', '-s', 'stop']
output, error = self._run_nginx_command(command)
self.logger.info('Nginx stopped')
self.logger.info(f'nginx output: {output}')
self.logger.info(f'nginx error: {error}')
def _run_nginx_command(self, command: List[str]) -> Tuple[bytes, bytes]:
self.logger.info(f'Running command: {command}')
output, error = cmd(command)
if error != b'':
# on CI we need to use sudo; using NOW_CI_RUN isn't good if running test locally
self.logger.info(f'nginx error: {error}')
command.insert(0, 'sudo')
self.logger.info(f'So running command: {command}')
output, error = cmd(command)
sleep(10)
return output, error
def _add_gateway(self, gateway_cls, port, protocol='http', **kwargs):
# ignore metrics_registry since it is not copyable
runtime_args = self._deepcopy_with_ignore_attrs(
self.runtime_args,
[
'metrics_registry',
'tracer_provider',
'grpc_tracing_server_interceptors',
'aio_tracing_client_interceptors',
'tracing_client_interceptor',
'monitoring', # disable it for fastapi gateway
],
)
runtime_args.port = [port]
runtime_args.protocol = [protocol]
gateway_kwargs = {k: v for k, v in kwargs.items() if k != 'runtime_args'}
gateway_kwargs['runtime_args'] = dict(vars(runtime_args))
gateway = gateway_cls(**gateway_kwargs)
gateway.streamer = self.streamer
self.gateways.insert(0, gateway)

src/options/generate/static_files/gateway/nginx.conf Normal file

@@ -0,0 +1,62 @@
events {
worker_connections 4096; ## Default: 1024
}
http {
server {
listen 8080;
server_name localhost;
# from https://medium.com/@dasirra/using-streamlit-nginx-docker-to-build-and-put-in-production-dashboards-in-aws-lightsail-781dab8f2836
location ^~ /static {
proxy_pass http://localhost:8501/static/;
}
location ^~ /healthz {
proxy_pass http://localhost:8501/healthz;
}
location ^~ /vendor {
proxy_pass http://localhost:8501/vendor;
}
location ^~ /st-allowed-message-origins {
proxy_pass http://localhost:8501/st-allowed-message-origins;
}
# for jcloud deployment, very important; actually talks via websocket
location ^~ /stream {
# inspired from https://discuss.streamlit.io/t/how-to-use-streamlit-with-nginx/378/7
proxy_pass http://localhost:8501/stream;
proxy_http_version 1.1;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header Host $host;
proxy_set_header Upgrade $http_upgrade;
proxy_set_header Connection "upgrade";
proxy_read_timeout 86400;
}
location ^~ /favicon.png {
proxy_pass http://localhost:8501/favicon.png;
}
# to make extra components work
location ^~ /component {
proxy_pass http://localhost:8501/component;
}
location /playground {
# streamlit specific from https://discuss.streamlit.io/t/streamlit-docker-nginx-ssl-https/2195
proxy_http_version 1.1;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header Host $host;
proxy_set_header Upgrade $http_upgrade;
proxy_set_header Connection "upgrade";
proxy_read_timeout 86400;
proxy_pass http://localhost:8501;
client_max_body_size 50M;
}
location / {
proxy_pass http://localhost:8082;
client_max_body_size 50M;
}
}
}

src/options/generate/static_files/gateway/requirements.txt Normal file

@@ -0,0 +1,3 @@
streamlit==1.16.0
extra-streamlit-components==0.1.55
jina==3.15.1.dev14

src/options/generate/templates_system.py Normal file

@@ -0,0 +1,109 @@
from src.constants import FLOW_URL_PLACEHOLDER
from src.options.generate.templates_user import not_allowed_docker_string, not_allowed_executor_string
gpt_example = '''
# gpt_3_5_turbo is a language model that can be used to generate text.
You can use it to generate text given a system definition and a user prompt.
The system definition defines the agent the user is talking to.
The user prompt is a precise question and the expected answer format.
Example:
# in the executor init:
gpt = GPT_3_5_Turbo_API(
system=\'\'\'
You are a tv-reporter who is specialized in C-list celebrities.
When you get asked something like 'Who was having a date with <X>?', then you answer with a json like '{"dates": ["<Y>", "<Z>"]}'.
You must not answer something else - only the json.
\'\'\')
# in the executor endpoint function:
response_string = gpt(prompt)
response = json.loads(response_string) # response_string is a serialized json string
'''
executor_example = '''Using the Jina framework, users can define executors.
Here is an example of how an executor can be defined. It always starts with a comment:
**microservice.py**
```python
from jina import Executor, requests, DocumentArray, Document
import json
class MyInfoExecutor(Executor):
def __init__(self, **kwargs):
super().__init__()
@requests() # each Executor must have exactly this decorator without parameters
def foo(self, docs: DocumentArray, **kwargs) -> DocumentArray:
for d in docs:
content = json.loads(d.text)
...
d.text = json.dumps(modified_content) # serialized json
return docs
```
An Executor gets a DocumentArray as input and returns a DocumentArray as output.
'''
docarray_example = f'''A DocumentArray is a python class that can be seen as a list of Documents.
A Document is a python class that represents a single document.
Here is the protobuf definition of a Document:
```
message DocumentProto {{
// used to store serialized json data the executor gets and returns
string text = 1;
}}
```
Here are examples of how a DocumentArray can be defined:
```
from jina import DocumentArray, Document
import json
d1 = Document(text=json.dumps({{'he_says': 'hello'}}))
# you can load binary data into a document
url = 'https://...'
response = requests.get(url)
obj_data = response.content
base64_data = base64.b64encode(obj_data).decode('utf-8')
d2 = Document(text=json.dumps({{'image': base64_data}}))
array = numpy.array([1, 2, 3])
array_list = array.tolist()
d3 = Document(text=json.dumps(array_list))
d4 = Document()
d4.text = '{{"uri": "https://.../logo.png"}}'
```
'''
client_example = f'''After the executor is deployed, it can be called via Jina Client.
Here is an example of a client file:
**client.py**
```python
from jina import Client, Document, DocumentArray
client = Client(host='{FLOW_URL_PLACEHOLDER}', protocol='http')
d = Document(uri='...')
d.load_uri_to_blob()
response = client.post('/', inputs=DocumentArray([d])) # the client must be called on '/'
print(response[0].text)
```'''
template_system_message_base = f'''It is the year 2021.
You are a principal engineer working at Jina - an open source company.
You accurately satisfy all of the user's requirements.
To be more specific, you help the user to build a microservice with the following requirements:
```
{{task_description}}
```
and the following test scenario:
```
{{test_description}}
```
You must obey the following rules:
{not_allowed_executor_string}
{not_allowed_docker_string}'''
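For context, a sketch of how gpt.py renders this base template in _create_system_message; the descriptions are invented:

```python
# Hypothetical rendering of template_system_message_base; gpt.py appends the
# gpt/executor/docarray/client example blocks on demand.
from langchain import PromptTemplate
from src.options.generate.templates_system import template_system_message_base, executor_example

system_message = PromptTemplate.from_template(template_system_message_base).format(
    task_description='Summarize RSS feeds',
    test_description='Given a feed URL, a short summary is returned',
)
system_message += f'\n{executor_example}'
print(system_message[:200])
```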

src/options/generate/templates_user.py Normal file

@@ -0,0 +1,351 @@
from langchain import PromptTemplate
general_guidelines_string = '''The code you write is production ready. Every file starts with comments describing what the code is doing before the first import. Comments can only be written within code blocks.
Then all imports are listed. It is important to import all modules that could be needed in the Executor code. Always import:
from jina import Executor, DocumentArray, Document, requests
import json
from io import BytesIO
import requests as req
Start from top-level and then fully implement all methods.'''
not_allowed_docker_string = '''Note that the Dockerfile only has access to the files: microservice.py, requirements.txt, config.yml, test_microservice.py.
Note that the Dockerfile runs the test_microservice.py during the build process.
The Dockerfile must not attach a virtual display when running test_microservice.py.'''
not_allowed_executor_string = '''The executor and the test must not use the GPU.
The executor and the test must not access a database.
The executor and the test must not access a display.
The executor and the test must not access external apis unless it is explicitly mentioned in the description or test case (e.g. by mentioning the api that should be used or by providing a URL to access the data).
The executor and the test must not load data from the local file system unless it was created by the executor itself.
The executor and the test must not use a pre-trained model unless it is explicitly mentioned in the description.
The executor and the test must not train a model.
The executor and the test must not use any attribute of Document except Document.text.
The executor and the test must not contain prototype or placeholder implementations.
The executor and the test must run in a docker container based on debian.
The executor and the test must use gpt_3_5_turbo_api if the task requires understanding or generating natural language or using any language model. Other language models are not allowed.'''
template_generate_microservice_name = PromptTemplate.from_template(
'''Generate a name for the executor matching the description:
"{description}"
The executor name must fulfill the following criteria:
- camel case
- start with a capital letter
- only consists of lower and upper case characters
- end with Executor.
The output is the raw string wrapped into ``` and starting with **name.txt** like this:
**name.txt**
```
PDFParserExecutor
```'''
)
template_generate_possible_packages_output_format_string = '''You must output the package combinations as a list of lists wrapped into ``` and name it **packages.csv**. Do not use quotation marks around package names in the output. Separate packages in a combination by comma. The output looks like this:
**{file_name}**
```
package1a, package1b ...
package2a, package2b, package2c
package3a ...
package4a ...
package5a ...
...
```'''
template_generate_possible_packages = PromptTemplate.from_template(
'''Here is the task description of the problem you need to solve:
"{description}"
1. Write down the different strategies to solve the task. For each strategy write down all the non-trivial subtasks you need to solve. If there is a natural language understanding or generation strategy, write it down.
2. Find out what is the core problem to solve.
3. List up to 15 Python packages that are specifically designed or have functionalities to solve the complete core problem with one of the defined strategies. You must add gpt_3_5_turbo_api if the task involves generating or understanding natural language or using a (pre-trained) language model.
4. Exclude any package that can generate or understand natural language or enables using any language model, but you must not exclude gpt_3_5_turbo_api. Print the cleaned list of packages and give a brief reason for keeping it after its name.
5. For each cleaned package think if it fulfills the following requirements:
a) specifically designed or have functionalities to solve the complete core problem.
b) has a stable api among different versions
c) does not have system requirements
d) can solve the task when running in a docker container
e) the implementation of the core problem using the package would obey the following rules:
''' + not_allowed_executor_string + '''
When answering, just write "yes" or "no".
6. Determine the 5 most suitable python package combinations, ordered from the best to the least suitable. Combine the packages to achieve a comprehensive solution.
If the package is mentioned in the description, then it is automatically the best one.
If you listed gpt_3_5_turbo_api earlier, you must use it. gpt_3_5_turbo_api is the best package for handling text-based tasks. Also, gpt_3_5_turbo_api doesn't need any other packages processing text or using language models. It can handle any text-based task alone.
''' + template_generate_possible_packages_output_format_string)
template_code_wrapping_string = '''The code will go into {file_name_purpose}. Make sure to wrap the code into ``` marks even if you only output code:
**{file_name}**
```{tag_name}
...code...
```
You must provide the complete file with the exact same syntax to wrap the code.'''
template_generate_executor = PromptTemplate.from_template(
general_guidelines_string + '''
Write the executor called '{microservice_name}'. The name is very important to keep.
It matches the following description: '{microservice_description}'.
It will be tested with the following scenario: '{test_description}'.
For the implementation use the following package(s): '{packages}'.
Obey the following rules:
Keep in mind that d.uri is never a path to a local file. It is always a url.
''' + not_allowed_executor_string + '''
Your approach:
1. Identify the core challenge when implementing the executor.
2. Think about solutions for these challenges. Use gpt_3_5_turbo_api if it is mentioned in the above list of packages.
3. Decide for one of the solutions.
4. Write the code for the executor. Don't write code for the test.
If and only if gpt_3_5_turbo_api is in the package list, then you must always include the following code in microservice.py:
```
import os
import openai
openai.api_key = os.getenv("OPENAI_API_KEY")
class GPT_3_5_Turbo_API:
def __init__(self, system: str = ''):
self.system = system
def __call__(self, prompt: str) -> str:
response = openai.ChatCompletion.create(
model="gpt-3.5-turbo",
messages=[{{
"role": 'system',
"content": self.system
}}, {{
"role": 'user',
"content": prompt
}}]
)
return response.choices[0]['message']['content']
```
''' + template_code_wrapping_string
)
template_generate_test = PromptTemplate.from_template(
general_guidelines_string + '''
{code_files_wrapped}
Write a single test case that tests the following scenario: '{test_description}'. In case the test scenario is not precise enough, test a general case without any assumptions.
Start the test with an extensive comment about the test case. If gpt_3_5_turbo_api is used in the executor, then the test must not check the exact output of the executor as it is not deterministic.
Use the following import to import the executor:
```
from microservice import {microservice_name}
```
''' + not_allowed_executor_string + '''
The test must not open local files.
The test must not mock a function of the executor.
The test must not use other data than the one provided in the test scenario.
The test must not set any environment variables which require a key.
''' + '\n' + template_code_wrapping_string
)
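# Sketch of the kind of test this template is meant to elicit; the
# microservice name, endpoint, and assertion are hypothetical, not taken
# from any generated code:
#
#   # Tests that the executor produces a non-empty text response for a
#   # simple input document. No exact-output check, since gpt_3_5_turbo_api
#   # output is non-deterministic.
#   from jina import Document, DocumentArray
#   from microservice import MyMicroservice
#
#   def test_microservice():
#       executor = MyMicroservice()
#       docs = DocumentArray([Document(text='hello, world!')])
#       executor.process(docs, parameters={})  # hypothetical endpoint name
#       assert docs[0].text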
template_generate_requirements = PromptTemplate.from_template(
general_guidelines_string + '''
{code_files_wrapped}
Write the content of the requirements.txt file.
Make sure to include pytest.
Make sure to include openai>=0.26.0.
Make sure that jina==3.15.1.dev14.
Make sure that docarray==0.21.0.
You must not add gpt_3_5_turbo_api to the requirements.txt file.
Pin all versions using ~=, ==, <, >, <=, or >=. The package versions must not conflict.
''' + '\n' + template_code_wrapping_string
)
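# Example of a requirements.txt that satisfies the constraints above; the
# pytest pin is an assumption, the other lines are prescribed by the prompt:
#
#   jina==3.15.1.dev14
#   docarray==0.21.0
#   openai>=0.26.0
#   pytest~=7.3.0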
template_generate_dockerfile = PromptTemplate.from_template(
general_guidelines_string + '''
{code_files_wrapped}
Write the Dockerfile that defines the environment with all necessary dependencies that the executor uses.
Make sure that all system libraries required by the Python packages are installed; these are usually installed with apt-get.
Be aware that the machine the docker container runs on has no GPU - only a CPU.
Add the config.yml file to the Dockerfile.
Note that the Dockerfile only has access to the files: microservice.py, requirements.txt, config.yml and test_microservice.py.
The base image of the Dockerfile is FROM jinaai/jina:3.15.1-dev14-py39-standard.
The entrypoint is ENTRYPOINT ["jina", "executor", "--uses", "config.yml"].
Make sure all files are placed in /workdir.
The Dockerfile runs the test during the build process.
''' + not_allowed_docker_string + '\n' + template_code_wrapping_string
)
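# Minimal sketch of a Dockerfile that follows the rules above; the apt-get
# packages and layer ordering are assumptions, while the base image, file
# set, build-time test, and entrypoint are prescribed by the prompt:
#
#   FROM jinaai/jina:3.15.1-dev14-py39-standard
#   RUN apt-get update && apt-get install -y --no-install-recommends \
#       libgl1 && rm -rf /var/lib/apt/lists/*
#   WORKDIR /workdir
#   COPY requirements.txt config.yml microservice.py test_microservice.py ./
#   RUN pip install --no-cache-dir -r requirements.txt
#   RUN pytest test_microservice.py
#   ENTRYPOINT ["jina", "executor", "--uses", "config.yml"]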
template_summarize_error = PromptTemplate.from_template(
'''Here is an error message I encountered during the docker build process:
"{error}"
Your task is to summarize the error message as compactly and informatively as possible while retaining all information necessary to debug the core issue.
Warnings are not worth mentioning.'''
)
template_is_dependency_issue = PromptTemplate.from_template(
'''Your task is to assist in identifying the root cause of a Docker build error for a Python application.
The error message is as follows:
{error}
The Dockerfile is as follows:
{docker_file}
Is this a dependency installation failure? Answer with "yes" or "no".'''
)
template_solve_dependency_issue = PromptTemplate.from_template(
'''Your task is to provide guidance on how to solve an error that occurred during the Docker build process.
Here is the summary of the error that occurred:
{summarized_error}
To solve this error, you should:
1. Suggest 3 to 5 possible solutions. You have no access to the documentation of the package.
2. Decide on the best solution and explain it in detail.
3. Write down the files that need to be changed, but not files that don't need to be changed.
For files that need to be changed, you must provide the complete file with the exact same syntax to wrap the code.
Obey the following rules:
''' + not_allowed_docker_string + '''
You are given the following files:
{all_files_string}
Output all the files that need change. Don't output files that don't need change.
If you output a file, then write the complete file. Use the exact following syntax to wrap the code:
**...**
```
...code...
```
Example:
**requirements.txt**
```
jina==2.0.0
```
'''
)
template_solve_code_issue = PromptTemplate.from_template(
'''General rules:
''' + not_allowed_executor_string + '''
Here is the description of the task the executor must solve:
{task_description}
Here is the test scenario the executor must pass:
{test_description}
Here are all the files I use:
{all_files_string}
Here is the summary of the error that occurred:
{summarized_error}
To solve this error, you should:
1. Suggest 3 to 5 possible solutions. You have no access to the documentation of the package.
2. Decide on the best solution and explain it in detail.
3. Write down the files that need to be changed, but not files that don't need to be changed.
Obey the following rules:
''' + f'{not_allowed_executor_string}\n{not_allowed_docker_string}' + '''
Output all the files that need change.
Don't output files that don't need change. If you output a file, then write the complete file.
If you change microservice.py and it uses gpt_3_5_turbo_api, then you must keep the code for gpt_3_5_turbo_api in the microservice.py file.
Use the exact following syntax to wrap the code:
**...**
```...
...code...
```
Example:
**microservice.py**
```python
print('hello world')
```'''
)
template_generate_playground = PromptTemplate.from_template(
general_guidelines_string + '''
{code_files_wrapped}
Create a playground for the executor {microservice_name} using streamlit.
The playground must look like it was made by a professional designer.
All the UI elements are well thought out to make them visually appealing and easy to use.
Don't mention the word Playground in the title.
The playground contains many emojis that fit its theme and uses an emoji as the favicon.
The playground encourages the user to deploy their own microservice by clicking on this link: https://github.com/jina-ai/gptdeploy
The playground uses the following code to send a request to the microservice:
```
from jina import Client, Document, DocumentArray
client = Client(host='http://localhost:8080')
response = client.post('/', inputs=DocumentArray([d])) # always use '/'
print(response[0].text) # can also be blob in case of image/audio..., this should be visualized in the streamlit app
```
Note that the response will always be in response[0].text.
The playground displays a code block containing the microservice-specific curl command that can be used to send the request to the microservice.
While the exact payload in the curl command might change, the host and deployment ID always stay the same. Example:
```
deployment_id = os.environ.get("K8S_NAMESPACE_NAME", "")
host = f'https://gptdeploy-{{deployment_id.split("-")[1]}}.wolf.jina.ai/post' if deployment_id else "http://localhost:8080/post"
with st.expander("See curl command"):
st.code(
f'curl -X \\'POST\\' \\'host\\' -H \\'accept: application/json\\' -H \\'Content-Type: application/json\\' -d \\'{{{{"data": [{{{{"text": "hello, world!"}}}}]}}}}\\'',
language='bash'
)
```
You must provide the complete app.py file using the following syntax to wrap the code:
**app.py**
```python
...
```
The playground (app.py) must always use the host on http://localhost:8080 and must not let the user configure the host on the UI.
The playground (app.py) must not import the executor.
'''
)
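# Minimal sketch of an app.py in the shape this template asks for; the
# title, emoji, and input field are invented for illustration, while the
# client call mirrors the snippet embedded in the prompt:
#
#   import streamlit as st
#   from jina import Client, Document, DocumentArray
#
#   st.set_page_config(page_title='Text Wizard', page_icon='✨')
#   st.title('✨ Text Wizard')
#   text = st.text_input('Your input', value='hello, world!')
#   if st.button('🚀 Send'):
#       client = Client(host='http://localhost:8080')
#       response = client.post('/', inputs=DocumentArray([Document(text=text)]))
#       st.write(response[0].text)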
template_chain_of_thought = PromptTemplate.from_template(
'''First, write down an extensive list of obvious and non-obvious observations about {file_name_purpose} that could need an adjustment. Explain why.
Consider whether all the changes are required and then decide on the changes you want to make, but you are not allowed to disregard the instructions in the previous message.
Be very hesitant to change the code. Only make a change if you are sure that it is necessary.
Output only {file_name_purpose}.
Write the whole content of {file_name_purpose} - even if you decided to change only a small thing or even nothing.
''' + '\n' + template_code_wrapping_string + '''
Remember:
The playground (app.py) must always use the host on http://localhost:8080 and must not let the user configure the host on the UI.
The playground (app.py) must not import the executor.
'''
)

1
src/options/run/__init__.py Normal file
View File

@@ -0,0 +1 @@
from src.options.run.runner import Runner

11
src/options/run/runner.py Normal file
View File

@@ -0,0 +1,11 @@
from src.apis.jina_cloud import run_locally
from src.options import validate_folder_is_correct, get_executor_name, get_latest_version_path
class Runner():
def run(self, microservice_path):
validate_folder_is_correct(microservice_path)
executor_name = get_executor_name(microservice_path)
latest_version_path = get_latest_version_path(microservice_path)
run_locally(executor_name, latest_version_path)
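# Usage sketch (the path is hypothetical):
#   Runner().run('microservices/MyExecutor')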

0
src/utils/__init__.py Normal file
View File

51
src/utils/io.py Normal file
View File

@@ -0,0 +1,51 @@
import os
import subprocess
import sys
from contextlib import contextmanager
def get_microservice_path(path, microservice_name, packages, num_approach, version):
package_path = '_'.join(packages)
return os.path.join(path, microservice_name, f'{num_approach}_{package_path}', f'v{version}')
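# Example (hypothetical arguments, POSIX separators):
#   get_microservice_path('executors', 'MyExec', ['pillow', 'requests'], 2, 1)
#   -> 'executors/MyExec/2_pillow_requests/v1'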
def persist_file(file_content, file_path):
with open(file_path, 'w', encoding='utf-8') as f:
f.write(file_content)
def get_all_microservice_files_with_content(folder_path):
file_name_to_content = {}
for filename in os.listdir(folder_path):
file_path = os.path.join(folder_path, filename)
if os.path.isfile(file_path):
with open(file_path, 'r', encoding='utf-8') as file:
content = file.read()
file_name_to_content[filename] = content
return file_name_to_content
@contextmanager
def suppress_stdout():
original_stdout = sys.stdout
sys.stdout = open(os.devnull, 'w')
try:
yield
finally:
sys.stdout.close()
sys.stdout = original_stdout
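# Usage: silence noisy third-party output for the duration of a block.
#   with suppress_stdout():
#       noisy_function()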
def is_docker_running():
try:
if sys.platform.startswith('win'):
command = 'docker info'
else:
command = 'docker info 2> /dev/null'
subprocess.check_output(command, shell=True)
return True
except subprocess.CalledProcessError:
return False

29
src/utils/string_tools.py Normal file
View File

@@ -0,0 +1,29 @@
import os
import platform
if platform.system() == "Windows":
os.system("color")
def print_colored(headline, text, color_code, end='\n'):
if color_code == 'black':
color_code = '30'
elif color_code == 'red':
color_code = '31'
elif color_code == 'green':
color_code = '32'
elif color_code == 'yellow':
color_code = '33'
elif color_code == 'blue':
color_code = '34'
elif color_code == 'magenta':
color_code = '35'
elif color_code == 'cyan':
color_code = '36'
elif color_code == 'white':
color_code = '37'
color_start = f"\033[{color_code}m"
reset = "\033[0m"
bold_start = "\033[1m"
if headline:
print(f"{bold_start}{color_start}{headline}{reset}")
print(f"{color_start}{text}{reset}", end=end)

View File

@@ -1,6 +1,6 @@
import unittest.mock as mock
-from gptdeploy.options.generate.generator import Generator
+from src.options.generate.generator import Generator
-from gptdeploy.apis.gpt import GPTSession
+from src.apis.gpt import GPTSession
def test_generator(tmpdir):
    # Define a mock response
View File

@@ -1,4 +1,4 @@
-from gptdeploy.apis.jina_cloud import clean_color_codes
+from src.apis.jina_cloud import clean_color_codes
def test_clean_color_codes():