fix: package

This commit is contained in:
Florian Hönicke
2023-04-10 02:21:54 +02:00
parent e784d3332d
commit c191d53ba1
12 changed files with 2 additions and 784 deletions

View File

@@ -7,7 +7,7 @@ def read_requirements():
setup( setup(
name='gptdeploy', name='gptdeploy',
version='0.18.2', version='0.18.3',
description='Use natural language interface to create, deploy and update your microservice infrastructure.', description='Use natural language interface to create, deploy and update your microservice infrastructure.',
long_description=open('README.md', 'r', encoding='utf-8').read(), long_description=open('README.md', 'r', encoding='utf-8').read(),
long_description_content_type='text/markdown', long_description_content_type='text/markdown',
@@ -18,7 +18,7 @@ setup(
install_requires=read_requirements(), install_requires=read_requirements(),
entry_points={ entry_points={
'console_scripts': [ 'console_scripts': [
'gptdeploy=gptdeploy:main', 'gptdeploy = src:main',
], ],
}, },

View File

@@ -1 +0,0 @@
__version__ = '0.18.1'

View File

@@ -1,32 +0,0 @@
EXECUTOR_FILE_NAME = 'executor.py'
TEST_EXECUTOR_FILE_NAME = 'test_executor.py'
REQUIREMENTS_FILE_NAME = 'requirements.txt'
DOCKER_FILE_NAME = 'Dockerfile'
CLIENT_FILE_NAME = 'client.py'
STREAMLIT_FILE_NAME = 'streamlit.py'
EXECUTOR_FILE_TAG = 'python'
TEST_EXECUTOR_FILE_TAG = 'python'
REQUIREMENTS_FILE_TAG = ''
DOCKER_FILE_TAG = 'dockerfile'
CLIENT_FILE_TAG = 'python'
STREAMLIT_FILE_TAG = 'python'
FILE_AND_TAG_PAIRS = [
(EXECUTOR_FILE_NAME, EXECUTOR_FILE_TAG),
(TEST_EXECUTOR_FILE_NAME, TEST_EXECUTOR_FILE_TAG),
(REQUIREMENTS_FILE_NAME, REQUIREMENTS_FILE_TAG),
(DOCKER_FILE_NAME, DOCKER_FILE_TAG),
(CLIENT_FILE_NAME, CLIENT_FILE_TAG),
(STREAMLIT_FILE_NAME, STREAMLIT_FILE_TAG)
]
EXECUTOR_FOLDER_v1 = 'executor_v1'
EXECUTOR_FOLDER_v2 = 'executor_v2'
FLOW_URL_PLACEHOLDER = 'jcloud.jina.ai'
PRICING_GPT4_PROMPT = 0.03
PRICING_GPT4_GENERATION = 0.06
PRICING_GPT3_5_TURBO_PROMPT = 0.002
PRICING_GPT3_5_TURBO_GENERATION = 0.002

View File

@@ -1,117 +0,0 @@
import os
from time import sleep
from typing import List, Tuple
import openai
from openai.error import RateLimitError, Timeout
from src.constants import PRICING_GPT4_PROMPT, PRICING_GPT4_GENERATION, PRICING_GPT3_5_TURBO_PROMPT, \
PRICING_GPT3_5_TURBO_GENERATION
from src.prompt_system import system_base_definition
from src.utils.io import timeout_generator_wrapper, GenerationTimeoutError
from src.utils.string_tools import print_colored
class GPTSession:
def __init__(self):
self.get_openai_api_key()
if self.is_gpt4_available():
self.supported_model = 'gpt-4'
self.pricing_prompt = PRICING_GPT4_PROMPT
self.pricing_generation = PRICING_GPT4_GENERATION
else:
self.supported_model = 'gpt-3.5-turbo'
self.pricing_prompt = PRICING_GPT3_5_TURBO_PROMPT
self.pricing_generation = PRICING_GPT3_5_TURBO_GENERATION
self.chars_prompt_so_far = 0
self.chars_generation_so_far = 0
def get_openai_api_key(self):
if 'OPENAI_API_KEY' not in os.environ:
raise Exception('You need to set OPENAI_API_KEY in your environment')
openai.api_key = os.environ['OPENAI_API_KEY']
def is_gpt4_available(self):
try:
openai.ChatCompletion.create(
model="gpt-4",
messages=[{
"role": 'system',
"content": 'test'
}]
)
return True
except openai.error.InvalidRequestError:
return False
def cost_callback(self, chars_prompt, chars_generation):
self.chars_prompt_so_far += chars_prompt
self.chars_generation_so_far += chars_generation
print('\n')
money_prompt = round(self.chars_prompt_so_far / 3.4 * self.pricing_prompt / 1000, 2)
money_generation = round(self.chars_generation_so_far / 3.4 * self.pricing_generation / 1000, 2)
print('money prompt:', f'${money_prompt}')
print('money generation:', f'${money_generation}')
print('total money:', f'${money_prompt + money_generation}')
print('\n')
def get_conversation(self):
return _GPTConversation(self.supported_model, self.cost_callback)
class _GPTConversation:
def __init__(self, model: str, cost_callback, prompt_list: List[Tuple[str, str]] = None):
self.model = model
if prompt_list is None:
prompt_list = [('system', system_base_definition)]
self.prompt_list = prompt_list
self.cost_callback = cost_callback
print_colored('system', system_base_definition, 'magenta')
def query(self, prompt: str):
print_colored('user', prompt, 'blue')
self.prompt_list.append(('user', prompt))
response = self.get_response(self.prompt_list)
self.prompt_list.append(('assistant', response))
return response
def get_response_from_stream(self, response_generator):
response_generator_with_timeout = timeout_generator_wrapper(response_generator, 10)
complete_string = ''
for chunk in response_generator_with_timeout:
delta = chunk['choices'][0]['delta']
if 'content' in delta:
content = delta['content']
print_colored('' if complete_string else 'assistent', content, 'green', end='')
complete_string += content
return complete_string
def get_response(self, prompt_list: List[Tuple[str, str]]):
for i in range(10):
try:
response_generator = openai.ChatCompletion.create(
temperature=0,
max_tokens=2_000,
model=self.model,
stream=True,
messages=[
{
"role": prompt[0],
"content": prompt[1]
}
for prompt in prompt_list
]
)
complete_string = self.get_response_from_stream(response_generator)
except (RateLimitError, Timeout, ConnectionError, GenerationTimeoutError) as e:
print(e)
print('retrying, be aware that this affects the cost calculation')
sleep(3)
continue
chars_prompt = sum(len(prompt[1]) for prompt in prompt_list)
chars_generation = len(complete_string)
self.cost_callback(chars_prompt, chars_generation)
return complete_string
raise Exception('Failed to get response')

View File

@@ -1,168 +0,0 @@
import hashlib
import json
import os
import re
import subprocess
import webbrowser
from pathlib import Path
import hubble
from hubble.executor.helper import upload_file, archive_package, get_request_header
from jcloud.flow import CloudFlow
def redirect_callback(href):
print(
f'You need login to Jina first to use GPTDeploy\n'
f'Please open this link if it does not open automatically in your browser: {href}'
)
webbrowser.open(href, new=0, autoraise=True)
def jina_auth_login():
try:
hubble.Client(jsonify=True).get_user_info(log_error=False)
except hubble.AuthenticationRequiredError:
hubble.login(prompt='login', redirect_callback=redirect_callback)
def push_executor(dir_path):
dir_path = Path(dir_path)
md5_hash = hashlib.md5()
bytesio = archive_package(dir_path)
content = bytesio.getvalue()
md5_hash.update(content)
md5_digest = md5_hash.hexdigest()
form_data = {
'public': 'True',
'private': 'False',
'verbose': 'True',
'md5sum': md5_digest,
}
req_header = get_request_header()
resp = upload_file(
'https://api.hubble.jina.ai/v2/rpc/executor.push',
'filename',
content,
dict_data=form_data,
headers=req_header,
stream=False,
method='post',
)
json_lines_str = resp.content.decode('utf-8')
if 'exited on non-zero code' not in json_lines_str:
return ''
responses = []
for json_line in json_lines_str.splitlines():
if 'exit code:' in json_line:
break
d = json.loads(json_line)
if 'payload' in d and type(d['payload']) == str:
responses.append(d['payload'])
elif type(d) == str:
responses.append(d)
return '\n'.join(responses)
def get_user_name():
client = hubble.Client(max_retries=None, jsonify=True)
response = client.get_user_info()
return response['data']['name']
def deploy_on_jcloud(flow_yaml):
cloud_flow = CloudFlow(path=flow_yaml)
return cloud_flow.__enter__().endpoints['gateway']
def deploy_flow(executor_name, dest_folder):
flow = f'''
jtype: Flow
with:
name: nowapi
env:
JINA_LOG_LEVEL: DEBUG
jcloud:
version: 3.14.2.dev18
labels:
creator: microchain
name: gptdeploy
executors:
- name: {executor_name.lower()}
uses: jinaai+docker://{get_user_name()}/{executor_name}:latest
env:
JINA_LOG_LEVEL: DEBUG
jcloud:
resources:
instance: C2
capacity: spot
'''
full_flow_path = os.path.join(dest_folder,
'flow.yml')
with open(full_flow_path, 'w') as f:
f.write(flow)
# print('try local execution')
# flow = Flow.load_config(full_flow_path)
# with flow:
# pass
print('deploy flow on jcloud')
return deploy_on_jcloud(flow_yaml=full_flow_path)
def replace_client_line(file_content: str, replacement: str) -> str:
lines = file_content.split('\n')
for index, line in enumerate(lines):
if 'Client(' in line:
lines[index] = replacement
break
return '\n'.join(lines)
def update_client_line_in_file(file_path, host):
with open(file_path, 'r') as file:
content = file.read()
replaced_content = replace_client_line(content, f"client = Client(host='{host}')")
with open(file_path, 'w') as file:
file.write(replaced_content)
def process_error_message(error_message):
lines = error_message.split('\n')
relevant_lines = []
pattern = re.compile(r"^#\d+ \[[ \d]+/[ \d]+\]") # Pattern to match lines like "#11 [7/8]"
last_matching_line_index = None
for index, line in enumerate(lines):
if pattern.match(line):
last_matching_line_index = index
if last_matching_line_index is not None:
relevant_lines = lines[last_matching_line_index:]
return '\n'.join(relevant_lines[-25:])
def build_docker(path):
# The command to build the Docker image
cmd = f"docker build -t micromagic {path}"
# Run the command and capture the output
process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True)
stdout, stderr = process.communicate()
# Check if there was an error
if process.returncode != 0:
error_message = stderr.decode("utf-8")
relevant_error_message = process_error_message(error_message)
return relevant_error_message
else:
print("Docker build completed successfully.")
return ''

View File

@@ -1,69 +0,0 @@
import os
import platform
import subprocess
import click
try:
import psutil
except ImportError:
psutil = None
def get_shell():
if psutil is None:
return None
try:
p = psutil.Process(os.getpid())
while p.parent() and p.parent().name() != "init":
p = p.parent()
return p.name()
except Exception as e:
click.echo(f"Error detecting shell: {e}")
return None
def set_env_variable(shell, key):
shell_config = {
"bash": {"config_file": "~/.bashrc", "export_line": f"export OPENAI_API_KEY={key}"},
"zsh": {"config_file": "~/.zshrc", "export_line": f"export OPENAI_API_KEY={key}"},
"fish": {
"config_file": "~/.config/fish/config.fish",
"export_line": f"set -gx OPENAI_API_KEY {key}",
},
}
if shell not in shell_config:
click.echo("Sorry, your shell is not supported.")
return
config_file = os.path.expanduser(shell_config[shell]["config_file"])
with open(config_file, "a") as file:
file.write(f"\n{shell_config[shell]['export_line']}\n")
click.echo(f"OPENAI_API_KEY has been set in {config_file}.")
def set_api_key(key):
system_platform = platform.system().lower()
if system_platform == "windows":
set_env_variable_command = f'setx OPENAI_API_KEY "{key}"'
subprocess.call(set_env_variable_command, shell=True)
click.echo("OPENAI_API_KEY has been set.")
elif system_platform in ["linux", "darwin"]:
if "OPENAI_API_KEY" in os.environ:
if not click.confirm("OPENAI_API_KEY is already set. Do you want to overwrite it?"):
click.echo("Aborted.")
return
shell = get_shell()
if shell is None:
click.echo("Error: Unable to detect your shell or psutil is not available. Please set the environment variable manually.")
return
set_env_variable(shell, key)
else:
click.echo("Sorry, this platform is not supported.")

View File

@@ -1,114 +0,0 @@
from src.constants import FLOW_URL_PLACEHOLDER
executor_example = '''
Using the Jina framework, users can define executors.
Here is an example of how an executor can be defined. It always starts with a comment:
**executor.py**
```python
# this executor binary files as input and returns the length of each binary file as output
from jina import Executor, requests, DocumentArray, Document
class MyInfoExecutor(Executor):
def __init__(self, **kwargs):
super().__init__()
@requests() # each executor must have exactly this decorator without parameters
def foo(self, docs: DocumentArray, **kwargs) => DocumentArray:
for d in docs:
d.load_uri_to_blob()
d.blob = None
return docs
```
An executor gets a DocumentArray as input and returns a DocumentArray as output.
'''
docarray_example = f'''
A DocumentArray is a python class that can be seen as a list of Documents.
A Document is a python class that represents a single document.
Here is the protobuf definition of a Document:
message DocumentProto {{
// A hexdigest that represents a unique document ID
string id = 1;
oneof content {{
// the raw binary content of this document, which often represents the original document when comes into jina
bytes blob = 2;
// the ndarray of the image/audio/video document
NdArrayProto tensor = 3;
// a text document
string text = 4;
}}
// a uri of the document is a remote url starts with http or https or data URI scheme
string uri = 5;
// list of the sub-documents of this document (recursive structure)
repeated DocumentProto chunks = 6;
// the matched documents on the same level (recursive structure)
repeated DocumentProto matches = 7;
// the embedding of this document
NdArrayProto embedding = 8;
}}
Here is an example of how a DocumentArray can be defined:
from jina import DocumentArray, Document
d1 = Document(text='hello')
# you can load binary data into a document
url = 'https://...'
response = requests.get(url)
obj_data = response.content
d2 = Document(blob=obj_data) # blob is bytes like b'\\x89PNG\\r\\n\\x1a\\n\
d3 = Document(tensor=numpy.array([1, 2, 3]), chunks=[Document(uri=/local/path/to/file)]
d4 = Document(
uri='https://docs.docarray.org/img/logo.png',
)
d5 = Document()
d5.tensor = np.ones((2,4))
d5.uri = 'https://audio.com/audio.mp3'
d6 = Document()
d6.blob # like b'RIFF\\x00\\x00\\x00\\x00WAVEfmt \\x10\\x00...'
docs = DocumentArray([
d1, d2, d3, d4
])
d7 = Document()
d7.text = 'test string'
d8 = Document()
d8.text = json.dumps([{{"id": "1", "text": ["hello", 'test']}}, {{"id": "2", "text": "world"}}])
# the document has a helper function load_uri_to_blob:
# For instance, d4.load_uri_to_blob() downloads the file from d4.uri and stores it in d4.blob.
# If d4.uri was something like 'https://website.web/img.jpg', then d4.blob would be something like b'\\xff\\xd8\\xff\\xe0\\x00\\x10JFIF\\x00\\x01\\x01...
'''
client_example = f'''
After the executor is deployed, it can be called via Jina Client.
Here is an example of a client file:
**client.py**
```python
from jina import Client, Document, DocumentArray
client = Client(host='{FLOW_URL_PLACEHOLDER}')
d = Document(uri='...')
d.load_uri_to_blob()
response = client.post('/', inputs=DocumentArray([d])) # the client must be called on '/'
print(response[0].text)
```
'''
system_base_definition = f'''
You are a principal engineer working at Jina - an open source company."
{executor_example}
{docarray_example}
{client_example}
'''

View File

@@ -1,138 +0,0 @@
from src.constants import EXECUTOR_FILE_NAME, REQUIREMENTS_FILE_NAME, TEST_EXECUTOR_FILE_NAME, DOCKER_FILE_NAME, \
DOCKER_FILE_TAG, CLIENT_FILE_TAG, CLIENT_FILE_NAME, STREAMLIT_FILE_TAG, STREAMLIT_FILE_NAME, EXECUTOR_FILE_TAG, \
REQUIREMENTS_FILE_TAG, TEST_EXECUTOR_FILE_TAG
def general_guidelines():
return (
"The code you write is production ready. "
"Every file starts with comments describing what the code is doing before the first import. "
"Comments can only be written within code blocks. "
"Then all imports are listed. "
"It is important to import all modules that could be needed in the executor code. "
"Always import: "
"from jina import Executor, DocumentArray, Document, requests "
"Start from top-level and then fully implement all methods. "
"\n"
)
def _task(task, tag_name, file_name):
return (
task + f"The code will go into {file_name}. Wrap the code into:\n"
f"**{file_name}**\n"
f"```{tag_name}\n"
f"...code...\n"
f"```\n\n"
)
def executor_file_task(executor_name, executor_description, test_scenario, package):
return _task(f'''
Write the executor called '{executor_name}'.
It matches the following description: '{executor_description}'.
It will be tested with the following scenario: '{test_scenario}'.
For the implementation use the following package: '{package}'.
Have in mind that d.uri is never a path to a local file. It is always a url.
''' + not_allowed(),
EXECUTOR_FILE_TAG,
EXECUTOR_FILE_NAME
)
def test_executor_file_task(executor_name, test_scenario):
return _task(
"Write a small unit test for the executor. "
"Start the test with an extensive comment about the test case. "
+ (
f"Write a single test case that tests the following scenario: '{test_scenario}'. "
f"In case the test scenario is not precise enough, test the most general case without any assumptions."
if test_scenario else ""
)
+ "Use the following import to import the executor: "
f"from executor import {executor_name} "
+ not_allowed()
+ "The test must not open local files. "
+ "The test must not mock a function of the executor. "
+ "The test must not use other data than the one provided in the test scenario. ",
TEST_EXECUTOR_FILE_TAG,
TEST_EXECUTOR_FILE_NAME
)
def requirements_file_task():
return _task(
"Write the content of the requirements.txt file. "
"Make sure to include pytest. "
"Make sure that jina==3.14.1. "
"All versions are fixed using ~=, ==, <, >, <=, >=. The package versions should not have conflicts. ",
REQUIREMENTS_FILE_TAG,
REQUIREMENTS_FILE_NAME
)
def docker_file_task():
return _task(
"Write the Dockerfile that defines the environment with all necessary dependencies that the executor uses. "
"The Dockerfile runs the test during the build process. "
"It is important to make sure that all libs are installed that are required by the python packages. "
"Usually libraries are installed with apt-get. "
"Be aware that the machine the docker container is running on does not have a GPU - only CPU. "
"Add the config.yml file to the Dockerfile. "
"The base image of the Dockerfile is FROM jinaai/jina:3.14.1-py39-standard. "
'The entrypoint is ENTRYPOINT ["jina", "executor", "--uses", "config.yml"]. '
'Make sure the all files are in the /workdir. '
"The Dockerfile runs the test during the build process. " + not_allowed(),
DOCKER_FILE_TAG,
DOCKER_FILE_NAME
)
def client_file_task():
return _task(
"Write the client file. ",
CLIENT_FILE_TAG,
CLIENT_FILE_NAME
)
def streamlit_file_task():
return _task(
"Write the streamlit file allowing to make requests . ",
STREAMLIT_FILE_TAG,
STREAMLIT_FILE_NAME
)
def chain_of_thought_creation():
return (
"First, write down some non-obvious thoughts about the challenges of the task and give multiple approaches on how you handle them. "
"For example, the given package you could used in different ways and not all of them obay the rules: "
+ "Discuss the pros and cons for all of these approaches and then decide for one of the approaches. "
"Then write as I told you. "
)
def chain_of_thought_optimization(tag_name, file_name):
return _task(
f'First, write down an extensive list of obvious and non-obvious observations about {file_name} that could need an adjustment. Explain why. '
f"Think if all the changes are required and finally decide for the changes you want to make, "
f"but you are not allowed disregard the instructions in the previous message. "
f"Be very hesitant to change the code. Only make a change if you are sure that it is necessary. "
f"Output only {file_name} "
f"Write the whole content of {file_name} - even if you decided to change only a small thing or even nothing. ",
tag_name,
file_name
)
def not_allowed():
return '''
The executor must not use the GPU.
The executor must not access a database.
The executor must not access a display.
The executor must not access external apis except unless it is explicitly mentioned in the description or test case (e.g. by mentioning the api that should be used or by providing a URL to access the data).
The executor must not load data from the local file system unless it was created by the executor itself.
The executor must not use a pre-trained model unless it is explicitly mentioned in the description.
The executor must not train a model.
The executor must not use Document.tags.
'''

View File

@@ -1,53 +0,0 @@
# from fastapi import FastAPI
# from fastapi.exceptions import RequestValidationError
# from pydantic import BaseModel
# from typing import Optional, Dict
#
# from starlette.middleware.cors import CORSMiddleware
# from starlette.requests import Request
# from starlette.responses import JSONResponse
# from main import main
#
# app = FastAPI()
#
# # Define the request model
# class CreateRequest(BaseModel):
# test_scenario: str
# executor_description: str
#
# # Define the response model
# class CreateResponse(BaseModel):
# result: Dict[str, str]
# success: bool
# message: Optional[str]
#
# @app.post("/create", response_model=CreateResponse)
# def create_endpoint(request: CreateRequest):
#
# result = main(
# executor_description=request.executor_description,
# test_scenario=request.test_scenario,
# )
# return CreateResponse(result=result, success=True, message=None)
#
#
# app.add_middleware(
# CORSMiddleware,
# allow_origins=["*"],
# allow_credentials=True,
# allow_methods=["*"],
# allow_headers=["*"],
# )
#
# # Add a custom exception handler for RequestValidationError
# @app.exception_handler(RequestValidationError)
# def validation_exception_handler(request: Request, exc: RequestValidationError):
# return JSONResponse(
# status_code=422,
# content={"detail": exc.errors()},
# )
#
#
# if __name__ == "__main__":
# import uvicorn
# uvicorn.run("server:app", host="0.0.0.0", port=8000, log_level="info")

View File

View File

@@ -1,37 +0,0 @@
import os
import shutil
import concurrent.futures
import concurrent.futures
from typing import Generator
def recreate_folder(folder_path):
if os.path.exists(folder_path) and os.path.isdir(folder_path):
shutil.rmtree(folder_path)
os.makedirs(folder_path)
def persist_file(file_content, file_name):
with open(f'{file_name}', 'w') as f:
f.write(file_content)
class GenerationTimeoutError(Exception):
pass
def timeout_generator_wrapper(generator, timeout):
def generator_func():
for item in generator:
yield item
def wrapper() -> Generator:
gen = generator_func()
while True:
try:
with concurrent.futures.ThreadPoolExecutor() as executor:
future = executor.submit(next, gen)
yield future.result(timeout=timeout)
except StopIteration:
break
except concurrent.futures.TimeoutError:
raise GenerationTimeoutError(f"Generation took longer than {timeout} seconds")
return wrapper()

View File

@@ -1,53 +0,0 @@
import difflib
def find_between(input_string, start, end):
try:
start_index = input_string.index(start) + len(start)
end_index = input_string.index(end, start_index)
return input_string[start_index:end_index]
except ValueError:
raise ValueError(f'Could not find {start} and {end} in {input_string}')
def clean_content(content):
return content.replace('```', '').strip()
def print_colored(headline, text, color_code, end='\n'):
if color_code == 'black':
color_code = '30'
elif color_code == 'red':
color_code = '31'
elif color_code == 'green':
color_code = '32'
elif color_code == 'yellow':
color_code = '33'
elif color_code == 'blue':
color_code = '34'
elif color_code == 'magenta':
color_code = '35'
elif color_code == 'cyan':
color_code = '36'
elif color_code == 'white':
color_code = '37'
color_start = f"\033[{color_code}m"
reset = "\033[0m"
bold_start = "\033[1m"
if headline:
print(f"{bold_start}{color_start}{headline}{reset}")
print(f"{color_start}{text}{reset}", end=end)
def find_differences(a, b):
matcher = difflib.SequenceMatcher(None, a, b)
differences = set()
for tag, i1, i2, j1, j2 in matcher.get_opcodes():
if tag == 'replace':
diff_a = a[i1:i2]
diff_b = b[j1:j2]
# Check for mirrored results and only add non-mirrored ones
if (diff_b, diff_a) not in differences:
differences.add((diff_a, diff_b))
return differences