Merge branch 'main' of https://github.com/jina-ai/gptdeploy into feat-avoid-loop

# Conflicts:
#	src/options/generate/static_files/microservice/Dockerfile
This commit is contained in:
Joschka Braun
2023-05-04 17:05:33 +02:00
14 changed files with 191 additions and 109 deletions

View File

@@ -59,3 +59,57 @@ jobs:
SCENEX_API_KEY: ${{ secrets.SCENEX_API_KEY }}
WHISPER_API_KEY: ${{ secrets.WHISPER_API_KEY }}
# Builds and pushes the Docker base image `jinaai/gpt-dev:<version>`.
# All steps after the path filter run only when files under
# src/options/generate/static_files/base_image/ changed.
base-image-push:
  runs-on: ubuntu-latest
  steps:
    - name: Checkout Repository
      uses: actions/checkout@v2
    # Exposes `steps.check.outputs.changed`, which gates every later step.
    - name: Check if code relevant to executor has changed
      uses: dorny/paths-filter@v2
      id: check
      with:
        filters: |
          changed:
            - src/options/generate/static_files/base_image/**
    # Reads the quoted value of DOCKER_BASE_IMAGE_VERSION from src/constants.py
    # and exports it as VERSION for the following steps.
    - name: Get base image tag
      if: steps.check.outputs.changed == 'true'
      shell: bash
      run: |
        FILE='src/constants.py'
        VERSION=$(sed -n '/DOCKER_BASE_IMAGE_VERSION =/p' "$FILE" | cut -d \' -f2)
        # Fail early: with an empty VERSION the pull below would fail, be read
        # as "tag not used yet", and the build would run with an invalid tag.
        if [ -z "$VERSION" ]; then
          echo "Could not read DOCKER_BASE_IMAGE_VERSION from $FILE"
          exit 1
        fi
        echo "VERSION=$VERSION" >> "$GITHUB_ENV"
    # A successful pull means the tag already exists, so the job aborts
    # instead of overwriting a published image.
    - name: Check that tag isn't used already for the docker base image
      if: steps.check.outputs.changed == 'true'
      env:
        VERSION: ${{ env.VERSION }}
      shell: bash
      run: |
        if docker pull "jinaai/gpt-dev:$VERSION"; then
          echo "Executor version/tag is used already. Please update the tag"
          exit 1
        else
          echo "Executor version/tag isn't used already, continue to build..."
        fi
    - name: Set up Docker Buildx
      if: steps.check.outputs.changed == 'true'
      uses: docker/setup-buildx-action@v1
    - name: Login to Docker Hub
      if: steps.check.outputs.changed == 'true'
      uses: docker/login-action@v1
      with:
        username: ${{ secrets.DOCKERHUB_DEVBOT_USER }}
        password: ${{ secrets.DOCKERHUB_DEVBOT_PWD }}
    - name: Build and Push Docker Image
      if: steps.check.outputs.changed == 'true'
      uses: docker/build-push-action@v2
      with:
        context: src/options/generate/static_files/base_image
        push: true
        tags: jinaai/gpt-dev:${{ env.VERSION }}

102
README.md
View File

@@ -58,16 +58,14 @@ We are working on a way to use gpt-3.5-turbo as well.
```bash
gptdeploy generate \
--description "<description of the microservice>" \
--test "<specification of a test scenario>" \
--model <gpt-3.5 or gpt-4> \
--path </path/to/local/folder>
```
To generate your personal microservice two things are required:
- A `description` of the task you want to accomplish.
- A `test` scenario that ensures the microservice works as expected.
- A `description` of the task you want to accomplish. (optional)
- The `model` you want to use - either `gpt-3.5` or `gpt-4`. `gpt-3.5` is ~10x cheaper,
but will not be able to generate as complex microservices.
- A `path` on the local drive where the microservice will be generated.
but will not be able to generate as complex microservices. (default: largest you have access to)
- A `path` on the local drive where the microservice will be generated. (default: ./microservice)
The creation process should take between 5 and 15 minutes.
During this time, GPT iteratively builds your microservice until it finds a strategy that makes your test scenario pass.
@@ -105,9 +103,7 @@ In this section you can get a feeling for the kind of microservices that can be
```bash
gptdeploy generate \
--description "The user writes something and gets a related deep compliment." \
--test "Given the word test a deep compliment is generated" \
--model gpt-4 \
--path microservice
--model gpt-4
```
<img src="res/compliment_example.png" alt="Compliment Generator" width="400" />
@@ -115,40 +111,32 @@ gptdeploy generate \
### Extract and summarize news articles given a URL
```bash
gptdeploy generate \
--description "Extract text from a news article URL using Newspaper3k library and generate a summary using gpt." \
--test "input: 'http://fox13now.com/2013/12/30/new-year-new-laws-obamacare-pot-guns-and-drones/' output: assert a summarized version of the article exists" \
--model gpt-4 \
--path microservice
--description "Extract text from a news article URL using Newspaper3k library and generate a summary using gpt. Example input: http://fox13now.com/2013/12/30/new-year-new-laws-obamacare-pot-guns-and-drones/" \
--model gpt-4
```
<img src="res/news_article_example.png" alt="News Article Example" width="400" />
### Chemical Formula Visualization
```bash
gptdeploy generate \
--description "Convert a chemical formula into a 2D chemical structure diagram" \
--test "C=C, CN=C=O, CCC(=O)O" \
--model gpt-4 \
--path microservice
--description "Convert a chemical formula into a 2D chemical structure diagram. Example inputs: C=C, CN=C=O, CCC(=O)O" \
--model gpt-4
```
<img src="res/chemical_formula_example.png" alt="Chemical Formula Visualization" width="400" />
### 2d rendering of 3d model
```bash
gptdeploy generate \
--description "create a 2d rendering of a whole 3d object and x,y,z object rotation using trimesh and pyrender.OffscreenRenderer with os.environ['PYOPENGL_PLATFORM'] = 'egl' and freeglut3-dev library" \
--test "input: https://graphics.stanford.edu/courses/cs148-10-summer/as3/code/as3/teapot.obj output: assert the image is not completely white or black" \
--model gpt-4 \
--path microservice
--description "create a 2d rendering of a whole 3d object and x,y,z object rotation using trimesh and pyrender.OffscreenRenderer with os.environ['PYOPENGL_PLATFORM'] = 'egl' and freeglut3-dev library - example input: https://graphics.stanford.edu/courses/cs148-10-summer/as3/code/as3/teapot.obj" \
--model gpt-4
```
<img src="res/obj_render_example.gif" alt="2D Rendering of 3D Model" width="400" />
### Product Recommendation
```bash
gptdeploy generate \
--description "Generate personalized product recommendations based on user product browsing history and the product categories fashion, electronics and sport" \
--test "Test that a user how visited p1(electronics),p2(fashion),p3(fashion) is more likely to buy p4(fashion) than p5(sports)" \
--model gpt-4 \
--path microservice
--description "Generate personalized product recommendations based on user product browsing history and the product categories fashion, electronics and sport. Example: Input: browsing history: prod1(electronics),prod2(fashion),prod3(fashion), output: p4(fashion)" \
--model gpt-4
```
<img src="res/recommendation_example.png" alt="Product Recommendation" width="400" />
@@ -156,9 +144,7 @@ gptdeploy generate \
```bash
gptdeploy generate \
--description "Given a search query, find articles on hacker news using the hacker news api and return a list of (title, author, website_link, first_image_on_the_website)" \
--test "searching for GPT gives results" \
--model gpt-4 \
--path microservice
--model gpt-4
````
<img src="res/hacker_news_example.png" alt="Hacker News Search" width="400" />
@@ -166,20 +152,16 @@ gptdeploy generate \
```bash
gptdeploy generate \
--description "Given an image, return the image with bounding boxes of all animals (https://pjreddie.com/media/files/yolov3.weights, https://raw.githubusercontent.com/pjreddie/darknet/master/cfg/yolov3.cfg)" \
--test "https://images.unsplash.com/photo-1444212477490-ca407925329e contains animals" \
--model gpt-4 \
--path microservice
--description "Given an image, return the image with bounding boxes of all animals (https://pjreddie.com/media/files/yolov3.weights, https://raw.githubusercontent.com/pjreddie/darknet/master/cfg/yolov3.cfg), Example input: https://images.unsplash.com/photo-1444212477490-ca407925329e" \
--model gpt-4
```
<img src="res/animal_detector_example.png" alt="Animal Detector" width="400" />
### Meme Generator
```bash
gptdeploy generate \
--description "Generate a meme from an image and a caption" \
--test "Surprised Pikachu: https://media.wired.com/photos/5f87340d114b38fa1f8339f9/master/w_1600%2Cc_limit/Ideas_Surprised_Pikachu_HD.jpg, TOP:When you discovered GPTDeploy" \
--model gpt-4 \
--path microservice
--description "Generate a meme from an image and a caption. Example input: https://media.wired.com/photos/5f87340d114b38fa1f8339f9/master/w_1600%2Cc_limit/Ideas_Surprised_Pikachu_HD.jpg, TOP:When you discovered GPTDeploy" \
--model gpt-4
```
<img src="res/meme_example.png" alt="Meme Generator" width="400" />
@@ -187,9 +169,7 @@ gptdeploy generate \
```bash
gptdeploy generate \
--description "Given a word, return a list of rhyming words using the datamuse api" \
--test "hello" \
--model gpt-4 \
--path microservice
--model gpt-4
```
<img src="res/rhyme_generator_example.png" alt="Rhyme Generator" width="400" />
@@ -197,39 +177,31 @@ gptdeploy generate \
```bash
gptdeploy generate \
--description "Generate a word cloud from a given text" \
--test "Lorem ipsum dolor sit amet, consectetur adipiscing elit." \
--model gpt-4 \
--path microservice
--model gpt-4
```
<img src="res/word_cloud_example.png" alt="Word Cloud Generator" width="400" />
### 3d model info
```bash
gptdeploy generate \
--description "Given a 3d object, return vertex count and face count" \
--test "https://raw.githubusercontent.com/polygonjs/polygonjs-assets/master/models/wolf.obj" \
--model gpt-4 \
--path microservice
--description "Given a 3d object, return vertex count and face count. Example: https://raw.githubusercontent.com/polygonjs/polygonjs-assets/master/models/wolf.obj" \
--model gpt-4
```
<img src="res/obj_info_example.png" alt="3D Model Info" width="400" />
### Table extraction
```bash
gptdeploy generate \
--description "Given a URL, extract all tables as csv" \
--test "http://www.ins.tn/statistiques/90" \
--model gpt-4 \
--path microservice
--description "Given a URL, extract all tables as csv. Example: http://www.ins.tn/statistiques/90" \
--model gpt-4
```
<img src="res/table_extraction_example.png" alt="Table Extraction" width="400" />
### Audio to mel spectrogram
```bash
gptdeploy generate \
--description "Create mel spectrograms from audio file" \
--test "https://cdn.pixabay.com/download/audio/2023/02/28/audio_550d815fa5.mp3" \
--model gpt-4 \
--path microservice
--description "Create mel spectrogram from audio file. Example: https://cdn.pixabay.com/download/audio/2023/02/28/audio_550d815fa5.mp3" \
--model gpt-4
```
<img src="res/audio_to_mel_example.png" alt="Audio to Mel Spectrogram" width="400" />
@@ -237,9 +209,7 @@ gptdeploy generate \
```bash
gptdeploy generate \
--description "Convert text to speech" \
--test "Hello, welcome to GPT Deploy!" \
--model gpt-4 \
--path microservice
--model gpt-4
```
<a href=res/text_to_speech_example.wav><img src="res/text_to_speech_example.png" alt="Text to Speech" width="400" /></a>
@@ -251,20 +221,16 @@ gptdeploy generate \
### Heatmap Generator
```bash
gptdeploy generate \
--description "Create a heatmap from an image and a list of relative coordinates" \
--test "https://images.unsplash.com/photo-1574786198875-49f5d09fe2d2, [[0.1, 0.2], [0.3, 0.4], [0.5, 0.6], [0.2, 0.1], [0.7, 0.2], [0.4, 0.2]]" \
--model gpt-4 \
--path microservice
--description "Create a heatmap from an image and a list of relative coordinates. Example input: https://images.unsplash.com/photo-1574786198875-49f5d09fe2d2, [[0.1, 0.2], [0.3, 0.4], [0.5, 0.6], [0.2, 0.1], [0.7, 0.2], [0.4, 0.2]]" \
--model gpt-4
```
<img src="res/heatmap_example.png" alt="Heatmap Generator" width="400" />
### QR Code Generator
```bash
gptdeploy generate \
--description "Generate QR code from URL" \
--test "https://www.example.com" \
--model gpt-4 \
--path microservice
--description "Generate QR code from URL. Example input: https://www.example.com" \
--model gpt-4
```
<img src="res/qr_example.png" alt="QR Code Generator" width="400" />
@@ -272,10 +238,8 @@ gptdeploy generate \
```bash
gptdeploy generate \
--description "Visualize the Mandelbrot set with custom parameters" \
--test "center=-0+1i, zoom=1.0, size=800x800, iterations=1000" \
--model gpt-4 \
--path microservice
--description "Visualize the Mandelbrot set with custom parameters. Example input: center=-0+1i, zoom=1.0, size=800x800, iterations=1000" \
--model gpt-4
```
<img src="res/mandelbrot_example.png" alt="Mandelbrot Set Visualizer" width="400" />
@@ -283,7 +247,7 @@ gptdeploy generate \
### Markdown to HTML Converter
```bash
gptdeploy generate --description "Convert markdown to HTML" --test "# Hello, welcome to GPT Deploy!"
gptdeploy generate --description "Convert markdown to HTML"
```
<img src="res/markdown_to_html_example.png" alt="Markdown to HTML Converter" width="400" />

View File

@@ -164,10 +164,12 @@ def deploy_on_jcloud(executor_name, microservice_path):
time.sleep(5)
except SystemExit as e:
raise SystemExit(f'''
Looks like your free credits ran out.
Please add payment information to your account and try again.
Visit https://cloud.jina.ai/
''') from e
Looks like you either ran out of credits or something went wrong in the generation and we didn't catch it.
To check if you ran out of credits, please go to https://cloud.jina.ai.
If you have credits left, please create an issue here https://github.com/jina-ai/gptdeploy/issues/new/choose
and add details on the microservice you are trying to create.
In that case, you can upgrade your GPT Deploy version, if not using latest, and try again.
''') from e
if i == 2:
raise Exception('''
Could not deploy on Jina Cloud.

View File

@@ -35,8 +35,10 @@ def get_latest_package_version(package_name):
# Get package versions not older than 2021
valid_versions = []
for v, release_info in releases.items():
if not release_info:
continue
upload_time = datetime.strptime(release_info[0]['upload_time'], '%Y-%m-%dT%H:%M:%S')
if upload_time.year <= 2021:
if upload_time.year <= 2020 or (upload_time.year == 2021 and upload_time.month <= 9): # knowledge cutoff 2021-09 (including september)
valid_versions.append(v)
v = max(valid_versions, key=version.parse) if valid_versions else None
@@ -61,13 +63,12 @@ def clean_requirements_txt(previous_microservice_path):
continue
split = re.split(r'==|>=|<=|>|<|~=', line)
if len(split) == 1:
if len(split) == 1 or len(split) > 2:
version = None
package_name = split[0]
elif len(split) == 2:
package_name, version = split
else:
raise ValueError(f'Could not parse line {line} in requirements.txt')
package_name, version = split
# Keep lines with jina, docarray, openai, pytest unchanged
if package_name in {'jina', 'docarray', 'openai', 'pytest'}:
@@ -77,7 +78,7 @@ def clean_requirements_txt(previous_microservice_path):
if version is None or not is_package_on_pypi(package_name, version):
latest_version = get_latest_package_version(package_name)
if latest_version is None:
raise ValueError(f'Package {package_name} not found on PyPI')
continue
updated_requirements.append(f'{package_name}~={latest_version}')
else:
updated_requirements.append(line)

View File

@@ -30,7 +30,7 @@ https://github.com/jina-ai/gptdeploy/issues/new
return wrapper
def path_param(func):
@click.option('--path', required=True, help='Path to the generated microservice.')
@click.option('--path', default='microservice', help='Path to the generated microservice.')
@functools.wraps(func)
def wrapper(*args, **kwargs):
path = os.path.expanduser(kwargs['path'])

View File

@@ -1,3 +1,5 @@
DOCKER_BASE_IMAGE_VERSION = '0.0.4'
EXECUTOR_FILE_NAME = '__init__.py'
IMPLEMENTATION_FILE_NAME = 'microservice.py'
TEST_EXECUTOR_FILE_NAME = 'test_microservice.py'
@@ -51,3 +53,4 @@ LANGUAGE_PACKAGES = [
'pattern', 'polyglot', 'pytorch-transformers', 'rasa', 'sentence-transformers',
'spacy', 'stanza', 'summarizer', 'sumy', 'textblob', 'textstat', 'transformers'
]

View File

@@ -17,7 +17,7 @@ from src.apis.pypi import is_package_on_pypi, clean_requirements_txt
from src.constants import FILE_AND_TAG_PAIRS, NUM_IMPLEMENTATION_STRATEGIES, MAX_DEBUGGING_ITERATIONS, \
BLACKLISTED_PACKAGES, EXECUTOR_FILE_NAME, TEST_EXECUTOR_FILE_NAME, TEST_EXECUTOR_FILE_TAG, \
REQUIREMENTS_FILE_NAME, REQUIREMENTS_FILE_TAG, DOCKER_FILE_NAME, IMPLEMENTATION_FILE_NAME, \
IMPLEMENTATION_FILE_TAG, LANGUAGE_PACKAGES, UNNECESSARY_PACKAGES
IMPLEMENTATION_FILE_TAG, LANGUAGE_PACKAGES, UNNECESSARY_PACKAGES, DOCKER_BASE_IMAGE_VERSION
from src.options.generate.templates_system import system_task_iteration, system_task_introduction, system_test_iteration
from src.options.generate.templates_user import template_generate_microservice_name, \
template_generate_possible_packages, \
@@ -51,7 +51,8 @@ class Generator:
self.previous_errors = []
self.previous_solutions = []
def extract_content_from_result(self, plain_text, file_name, match_single_block=False, can_contain_code_block=True):
@staticmethod
def extract_content_from_result(plain_text, file_name, match_single_block=False, can_contain_code_block=True):
optional_line_break = '\n' if can_contain_code_block else '' # the \n at the end makes sure that ``` within the generated code is not matched because it is not right before a line break
pattern = fr"(?:\*|\*\*| ){file_name}\*?\*?\n```(?:\w+\n)?([\s\S]*?){optional_line_break}```"
matches = re.findall(pattern, plain_text, re.MULTILINE)
@@ -205,7 +206,7 @@ metas:
encoding='utf-8') as f:
docker_file_template_lines = f.readlines()
docker_file_template_lines = [
line.replace('{{apt_get_packages}}', '')
line.replace('{{APT_GET_PACKAGES}}', '').replace('{{DOCKER_BASE_IMAGE_VERSION}}', DOCKER_BASE_IMAGE_VERSION)
for line in docker_file_template_lines
]
docker_file_content = '\n'.join(docker_file_template_lines)
@@ -225,8 +226,8 @@ metas:
packages = ' '.join(json.loads(json_string)['packages'])
docker_file_template = self.read_docker_template()
return {DOCKER_FILE_NAME: docker_file_template.replace('{{apt_get_packages}}', '{apt_get_packages}').format(
apt_get_packages=packages)}
return {DOCKER_FILE_NAME: docker_file_template.replace('{{APT_GET_PACKAGES}}', '{APT_GET_PACKAGES}').replace('{{DOCKER_BASE_IMAGE_VERSION}}', DOCKER_BASE_IMAGE_VERSION).format(
APT_GET_PACKAGES=packages)}
def parse_result_fn_requirements(self, content_raw: str):
content_parsed = self.extract_content_from_result(content_raw, 'requirements.txt', match_single_block=True)

View File

@@ -0,0 +1,7 @@
# Base image definition: starts from the Jina standard image, upgrades pip and
# installs a fixed set of system packages.
# NOTE(review): presumably this is the image published as jinaai/gpt-dev and used
# in the microservice Dockerfile's FROM line - confirm against the CI workflow.
FROM jinaai/jina:3.15.1-dev14-py39-standard
# update pip
RUN pip install --upgrade pip
# install media dependencies
# NOTE(review): the apt package lists are left in place here (no cleanup step).
# The microservice Dockerfile template appears to run `apt-get install` without
# its own `apt-get update`, so it presumably relies on these lists - confirm
# before adding `rm -rf /var/lib/apt/lists/*`.
RUN apt-get update && apt-get install --no-install-recommends -y ffmpeg build-essential pkg-config libpoppler-cpp-dev

View File

@@ -1,8 +1,6 @@
FROM jinaai/jina:3.15.1-dev14-py39-standard
# update pip
RUN pip install --upgrade pip
FROM jinaai/gpt-dev:{{DOCKER_BASE_IMAGE_VERSION}}
RUN apt-get update && apt-get install --no-install-recommends -y ffmpeg build-essential pkg-config libpoppler-cpp-dev {{apt_get_packages}} && apt-get clean && rm -rf /var/lib/apt/lists/*
RUN apt-get install --no-install-recommends -y {{APT_GET_PACKAGES}}
## install requirements for the executor
COPY requirements.txt .

View File

@@ -1,7 +1,7 @@
from src.options.generate.templates_user import not_allowed_docker_string, not_allowed_function_string
template_system_message_base = f'''It is the year 2021.
template_system_message_base = f'''It is September 2021.
You are a principal engineer working at Jina - an open source company.
You accurately satisfy all of the user's requirements.
To be more specific, you help the user to build a microservice with the following requirements:

View File

@@ -170,7 +170,7 @@ template_generate_apt_get_install = PromptTemplate.from_template(
{docker_file_wrapped}
Name all packages which need to be installed via `apt-get install` in above Dockerfile (`{{apt_get_packages}}`) for the following requirements.txt file:
Name all packages which need to be installed via `apt-get install` in above Dockerfile (`{{APT_GET_PACKAGES}}`) for the following requirements.txt file:
{requirements_file_wrapped}
@@ -178,7 +178,7 @@ Note that you must not list apt-get packages that are already installed in the D
Note that openai does not require any apt-get packages.
Note that you are only allowed to list packages where you are highly confident that they are really needed.
Note that you can assume that the standard python packages are already installed.
Output the packages that need to me placed at {{apt_get_packages}} as json in the following format:
Output the packages that need to me placed at {{APT_GET_PACKAGES}} as json in the following format:
**apt-get-packages.json**
```json
{{"packages": ["<package1>", "<package2>"]}}
@@ -276,23 +276,20 @@ Here is the summary of the error that occurred:
{summarized_error}
To solve this error, you should determine the list of packages that need to be installed via `apt-get install` in the Dockerfile.
Output the apt-get packages that need to be placed at {{apt_get_packages}} as json in the following format:
Output the apt-get packages that need to be placed at {{APT_GET_PACKAGES}} as json in the following format:
**apt-get-packages.json**
```json
{{"packages": ["<package1>", "<package2>"]}}
```
Example for the following requirements.txt file:
**requirements.txt**
```
numpy==1.19.5
fitz
```
The output would be:
Example:
Error is about missing package `libgl1-mesa-glx`.
The output is:
**apt-get-packages.json**
```json
{{"packages": []}}
{{"packages": [libgl1-mesa-glx]}}
```
Note that you must not output any other files. Only output the apt-get-packages.json file.
Note that you must not output the content of any other files like the Dockerfile or requirements.txt.
Only output the apt-get-packages.json file.
'''
)

View File

@@ -38,19 +38,30 @@ def test_filter_packages_list():
def test_precheck_requirements_txt(tmpdir):
requirements_content = """\
jina==1.2.3
docarray==1.2.3
requests~=2.26.0
# version does not exist but jina and docarray should not be verified
jina==111.222.333
docarray==111.222.333
# package that actually exists in that version
gtts~=2.2.3
# package with non-existing version
pydub~=123.123.123
# non-existing package with correct version
base64~=3.3.0
# not parsable version
pdfminer.six>=20201018,<20211018
# existing package without version
requests
# another existing package without version
streamlit
"""
requirements_clean = """\
jina==1.2.3
docarray==1.2.3
requests~=2.26.0
jina==111.222.333
docarray==111.222.333
gtts~=2.2.3
pydub~=0.25.1"""
pydub~=0.25.1
pdfminer.six~=20201018
requests~=2.26.0
streamlit~=0.89.0"""
requirements_txt_path = os.path.join(tmpdir, "requirements.txt")
with open(requirements_txt_path, "w", encoding="utf-8") as f:
f.write(requirements_content)

View File

@@ -0,0 +1,44 @@
import pytest
from src.options.generate.generator import Generator
def create_code_block(with_backticks, asterisks, with_highlight_info, file_name, start_inline, content):
    """Build a fake model answer containing one JSON-like snippet.

    Args:
        with_backticks: wrap the snippet in a triple-backtick fence when truthy.
        asterisks: string placed directly before and after ``file_name``.
        with_highlight_info: add ``json`` after the opening fence when truthy.
        file_name: optional file name line put above the snippet; skipped when falsy.
        start_inline: prefix the result with ``This is your file: `` when truthy.
        content: value inserted into the snippet's ``content`` field.

    Returns:
        The assembled text.
    """
    # The snippet itself; the decorations below are layered around it.
    snippet = f'''
{{
"content": "{content}",
}}
'''
    language_tag = 'json' if with_highlight_info else ''

    if with_backticks:
        snippet = f'```{language_tag}\n{snippet}\n```'
    if file_name:
        snippet = f'{asterisks}{file_name}{asterisks}\n{snippet}'
    if start_inline:
        snippet = f'This is your file: {snippet}'
    return snippet
@pytest.mark.parametrize(
    'plain_text, expected1, expected2',
    [
        # Named block introduced inline, followed by an unnamed json block.
        (
            f"""{create_code_block(True, '', False, 'test1.json', True, 'content1')}\n{create_code_block(True, '', True, '', False, 'content2')}""",
            f'{create_code_block(False, "", False, "", False, content="content1")}'.strip(),
            ''
        ),
        # Only a single unnamed json block.
        (
            f"""{create_code_block(True, '', True, '', False, 'content2')}""",
            '',
            f'{create_code_block(False, "", False, "", False, content="content2")}'.strip()
        )
    ]
)
def test_extract_content_from_result(plain_text, expected1, expected2):
    """Check ``Generator.extract_content_from_result`` on synthetic answers.

    ``expected1`` is the expected result of looking up ``test1.json`` without
    single-block matching; ``expected2`` is the expected result of looking up
    ``test100.json`` with single-block matching enabled.
    """
    by_name = Generator.extract_content_from_result(
        plain_text, 'test1.json', match_single_block=False, can_contain_code_block=True
    )
    assert by_name == expected1

    single_block = Generator.extract_content_from_result(
        plain_text, 'test100.json', match_single_block=True, can_contain_code_block=False
    )
    assert single_block == expected2