Merge pull request #99 from jina-ai/feat_search_api

🔍 feat: search api
This commit is contained in:
Florian Hönicke
2023-05-20 16:44:15 +02:00
committed by GitHub
22 changed files with 467 additions and 251 deletions

View File

@@ -10,7 +10,7 @@ jobs:
strategy:
fail-fast: false
matrix:
group: [0, 1, 2, 3, 4]
group: [0, 1, 2, 3, 4, 5_company_logos]
steps:
- uses: actions/checkout@v2
- name: Set up Python 3.8
@@ -33,6 +33,8 @@ jobs:
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
SCENEX_API_KEY: ${{ secrets.SCENEX_API_KEY }}
WHISPER_API_KEY: ${{ secrets.WHISPER_API_KEY }}
GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }}
GOOGLE_CSE_ID: ${{ secrets.GOOGLE_CSE_ID }}
test_unit:
runs-on: ubuntu-latest
@@ -58,6 +60,8 @@ jobs:
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
SCENEX_API_KEY: ${{ secrets.SCENEX_API_KEY }}
WHISPER_API_KEY: ${{ secrets.WHISPER_API_KEY }}
GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }}
GOOGLE_CSE_ID: ${{ secrets.GOOGLE_CSE_ID }}
base-image-push:
runs-on: ubuntu-latest

View File

@@ -54,7 +54,9 @@ Your imagination is the limit!
</p>
Welcome to Dev GPT, where we bring your ideas to life with the power of advanced artificial intelligence! Our automated development team is designed to create microservices tailored to your specific needs, making your software development process seamless and efficient. Comprised of a virtual Product Manager, Developer, and DevOps, our AI team ensures that every aspect of your project is covered, from concept to deployment.
Welcome to Dev-GPT, where we bring your ideas to life with the power of advanced artificial intelligence!
Our automated development team is designed to create microservices tailored to your specific needs, making your software development process seamless and efficient.
Comprised of a virtual Product Manager, Developer, and DevOps, our AI team ensures that every aspect of your project is covered, from concept to deployment.
## Quickstart
@@ -65,8 +67,13 @@ dev-gpt generate
### Requirements
- OpenAI key with access to gpt-3.5-turbo or gpt-4
- If you want to enable your microservice to search for web content,
you need to set the `GOOGLE_API_KEY` and `GOOGLE_CSE_ID` environment variables.
More information can be found [here](https://developers.google.com/custom-search/v1/overview).
```bash
dev-gpt configure --key <your openai api key>
dev-gpt configure --openai_api_key <your openai api key>
dev-gpt configure --google_api_key <google api key> (optional if you want to use google search)
dev-gpt configure --google_cse_id <google cse id> (optional if you want to use google search)
```
If you set the environment variable `OPENAI_API_KEY`, the configuration step can be skipped.

View File

@@ -24,7 +24,7 @@ def configure_openai_api_key():
if 'OPENAI_API_KEY' not in os.environ:
print_colored('You need to set OPENAI_API_KEY in your environment.', '''
Run:
dev-gpt configure --key <your_openai_api_key>
dev-gpt configure --openai_api_key <your_openai_api_key>
If you have updated it already, please restart your terminal.
''', 'red')

View File

@@ -98,7 +98,7 @@ def _push_executor(dir_path):
'public': 'True',
'private': 'False',
'verbose': 'True',
'buildEnv': f'{{"OPENAI_API_KEY": "{os.environ["OPENAI_API_KEY"]}"}}',
'buildEnv': f'{{"OPENAI_API_KEY": "{os.environ["OPENAI_API_KEY"]}", "GOOGLE_API_KEY": "{os.environ.get("GOOGLE_API_KEY","")}", "GOOGLE_CSE_ID": "{os.environ.get("GOOGLE_CSE_ID","")}"}}',
'md5sum': md5_digest,
}
with suppress_stdout():
@@ -251,7 +251,9 @@ executors:
uses: {prefix}://{get_user_name(DEMO_TOKEN)}/{executor_name}:latest
{"" if use_docker else "install-requirements: True"}
env:
OPENAI_API_KEY: {os.environ['OPENAI_API_KEY']}
OPENAI_API_KEY: ${{{{ ENV.OPENAI_API_KEY }}}}
GOOGLE_API_KEY: ${{{{ ENV.GOOGLE_API_KEY }}}}
GOOGLE_CSE_ID: ${{{{ ENV.GOOGLE_CSE_ID }}}}
jcloud:
resources:
instance: C2

View File

@@ -92,9 +92,16 @@ def deploy(path):
Deployer().deploy(path)
@main.command()
@click.option('--key', required=True, help='Your OpenAI API key.')
def configure(key):
set_api_key(key)
@click.option('--openai-api-key', default=None, help='Your OpenAI API key.')
@click.option('--google-api-key', default=None, help='Your Google API key.')
@click.option('--google-cse-id', default=None, help='Your Google CSE ID.')
def configure(openai_api_key, google_api_key, google_cse_id):
    """Persist whichever API credentials were provided as environment variables."""
    credentials = (
        ('OPENAI_API_KEY', openai_api_key),
        ('GOOGLE_API_KEY', google_api_key),
        ('GOOGLE_CSE_ID', google_cse_id),
    )
    for env_name, value in credentials:
        if value:
            set_api_key(env_name, value)
if __name__ == '__main__':

View File

@@ -26,6 +26,10 @@ FILE_AND_TAG_PAIRS = [
(STREAMLIT_FILE_NAME, STREAMLIT_FILE_TAG)
]
# Maps a symbol that may appear in generated code to the import line it requires;
# used to prepend missing imports as a post-processing step.
INDICATOR_TO_IMPORT_STATEMENT = {'BytesIO': 'from io import BytesIO'}
FLOW_URL_PLACEHOLDER = 'jcloud.jina.ai'
PRICING_GPT4_PROMPT = 0.03
@@ -43,15 +47,20 @@ DEMO_TOKEN = '45372338e04f5a41af949024db929d46'
# Packages the generator must never select as a dependency.
BLACKLISTED_PACKAGES = [
    'moderngl', 'pyopengl', 'pyglet', 'pythreejs', 'panda3d',  # they need a screen
    'tika',  # it needs Java
    'clearbit',  # installation issues on the latest version
]
# Web-framework packages that are redundant because the project's own wrappers are used instead.
UNNECESSARY_PACKAGES = ['fastapi', 'uvicorn', 'starlette']
# NLP/LLM packages; when one of these is proposed it is replaced by the built-in
# gpt_3_5_turbo tool (see replace_with_tool_if_possible).
# NOTE: the diff hunk retained the stale pre-change first line of this list next to
# the updated one; only the updated list (including 'GPT-3') is kept here.
LANGUAGE_PACKAGES = [
    'allennlp', 'bertopic', 'GPT-3', 'fasttext', 'flair', 'gensim', 'nltk', 'openai',
    'pattern', 'polyglot', 'pytorch-transformers', 'rasa', 'sentence-transformers',
    'spacy', 'stanza', 'summarizer', 'sumy', 'textblob', 'textstat', 'transformers',
    'vadersentiment'
]
# Search-related packages; when one of these is proposed it is replaced by the
# built-in google_custom_search tool (see replace_with_tool_if_possible).
SEARCH_PACKAGES = [
    'googlesearch-python',
    'google',
    'googlesearch',
    'google-api-python-client',
    'pygooglenews',
    'google-cloud',
]

View File

@@ -40,26 +40,26 @@ def get_shell():
return None
def get_shell_config(key):
def get_shell_config(name, key):
return {
"bash": {"config_file": "~/.bashrc", "export_line": f"export OPENAI_API_KEY={key}"},
"zsh": {"config_file": "~/.zshrc", "export_line": f"export OPENAI_API_KEY={key}"},
"sh": {"config_file": "~/.profile", "export_line": f"export OPENAI_API_KEY={key}"},
"bash": {"config_file": "~/.bashrc", "export_line": f"export {name}={key}"},
"zsh": {"config_file": "~/.zshrc", "export_line": f"export {name}={key}"},
"sh": {"config_file": "~/.profile", "export_line": f"export {name}={key}"},
"fish": {
"config_file": "~/.config/fish/config.fish",
"export_line": f"set -gx OPENAI_API_KEY {key}",
"export_line": f"set -gx {name} {key}",
},
"csh": {"config_file": "~/.cshrc", "export_line": f"setenv OPENAI_API_KEY {key}"},
"tcsh": {"config_file": "~/.tcshrc", "export_line": f"setenv OPENAI_API_KEY {key}"},
"ksh": {"config_file": "~/.kshrc", "export_line": f"export OPENAI_API_KEY={key}"},
"dash": {"config_file": "~/.profile", "export_line": f"export OPENAI_API_KEY={key}"}
"csh": {"config_file": "~/.cshrc", "export_line": f"setenv {name} {key}"},
"tcsh": {"config_file": "~/.tcshrc", "export_line": f"setenv {name} {key}"},
"ksh": {"config_file": "~/.kshrc", "export_line": f"export {name}={key}"},
"dash": {"config_file": "~/.profile", "export_line": f"export {name}={key}"}
}
def set_env_variable(shell, key):
shell_config = get_shell_config(key)
def set_env_variable(shell, name, key):
shell_config = get_shell_config(name, key)
if shell not in shell_config:
click.echo("Sorry, your shell is not supported. Please add the key OPENAI_API_KEY manually.")
click.echo(f"Sorry, your shell is not supported. Please add the key {name} manually.")
return
config_file = os.path.expanduser(shell_config[shell]["config_file"])
@@ -71,8 +71,8 @@ def set_env_variable(shell, key):
export_line = shell_config[shell]['export_line']
# Update the existing API key if it exists, otherwise append it to the config file
if f"OPENAI_API_KEY" in content:
content = re.sub(r'OPENAI_API_KEY=.*', f'OPENAI_API_KEY={key}', content, flags=re.MULTILINE)
if f"{name}" in content:
content = re.sub(rf'{name}=.*', f'{name}={key}', content, flags=re.MULTILINE)
with open(config_file, "w", encoding='utf-8') as file:
file.write(content)
@@ -81,7 +81,7 @@ def set_env_variable(shell, key):
file.write(f"\n{export_line}\n")
click.echo(f'''
✅ Success, OPENAI_API_KEY has been set in {config_file}.
✅ Success, {name} has been set in {config_file}.
Please restart your shell to apply the changes or run:
source {config_file}
'''
@@ -91,21 +91,21 @@ source {config_file}
click.echo(f"Error: {config_file} not found. Please set the environment variable manually.")
def set_api_key(key):
def set_api_key(name, key):
system_platform = platform.system().lower()
if system_platform == "windows":
set_env_variable_command = f'setx OPENAI_API_KEY "{key}"'
set_env_variable_command = f'setx {name} "{key}"'
subprocess.call(set_env_variable_command, shell=True)
click.echo('''
✅ Success, OPENAI_API_KEY has been set.
click.echo(f'''
✅ Success, {name} has been set.
Please restart your Command Prompt to apply the changes.
'''
)
elif system_platform in ["linux", "darwin"]:
if "OPENAI_API_KEY" in os.environ or is_key_set_in_config_file(key):
if not click.confirm("OPENAI_API_KEY is already set. Do you want to overwrite it?"):
if f"{name}" in os.environ or is_key_set_in_config_file(key):
if not click.confirm(f"{name} is already set. Do you want to overwrite it?"):
click.echo("Aborted.")
return
@@ -115,24 +115,24 @@ Please restart your Command Prompt to apply the changes.
"Error: Unable to detect your shell or psutil is not available. Please set the environment variable manually.")
return
set_env_variable(shell, key)
set_env_variable(shell, name, key)
else:
click.echo("Sorry, this platform is not supported.")
def is_key_set_in_config_file(key):
def is_key_set_in_config_file(name, key):
shell = get_shell()
if shell is None:
return False
shell_config = get_shell_config(key)
shell_config = get_shell_config(name, key)
config_file = os.path.expanduser(shell_config[shell]["config_file"])
try:
with open(config_file, "r", encoding='utf-8') as file:
content = file.read()
if f"OPENAI_API_KEY" in content:
if f"{name}" in content:
return True
except FileNotFoundError:
pass

View File

@@ -3,7 +3,7 @@ import json
from dev_gpt.apis.gpt import ask_gpt
from dev_gpt.options.generate.parser import identity_parser
from dev_gpt.options.generate.prompt_factory import context_to_string
from dev_gpt.options.generate.tools.tools import get_available_tools
def auto_refine_description(context):
@@ -36,7 +36,9 @@ def auto_refine_description(context):
better_description_prompt = f'''{{context_string}}
Update the description of the Microservice to make it more precise without adding or removing information.
Note: the output must be a list of tasks the Microservice has to perform.
Example for the description: "return the average temperature of the 5 days weather forecast for a given location."
Note: you can uses two tools if necessary:
{get_available_tools()}
Example for the description: "return a description of the average temperature of the 5 days weather forecast for a given location."
1. get the 5 days weather forcast from the https://openweathermap.org/ API
2. extract the temperature from the response
3. calculate the average temperature'''

View File

@@ -1,25 +1,46 @@
from dev_gpt.apis.gpt import ask_gpt
from dev_gpt.options.generate.parser import boolean_parser
from dev_gpt.options.generate.parser import boolean_parser, identity_parser
def is_question_true(question):
    """Return a predicate that, given a text, answers *question* about it with a boolean."""
    return lambda text: answer_yes_no_question(text, question)
def is_question_false(question):
    """Return a predicate that is the logical negation of ``is_question_true(question)``."""
    def negated(context):
        return not is_question_true(question)(context)
    return negated
def answer_yes_no_question(text, question):
prompt = question_prompt.format(
question=question,
text=text
pros_and_cons = ask_gpt(
pros_and_cons_prompt.format(
question=question,
text=text,
),
identity_parser,
)
return ask_gpt(prompt, boolean_parser)
return ask_gpt(
question_prompt.format(
text=text,
question=question,
pros_and_cons=pros_and_cons,
),
boolean_parser)
# Prompt that asks the model to argue both sides of a yes/no question before the
# final answer is requested (a lightweight chain-of-thought step).
pros_and_cons_prompt = '''\
# Context
{text}
# Question
{question}
Note: You must not answer the question. Instead, give up to 5 bullet points (10 words) arguing why the question should be answered with true or false.'''

# Prompt that asks for the final yes/no decision.
# NOTE(review): the caller passes pros_and_cons to .format() but this template has
# no {pros_and_cons} placeholder, so str.format silently drops it — confirm intended.
question_prompt = '''\
# Context
{text}
# Question
{question}
Note: You must answer "yes" or "no".
'''

View File

@@ -17,13 +17,14 @@ from dev_gpt.apis.pypi import is_package_on_pypi, clean_requirements_txt
from dev_gpt.constants import FILE_AND_TAG_PAIRS, NUM_IMPLEMENTATION_STRATEGIES, MAX_DEBUGGING_ITERATIONS, \
BLACKLISTED_PACKAGES, EXECUTOR_FILE_NAME, TEST_EXECUTOR_FILE_NAME, TEST_EXECUTOR_FILE_TAG, \
REQUIREMENTS_FILE_NAME, REQUIREMENTS_FILE_TAG, DOCKER_FILE_NAME, IMPLEMENTATION_FILE_NAME, \
IMPLEMENTATION_FILE_TAG, LANGUAGE_PACKAGES, UNNECESSARY_PACKAGES, DOCKER_BASE_IMAGE_VERSION
IMPLEMENTATION_FILE_TAG, LANGUAGE_PACKAGES, UNNECESSARY_PACKAGES, DOCKER_BASE_IMAGE_VERSION, SEARCH_PACKAGES, \
INDICATOR_TO_IMPORT_STATEMENT
from dev_gpt.options.generate.pm.pm import PM
from dev_gpt.options.generate.templates_user import template_generate_microservice_name, \
template_generate_possible_packages, \
template_implement_solution_code_issue, \
template_solve_pip_dependency_issue, template_is_dependency_issue, template_generate_playground, \
template_generate_function, template_generate_test, template_generate_requirements, \
template_generate_function_constructor, template_generate_test, template_generate_requirements, \
template_chain_of_thought, template_summarize_error, \
template_solve_apt_get_dependency_issue, \
template_suggest_solutions_code_issue, template_was_error_seen_before, \
@@ -40,9 +41,10 @@ class TaskSpecification:
class Generator:
def __init__(self, task_description, path, model='gpt-4'):
def __init__(self, task_description, path, model='gpt-4', self_healing=True):
self.gpt_session = gpt.GPTSession(model=model)
self.microservice_specification = TaskSpecification(task=task_description, test=None)
self.self_healing = self_healing
self.microservice_root_path = path
self.microservice_name = None
self.previous_microservice_path = None
@@ -102,6 +104,7 @@ metas:
parse_result_fn: Callable = None,
use_custom_system_message: bool = True,
response_format_example: str = None,
post_process_fn: Callable = None,
**template_kwargs
):
"""This function generates file(s) using the given template and persists it/them in the given destination folder.
@@ -145,6 +148,8 @@ metas:
)
)
content = parse_result_fn(content_raw)
if post_process_fn is not None:
content = post_process_fn(content)
if content == {}:
conversation = self.gpt_session.get_conversation(
messages=[SystemMessage(content='You are a helpful assistant.'), AIMessage(content=content_raw)]
@@ -194,18 +199,22 @@ metas:
.replace('class DevGPTExecutor(Executor):', f'class {self.microservice_name}(Executor):')
persist_file(microservice_executor_code, os.path.join(self.cur_microservice_path, EXECUTOR_FILE_NAME))
with open(os.path.join(os.path.dirname(__file__), 'static_files', 'microservice', 'apis.py'), 'r', encoding='utf-8') as f:
persist_file(f.read(), os.path.join(self.cur_microservice_path, 'apis.py'))
for additional_file in ['google_custom_search.py', 'gpt_3_5_turbo.py']:
with open(os.path.join(os.path.dirname(__file__), 'static_files', 'microservice', additional_file), 'r', encoding='utf-8') as f:
persist_file(f.read(), os.path.join(self.cur_microservice_path, additional_file))
is_using_gpt_3_5_turbo = 'gpt_3_5_turbo' in packages or 'gpt-3-5-turbo' in packages
is_using_google_custom_search = 'google_custom_search' in packages or 'google-custom-search' in packages
microservice_content = self.generate_and_persist_file(
section_title='Microservice',
template=template_generate_function,
template=template_generate_function_constructor(is_using_gpt_3_5_turbo, is_using_google_custom_search),
microservice_description=self.microservice_specification.task,
test_description=self.microservice_specification.test,
packages=packages,
file_name_purpose=IMPLEMENTATION_FILE_NAME,
tag_name=IMPLEMENTATION_FILE_TAG,
file_name_s=[IMPLEMENTATION_FILE_NAME],
post_process_fn=self.add_missing_imports_post_process_fn,
)[IMPLEMENTATION_FILE_NAME]
test_microservice_content = self.generate_and_persist_file(
@@ -218,6 +227,7 @@ metas:
file_name_purpose=TEST_EXECUTOR_FILE_NAME,
tag_name=TEST_EXECUTOR_FILE_TAG,
file_name_s=[TEST_EXECUTOR_FILE_NAME],
post_process_fn=self.add_missing_imports_post_process_fn,
)[TEST_EXECUTOR_FILE_NAME]
self.generate_and_persist_file(
@@ -247,6 +257,15 @@ metas:
print('\nFirst version of the microservice generated. Start iterating on it to make the tests pass...')
def add_missing_imports_post_process_fn(self, content_dict: dict):
    """Prepend known import statements to generated files that use an indicator
    symbol but are missing the corresponding import. Mutates and returns the dict."""
    for indicator, import_line in INDICATOR_TO_IMPORT_STATEMENT.items():
        for file_name in content_dict:
            file_content = content_dict[file_name]
            if indicator in file_content and import_line not in file_content:
                content_dict[file_name] = import_line + '\n' + file_content
    return content_dict
@staticmethod
def read_docker_template():
with open(os.path.join(os.path.dirname(__file__), 'static_files', 'microservice', 'Dockerfile'), 'r', encoding='utf-8') as f:
@@ -323,7 +342,7 @@ pytest
if not is_executor_in_hub(gateway_name):
raise Exception(f'{self.microservice_name} not in hub. Hubble logs: {hubble_log}')
def debug_microservice(self, num_approach, packages):
def debug_microservice(self, num_approach, packages, self_healing):
for i in range(1, MAX_DEBUGGING_ITERATIONS):
print('Debugging iteration', i)
print('Trying to debug the microservice. Might take a while...')
@@ -331,6 +350,9 @@ pytest
log_hubble = push_executor(self.cur_microservice_path)
error = process_error_message(log_hubble)
if error:
if not self_healing:
print(error)
raise Exception('Self-healing is disabled. Please fix the error manually.')
print('An error occurred during the build process. Feeding the error back to the assistant...')
self.previous_microservice_path = self.cur_microservice_path
self.cur_microservice_path = get_microservice_path(
@@ -500,7 +522,7 @@ pytest
description=self.microservice_specification.task
)['strategies.json']
packages_list = [[pkg.strip().lower() for pkg in packages] for packages in json.loads(packages_json_string)]
packages_list = [[self.replace_with_gpt_3_5_turbo_if_possible(pkg) for pkg in packages] for packages in
packages_list = [[self.replace_with_tool_if_possible(pkg) for pkg in packages] for packages in
packages_list]
packages_list = self.filter_packages_list(packages_list)
@@ -518,7 +540,7 @@ pytest
for num_approach, packages in enumerate(packages_list):
try:
self.generate_microservice(packages, num_approach)
self.debug_microservice(num_approach, packages)
self.debug_microservice(num_approach, packages, self.self_healing)
self.generate_playground()
except self.MaxDebugTimeReachedException:
print('Could not debug the Microservice with the approach:', packages)
@@ -543,9 +565,11 @@ dev-gpt deploy --path {self.microservice_root_path}
@staticmethod
def replace_with_gpt_3_5_turbo_if_possible(pkg):
def replace_with_tool_if_possible(pkg):
    """Swap a suggested package for a built-in tool when one covers its use case;
    otherwise return the package unchanged."""
    tool_by_package_group = (
        (LANGUAGE_PACKAGES, 'gpt_3_5_turbo'),
        (SEARCH_PACKAGES, 'google_custom_search'),
    )
    for group, tool in tool_by_package_group:
        if pkg in group:
            return tool
    return pkg
@staticmethod

View File

@@ -5,7 +5,8 @@ from dev_gpt.options.generate.chains.question_answering import is_question_true
from dev_gpt.options.generate.chains.translation import translation
from dev_gpt.options.generate.chains.user_confirmation_feedback_loop import user_feedback_loop
from dev_gpt.options.generate.chains.get_user_input_if_needed import get_user_input_if_needed
from dev_gpt.options.generate.parser import identity_parser
from dev_gpt.options.generate.parser import identity_parser, json_parser
from dev_gpt.options.generate.pm.task_tree_schema import TaskTree
from dev_gpt.options.generate.prompt_factory import make_prompt_friendly
from dev_gpt.options.generate.ui import get_random_employee
@@ -35,9 +36,9 @@ Description of the microservice:
def refine(self, microservice_description):
microservice_description, test_description = self.refine_description(microservice_description)
return microservice_description, test_description
# sub_task_tree = self.construct_sub_task_tree(microservice_description)
# sub_task_tree = construct_sub_task_tree(microservice_description)
# return sub_task_tree
return microservice_description, test_description
def refine_description(self, microservice_description):
context = {'microservice_description': microservice_description}
@@ -60,7 +61,8 @@ Description of the microservice:
microservice_description += self.user_input_extension_if_needed(
context,
microservice_description,
condition_question='Does the microservice send requests to an API?',
condition_question='''\
Does the microservice send requests to an API beside the Google Custom Search API and gpt-3.5-turbo?''',
question_gen='Generate a question that asks for the endpoint of the external API and an example of a request and response when interacting with the external API.',
extension_name='Example of API usage',
post_transformation_fn=translation(from_format='api instruction', to_format='python code snippet raw without formatting')
@@ -127,44 +129,44 @@ Example:
# microservice_description=microservice_description
# )
#
# def construct_sub_task_tree(self, microservice_description):
# """
# takes a microservice description and recursively constructs a tree of sub-tasks that need to be done to implement the microservice
# """
# #
# # nlp_fns = self.get_nlp_fns(
# # microservice_description
# # )
#
# sub_task_tree_dict = ask_gpt(
# construct_sub_task_tree_prompt, json_parser,
# microservice_description=microservice_description,
# # nlp_fns=nlp_fns
# )
# reflections = ask_gpt(
# sub_task_tree_reflections_prompt, identity_parser,
# microservice_description=microservice_description,
# # nlp_fns=nlp_fns,
# sub_task_tree=sub_task_tree_dict,
# )
# solutions = ask_gpt(
# sub_task_tree_solutions_prompt, identity_parser,
# # nlp_fns=nlp_fns,
# microservice_description=microservice_description, sub_task_tree=sub_task_tree_dict,
# reflections=reflections,
# )
# sub_task_tree_updated = ask_gpt(
# sub_task_tree_update_prompt,
# json_parser,
# microservice_description=microservice_description,
# # nlp_fns=nlp_fns,
# sub_task_tree=sub_task_tree_dict, solutions=solutions
# )
# # for task_dict in self.iterate_over_sub_tasks(sub_task_tree_updated):
# # task_dict.update(self.get_additional_task_info(task_dict['task']))
#
# sub_task_tree = TaskTree.parse_obj(sub_task_tree_updated)
# return sub_task_tree
def construct_sub_task_tree(self, microservice_description):
"""
takes a microservice description and recursively constructs a tree of sub-tasks that need to be done to implement the microservice
"""
#
# nlp_fns = self.get_nlp_fns(
# microservice_description
# )
sub_task_tree_dict = ask_gpt(
construct_sub_task_tree_prompt, json_parser,
microservice_description=microservice_description,
# nlp_fns=nlp_fns
)
reflections = ask_gpt(
sub_task_tree_reflections_prompt, identity_parser,
microservice_description=microservice_description,
# nlp_fns=nlp_fns,
sub_task_tree=sub_task_tree_dict,
)
solutions = ask_gpt(
sub_task_tree_solutions_prompt, identity_parser,
# nlp_fns=nlp_fns,
microservice_description=microservice_description, sub_task_tree=sub_task_tree_dict,
reflections=reflections,
)
sub_task_tree_updated = ask_gpt(
sub_task_tree_update_prompt,
json_parser,
microservice_description=microservice_description,
# nlp_fns=nlp_fns,
sub_task_tree=sub_task_tree_dict, solutions=solutions
)
# for task_dict in self.iterate_over_sub_tasks(sub_task_tree_updated):
# task_dict.update(self.get_additional_task_info(task_dict['task']))
sub_task_tree = TaskTree.parse_obj(sub_task_tree_updated)
return sub_task_tree
# def get_additional_task_info(self, sub_task_description):
# additional_info_dict = self.get_additional_infos(
@@ -280,71 +282,71 @@ Example:
# Note: You must ignore facts that are unknown.
# Note: You must ignore facts that are unclear.'''
# construct_sub_task_tree_prompt = client_description + '''
# Recursively constructs a tree of functions that need to be implemented for the endpoint_function that retrieves a json string and returns a json string.
# Example:
# Input: "Input: list of integers, Output: Audio file of short story where each number is mentioned exactly once."
# Output:
# {{
# "description": "Create an audio file containing a short story in which each integer from the provided list is seamlessly incorporated, ensuring that every integer is mentioned exactly once.",
# "python_fn_signature": "def generate_integer_story_audio(numbers: List[int]) -> str:",
# "sub_fns": [
# {{
# "description": "Generate sentence from integer.",
# "python_fn_signature": "def generate_sentence_from_integer(number: int) -> int:",
# "sub_fns": []
# }},
# {{
# "description": "Convert the story into an audio file.",
# "python_fn_signature": "def convert_story_to_audio(story: str) -> bytes:",
# "sub_fns": []
# }}
# ]
# }}
#
# Note: you must only output the json string - nothing else.
# Note: you must pretty print the json string.'''
construct_sub_task_tree_prompt = client_description + '''
Recursively constructs a tree of functions that need to be implemented for the endpoint_function that retrieves a json string and returns a json string.
Example:
Input: "Input: list of integers, Output: Audio file of short story where each number is mentioned exactly once."
Output:
{{
"description": "Create an audio file containing a short story in which each integer from the provided list is seamlessly incorporated, ensuring that every integer is mentioned exactly once.",
"python_fn_signature": "def generate_integer_story_audio(numbers: List[int]) -> str:",
"sub_fns": [
{{
"description": "Generate sentence from integer.",
"python_fn_signature": "def generate_sentence_from_integer(number: int) -> int:",
"sub_fns": []
}},
{{
"description": "Convert the story into an audio file.",
"python_fn_signature": "def convert_story_to_audio(story: str) -> bytes:",
"sub_fns": []
}}
]
}}
# sub_task_tree_reflections_prompt = client_description + '''
# Sub task tree:
# ```
# {sub_task_tree}
# ```
# Write down 3 arguments why the sub task tree might not perfectly represents the information mentioned in the microservice description. (5 words per argument)'''
#
# sub_task_tree_solutions_prompt = client_description + '''
# Sub task tree:
# ```
# {sub_task_tree}
# ```
# Reflections:
# ```
# {reflections}
# ```
# For each constructive criticism, write a solution (5 words) that address the criticism.'''
#
# sub_task_tree_update_prompt = client_description + '''
# Sub task tree:
# ```
# {sub_task_tree}
# ```
# Solutions:
# ```
# {solutions}
# ```
# Update the sub task tree by applying the solutions. (pretty print the json string)'''
#
# ask_questions_prompt = client_description + '''
# Request json schema:
# ```
# {request_schema}
# ```
# Response json schema:
# ```
# {response_schema}
# ```
# Ask the user up to 5 unique detailed questions (5 words) about the microservice description that are not yet answered.
# '''
Note: you must only output the json string - nothing else.
Note: you must pretty print the json string.'''
sub_task_tree_reflections_prompt = client_description + '''
Sub task tree:
```
{sub_task_tree}
```
Write down 3 arguments why the sub task tree might not perfectly represents the information mentioned in the microservice description. (5 words per argument)'''
sub_task_tree_solutions_prompt = client_description + '''
Sub task tree:
```
{sub_task_tree}
```
Reflections:
```
{reflections}
```
For each constructive criticism, write a solution (5 words) that address the criticism.'''
sub_task_tree_update_prompt = client_description + '''
Sub task tree:
```
{sub_task_tree}
```
Solutions:
```
{solutions}
```
Update the sub task tree by applying the solutions. (pretty print the json string)'''
ask_questions_prompt = client_description + '''
Request json schema:
```
{request_schema}
```
Response json schema:
```
{response_schema}
```
Ask the user up to 5 unique detailed questions (5 words) about the microservice description that are not yet answered.
'''
# answer_questions_prompt = client_description + '''
# Request json schema:

View File

@@ -1,22 +1,22 @@
# from typing import Dict, List, Union, Optional
# from pydantic import BaseModel, Field
#
# class JSONSchema(BaseModel):
# type: str
# format: Union[str, None] = None
# items: Union['JSONSchema', None] = None
# properties: Dict[str, 'JSONSchema'] = Field(default_factory=dict)
# additionalProperties: Union[bool, 'JSONSchema'] = True
# required: List[str] = Field(default_factory=list)
#
# class Config:
# arbitrary_types_allowed = True
#
# class TaskTree(BaseModel):
# description: Optional[str]
# python_fn_signature: str
# sub_fns: List['TaskTree']
#
# JSONSchema.update_forward_refs()
# TaskTree.update_forward_refs()
from typing import Dict, List, Union, Optional
from pydantic import BaseModel, Field
class JSONSchema(BaseModel):
    """Pydantic model mirroring a subset of the JSON Schema vocabulary."""
    type: str
    format: Union[str, None] = None
    items: Union['JSONSchema', None] = None  # JSON Schema 'items' keyword (self-referential)
    properties: Dict[str, 'JSONSchema'] = Field(default_factory=dict)
    additionalProperties: Union[bool, 'JSONSchema'] = True
    required: List[str] = Field(default_factory=list)

    class Config:
        # allow the self-referential 'JSONSchema' field types
        arbitrary_types_allowed = True
class TaskTree(BaseModel):
    """Recursive tree of sub-tasks; each node describes one function to implement."""
    description: Optional[str]
    python_fn_signature: str
    sub_fns: List['TaskTree']  # recursive child sub-functions

# Resolve the self-referential string annotations ('JSONSchema', 'TaskTree') above.
JSONSchema.update_forward_refs()
TaskTree.update_forward_refs()
#

View File

@@ -0,0 +1,53 @@
import json
import os
import base64
import streamlit as st
from jina import Client, Document, DocumentArray
st.set_page_config(
page_title="<page title here>",
page_icon="<page icon here>",
layout="<page layout here>",
initial_sidebar_state="<sidebar state here>",
)
st.title("<thematic emoji here> <header title here>")
st.markdown(
"<10 word description here>"
"To deploy your own microservice, click [here](https://github.com/jina-ai/dev-gpt)."
)
st.header("<another thematic emoji here> Input Parameters") # only if input parameters are needed
with st.form(key="input_form"):
<input parameter definition here>
input_data = {
<input parameters here>
}
input_json = json.dumps(input_data)
# Process input and call microservice
if submit_button:
with st.spinner("Generating collage..."):
client = Client(host="http://localhost:8080")
d = Document(text=input_json)
response = client.post("/", inputs=DocumentArray([d]))
output_data = json.loads(response[0].text)
<visualization of results>
# Display curl command
deployment_id = os.environ.get("K8S_NAMESPACE_NAME", "")
host = (
f"https://dev-gpt-{deployment_id.split('-')[1]}.wolf.jina.ai/post"
if deployment_id
else "http://localhost:8080/post"
)
with st.expander("See curl command"):
st.markdown("You can use the following curl command to send a request to the microservice from the command line:")
st.code(
f'curl -X "POST" "{host}" -H "accept: application/json" -H "Content-Type: application/json" -d \'{{"data": [{{"text": "{input_json}"}}]}}\'',
language="bash",
)

View File

@@ -0,0 +1,28 @@
import os
from typing import Optional
import requests
def google_search(search_term, search_type, top_n):
    """Query the Google Custom Search JSON API and return the decoded response.

    Args:
        search_term: the query string.
        search_type: 'image' to run an image search; any other value runs a
            plain web search. The API's ``searchType`` parameter is only sent
            for 'image', because 'image' is the only value the API accepts.
        top_n: number of results to request (the API caps this at 10).

    Returns:
        The parsed JSON response as a dict.

    Raises:
        KeyError: if GOOGLE_API_KEY or GOOGLE_CSE_ID is not set in the environment.
        requests.HTTPError: if the API responds with a non-2xx status.
    """
    # Fix: these were annotated Optional[str], but os.environ[...] either
    # returns str or raises KeyError — it never yields None.
    google_api_key: str = os.environ['GOOGLE_API_KEY']
    google_cse_id: str = os.environ['GOOGLE_CSE_ID']
    url = "https://www.googleapis.com/customsearch/v1"
    params = {
        'q': search_term,
        'key': google_api_key,
        'cx': google_cse_id,
        **({'searchType': search_type} if search_type == 'image' else {}),
        'num': top_n,
    }
    response = requests.get(url, params=params)
    response.raise_for_status()
    return response.json()
def search_images(search_term, top_n):
    """Return the links of the top *top_n* image results for *search_term*."""
    result = google_search(search_term, search_type="image", top_n=top_n)
    links = []
    for item in result["items"]:
        links.append(item["link"])
    return links
def search_web(search_term, top_n):
    """Return the text snippets of the top *top_n* web results for *search_term*."""
    result = google_search(search_term, search_type="web", top_n=top_n)
    snippets = []
    for item in result["items"]:
        snippets.append(item["snippet"])
    return snippets

View File

@@ -6,10 +6,10 @@ openai.api_key = os.getenv("OPENAI_API_KEY")
class GPT_3_5_Turbo:
def __init__(self, system: str = ''):
self.system = system
def __init__(self, system_string: str = ''):
self.system = system_string
def __call__(self, prompt: str) -> str:
def __call__(self, prompt_string: str) -> str:
response = openai.ChatCompletion.create(
model="gpt-3.5-turbo",
messages=[{
@@ -17,7 +17,8 @@ class GPT_3_5_Turbo:
"content": self.system
}, {
"role": 'user',
"content": prompt
"content": prompt_string
}]
)
return response.choices[0]['message']['content']

View File

@@ -1,35 +0,0 @@
import os
from typing import Any, List, Optional
from googleapiclient.discovery import build
google_api_key: Optional[str] = os.environ['GOOGLE_API_KEY']
google_cse_id: Optional[str] = os.environ['GOOGLE_CSE_ID']
search_engine = build("customsearch", "v1", developerKey=google_api_key)
k: int = 10
siterestrict: bool = False
def _google_search_results(search_term: str, **kwargs: Any) -> List[dict]:
    """Execute one Custom Search query and return the raw result items."""
    # Optionally narrow the engine to the configured site restriction.
    engine = search_engine.cse().siterestrict() if siterestrict else search_engine.cse()
    raw = engine.list(q=search_term, cx=google_cse_id, **kwargs).execute()
    return raw.get("items", [])
def run(query: str) -> str:
    """Run *query* through Google Custom Search and join the result snippets."""
    results = _google_search_results(query, num=k)
    if not results:
        return "No good Google Search Result was found"
    # Keep only entries that actually carry a text snippet.
    return " ".join(entry["snippet"] for entry in results if "snippet" in entry)
if __name__ == "__main__":
    # google-api-python-client==2.86.0
    # Manual smoke test: performs a live Custom Search query, so it requires
    # GOOGLE_API_KEY / GOOGLE_CSE_ID in the environment and network access.
    print(run("jina ai"))

View File

@@ -16,7 +16,8 @@ The Dockerfile must not attach a virtual display when running test_microservice.
not_allowed_function_string = '''The implemented function and the test must not use the GPU.
The implemented function and the test must not access a database.
The implemented function and the test must not access a display.
The implemented function and the test must not access external apis except unless it is explicitly mentioned in the description or test case (e.g. by mentioning the api that should be used or by providing a URL to access the data).
The implemented function and the test must not access external apis unless it is explicitly mentioned.
The implemented function and the test must not be based on a large collection of hard-coded strings.
The implemented function and the test must not load data from the local file system unless it was created by the implemented function itself.
The implemented function and the test must not use a pre-trained model unless it is explicitly mentioned in the description.
The implemented function and the test must not train a model.
@@ -88,51 +89,73 @@ Note that you must obey the double asterisk and triple backtick syntax from like
```{tag_name}
...code...
```
You must provide the complete file with the exact same syntax to wrap the code.'''
You must provide the complete {file_name} wrapped with the exact syntax shown above.'''
gpt_35_turbo_usage_string = """If need to use gpt_3_5_turbo, then this is an example on how to use it:
gpt_35_turbo_usage_string = """If you need to use gpt_3_5_turbo, then use it like shown in the following example:
```
from .apis import GPT_3_5_Turbo
from .gpt_3_5_turbo import GPT_3_5_Turbo
gpt_3_5_turbo = GPT_3_5_Turbo(
system=\'\'\'
system_string=\'\'\'
You are a tv-reporter who is specialized in C-list celebrities.
When you get asked something like 'Who was having a date with <X>?', then you answer with a json like '{{"dates": ["<Y>", "<Z>"]}}'.
You must not answer something else - only the json.
\'\'\')
generated_string = gpt(prompt) # fill-in the prompt (str); the output is a string
generated_string = gpt_3_5_turbo(prompt_string="example user prompt") # prompt_string is the only parameter
```
"""
google_custom_search_usage_string = """If you need to use google_custom_search, then use it like shown in the following example:
a) when searching for text:
```
from .google_custom_search import search_web
template_generate_function = PromptTemplate.from_template(
general_guidelines_string + '''
# input: search term (str), top_n (int)
# output: list of strings
string_list = search_web('<search term>', top_n=10)
```
b) when searching for images:
```
from .google_custom_search import search_images
# input: search term (str), top_n (int)
# output: list of image urls
image_url_list = search_images('<search term>', top_n=10)
```
"""
linebreak = '\n'
def template_generate_function_constructor(is_using_gpt_3_5_turbo, is_using_google_custom_search):
return PromptTemplate.from_template(
general_guidelines_string + f'''
Write a python function which receives as \
input json string (that can be parsed with the python function json.loads) and \
outputs a json string (that can be parsed with the python function json.loads). \
The function is called 'func'.
The function must fulfill the following description: '{microservice_description}'.
It will be tested with the following scenario: '{test_description}'.
For the implementation use the following package(s): '{packages}'.
The function must fulfill the following description: '{{microservice_description}}'.
It will be tested with the following scenario: '{{test_description}}'.
For the implementation use the following package(s): '{{packages}}'.
The code must start with the following imports:
```
from .apis import GPT_3_5_Turbo
```{linebreak +'from .gpt_3_5_turbo import GPT_3_5_Turbo' if is_using_gpt_3_5_turbo else ""}{linebreak + 'from .google_custom_search import search_web, search_images' if is_using_google_custom_search else ""}
import json
import requests
```
Obey the following rules:
''' + not_allowed_function_string + '''
{not_allowed_function_string}
Your approach:
1. Identify the core challenge when implementing the function.
2. Think about solutions for these challenges.
3. Decide for one of the solutions.
4. Write the code for the function. Don't write code for the test.
''' + gpt_35_turbo_usage_string + '\n' + template_code_wrapping_string
)
{gpt_35_turbo_usage_string if is_using_gpt_3_5_turbo else ''}
{google_custom_search_usage_string if is_using_google_custom_search else ''}
{template_code_wrapping_string}'''
)
template_generate_test = PromptTemplate.from_template(
@@ -147,6 +170,7 @@ The test must start with the following imports:
```
from .microservice import func
import json
import requests
```
''' + not_allowed_function_string + '''
The test must not open local files.
@@ -341,9 +365,10 @@ Note that any changes needed to make the test pass must be written under the con
''' + f'{not_allowed_function_string}\n{not_allowed_docker_string}\n{gpt_35_turbo_usage_string}' + '''
After thinking about the possible solutions, output them as JSON ranked from best to worst. Like this:
After thinking about the possible solutions, output them as JSON ranked from best to worst.
You must use the following format:
''' + response_format_suggest_solutions + '''
Ensure the response can be parsed by Python json.loads'''
Ensure the response starts with **solutions.json** and can be parsed by Python json.loads'''
)

View File

@@ -0,0 +1,9 @@
import os
def get_available_tools():
    """Return an enumerated, newline-separated list of usable tools.

    gpt-3.5-turbo is always listed; the Google Custom Search API is added
    only when both of its credentials are present in the environment.
    """
    available = ['gpt-3.5-turbo (for any kind of text processing like summarization, paraphrasing, etc.)']
    has_search_credentials = os.environ.get('GOOGLE_API_KEY') and os.environ.get('GOOGLE_CSE_ID')
    if has_search_credentials:
        available.append('Google Custom Search API')
    lines = []
    # Label entries a), b), ... — zip stops at the shorter sequence.
    for letter, tool in zip('abcdefghijklmnopqrstuvwxyz', available):
        lines.append(f'{letter}) {tool}')
    return '\n'.join(lines)

View File

@@ -22,12 +22,12 @@ def test_generation_level_0(microservice_dir, mock_input_sequence):
generator = Generator(
"The microservice is very simple, it does not take anything as input and only outputs the word 'test'",
microservice_dir,
'gpt-3.5-turbo'
'gpt-3.5-turbo',
self_healing=False,
)
assert generator.generate() == 0
@pytest.mark.parametrize('mock_input_sequence', [['y']], indirect=True)
def test_generation_level_1(microservice_dir, mock_input_sequence):
"""
@@ -46,12 +46,14 @@ Example tweet:
\'When your coworker microwaves fish in the break room... AGAIN. 🐟🤢
But hey, at least SOMEONE's enjoying their lunch. #officelife\'''',
str(microservice_dir),
'gpt-3.5-turbo'
'gpt-3.5-turbo',
# self_healing=False,
)
assert generator.generate() == 0
@pytest.mark.parametrize('mock_input_sequence', [['y', 'https://www.africau.edu/images/default/sample.pdf']], indirect=True)
@pytest.mark.parametrize('mock_input_sequence', [['y', 'https://www.africau.edu/images/default/sample.pdf']],
indirect=True)
def test_generation_level_2(microservice_dir, mock_input_sequence):
"""
Requirements:
@@ -66,11 +68,14 @@ def test_generation_level_2(microservice_dir, mock_input_sequence):
generator = Generator(
"The input is a PDF and the output the summarized text (50 words).",
str(microservice_dir),
'gpt-3.5-turbo'
'gpt-3.5-turbo',
# self_healing=False,
)
assert generator.generate() == 0
@pytest.mark.parametrize('mock_input_sequence', [['y', 'https://upload.wikimedia.org/wikipedia/commons/4/47/PNG_transparency_demonstration_1.png']], indirect=True)
@pytest.mark.parametrize('mock_input_sequence', [
['y', 'https://upload.wikimedia.org/wikipedia/commons/4/47/PNG_transparency_demonstration_1.png']], indirect=True)
def test_generation_level_2_svg(microservice_dir, mock_input_sequence):
"""
Requirements:
@@ -85,7 +90,8 @@ def test_generation_level_2_svg(microservice_dir, mock_input_sequence):
generator = Generator(
"Get a png as input and return a vectorized version as svg.",
str(microservice_dir),
'gpt-3.5-turbo'
'gpt-3.5-turbo',
# self_healing=False,
)
assert generator.generate() == 0
@@ -111,10 +117,12 @@ def test_generation_level_3(microservice_dir, mock_input_sequence):
Example input: 'AAPL'
''',
str(microservice_dir),
'gpt-3.5-turbo'
'gpt-3.5-turbo',
# self_healing=False,
)
assert generator.generate() == 0
@pytest.mark.parametrize(
'mock_input_sequence', [
[
@@ -155,11 +163,32 @@ def test_generation_level_4(microservice_dir, mock_input_sequence):
4. Return the the audio file as base64 encoded binary.
''',
str(microservice_dir),
'gpt-4'
'gpt-4',
# self_healing=False,
)
assert generator.generate() == 0
@pytest.mark.parametrize('mock_input_sequence', [['y', 'https://upload.wikimedia.org/wikipedia/commons/thumb/4/47/PNG_transparency_demonstration_1.png/560px-PNG_transparency_demonstration_1.png']], indirect=True)
@pytest.mark.parametrize('mock_input_sequence', [['y']], indirect=True)
def test_generation_level_5_company_logos(microservice_dir, mock_input_sequence):
os.environ['VERBOSE'] = 'true'
generator = Generator(
f'''\
Given a list of email addresses, get all company names from them.
For all companies, get the company logo.
All logos need to be arranged on a square.
The square is returned as png.
''',
str(microservice_dir),
'gpt-3.5-turbo',
# self_healing=False,
)
assert generator.generate() == 0
@pytest.mark.parametrize('mock_input_sequence', [['y',
'https://upload.wikimedia.org/wikipedia/commons/thumb/4/47/PNG_transparency_demonstration_1.png/560px-PNG_transparency_demonstration_1.png']],
indirect=True)
def test_generation_level_5(microservice_dir, mock_input_sequence):
"""
Requirements:
@@ -171,7 +200,8 @@ def test_generation_level_5(microservice_dir, mock_input_sequence):
Databases: ❌
"""
os.environ['VERBOSE'] = 'true'
generator = Generator(f'''
generator = Generator(
f'''
The input is an image.
Use the following api to get the description of the image:
Request:
@@ -193,9 +223,10 @@ The description is then used to generate a joke.
The joke is the put on the image.
The output is the image with the joke on it.
''',
str(microservice_dir),
'gpt-3.5-turbo'
)
str(microservice_dir),
'gpt-3.5-turbo',
# self_healing=False,
)
assert generator.generate() == 0
# @pytest.fixture

13
test/unit/test_search.py Normal file
View File

@@ -0,0 +1,13 @@
from dev_gpt.options.generate.static_files.microservice.google_custom_search import search_web, search_images
def test_web_search():
    # Live integration test: hits the real Custom Search API, so it needs
    # GOOGLE_API_KEY / GOOGLE_CSE_ID in the environment and network access.
    results = search_web("jina", 10)
    assert len(results) == 10
    # NOTE(review): assumes the top snippet always mentions the query term —
    # this can flake if ranking changes; confirm acceptable.
    assert "jina" in results[0].lower()
    # Web search returns text snippets, not URLs.
    assert not results[0].startswith("http")
def test_image_search():
    # Live integration test: hits the real Custom Search API, so it needs
    # GOOGLE_API_KEY / GOOGLE_CSE_ID in the environment and network access.
    results = search_images("jina", 10)
    assert len(results) == 10
    # Image search returns direct links to the images.
    assert results[0].startswith("http")

13
test/unit/test_tools.py Normal file
View File

@@ -0,0 +1,13 @@
import os
from dev_gpt.options.generate.tools.tools import get_available_tools
def test_all_tools():
    """Both tools are listed when the Google credentials are present."""
    # Set the credentials explicitly so the test does not depend on the
    # ambient environment of the machine running it, and restore afterwards.
    saved = {name: os.environ.get(name) for name in ('GOOGLE_API_KEY', 'GOOGLE_CSE_ID')}
    os.environ['GOOGLE_API_KEY'] = 'dummy-key'
    os.environ['GOOGLE_CSE_ID'] = 'dummy-cse-id'
    try:
        tool_lines = get_available_tools().split('\n')
        assert len(tool_lines) == 2
    finally:
        for name, value in saved.items():
            if value is None:
                os.environ.pop(name, None)
            else:
                os.environ[name] = value
def test_no_search():
    """Only gpt-3.5-turbo is listed when the Google API key is missing."""
    # Blank the key for the duration of the test only: the original code
    # mutated os.environ without restoring it, leaking into later tests.
    saved_key = os.environ.get('GOOGLE_API_KEY')
    os.environ['GOOGLE_API_KEY'] = ''
    try:
        tool_lines = get_available_tools().split('\n')
        assert len(tool_lines) == 1
    finally:
        if saved_key is None:
            del os.environ['GOOGLE_API_KEY']
        else:
            os.environ['GOOGLE_API_KEY'] = saved_key