From 93df752f208c9071048a73ed652ba5d32d013e80 Mon Sep 17 00:00:00 2001 From: jeffvestal <53237856+jeffvestal@users.noreply.github.com> Date: Tue, 11 Apr 2023 20:09:47 +0000 Subject: [PATCH] pushing tested code for blog --- .gitignore | 5 ++ .streamlit/config.toml | 11 +++++ elasticdocs_gpt.py | 108 +++++++++++++++++++++++++++++++++++++++++ requirements.txt | 3 ++ 4 files changed, 127 insertions(+) create mode 100644 .gitignore create mode 100644 .streamlit/config.toml create mode 100644 elasticdocs_gpt.py create mode 100644 requirements.txt diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..bfb4835 --- /dev/null +++ b/.gitignore @@ -0,0 +1,5 @@ +*replit* +*pyc +.config +venv +main-notokenlimit.py \ No newline at end of file diff --git a/.streamlit/config.toml b/.streamlit/config.toml new file mode 100644 index 0000000..2ba917c --- /dev/null +++ b/.streamlit/config.toml @@ -0,0 +1,11 @@ +[logger] + +# Level of logging: 'error', 'warning', 'info', or 'debug'. +# Default: 'info' +level = "info" + +[server] + +# If false, will attempt to open a browser window on start. +# Default: false unless (1) we are on a Linux box where DISPLAY is unset, or (2) we are running in the Streamlit Atom plugin. 
+headless = true
\ No newline at end of file
diff --git a/elasticdocs_gpt.py b/elasticdocs_gpt.py
new file mode 100644
index 0000000..bab308b
--- /dev/null
+++ b/elasticdocs_gpt.py
@@ -0,0 +1,159 @@
+import os
+import streamlit as st
+import openai
+from elasticsearch import Elasticsearch
+
+# This code is part of an Elastic Blog showing how to combine
+# Elasticsearch's search relevancy power with
+# OpenAI's GPT's Question Answering power
+
+
+# Required Environment Variables
+# openai_api - OpenAI API Key
+# cloud_id - Elastic Cloud Deployment ID
+# cloud_user - Elasticsearch Cluster User
+# cloud_pass - Elasticsearch User Password
+
+openai.api_key = os.environ['openai_api']
+# NOTE(review): chat_gpt() is called without this value and uses its own
+# default model ("gpt-3.5-turbo"); confirm which model is intended.
+model = "gpt-3.5-turbo-0301"
+
+
+# Connect to Elastic Cloud cluster
+def es_connect(cid, user, passwd):
+    """Return an Elasticsearch client for an Elastic Cloud deployment.
+
+    cid is the Cloud deployment ID; user and passwd are the basic-auth
+    credentials. basic_auth is used because the 8.x Python client
+    deprecates the older http_auth keyword.
+    """
+    return Elasticsearch(cloud_id=cid, basic_auth=(user, passwd))
+
+
+# Search ElasticSearch index and return body and URL of the result
+def search(query_text):
+    """Run the hybrid (BM25 + kNN) search and return the top hit.
+
+    Connection settings are read from the cloud_id, cloud_user and
+    cloud_pass environment variables.
+
+    Returns a (body, url) tuple for the best-scoring document. Both values
+    are None when nothing matched; either one is None when the hit does not
+    carry that field.
+    """
+    cid = os.environ['cloud_id']
+    cp = os.environ['cloud_pass']
+    cu = os.environ['cloud_user']
+    es = es_connect(cid, cu, cp)
+
+    # Elasticsearch query (BM25) and kNN configuration for hybrid search
+    query = {
+        "bool": {
+            "must": [{
+                "match": {
+                    "title": {
+                        "query": query_text,
+                        "boost": 1
+                    }
+                }
+            }],
+            "filter": [{
+                "exists": {
+                    "field": "title-vector"
+                }
+            }]
+        }
+    }
+
+    knn = {
+        "field": "title-vector",
+        "k": 1,
+        "num_candidates": 20,
+        "query_vector_builder": {
+            "text_embedding": {
+                "model_id": "sentence-transformers__all-distilroberta-v1",
+                "model_text": query_text
+            }
+        },
+        "boost": 24
+    }
+
+    fields = ["title", "body_content", "url"]
+    index = 'search-elastic-docs'
+    resp = es.search(index=index,
+                     query=query,
+                     knn=knn,
+                     fields=fields,
+                     size=1,
+                     source=False)
+
+    # No hits means there is nothing to index into; report that explicitly.
+    hits = resp['hits']['hits']
+    if not hits:
+        return None, None
+
+    # Each requested field comes back as a list of values; take the first.
+    hit_fields = hits[0].get('fields', {})
+    body = hit_fields.get('body_content', [None])[0]
+    url = hit_fields.get('url', [None])[0]
+
+    return body, url
+
+
+def truncate_text(text, max_tokens):
+    """Return text limited to at most max_tokens whitespace-separated words.
+
+    Words are only a rough stand-in for model tokens. Text within the limit
+    is returned unchanged; truncated text is re-joined with single spaces.
+    A negative max_tokens is treated as 0 so it yields an empty string
+    instead of slicing words off the end.
+    """
+    limit = max(max_tokens, 0)
+    tokens = text.split()
+    if len(tokens) <= limit:
+        return text
+
+    return ' '.join(tokens[:limit])
+
+
+# Generate a response from ChatGPT based on the given prompt
+def chat_gpt(prompt, model="gpt-3.5-turbo", max_tokens=1024, max_context_tokens=4000, safety_margin=5):
+    """Send prompt to the chat completion API and return the reply text.
+
+    max_tokens is the room reserved for the reply when sizing the prompt;
+    it is not sent to the API. The prompt is cut to
+    max_context_tokens - max_tokens - safety_margin words.
+    """
+    # Truncate the prompt content to fit within the model's context length
+    truncated_prompt = truncate_text(prompt, max_context_tokens - max_tokens - safety_margin)
+
+    messages = [
+        {"role": "system", "content": "You are a helpful assistant."},
+        {"role": "user", "content": truncated_prompt},
+    ]
+    response = openai.ChatCompletion.create(model=model, messages=messages)
+
+    return response["choices"][0]["message"]["content"]
+
+
+st.title("ElasticDocs GPT")
+
+# Main chat form
+with st.form("chat_form"):
+    query = st.text_input("You: ")
+    submit_button = st.form_submit_button("Send")
+
+# Generate and display response on form submission
+if submit_button:
+    if not query.strip():
+        # Nothing to search for; skip the Elasticsearch and OpenAI calls.
+        st.warning("Please enter a question.")
+    else:
+        resp, url = search(query)
+        if resp is None:
+            st.write("No matching Elastic Doc was found for that question.")
+        else:
+            prompt = f"Answer this question: {query}\nUsing only the information from this Elastic Doc: {resp}"
+            answer = chat_gpt(prompt)
+            st.write(f"ChatGPT: {answer.strip()}\n\nDocs: {url}")
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..4d1277d
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,3 @@
+streamlit
+openai
+elasticsearch
\ No newline at end of file