From 93df752f208c9071048a73ed652ba5d32d013e80 Mon Sep 17 00:00:00 2001
From: jeffvestal <53237856+jeffvestal@users.noreply.github.com>
Date: Tue, 11 Apr 2023 20:09:47 +0000
Subject: [PATCH] pushing tested code for blog

---
 .gitignore             |   5 ++
 .streamlit/config.toml |  11 +++++
 elasticdocs_gpt.py     | 108 +++++++++++++++++++++++++++++++++++++++++
 requirements.txt       |   3 ++
 4 files changed, 127 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 .streamlit/config.toml
 create mode 100644 elasticdocs_gpt.py
 create mode 100644 requirements.txt

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..bfb4835
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,5 @@
+*replit*
+*pyc
+.config
+venv
+main-notokenlimit.py
\ No newline at end of file
diff --git a/.streamlit/config.toml b/.streamlit/config.toml
new file mode 100644
index 0000000..2ba917c
--- /dev/null
+++ b/.streamlit/config.toml
@@ -0,0 +1,11 @@
+[logger]
+
+# Level of logging: 'error', 'warning', 'info', or 'debug'.
+# Default: 'info'
+level = "info"
+
+[server]
+
+# If false, will attempt to open a browser window on start.
+# Default: false unless (1) we are on a Linux box where DISPLAY is unset, or (2) we are running in the Streamlit Atom plugin.
+headless = true
\ No newline at end of file
diff --git a/elasticdocs_gpt.py b/elasticdocs_gpt.py
new file mode 100644
index 0000000..bab308b
--- /dev/null
+++ b/elasticdocs_gpt.py
@@ -0,0 +1,108 @@
+import os
+import streamlit as st
+import openai
+from elasticsearch import Elasticsearch
+
+# This code accompanies an Elastic blog post showing how to combine
+# Elasticsearch's search relevance with
+# OpenAI GPT's question-answering ability.
+
+
+# Required Environment Variables (read with os.environ[...], so a missing one raises KeyError)
+# openai_api - OpenAI API Key
+# cloud_id - Elastic Cloud Deployment ID
+# cloud_user - Elasticsearch Cluster User
+# cloud_pass - Elasticsearch User Password
+
+openai.api_key = os.environ['openai_api']
+model = "gpt-3.5-turbo-0301"  # NOTE(review): not passed to chat_gpt() in this file, which uses its own default
+
+# Connect to Elastic Cloud cluster
+def es_connect(cid, user, passwd):
+    """Return a client for cloud deployment `cid`; `basic_auth` replaces the deprecated `http_auth`."""
+    return Elasticsearch(cloud_id=cid, basic_auth=(user, passwd))
+
+# Search Elasticsearch index and return body and URL of the result
+def search(query_text):
+    """Run a hybrid BM25 + kNN search and return the top hit's text and URL.
+
+    Connects with the cloud_id / cloud_user / cloud_pass environment
+    variables (KeyError if one is unset).
+
+    Returns:
+        (body, url): first `body_content` and `url` values of the best hit.
+
+    Raises:
+        IndexError: if no document matches `query_text`.
+    """
+    es = es_connect(os.environ['cloud_id'], os.environ['cloud_user'],
+                    os.environ['cloud_pass'])
+
+    # Lexical (BM25) leg: match on title, restricted to docs that have a vector
+    query = {
+        "bool": {
+            "must": [{"match": {"title": {"query": query_text, "boost": 1}}}],
+            "filter": [{"exists": {"field": "title-vector"}}]
+        }
+    }
+
+    # Vector leg: query_vector_builder has the cluster embed the query text
+    # with the named model; this leg is boosted 24 against the lexical leg's 1
+    knn = {
+        "field": "title-vector",
+        "k": 1,
+        "num_candidates": 20,
+        "query_vector_builder": {
+            "text_embedding": {
+                "model_id": "sentence-transformers__all-distilroberta-v1",
+                "model_text": query_text
+            }
+        },
+        "boost": 24
+    }
+
+    # source=False: only the requested fields come back, under hit['fields']
+    resp = es.search(index='search-elastic-docs',
+                     query=query,
+                     knn=knn,
+                     fields=["title", "body_content", "url"],
+                     size=1,
+                     source=False)
+
+    hits = resp['hits']['hits']
+    if not hits:
+        raise IndexError(f"no Elastic Doc matched the query {query_text!r}")
+    top = hits[0]['fields']
+    return top['body_content'][0], top['url'][0]
+
+def truncate_text(text, max_tokens):
+    """Return `text` cut to its first `max_tokens` whitespace-separated words (a rough token proxy)."""
+    tokens = text.split()
+    if len(tokens) <= max_tokens:
+        return text
+    return ' '.join(tokens[:max(max_tokens, 0)])  # clamp: a negative slice end would keep words
+
+# Generate a response from ChatGPT based on the given prompt
+def chat_gpt(prompt, model="gpt-3.5-turbo", max_tokens=1024, max_context_tokens=4000, safety_margin=5):
+    """Send `prompt` (trimmed to the word budget) as the user turn; return the reply text."""
+    # Budget = context size minus room kept for the reply; max_tokens is not sent to the API
+    word_budget = max_context_tokens - max_tokens - safety_margin
+    messages = [{"role": "system", "content": "You are a helpful assistant."},
+                {"role": "user", "content": truncate_text(prompt, word_budget)}]
+    completion = openai.ChatCompletion.create(model=model, messages=messages)
+    return completion["choices"][0]["message"]["content"]
+
+
+st.title("ElasticDocs GPT")
+
+# Main chat form
+with st.form("chat_form"):
+    query = st.text_input("You: ")
+    submit_button = st.form_submit_button("Send")
+
+# On submit: fetch the best-matching doc and have the model answer from it; blank input is ignored
+if submit_button and query.strip():
+    resp, url = search(query)
+    prompt = f"Answer this question: {query}\nUsing only the information from this Elastic Doc: {resp}"
+    answer = chat_gpt(prompt)
+    st.write(f"ChatGPT: {answer.strip()}\n\nDocs: {url}")
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..4d1277d
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,3 @@
+streamlit
+openai>=0.27,<1.0  # elasticdocs_gpt.py calls openai.ChatCompletion, which openai 1.0 removed
+elasticsearch
\ No newline at end of file