diff --git a/.github/workflows/build_deploy.yaml b/.github/workflows/build_deploy.yaml
index 773619a..3003475 100644
--- a/.github/workflows/build_deploy.yaml
+++ b/.github/workflows/build_deploy.yaml
@@ -3,7 +3,7 @@ name: Build and Deploy to GKE
on:
  push:
    branches:
-      - main
+      - production
env:
  PROJECT_ID: ${{ secrets.GKE_PROJECT }}
@@ -49,7 +49,7 @@ jobs:
            --build-arg GITHUB_REF="$GITHUB_REF" \
            .
-      # Push the Docker image to Google Container Registry
+      # Push the Docker image to Google Artifact Registry
      - name: Publish
        run: |-
          docker push "gcr.io/$PROJECT_ID/$IMAGE:$GITHUB_SHA"
diff --git a/README.md b/README.md
index 2d0b5cc..ffd5d5f 100644
--- a/README.md
+++ b/README.md
@@ -12,7 +12,7 @@ Main Steps
- **Data Ingestion**: Load data from GitHub repositories.
- **Indexing**: Use SentenceSplitter for indexing in nodes.
-- **Embedding**: Implement FastEmbedEmbedding.
+- **Embedding and Model**: Use OpenAI for both the embeddings and the LLM (see the sketch after this list).
- **Vector Store**: Use Qdrant for inserting metadata.
- **Query Retrieval**: Implement RetrieverQueryEngine.
- **FastAPI and GKE**: Handle requests via the FastAPI app deployed on GKE.
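
Together these steps form a standard llama-index retrieval loop. Below is a minimal sketch assuming the llama-index >= 0.10 package layout; the Qdrant URL, collection name, sample document, and model name are illustrative placeholders, not values taken from this repo:

```python
import qdrant_client
from llama_index.core import Document, StorageContext, VectorStoreIndex
from llama_index.core.node_parser import SentenceSplitter
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.llms.openai import OpenAI
from llama_index.vector_stores.qdrant import QdrantVectorStore

# Placeholder document; the real pipeline ingests code files from GitHub.
docs = [Document(text="def hello():\n    return 'world'")]

# Assumed local Qdrant instance; the project inserts metadata into a collection.
client = qdrant_client.QdrantClient(url="http://localhost:6333")
vector_store = QdrantVectorStore(client=client, collection_name="github_code")
storage_context = StorageContext.from_defaults(vector_store=vector_store)

index = VectorStoreIndex.from_documents(
    docs,
    storage_context=storage_context,
    transformations=[SentenceSplitter(chunk_size=512, chunk_overlap=50)],
    embed_model=OpenAIEmbedding(),  # OpenAI embeddings, per the step above
)

# as_query_engine wires up a RetrieverQueryEngine over the vector store.
query_engine = index.as_query_engine(llm=OpenAI(model="gpt-4o-mini"))
print(query_engine.query("Where is hello defined?"))
```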
@@ -131,7 +131,7 @@ The Python version used for this project is Python 3.10. You can follow along th
--zone=europe-west6-a \
--num-nodes=5 \
--enable-autoscaling \
- --min-nodes=1 \
+ --min-nodes=2 \
--max-nodes=10 \
--machine-type=n1-standard-4 \
--enable-vertical-pod-autoscaling
@@ -162,19 +162,20 @@ The Python version used for this project is Python 3.10. You can follow along th
- http://34.65.191.211:8000
+ http://34.65.157.134:8000
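
   Before wiring the UI to the new address, you can smoke-test the endpoint directly; this assumes the /query/ route and JSON body used by streamlit_app.py below:

```python
import requests

# The question text is just an example.
resp = requests.post(
    "http://34.65.157.134:8000/query/",
    json={"query": "Where is the FastAPI app defined?"},
    timeout=60,
)
print(resp.status_code, resp.text)
```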
12. Check some pods and logs
```bash
- kubectl logs llama-gke-deploy-8476f496bc-gxhms
- kubectl describe pod llama-gke-deploy-8476f496bc-gxhms
+ kubectl logs llama-gke-deploy-668b58b455-fjwvq
+ kubectl describe pod llama-gke-deploy-668b58b455-fjwvq
+ kubectl top pod llama-gke-deploy-668b58b455-8xfhf
```
13. Clean up to avoid costs by deleting the cluster and the Docker image
```bash
- gcloud container clusters delete app-llama-gke-cluster --zone=europe-west6-a
+ gcloud container clusters delete llama-gke-cluster --zone=europe-west6-a
kubectl delete deployment llama-gke-deploy
```
@@ -186,6 +187,6 @@ Run the streamlit app adding the endpoint url that you get after deployment:
streamlit run streamlit_app.py
```
-
-
-
+
+
+
diff --git a/create_qdrant_collection.py b/create_qdrant_collection.py
index 5fe02f7..194fef9 100644
--- a/create_qdrant_collection.py
+++ b/create_qdrant_collection.py
@@ -26,7 +26,7 @@ OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')
openai.api_key = OPENAI_API_KEY
-def get_repository_list(github_token, github_username):
+def get_code_file_list(github_token, github_username):
"""
Fetch all repositories for a given GitHub user.
@@ -194,7 +194,7 @@ def chunked_nodes(data, client, collection_name):
if __name__ == "__main__":
# Fetch documents from GitHub repositories
- all_documents = get_repository_list(ACCESS_TOKEN, GITHUB_USERNAME)
+ all_documents = get_code_file_list(ACCESS_TOKEN, GITHUB_USERNAME)
if all_documents:
# Split documents into nodes
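
The rename makes the intent explicit: the function gathers code files, not just repository names. A hypothetical reconstruction of what such a helper can look like against the GitHub REST API; the `.py` filter and the endpoints used here are assumptions, not the repo's actual implementation:

```python
import requests

def get_code_file_list(github_token, github_username):
    """List code file paths across a user's repositories (illustrative only)."""
    headers = {"Authorization": f"token {github_token}"}
    repos = requests.get(
        f"https://api.github.com/users/{github_username}/repos",
        headers=headers, timeout=30,
    ).json()

    files = []
    for repo in repos:
        # Walk the default branch's tree recursively.
        tree = requests.get(
            f"https://api.github.com/repos/{github_username}/{repo['name']}"
            f"/git/trees/{repo['default_branch']}?recursive=1",
            headers=headers, timeout=30,
        ).json()
        files.extend(
            (repo["name"], item["path"])
            for item in tree.get("tree", [])
            if item["type"] == "blob" and item["path"].endswith(".py")
        )
    return files
```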
diff --git a/deploy-gke.yaml b/deploy_gke.yaml
similarity index 94%
rename from deploy-gke.yaml
rename to deploy_gke.yaml
index c704975..9b1b6b6 100644
--- a/deploy-gke.yaml
+++ b/deploy_gke.yaml
@@ -3,7 +3,7 @@ kind: Deployment
metadata:
  name: llama-gke-deploy
spec:
-  replicas: 2
+  replicas: 2  # Number of pod replicas
  selector:
    matchLabels:
      app: llama-gke-pod
@@ -43,8 +43,8 @@ spec:
            memory: "2Gi"
            cpu: "1"
          limits: # Maximum resources allowed
-            memory: "4Gi"
-            cpu: "2"
+            memory: "12Gi"  # ~80-90% of the node's memory (n1-standard-4: 15 GB)
+            cpu: "4"        # Matches the node's 4 vCPUs
        readinessProbe: # Check if the pod is ready to serve traffic.
          httpGet:
            scheme: HTTP
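
The new limits track the n1-standard-4 machine shape (4 vCPUs, 15 GB memory). A quick arithmetic check that the 12Gi memory limit lands in the 80-90% band the comment mentions:

```python
# n1-standard-4 offers 15 GB (decimal) of memory; Kubernetes "12Gi" is binary GiB.
node_mem_gib = 15 * 10**9 / 2**30  # ~13.97 GiB
limit_gib = 12
print(f"{limit_gib / node_mem_gib:.0%}")  # ~86%, inside the 80-90% band
```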
diff --git a/kustomization.yaml b/kustomization.yaml
index 945bee4..b755575 100644
--- a/kustomization.yaml
+++ b/kustomization.yaml
@@ -2,4 +2,4 @@ apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
- - deploy-gke.yaml
\ No newline at end of file
+ - deploy_gke.yaml
\ No newline at end of file
diff --git a/streamlit_app.py b/streamlit_app.py
new file mode 100644
index 0000000..36a6617
--- /dev/null
+++ b/streamlit_app.py
@@ -0,0 +1,24 @@
+import streamlit as st
+import requests
+
+# Set the FastAPI endpoint
+FASTAPI_ENDPOINT = "http://34.65.157.134:8000/query/"
+
+# Streamlit app title
+st.title("Find Your Code")
+
+# Input field for the query
+query = st.text_input("Query:")
+
+# Button to submit the query
+if st.button("Get Response"):
+    if query:
+        response = requests.post(FASTAPI_ENDPOINT, json={"query": query}, timeout=60)
+        if response.status_code == 200:
+            st.write(response.text)
+        else:
+            st.error(f"Error: {response.status_code}")
+    else:
+        st.write("Please enter a query.")
+
+