minor adjustments

This commit is contained in:
Benito Martin
2024-06-30 14:35:13 +02:00
parent 7e8688c556
commit 1f05738d09
6 changed files with 42 additions and 17 deletions

View File

@@ -3,7 +3,7 @@ name: Build and Deploy to GKE
on: on:
push: push:
branches: branches:
- main - production
env: env:
PROJECT_ID: ${{ secrets.GKE_PROJECT }} PROJECT_ID: ${{ secrets.GKE_PROJECT }}
@@ -49,7 +49,7 @@ jobs:
--build-arg GITHUB_REF="$GITHUB_REF" \ --build-arg GITHUB_REF="$GITHUB_REF" \
. .
# Push the Docker image to Google Container Registry # Push the Docker image to Google Artifact Registry
- name: Publish - name: Publish
run: |- run: |-
docker push "gcr.io/$PROJECT_ID/$IMAGE:$GITHUB_SHA" docker push "gcr.io/$PROJECT_ID/$IMAGE:$GITHUB_SHA"

View File

@@ -12,7 +12,7 @@ Main Steps
- **Data Ingestion**: Load data from GitHub repositories. - **Data Ingestion**: Load data from GitHub repositories.
- **Indexing**: Use SentenceSplitter for indexing in nodes. - **Indexing**: Use SentenceSplitter for indexing in nodes.
- **Embedding**: Implement FastEmbedEmbedding. - **Embedding and Model**: OpenAI.
- **Vector Store**: Use Qdrant for inserting metadata. - **Vector Store**: Use Qdrant for inserting metadata.
- **Query Retrieval**: Implement RetrieverQueryEngine. - **Query Retrieval**: Implement RetrieverQueryEngine.
- **FastAPI and GKE**: Handle requests via the FastAPI app deployed on GKE. - **FastAPI and GKE**: Handle requests via the FastAPI app deployed on GKE.
@@ -131,7 +131,7 @@ The Python version used for this project is Python 3.10. You can follow along th
--zone=europe-west6-a \ --zone=europe-west6-a \
--num-nodes=5 \ --num-nodes=5 \
--enable-autoscaling \ --enable-autoscaling \
--min-nodes=1 \ --min-nodes=2 \
--max-nodes=10 \ --max-nodes=10 \
--machine-type=n1-standard-4 \ --machine-type=n1-standard-4 \
--enable-vertical-pod-autoscaling --enable-vertical-pod-autoscaling
@@ -162,19 +162,20 @@ The Python version used for this project is Python 3.10. You can follow along th
<img width="767" alt="lambda-gke" src="https://github.com/benitomartin/mlops-car-prices/assets/116911431/b4a7e10c-52f9-4ca2-ade3-f2136ff6bbdf"> <img width="767" alt="lambda-gke" src="https://github.com/benitomartin/mlops-car-prices/assets/116911431/b4a7e10c-52f9-4ca2-ade3-f2136ff6bbdf">
</p> </p>
http://34.65.191.211:8000 http://34.65.157.134:8000
12. Check some pods and logs 12. Check some pods and logs
```bash ```bash
kubectl logs llama-gke-deploy-8476f496bc-gxhms kubectl logs llama-gke-deploy-668b58b455-fjwvq
kubectl describe pod llama-gke-deploy-8476f496bc-gxhms kubectl describe pod llama-gke-deploy-668b58b455-fjwvq
kubectl top pod llama-gke-deploy-668b58b455-8xfhf
``` ```
13. Clean up to avoid costs deleting the cluster and the docker image 13. Clean up to avoid costs deleting the cluster and the docker image
```bash ```bash
gcloud container clusters delete app-llama-gke-cluster --zone=europe-west6-a gcloud container clusters delete llama-gke-cluster --zone=europe-west6-a
kubectl delete deployment llama-gke-deploy kubectl delete deployment llama-gke-deploy
``` ```
@@ -186,6 +187,6 @@ Run the streamlit app adding the endpoint url that you get after deployment:
streamlit run streamlit_app.py streamlit run streamlit_app.py
``` ```
<p align="center"> <p align="center">
<img width="767" alt="lambda-gke" src="https://github.com/benitomartin/mlops-car-prices/assets/116911431/b4a7e10c-52f9-4ca2-ade3-f2136ff6bbdf"> <img width="767" alt="lambda-gke" src="https://github.com/benitomartin/mlops-car-prices/assets/116911431/b4a7e10c-52f9-4ca2-ade3-f2136ff6bbdf">
</p> </p>

View File

@@ -26,7 +26,7 @@ OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')
openai.api_key = OPENAI_API_KEY openai.api_key = OPENAI_API_KEY
def get_repository_list(github_token, github_username): def get_code_file_list(github_token, github_username):
""" """
Fetch all repositories for a given GitHub user. Fetch all repositories for a given GitHub user.
@@ -194,7 +194,7 @@ def chunked_nodes(data, client, collection_name):
if __name__ == "__main__": if __name__ == "__main__":
# Fetch documents from GitHub repositories # Fetch documents from GitHub repositories
all_documents = get_repository_list(ACCESS_TOKEN, GITHUB_USERNAME) all_documents = get_code_file_list(ACCESS_TOKEN, GITHUB_USERNAME)
if all_documents: if all_documents:
# Split documents into nodes # Split documents into nodes

View File

@@ -3,7 +3,7 @@ kind: Deployment
metadata: metadata:
name: llama-gke-deploy name: llama-gke-deploy
spec: spec:
replicas: 2 replicas: 2 # Pods
selector: selector:
matchLabels: matchLabels:
app: llama-gke-pod app: llama-gke-pod
@@ -43,8 +43,8 @@ spec:
memory: "2Gi" memory: "2Gi"
cpu: "1" cpu: "1"
limits: # Maximum resources allowed limits: # Maximum resources allowed
memory: "4Gi" memory: "12Gi" # Maximum memory of the instance (80-90%)
cpu: "2" cpu: "4" # Maximum vCPUs of the instance
readinessProbe: # Check if the pod is ready to serve traffic. readinessProbe: # Check if the pod is ready to serve traffic.
httpGet: httpGet:
scheme: HTTP scheme: HTTP

View File

@@ -2,4 +2,4 @@ apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization kind: Kustomization
resources: resources:
- deploy-gke.yaml - deploy_gke.yaml

24
streamlit_app.py Normal file
View File

@@ -0,0 +1,24 @@
"""Minimal Streamlit front-end that forwards a user query to the FastAPI
service deployed on GKE and renders the raw response body."""
import streamlit as st
import requests

# FastAPI endpoint serving the /query/ route (GKE LoadBalancer IP).
FASTAPI_ENDPOINT = "http://34.65.157.134:8000/query/"

# Streamlit app title
st.title("Find Your Code")

# Input field for the query
query = st.text_input("Query:")

# Button to submit the query
if st.button("Get Response"):
    if query:
        try:
            # Bound the request so the UI cannot hang forever if the
            # endpoint is unreachable or slow.
            response = requests.post(
                FASTAPI_ENDPOINT, json={"query": query}, timeout=30
            )
        except requests.RequestException as exc:
            # Network/connection failures previously surfaced as a raw
            # traceback in the page; show a readable message instead.
            st.write("Request failed:", exc)
        else:
            if response.status_code == 200:
                st.write(response.text)
            else:
                st.write("Error:", response.status_code)
    else:
        st.write("Please enter a query.")