Mirror of https://github.com/benitomartin/scale-gke-qdrant-llama.git (synced 2025-12-17 02:54:25 +01:00)
minor adjustments
.github/workflows/build_deploy.yaml (vendored, 4 lines changed)
@@ -3,7 +3,7 @@ name: Build and Deploy to GKE
 on:
   push:
     branches:
-      - main
+      - production

 env:
   PROJECT_ID: ${{ secrets.GKE_PROJECT }}
@@ -49,7 +49,7 @@ jobs:
           --build-arg GITHUB_REF="$GITHUB_REF" \
           .

-      # Push the Docker image to Google Container Registry
+      # Push the Docker image to Google Artifact Registry
       - name: Publish
         run: |-
          docker push "gcr.io/$PROJECT_ID/$IMAGE:$GITHUB_SHA"
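Note that the updated comment says Artifact Registry while the push still targets the legacy gcr.io host. For reference, a pure Artifact Registry push would use the pkg.dev naming scheme; this is only a sketch, and the repository name `my-repo` and the region are assumptions, not taken from the repo:

```bash
# Hypothetical Artifact Registry equivalent; repository "my-repo" and the region are assumed
gcloud auth configure-docker europe-west6-docker.pkg.dev
docker tag "gcr.io/$PROJECT_ID/$IMAGE:$GITHUB_SHA" \
  "europe-west6-docker.pkg.dev/$PROJECT_ID/my-repo/$IMAGE:$GITHUB_SHA"
docker push "europe-west6-docker.pkg.dev/$PROJECT_ID/my-repo/$IMAGE:$GITHUB_SHA"
```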
README.md (13 lines changed)
@@ -12,7 +12,7 @@ Main Steps

 - **Data Ingestion**: Load data from GitHub repositories.
 - **Indexing**: Use SentenceSplitter for indexing in nodes.
-- **Embedding**: Implement FastEmbedEmbedding.
+- **Embedding and Model**: OpenAI.
 - **Vector Store**: Use Qdrant for inserting metadata.
 - **Query Retrieval**: Implement RetrieverQueryEngine.
 - **FastAPI and GKE**: Handle requests via the FastAPI app deployed on GKE.
@@ -131,7 +131,7 @@ The Python version used for this project is Python 3.10. You can follow along th
       --zone=europe-west6-a \
       --num-nodes=5 \
       --enable-autoscaling \
-      --min-nodes=1 \
+      --min-nodes=2 \
       --max-nodes=10 \
       --machine-type=n1-standard-4 \
       --enable-vertical-pod-autoscaling
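With the autoscaling floor raised to two nodes, one can verify the setting took effect after the cluster update; a sketch, using the cluster name from the cleanup step below:

```bash
# Print the node pools' autoscaling config; minNodeCount should now be 2
gcloud container clusters describe llama-gke-cluster \
  --zone=europe-west6-a \
  --format="yaml(nodePools[].autoscaling)"
```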
@@ -162,19 +162,20 @@ The Python version used for this project is Python 3.10. You can follow along th
     <img width="767" alt="lambda-gke" src="https://github.com/benitomartin/mlops-car-prices/assets/116911431/b4a7e10c-52f9-4ca2-ade3-f2136ff6bbdf">
     </p>

-    http://34.65.191.211:8000
+    http://34.65.157.134:8000

 12. Check some pods and logs

     ```bash
-    kubectl logs llama-gke-deploy-8476f496bc-gxhms
-    kubectl describe pod llama-gke-deploy-8476f496bc-gxhms
+    kubectl logs llama-gke-deploy-668b58b455-fjwvq
+    kubectl describe pod llama-gke-deploy-668b58b455-fjwvq
+    kubectl top pod llama-gke-deploy-668b58b455-8xfhf
     ```

 13. Clean up to avoid costs by deleting the cluster and the Docker image

     ```bash
-    gcloud container clusters delete app-llama-gke-cluster --zone=europe-west6-a
+    gcloud container clusters delete llama-gke-cluster --zone=europe-west6-a
     kubectl delete deployment llama-gke-deploy
     ```
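Since the service IP changed, a quick smoke test of the endpoint is worthwhile; the payload shape below matches what the new streamlit_app.py (added further down) sends, and the IP is whatever the LoadBalancer currently exposes:

```bash
# Sketch: POST a query to the FastAPI /query/ endpoint exposed by the cluster
curl -X POST "http://34.65.157.134:8000/query/" \
  -H "Content-Type: application/json" \
  -d '{"query": "How do I create a GKE cluster?"}'
```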
@@ -26,7 +26,7 @@ OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')
 openai.api_key = OPENAI_API_KEY


-def get_repository_list(github_token, github_username):
+def get_code_file_list(github_token, github_username):
     """
     Fetch all repositories for a given GitHub user.
@@ -194,7 +194,7 @@ def chunked_nodes(data, client, collection_name):

 if __name__ == "__main__":
     # Fetch documents from GitHub repositories
-    all_documents = get_repository_list(ACCESS_TOKEN, GITHUB_USERNAME)
+    all_documents = get_code_file_list(ACCESS_TOKEN, GITHUB_USERNAME)

     if all_documents:
         # Split documents into nodes
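Per its docstring, the renamed function still fetches a user's repositories before extracting code files. The equivalent raw GitHub REST call looks roughly like this (a sketch only; the repo's actual implementation isn't shown in this diff):

```bash
# List all repositories for a user, authenticated with a personal access token
curl -H "Authorization: Bearer $ACCESS_TOKEN" \
  "https://api.github.com/users/$GITHUB_USERNAME/repos"
```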
@@ -3,7 +3,7 @@ kind: Deployment
 metadata:
   name: llama-gke-deploy
 spec:
-  replicas: 2
+  replicas: 2 # Pods
   selector:
     matchLabels:
       app: llama-gke-pod
@@ -43,8 +43,8 @@ spec:
             memory: "2Gi"
             cpu: "1"
           limits: # Maximum resources allowed
-            memory: "4Gi"
-            cpu: "2"
+            memory: "12Gi" # Maximum memory of the instance (80-90%)
+            cpu: "4" # Maximum vCPUs of the instance
         readinessProbe: # Check if the pod is ready to serve traffic.
           httpGet:
             scheme: HTTP
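The raised limits track the n1-standard-4 machine type chosen for the cluster (4 vCPUs, 15 GB RAM): 12Gi is roughly 80% of a node's memory, leaving headroom for system pods. A quick way to confirm the limits actually fit on the nodes:

```bash
# Show each node's allocatable CPU and memory for comparison with the pod limits
kubectl get nodes -o custom-columns=NAME:.metadata.name,CPU:.status.allocatable.cpu,MEM:.status.allocatable.memory
```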
@@ -2,4 +2,4 @@ apiVersion: kustomize.config.k8s.io/v1beta1
 kind: Kustomization

 resources:
-  - deploy-gke.yaml
+  - deploy_gke.yaml
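With the resource entry now matching the renamed manifest, the deployment is applied through kustomize as usual; the directory name here is an assumption:

```bash
# Apply the kustomization; this fails if the resource filename does not match the manifest on disk
kubectl apply -k k8s/
```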
streamlit_app.py (new file, 24 lines)
@@ -0,0 +1,24 @@
+import streamlit as st
+import requests
+
+# Set the FastAPI endpoint
+FASTAPI_ENDPOINT = "http://34.65.157.134:8000/query/"
+
+# Streamlit app title
+st.title("Find Your Code")
+
+# Input field for the query
+query = st.text_input("Query:")
+
+# Button to submit the query
+if st.button("Get Response"):
+    if query:
+        response = requests.post(FASTAPI_ENDPOINT, json={"query": query})
+        if response.status_code == 200:
+            st.write(response.text)
+        else:
+            st.write("Error:", response.status_code)
+    else:
+        st.write("Please enter a query.")
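To try the new UI locally against the deployed service:

```bash
# Requires streamlit and requests in the active environment
pip install streamlit requests
streamlit run streamlit_app.py  # serves on http://localhost:8501 by default
```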