minor adjustments

This commit is contained in:
Benito Martin
2024-06-30 14:35:13 +02:00
parent 7e8688c556
commit 1f05738d09
6 changed files with 42 additions and 17 deletions

View File

@@ -3,7 +3,7 @@ name: Build and Deploy to GKE
on: on:
push: push:
branches: branches:
- main - production
env: env:
PROJECT_ID: ${{ secrets.GKE_PROJECT }} PROJECT_ID: ${{ secrets.GKE_PROJECT }}
@@ -49,7 +49,7 @@ jobs:
--build-arg GITHUB_REF="$GITHUB_REF" \ --build-arg GITHUB_REF="$GITHUB_REF" \
. .
# Push the Docker image to Google Container Registry # Push the Docker image to Google Artifact Registry
- name: Publish - name: Publish
run: |- run: |-
docker push "gcr.io/$PROJECT_ID/$IMAGE:$GITHUB_SHA" docker push "gcr.io/$PROJECT_ID/$IMAGE:$GITHUB_SHA"

View File

@@ -12,7 +12,7 @@ Main Steps
- **Data Ingestion**: Load data from GitHub repositories. - **Data Ingestion**: Load data from GitHub repositories.
- **Indexing**: Use SentenceSplitter for indexing in nodes. - **Indexing**: Use SentenceSplitter for indexing in nodes.
- **Embedding**: Implement FastEmbedEmbedding. - **Embedding and Model**: OpenAI.
- **Vector Store**: Use Qdrant for inserting metadata. - **Vector Store**: Use Qdrant for inserting metadata.
- **Query Retrieval**: Implement RetrieverQueryEngine. - **Query Retrieval**: Implement RetrieverQueryEngine.
- **FastAPI and GKE**: Handle requests via the FastAPI app deployed on GKE. - **FastAPI and GKE**: Handle requests via the FastAPI app deployed on GKE.
@@ -131,7 +131,7 @@ The Python version used for this project is Python 3.10. You can follow along th
--zone=europe-west6-a \ --zone=europe-west6-a \
--num-nodes=5 \ --num-nodes=5 \
--enable-autoscaling \ --enable-autoscaling \
--min-nodes=1 \ --min-nodes=2 \
--max-nodes=10 \ --max-nodes=10 \
--machine-type=n1-standard-4 \ --machine-type=n1-standard-4 \
--enable-vertical-pod-autoscaling --enable-vertical-pod-autoscaling
@@ -162,19 +162,20 @@ The Python version used for this project is Python 3.10. You can follow along th
<img width="767" alt="lambda-gke" src="https://github.com/benitomartin/mlops-car-prices/assets/116911431/b4a7e10c-52f9-4ca2-ade3-f2136ff6bbdf"> <img width="767" alt="lambda-gke" src="https://github.com/benitomartin/mlops-car-prices/assets/116911431/b4a7e10c-52f9-4ca2-ade3-f2136ff6bbdf">
</p> </p>
http://34.65.191.211:8000 http://34.65.157.134:8000
12. Check some pods and logs 12. Check some pods and logs
```bash ```bash
kubectl logs llama-gke-deploy-8476f496bc-gxhms kubectl logs llama-gke-deploy-668b58b455-fjwvq
kubectl describe pod llama-gke-deploy-8476f496bc-gxhms kubectl describe pod llama-gke-deploy-668b58b455-fjwvq
kubectl top pod llama-gke-deploy-668b58b455-8xfhf
``` ```
13. Clean up to avoid costs deleting the cluster and the docker image 13. Clean up to avoid costs deleting the cluster and the docker image
```bash ```bash
gcloud container clusters delete app-llama-gke-cluster --zone=europe-west6-a gcloud container clusters delete llama-gke-cluster --zone=europe-west6-a
kubectl delete deployment llama-gke-deploy kubectl delete deployment llama-gke-deploy
``` ```
@@ -186,6 +187,6 @@ Run the streamlit app adding the endpoint url that you get after deployment:
streamlit run streamlit_app.py streamlit run streamlit_app.py
``` ```
<p align="center"> <p align="center">
<img width="767" alt="lambda-gke" src="https://github.com/benitomartin/mlops-car-prices/assets/116911431/b4a7e10c-52f9-4ca2-ade3-f2136ff6bbdf"> <img width="767" alt="lambda-gke" src="https://github.com/benitomartin/mlops-car-prices/assets/116911431/b4a7e10c-52f9-4ca2-ade3-f2136ff6bbdf">
</p> </p>

View File

@@ -26,7 +26,7 @@ OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')
openai.api_key = OPENAI_API_KEY openai.api_key = OPENAI_API_KEY
def get_repository_list(github_token, github_username): def get_code_file_list(github_token, github_username):
""" """
Fetch all repositories for a given GitHub user. Fetch all repositories for a given GitHub user.
@@ -194,7 +194,7 @@ def chunked_nodes(data, client, collection_name):
if __name__ == "__main__": if __name__ == "__main__":
# Fetch documents from GitHub repositories # Fetch documents from GitHub repositories
all_documents = get_repository_list(ACCESS_TOKEN, GITHUB_USERNAME) all_documents = get_code_file_list(ACCESS_TOKEN, GITHUB_USERNAME)
if all_documents: if all_documents:
# Split documents into nodes # Split documents into nodes

View File

@@ -3,7 +3,7 @@ kind: Deployment
metadata: metadata:
name: llama-gke-deploy name: llama-gke-deploy
spec: spec:
replicas: 2 replicas: 2 # Pods
selector: selector:
matchLabels: matchLabels:
app: llama-gke-pod app: llama-gke-pod
@@ -43,8 +43,8 @@ spec:
memory: "2Gi" memory: "2Gi"
cpu: "1" cpu: "1"
limits: # Maximum resources allowed limits: # Maximum resources allowed
memory: "4Gi" memory: "12Gi" # Maximum memory of the instance (80-90%)
cpu: "2" cpu: "4" # Maximum vCPUs of the instance
readinessProbe: # Check if the pod is ready to serve traffic. readinessProbe: # Check if the pod is ready to serve traffic.
httpGet: httpGet:
scheme: HTTP scheme: HTTP

View File

@@ -2,4 +2,4 @@ apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization kind: Kustomization
resources: resources:
- deploy-gke.yaml - deploy_gke.yaml

24
streamlit_app.py Normal file
View File

@@ -0,0 +1,24 @@
"""Minimal Streamlit front-end that forwards a user query to the FastAPI
service deployed on GKE and renders the raw response body."""
import streamlit as st
import requests

# FastAPI endpoint serving the /query/ route (GKE LoadBalancer IP).
FASTAPI_ENDPOINT = "http://34.65.157.134:8000/query/"

# Streamlit app title
st.title("Find Your Code")

# Input field for the query
query = st.text_input("Query:")

# Button to submit the query
if st.button("Get Response"):
    if query:
        try:
            # Bound the request so the UI cannot hang forever if the
            # endpoint is unreachable or slow.
            response = requests.post(
                FASTAPI_ENDPOINT, json={"query": query}, timeout=30
            )
        except requests.RequestException as exc:
            # Network/connection failures previously surfaced as a raw
            # traceback in the page; show a readable message instead.
            st.write("Request failed:", exc)
        else:
            if response.status_code == 200:
                st.write(response.text)
            else:
                st.write("Error:", response.status_code)
    else:
        st.write("Please enter a query.")