diff --git a/.github/workflows/build_deploy.yaml b/.github/workflows/build_deploy.yaml index 773619a..3003475 100644 --- a/.github/workflows/build_deploy.yaml +++ b/.github/workflows/build_deploy.yaml @@ -3,7 +3,7 @@ name: Build and Deploy to GKE on: push: branches: - - main + - production env: PROJECT_ID: ${{ secrets.GKE_PROJECT }} @@ -49,7 +49,7 @@ jobs: --build-arg GITHUB_REF="$GITHUB_REF" \ . - # Push the Docker image to Google Container Registry + # Push the Docker image to Google Artifact Registry - name: Publish run: |- docker push "gcr.io/$PROJECT_ID/$IMAGE:$GITHUB_SHA" diff --git a/README.md b/README.md index 2d0b5cc..ffd5d5f 100644 --- a/README.md +++ b/README.md @@ -12,7 +12,7 @@ Main Steps - **Data Ingestion**: Load data from GitHub repositories. - **Indexing**: Use SentenceSplitter for indexing in nodes. -- **Embedding**: Implement FastEmbedEmbedding. +- **Embedding and Model**: OpenAI. - **Vector Store**: Use Qdrant for inserting metadata. - **Query Retrieval**: Implement RetrieverQueryEngine. - **FastAPI and GKE**: Handle requests via the FastAPI app deployed on GKE. @@ -131,7 +131,7 @@ The Python version used for this project is Python 3.10. You can follow along th --zone=europe-west6-a \ --num-nodes=5 \ --enable-autoscaling \ - --min-nodes=1 \ + --min-nodes=2 \ --max-nodes=10 \ --machine-type=n1-standard-4 \ --enable-vertical-pod-autoscaling @@ -162,19 +162,20 @@ The Python version used for this project is Python 3.10. You can follow along th lambda-gke

- http://34.65.191.211:8000 + http://34.65.157.134:8000 12. Check some pods and logs ```bash - kubectl logs llama-gke-deploy-8476f496bc-gxhms - kubectl describe pod llama-gke-deploy-8476f496bc-gxhms + kubectl logs llama-gke-deploy-668b58b455-fjwvq + kubectl describe pod llama-gke-deploy-668b58b455-fjwvq + kubectl top pod llama-gke-deploy-668b58b455-8xfhf ``` 13. Clean up to avoid costs deleting the cluster and the docker image ```bash - gcloud container clusters delete app-llama-gke-cluster --zone=europe-west6-a + gcloud container clusters delete llama-gke-cluster --zone=europe-west6-a kubectl delete deployment llama-gke-deploy ``` @@ -186,6 +187,6 @@ Run the streamlit app adding the endpoint url that you get after deployment: streamlit run streamlit_app.py ``` -

- lambda-gke -

+

+ lambda-gke +

diff --git a/create_qdrant_collection.py b/create_qdrant_collection.py index 5fe02f7..194fef9 100644 --- a/create_qdrant_collection.py +++ b/create_qdrant_collection.py @@ -26,7 +26,7 @@ OPENAI_API_KEY = os.getenv('OPENAI_API_KEY') openai.api_key = OPENAI_API_KEY -def get_repository_list(github_token, github_username): +def get_code_file_list(github_token, github_username): """ Fetch all repositories for a given GitHub user. @@ -194,7 +194,7 @@ def chunked_nodes(data, client, collection_name): if __name__ == "__main__": # Fetch documents from GitHub repositories - all_documents = get_repository_list(ACCESS_TOKEN, GITHUB_USERNAME) + all_documents = get_code_file_list(ACCESS_TOKEN, GITHUB_USERNAME) if all_documents: # Split documents into nodes diff --git a/deploy-gke.yaml b/deploy_gke.yaml similarity index 94% rename from deploy-gke.yaml rename to deploy_gke.yaml index c704975..9b1b6b6 100644 --- a/deploy-gke.yaml +++ b/deploy_gke.yaml @@ -3,7 +3,7 @@ kind: Deployment metadata: name: llama-gke-deploy spec: - replicas: 2 + replicas: 2 # Pods selector: matchLabels: app: llama-gke-pod @@ -43,8 +43,8 @@ spec: memory: "2Gi" cpu: "1" limits: # Maximum resources allowed - memory: "4Gi" - cpu: "2" + memory: "12Gi" # Maximum memory of the instance (80-90%) + cpu: "4" # Maximum vCPUs of the instance readinessProbe: # Check if the pod is ready to serve traffic. 
httpGet: scheme: HTTP diff --git a/kustomization.yaml b/kustomization.yaml index 945bee4..b755575 100644 --- a/kustomization.yaml +++ b/kustomization.yaml @@ -2,4 +2,4 @@ apiVersion: kustomize.config.k8s.io/v1beta1 kind: Kustomization resources: - - deploy-gke.yaml \ No newline at end of file + - deploy_gke.yaml \ No newline at end of file diff --git a/streamlit_app.py b/streamlit_app.py new file mode 100644 index 0000000..36a6617 --- /dev/null +++ b/streamlit_app.py @@ -0,0 +1,24 @@ +import streamlit as st +import requests + +# Set the FastAPI endpoint +FASTAPI_ENDPOINT = "http://34.65.157.134:8000/query/" + +# Streamlit app title +st.title("Find Your Code") + +# Input field for the query +query = st.text_input("Query:") + +# Button to submit the query +if st.button("Get Response"): + if query: + response = requests.post(FASTAPI_ENDPOINT, json={"query": query}) + if response.status_code == 200: + st.write(response.text) + else: + st.write("Error:", response.status_code) + else: + st.write("Please enter a query.") + +