From 65fb2b1837fca8abc3ca6f504bb50b8e5430461b Mon Sep 17 00:00:00 2001 From: "Charles E. Gormley" Date: Sat, 12 Oct 2024 13:43:29 -0400 Subject: [PATCH] fixing issue with embedding model initialization. --- src/infra/cloudformation/eventbridge.yaml | 2 +- .../src/analytics/embeddings/vector_db.py | 3 +-- .../lambdas/RSSFeedProcessorLambda/src/lambda_function.py | 1 - src/infra/lambdas/lambda_utils/update_lambda_env_vars.py | 3 ++- todo.md | 2 +- 5 files changed, 5 insertions(+), 6 deletions(-) diff --git a/src/infra/cloudformation/eventbridge.yaml b/src/infra/cloudformation/eventbridge.yaml index 6726a04..04371cf 100644 --- a/src/infra/cloudformation/eventbridge.yaml +++ b/src/infra/cloudformation/eventbridge.yaml @@ -28,7 +28,7 @@ Resources: Properties: Name: rss-feed-processor-schedule Description: Runs the RSS Feed Processor Lambda function every hour - ScheduleExpression: rate(5 minutes) + ScheduleExpression: rate(30 minutes) FlexibleTimeWindow: Mode: FLEXIBLE MaximumWindowInMinutes: 1 diff --git a/src/infra/lambdas/RSSFeedProcessorLambda/src/analytics/embeddings/vector_db.py b/src/infra/lambdas/RSSFeedProcessorLambda/src/analytics/embeddings/vector_db.py index 8c99396..5483513 100644 --- a/src/infra/lambdas/RSSFeedProcessorLambda/src/analytics/embeddings/vector_db.py +++ b/src/infra/lambdas/RSSFeedProcessorLambda/src/analytics/embeddings/vector_db.py @@ -11,8 +11,7 @@ embedding_dim = os.getenv("VECTOR_EMBEDDING_DIM") vector_search_metric = os.getenv("VECTOR_SEARCH_METRIC") index_name = os.getenv("PINECONE_DB_NAME") -client = OpenAI() - +client = OpenAI() # For Embedding Models, Not LLMs pc = Pinecone(api_key=api_key) def get_index(): diff --git a/src/infra/lambdas/RSSFeedProcessorLambda/src/lambda_function.py b/src/infra/lambdas/RSSFeedProcessorLambda/src/lambda_function.py index 8717448..22029ed 100644 --- a/src/infra/lambdas/RSSFeedProcessorLambda/src/lambda_function.py +++ b/src/infra/lambdas/RSSFeedProcessorLambda/src/lambda_function.py @@ -36,7 +36,6 @@ def lambda_handler(event, context): # Process the feed result = extract_feed(feed) - print(type(result)) logger.info("Process Feed Result Dictionary: ", result) last_pub_dt = result['max_date'] diff --git a/src/infra/lambdas/lambda_utils/update_lambda_env_vars.py b/src/infra/lambdas/lambda_utils/update_lambda_env_vars.py index c1d5e8f..f106eb7 100644 --- a/src/infra/lambdas/lambda_utils/update_lambda_env_vars.py +++ b/src/infra/lambdas/lambda_utils/update_lambda_env_vars.py @@ -20,7 +20,8 @@ def update_env_vars(function_name): 'VECTOR_EMBEDDING_MODEL': os.environ.get('VECTOR_EMBEDDING_MODEL'), 'VECTOR_EMBEDDING_DIM': os.environ.get('VECTOR_EMBEDDING_DIM'), 'VECTOR_SEARCH_METRIC': os.environ.get('VECTOR_SEARCH_METRIC'), - 'PINECONE_DB_NAME': os.environ.get('PINECONE_DB_NAME') + 'PINECONE_DB_NAME': os.environ.get('PINECONE_DB_NAME'), + 'OPENAI_API_KEY': os.environ.get('OPENAI_API_KEY') } return lambda_client.update_function_configuration( diff --git a/todo.md b/todo.md index 484a4d7..474261d 100644 --- a/todo.md +++ b/todo.md @@ -1,5 +1,5 @@ # Testing🧪 -* Testing from 3rd party aws account. [ Today ] +* Testing from 3rd party aws account. * Test Large Amounts of Feeds ( Decrease the cadence of ingesting. ) [ Today ] * Test out Vector Databases at Small Scale * Test out Vector Databases at Scale.