mirror of https://github.com/aljazceru/IngestRSS.git

Commit: Sunday Error Patching
@@ -28,6 +28,7 @@ Resources:
     Properties:
       Name: rss-feed-processor-schedule
       Description: Runs the RSS Feed Processor Lambda function every hour
+      State: DISABLED
       ScheduleExpression: rate(30 minutes)
       FlexibleTimeWindow:
         Mode: FLEXIBLE
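The hunk above leaves the schedule disabled (State: DISABLED) while the pipeline is being tested; the todo list later in this commit notes it still has to be re-enabled. A minimal sketch for checking the schedule's state from Python, assuming the template defines an EventBridge Scheduler schedule (the FlexibleTimeWindow property implies AWS::Scheduler::Schedule) named rss-feed-processor-schedule and that AWS_REGION is set:

# Minimal sketch, not part of the repository: confirm the schedule stayed DISABLED.
import os
import boto3

scheduler = boto3.client("scheduler", region_name=os.getenv("AWS_REGION"))
schedule = scheduler.get_schedule(Name="rss-feed-processor-schedule")

print(schedule["State"])               # expected: DISABLED after this commit
print(schedule["ScheduleExpression"])  # expected: rate(30 minutes)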
@@ -6,24 +6,21 @@ from src.utils.retry_logic import retry_with_backoff
 from botocore.exceptions import ClientError
 
 from dotenv import load_dotenv
-load_dotenv()
+load_dotenv(override=True)
 
-region_name = os.getenv("AWS_REGION")
-kms_client = boto3.client('kms', region_name=region_name)
+kms_client = boto3.client('kms', region_name=os.getenv("AWS_REGION"))
 stack_base = os.getenv("STACK_BASE")
 
 @retry_with_backoff()
 def deploy_cloudformation(template_file, stack_suffix, force_recreate=False, parameters=[]):
-    cf_client = boto3.client('cloudformation')
+    cf_client = boto3.client('cloudformation', region_name=os.getenv("AWS_REGION"))
     stack_name = f"{stack_base}-{stack_suffix}"
 
-
     with open(f'src/infra/cloudformation/{template_file}', 'r') as file:
         template_body = file.read()
 
     capabilities = ['CAPABILITY_NAMED_IAM']
 
-
     try:
         if force_recreate:
             try:
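The switch to load_dotenv(override=True) matters because python-dotenv by default does not overwrite variables that already exist in the process environment, so a stale AWS_REGION exported in the shell would silently win over the value in .env. A small sketch of the difference, assuming a hypothetical .env containing AWS_REGION=eu-central-1:

# Sketch only: demonstrates load_dotenv's override behaviour.
import os
from dotenv import load_dotenv

os.environ["AWS_REGION"] = "us-east-1"   # pretend the shell already exports a region

load_dotenv()                            # default: existing variables are left untouched
print(os.getenv("AWS_REGION"))           # still us-east-1

load_dotenv(override=True)               # values from .env now take precedence
print(os.getenv("AWS_REGION"))           # eu-central-1, if that is what .env contains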
@@ -70,7 +67,7 @@ def deploy_cloudformation(template_file, stack_suffix, force_recreate=False, parameters=[]):
 
 def get_or_create_kms_key():
     # Create a KMS client
-    kms_client = boto3.client('kms', region_name=region_name)
+    kms_client = boto3.client('kms', region_name=os.getenv("AWS_REGION"))
     tag_key = 'purpose'
     tag_value = 'You pass butter'
     description = 'KMS key for RSS Feed Processor... Oh my god'
@@ -85,7 +82,7 @@ def get_or_create_kms_key():
     for key in response['Keys']:
         try:
             tags = kms_client.list_resource_tags(KeyId=key['KeyId'])['Tags']
-            if any(tag['TagKey'] == tag_key and tag['TagValue'] == tag_value for tag in tags):
+            if any(tag['TagKey'] == tag_key and tag['TagValue'] == tag_value for tag in tags) and any(tag['TagKey'] == 'region' and tag['TagValue'] == os.getenv("AWS_REGION") for tag in tags):  # TODO: This is inefficient and should be fixed and more readable.
                 print(f"Found existing KMS key with ID: {key['KeyId']}")
                 return key['KeyId']
         except ClientError:
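The TODO in the new condition admits the double any() scan over the tag list is hard to read. One possible cleanup, sketched here rather than taken from the repository, is to collapse the tag list into a dict once and compare both tags in a single expression (key_matches is a hypothetical helper name):

# Hypothetical helper, not in the repository: readable tag matching for the KMS key lookup.
import os

def key_matches(tags, tag_key, tag_value):
    tag_map = {t['TagKey']: t['TagValue'] for t in tags}
    return tag_map.get(tag_key) == tag_value and tag_map.get('region') == os.getenv("AWS_REGION")

# usage inside the loop above (sketch):
#   tags = kms_client.list_resource_tags(KeyId=key['KeyId'])['Tags']
#   if key_matches(tags, tag_key, tag_value):
#       return key['KeyId']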
@@ -120,7 +117,7 @@ def get_or_create_kms_key():
         Description=description,
         KeyUsage='ENCRYPT_DECRYPT',
         Origin='AWS_KMS',
-        Tags=[{'TagKey': tag_key, 'TagValue': tag_value}],
+        Tags=[{'TagKey': tag_key, 'TagValue': tag_value}, {'TagKey': 'region', 'TagValue': os.getenv("AWS_REGION")}],
         Policy=json.dumps(key_policy)
     )
 
@@ -10,7 +10,7 @@ import time
 import sys
 from src.infra.deploy_infrastructure import get_or_create_kms_key
 from dotenv import load_dotenv
-load_dotenv()
+load_dotenv(override=True)
 
 import logging
 logging.basicConfig(level=os.getenv('LOG_LEVEL', 'INFO'))
@@ -43,7 +43,7 @@ def extract_feed_threading(rss: dict, output_queue, stop_thread):
     for entry in feed['entries']:
         if stop_thread.is_set():
             break
 
-        pub_date = parse_pub_date(entry['published'])
+        pub_date = parse_pub_date(entry)
 
         if pub_date > last_date:
@@ -110,11 +110,20 @@ def extract_feed(rss: dict):
         logger.error(f"Feed: {entry}")
         logger.error(f"Feed failed due to error: {e}")
 
-def parse_pub_date(date_string):
-    try:
-        return int(datetime.strptime(date_string, "%a, %d %b %Y %H:%M:%S %z").timestamp())
-    except ValueError:
-        try:
-            return int(datetime.strptime(date_string, "%Y-%m-%dT%H:%M:%SZ").timestamp())
-        except ValueError:
-            return int(parser.parse(date_string).timestamp())
+def parse_pub_date(entry:dict):
+
+    if 'published' in entry:
+        date_string = entry['published']
+
+        try:
+            return int(datetime.strptime(date_string, "%a, %d %b %Y %H:%M:%S %z").timestamp())
+        except ValueError:
+            try:
+                return int(datetime.strptime(date_string, "%Y-%m-%dT%H:%M:%SZ").timestamp())
+            except ValueError:
+                try:
+                    return int(parser.parse(date_string).timestamp())
+                except ValueError:
+                    pass
+
+    return int(datetime.now().timestamp()) # Return current time if no date is found
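The rewritten parse_pub_date tries an RFC 822 timestamp first, then an ISO 8601 one, then hands anything else to dateutil, and finally falls back to the current time when the entry has no usable date. A standalone sketch exercising those branches with made-up sample dates (not data from the repository):

# Sketch of the fallback chain used by the new parse_pub_date, with hypothetical sample dates.
from datetime import datetime
from dateutil import parser

samples = [
    "Mon, 06 Sep 2021 10:30:00 +0000",  # RFC 822 style -> first strptime format
    "2021-09-06T10:30:00Z",             # ISO 8601 style -> second strptime format
    "September 6, 2021 10:30 AM UTC",   # anything else  -> dateutil fallback
]

for date_string in samples:
    try:
        ts = int(datetime.strptime(date_string, "%a, %d %b %Y %H:%M:%S %z").timestamp())
    except ValueError:
        try:
            ts = int(datetime.strptime(date_string, "%Y-%m-%dT%H:%M:%SZ").timestamp())
        except ValueError:
            ts = int(parser.parse(date_string).timestamp())
    print(date_string, "->", ts)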
@@ -6,7 +6,7 @@ from dotenv import load_dotenv
 from src.infra.deploy_infrastructure import deploy_cloudformation
 
 # Load environment variables
-load_dotenv()
+load_dotenv(override=True)
 
 # Set up logging
 
todo.md
@@ -1,13 +1,19 @@
-# Testing🧪
+# Before Public Launch
 * Testing from 3rd party aws account.
+* Fix Issue with KMS Keys & IAM Role [ Done ]
+* Debug the Errors that are at scale.
 
 * Test Large Amounts of Feeds ( Decrease the cadence of ingesting. ) [ Today ]
-* Test out how long an S3 Full Pull will take on the
-** First Run
+* Test out how long an S3 Full Pull will take on the full thing.
+** First Run ( ~30 Minutes)
 ** Second Run.
 * Test out Vector Databases at Small Scale.
 * Test out Vector Databases at Scale.
 * Test out LLM Summarization At Small Scale
 * Test out LLM Summarization At Scale
 
+* Re-enable the Scheduler
+
+
+
 
 # Application Modules
@@ -18,6 +24,7 @@
 * AWS Budget, Pinecone Budget, & LLM Budget
 * Integration with bumblebee (Easily Handle standardization with embedding models & LLMs)
 * Visualization System ( Ingesting, Clustering, etc...)
+* API Infrastructure.
 
 # Misc
 * Duplicate Article Check Module.