Commit 02833626c0 (parent b448471c6a)
Author: Charles E. Gormley
Date: 2024-09-02 15:02:41 -04:00
47 changed files with 426 additions and 366 deletions

.gitignore

@@ -1,3 +1,4 @@
repo_structure.txt
.env
/layer/python*
+*__pycache__*

.vscode/settings.json (new file)

@@ -0,0 +1,3 @@
{
"todo-tree.tree.showBadges": false
}


@@ -3,10 +3,13 @@ import sys
import json
import boto3
from dotenv import load_dotenv
+import logging
# Load environment variables
load_dotenv()
+# Set up logging
+logging.basicConfig(level=os.getenv('LOG_LEVEL'))
# Set AWS credentials from environment variables
os.environ['AWS_ACCESS_KEY_ID'] = os.getenv('AWS_ACCESS_KEY_ID')
os.environ['AWS_SECRET_ACCESS_KEY'] = os.getenv('AWS_SECRET_ACCESS_KEY')
@@ -27,14 +30,14 @@ current_dir = os.path.dirname(os.path.abspath(__file__))
sys.path.append(current_dir)
from src.infra.deploy_infrastructure import deploy_infrastructure
-from src.utils.create_lambda_layer import create_lambda_layer
+from src.infra.lambdas.RSSFeedProcessorLambda.deploy_rss_feed_lambda import deploy_lambda
-from src.lambda_function.deploy_lambda import deploy_lambda
+from src.infra.lambdas.lambda_utils.update_lambda_env_vars import update_env_vars
-from src.lambda_function.update_lambda_env_vars import update_env_vars
+from src.feed_management.upload_rss_feeds import upload_rss_feeds
-from src.utils.upload_rss_feeds import upload_rss_feeds
def main():
# Deploy infrastructure
-deploy_infrastructure()
+# deploy_infrastructure() # TODO: Add in sqs lambda filler here.
+# logging.info("Finished Deploying Infrastructure")
# Deploy Lambda function


@@ -1,3 +1,6 @@
-boto3
+boto3==1.35.*
-python-dotenv
+python-dotenv==1.0.*
-requests
+requests==2.32.*
+constructs==10.2.69
+# Optional, yet necessary for the Pinecone SDK functionality.
+pinecone==5.1.*
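For reference, a quick sketch (not part of this diff; paths are illustrative) of installing the pinned set into the layer/python layout that the layer-build script in this repo targets:

import subprocess
# Install the pinned requirements into a Lambda-layer-style directory (layers expect packages under python/).
subprocess.check_call(["pip", "install", "-r", "requirements.txt", "-t", "layer/python"])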


@@ -0,0 +1,62 @@
from pinecone import Pinecone, ServerlessSpec
from initialize import pc
import os
from dotenv import load_dotenv
load_dotenv()
region = os.getenv("AWS_REGION")
index_name = os.getenv("PINECONE_DB_NAME")
index_name = "quickstart" # TODO: Remove this line after we are done testing with vector dbs.
if index_name not in pc.list_indexes().names():
    pc.create_index(
        name=index_name,
        dimension=2,
        metric="cosine",
        spec=ServerlessSpec(
            cloud='aws',
            region='us-east-1'
        )
    )
index = pc.Index(index_name)
index.upsert(
    vectors=[
        {"id": "vec1", "values": [1.0, 1.5]},
        {"id": "vec2", "values": [2.0, 1.0]},
        {"id": "vec3", "values": [0.1, 3.0]},
    ],
    namespace="example-namespace1"
)
index.upsert(
    vectors=[
        {"id": "vec1", "values": [1.0, -2.5]},
        {"id": "vec2", "values": [3.0, -2.0]},
        {"id": "vec3", "values": [0.5, -1.5]},
    ],
    namespace="example-namespace2"
)
print(index.describe_index_stats())
query_results1 = index.query(
    namespace="example-namespace1",
    vector=[1.0, 1.5],
    top_k=3,
    include_values=True
)
print(query_results1)
query_results2 = index.query(
    namespace="example-namespace2",
    vector=[1.0, -2.5],
    top_k=3,
    include_values=True
)
print(query_results2)
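Once the vector-db experiments are done (per the TODO above about the hard-coded "quickstart" name), the throwaway index can be removed with the same client; a minimal sketch:

# Clean up the temporary test index after experimenting.
if "quickstart" in pc.list_indexes().names():
    pc.delete_index("quickstart")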


@@ -0,0 +1,9 @@
from pinecone import Pinecone
import os
from dotenv import load_dotenv
load_dotenv()
# Set up Pinecone client
api_key = os.getenv("PINCEONE_API_KEY")
pc = Pinecone(api_key=api_key)


@@ -1,50 +0,0 @@
# File: rss_lambda_stack.py
import os
from dotenv import load_dotenv
load_dotenv()
from aws_cdk import (
    App,
    Stack,
    aws_lambda as _lambda,
    aws_iam as iam,
    Duration
)
from constructs import Construct

class SqsFillerLambdaStack(Stack):
    def __init__(self, scope: Construct, construct_id: str, **kwargs) -> None:
        super().__init__(scope, construct_id, **kwargs)
        # Create Lambda Function
        self.sqs_filler = _lambda.Function(
            self, "SqsFillerFunction",
            function_name=os.getenv("QUEUE_FILLER_LAMBDA_NAME"),
            runtime=_lambda.Runtime.PYTHON_3_12,
            handler="lambda_function.handler",
            code=_lambda.Code.from_asset("src/infra/RSSQueueFillerLambda/lambda"),
            timeout=Duration.minutes(5),
            environment={
                "SQS_QUEUE_URL": os.getenv("SQS_QUEUE_URL"),
                "DYNAMODB_TABLE_NAME": os.getenv("DYNAMODB_TABLE_NAME")
            }
        )
        # Grant Lambda permission to scan DynamoDB
        self.sqs_filler.add_to_role_policy(iam.PolicyStatement(
            actions=["dynamodb:Scan"],
            resources=[os.getenv("DYNAMODB_TABLE_ARN")]
        ))
        # Grant Lambda permission to send messages to SQS
        self.sqs_filler.add_to_role_policy(iam.PolicyStatement(
            actions=["sqs:SendMessage"],
            resources=[os.getenv("SQS_QUEUE_ARN")]
        ))

# Main
if __name__ == "__main__":
    app = App()
    SqsFillerLambdaStack(app, "SqsFillerLambdaStack")
    app.synth()


@@ -0,0 +1,92 @@
AWSTemplateFormatVersion: '2010-09-09'
Description: SQS Filler Lambda Stack
Parameters:
  QueueFillerLambdaName:
    Type: String
    Description: Name of the Lambda function
  SqsQueueUrl:
    Type: String
    Description: URL of the SQS queue
  DynamoDbTableName:
    Type: String
    Description: Name of the DynamoDB table
  DynamoDbTableArn:
    Type: String
    Description: ARN of the DynamoDB table
  SqsQueueArn:
    Type: String
    Description: ARN of the SQS queue
  LambdaCodeS3Bucket:
    Type: String
    Description: S3 bucket containing the Lambda function code
  LambdaCodeS3Key:
    Type: String
    Description: S3 key for the Lambda function code
  LambdaRuntime:
    Type: String
    Description: Lambda runtime
    Default: python3.12
  LambdaTimeout:
    Type: Number
    Description: Lambda timeout in seconds
    Default: 300
Resources:
  SqsFillerFunction:
    Type: AWS::Lambda::Function
    Properties:
      FunctionName: !Ref QueueFillerLambdaName
      Runtime: !Ref LambdaRuntime
      Handler: lambda_function.handler
      Code:
        S3Bucket: !Ref LambdaCodeS3Bucket
        S3Key: !Ref LambdaCodeS3Key
      Timeout: !Ref LambdaTimeout
      Environment:
        Variables:
          SQS_QUEUE_URL: !Ref SqsQueueUrl
          DYNAMODB_TABLE_NAME: !Ref DynamoDbTableName
      Role: !GetAtt SqsFillerFunctionRole.Arn
  SqsFillerFunctionRole:
    Type: AWS::IAM::Role
    Properties:
      AssumeRolePolicyDocument:
        Version: '2012-10-17'
        Statement:
          - Effect: Allow
            Principal:
              Service: lambda.amazonaws.com
            Action: sts:AssumeRole
      Policies:
        - PolicyName: LambdaExecutionPolicy
          PolicyDocument:
            Version: '2012-10-17'
            Statement:
              - Effect: Allow
                Action:
                  - logs:CreateLogGroup
                  - logs:CreateLogStream
                  - logs:PutLogEvents
                Resource: arn:aws:logs:*:*:*
              - Effect: Allow
                Action:
                  - dynamodb:Scan
                Resource: !Ref DynamoDbTableArn
              - Effect: Allow
                Action:
                  - sqs:SendMessage
                Resource: !Ref SqsQueueArn
              - Effect: Allow
                Action:
                  - s3:GetObject
                Resource: !Sub arn:aws:s3:::${LambdaCodeS3Bucket}/${LambdaCodeS3Key}
Outputs:
  SqsFillerFunctionArn:
    Description: ARN of the SQS Filler Lambda Function
    Value: !GetAtt SqsFillerFunction.Arn
  SqsFillerFunctionRoleArn:
    Description: ARN of the IAM Role for SQS Filler Lambda Function
    Value: !GetAtt SqsFillerFunctionRole.Arn
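A template like this can be syntax-checked before the deploy script picks it up; a sketch using boto3 (the path follows the src/infra/cloudformation/ convention used by deploy_cloudformation, and is otherwise an assumption):

import boto3

# Ask CloudFormation to validate the template and echo the declared parameters.
cf = boto3.client('cloudformation')
with open('src/infra/cloudformation/rss_lambda_stack.yaml') as f:
    print(cf.validate_template(TemplateBody=f.read())['Parameters'])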


@@ -2,20 +2,21 @@ import boto3
import os
import sys
import json
+from src.utils.retry_logic import retry_with_backoff
from botocore.exceptions import ClientError
region_name = os.getenv("AWS_REGION")
kms_client = boto3.client('kms', region_name=region_name)
+stack_base = os.getenv("STACK_BASE")
+@retry_with_backoff()
def deploy_cloudformation(template_file, stack_suffix, force_recreate=False, parameters=[]):
cf_client = boto3.client('cloudformation')
-stack_name = f"rss-feed-processor-{stack_suffix}"
+stack_name = f"{stack_base}-{stack_suffix}"
with open(f'src/infra/cloudformation/{template_file}', 'r') as file:
template_body = file.read()
print(f"Template contents:\n{template_body}")
capabilities = ['CAPABILITY_NAMED_IAM']
@@ -57,11 +58,11 @@ def deploy_cloudformation(template_file, stack_suffix, force_recreate=False, par
elif 'No updates are to be performed' in str(e):
print(f"No updates needed for stack {stack_name}.")
else:
-raise
+raise ClientError
except ClientError as e:
print(f"Error handling stack {stack_name}: {str(e)}")
-raise
+raise ClientError
def get_or_create_kms_key():
# Create a KMS client
@@ -72,8 +73,6 @@ def get_or_create_kms_key():
account_id = os.getenv('AWS_ACCOUNT_ID')
try:
# List all KMS keys
response = kms_client.list_keys()
@@ -143,7 +142,7 @@ def deploy_infrastructure():
parameters=[
{
'ParameterKey': 'BucketName',
-'ParameterValue': os.environ.get('S3_BUCKET_NAME', 'default-bucket-name')
+'ParameterValue': os.getenv('S3_BUCKET_NAME')
}
])
deploy_cloudformation('dynamo.yaml', 'DynamoDB',
@@ -172,7 +171,8 @@ def deploy_infrastructure():
}
])
-# TODO: Figure out KMS Stuff, but for now just do it in the console
+# TODO: Figure out KMS Stuff, but for now just do it in the console. I would like to get the rest of the cloudformation working
+# before I start messing with KMS keys.
if __name__ == "__main__":
deploy_infrastructure()


@@ -1,3 +1,4 @@
+# TODO: Delete this... probably? If not, move it somewhere else.
import boto3
import os
from botocore.exceptions import ClientError


@@ -10,17 +10,21 @@ import time
import sys
from src.infra.deploy_infrastructure import get_or_create_kms_key
+import logging
+logging.basicConfig(level=os.getenv('LOG_LEVEL', 'INFO'))
# Set variables
-LAMBDA_NAME = "RSSFeedProcessor"
+# TODO: Set environment variables
+LAMBDA_NAME = os.getenv('LAMBDA_FUNCTION_NAME')
LAMBDA_HANDLER = "lambda_function.lambda_handler"
ACCOUNT_NUM = os.getenv('AWS_ACCOUNT_ID')
LAMBDA_ROLE_NAME = os.getenv('LAMBDA_EXECUTION_ROLE_NAME')
LAMBDA_ROLE_ARN = f"arn:aws:iam::{ACCOUNT_NUM}:role/{LAMBDA_ROLE_NAME}"
-LAMBDA_TIMEOUT = 300
+LAMBDA_TIMEOUT = int(os.getenv('LAMBDA_TIMEOUT'))
-LAMBDA_MEMORY = 256
+LAMBDA_MEMORY = int(os.getenv('LAMBDA_MEMORY'))
-LAMBDA_RUNTIME = "python3.11"
+LAMBDA_RUNTIME = os.getenv('LAMBDA_RUNTIME')
-LAMBDA_STACK_NAME = "rss-feed-processor-Lambda"
+LAMBDA_STACK_NAME = os.getenv("STACK_BASE") + f"-{LAMBDA_NAME}"
-LAMBDA_LAYER_NAME = "RSSFeedProcessorLayer"
+LAMBDA_LAYER_NAME = LAMBDA_NAME + "Layer"
S3_LAYER_BUCKET_NAME = os.getenv('S3_LAYER_BUCKET_NAME')
S3_LAYER_KEY = os.getenv('S3_LAYER_KEY_NAME')+'.zip'
@@ -42,36 +46,13 @@ def update_function_code(lambda_client, function_name, zip_file):
ZipFile=zip_file
)
+@retry_with_backoff()
def get_or_create_lambda_layer():
-layer_arn = 'arn:aws:lambda:us-east-1:966265353179:layer:OpenRSSLambdaLayer:3'
+layer_arn = os.getenv('LAMBDA_LAYER_ARN')
return layer_arn
-@retry_with_backoff(max_retries=50, initial_backoff=5, backoff_multiplier=2) # Note: This function usually takes a long time to be successful.
-def wait_for_function_update_to_complete(lambda_client, function_name, max_attempts=30, delay=10):
-for attempt in range(max_attempts):
-try:
-response = lambda_client.get_function(FunctionName=function_name)
-state = response['Configuration']['State']
-if state == 'Active':
-return True
-elif state == 'Failed':
-print(f"Function update failed: {response['Configuration'].get('StateReason')}")
-return False
-print(f"Function {function_name} is in {state} state. Waiting...")
-except ClientError as e:
-print(f"Error checking function state: {e}")
-return False
-time.sleep(delay)
-print(f"Timeout waiting for function {function_name} to become active.")
-return False
+@retry_with_backoff()
def update_function_configuration(lambda_client, function_name, handler, role, timeout, memory, layers, kms_key_id):
-# First, wait for any ongoing updates to complete
-if not wait_for_function_update_to_complete(lambda_client, function_name):
-raise Exception(f"Function {function_name} is not in a state to be updated.")
config = {
'FunctionName': function_name,
@@ -85,43 +66,16 @@ def update_function_configuration(lambda_client, function_name, handler, role, t
if kms_key_id:
config['KMSKeyArn'] = f"arn:aws:kms:{os.environ['AWS_REGION']}:{ACCOUNT_NUM}:key/{kms_key_id}"
-print(f"Updating function configuration for {function_name}... with {config}")
-max_retries = 5 # TODO: Get rid of this dumb retry logic and just use the wrapper I created.
-for attempt in range(max_retries):
try:
response = lambda_client.update_function_configuration(**config)
print(f"Update request sent successfully for {function_name}.")
-# Wait for the update to complete
-if wait_for_function_update_to_complete(lambda_client, function_name):
-print(f"Function {function_name} updated successfully.")
-return response
-else:
-print(f"Function {function_name} update may not have completed successfully.")
-if attempt < max_retries - 1:
-print(f"Retrying in 30 seconds... (Attempt {attempt + 1}/{max_retries})")
-time.sleep(30)
-else:
-raise Exception(f"Failed to update function {function_name} after {max_retries} attempts.")
except ClientError as e:
if e.response['Error']['Code'] == 'ResourceConflictException':
-if attempt < max_retries - 1:
+logging.info(f"Function {function_name} is currently being updated. Retrying...")
-print(f"Another operation is in progress for {function_name}. Retrying in 30 seconds... (Attempt {attempt + 1}/{max_retries})")
+raise e
-time.sleep(30)
-else:
-raise Exception(f"Failed to update function {function_name} after {max_retries} attempts due to ongoing operations.")
-elif 'The role defined for the function cannot be assumed by Lambda' in str(e):
-if attempt < max_retries - 1:
-print(f"IAM role not ready. Retrying in 30 seconds... (Attempt {attempt + 1}/{max_retries})")
-time.sleep(30)
-else:
-raise Exception(f"Failed to update function {function_name} after {max_retries} attempts. IAM role could not be assumed by Lambda.")
-else:
-print(f"Error updating function configuration: {e}")
-raise
-raise Exception(f"Failed to update function {function_name} after {max_retries} attempts.")
@retry_with_backoff()
def create_function(lambda_client, function_name, runtime, role, handler, zip_file, timeout, memory, layers, kms_key_id):
@@ -163,7 +117,7 @@ def deploy_lambda():
lambda_client = boto3.client('lambda')
print(f"Starting deployment of Lambda function: {LAMBDA_NAME}")
-deployment_package = zip_directory('src/lambda_function/src')
+deployment_package = zip_directory('src/infra/lambdas/RSSFeedProcessorLambda/src')
layer_arn = get_or_create_lambda_layer()
if layer_arn:
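Since the hard-coded values above are now read from the environment, a small pre-flight check (a sketch; the variable names are taken from the getenv calls in this hunk) can fail fast when the .env is incomplete:

import os

# Fail early if any setting the deploy script now expects is missing.
required = ["LAMBDA_FUNCTION_NAME", "LAMBDA_TIMEOUT", "LAMBDA_MEMORY",
            "LAMBDA_RUNTIME", "STACK_BASE", "LAMBDA_LAYER_ARN"]
missing = [name for name in required if not os.getenv(name)]
if missing:
    raise RuntimeError(f"Missing environment variables: {', '.join(missing)}")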


@@ -1,10 +1,7 @@
import os
# SQS Configuration
-region = os.getenv["AWS_REGION"]
+SQS_QUEUE_URL = os.environ['SQS_QUEUE_URL']
-account_id = os.getenv["AWS_ACCOUNT_ID"]
-sqs_name = os.getenv["SQS_QUEUE_NAME"]
# S3 Configuration
CONTENT_BUCKET = os.environ['CONTENT_BUCKET']


@@ -0,0 +1,60 @@
import boto3
import json
import os
import logging
from random import randint
from utils import generate_key
logger = logging.getLogger()
s3 = boto3.client('s3')
dynamodb = boto3.resource('dynamodb')
CONTENT_BUCKET = os.getenv("S3_BUCKET_NAME")
DYNAMODB_TABLE = os.getenv("DYNAMODB_TABLE_NAME")
storage_strategy = os.environ.get('STORAGE_STRATEGY')
##### Article Storage #####
def save_article(article:dict, strategy:str):
    if strategy == "s3":
        s3_save_article(article)
    else:
        raise ValueError(f"Invalid storage strategy: {strategy}")

def pinecone_save_article(article:dict):
    pass

def dynamodb_save_article(article:dict):
    pass

def s3_save_article(article:dict):
    rss_feed_id = article['rss_id']
    article_id = article['article_id']
    try:
        key = f"articles/{rss_feed_id}/{article_id}/article.json"
        s3.put_object(
            Bucket=CONTENT_BUCKET,
            Key=key,
            Body=json.dumps(article)
        )
        logger.info(f"Saved article to S3: {key}")
    except Exception as e:
        logger.error(f"Failed to save article: {str(e)}")

###### Feed Storage ######
def update_rss_feed(feed:dict, last_pub_dt:int):
    try:
        table = dynamodb.Table(DYNAMODB_TABLE)
        table.update_item(
            Key={'url': feed['u']},
            UpdateExpression='SET dt = :val',
            ExpressionAttributeValues={':val': last_pub_dt}
        )
        logger.info(f"Updated RSS feed in DynamoDB: {feed['u']} with dt: {feed['dt']}")
    except Exception as e:
        logger.error(f"Failed to update RSS feed: {str(e)}")


@@ -4,6 +4,7 @@ from dateutil import parser
import queue
import threading
import logging
+from utils import generate_key
from article_extractor import extract_article
logger = logging.getLogger()
@@ -52,7 +53,11 @@ def extract_feed(rss: dict, output_queue, stop_thread):
'rss': feed_url,
'title': title,
'content': text,
-'unixTime': pub_date
+'unixTime': pub_date,
+'rss_id': generate_key(feed_url),
+'article_id': generate_key(entry.link),
+'llm_summary': None,
+'embedding': None
}
articles.append(article)
max_date = max(max_date, pub_date)


@@ -16,6 +16,7 @@ sqs = boto3.client('sqs')
def lambda_handler(event, context):
logger.info("Starting RSS feed processing")
+print("starting rss feed, delete this later.")
start_time = time.time()
try:
@@ -25,6 +26,7 @@ def lambda_handler(event, context):
MaxNumberOfMessages=1,
WaitTimeSeconds=0
)
+logger.debug("SQS Response: ", response)
if 'Messages' not in response:
logger.info("No messages in queue")
@@ -36,6 +38,8 @@ def lambda_handler(event, context):
# Process the feed
result = process_feed(feed)
+logger.info("Process Feed Result Dictionary: ", result)
+last_pub_dt = result['max_date']
if result:
# Save articles and update feed
@@ -46,9 +50,10 @@ def lambda_handler(event, context):
logger.error(f"Failed to save article: {str(e)}")
record_extraction_errors(1)
-update_rss_feed(result['feed'])
+update_rss_feed(result['feed'], last_pub_dt)
# Delete the message from the queue
+logger.info("Deleting sqs queue message")
sqs.delete_message(QueueUrl=SQS_QUEUE_URL, ReceiptHandle=receipt_handle)
logger.info(f"Processed feed: {feed['u']}")


@@ -0,0 +1,18 @@
import logging
import os
import hashlib
def setup_logging():
    logger = logging.getLogger()
    log_level = "DEBUG"
    logger.setLevel(logging.getLevelName(log_level))
    return logger

def generate_key(input_string, length=10):
    # Create a SHA256 hash of the input string
    hash_object = hashlib.sha256(input_string.encode())
    hex_dig = hash_object.hexdigest()
    # Return the first 'length' characters of the hash
    return hex_dig[:length]
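generate_key produces the deterministic short ids used for rss_id and article_id above; a usage sketch (the URL is a placeholder):

# The same input always yields the same 10-character id, so S3 keys stay stable across runs.
rss_id = generate_key("https://example.com/feed.xml")
print(rss_id, len(rss_id))  # 10 hex characters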


@@ -0,0 +1,83 @@
import os
import zipfile
import boto3
from dotenv import load_dotenv
from deploy_infrastructure import deploy_cloudformation
# Load environment variables
load_dotenv()
# Set up S3 client
s3 = boto3.client('s3')
def zip_lambda_code():
    lambda_dir = 'src/infra/RSSQueueFillerLambda/lambda'
    zip_path = 'tmp/lambda_function.zip'
    with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
        for root, _, files in os.walk(lambda_dir):
            for file in files:
                file_path = os.path.join(root, file)
                arcname = os.path.relpath(file_path, lambda_dir)
                zipf.write(file_path, arcname)
    return zip_path

def upload_to_s3(file_path):
    s3_key = os.getenv('QUEUE_FILLER_LAMBDA_S3_KEY')
    bucket_name = os.getenv('S3_LAYER_BUCKET_NAME')
    s3.upload_file(file_path, bucket_name, s3_key)
    return f's3://{bucket_name}/{s3_key}'

def deploy_sqs_filler():
    zip_file = zip_lambda_code()
    upload_to_s3(zip_file)
    # Deploy CloudFormation
    deploy_cloudformation('rss_lambda_stack.yaml', 'LambdaSQSFiller',
        parameters=[
            {
                'ParameterKey': 'QueueFillerLambdaName',
                'ParameterValue': os.getenv('QUEUE_FILLER_LAMBDA_NAME')
            },
            {
                'ParameterKey': 'SqsQueueUrl',
                'ParameterValue': os.getenv('SQS_QUEUE_URL')
            },
            {
                'ParameterKey': 'DynamoDbTableName',
                'ParameterValue': os.getenv('DYNAMODB_TABLE_NAME')
            },
            {
                'ParameterKey': 'DynamoDbTableArn',
                'ParameterValue': os.getenv('DYNAMODB_TABLE_ARN')
            },
            {
                'ParameterKey': 'SqsQueueArn',
                'ParameterValue': os.getenv('SQS_QUEUE_ARN')
            },
            {
                'ParameterKey': 'LambdaCodeS3Bucket',
                'ParameterValue': os.getenv('S3_LAYER_BUCKET_NAME')
            },
            {
                'ParameterKey': 'LambdaCodeS3Key',
                'ParameterValue': os.getenv('QUEUE_FILLER_LAMBDA_S3_KEY')
            },
            {
                'ParameterKey': 'LambdaRuntime',
                'ParameterValue': os.getenv('LAMBDA_RUNTIME')
            },
            {
                'ParameterKey': 'LambdaTimeout',
                'ParameterValue': os.getenv('LAMBDA_TIMEOUT')
            }
        ])
    # Clean up local zip file
    os.remove(zip_file)

if __name__ == "__main__":
    deploy_sqs_filler()
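One assumption worth calling out: zip_lambda_code writes to tmp/lambda_function.zip, so the tmp directory has to exist before zipfile opens the archive; a defensive sketch:

import os

# Create the scratch directory if it is not already there.
os.makedirs("tmp", exist_ok=True)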


@@ -1,9 +1,12 @@
-# File: lambda/lambda_function.py
import json
import os
import boto3
+from decimal import Decimal
from datetime import datetime
+import logging
+logger = logging.getLogger()
+logger.setLevel("INFO")
dynamodb = boto3.resource('dynamodb')
sqs = boto3.client('sqs')
@@ -11,6 +14,12 @@ sqs = boto3.client('sqs')
SQS_QUEUE_URL = os.environ['SQS_QUEUE_URL']
DYNAMODB_TABLE_NAME = os.environ['DYNAMODB_TABLE_NAME']
+class DecimalEncoder(json.JSONEncoder):
+def default(self, obj):
+if isinstance(obj, Decimal):
+return int(obj)
+return super(DecimalEncoder, self).default(obj)
def handler(event, context):
table = dynamodb.Table(DYNAMODB_TABLE_NAME)
messages_sent = 0
@@ -20,22 +29,27 @@ def handler(event, context):
for item in response['Items']:
rss_url = item.get('url')
+rss_dt = item.get('dt')
+logger.debug(f"Processing RSS feed: {rss_url}")
+logger.debug(f"Last published date: {rss_dt}")
if rss_url:
message = {
-'rss_url': rss_url,
+'u': rss_url,
-'timestamp': datetime.now().isoformat()
+'dt': rss_dt
}
+logger.debug("message", message)
try:
sqs.send_message(
QueueUrl=SQS_QUEUE_URL,
-MessageBody=json.dumps(message)
+MessageBody=json.dumps(message, cls=DecimalEncoder)
)
messages_sent += 1
except Exception as e:
-print(f"Error sending message to SQS: {str(e)}")
+logger.error(f"Error sending message to SQS: {str(e)}")
-print(f"Sent {messages_sent} messages to SQS at {datetime.now().isoformat()}")
+logger.info(f"Sent {messages_sent} messages to SQS at {datetime.now().isoformat()}")
return {
'statusCode': 200,
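The DecimalEncoder is needed because the DynamoDB resource API returns numbers as decimal.Decimal, which json.dumps rejects by default; a self-contained sketch of the difference (values made up, encoder mirrored from the class added above):

import json
from decimal import Decimal

class DecimalEncoder(json.JSONEncoder):
    def default(self, obj):
        if isinstance(obj, Decimal):
            return int(obj)
        return super().default(obj)

item = {"u": "https://example.com/feed.xml", "dt": Decimal("1725300000")}
# json.dumps(item) alone raises: TypeError: Object of type Decimal is not JSON serializable
print(json.dumps(item, cls=DecimalEncoder))  # {"u": "https://example.com/feed.xml", "dt": 1725300000}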


@@ -1,5 +1,5 @@
#!/bin/bash
+# TODO: This needs to be completely overhauled
# Update system packages
echo "Updating system packages..."
sudo yum update -y


@@ -1,33 +0,0 @@
import boto3
import json
import os
from dotenv import load_dotenv
# Load environment variables
load_dotenv()
# Set up AWS client
sqs = boto3.client('sqs')
region = os.getenv("AWS_REGION")
account_id = os.getenv("AWS_ACCOUNT_ID")
SQS_QUEUE_NAME = os.getenv("SQS_QUEUE_NAME")
SQS_QUEUE_URL = f"https://sqs.{region}.amazonaws.com/{account_id}/{SQS_QUEUE_NAME}"
LAMBDA_FUNCTION_NAME = os.getenv("LAMBDA_FUNCTION_NAME")
def send_test_message():
    # Create a test message
    message = {
        'test_key': 'test_value',
        'message': 'This is a test message for the Lambda trigger'
    }
    # Send the message to SQS
    response = sqs.send_message(
        QueueUrl=SQS_QUEUE_URL,
        MessageBody=json.dumps(message)
    )
    print(f"Message sent. MessageId: {response['MessageId']}")

if __name__ == "__main__":
    send_test_message()


@@ -1,42 +0,0 @@
import boto3
import json
import os
import logging
logger = logging.getLogger()
s3 = boto3.client('s3')
dynamodb = boto3.resource('dynamodb')
CONTENT_BUCKET = os.environ['CONTENT_BUCKET']
DYNAMODB_TABLE = os.environ['DYNAMODB_TABLE']
def save_article(article):
    try:
        # Save to S3
        key = f"articles/{article['unixTime']}/{article['link'].split('/')[-1]}.json"
        s3.put_object(
            Bucket=CONTENT_BUCKET,
            Key=key,
            Body=json.dumps(article)
        )
        logger.info(f"Saved article to S3: {key}")
        # Save to DynamoDB
        table = dynamodb.Table(DYNAMODB_TABLE)
        table.put_item(Item=article)
        logger.info(f"Saved article to DynamoDB: {article['link']}")
    except Exception as e:
        logger.error(f"Failed to save article: {str(e)}")

def update_rss_feed(feed):
    try:
        table = dynamodb.Table(DYNAMODB_TABLE)
        table.update_item(
            Key={'u': feed['u']},
            UpdateExpression='SET dt = :val',
            ExpressionAttributeValues={':val': feed['dt']}
        )
        logger.info(f"Updated RSS feed in DynamoDB: {feed['u']}")
    except Exception as e:
        logger.error(f"Failed to update RSS feed: {str(e)}")


@@ -1,8 +0,0 @@
import logging
import os
def setup_logging():
    logger = logging.getLogger()
    log_level = os.environ.get('LOG_LEVEL', 'INFO')
    logger.setLevel(logging.getLevelName(log_level))
    return logger


@@ -1,95 +0,0 @@
import boto3
import subprocess
import os
import shutil
from botocore.exceptions import ClientError
# Set variables
LAYER_NAME = os.getenv('S3_LAYER_KEY_NAME')
BUCKET_NAME = os.getenv("S3_LAYER_BUCKET_NAME")
REQUIREMENTS_FILE = "src/lambda_function/layers/requirements.txt"
ZIP_FILE = f"{LAYER_NAME}.zip"
def create_s3_bucket_if_not_exists(bucket_name, region=None):
    s3_client = boto3.client('s3', region_name=region)
    try:
        # Check if the bucket exists
        s3_client.head_bucket(Bucket=bucket_name)
        print(f"Bucket '{bucket_name}' already exists.")
    except ClientError as e:
        error_code = e.response['Error']['Code']
        if error_code == '404':
            # Create the bucket
            if region == 'us-east-1' or region is None:
                # us-east-1 does not require LocationConstraint
                s3_client.create_bucket(Bucket=bucket_name)
            else:
                # Other regions require LocationConstraint
                s3_client.create_bucket(
                    Bucket=bucket_name,
                    CreateBucketConfiguration={
                        'LocationConstraint': region
                    }
                )
            print(f"Bucket '{bucket_name}' created.")
        else:
            # For any other errors, re-raise the exception
            raise e

def install_requirements(requirements_file, target_dir):
    subprocess.check_call([
        "pip", "install",
        "-r", requirements_file,
        "-t", target_dir
    ])

def create_lambda_layer():
    # Create a temporary directory for the layer
    os.makedirs("layer/python", exist_ok=True)
    # Install dependencies from requirements.txt
    install_requirements(REQUIREMENTS_FILE, "layer/python")
    print("Finished Installing Packages from requirements.txt")
    # Create ZIP file
    shutil.make_archive(LAYER_NAME, 'zip', "layer")
    print("Finished Zipping Package")
    # Create or update Lambda layer
    lambda_client = boto3.client('lambda', region_name='us-east-1')
    # Make sure the S3 bucket exists
    create_s3_bucket_if_not_exists(BUCKET_NAME)
    # Upload the zip file to S3
    s3_client = boto3.client('s3')
    s3_client.upload_file(ZIP_FILE, BUCKET_NAME, ZIP_FILE)
    print(f"Uploaded {ZIP_FILE} to S3 bucket '{BUCKET_NAME}'.")
    # Publish the layer using the S3 object
    response = lambda_client.publish_layer_version(
        LayerName=LAYER_NAME,
        Description="Dependencies for RSS Feed Processor",
        Content={
            'S3Bucket': BUCKET_NAME,
            'S3Key': ZIP_FILE
        },
        CompatibleRuntimes=['python3.11']
    )
    print(f"Created Lambda layer version: {response['Version']}")
    # Clean up
    shutil.rmtree("layer")
    os.remove(ZIP_FILE)
    print("Lambda layer creation complete!")

if __name__ == "__main__":
    create_lambda_layer()


@@ -1,34 +0,0 @@
import boto3
from botocore.exceptions import ClientError
def create_s3_bucket_if_not_exists(bucket_name, region=None):
    s3_client = boto3.client('s3', region_name=region)
    try:
        # Check if the bucket exists
        s3_client.head_bucket(Bucket=bucket_name)
        print(f"Bucket '{bucket_name}' already exists.")
    except ClientError as e:
        # If a 404 error is caught, it means the bucket does not exist
        error_code = e.response['Error']['Code']
        if error_code == '404':
            # Create the bucket
            if region is None:
                s3_client.create_bucket(Bucket=bucket_name)
            else:
                s3_client.create_bucket(
                    Bucket=bucket_name,
                    CreateBucketConfiguration={
                        'LocationConstraint': region
                    }
                )
            print(f"Bucket '{bucket_name}' created.")
        else:
            # For any other errors, re-raise the exception
            raise e

# Example usage
bucket_name = 'your-unique-bucket-name'
region = 'us-east-1'  # Change this to your desired region
create_s3_bucket_if_not_exists(bucket_name, region)


@@ -1,5 +1,8 @@
import time
from botocore.exceptions import ClientError
+import logging
+import os
+logging.basicConfig(level=os.getenv('LOG_LEVEL', 'INFO'))
def retry_with_backoff(max_retries=20, initial_backoff=1, backoff_multiplier=4):
def decorator(func):
@@ -16,7 +19,7 @@ def retry_with_backoff(max_retries=20, initial_backoff=1, backoff_multiplier=4):
if retries == max_retries - 1:
raise
wait_time = backoff * (2 ** retries)
-print(f"Encountered {e.response['Error']['Code']}. Retrying in {wait_time} seconds...")
+logging.info(f"Encountered {e.response['Error']['Code']}. Retrying in {wait_time} seconds...")
time.sleep(wait_time)
retries += 1
backoff *= backoff_multiplier
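For context, this decorator is what the deploy scripts above now lean on instead of hand-rolled retry loops; a usage sketch (the wrapped function is hypothetical):

@retry_with_backoff(max_retries=5, initial_backoff=1, backoff_multiplier=2)
def describe_stack(cf_client, stack_name):
    # Any ClientError (e.g. throttling) triggers an exponential backoff and another attempt.
    return cf_client.describe_stacks(StackName=stack_name)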

todo.md

@@ -1,8 +1,17 @@
+# TODO: Clean up aws cdk stuff.
+# TODO: Reorganize infra folder structure.
+# TODO: Add in console setup python script for new project into launch.py
# Modules
-* Gen AI Module
* More RSS Feed Module
-* Duplicate Article Check Module
+# Future Modules
+* Gen AI Summarization Module
+* Other Add-ons with text classification of articles ( Sentiment Analysis, political polarity, etc. )
+* Duplicate Article Check Module.
* Semantic Storage Module
* API Module ( Semantic Search, Retrieval )
+* Way to start the repo, enabling all the different modules from the launch script ( Make it fun ).
+# Over-caffeinated Ideas
* Make it solarpunk themed.
+* Write a serverless manifesto for personal projects and where you would like to see the serverless world go.