Added package-lock.json

2025-06-03 14:35:36 +02:00
committed by GitHub
parent 122305001b
commit 36fc93cb8e
13 changed files with 900 additions and 0 deletions

package-lock.json generated Normal file
View File

@@ -0,0 +1,6 @@
{
  "name": "project",
  "lockfileVersion": 3,
  "requires": true,
  "packages": {}
}

View File

@@ -0,0 +1,211 @@
import boto3
import os
import zipfile
import io
import requests
import json
from botocore.exceptions import ClientError
from src.utils.retry_logic import retry_with_backoff
import time
import sys
from src.infra.deploy_infrastructure import get_or_create_kms_key
from dotenv import load_dotenv
load_dotenv(override=True)
import logging
logging.basicConfig(level=os.getenv('LOG_LEVEL', 'INFO'))
# Set variables
LAMBDA_NAME = os.getenv('LAMBDA_FUNCTION_NAME')
ACCOUNT_NUM = os.getenv('AWS_ACCOUNT_ID')
REGION = os.getenv("AWS_REGION")
LAMBDA_ROLE_ARN = os.getenv("LAMBDA_ROLE_ARN")
LAMBDA_TIMEOUT = int(os.getenv('LAMBDA_TIMEOUT'))
LAMBDA_MEMORY = int(os.getenv('LAMBDA_MEMORY'))
LAMBDA_RUNTIME = os.getenv('LAMBDA_RUNTIME')
LAMBDA_STACK_NAME = os.getenv("STACK_BASE") + f"-{LAMBDA_NAME}"
LAMBDA_HANDLER = "lambda_function.lambda_handler"
LAMBDA_LAYER_NAME = LAMBDA_NAME + "Layer"
S3_LAYER_KEY = os.getenv('S3_LAYER_KEY_NAME')+'.zip'
def zip_directory(path):
    print(f"Creating deployment package from {path}...")
    zip_buffer = io.BytesIO()
    with zipfile.ZipFile(zip_buffer, 'a', zipfile.ZIP_DEFLATED, False) as zip_file:
        for root, _, files in os.walk(path):
            for file in files:
                file_path = os.path.join(root, file)
                arcname = os.path.relpath(file_path, path)
                zip_file.write(file_path, arcname)
    return zip_buffer.getvalue()
@retry_with_backoff()
def update_function_code(lambda_client, function_name, zip_file):
    return lambda_client.update_function_code(
        FunctionName=function_name,
        ZipFile=zip_file
    )
def get_or_create_lambda_layer():
    # The layer ARN is supplied via the environment; this helper does not build a layer itself.
    layer_arn = os.getenv('LAMBDA_LAYER_ARN')
    return layer_arn
@retry_with_backoff(max_retries=50, initial_backoff=5, backoff_multiplier=2)  # Note: this function usually takes a long time to succeed.
def update_function_configuration(lambda_client, function_name, handler, role, timeout, memory, layers, kms_key_id):
    config = {
        'FunctionName': function_name,
        'Handler': handler,
        'Role': role,
        'Timeout': timeout,
        'MemorySize': memory,
        'Layers': layers
    }
    if kms_key_id:
        config['KMSKeyArn'] = f"arn:aws:kms:{REGION}:{ACCOUNT_NUM}:key/{kms_key_id}"
    try:
        response = lambda_client.update_function_configuration(**config)
        print(f"Update request sent successfully for {function_name}.")
        return response
    except ClientError as e:
        if e.response['Error']['Code'] == 'ResourceConflictException':
            logging.info(f"Function {function_name} is currently being updated. Retrying...")
        raise
@retry_with_backoff()
def configure_sqs_trigger(lambda_client, function_name, queue_arn):
    """Placeholder for backward compatibility. Redis deployment uses no SQS trigger."""
    return
@retry_with_backoff()
def create_function(lambda_client, function_name, runtime, role, handler, zip_file, timeout, memory, layers, kms_key_id, policy):
    config = {
        'FunctionName': function_name,
        'Runtime': runtime,
        'Role': role,
        'Handler': handler,
        'Code': {'ZipFile': zip_file},
        'Timeout': timeout,
        'MemorySize': memory,
        'Layers': layers
    }
    print(policy)
    if kms_key_id:
        config['KMSKeyArn'] = f"arn:aws:kms:{REGION}:{ACCOUNT_NUM}:key/{kms_key_id}"
    try:
        return lambda_client.create_function(**config)
    except ClientError as e:
        if e.response['Error']['Code'] == 'InvalidParameterValueException':
            print(f"Error creating function: {e}")
            print("Ensure that the IAM role has the correct trust relationship and permissions.")
            print("There might be a delay in role propagation. Please wait a few minutes and try again.")
        raise
def get_pillow_layer_arn():
    url = f"https://api.klayers.cloud/api/v2/p{os.getenv('PYTHON_VERSION')}/layers/latest/{os.getenv('AWS_REGION')}/json"
    try:
        response = requests.get(url)
        response.raise_for_status()
        layers_data = response.json()
        pillow_layer = next((layer for layer in layers_data if layer['package'] == 'Pillow'), None)
        if pillow_layer:
            return pillow_layer['arn']
        else:
            print("Pillow layer not found in the API response.")
            return None
    except requests.RequestException as e:
        print(f"Error fetching Pillow layer ARN: {e}")
        return None
def get_lambda_policy():
    policy = {
        "Version": "2012-10-17",
        "Statement": [
            {
                "Effect": "Allow",
                "Action": [
                    "logs:CreateLogGroup",
                    "logs:CreateLogStream",
                    "logs:PutLogEvents"
                ],
                "Resource": "arn:aws:logs:*:*:*"
            },
            {
                "Effect": "Allow",
                "Action": [
                    "s3:GetObject",
                    "s3:PutObject"
                ],
                "Resource": "arn:aws:s3:::your-bucket-name/*"
            }
        ]
    }
    return policy
def deploy_lambda():
    lambda_client = boto3.client('lambda', region_name=REGION)
    print(f"Starting deployment of Lambda function: {LAMBDA_NAME}")
    deployment_package = zip_directory('src/infra/lambdas/RSSFeedProcessorLambda/src')
    layer_arn = get_or_create_lambda_layer()
    if layer_arn:
        print(f"Using Lambda Layer ARN: {layer_arn}")
    else:
        print("Warning: Lambda Layer not found or created. Proceeding without Layer.")
    pillow_layer_arn = get_pillow_layer_arn()
    if pillow_layer_arn:
        print(f"Using Pillow Layer ARN: {pillow_layer_arn}")
    else:
        print("Warning: Pillow Layer not found. Proceeding without Pillow Layer.")
    kms_key_id = get_or_create_kms_key()
    if kms_key_id:
        print(f"Using KMS Key ID: {kms_key_id}")
    else:
        print("Error: KMS Key not found or created. Aborting deployment.")
        sys.exit(1)
    try:
        # Check if the function exists
        try:
            lambda_client.get_function(FunctionName=LAMBDA_NAME)
            function_exists = True
        except ClientError as e:
            if e.response['Error']['Code'] == 'ResourceNotFoundException':
                function_exists = False
            else:
                raise
        # Combine the layers
        layers = [layer_arn] if layer_arn else []
        if pillow_layer_arn:
            layers.append(pillow_layer_arn)
        if function_exists:
            print("Updating existing Lambda function...")
            update_function_configuration(lambda_client, LAMBDA_NAME, LAMBDA_HANDLER, LAMBDA_ROLE_ARN, LAMBDA_TIMEOUT, LAMBDA_MEMORY, layers, kms_key_id)
            update_function_code(lambda_client, LAMBDA_NAME, deployment_package)
        else:
            print(f"Lambda function '{LAMBDA_NAME}' not found. Creating new function...")
            policy = get_lambda_policy()
            create_function(lambda_client, LAMBDA_NAME, LAMBDA_RUNTIME, LAMBDA_ROLE_ARN, LAMBDA_HANDLER, deployment_package, LAMBDA_TIMEOUT, LAMBDA_MEMORY, layers, kms_key_id, policy)
        print("Lambda deployment completed successfully!")
    except Exception as e:
        print(f"Error during Lambda deployment: {str(e)}")
        raise

if __name__ == "__main__":
    deploy_lambda()

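Note: the helpers above are wrapped in retry_with_backoff from src.utils.retry_logic, which is not included in this diff. For context only, a minimal sketch of a decorator compatible with the keyword arguments used at the call sites (max_retries, initial_backoff, backoff_multiplier) might look like the following; the real implementation in the repository may differ.

import time
from functools import wraps

def retry_with_backoff(max_retries=5, initial_backoff=1, backoff_multiplier=2):
    """Illustrative sketch: retry a function with exponential backoff."""
    def decorator(func):
        @wraps(func)
        def wrapper(*args, **kwargs):
            backoff = initial_backoff
            for attempt in range(1, max_retries + 1):
                try:
                    return func(*args, **kwargs)
                except Exception:
                    # Re-raise on the final attempt; otherwise wait and grow the delay.
                    if attempt == max_retries:
                        raise
                    time.sleep(backoff)
                    backoff *= backoff_multiplier
        return wrapper
    return decorator
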
View File

@@ -0,0 +1,4 @@
newspaper3k
feedparser
python-dateutil
lxml

View File

@@ -0,0 +1,60 @@
import os
import requests
from qdrant_client import QdrantClient, models
from utils import setup_logging
logger = setup_logging()
qdrant_url = os.getenv("QDRANT_URL", "http://localhost:6333")
qdrant_api_key = os.getenv("QDRANT_API_KEY")
collection_name = os.getenv("QDRANT_COLLECTION_NAME")
embedding_dim = os.getenv("VECTOR_EMBEDDING_DIM")
vector_search_metric = os.getenv("VECTOR_SEARCH_METRIC", "cosine")
ollama_host = os.getenv("OLLAMA_HOST", "http://localhost:11434")
ollama_embedding_model = os.getenv("OLLAMA_EMBEDDING_MODEL", "nomic-embed-text")
client = QdrantClient(url=qdrant_url, api_key=qdrant_api_key)
def get_index():
    collections = client.get_collections().collections
    if collection_name not in [c.name for c in collections]:
        raise KeyError(f"Collection {collection_name} not found")
    return client

def vectorize(article: str) -> list[float]:
    response = requests.post(
        f"{ollama_host}/api/embeddings",
        json={"model": ollama_embedding_model, "prompt": article},
        timeout=30,
    )
    response.raise_for_status()
    return response.json().get("embedding", [])

def upsert_vectors(index: QdrantClient, data: list[dict]):
    points = [
        models.PointStruct(id=item["id"], vector=item["vector"], payload=item.get("payload"))
        for item in data
    ]
    index.upsert(collection_name=collection_name, points=points)

def query_vectors(index: QdrantClient, vector: list[float], top_k: int, filter_query: dict | None = None):
    if len(vector) != int(embedding_dim):
        raise ValueError("Length of vector does not match the embedding dimension")
    return index.search(
        collection_name=collection_name,
        query_vector=vector,
        limit=top_k,
        with_payload=True,
        query_filter=filter_query,
    )

if __name__ == "__main__":
    paragraph = "This is a test."
    vectorize(paragraph)

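Note: get_index() above assumes the Qdrant collection already exists and raises a KeyError otherwise. A one-off bootstrap sketch, not part of this commit, could create it; this assumes a recent qdrant-client that provides collection_exists, reuses the same environment variables, and falls back to 768 dimensions (assumed default for nomic-embed-text).

import os
from qdrant_client import QdrantClient, models

qdrant_url = os.getenv("QDRANT_URL", "http://localhost:6333")
qdrant_api_key = os.getenv("QDRANT_API_KEY")
collection_name = os.getenv("QDRANT_COLLECTION_NAME")
embedding_dim = int(os.getenv("VECTOR_EMBEDDING_DIM", "768"))  # assumed default

client = QdrantClient(url=qdrant_url, api_key=qdrant_api_key)
# Create the collection with cosine distance if it does not exist yet.
if not client.collection_exists(collection_name):
    client.create_collection(
        collection_name=collection_name,
        vectors_config=models.VectorParams(size=embedding_dim, distance=models.Distance.COSINE),
    )
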
View File

@@ -0,0 +1,6 @@
def summarize(text: str):
    sub_prompt = "Summarize the following passage"

View File

@@ -0,0 +1,14 @@
import re

def remove_newlines(text: str) -> str:
    return text.replace('\n', '')

def remove_urls(text: str) -> str:
    url_pattern = re.compile(r'http\S+|www\S+')
    return url_pattern.sub('', text)

def clean_text(text: str) -> str:
    text = remove_newlines(text)
    text = remove_urls(text)
    return text

View File

@@ -0,0 +1,31 @@
import newspaper
import logging

logger = logging.getLogger()

def extract_article(url):
    """
    Extracts the title and text of an article from the given URL.

    Args:
        url (str): The URL of the article.

    Returns:
        A tuple containing the title and text of the article, respectively.
    """
    logger.debug(f"Starting Newspaper Article Extraction {url}")
    config = newspaper.Config()
    config.request_timeout = 60
    article = newspaper.Article(url, config=config)
    try:
        article.download()
        logger.debug(f"Downloaded Article {url}")
        article.parse()
        logger.debug(f"Parsed Article {url}")
        return article.title, article.text
    except Exception as e:
        logger.error(f"Failed to extract article {url}: {str(e)}")
        return None, None

View File

@@ -0,0 +1,116 @@
import boto3
from minio import Minio
import json
import os
import logging
from datetime import datetime
from pymongo import MongoClient
from analytics.embeddings.vector_db import get_index, upsert_vectors, vectorize

logger = logging.getLogger()
s3 = boto3.client('s3')
minio_client = Minio(
    os.getenv("MINIO_ENDPOINT"),
    access_key=os.getenv("MINIO_ACCESS_KEY"),
    secret_key=os.getenv("MINIO_SECRET_KEY"),
    secure=False
)
CONTENT_BUCKET = os.getenv("MINIO_BUCKET", os.getenv("S3_BUCKET_NAME", os.getenv("CONTENT_BUCKET")))
DYNAMODB_TABLE = os.getenv("DYNAMODB_TABLE_NAME")
storage_strategy = os.environ.get('STORAGE_STRATEGY')
MONGODB_URL = os.getenv("MONGODB_URL")
MONGODB_DB_NAME = os.getenv("MONGODB_DB_NAME")
MONGODB_COLLECTION_NAME = os.getenv("MONGODB_COLLECTION_NAME", "rss_feeds")
mongo_client = MongoClient(MONGODB_URL)
feeds_collection = mongo_client[MONGODB_DB_NAME][MONGODB_COLLECTION_NAME]
##### Article Storage #####

def save_article(article: dict, strategy: str):
    if strategy == "s3":
        s3_save_article(article)
    elif strategy == "qdrant":
        qdrant_save_article(article)
    elif strategy == "both":
        qdrant_save_article(article)
        s3_save_article(article)
    else:
        raise ValueError(f"Invalid storage strategy: {strategy}")

def qdrant_save_article(article: dict):
    logger.info("Saving article to Qdrant")
    index = get_index()
    data = {
        "id": article["article_id"],
        "vector": vectorize(article["content"]),
        "payload": {"rss": article.get("rss"), "title": article.get("title")},
    }
    upsert_vectors(index, [data])
def s3_save_article(article: dict):
    logger.info("Saving article to MinIO")
    now = datetime.now()
    article_id = article.get('article_id')
    if not article_id:
        logger.error(f"Missing article_id in article: {article}")
        return
    file_path = f"/tmp/{article_id}-article.json"
    file_key = f"{now.year}/{now.month}/{now.day}/{article_id}.json"
    # Save article to /tmp json file
    with open(file_path, "w") as f:
        json.dump(article, f)
    try:
        metadata = {
            "rss": article.get("rss", ""),
            "title": article.get("title", ""),
            "unixTime": str(article.get("unixTime", "")),
            "article_id": article.get("article_id", ""),
            "link": article.get("link", ""),
            "rss_id": article.get("rss_id", "")
        }
        minio_client.fput_object(
            CONTENT_BUCKET,
            file_key,
            file_path,
            content_type="application/json",
            metadata=metadata
        )
        logger.info(f"Saved article {article_id} to bucket {CONTENT_BUCKET}")
    except Exception as e:
        logger.error(f"Failed to save article with error: {str(e)}. \n Article: {article} \n Article Type: {type(article)}")
###### Feed Storage ######
RSS_FEEDS_FILE = os.getenv("RSS_FEEDS_FILE", "rss_feeds.json")

def update_rss_feed(feed: dict, last_pub_dt: int):
    try:
        if not os.path.exists(RSS_FEEDS_FILE):
            return
        with open(RSS_FEEDS_FILE, "r") as f:
            feeds = json.load(f)
        for item in feeds:
            if item.get("u") == feed["u"]:
                item["dt"] = int(last_pub_dt)
        with open(RSS_FEEDS_FILE, "w") as f:
            json.dump(feeds, f)
        logger.info(f"Updated RSS feed {feed['u']} with dt: {last_pub_dt}")
    except Exception as e:
        logger.error(f"Failed to update RSS feed: {str(e)}")

View File

@@ -0,0 +1,131 @@
import feedparser
from datetime import datetime
from dateutil import parser
import queue
import threading
import logging
from utils import generate_key
from article_extractor import extract_article
from article_cleaning import clean_text

logger = logging.getLogger()

def process_feed(feed: dict):
    output_queue = queue.Queue()
    stop_thread = threading.Event()
    thread = threading.Thread(target=extract_feed_threading, args=(feed, output_queue, stop_thread))
    thread.daemon = True
    thread.start()
    logger.debug(f"Thread Started: {feed['u']}")
    thread.join(timeout=90)
    if thread.is_alive():
        stop_thread.set()
        logger.debug(f"Killing Thread: {feed['u']}")
        return None
    else:
        try:
            output = output_queue.get_nowait()
            logger.info(f"Thread Succeeded: {feed['u']}")
            return output
        except queue.Empty:
            logger.info(f"Thread Failed: {feed['u']}")
            return None
def extract_feed_threading(rss: dict, output_queue, stop_thread):
    articles = []
    feed_url = rss['u']
    last_date = rss['dt']
    max_date = last_date
    try:
        feed = feedparser.parse(feed_url)
        for entry in feed['entries']:
            if stop_thread.is_set():
                break
            pub_date = parse_pub_date(entry)
            if pub_date > last_date:
                title, text = extract_article(entry.link)
                if title is None or text is None:
                    continue
                title, text = clean_text(title), clean_text(text)
                article = {
                    'link': entry.link,
                    'rss': feed_url,
                    'title': title,
                    'content': text,
                    'unixTime': pub_date,
                    'rss_id': generate_key(feed_url),
                    'article_id': generate_key(entry.link),
                    'llm_summary': None,
                    'embedding': None
                }
                articles.append(article)
                max_date = max(max_date, pub_date)
        output = {
            'articles': articles,
            'max_date': max_date,
            'feed': rss
        }
        output_queue.put(output)
    except Exception as e:
        logger.error(f"Feed: {feed_url}")
        logger.error(f"Feed failed due to error: {e}")
def extract_feed(rss: dict):
    articles = []
    feed_url = rss['u']
    last_date = rss['dt']
    max_date = last_date
    try:
        feed = feedparser.parse(feed_url)
        for entry in feed['entries']:
            pub_date = parse_pub_date(entry)
            if pub_date > last_date:
                title, text = extract_article(entry.link)
                article = {
                    'link': entry.link,
                    'rss': feed_url,
                    'title': title,
                    'content': text,
                    'unixTime': pub_date,
                    'rss_id': generate_key(feed_url),
                    'article_id': generate_key(entry.link),
                    'llm_summary': None,
                    'embedding': None
                }
                articles.append(article)
                max_date = max(max_date, pub_date)
        output = {
            'articles': articles,
            'max_date': max_date,
            'feed': rss
        }
        print(output)
        return output
    except Exception as e:
        logger.error(f"Feed: {feed_url}")
        logger.error(f"Feed failed due to error: {e}")
def parse_pub_date(entry: dict):
    if 'published' in entry:
        date_string = entry['published']
        try:
            return int(datetime.strptime(date_string, "%a, %d %b %Y %H:%M:%S %z").timestamp())
        except ValueError:
            try:
                return int(datetime.strptime(date_string, "%Y-%m-%dT%H:%M:%SZ").timestamp())
            except ValueError:
                try:
                    return int(parser.parse(date_string).timestamp())
                except ValueError:
                    pass
    return int(datetime.now().timestamp())  # Return current time if no date is found

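Note: a hypothetical local smoke test for process_feed (not part of the committed file; the feed URL and dt=0 are placeholders) might look like this.

if __name__ == "__main__":
    # Placeholder feed descriptor in the {"u": url, "dt": last unix publish time} shape.
    sample_feed = {"u": "https://example.com/feed.xml", "dt": 0}
    result = process_feed(sample_feed)
    if result:
        print(f"Extracted {len(result['articles'])} articles; newest unixTime: {result['max_date']}")
    else:
        print("Feed extraction timed out or failed.")
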
View File

@@ -0,0 +1,65 @@
import json
import time
import os
import redis
from feed_processor import extract_feed
from data_storage import save_article, update_rss_feed
from utils import setup_logging
from config import REDIS_URL, REDIS_QUEUE_NAME
from exceptions import RSSProcessingError, DataStorageError
from metrics import (
    record_processed_articles,
    record_processing_time,
    record_extraction_errors,
)

logger = setup_logging()
storage_strategy = os.environ.get("STORAGE_STRATEGY")
redis_client = redis.Redis.from_url(REDIS_URL)

def lambda_handler(event, context):
    logger.info("Starting RSS feed processing")
    start_time = time.time()
    try:
        feed_data = redis_client.rpop(REDIS_QUEUE_NAME)
        if not feed_data:
            logger.info("No messages in queue")
            return {"statusCode": 200, "body": json.dumps("No feeds to process")}
        feed = json.loads(feed_data)
        result = extract_feed(feed)
        logger.info(f"Process Feed Result Dictionary: {result}")
        if result:
            last_pub_dt = result["max_date"]
            for article in result["articles"]:
                try:
                    save_article(article, storage_strategy)
                except DataStorageError as e:
                    logger.error(f"Failed to save article: {str(e)}")
                    record_extraction_errors(1)
            update_rss_feed(result["feed"], last_pub_dt)
            logger.info(f"Processed feed: {feed['u']}")
            record_processed_articles(len(result["articles"]))
        else:
            logger.warning(f"Failed to process feed: {feed['u']}")
            record_extraction_errors(1)
    except RSSProcessingError as e:
        logger.error(f"RSS Processing Error: {str(e)}")
        return {"statusCode": 500, "body": json.dumps("RSS processing failed")}
    except Exception as e:
        logger.error(f"Unexpected error: {str(e)}")
        return {"statusCode": 500, "body": json.dumps("An unexpected error occurred")}
    finally:
        end_time = time.time()
        processing_time = end_time - start_time
        record_processing_time(processing_time)
        logger.info(f"Lambda execution time: {processing_time:.2f} seconds")
    return {"statusCode": 200, "body": json.dumps("RSS feed processed successfully")}

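Note: the handler above consumes messages with rpop, so a producer must push JSON objects shaped like {"u": <feed url>, "dt": <last unix publish time>}. A hypothetical producer sketch, for illustration only (the default URL, queue name, and feed values below are placeholders, not values from config.py):

import json
import os
import redis

# Push one feed descriptor onto the queue that lambda_handler drains with rpop.
redis_client = redis.Redis.from_url(os.getenv("REDIS_URL", "redis://localhost:6379/0"))
queue_name = os.getenv("REDIS_QUEUE_NAME", "rss-feed-queue")  # placeholder default
feed = {"u": "https://example.com/feed.xml", "dt": 0}
redis_client.lpush(queue_name, json.dumps(feed))
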
View File

@@ -0,0 +1,81 @@
import os
import zipfile
import logging
import boto3
from dotenv import load_dotenv
from src.infra.deploy_infrastructure import deploy_cloudformation

# Load environment variables
load_dotenv(override=True)

# Set up logging
logging.basicConfig(level=os.getenv('LOG_LEVEL'))

# Set up S3 client
s3 = boto3.client('s3')

def zip_lambda_code():
    lambda_dir = 'src/infra/lambdas/RSSQueueFiller/lambda'
    zip_path = 'tmp/lambda_function.zip'
    os.makedirs(zip_path.split("/")[0], exist_ok=True)
    with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
        for root, _, files in os.walk(lambda_dir):
            for file in files:
                file_path = os.path.join(root, file)
                arcname = os.path.relpath(file_path, lambda_dir)
                zipf.write(file_path, arcname)
    return zip_path

def upload_to_s3(file_path):
    s3_key = os.getenv('QUEUE_FILLER_LAMBDA_S3_KEY')
    bucket_name = os.getenv('S3_LAMBDA_ZIPPED_BUCKET_NAME')
    s3.upload_file(file_path, bucket_name, s3_key)
    return f's3://{bucket_name}/{s3_key}'

def deploy_sqs_filler():
    zip_file = zip_lambda_code()
    upload_to_s3(zip_file)
    # Deploy CloudFormation
    deploy_cloudformation('rss_lambda_stack.yaml', 'LambdaSQSFiller',
                          parameters=[
                              {
                                  'ParameterKey': 'QueueFillerLambdaName',
                                  'ParameterValue': os.getenv('QUEUE_FILLER_LAMBDA_NAME')
                              },
                              {
                                  'ParameterKey': 'RedisUrl',
                                  'ParameterValue': os.getenv('REDIS_URL')
                              },
                              {
                                  'ParameterKey': 'RedisQueueName',
                                  'ParameterValue': os.getenv('REDIS_QUEUE_NAME')
                              },
                              {
                                  'ParameterKey': 'LambdaCodeS3Bucket',
                                  'ParameterValue': os.getenv('S3_LAMBDA_ZIPPED_BUCKET_NAME')
                              },
                              {
                                  'ParameterKey': 'LambdaCodeS3Key',
                                  'ParameterValue': os.getenv('QUEUE_FILLER_LAMBDA_S3_KEY')
                              },
                              {
                                  'ParameterKey': 'LambdaRuntime',
                                  'ParameterValue': os.getenv('LAMBDA_RUNTIME')
                              },
                              {
                                  'ParameterKey': 'LambdaTimeout',
                                  'ParameterValue': os.getenv('LAMBDA_TIMEOUT')
                              }
                          ])
    # Clean up local zip file
    os.remove(zip_file)

if __name__ == "__main__":
    deploy_sqs_filler()

View File

@@ -0,0 +1,58 @@
import json
import os
import logging
from decimal import Decimal
from datetime import datetime
import boto3
import redis
from pymongo import MongoClient

logger = logging.getLogger()
logger.setLevel("INFO")

sqs = boto3.client('sqs')
SQS_QUEUE_URL = os.environ['SQS_QUEUE_URL']
MONGODB_URL = os.environ['MONGODB_URL']
MONGODB_DB_NAME = os.environ['MONGODB_DB_NAME']
MONGODB_COLLECTION_NAME = os.environ.get('MONGODB_COLLECTION_NAME', 'rss_feeds')
mongo_client = MongoClient(MONGODB_URL)
feeds_collection = mongo_client[MONGODB_DB_NAME][MONGODB_COLLECTION_NAME]

class DecimalEncoder(json.JSONEncoder):
    def default(self, obj):
        if isinstance(obj, Decimal):
            return int(obj)
        return super(DecimalEncoder, self).default(obj)

def handler(event, context):
    messages_sent = 0
    # Iterate over all feeds in MongoDB
    for item in feeds_collection.find({}):
        rss_url = item.get('url')
        rss_dt = item.get('dt')
        logger.debug(f"Processing RSS feed: {rss_url}")
        logger.debug(f"Last published date: {rss_dt}")
        if rss_url:
            message = {
                'u': rss_url,
                'dt': rss_dt
            }
            logger.debug(f"Message: {message}")
            try:
                sqs.send_message(
                    QueueUrl=SQS_QUEUE_URL,
                    MessageBody=json.dumps(message, cls=DecimalEncoder)
                )
                messages_sent += 1
            except Exception as e:
                logger.error(f"Error sending message to SQS: {str(e)}")
    logger.info(f"Sent {messages_sent} messages to SQS at {datetime.now().isoformat()}")
    return {
        "statusCode": 200,
        "body": json.dumps(f"Sent {messages_sent} RSS URLs to SQS"),
    }

View File

@@ -0,0 +1,117 @@
#!/bin/bash
set -e
####### Section 1: Checking Python Existence ########
echo "Section 1: Checking Python Existence"
# Ensure python3.12 is installed
if ! command -v python3.12 &> /dev/null; then
    echo "Python 3.12 is not installed. Please install it before running this script."
    exit 1
fi
echo "Python 3.12 found. Proceeding..."
####### Section 2: Installing Dependencies ########
echo "Section 2: Installing Dependencies"
# Install dependencies
python3.12 -m pip install --upgrade Pillow feedfinder2==0.0.4 python-dateutil newspaper3k==0.2.8 feedparser lxml[html5lib] lxml_html_clean lxml[html_clean] qdrant-client ollama -t python/
echo "Dependencies installed successfully."
####### Section 3: Creating ZIP File ########
echo "Section 3: Creating ZIP File"
# Create ZIP file
zip -r OpenRSSLambdaLayer.zip python/
echo "ZIP file created."
# Check if ZIP file was created and is not empty
if [ ! -s OpenRSSLambdaLayer.zip ]; then
    echo "Error: ZIP file is empty or was not created."
    exit 1
fi
echo "ZIP file check passed."
####### Section 4: Getting AWS Regions ########
echo "Section 4: Getting AWS Regions"
# Get list of all AWS regions
REGIONS=$(aws ec2 describe-regions --query 'Regions[].RegionName' --output text)
echo "Retrieved AWS regions: $REGIONS"
####### Section 5: Creating Buckets, Uploading, and Publishing Layer ########
echo "Section 5: Creating Buckets, Uploading, and Publishing Layer"
create_bucket_upload_and_publish_layer() {
    local region=$1
    local bucket_name="rss-feed-processor-layers-$region"
    local layer_name="ingest-rss-lambda-layer-$region"
    echo "Processing region: $region"
    # Create bucket if it doesn't exist
    if ! aws s3api head-bucket --bucket "$bucket_name" --region "$region" 2>/dev/null; then
        echo "Creating bucket $bucket_name in $region"
        if [ "$region" == "us-east-1" ]; then
            aws s3api create-bucket --bucket "$bucket_name" --region "$region"
        else
            aws s3api create-bucket --bucket "$bucket_name" --region "$region" --create-bucket-configuration LocationConstraint="$region"
        fi
    else
        echo "Bucket $bucket_name already exists in $region"
    fi
    # Upload ZIP to the region-specific bucket
    echo "Uploading ZIP to $bucket_name"
    aws s3 cp OpenRSSLambdaLayer.zip "s3://$bucket_name/" --region "$region"
    # Create and publish Lambda layer
    echo "Creating Lambda layer in region: $region"
    LAYER_VERSION=$(aws lambda publish-layer-version \
        --region "$region" \
        --layer-name "$layer_name" \
        --description "Layer with dependencies for RSS processing" \
        --license-info "MIT" \
        --content "S3Bucket=$bucket_name,S3Key=OpenRSSLambdaLayer.zip" \
        --compatible-runtimes python3.12 \
        --query 'Version' \
        --output text)
    if [ -z "$LAYER_VERSION" ]; then
        echo "Failed to create Lambda layer in region $region."
        return 1
    fi
    echo "Making layer public in region: $region"
    aws lambda add-layer-version-permission \
        --region "$region" \
        --layer-name "$layer_name" \
        --version-number "$LAYER_VERSION" \
        --statement-id public \
        --action lambda:GetLayerVersion \
        --principal '*'
    ACCOUNT_ID=$(aws sts get-caller-identity --query Account --output text)
    ARN="arn:aws:lambda:${region}:${ACCOUNT_ID}:layer:${layer_name}:${LAYER_VERSION}"
    echo "Layer ARN for region $region: $ARN"
    echo "$region:$ARN" >> layer_arns.txt
}
# Process all regions
for region in $REGIONS; do
    if create_bucket_upload_and_publish_layer "$region"; then
        echo "Successfully processed region: $region"
    else
        echo "Failed to process region: $region. Continuing with next region..."
    fi
done
####### Section 6: Completion ########
echo "Section 6: Completion"
echo "Setup complete! OpenRSSLambdaLayer is now available in all processed regions."
echo "Layer ARNs have been saved to layer_arns.txt"
echo "Script execution completed successfully."