mirror of
https://github.com/aljazceru/IngestRSS.git
synced 2025-12-17 22:14:20 +01:00
Sunday Error Patching
This commit is contained in:
@@ -28,6 +28,7 @@ Resources:
|
||||
Properties:
|
||||
Name: rss-feed-processor-schedule
|
||||
Description: Runs the RSS Feed Processor Lambda function every 30 minutes
|
||||
State: DISABLED
|
||||
ScheduleExpression: rate(30 minutes)
|
||||
FlexibleTimeWindow:
|
||||
Mode: FLEXIBLE
|
||||
|
||||
@@ -6,24 +6,21 @@ from src.utils.retry_logic import retry_with_backoff
|
||||
from botocore.exceptions import ClientError
|
||||
|
||||
from dotenv import load_dotenv
|
||||
load_dotenv()
|
||||
load_dotenv(override=True)
|
||||
|
||||
region_name = os.getenv("AWS_REGION")
|
||||
kms_client = boto3.client('kms', region_name=region_name)
|
||||
kms_client = boto3.client('kms', region_name=os.getenv("AWS_REGION"))
|
||||
stack_base = os.getenv("STACK_BASE")
|
||||
|
||||
@retry_with_backoff()
|
||||
def deploy_cloudformation(template_file, stack_suffix, force_recreate=False, parameters=[]):
|
||||
cf_client = boto3.client('cloudformation')
|
||||
cf_client = boto3.client('cloudformation', region_name=os.getenv("AWS_REGION"))
|
||||
stack_name = f"{stack_base}-{stack_suffix}"
|
||||
|
||||
|
||||
with open(f'src/infra/cloudformation/{template_file}', 'r') as file:
|
||||
template_body = file.read()
|
||||
|
||||
capabilities = ['CAPABILITY_NAMED_IAM']
|
||||
|
||||
|
||||
try:
|
||||
if force_recreate:
|
||||
try:
|
||||
@@ -70,7 +67,7 @@ def deploy_cloudformation(template_file, stack_suffix, force_recreate=False, par
|
||||
|
||||
def get_or_create_kms_key():
|
||||
# Create a KMS client
|
||||
kms_client = boto3.client('kms', region_name=region_name)
|
||||
kms_client = boto3.client('kms', region_name=os.getenv("AWS_REGION"))
|
||||
tag_key = 'purpose'
|
||||
tag_value = 'You pass butter'
|
||||
description = 'KMS key for RSS Feed Processor... Oh my god'
|
||||
@@ -85,7 +82,7 @@ def get_or_create_kms_key():
|
||||
for key in response['Keys']:
|
||||
try:
|
||||
tags = kms_client.list_resource_tags(KeyId=key['KeyId'])['Tags']
|
||||
if any(tag['TagKey'] == tag_key and tag['TagValue'] == tag_value for tag in tags):
|
||||
if any(tag['TagKey'] == tag_key and tag['TagValue'] == tag_value for tag in tags) and any(tag['TagKey'] == 'region' and tag['TagValue'] == os.getenv("AWS_REGION") for tag in tags): # TODO: This is inefficient and should be fixed and more readable.
|
||||
print(f"Found existing KMS key with ID: {key['KeyId']}")
|
||||
return key['KeyId']
|
||||
except ClientError:
|
||||
@@ -120,7 +117,7 @@ def get_or_create_kms_key():
|
||||
Description=description,
|
||||
KeyUsage='ENCRYPT_DECRYPT',
|
||||
Origin='AWS_KMS',
|
||||
Tags=[{'TagKey': tag_key, 'TagValue': tag_value}],
|
||||
Tags=[{'TagKey': tag_key, 'TagValue': tag_value}, {'TagKey': 'region', 'TagValue': os.getenv("AWS_REGION")}],
|
||||
Policy=json.dumps(key_policy)
|
||||
)
|
||||
|
||||
|
||||
@@ -10,7 +10,7 @@ import time
|
||||
import sys
|
||||
from src.infra.deploy_infrastructure import get_or_create_kms_key
|
||||
from dotenv import load_dotenv
|
||||
load_dotenv()
|
||||
load_dotenv(override=True)
|
||||
|
||||
import logging
|
||||
logging.basicConfig(level=os.getenv('LOG_LEVEL', 'INFO'))
|
||||
|
||||
@@ -110,11 +110,20 @@ def extract_feed(rss: dict):
|
||||
logger.error(f"Feed: {entry}")
|
||||
logger.error(f"Feed failed due to error: {e}")
|
||||
|
||||
def parse_pub_date(date_string):
|
||||
def parse_pub_date(entry:dict):
|
||||
|
||||
if 'published' in entry:
|
||||
date_string = entry['published']
|
||||
|
||||
try:
|
||||
return int(datetime.strptime(date_string, "%a, %d %b %Y %H:%M:%S %z").timestamp())
|
||||
except ValueError:
|
||||
try:
|
||||
return int(datetime.strptime(date_string, "%Y-%m-%dT%H:%M:%SZ").timestamp())
|
||||
except ValueError:
|
||||
try:
|
||||
return int(parser.parse(date_string).timestamp())
|
||||
except ValueError:
|
||||
pass
|
||||
|
||||
return int(datetime.now().timestamp()) # Return current time if no date is found
|
||||
@@ -6,7 +6,7 @@ from dotenv import load_dotenv
|
||||
from src.infra.deploy_infrastructure import deploy_cloudformation
|
||||
|
||||
# Load environment variables
|
||||
load_dotenv()
|
||||
load_dotenv(override=True)
|
||||
|
||||
# Set up logging
|
||||
|
||||
|
||||
13
todo.md
13
todo.md
@@ -1,14 +1,20 @@
|
||||
# Testing🧪
|
||||
# Before Public Launch
|
||||
* Testing from 3rd party aws account.
|
||||
* Fix Issue with KMS Keys & IAM Role [ Done ]
|
||||
* Debug the Errors that are at scale.
|
||||
|
||||
* Test Large Amounts of Feeds ( Decrease the cadence of ingesting. ) [ Today ]
|
||||
* Test out how long an S3 Full Pull will take on the
|
||||
** First Run
|
||||
* Test out how long an S3 Full Pull will take on the full thing.
|
||||
** First Run ( ~30 Minutes)
|
||||
** Second Run.
|
||||
* Test out Vector Databases at Small Scale.
|
||||
* Test out Vector Databases at Scale.
|
||||
* Test out LLM Summarization At Small Scale
|
||||
* Test out LLM Summarization At Scale
|
||||
|
||||
* Re-enable the Scheduler
|
||||
|
||||
|
||||
|
||||
# Application Modules
|
||||
* Vector Storage
|
||||
@@ -18,6 +24,7 @@
|
||||
* AWS Budget, Pinecone Budget, & LLM Budget
|
||||
* Integration with bumblebee (Easily Handle standardization with embedding models & LLMs)
|
||||
* Visualization System ( Ingesting, Clustering, etc...)
|
||||
* API Infrastructure.
|
||||
|
||||
# Misc
|
||||
* Duplicate Article Check Module.
|
||||
|
||||
Reference in New Issue
Block a user