Sunday Error Patching

This commit is contained in:
Charles E. Gormley
2024-10-13 20:12:30 -04:00
parent cdf232b547
commit 8928f05144
6 changed files with 36 additions and 22 deletions

View File

@@ -28,6 +28,7 @@ Resources:
Properties:
Name: rss-feed-processor-schedule
Description: Runs the RSS Feed Processor Lambda function every hour
State: DISABLED
ScheduleExpression: rate(30 minutes)
FlexibleTimeWindow:
Mode: FLEXIBLE

View File

@@ -6,24 +6,21 @@ from src.utils.retry_logic import retry_with_backoff
from botocore.exceptions import ClientError
from dotenv import load_dotenv
load_dotenv()
load_dotenv(override=True)
region_name = os.getenv("AWS_REGION")
kms_client = boto3.client('kms', region_name=region_name)
kms_client = boto3.client('kms', region_name=os.getenv("AWS_REGION"))
stack_base = os.getenv("STACK_BASE")
@retry_with_backoff()
def deploy_cloudformation(template_file, stack_suffix, force_recreate=False, parameters=[]):
cf_client = boto3.client('cloudformation')
cf_client = boto3.client('cloudformation', region_name=os.getenv("AWS_REGION"))
stack_name = f"{stack_base}-{stack_suffix}"
with open(f'src/infra/cloudformation/{template_file}', 'r') as file:
template_body = file.read()
capabilities = ['CAPABILITY_NAMED_IAM']
try:
if force_recreate:
try:
@@ -70,7 +67,7 @@ def deploy_cloudformation(template_file, stack_suffix, force_recreate=False, par
def get_or_create_kms_key():
# Create a KMS client
kms_client = boto3.client('kms', region_name=region_name)
kms_client = boto3.client('kms', region_name=os.getenv("AWS_REGION"))
tag_key = 'purpose'
tag_value = 'You pass butter'
description = 'KMS key for RSS Feed Processor... Oh my god'
@@ -85,7 +82,7 @@ def get_or_create_kms_key():
for key in response['Keys']:
try:
tags = kms_client.list_resource_tags(KeyId=key['KeyId'])['Tags']
if any(tag['TagKey'] == tag_key and tag['TagValue'] == tag_value for tag in tags):
if any(tag['TagKey'] == tag_key and tag['TagValue'] == tag_value for tag in tags) and any(tag['TagKey'] == 'region' and tag['TagValue'] == os.getenv("AWS_REGION") for tag in tags): # TODO: This is inefficient and should be fixed and more readable.
print(f"Found existing KMS key with ID: {key['KeyId']}")
return key['KeyId']
except ClientError:
@@ -120,7 +117,7 @@ def get_or_create_kms_key():
Description=description,
KeyUsage='ENCRYPT_DECRYPT',
Origin='AWS_KMS',
Tags=[{'TagKey': tag_key, 'TagValue': tag_value}],
Tags=[{'TagKey': tag_key, 'TagValue': tag_value}, {'TagKey': 'region', 'TagValue': os.getenv("AWS_REGION")}],
Policy=json.dumps(key_policy)
)

View File

@@ -10,7 +10,7 @@ import time
import sys
from src.infra.deploy_infrastructure import get_or_create_kms_key
from dotenv import load_dotenv
load_dotenv()
load_dotenv(override=True)
import logging
logging.basicConfig(level=os.getenv('LOG_LEVEL', 'INFO'))

View File

@@ -43,7 +43,7 @@ def extract_feed_threading(rss: dict, output_queue, stop_thread):
for entry in feed['entries']:
if stop_thread.is_set():
break
pub_date = parse_pub_date(entry['published'])
if pub_date > last_date:
@@ -110,11 +110,20 @@ def extract_feed(rss: dict):
logger.error(f"Feed: {entry}")
logger.error(f"Feed failed due to error: {e}")
def parse_pub_date(date_string):
try:
return int(datetime.strptime(date_string, "%a, %d %b %Y %H:%M:%S %z").timestamp())
except ValueError:
def parse_pub_date(entry:dict):
if 'published' in entry:
date_string = entry['published']
try:
return int(datetime.strptime(date_string, "%Y-%m-%dT%H:%M:%SZ").timestamp())
return int(datetime.strptime(date_string, "%a, %d %b %Y %H:%M:%S %z").timestamp())
except ValueError:
return int(parser.parse(date_string).timestamp())
try:
return int(datetime.strptime(date_string, "%Y-%m-%dT%H:%M:%SZ").timestamp())
except ValueError:
try:
return int(parser.parse(date_string).timestamp())
except ValueError:
pass
return int(datetime.now().timestamp()) # Return current time if no date is found

View File

@@ -6,7 +6,7 @@ from dotenv import load_dotenv
from src.infra.deploy_infrastructure import deploy_cloudformation
# Load environment variables
load_dotenv()
load_dotenv(override=True)
# Set up logging

15
todo.md
View File

@@ -1,13 +1,19 @@
# Testing🧪
# Before Public Launch
* Testing from 3rd party aws account.
* Fix Issue with KMS Keys & IAM Role [ Done ]
* Debug the Errors that are at scale.
* Test Large Amounts of Feeds ( Decrease the cadence of ingesting. ) [ Today ]
* Test out how long an S3 Full Pull will take on the
** First Run
* Test out how long an S3 Full Pull will take on the full thing.
** First Run ( ~30 Minutes)
** Second Run.
* Test out Vector Databases at Small Scale.
* Test out Vector Databases at Scale.
* Test out LLM Summarization At Small Scale
* Test out LLM Summarization At Scale
* Test out LLM Summarization At Scale
* Re-enable the Scheduler
# Application Modules
@@ -18,6 +24,7 @@
* AWS Budget, Pinecone Budget, & LLM Budget
* Integration with bumblebee (Easily Handle standardization with embedding models & LLMs)
* Visualization System ( Ingesting, Clustering, etc...)
* API Infrastructure.
# Misc
* Duplicate Article Check Module.