Charles-Gormley
2024-08-26 21:38:52 -04:00
parent 049cb6a6b2
commit 32e255784f
28 changed files with 388 additions and 60 deletions

View File

@@ -36,17 +36,13 @@ def main():
# Deploy infrastructure
deploy_infrastructure()
# Create Lambda layer
create_lambda_layer()
print("Finished with Lambda Layer")
# Deploy Lambda function
deploy_lambda()
print("Finished Deploying Lambda")
# Update Lambda environment variables
update_env_vars(LAMBDA_FUNCTION_NAME, )
update_env_vars(LAMBDA_FUNCTION_NAME)
print("Finished Environment Variable Updates")
# Upload RSS feeds

View File

@@ -0,0 +1,3 @@
boto3
python-dotenv
requests

View File

@@ -1,10 +1,13 @@
AWSTemplateFormatVersion: '2010-09-09'
Description: 'IAM Role for RSS Feed Processor Lambda Function with Broad Permissions'
Description: 'IAM Role for RSS Feed Processor Lambda Function with Environment Variable Encryption'
Parameters:
LambdaExecutionRoleName:
Type: String
Description: "Name of the Lambda Execution Role"
LambdaKMSKeyArn:
Type: String
Description: "ARN of the KMS Key for Lambda environment variable encryption"
Resources:
LambdaExecutionRole:
@@ -23,7 +26,7 @@ Resources:
ManagedPolicyArns:
- 'arn:aws:iam::aws:policy/service-role/AWSLambdaBasicExecutionRole'
Policies:
- PolicyName: 'RSSFeedProcessorLambdaBroadPolicy'
- PolicyName: 'RSSFeedProcessorLambdaPolicy'
PolicyDocument:
Version: '2012-10-17'
Statement:
@@ -37,12 +40,21 @@ Resources:
- 'xray:*'
- 'cloudwatch:*'
- 'events:*'
- 'kms:Decrypt'
Resource: '*'
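# KMS access is split out below: Decrypt/GenerateDataKey are granted against the key
# passed in via LambdaKMSKeyArn instead of the wildcard resource above.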
- Effect: Allow
Action:
- 'kms:Decrypt'
- 'kms:GenerateDataKey'
Resource: !Ref LambdaKMSKeyArn
Outputs:
LambdaRoleArn:
Description: 'ARN of the Lambda Execution Role'
Value: !GetAtt LambdaExecutionRole.Arn
Export:
Name: !Sub '${AWS::StackName}-LambdaRoleArn'
Name: !Sub '${AWS::StackName}-LambdaRoleArn'
LambdaKMSKeyArn:
Description: 'ARN of the KMS Key for Lambda'
Value: !Ref LambdaKMSKeyArn
Export:
Name: !Sub '${AWS::StackName}-LambdaKMSKeyArn'

View File

@@ -1,7 +1,12 @@
import boto3
import os
import sys
import json
from botocore.exceptions import ClientError
region_name = os.getenv("AWS_REGION")
kms_client = boto3.client('kms', region_name=region_name)
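# Module-level KMS client (region from AWS_REGION); deploy_infrastructure() reuses it
# to resolve the key's ARN via describe_key.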
def deploy_cloudformation(template_file, stack_suffix, force_recreate=False, parameters=[]):
cf_client = boto3.client('cloudformation')
stack_name = f"rss-feed-processor-{stack_suffix}"
@@ -58,38 +63,116 @@ def deploy_cloudformation(template_file, stack_suffix, force_recreate=False, par
print(f"Error handling stack {stack_name}: {str(e)}")
raise
def get_or_create_kms_key():
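# Look for an existing key carrying the purpose tag below; if none is found, create one
# whose policy gives the account root full control and lets the Lambda service principal
# call kms:Decrypt and kms:GenerateDataKey*. Exits the script on any KMS ClientError.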
# Create a KMS client
kms_client = boto3.client('kms', region_name=region_name)
tag_key = 'purpose'
tag_value = 'You pass butter'
description = 'KMS key for RSS Feed Processor... Oh my god'
account_id = os.getenv('AWS_ACCOUNT_ID')
try:
# List all KMS keys
response = kms_client.list_keys()
# Check each key for the specified tag
for key in response['Keys']:
try:
tags = kms_client.list_resource_tags(KeyId=key['KeyId'])['Tags']
if any(tag['TagKey'] == tag_key and tag['TagValue'] == tag_value for tag in tags):
print(f"Found existing KMS key with ID: {key['KeyId']}")
return key['KeyId']
except ClientError:
continue
# If no key found, create a new one with appropriate policy
print("No existing key found. Creating a new KMS key.")
key_policy = {
"Version": "2012-10-17",
"Statement": [
{
"Sid": "Enable IAM User Permissions",
"Effect": "Allow",
"Principal": {"AWS": f"arn:aws:iam::{account_id}:root"},
"Action": "kms:*",
"Resource": "*"
},
{
"Sid": "Allow Lambda to use the key",
"Effect": "Allow",
"Principal": {"Service": "lambda.amazonaws.com"},
"Action": [
"kms:Decrypt",
"kms:GenerateDataKey*"
],
"Resource": "*"
}
]
}
response = kms_client.create_key(
Description=description,
KeyUsage='ENCRYPT_DECRYPT',
Origin='AWS_KMS',
Tags=[{'TagKey': tag_key, 'TagValue': tag_value}],
Policy=json.dumps(key_policy)
)
key_id = response['KeyMetadata']['KeyId']
print(f"Successfully created new KMS key with ID: {key_id}")
return key_id
except ClientError as e:
print(f"Error in KMS key operation: {e}")
sys.exit(1)
def deploy_infrastructure():
# Resolve the KMS key (creating it if needed) and look up its ARN for the Lambda role stack.
kms_key_id = get_or_create_kms_key()
key_info = kms_client.describe_key(KeyId=kms_key_id)
kms_key_arn = key_info['KeyMetadata']['Arn']
deploy_cloudformation('s3.yaml', 'S3',
parameters=[
{
'ParameterKey': 'BucketName',
'ParameterValue': os.environ.get('S3_BUCKET_NAME', 'default-role-name')
'ParameterValue': os.environ.get('S3_BUCKET_NAME', 'default-bucket-name')
}
]) # Force recreation of Lambda role)
])
deploy_cloudformation('dynamo.yaml', 'DynamoDB',
parameters=[
{
'ParameterKey': 'DynamoDBName',
'ParameterValue': os.environ.get('DYNAMODB_TABLE_NAME', 'default-role-name')
'ParameterValue': os.environ.get('DYNAMODB_TABLE_NAME', 'default-table-name')
}
])
deploy_cloudformation('sqs.yaml', 'SQS',
parameters=[
{
'ParameterKey': 'SQSQueueName',
'ParameterValue': os.environ.get('SQS_QUEUE_NAME', 'default-role-name')
'ParameterValue': os.environ.get('SQS_QUEUE_NAME', 'default-queue-name')
}
])
deploy_cloudformation('lambda_role.yaml', 'Lambda', force_recreate=True,
parameters=[
{
'ParameterKey': 'LambdaExecutionRoleName',
'ParameterValue': os.environ.get('LAMBDA_EXECUTION_ROLE_NAME', 'default-role-name')
}
])
parameters=[
{
'ParameterKey': 'LambdaExecutionRoleName',
'ParameterValue': os.environ.get('LAMBDA_EXECUTION_ROLE_NAME', 'default-role-name')
},
{
'ParameterKey': 'LambdaKMSKeyArn',
'ParameterValue': kms_key_arn
}
])
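# The Lambda role stack is force-recreated and now receives the KMS key ARN, so the
# lambda_role.yaml template can scope its kms:Decrypt / kms:GenerateDataKey statement to that key.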
# TODO: Figure out KMS Stuff, but for now just do it in the console
if __name__ == "__main__":
deploy_infrastructure()

View File

@@ -2,8 +2,13 @@ import boto3
import os
import zipfile
import io
import requests
import json
from botocore.exceptions import ClientError
from src.utils.retry_logic import retry_with_backoff
import time
import sys
from src.infra.deploy_infrastructure import get_or_create_kms_key
# Set variables
LAMBDA_NAME = "RSSFeedProcessor"
@@ -13,7 +18,11 @@ LAMBDA_ROLE_NAME = os.getenv('LAMBDA_EXECUTION_ROLE_NAME')
LAMBDA_ROLE_ARN = f"arn:aws:iam::{ACCOUNT_NUM}:role/{LAMBDA_ROLE_NAME}"
LAMBDA_TIMEOUT = 300
LAMBDA_MEMORY = 256
LAMBDA_RUNTIME = "python3.10"
LAMBDA_RUNTIME = "python3.11"
LAMBDA_STACK_NAME = "rss-feed-processor-Lambda"
LAMBDA_LAYER_NAME = "RSSFeedProcessorLayer"
S3_LAYER_BUCKET_NAME = os.getenv('S3_LAYER_BUCKET_NAME')
S3_LAYER_KEY = os.getenv('S3_LAYER_KEY_NAME')+'.zip'
def zip_directory(path):
print(f"Creating deployment package from {path}...")
@@ -34,26 +43,121 @@ def update_function_code(lambda_client, function_name, zip_file):
)
@retry_with_backoff()
def update_function_configuration(lambda_client, function_name, handler, role, timeout, memory):
return lambda_client.update_function_configuration(
FunctionName=function_name,
Handler=handler,
Role=role,
Timeout=timeout,
MemorySize=memory
)
def get_or_create_lambda_layer():
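# Pinned ARN of the public OpenRSSLambdaLayer (version 3), presumably the layer published
# by the OpenRSSLambdaLayer setup script added in this commit; nothing is looked up or created here yet.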
layer_arn = 'arn:aws:lambda:us-east-1:966265353179:layer:OpenRSSLambdaLayer:3'
return layer_arn
def wait_for_function_update_to_complete(lambda_client, function_name, max_attempts=30, delay=10):
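# Poll get_function until the configuration reports an Active state; return False on a
# Failed state, a ClientError, or after max_attempts polls spaced delay seconds apart.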
for attempt in range(max_attempts):
try:
response = lambda_client.get_function(FunctionName=function_name)
state = response['Configuration']['State']
if state == 'Active':
return True
elif state == 'Failed':
print(f"Function update failed: {response['Configuration'].get('StateReason')}")
return False
print(f"Function {function_name} is in {state} state. Waiting...")
except ClientError as e:
print(f"Error checking function state: {e}")
return False
time.sleep(delay)
print(f"Timeout waiting for function {function_name} to become active.")
return False
@retry_with_backoff()
def create_function(lambda_client, function_name, runtime, role, handler, zip_file, timeout, memory):
return lambda_client.create_function(
FunctionName=function_name,
Runtime=runtime,
Role=role,
Handler=handler,
Code={'ZipFile': zip_file},
Timeout=timeout,
MemorySize=memory
)
def update_function_configuration(lambda_client, function_name, handler, role, timeout, memory, layers, kms_key_id):
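# Replaces the simpler update above: wait out any in-flight update, attach the given layers,
# and, when a KMS key id is supplied, set KMSKeyArn so environment variables are encrypted
# with that key. Retries ResourceConflictException and role-assumption errors in 30-second steps.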
# First, wait for any ongoing updates to complete
if not wait_for_function_update_to_complete(lambda_client, function_name):
raise Exception(f"Function {function_name} is not in a state to be updated.")
config = {
'FunctionName': function_name,
'Handler': handler,
'Role': role,
'Timeout': timeout,
'MemorySize': memory,
'Layers': layers
}
if kms_key_id:
config['KMSKeyArn'] = f"arn:aws:kms:{os.environ['AWS_REGION']}:{ACCOUNT_NUM}:key/{kms_key_id}"
print(f"Updating function configuration for {function_name}... with {config}")
max_retries = 5 # TODO: Get rid of this dumb retry logic and just use the wrapper I created.
for attempt in range(max_retries):
try:
response = lambda_client.update_function_configuration(**config)
print(f"Update request sent successfully for {function_name}.")
# Wait for the update to complete
if wait_for_function_update_to_complete(lambda_client, function_name):
print(f"Function {function_name} updated successfully.")
return response
else:
print(f"Function {function_name} update may not have completed successfully.")
if attempt < max_retries - 1:
print(f"Retrying in 30 seconds... (Attempt {attempt + 1}/{max_retries})")
time.sleep(30)
else:
raise Exception(f"Failed to update function {function_name} after {max_retries} attempts.")
except ClientError as e:
if e.response['Error']['Code'] == 'ResourceConflictException':
if attempt < max_retries - 1:
print(f"Another operation is in progress for {function_name}. Retrying in 30 seconds... (Attempt {attempt + 1}/{max_retries})")
time.sleep(30)
else:
raise Exception(f"Failed to update function {function_name} after {max_retries} attempts due to ongoing operations.")
elif 'The role defined for the function cannot be assumed by Lambda' in str(e):
if attempt < max_retries - 1:
print(f"IAM role not ready. Retrying in 30 seconds... (Attempt {attempt + 1}/{max_retries})")
time.sleep(30)
else:
raise Exception(f"Failed to update function {function_name} after {max_retries} attempts. IAM role could not be assumed by Lambda.")
else:
print(f"Error updating function configuration: {e}")
raise
raise Exception(f"Failed to update function {function_name} after {max_retries} attempts.")
@retry_with_backoff()
def create_function(lambda_client, function_name, runtime, role, handler, zip_file, timeout, memory, layers, kms_key_id):
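# Creation path mirrors the configuration update: layers are attached and, if a KMS key id
# is given, KMSKeyArn is set (note the region here comes from AWS_DEFAULT_REGION rather than AWS_REGION).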
config = {
'FunctionName': function_name,
'Runtime': runtime,
'Role': role,
'Handler': handler,
'Code': {'ZipFile': zip_file},
'Timeout': timeout,
'MemorySize': memory,
'Layers': layers
}
if kms_key_id:
config['KMSKeyArn'] = f"arn:aws:kms:{os.environ['AWS_DEFAULT_REGION']}:{ACCOUNT_NUM}:key/{kms_key_id}"
return lambda_client.create_function(**config)
def get_pillow_layer_arn():
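# Fetch the latest public Pillow layer ARN for python3.11 in us-east-1 from the Klayers API;
# returns None on any failure so deployment can continue without the layer.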
url = "https://api.klayers.cloud/api/v2/p3.11/layers/latest/us-east-1/json"
try:
response = requests.get(url)
response.raise_for_status()
layers_data = response.json()
pillow_layer = next((layer for layer in layers_data if layer['package'] == 'Pillow'), None)
if pillow_layer:
return pillow_layer['arn']
else:
print("Pillow layer not found in the API response.")
return None
except requests.RequestException as e:
print(f"Error fetching Pillow layer ARN: {e}")
return None
def deploy_lambda():
lambda_client = boto3.client('lambda')
@@ -61,6 +165,25 @@ def deploy_lambda():
print(f"Starting deployment of Lambda function: {LAMBDA_NAME}")
deployment_package = zip_directory('src/lambda_function/src')
layer_arn = get_or_create_lambda_layer()
if layer_arn:
print(f"Using Lambda Layer ARN: {layer_arn}")
else:
print("Warning: Lambda Layer not found or created. Proceeding without Layer.")
pillow_layer_arn = get_pillow_layer_arn()
if pillow_layer_arn:
print(f"Using Pillow Layer ARN: {pillow_layer_arn}")
else:
print("Warning: Pillow Layer not found. Proceeding without Pillow Layer.")
kms_key_id = get_or_create_kms_key()
if kms_key_id:
print(f"Using KMS Key ID: {kms_key_id}")
else:
print("Warning: KMS Key not found or created. Proceeding without KMS Key.")
sys.exit(1)
try:
# Check if the function exists
try:
@@ -72,13 +195,18 @@ def deploy_lambda():
else:
raise e
# Combine the layers
layers = [layer_arn] if layer_arn else []
if pillow_layer_arn:
layers.append(pillow_layer_arn)
if function_exists:
print("Updating existing Lambda function...")
update_function_configuration(lambda_client, LAMBDA_NAME, LAMBDA_HANDLER, LAMBDA_ROLE_ARN, LAMBDA_TIMEOUT, LAMBDA_MEMORY, layers, kms_key_id)
update_function_code(lambda_client, LAMBDA_NAME, deployment_package)
update_function_configuration(lambda_client, LAMBDA_NAME, LAMBDA_HANDLER, LAMBDA_ROLE_ARN, LAMBDA_TIMEOUT, LAMBDA_MEMORY)
else:
print(f"Lambda function '{LAMBDA_NAME}' not found. Creating new function...")
create_function(lambda_client, LAMBDA_NAME, LAMBDA_RUNTIME, LAMBDA_ROLE_ARN, LAMBDA_HANDLER, deployment_package, LAMBDA_TIMEOUT, LAMBDA_MEMORY)
create_function(lambda_client, LAMBDA_NAME, LAMBDA_RUNTIME, LAMBDA_ROLE_ARN, LAMBDA_HANDLER, deployment_package, LAMBDA_TIMEOUT, LAMBDA_MEMORY, layers, kms_key_id)
print("Lambda deployment completed successfully!")

View File

@@ -1,5 +1,4 @@
requests
newspaper3k
feedparser
python-dateutil
pandas
lxml

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@@ -5,7 +5,7 @@ import shutil
from botocore.exceptions import ClientError
# Set variables
LAYER_NAME = "RSSFeedProcessorDependencies"
LAYER_NAME = os.getenv('S3_LAYER_KEY_NAME')
BUCKET_NAME = os.getenv("S3_LAYER_BUCKET_NAME")
REQUIREMENTS_FILE = "src/lambda_function/layers/requirements.txt"
ZIP_FILE = f"{LAYER_NAME}.zip"
@@ -37,17 +37,25 @@ def create_s3_bucket_if_not_exists(bucket_name, region=None):
# For any other errors, re-raise the exception
raise e
def install_requirements(requirements_file, target_dir):
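# Install the layer's requirements into target_dir (layer/python) with pip, replacing the
# inline subprocess call removed below.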
subprocess.check_call([
"pip", "install",
"-r", requirements_file,
"-t", target_dir
])
def create_lambda_layer():
# Create a temporary directory for the layer
os.makedirs("layer/python", exist_ok=True)
# Install dependencies
subprocess.check_call([
"pip", "install",
"-r", REQUIREMENTS_FILE,
"-t", "layer/python"
])
print("Finished Installing Packages")
# Install dependencies from requirements.txt
install_requirements(REQUIREMENTS_FILE, "layer/python")
print("Finished Installing Packages from requirements.txt")
# Create ZIP file
shutil.make_archive(LAYER_NAME, 'zip', "layer")
@@ -72,7 +80,7 @@ def create_lambda_layer():
'S3Bucket': BUCKET_NAME,
'S3Key': ZIP_FILE
},
CompatibleRuntimes=['python3.10', 'python3.11']
CompatibleRuntimes=['python3.11']
)
print(f"Created Lambda layer version: {response['Version']}")

View File

@@ -0,0 +1,98 @@
#!/bin/bash
# Update system packages
echo "Updating system packages..."
sudo yum update -y
# Install development tools
echo "Installing development tools..."
sudo yum groupinstall "Development Tools" -y
# Install Python 3.11
echo "Installing Python 3.11..."
sudo amazon-linux-extras enable python3.11
sudo yum install python3.11 -y
# Verify Python 3.11 installation
if command -v python3.11 &>/dev/null; then
echo "Python 3.11 installed successfully:"
python3.11 --version
else
echo "Failed to install Python 3.11. Exiting."
exit 1
fi
# Install pip for Python 3.11
echo "Installing pip for Python 3.11..."
sudo python3.11 -m ensurepip --upgrade
# Verify pip installation
if command -v pip3.11 &>/dev/null; then
echo "pip installed successfully:"
pip3.11 --version
else
echo "Failed to install pip. Exiting."
exit 1
fi
# Create directory for Lambda layer
echo "Creating directory for Lambda layer..."
mkdir -p OpenRSSLambdaLayer/python
cd OpenRSSLambdaLayer
# Install packages
echo "Installing packages..."
pip3.11 install newspaper3k feedparser python-dateutil -t python/
# Create ZIP file
echo "Creating ZIP file..."
zip -r OpenRSSLambdaLayer.zip python/
# Upload to S3
echo "Uploading to S3..."
aws s3 cp OpenRSSLambdaLayer.zip s3://rss-feed-processor-layers/OpenRSSLambdaLayer.zip
# Create Lambda layer
echo "Creating Lambda layer..."
LAYER_VERSION=$(aws lambda publish-layer-version \
--layer-name OpenRSSLambdaLayer \
--description "Layer with dependencies for RSS processing" \
--license-info "MIT" \
--content S3Bucket=rss-feed-processor-layers,S3Key=OpenRSSLambdaLayer.zip \
--compatible-runtimes python3.11 \
--query 'Version' \
--output text)
# Make layer public
echo "Making layer public..."
aws lambda add-layer-version-permission \
--layer-name OpenRSSLambdaLayer \
--version-number $LAYER_VERSION \
--statement-id public \
--action lambda:GetLayerVersion \
--principal '*'
# Calculate and print the ARN
REGION=$(aws configure get region)
ACCOUNT_ID=$(aws sts get-caller-identity --query Account --output text)
ARN="arn:aws:lambda:${REGION}:${ACCOUNT_ID}:layer:OpenRSSLambdaLayer:${LAYER_VERSION}"
echo "Setup complete! OpenRSSLambdaLayer is now available to anyone on the internet."
echo "Layer ARN: $ARN"
echo ""
echo "Copy the ARN below:"
echo "$ARN"
# Double-check and verify
echo ""
echo "Verification steps:"
echo "1. Verifying S3 upload..."
aws s3 ls s3://rss-feed-processor-layers/OpenRSSLambdaLayer.zip
echo "2. Verifying Lambda layer..."
aws lambda get-layer-version --layer-name OpenRSSLambdaLayer --version-number $LAYER_VERSION
echo "3. Verifying public access..."
aws lambda get-layer-version-policy --layer-name OpenRSSLambdaLayer --version-number $LAYER_VERSION
echo "Script execution completed. Please review the output above for any errors."

View File

@@ -1,7 +1,7 @@
import time
from botocore.exceptions import ClientError
def retry_with_backoff(max_retries=5, initial_backoff=1, backoff_multiplier=2):
def retry_with_backoff(max_retries=20, initial_backoff=1, backoff_multiplier=4):
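# Defaults raised from 5 retries with a 2x multiplier to 20 retries with a 4x multiplier;
# the initial backoff stays at 1 second.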
def decorator(func):
def wrapper(*args, **kwargs):
retries = 0
@@ -11,6 +11,7 @@ def retry_with_backoff(max_retries=5, initial_backoff=1, backoff_multiplier=2):
try:
return func(*args, **kwargs)
except ClientError as e:
print(e)
if e.response['Error']['Code'] in ['ResourceConflictException', 'ResourceInUseException']:
if retries == max_retries - 1:
raise