diff --git a/launch.py b/launch.py index 6d48457..7798881 100644 --- a/launch.py +++ b/launch.py @@ -5,20 +5,18 @@ import boto3 from dotenv import load_dotenv import logging from src.infra.lambdas.RSSQueueFiller.deploy_sqs_filler_lambda import deploy_sqs_filler -# Load environment variables + +from src.utils.check_env import check_env + + load_dotenv(override=True) +check_env() # Set up logging logging.basicConfig(level=os.getenv('LOG_LEVEL')) lambda_client = boto3.client("lambda") -# Set AWS credentials from environment variables -TABLE_NAME = os.getenv('DYNAMODB_TABLE_NAME') -ACCOUNT_NUM = os.getenv("AWS_ACCOUNT_ID") -SQS_QUEUE_NAME = os.getenv("SQS_QUEUE_NAME") -REGION = os.getenv("AWS_REGION") - # Add the src directory to the Python path current_dir = os.path.dirname(os.path.abspath(__file__)) sys.path.append(current_dir) @@ -44,19 +42,16 @@ def main(): update_env_vars(os.getenv("LAMBDA_FUNCTION_NAME")) print("Finished Environment Variable Updates") - # Upload RSS feeds rss_feeds_file = os.path.join(current_dir, "rss_feeds.json") if os.path.exists(rss_feeds_file): with open(rss_feeds_file, 'r') as f: rss_feeds = json.load(f) - upload_rss_feeds(rss_feeds, TABLE_NAME) + upload_rss_feeds(rss_feeds, os.getenv('DYNAMODB_TABLE_NAME')) else: print(f"WARNING: {rss_feeds_file} not found. 
Skipping RSS feed upload.") print("RSS Feed Processor launched successfully!") - print("RSS Feed Processor launched successfully!") - if __name__ == "__main__": main() \ No newline at end of file diff --git a/src/infra/lambdas/RSSFeedProcessorLambda/src/analytics/embeddings/pinecone.py b/src/infra/lambdas/RSSFeedProcessorLambda/src/analytics/embeddings/vector_db.py similarity index 100% rename from src/infra/lambdas/RSSFeedProcessorLambda/src/analytics/embeddings/pinecone.py rename to src/infra/lambdas/RSSFeedProcessorLambda/src/analytics/embeddings/vector_db.py diff --git a/src/infra/lambdas/RSSFeedProcessorLambda/src/data_storage.py b/src/infra/lambdas/RSSFeedProcessorLambda/src/data_storage.py index 71c18a9..1cbda8f 100644 --- a/src/infra/lambdas/RSSFeedProcessorLambda/src/data_storage.py +++ b/src/infra/lambdas/RSSFeedProcessorLambda/src/data_storage.py @@ -5,7 +5,8 @@ import logging from random import randint # TODO: Move this article storage logic to a separate module inside of lambda. -from src.analytics.embeddings.pinecone import get_index, upsert_vectors, vectorize +# TODO: Get better at handling loading of local modules inside of the lambda. 
+from infra.lambdas.RSSFeedProcessorLambda.src.analytics.embeddings.vector_db import get_index, upsert_vectors, vectorize logger = logging.getLogger() diff --git a/src/utils/check_env.py b/src/utils/check_env.py new file mode 100644 index 0000000..607216e --- /dev/null +++ b/src/utils/check_env.py @@ -0,0 +1,96 @@ +import os +from dotenv import load_dotenv +from typing import List, Dict + +def check_env() -> None: + # Variables that must be set by the user + required_user_vars = [ + "AWS_REGION", + "AWS_ACCOUNT_ID", + "AWS_ACCESS_KEY_ID", + "AWS_SECRET_ACCESS_KEY" + ] + + # Variables that are derived or have default values + derived_vars = [ + "AWS_DEFAULT_REGION", + "LAMBDA_FUNCTION_NAME", + "STACK_BASE", + "LAMBDA_EXECUTION_ROLE_NAME", + "LAMBDA_ROLE_ARN", + "S3_BUCKET_NAME", + "DYNAMODB_TABLE_NAME", + "SQS_QUEUE_NAME", + "LAMBDA_LAYER_VERSION", + "LAMBDA_LAYER_NAME", + "LAMBDA_LAYER_ARN", + "S3_LAYER_BUCKET_NAME", + "S3_LAYER_KEY_NAME", + "SQS_QUEUE_URL", + "SQS_QUEUE_ARN", + "DYNAMODB_TABLE_ARN", + "PYTHON_VERSION", + "LAMBDA_RUNTIME", + "LAMBDA_TIMEOUT", + "LAMBDA_MEMORY", + "QUEUE_FILLER_LAMBDA_NAME", + "QUEUE_FILLER_LAMBDA_S3_KEY", + "LOG_LEVEL", + "APP_NAME", + "VERSION", + "STORAGE_STRATEGY" + ] + + # Variables that are optional depending on the storage strategy + optional_vars = { + "PINECONE_API_KEY": "pinecone", + "PINECONE_DB_NAME": "pinecone", + "OPENAI_API_KEY": "all" + } + + missing_vars: List[str] = [] + placeholder_vars: List[str] = [] + missing_optional_vars: List[str] = [] + + # Check required user variables + for var in required_user_vars: + value = os.getenv(var) + if value is None or value == "***" or value.strip() == "": + missing_vars.append(var) + + # Check derived variables + for var in derived_vars: + value = os.getenv(var) + if value is None: + missing_vars.append(var) + + # Check optional variables + storage_strategy = os.getenv("STORAGE_STRATEGY", "").lower() + for var, strategy in optional_vars.items(): + if strategy == "all" or 
strategy == storage_strategy: + value = os.getenv(var) + if value is None or value == "***" or value.strip() == "": + missing_optional_vars.append(var) + + if missing_vars or placeholder_vars or missing_optional_vars: + print("Error: Some environment variables are not properly set.") + + if missing_vars: + print("\nMissing or improperly set required variables:") + for var in missing_vars: + print(f"- {var}") + + if missing_optional_vars: + print("\nMissing or improperly set optional variables (based on your storage strategy):") + for var in missing_optional_vars: + print(f"- {var}") + + print("\nPlease set these environment variables before running the script.") + raise EnvironmentError("Missing or improperly set environment variables") + else: + print("All required environment variables are properly set.") + +# Example usage +if __name__ == "__main__": + load_dotenv(override=True) + check_env() \ No newline at end of file diff --git a/template.env b/template.env index b9bdd46..8beac0e 100644 --- a/template.env +++ b/template.env @@ -45,7 +45,7 @@ VERSION=1.0.0 STORAGE_STRATEGY=s3 # 's3' or 'pinecone' will support others in the future. -# Only need to fill out this if your storage strategy is pinecone [ Not currently supported. ] +# Only fill this out if your storage strategy is pinecone [ Not currently supported. ] PINECONE_API_KEY=*** PINECONE_DB_NAME=open-rss-articles