From 65e91e62ed5a038d5e6c1ce245f2b1cc9239e4b0 Mon Sep 17 00:00:00 2001 From: "Charles E. Gormley" Date: Mon, 2 Sep 2024 20:16:51 -0400 Subject: [PATCH] Batch changes. --- CHANGELOG.md | 0 CONTRIBUTING.md | 0 README.md | 110 +----------------- launch.py | 19 ++- src/article_storage/create_index.py | 2 +- .../deploy_infrastructure.cpython-312.pyc | Bin 6986 -> 7052 bytes src/infra/deploy_infrastructure.py | 13 ++- src/infra/deploy_sqs.py | 77 ------------ .../deploy_rss_feed_lambda.py | 12 +- .../deploy_sqs_filler_lambda.py | 10 +- .../__pycache__/retry_logic.cpython-310.pyc | Bin 1246 -> 1348 bytes todo.md | 7 +- tree.md | 81 +++++++++++++ 13 files changed, 128 insertions(+), 203 deletions(-) create mode 100644 CHANGELOG.md create mode 100644 CONTRIBUTING.md delete mode 100644 src/infra/deploy_sqs.py create mode 100644 tree.md diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..e69de29 diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000..e69de29 diff --git a/README.md b/README.md index 15cc6ea..4481f66 100644 --- a/README.md +++ b/README.md @@ -2,103 +2,8 @@ OpenRSS is an AWS-based RSS feed processing system that automatically fetches, processes, and stores articles from specified RSS feeds. -## Project Structure -``` -OpenRSS/ -├── src/ -│ ├── infra/ -│ │ ├── cloudformation/ -│ │ │ ├── s3.yaml -│ │ │ ├── dynamo.yaml -│ │ │ └── sqs.yaml -│ │ └── deploy_infrastructure.py -│ ├── lambda_function/ -│ │ ├── src/ -│ │ │ ├── lambda_function.py -│ │ │ ├── feed_processor.py -│ │ │ ├── article_extractor.py -│ │ │ ├── data_storage.py -│ │ │ ├── utils.py -│ │ │ ├── config.py -│ │ │ ├── exceptions.py -│ │ │ └── metrics.py -│ │ ├── tests/ -│ │ │ └── test_lambda_function.py -│ │ ├── layers/ -│ │ │ └── requirements.txt -│ │ ├── deploy_lambda.py -│ │ └── update_env_vars.py -│ └── utils/ -│ ├── create_lambda_layer.py -│ └── upload_rss_feeds.py -├── launch.py -├── rss_feeds.json -├── requirements.txt -└── README.md -``` -## Prerequisites - -- Python 3.8+ -- AWS CLI configured with appropriate permissions -- An AWS account with necessary services (S3, DynamoDB, SQS, Lambda) enabled - -## Setup - -1. Clone the repository: - ``` - git clone https://github.com/yourusername/OpenRSS.git - cd OpenRSS - ``` - -2. Install the required dependencies: - ``` - pip install -r requirements.txt - ``` - -3. Create a `.env` file in the root directory with the following content: - ``` - AWS_ACCESS_KEY_ID=your_access_key_here - AWS_SECRET_ACCESS_KEY=your_secret_key_here - AWS_REGION=us-east-1 - ``` - -4. Update the `rss_feeds.json` file with the RSS feeds you want to process. - -## Usage - -To deploy the infrastructure and start the RSS feed processor: - -``` -python launch.py -``` - -This script will: -1. Deploy the necessary AWS infrastructure (S3, DynamoDB, SQS) using CloudFormation. -2. Create and upload the Lambda layer. -3. Deploy the Lambda function. -4. Upload the RSS feeds to DynamoDB. -5. Trigger an initial execution of the Lambda function. - -## Infrastructure - -The project uses the following AWS services: - -- S3: Stores processed articles -- DynamoDB: Stores RSS feed information and processing status -- SQS: Queues RSS feeds for processing -- Lambda: Processes RSS feeds and extracts articles - -## Lambda Function - -The Lambda function (`src/lambda_function/src/lambda_function.py`) is triggered periodically to process RSS feeds. It: - -1. Retrieves RSS feed information from DynamoDB -2. Fetches and parses the RSS feed -3. Extracts articles using the newspaper3k library -4. Stores processed articles in S3 -5. Updates the feed's last processed timestamp in DynamoDB ## Customization @@ -106,22 +11,13 @@ The Lambda function (`src/lambda_function/src/lambda_function.py`) is triggered - To change the Lambda function's behavior, modify the Python files in `src/lambda_function/src/`. - To add or remove RSS feeds, update the `rss_feeds.json` file. -## Testing - -To run the tests for the Lambda function: - -``` -python -m pytest src/lambda_function/tests/ -``` ## Monitoring - The Lambda function logs its activities to CloudWatch Logs. You can monitor the function's performance and any errors through the AWS CloudWatch console. ## Contributing - -Contributions are welcome! Please feel free to submit a Pull Request. +We are still working on a contribution framework. But they are more than welcome! Feel free to submit a PR which will be approved by the team. +Check ## License - -This project is licensed under the MIT License. \ No newline at end of file +[![License: MIT](https://img.shields.io/badge/License-MIT-green.svg)](https://opensource.org/licenses/MIT) \ No newline at end of file diff --git a/launch.py b/launch.py index 32585b8..c60b902 100644 --- a/launch.py +++ b/launch.py @@ -4,6 +4,7 @@ import json import boto3 from dotenv import load_dotenv import logging +from src.infra.lambdas.RSSQueueFiller.deploy_sqs_filler_lambda import deploy_sqs_filler # Load environment variables load_dotenv() @@ -36,18 +37,30 @@ from src.feed_management.upload_rss_feeds import upload_rss_feeds def main(): # Deploy infrastructure - # deploy_infrastructure() # TODO: Add in sqs lambda filler here. - # logging.info("Finished Deploying Infrastructure") + deploy_infrastructure() + logging.info("Finished Deploying Infrastructure") # Deploy Lambda function deploy_lambda() - print("Finished Deploying Lambda") + logging.info("Finished Deploying Lambda") + + deploy_sqs_filler() + logging.info("Finished Deploying SQS Filler Lambda") + + # Update Lambda environment variables update_env_vars(LAMBDA_FUNCTION_NAME) print("Finished Environment Variable Updates") + + + # TODO: Add in an eventbridge timer to trigger the lambda. + + # TODO: Add in a 2x check to make sure the queue trigger and the eb trigger are enabled. + + # Upload RSS feeds rss_feeds_file = os.path.join(current_dir, "rss_feeds.json") if os.path.exists(rss_feeds_file): diff --git a/src/article_storage/create_index.py b/src/article_storage/create_index.py index 9dc154f..7833fda 100644 --- a/src/article_storage/create_index.py +++ b/src/article_storage/create_index.py @@ -8,7 +8,7 @@ load_dotenv() region = os.getenv("AWS_REGION") index_name = os.getenv("PINECONE_DB_NAME") -index_name = "quickstart" # TODO: Remove this line after we are done testing with vector dbs. +index_name = "quickstart" # TODO: Remove this line after we are done testing with vector dbs. if index_name not in pc.list_indexes().names(): pc.create_index( diff --git a/src/infra/__pycache__/deploy_infrastructure.cpython-312.pyc b/src/infra/__pycache__/deploy_infrastructure.cpython-312.pyc index e2f39645982027110f10c4b8a8ca4ffac4f747fa..188e56feacc74c79714101c4a3b6cb0063f71e27 100644 GIT binary patch delta 1084 zcmZvaUuaWT9LLY^-g}aJa(i$7)HG?bRLu;Uuqkb~wQkimwl(!(_%gASZI{@4mHtU0 zCs8OUvQkiF8Fc4mFa~9?F;*YM_#kdW9EOZ-4?Yx`!G|$6*!HrniVrP}{Z6{b9z1YA z{C?lxzjMw#-@o?$fcb-I`UtLf#~+-&vut)N9k9G_DC-bMo7W3$iUCoG%T{F$u9gS0 zJGQ<5)1j-d?N1q4A?}#rOOD4iu6b9L|8x`e=hy1bYAwv6?P(4>LryD>ZwcDUl;1Iy z$W*}TKnC5AW6td(gzH!pZ07)3fCWffe~6us@u#6_%$M?x?UX8R@w_+&qmUF;IBs2d zwqqa_Ce~m#7~(0s0bbFkTt?njjw5|)75R~R4Ec*X0Lvn#CD0z!s=K}I8*qFV=*Pr~ ze4@5b0;Kq&6BRvLL=9BDAJT*hi$r8-KizfFsCdOG+NUg%+6oN<1Zp=tdmu#5@|j|# zsg=&U#iqvHyi@z#I}E;loZ`o?{HEw-d-O*dyOLSc!r}xQ083O^9=gO|>=yD1U-i<@ zf$$Sbf@2^#3R~+DKkM3hk|Z6G4Wdx%k#61(q*i8E-o75XIeH6fN3Rdww}#gALqF-} z);h(M&2kaPeH9$*QSU~TT#DR{XFJC-SaR_$-&8^t3plBtcD2{Y2=7C`89w7y>=L)9@tP~HJy$5(bM6A4!C0==-}$TRIfy1dArg_dAYJ1T z1_$t^5uQe*^;RO!quuOq-10P^IfYHIf&5qeq{O87GCGX=|1%n=@1by2__{aKFD0L< z28}&Nfi!j7F63v5ww>=4etC>_ggOh;FXgjovl6 z#JLL+X=#*FBN(I%s32lde2Nc|^Pw+-k&`R!OR$z=t^tKU%W*A2lNV>so;t7}e)Ii) zcYZU>?nkMc&l$H2!-e?xbMDUkg{wwLYM`shVMRg?3+PHN8NNmR!dq|K!1A^mlP&d6={NQ06>1G2-Bz#Xbc6^Cz z`D-&a75S2cal6WUQj2X(?z`^5t9wdKzQ@wOpNbfInESTNIte(D}O zmPw;ZkYqaADqZcFAzVobzcL7!p){@xVKGT-6CL^ktZ>VI_C_kb9ASnv(OWW8Vo;;-3E8cIQk2SmlFjemLeFe(%!X0L}{RtRk zkNkOeyAVPGDk}u~p_6?bxEOs#2%`dH97S<$(bgCprG0EP7$I+Qb-X+qypNRu5uIW$ znNyG~e`#v)^iiSkuL||3R0QcP$F6QW#W|Wg=UC^RRVuE8o)I|9?zWG_`b6Zg?JO@W zty;OlJk1tMbY-rzLLIusy(=sg+V45m{TFtT!~-s%UDdJ{v$=w0_4sM?D9z>G;1-=> z)8WBO^sJ~=f#xY^MHLokc5iNNjZei&h3ukZSu`!&_XNa-_SNCVC3|JTd4XQxQ}Ig{ zMN!89P(ykRX*J}kA@?5Y-$T8>d(8E7ALl;IeWu^`B-Yr`NMCZ-MZNcvuP1Y z@X|vNL}c(JLVEOTh=^yw%N)FT@)LOV?dIaZ{N}y+zyG`$=9m99|60w>IuK5BR=k2+NGEv96D|h09L^fY`Z3ndLvY~WQ3@rP2M-pY z2o&e4(X7?Dh$>7$*4Jqy)UIuV7GTI+yCHqP=v((+lPuUpXjYVyQ}O;aMjMyPGQR qg@Mflawy}I%sV?i>ye?FI=nl5C5|I`4=_uL2#*x;#U3oeJpBiM;fUw} delta 597 zcmYk3ziSjh6vyA2nZ4}BO9;uC&Bh-DyA8r2SY$y>K!OMofPI&d z0Ds-$f8%zsV@GkxLW0B$I?l5UhpfX8q%RwC?k#q>!~=+fsoe*AF;33h%z7#>cZoDj zTQ)dF2YU#luBx+lU#r$n>m^p~lvRApPWb@nk=esAcz{p|gcsW;$7p-s<)pU;14fr$ z!{$jMy*G$Z@=wBi#{@mY*v@)q^{y)P38m_G zey>ow&r&ridpaj!9c1*}`MM?Wu4^=x=Pm-hWWzpBx^PwGdgP>ty_HNl#ovETIVLf}O;iTtmnO$#VfgP4i1!yyCL6l6Bo0bzT43P|K4{-EciE+>?E1;v)M8 DY2SFa diff --git a/todo.md b/todo.md index 5770f15..65ba7ab 100644 --- a/todo.md +++ b/todo.md @@ -1,9 +1,12 @@ -# TODO: Clean up aws cdk stuff. -# TODO: Reorganize infra folder structure. + # TODO: Add in console setup python script for new project into launch.py +# TODO: Eventbridge set up ( make sure this works ) +# TODO: Automate eventbridge. +# TODO: Make sure ingestion of articles actually works # Modules * More RSS Feed Module +* Update Lambda Layer Creation Script to be comprehensive. # Future Modules * Gen AI Summarization Module diff --git a/tree.md b/tree.md new file mode 100644 index 0000000..ff89dad --- /dev/null +++ b/tree.md @@ -0,0 +1,81 @@ +. +├── README.md +├── launch.py +├── requirements.txt +├── rss_feeds.json +├── src +│   ├── article_storage +│   │   ├── __pycache__ +│   │   │   └── initialize.cpython-310.pyc +│   │   ├── create_index.py +│   │   └── initialize.py +│   ├── feed_management +│   │   ├── __pycache__ +│   │   │   └── upload_rss_feeds.cpython-312.pyc +│   │   └── upload_rss_feeds.py +│   ├── infra +│   │   ├── __pycache__ +│   │   │   └── deploy_infrastructure.cpython-312.pyc +│   │   ├── cloudformation +│   │   │   ├── dynamo.yaml +│   │   │   ├── lambda_role.yaml +│   │   │   ├── rss_lambda_stack.yaml +│   │   │   ├── s3.yaml +│   │   │   └── sqs.yaml +│   │   ├── deploy_infrastructure.py +│   │   ├── lambdas +│   │   │   ├── RSSFeedProcessorLambda +│   │   │   │   ├── __pycache__ +│   │   │   │   │   ├── deploy_lambda.cpython-310.pyc +│   │   │   │   │   ├── deploy_lambda.cpython-311.pyc +│   │   │   │   │   ├── deploy_lambda.cpython-312.pyc +│   │   │   │   │   ├── deploy_rss_feed_lambda.cpython-312.pyc +│   │   │   │   │   ├── update_env_vars.cpython-310.pyc +│   │   │   │   │   ├── update_lambda_env_vars.cpython-310.pyc +│   │   │   │   │   ├── update_lambda_env_vars.cpython-311.pyc +│   │   │   │   │   └── update_lambda_env_vars.cpython-312.pyc +│   │   │   │   ├── deploy_rss_feed_lambda.py +│   │   │   │   ├── layers +│   │   │   │   │   └── requirements.txt +│   │   │   │   └── src +│   │   │   │   ├── __pycache__ +│   │   │   │   │   └── utils.cpython-310.pyc +│   │   │   │   ├── article_extractor.py +│   │   │   │   ├── config.py +│   │   │   │   ├── data_storage.py +│   │   │   │   ├── exceptions.py +│   │   │   │   ├── feed_processor.py +│   │   │   │   ├── lambda_function.py +│   │   │   │   ├── metrics.py +│   │   │   │   └── utils.py +│   │   │   ├── RSSQueueFiller +│   │   │   │   ├── deploy_sqs_filler_lambda.py +│   │   │   │   └── lambda +│   │   │   │   └── lambda_function.py +│   │   │   └── lambda_utils +│   │   │   ├── __pycache__ +│   │   │   │   └── update_lambda_env_vars.cpython-312.pyc +│   │   │   ├── lambda_layer +│   │   │   │   └── lambda_layer_cloud9.sh +│   │   │   └── update_lambda_env_vars.py +│   │   └── tmp +│   └── utils +│   ├── __pycache__ +│   │   ├── create_lambda_layer.cpython-310.pyc +│   │   ├── create_lambda_layer.cpython-311.pyc +│   │   ├── create_lambda_layer.cpython-312.pyc +│   │   ├── create_s3_bucket.cpython-310.pyc +│   │   ├── kms_update.cpython-310.pyc +│   │   ├── kms_update.cpython-311.pyc +│   │   ├── kms_update.cpython-312.pyc +│   │   ├── retry_logic.cpython-310.pyc +│   │   ├── retry_logic.cpython-311.pyc +│   │   ├── retry_logic.cpython-312.pyc +│   │   ├── upload_rss_feeds.cpython-310.pyc +│   │   ├── upload_rss_feeds.cpython-311.pyc +│   │   └── upload_rss_feeds.cpython-312.pyc +│   └── retry_logic.py +├── template.env +├── tmp +├── todo.md +└── tree.md