From f400c8bda5fd4208c35e02392c57f2c99521094a Mon Sep 17 00:00:00 2001 From: Charles-Gormley Date: Wed, 13 Nov 2024 18:45:05 -0500 Subject: [PATCH] Fixing metadata for s3 --- .../RSSFeedProcessorLambda/src/data_storage.py | 17 +++++++++++++++-- todo.md | 2 +- 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/src/infra/lambdas/RSSFeedProcessorLambda/src/data_storage.py b/src/infra/lambdas/RSSFeedProcessorLambda/src/data_storage.py index 536b18e..6fdbdf1 100644 --- a/src/infra/lambdas/RSSFeedProcessorLambda/src/data_storage.py +++ b/src/infra/lambdas/RSSFeedProcessorLambda/src/data_storage.py @@ -62,8 +62,21 @@ def s3_save_article(article:dict): json.dump(article, f) try: - s3.upload_file(file_path, CONTENT_BUCKET, file_key) - + s3.upload_file(file_path, + CONTENT_BUCKET, + file_key, + ExtraArgs={ + "Metadata": + { + "rss": article.get("rss", ""), + "title": article.get("title", ""), + "unixTime": str(article.get("unixTime", "")), + "article_id": article.get("article_id", ""), + "link": article.get("link", ""), + "rss_id": article.get("rss_id", "") + } + } + ) logger.info(f"Saved article {article_id} to S3 bucket {CONTENT_BUCKET}") except Exception as e: diff --git a/todo.md b/todo.md index b627dfd..2d74cc7 100644 --- a/todo.md +++ b/todo.md @@ -1,6 +1,6 @@ # Before Public Launch * Rethink Partitioning Strategy [Done] -* Implement New Partitioning Strategy. [Next] +* Implement New Partitioning Strategy. [Done] - Incorporate: All the json data except for content and link. Partitioning Strategy: {Year}/{Month}/{Day}/{article_id} * API Tool - to Pull data that you have down.