Switch metrics to Prometheus

2026-02-23 07:04:32 +01:00 · 2025-06-02 13:46:09 +02:00
parent 8ad98c4cb4
commit 8e11a0153c
3 changed files with 47 additions and 23 deletions
--- a/README.md
+++ b/README.md
@@ -58,7 +58,10 @@ The primary goal of IngestRSS is to provide researchers with a robust, scalable

 ## 📊 Monitoring

-The Lambda function logs its activities to CloudWatch Logs. You can monitor the function's performance and any errors through the AWS CloudWatch console.
+The Lambda function logs are still sent to CloudWatch Logs, however metrics are
+exposed using [Prometheus](https://prometheus.io/). When the processor runs it
+starts a tiny HTTP server that serves metrics on `/metrics` (port `8000` by
+default). These metrics can be scraped by a Prometheus server for monitoring.

 ## 🤝 Contributing

--- a/requirements.txt
+++ b/requirements.txt
@@ -5,4 +5,5 @@ constructs==10.2.69
 # Optional, yet necessary for the Pinecone SDK functionality.
 pinecone
 openai
-tqdm
+tqdm
+prometheus-client==0.20.*
--- a/src/infra/lambdas/RSSFeedProcessorLambda/src/metrics.py
+++ b/src/infra/lambdas/RSSFeedProcessorLambda/src/metrics.py
@@ -1,26 +1,46 @@
-import boto3
-import time
+"""Prometheus metrics utilities for the RSS feed processor."""

-cloudwatch = boto3.client('cloudwatch')
+import os
+from prometheus_client import Counter, Histogram, start_http_server

-def put_metric_data(metric_name, value, unit='Count'):
-    cloudwatch.put_metric_data(
-        Namespace='RSS/FeedProcessor',
-        MetricData=[
-            {
-                'MetricName': metric_name,
-                'Value': value,
-                'Unit': unit,
-                'Timestamp': time.time()
-            },
-        ]
-    )

-def record_processed_articles(count):
-    put_metric_data('ProcessedArticles', count)
+# Start a Prometheus metrics HTTP server exposing ``/metrics``. The port can be
+# customised with the ``METRICS_PORT`` environment variable. This block is safe
+# to run multiple times as it silently ignores port binding errors.
+_metrics_port = int(os.environ.get("METRICS_PORT", "8000"))
+if not os.environ.get("METRICS_SERVER_STARTED"):
+    try:
+        start_http_server(_metrics_port)
+        os.environ["METRICS_SERVER_STARTED"] = "1"
+    except OSError:
+        pass

-def record_processing_time(duration):
-    put_metric_data('ProcessingTime', duration, 'Seconds')

-def record_extraction_errors(count):
-    put_metric_data('ExtractionErrors', count)
+# Metric definitions
+_processed_articles = Counter(
+    "processed_articles_total",
+    "Total number of processed articles",
+)
+_processing_time = Histogram(
+    "rss_processing_seconds",
+    "Time spent processing RSS feeds",
+)
+_extraction_errors = Counter(
+    "extraction_errors_total",
+    "Number of article extraction errors",
+)
+
+
+def record_processed_articles(count: int) -> None:
+    """Increment the processed articles counter."""
+    _processed_articles.inc(count)
+
+
+def record_processing_time(duration: float) -> None:
+    """Record how long a feed took to process."""
+    _processing_time.observe(duration)
+
+
+def record_extraction_errors(count: int) -> None:
+    """Increment the extraction errors counter."""
+    _extraction_errors.inc(count)