mirror of
https://github.com/aljazceru/IngestRSS.git
synced 2026-02-15 19:24:31 +01:00
Switch metrics to Prometheus
This commit is contained in:
@@ -58,7 +58,10 @@ The primary goal of IngestRSS is to provide researchers with a robust, scalable
|
||||
|
||||
## 📊 Monitoring
|
||||
|
||||
The Lambda function logs its activities to CloudWatch Logs. You can monitor the function's performance and any errors through the AWS CloudWatch console.
|
||||
The Lambda function logs are still sent to CloudWatch Logs, however metrics are
|
||||
exposed using [Prometheus](https://prometheus.io/). When the processor runs it
|
||||
starts a tiny HTTP server that serves metrics on `/metrics` (port `8000` by
|
||||
default). These metrics can be scraped by a Prometheus server for monitoring.
|
||||
|
||||
## 🤝 Contributing
|
||||
|
||||
|
||||
@@ -5,4 +5,5 @@ constructs==10.2.69
|
||||
# Optional, yet necessary for the Pinecone SDK functionality.
|
||||
pinecone
|
||||
openai
|
||||
tqdm
|
||||
tqdm
|
||||
prometheus-client==0.20.*
|
||||
|
||||
@@ -1,26 +1,46 @@
|
||||
import boto3
|
||||
import time
|
||||
"""Prometheus metrics utilities for the RSS feed processor."""
|
||||
|
||||
cloudwatch = boto3.client('cloudwatch')
|
||||
import os
|
||||
from prometheus_client import Counter, Histogram, start_http_server
|
||||
|
||||
def put_metric_data(metric_name, value, unit='Count'):
|
||||
cloudwatch.put_metric_data(
|
||||
Namespace='RSS/FeedProcessor',
|
||||
MetricData=[
|
||||
{
|
||||
'MetricName': metric_name,
|
||||
'Value': value,
|
||||
'Unit': unit,
|
||||
'Timestamp': time.time()
|
||||
},
|
||||
]
|
||||
)
|
||||
|
||||
def record_processed_articles(count):
|
||||
put_metric_data('ProcessedArticles', count)
|
||||
# Start a Prometheus metrics HTTP server exposing ``/metrics``. The port can be
|
||||
# customised with the ``METRICS_PORT`` environment variable. This block is safe
|
||||
# to run multiple times as it silently ignores port binding errors.
|
||||
_metrics_port = int(os.environ.get("METRICS_PORT", "8000"))
|
||||
if not os.environ.get("METRICS_SERVER_STARTED"):
|
||||
try:
|
||||
start_http_server(_metrics_port)
|
||||
os.environ["METRICS_SERVER_STARTED"] = "1"
|
||||
except OSError:
|
||||
pass
|
||||
|
||||
def record_processing_time(duration):
|
||||
put_metric_data('ProcessingTime', duration, 'Seconds')
|
||||
|
||||
def record_extraction_errors(count):
|
||||
put_metric_data('ExtractionErrors', count)
|
||||
# Metric definitions
|
||||
_processed_articles = Counter(
|
||||
"processed_articles_total",
|
||||
"Total number of processed articles",
|
||||
)
|
||||
_processing_time = Histogram(
|
||||
"rss_processing_seconds",
|
||||
"Time spent processing RSS feeds",
|
||||
)
|
||||
_extraction_errors = Counter(
|
||||
"extraction_errors_total",
|
||||
"Number of article extraction errors",
|
||||
)
|
||||
|
||||
|
||||
def record_processed_articles(count: int) -> None:
|
||||
"""Increment the processed articles counter."""
|
||||
_processed_articles.inc(count)
|
||||
|
||||
|
||||
def record_processing_time(duration: float) -> None:
|
||||
"""Record how long a feed took to process."""
|
||||
_processing_time.observe(duration)
|
||||
|
||||
|
||||
def record_extraction_errors(count: int) -> None:
|
||||
"""Increment the extraction errors counter."""
|
||||
_extraction_errors.inc(count)
|
||||
|
||||
Reference in New Issue
Block a user