mega changes

2025-11-20 11:11:18 +01:00
parent e070c95190
commit 841d79f26b
138 changed files with 21499 additions and 8844 deletions


@@ -20,6 +20,7 @@ logger = logging.getLogger(__name__)
@dataclass
class RequestMetric:
"""Individual request metric"""
timestamp: datetime
provider: str
model: str
@@ -35,26 +36,30 @@ class RequestMetric:
class MetricsCollector:
"""Collects and aggregates LLM service metrics"""
def __init__(self, max_history_size: int = 10000):
"""
Initialize metrics collector
Args:
max_history_size: Maximum number of metrics to keep in memory
"""
self.max_history_size = max_history_size
self._metrics: deque = deque(maxlen=max_history_size)
-self._provider_metrics: Dict[str, deque] = defaultdict(lambda: deque(maxlen=1000))
+self._provider_metrics: Dict[str, deque] = defaultdict(
+    lambda: deque(maxlen=1000)
+)
self._lock = threading.RLock()
# Aggregated metrics cache
self._cache_timestamp: Optional[datetime] = None
self._cached_metrics: Optional[LLMMetrics] = None
self._cache_ttl_seconds = 60 # Cache for 1 minute
-logger.info(f"Metrics collector initialized with max history: {max_history_size}")
+logger.info(
+    f"Metrics collector initialized with max history: {max_history_size}"
+)
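# Hedged note, not part of this commit: the maxlen deques above give the
# collector bounded memory -- once max_history_size (or 1000 per provider)
# is reached, appending a new metric silently evicts the oldest one, e.g.:
#   from collections import deque
#   d = deque(maxlen=3); d.extend([1, 2, 3, 4])  # -> deque([2, 3, 4], maxlen=3)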
def record_request(
self,
provider: str,
@@ -66,7 +71,7 @@ class MetricsCollector:
security_risk_score: float = 0.0,
error_code: Optional[str] = None,
user_id: Optional[str] = None,
-api_key_id: Optional[int] = None
+api_key_id: Optional[int] = None,
):
"""Record a request metric"""
metric = RequestMetric(
@@ -80,64 +85,73 @@ class MetricsCollector:
security_risk_score=security_risk_score,
error_code=error_code,
user_id=user_id,
-api_key_id=api_key_id
+api_key_id=api_key_id,
)
with self._lock:
self._metrics.append(metric)
self._provider_metrics[provider].append(metric)
# Invalidate cache
self._cached_metrics = None
self._cache_timestamp = None
# Log significant events
if not success:
-logger.warning(f"Request failed: {provider}/{model} - {error_code or 'Unknown error'}")
+logger.warning(
+    f"Request failed: {provider}/{model} - {error_code or 'Unknown error'}"
+)
elif security_risk_score > 0.6:
-logger.info(f"High risk request: {provider}/{model} - risk score: {security_risk_score:.3f}")
+logger.info(
+    f"High risk request: {provider}/{model} - risk score: {security_risk_score:.3f}"
+)
def get_metrics(self, force_refresh: bool = False) -> LLMMetrics:
"""Get aggregated metrics"""
with self._lock:
# Check cache validity
-if (not force_refresh and
-    self._cached_metrics and
-    self._cache_timestamp and
-    (datetime.utcnow() - self._cache_timestamp).total_seconds() < self._cache_ttl_seconds):
+if (
+    not force_refresh
+    and self._cached_metrics
+    and self._cache_timestamp
+    and (datetime.utcnow() - self._cache_timestamp).total_seconds()
+    < self._cache_ttl_seconds
+):
return self._cached_metrics
# Calculate fresh metrics
metrics = self._calculate_metrics()
# Cache results
self._cached_metrics = metrics
self._cache_timestamp = datetime.utcnow()
return metrics
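# Hedged note, not part of this commit: with the 60-second TTL above, repeated
# get_metrics() calls within a minute return the cached LLMMetrics object;
# force_refresh=True (or any record_request call, which clears the cache)
# triggers a fresh _calculate_metrics() pass, e.g.:
#   first = metrics_collector.get_metrics()                    # computed, cached
#   second = metrics_collector.get_metrics()                   # served from cache
#   fresh = metrics_collector.get_metrics(force_refresh=True)  # recomputed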
def _calculate_metrics(self) -> LLMMetrics:
"""Calculate aggregated metrics from recorded data"""
if not self._metrics:
return LLMMetrics()
total_requests = len(self._metrics)
successful_requests = sum(1 for m in self._metrics if m.success)
failed_requests = total_requests - successful_requests
# Calculate averages
latencies = [m.latency_ms for m in self._metrics if m.latency_ms > 0]
risk_scores = [m.security_risk_score for m in self._metrics]
avg_latency = sum(latencies) / len(latencies) if latencies else 0.0
avg_risk_score = sum(risk_scores) / len(risk_scores) if risk_scores else 0.0
# Provider-specific metrics
provider_metrics = {}
for provider, provider_data in self._provider_metrics.items():
if provider_data:
-provider_metrics[provider] = self._calculate_provider_metrics(provider_data)
+provider_metrics[provider] = self._calculate_provider_metrics(
+    provider_data
+)
return LLMMetrics(
total_requests=total_requests,
successful_requests=successful_requests,
@@ -145,48 +159,50 @@ class MetricsCollector:
average_latency_ms=avg_latency,
average_risk_score=avg_risk_score,
provider_metrics=provider_metrics,
-last_updated=datetime.utcnow()
+last_updated=datetime.utcnow(),
)
def _calculate_provider_metrics(self, provider_data: deque) -> Dict[str, Any]:
"""Calculate metrics for a specific provider"""
if not provider_data:
return {}
total = len(provider_data)
successful = sum(1 for m in provider_data if m.success)
failed = total - successful
latencies = [m.latency_ms for m in provider_data if m.latency_ms > 0]
avg_latency = sum(latencies) / len(latencies) if latencies else 0.0
# Token usage aggregation
total_prompt_tokens = 0
total_completion_tokens = 0
total_tokens = 0
for metric in provider_data:
if metric.token_usage:
total_prompt_tokens += metric.token_usage.get("prompt_tokens", 0)
-total_completion_tokens += metric.token_usage.get("completion_tokens", 0)
+total_completion_tokens += metric.token_usage.get(
+    "completion_tokens", 0
+)
total_tokens += metric.token_usage.get("total_tokens", 0)
# Model distribution
model_counts = defaultdict(int)
for metric in provider_data:
model_counts[metric.model] += 1
# Request type distribution
request_type_counts = defaultdict(int)
for metric in provider_data:
request_type_counts[metric.request_type] += 1
# Error analysis
error_counts = defaultdict(int)
for metric in provider_data:
if not metric.success and metric.error_code:
error_counts[metric.error_code] += 1
return {
"total_requests": total,
"successful_requests": successful,
@@ -198,51 +214,57 @@ class MetricsCollector:
"total_completion_tokens": total_completion_tokens,
"total_tokens": total_tokens,
"avg_prompt_tokens": total_prompt_tokens / total if total > 0 else 0,
"avg_completion_tokens": total_completion_tokens / successful if successful > 0 else 0
"avg_completion_tokens": total_completion_tokens / successful
if successful > 0
else 0,
},
"model_distribution": dict(model_counts),
"request_type_distribution": dict(request_type_counts),
"error_distribution": dict(error_counts),
"recent_requests": total
"recent_requests": total,
}
def get_provider_metrics(self, provider: str) -> Optional[Dict[str, Any]]:
"""Get metrics for a specific provider"""
with self._lock:
if provider not in self._provider_metrics:
return None
return self._calculate_provider_metrics(self._provider_metrics[provider])
def get_recent_metrics(self, minutes: int = 5) -> List[RequestMetric]:
"""Get metrics from the last N minutes"""
cutoff_time = datetime.utcnow() - timedelta(minutes=minutes)
with self._lock:
return [m for m in self._metrics if m.timestamp >= cutoff_time]
def get_error_metrics(self, hours: int = 1) -> Dict[str, int]:
"""Get error distribution from the last N hours"""
cutoff_time = datetime.utcnow() - timedelta(hours=hours)
error_counts = defaultdict(int)
with self._lock:
for metric in self._metrics:
-if metric.timestamp >= cutoff_time and not metric.success and metric.error_code:
+if (
+    metric.timestamp >= cutoff_time
+    and not metric.success
+    and metric.error_code
+):
error_counts[metric.error_code] += 1
return dict(error_counts)
def get_performance_metrics(self, minutes: int = 15) -> Dict[str, Dict[str, float]]:
"""Get performance metrics by provider from the last N minutes"""
cutoff_time = datetime.utcnow() - timedelta(minutes=minutes)
provider_perf = defaultdict(list)
with self._lock:
for metric in self._metrics:
if metric.timestamp >= cutoff_time and metric.success:
provider_perf[metric.provider].append(metric.latency_ms)
performance = {}
for provider, latencies in provider_perf.items():
if latencies:
@@ -252,26 +274,26 @@ class MetricsCollector:
"max_latency_ms": max(latencies),
"p95_latency_ms": self._percentile(latencies, 95),
"p99_latency_ms": self._percentile(latencies, 99),
"request_count": len(latencies)
"request_count": len(latencies),
}
return performance
def _percentile(self, data: List[float], percentile: int) -> float:
"""Calculate percentile of a list of numbers"""
if not data:
return 0.0
sorted_data = sorted(data)
index = (percentile / 100.0) * (len(sorted_data) - 1)
if index.is_integer():
return sorted_data[int(index)]
else:
lower = sorted_data[int(index)]
upper = sorted_data[int(index) + 1]
return lower + (upper - lower) * (index - int(index))
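# Hedged worked example, not part of this commit: the linear interpolation above
# for latencies [100.0, 200.0, 300.0, 400.0] at the 95th percentile gives
#   index = 0.95 * (4 - 1) = 2.85
#   lower = sorted_data[2] = 300.0, upper = sorted_data[3] = 400.0
#   result = 300.0 + (400.0 - 300.0) * 0.85 = 385.0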
def clear_metrics(self):
"""Clear all metrics (use with caution)"""
with self._lock:
@@ -279,20 +301,20 @@ class MetricsCollector:
self._provider_metrics.clear()
self._cached_metrics = None
self._cache_timestamp = None
logger.info("All metrics cleared")
def get_health_summary(self) -> Dict[str, Any]:
"""Get a health summary for monitoring"""
metrics = self.get_metrics()
recent_metrics = self.get_recent_metrics(minutes=5)
error_metrics = self.get_error_metrics(hours=1)
# Calculate health scores
total_recent = len(recent_metrics)
successful_recent = sum(1 for m in recent_metrics if m.success)
success_rate = successful_recent / total_recent if total_recent > 0 else 1.0
# Determine health status
if success_rate >= 0.95:
health_status = "healthy"
@@ -300,18 +322,20 @@ class MetricsCollector:
health_status = "degraded"
else:
health_status = "unhealthy"
return {
"health_status": health_status,
"success_rate_5min": success_rate,
"total_requests_5min": total_recent,
"average_latency_ms": metrics.average_latency_ms,
"error_count_1hour": sum(error_metrics.values()),
"top_errors": dict(sorted(error_metrics.items(), key=lambda x: x[1], reverse=True)[:5]),
"top_errors": dict(
sorted(error_metrics.items(), key=lambda x: x[1], reverse=True)[:5]
),
"provider_count": len(metrics.provider_metrics),
"last_updated": datetime.utcnow().isoformat()
"last_updated": datetime.utcnow().isoformat(),
}
# Global metrics collector instance
metrics_collector = MetricsCollector()
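A minimal usage sketch for the module-level collector follows. The import path is illustrative, and the success, latency_ms, and request_type keyword arguments are inferred from the RequestMetric fields used elsewhere in this diff rather than from the visible signature:

from llm_service.metrics import metrics_collector  # illustrative import path

# Record a completed request; success, latency_ms and request_type are assumed
# keyword names (only provider, model, security_risk_score, error_code, user_id
# and api_key_id are visible in the signature hunk above).
metrics_collector.record_request(
    provider="openai",
    model="gpt-4o-mini",
    success=True,
    latency_ms=412.5,
    request_type="chat",
    security_risk_score=0.12,
    user_id="user-123",
)

# Aggregated views built from the recorded history.
summary = metrics_collector.get_health_summary()
print(summary["health_status"], summary["success_rate_5min"])
recent_errors = metrics_collector.get_error_metrics(hours=1)
perf = metrics_collector.get_performance_metrics(minutes=15)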