""" Cost calculation service for LLM model pricing """ from typing import Dict, Optional from app.core.logging import get_logger logger = get_logger(__name__) class CostCalculator: """Service for calculating costs based on model usage and token consumption""" # Model pricing in 1/10000ths of a dollar per 1000 tokens (input/output) MODEL_PRICING = { # OpenAI Models "gpt-4": {"input": 300, "output": 600}, # $0.03/$0.06 per 1K tokens "gpt-4-turbo": {"input": 100, "output": 300}, # $0.01/$0.03 per 1K tokens "gpt-3.5-turbo": {"input": 5, "output": 15}, # $0.0005/$0.0015 per 1K tokens # Anthropic Models "claude-3-opus": {"input": 150, "output": 750}, # $0.015/$0.075 per 1K tokens "claude-3-sonnet": {"input": 30, "output": 150}, # $0.003/$0.015 per 1K tokens "claude-3-haiku": {"input": 25, "output": 125}, # $0.00025/$0.00125 per 1K tokens # Google Models "gemini-pro": {"input": 5, "output": 15}, # $0.0005/$0.0015 per 1K tokens "gemini-pro-vision": {"input": 5, "output": 15}, # $0.0005/$0.0015 per 1K tokens # Privatemode.ai Models (estimated pricing) "privatemode-llama-70b": {"input": 40, "output": 80}, # Estimated pricing "privatemode-mixtral": {"input": 20, "output": 40}, # Estimated pricing # Embedding Models "text-embedding-ada-002": {"input": 1, "output": 0}, # $0.0001 per 1K tokens "text-embedding-3-small": {"input": 2, "output": 0}, # $0.00002 per 1K tokens "text-embedding-3-large": {"input": 13, "output": 0}, # $0.00013 per 1K tokens } # Default pricing for unknown models DEFAULT_PRICING = {"input": 10, "output": 20} # $0.001/$0.002 per 1K tokens @classmethod def get_model_pricing(cls, model_name: str) -> Dict[str, int]: """Get pricing for a specific model""" # Normalize model name (remove provider prefixes) normalized_name = cls._normalize_model_name(model_name) # Look up pricing pricing = cls.MODEL_PRICING.get(normalized_name, cls.DEFAULT_PRICING) logger.debug(f"Pricing for model '{model_name}' (normalized: '{normalized_name}'): {pricing}") return pricing @classmethod def _normalize_model_name(cls, model_name: str) -> str: """Normalize model name by removing provider prefixes""" # Remove common provider prefixes prefixes = ["openai/", "anthropic/", "google/", "gemini/", "privatemode/"] normalized = model_name.lower() for prefix in prefixes: if normalized.startswith(prefix): normalized = normalized[len(prefix):] break # Handle special cases if "claude-3-opus-20240229" in normalized: return "claude-3-opus" elif "claude-3-sonnet-20240229" in normalized: return "claude-3-sonnet" elif "claude-3-haiku-20240307" in normalized: return "claude-3-haiku" elif "meta-llama/llama-3.1-70b-instruct" in normalized: return "privatemode-llama-70b" elif "mistralai/mixtral-8x7b-instruct" in normalized: return "privatemode-mixtral" return normalized @classmethod def calculate_cost_cents( cls, model_name: str, input_tokens: int = 0, output_tokens: int = 0 ) -> int: """ Calculate cost in cents for given token usage Args: model_name: Name of the LLM model input_tokens: Number of input tokens used output_tokens: Number of output tokens generated Returns: Total cost in cents """ pricing = cls.get_model_pricing(model_name) # Calculate cost per token type input_cost_cents = (input_tokens * pricing["input"]) // 1000 output_cost_cents = (output_tokens * pricing["output"]) // 1000 total_cost_cents = input_cost_cents + output_cost_cents logger.debug( f"Cost calculation for {model_name}: " f"input_tokens={input_tokens} (${input_cost_cents/100:.4f}), " f"output_tokens={output_tokens} (${output_cost_cents/100:.4f}), " f"total=${total_cost_cents/100:.4f}" ) return total_cost_cents @classmethod def estimate_cost_cents(cls, model_name: str, estimated_tokens: int) -> int: """ Estimate cost for a request based on estimated total tokens Assumes 70% input, 30% output token distribution Args: model_name: Name of the LLM model estimated_tokens: Estimated total tokens for the request Returns: Estimated cost in cents """ input_tokens = int(estimated_tokens * 0.7) # 70% input output_tokens = int(estimated_tokens * 0.3) # 30% output return cls.calculate_cost_cents(model_name, input_tokens, output_tokens) @classmethod def get_cost_per_1k_tokens(cls, model_name: str) -> Dict[str, float]: """ Get cost per 1000 tokens in dollars for display purposes Args: model_name: Name of the LLM model Returns: Dictionary with input and output costs in dollars per 1K tokens """ pricing_cents = cls.get_model_pricing(model_name) return { "input": pricing_cents["input"] / 10000, # Convert 1/10000ths to dollars "output": pricing_cents["output"] / 10000, "currency": "USD" } @classmethod def get_all_model_pricing(cls) -> Dict[str, Dict[str, float]]: """Get pricing for all supported models in dollars""" pricing_data = {} for model_name in cls.MODEL_PRICING.keys(): pricing_data[model_name] = cls.get_cost_per_1k_tokens(model_name) return pricing_data @classmethod def format_cost_display(cls, cost_cents: int) -> str: """Format cost in 1/1000ths of a dollar for display""" if cost_cents == 0: return "$0.00" elif cost_cents < 1000: return f"${cost_cents/1000:.4f}" else: return f"${cost_cents/1000:.2f}" # Convenience functions for common operations def calculate_request_cost(model_name: str, input_tokens: int, output_tokens: int) -> int: """Calculate cost for a single request""" return CostCalculator.calculate_cost_cents(model_name, input_tokens, output_tokens) def estimate_request_cost(model_name: str, estimated_tokens: int) -> int: """Estimate cost for a request""" return CostCalculator.estimate_cost_cents(model_name, estimated_tokens) def get_model_pricing_display(model_name: str) -> Dict[str, float]: """Get model pricing for display""" return CostCalculator.get_cost_per_1k_tokens(model_name)