"""
|
|
Pricing Service for API Usage Tracking
|
|
Manages API pricing, cost calculation, and subscription limits.
|
|
"""
|
|
|
|
from typing import Dict, Any, Optional, List, Tuple, Union
|
|
from decimal import Decimal, ROUND_HALF_UP
|
|
from datetime import datetime, timedelta
|
|
from sqlalchemy.orm import Session
|
|
from sqlalchemy import text
|
|
from loguru import logger
|
|
import os
|
|
|
|
from models.subscription_models import (
|
|
APIProviderPricing, SubscriptionPlan, UserSubscription,
|
|
UsageSummary, APIUsageLog, APIProvider, SubscriptionTier
|
|
)
|
|
|
|
class PricingService:
    """Service for managing API pricing and cost calculations."""

    # Class-level cache shared across all instances (critical for cache invalidation on subscription renewal)
    # key: f"{user_id}:{provider}", value: { 'result': (bool, str, dict), 'expires_at': datetime }
    _limits_cache: Dict[str, Dict[str, Any]] = {}

    def __init__(self, db: Session):
        """Bind the service to a SQLAlchemy session used for all queries/commits.

        Args:
            db: SQLAlchemy session for pricing/plan/subscription tables.
        """
        self.db = db
        # NOTE(review): _pricing_cache/_plans_cache are never read or written
        # in this file — presumably populated/consumed by collaborators; verify.
        self._pricing_cache = {}
        self._plans_cache = {}
        # Cache for schema feature detection (ai_text_generation_calls_limit column)
        self._ai_text_gen_col_checked: bool = False
        self._ai_text_gen_col_available: bool = False
|
|
|
|
# ------------------- Billing period helpers -------------------
|
|
def _compute_next_period_end(self, start: datetime, cycle: str) -> datetime:
|
|
"""Compute the next period end given a start and billing cycle."""
|
|
try:
|
|
cycle_value = cycle.value if hasattr(cycle, 'value') else str(cycle)
|
|
except Exception:
|
|
cycle_value = str(cycle)
|
|
if cycle_value == 'yearly':
|
|
return start + timedelta(days=365)
|
|
return start + timedelta(days=30)
|
|
|
|
    def _ensure_subscription_current(self, subscription) -> bool:
        """Auto-advance subscription period if expired and auto_renew is enabled.

        Returns:
            True when the subscription is usable now (still within its period,
            or successfully rolled forward); False when there is no
            subscription or it lapsed without auto-renew.
        """
        if not subscription:
            return False
        now = datetime.utcnow()
        try:
            if subscription.current_period_end and subscription.current_period_end < now:
                if getattr(subscription, 'auto_renew', False):
                    # Roll the billing window forward starting from "now".
                    subscription.current_period_start = now
                    subscription.current_period_end = self._compute_next_period_end(now, subscription.billing_cycle)
                    # Keep status active if model enum else string
                    try:
                        subscription.status = subscription.status.ACTIVE  # type: ignore[attr-defined]
                    except Exception:
                        setattr(subscription, 'status', 'active')
                    self.db.commit()
                else:
                    # Expired and not auto-renewing: caller must treat as lapsed.
                    return False
        except Exception:
            # Best-effort renewal: undo any partial change, then fall through
            # and report the subscription as current rather than blocking.
            self.db.rollback()
        return True
|
|
|
|
def get_current_billing_period(self, user_id: str) -> Optional[str]:
|
|
"""Return current billing period key (YYYY-MM) after ensuring subscription is current."""
|
|
subscription = self.db.query(UserSubscription).filter(
|
|
UserSubscription.user_id == user_id,
|
|
UserSubscription.is_active == True
|
|
).first()
|
|
# Ensure subscription is current (advance if auto_renew)
|
|
self._ensure_subscription_current(subscription)
|
|
# Continue to use YYYY-MM for summaries
|
|
return datetime.now().strftime("%Y-%m")
|
|
|
|
@classmethod
|
|
def clear_user_cache(cls, user_id: str) -> int:
|
|
"""Clear all cached limit checks for a specific user. Returns number of entries cleared."""
|
|
keys_to_remove = [key for key in cls._limits_cache.keys() if key.startswith(f"{user_id}:")]
|
|
for key in keys_to_remove:
|
|
del cls._limits_cache[key]
|
|
logger.info(f"Cleared {len(keys_to_remove)} cache entries for user {user_id}")
|
|
return len(keys_to_remove)
|
|
|
|
    def initialize_default_pricing(self):
        """Initialize default pricing for all API providers.

        Inserts one APIProviderPricing row per (provider, model) if missing.
        Existing rows are left untouched, except MISTRAL (HuggingFace) rows,
        which are refreshed from env vars on every run. Commits once at the end.
        """

        # Gemini API Pricing (Updated as of September 2025 - Official Google AI Pricing)
        # Source: https://ai.google.dev/gemini-api/docs/pricing
        gemini_pricing = [
            # Gemini 2.5 Pro - Standard Tier
            {
                "provider": APIProvider.GEMINI,
                "model_name": "gemini-2.5-pro",
                "cost_per_input_token": 0.00000125,  # $1.25 per 1M input tokens (prompts <= 200k tokens)
                "cost_per_output_token": 0.00001,  # $10.00 per 1M output tokens (prompts <= 200k tokens)
                "description": "Gemini 2.5 Pro - State-of-the-art multipurpose model for coding and complex reasoning"
            },
            {
                "provider": APIProvider.GEMINI,
                "model_name": "gemini-2.5-pro-large",
                "cost_per_input_token": 0.0000025,  # $2.50 per 1M input tokens (prompts > 200k tokens)
                "cost_per_output_token": 0.000015,  # $15.00 per 1M output tokens (prompts > 200k tokens)
                "description": "Gemini 2.5 Pro - Large context model for prompts > 200k tokens"
            },
            # Gemini 2.5 Flash - Standard Tier
            {
                "provider": APIProvider.GEMINI,
                "model_name": "gemini-2.5-flash",
                "cost_per_input_token": 0.0000003,  # $0.30 per 1M input tokens (text/image/video)
                "cost_per_output_token": 0.0000025,  # $2.50 per 1M output tokens
                "description": "Gemini 2.5 Flash - Hybrid reasoning model with 1M token context window"
            },
            {
                "provider": APIProvider.GEMINI,
                "model_name": "gemini-2.5-flash-audio",
                "cost_per_input_token": 0.000001,  # $1.00 per 1M input tokens (audio)
                "cost_per_output_token": 0.0000025,  # $2.50 per 1M output tokens
                "description": "Gemini 2.5 Flash - Audio input model"
            },
            # Gemini 2.5 Flash-Lite - Standard Tier
            {
                "provider": APIProvider.GEMINI,
                "model_name": "gemini-2.5-flash-lite",
                "cost_per_input_token": 0.0000001,  # $0.10 per 1M input tokens (text/image/video)
                "cost_per_output_token": 0.0000004,  # $0.40 per 1M output tokens
                "description": "Gemini 2.5 Flash-Lite - Smallest and most cost-effective model for at-scale usage"
            },
            {
                "provider": APIProvider.GEMINI,
                "model_name": "gemini-2.5-flash-lite-audio",
                "cost_per_input_token": 0.0000003,  # $0.30 per 1M input tokens (audio)
                "cost_per_output_token": 0.0000004,  # $0.40 per 1M output tokens
                "description": "Gemini 2.5 Flash-Lite - Audio input model"
            },
            # Gemini 1.5 Flash - Standard Tier
            {
                "provider": APIProvider.GEMINI,
                "model_name": "gemini-1.5-flash",
                "cost_per_input_token": 0.000000075,  # $0.075 per 1M input tokens (prompts <= 128k tokens)
                "cost_per_output_token": 0.0000003,  # $0.30 per 1M output tokens (prompts <= 128k tokens)
                "description": "Gemini 1.5 Flash - Fast multimodal model with 1M token context window"
            },
            {
                "provider": APIProvider.GEMINI,
                "model_name": "gemini-1.5-flash-large",
                "cost_per_input_token": 0.00000015,  # $0.15 per 1M input tokens (prompts > 128k tokens)
                "cost_per_output_token": 0.0000006,  # $0.60 per 1M output tokens (prompts > 128k tokens)
                "description": "Gemini 1.5 Flash - Large context model for prompts > 128k tokens"
            },
            # Gemini 1.5 Flash-8B - Standard Tier
            {
                "provider": APIProvider.GEMINI,
                "model_name": "gemini-1.5-flash-8b",
                "cost_per_input_token": 0.0000000375,  # $0.0375 per 1M input tokens (prompts <= 128k tokens)
                "cost_per_output_token": 0.00000015,  # $0.15 per 1M output tokens (prompts <= 128k tokens)
                "description": "Gemini 1.5 Flash-8B - Smallest model for lower intelligence use cases"
            },
            {
                "provider": APIProvider.GEMINI,
                "model_name": "gemini-1.5-flash-8b-large",
                "cost_per_input_token": 0.000000075,  # $0.075 per 1M input tokens (prompts > 128k tokens)
                "cost_per_output_token": 0.0000003,  # $0.30 per 1M output tokens (prompts > 128k tokens)
                "description": "Gemini 1.5 Flash-8B - Large context model for prompts > 128k tokens"
            },
            # Gemini 1.5 Pro - Standard Tier
            {
                "provider": APIProvider.GEMINI,
                "model_name": "gemini-1.5-pro",
                "cost_per_input_token": 0.00000125,  # $1.25 per 1M input tokens (prompts <= 128k tokens)
                "cost_per_output_token": 0.000005,  # $5.00 per 1M output tokens (prompts <= 128k tokens)
                "description": "Gemini 1.5 Pro - Highest intelligence model with 2M token context window"
            },
            {
                "provider": APIProvider.GEMINI,
                "model_name": "gemini-1.5-pro-large",
                "cost_per_input_token": 0.0000025,  # $2.50 per 1M input tokens (prompts > 128k tokens)
                "cost_per_output_token": 0.00001,  # $10.00 per 1M output tokens (prompts > 128k tokens)
                "description": "Gemini 1.5 Pro - Large context model for prompts > 128k tokens"
            },
            # Gemini Embedding - Standard Tier
            {
                "provider": APIProvider.GEMINI,
                "model_name": "gemini-embedding",
                "cost_per_input_token": 0.00000015,  # $0.15 per 1M input tokens
                "cost_per_output_token": 0.0,  # No output tokens for embeddings
                "description": "Gemini Embedding - Newest embeddings model with higher rate limits"
            },
            # Grounding with Google Search - Standard Tier
            {
                "provider": APIProvider.GEMINI,
                "model_name": "gemini-grounding-search",
                "cost_per_request": 0.035,  # $35 per 1,000 requests (after free tier)
                "cost_per_input_token": 0.0,  # No additional token cost for grounding
                "cost_per_output_token": 0.0,  # No additional token cost for grounding
                "description": "Grounding with Google Search - 1,500 RPD free, then $35/1K requests"
            }
        ]

        # OpenAI Pricing (estimated, will be updated)
        openai_pricing = [
            {
                "provider": APIProvider.OPENAI,
                "model_name": "gpt-4o",
                "cost_per_input_token": 0.0000025,  # $2.50 per 1M input tokens
                "cost_per_output_token": 0.00001,  # $10.00 per 1M output tokens
                "description": "GPT-4o - Latest OpenAI model"
            },
            {
                "provider": APIProvider.OPENAI,
                "model_name": "gpt-4o-mini",
                "cost_per_input_token": 0.00000015,  # $0.15 per 1M input tokens
                "cost_per_output_token": 0.0000006,  # $0.60 per 1M output tokens
                "description": "GPT-4o Mini - Cost-effective model"
            }
        ]

        # Anthropic Pricing (estimated, will be updated)
        anthropic_pricing = [
            {
                "provider": APIProvider.ANTHROPIC,
                "model_name": "claude-3.5-sonnet",
                "cost_per_input_token": 0.000003,  # $3.00 per 1M input tokens
                "cost_per_output_token": 0.000015,  # $15.00 per 1M output tokens
                "description": "Claude 3.5 Sonnet - Anthropic's flagship model"
            }
        ]

        # HuggingFace/Mistral Pricing (for GPT-OSS-120B via Groq)
        # Default pricing from environment variables or fallback to estimated values
        # Based on Groq pricing: ~$1 per 1M input tokens, ~$3 per 1M output tokens
        hf_input_cost = float(os.getenv('HUGGINGFACE_INPUT_TOKEN_COST', '0.000001'))  # $1 per 1M tokens default
        hf_output_cost = float(os.getenv('HUGGINGFACE_OUTPUT_TOKEN_COST', '0.000003'))  # $3 per 1M tokens default

        mistral_pricing = [
            {
                "provider": APIProvider.MISTRAL,
                "model_name": "openai/gpt-oss-120b:groq",
                "cost_per_input_token": hf_input_cost,
                "cost_per_output_token": hf_output_cost,
                "description": f"GPT-OSS-120B via HuggingFace/Groq (configurable via HUGGINGFACE_INPUT_TOKEN_COST and HUGGINGFACE_OUTPUT_TOKEN_COST env vars)"
            },
            {
                "provider": APIProvider.MISTRAL,
                "model_name": "gpt-oss-120b",
                "cost_per_input_token": hf_input_cost,
                "cost_per_output_token": hf_output_cost,
                "description": f"GPT-OSS-120B via HuggingFace/Groq (configurable via HUGGINGFACE_INPUT_TOKEN_COST and HUGGINGFACE_OUTPUT_TOKEN_COST env vars)"
            },
            {
                "provider": APIProvider.MISTRAL,
                "model_name": "default",
                "cost_per_input_token": hf_input_cost,
                "cost_per_output_token": hf_output_cost,
                "description": f"HuggingFace default model pricing (configurable via HUGGINGFACE_INPUT_TOKEN_COST and HUGGINGFACE_OUTPUT_TOKEN_COST env vars)"
            }
        ]

        # Search API Pricing (estimated)
        # Also carries image/video/audio generation pricing entries.
        search_pricing = [
            {
                "provider": APIProvider.TAVILY,
                "model_name": "tavily-search",
                "cost_per_request": 0.001,  # $0.001 per search
                "description": "Tavily AI Search API"
            },
            {
                "provider": APIProvider.SERPER,
                "model_name": "serper-search",
                "cost_per_request": 0.001,  # $0.001 per search
                "description": "Serper Google Search API"
            },
            {
                "provider": APIProvider.METAPHOR,
                "model_name": "metaphor-search",
                "cost_per_request": 0.003,  # $0.003 per search
                "description": "Metaphor/Exa AI Search API"
            },
            {
                "provider": APIProvider.FIRECRAWL,
                "model_name": "firecrawl-extract",
                "cost_per_page": 0.002,  # $0.002 per page crawled
                "description": "Firecrawl Web Extraction API"
            },
            {
                "provider": APIProvider.STABILITY,
                "model_name": "stable-diffusion",
                "cost_per_image": 0.04,  # $0.04 per image
                "description": "Stability AI Image Generation"
            },
            {
                "provider": APIProvider.EXA,
                "model_name": "exa-search",
                "cost_per_request": 0.005,  # $0.005 per search (1-25 results)
                "description": "Exa Neural Search API"
            },
            {
                "provider": APIProvider.VIDEO,
                "model_name": "tencent/HunyuanVideo",
                "cost_per_request": 0.10,  # $0.10 per video generation (estimated)
                "description": "HuggingFace AI Video Generation (HunyuanVideo)"
            },
            {
                "provider": APIProvider.VIDEO,
                "model_name": "default",
                "cost_per_request": 0.10,  # $0.10 per video generation (estimated)
                "description": "AI Video Generation default pricing"
            },
            {
                "provider": APIProvider.VIDEO,
                "model_name": "kling-v2.5-turbo-std-5s",
                "cost_per_request": 0.21,
                "description": "WaveSpeed Kling v2.5 Turbo Std Image-to-Video (5 seconds)"
            },
            {
                "provider": APIProvider.VIDEO,
                "model_name": "kling-v2.5-turbo-std-10s",
                "cost_per_request": 0.42,
                "description": "WaveSpeed Kling v2.5 Turbo Std Image-to-Video (10 seconds)"
            },
            {
                "provider": APIProvider.VIDEO,
                "model_name": "wavespeed-ai/infinitetalk",
                "cost_per_request": 0.30,
                "description": "WaveSpeed InfiniteTalk (image + audio to talking avatar video)"
            },
            # Audio Generation Pricing (Minimax Speech 02 HD via WaveSpeed)
            {
                "provider": APIProvider.AUDIO,
                "model_name": "minimax/speech-02-hd",
                "cost_per_input_token": 0.00005,  # $0.05 per 1,000 characters (every character is 1 token)
                "cost_per_output_token": 0.0,  # No output tokens for audio
                "cost_per_request": 0.0,  # Pricing is per character, not per request
                "description": "AI Audio Generation (Text-to-Speech) - Minimax Speech 02 HD via WaveSpeed"
            },
            {
                "provider": APIProvider.AUDIO,
                "model_name": "default",
                "cost_per_input_token": 0.00005,  # $0.05 per 1,000 characters default
                "cost_per_output_token": 0.0,
                "cost_per_request": 0.0,
                "description": "AI Audio Generation default pricing"
            }
        ]

        # Combine all pricing data (include video pricing in search_pricing list)
        all_pricing = gemini_pricing + openai_pricing + anthropic_pricing + mistral_pricing + search_pricing

        # Insert or update pricing data
        for pricing_data in all_pricing:
            existing = self.db.query(APIProviderPricing).filter(
                APIProviderPricing.provider == pricing_data["provider"],
                APIProviderPricing.model_name == pricing_data["model_name"]
            ).first()

            if existing:
                # Update existing pricing (especially for HuggingFace if env vars changed)
                # Only MISTRAL rows are refreshed; all other existing rows are left as-is.
                if pricing_data["provider"] == APIProvider.MISTRAL:
                    # Update HuggingFace pricing from env vars
                    existing.cost_per_input_token = pricing_data["cost_per_input_token"]
                    existing.cost_per_output_token = pricing_data["cost_per_output_token"]
                    existing.description = pricing_data["description"]
                    existing.updated_at = datetime.utcnow()
                    logger.debug(f"Updated pricing for {pricing_data['provider'].value}:{pricing_data['model_name']}")
            else:
                pricing = APIProviderPricing(**pricing_data)
                self.db.add(pricing)
                logger.debug(f"Added new pricing for {pricing_data['provider'].value}:{pricing_data['model_name']}")

        self.db.commit()
        logger.info("Default API pricing initialized/updated. HuggingFace pricing loaded from env vars if available.")
|
|
|
|
    def initialize_default_plans(self):
        """Initialize default subscription plans.

        Creates Free/Basic/Pro/Enterprise plans if missing; for existing plans,
        all fields except name/tier are overwritten with the defaults below
        (so new limit columns propagate). Commits once at the end.
        """

        plans = [
            {
                "name": "Free",
                "tier": SubscriptionTier.FREE,
                "price_monthly": 0.0,
                "price_yearly": 0.0,
                "gemini_calls_limit": 100,
                "openai_calls_limit": 0,
                "anthropic_calls_limit": 0,
                "mistral_calls_limit": 50,
                "tavily_calls_limit": 20,
                "serper_calls_limit": 20,
                "metaphor_calls_limit": 10,
                "firecrawl_calls_limit": 10,
                "stability_calls_limit": 5,
                "exa_calls_limit": 100,
                "video_calls_limit": 0,  # No video generation for free tier
                "image_edit_calls_limit": 10,  # 10 AI image editing calls/month
                "audio_calls_limit": 20,  # 20 AI audio generation calls/month
                "gemini_tokens_limit": 100000,
                "monthly_cost_limit": 0.0,
                "features": ["basic_content_generation", "limited_research"],
                "description": "Perfect for trying out ALwrity"
            },
            {
                "name": "Basic",
                "tier": SubscriptionTier.BASIC,
                "price_monthly": 29.0,
                "price_yearly": 290.0,
                "ai_text_generation_calls_limit": 10,  # Unified limit for all LLM providers
                "gemini_calls_limit": 1000,  # Legacy, kept for backwards compatibility (not used for enforcement)
                "openai_calls_limit": 500,
                "anthropic_calls_limit": 200,
                "mistral_calls_limit": 500,
                "tavily_calls_limit": 200,
                "serper_calls_limit": 200,
                "metaphor_calls_limit": 100,
                "firecrawl_calls_limit": 100,
                "stability_calls_limit": 5,
                "exa_calls_limit": 500,
                "video_calls_limit": 20,  # 20 videos/month for basic plan
                "image_edit_calls_limit": 30,  # 30 AI image editing calls/month
                "audio_calls_limit": 50,  # 50 AI audio generation calls/month
                "gemini_tokens_limit": 20000,  # Increased from 5000 for better stability
                "openai_tokens_limit": 20000,  # Increased from 5000 for better stability
                "anthropic_tokens_limit": 20000,  # Increased from 5000 for better stability
                "mistral_tokens_limit": 20000,  # Increased from 5000 for better stability
                "monthly_cost_limit": 50.0,
                "features": ["full_content_generation", "advanced_research", "basic_analytics"],
                "description": "Great for individuals and small teams"
            },
            {
                "name": "Pro",
                "tier": SubscriptionTier.PRO,
                "price_monthly": 79.0,
                "price_yearly": 790.0,
                "gemini_calls_limit": 5000,
                "openai_calls_limit": 2500,
                "anthropic_calls_limit": 1000,
                "mistral_calls_limit": 2500,
                "tavily_calls_limit": 1000,
                "serper_calls_limit": 1000,
                "metaphor_calls_limit": 500,
                "firecrawl_calls_limit": 500,
                "stability_calls_limit": 200,
                "exa_calls_limit": 2000,
                "video_calls_limit": 50,  # 50 videos/month for pro plan
                "image_edit_calls_limit": 100,  # 100 AI image editing calls/month
                "audio_calls_limit": 200,  # 200 AI audio generation calls/month
                "gemini_tokens_limit": 5000000,
                "openai_tokens_limit": 2500000,
                "anthropic_tokens_limit": 1000000,
                "mistral_tokens_limit": 2500000,
                "monthly_cost_limit": 150.0,
                "features": ["unlimited_content_generation", "premium_research", "advanced_analytics", "priority_support"],
                "description": "Perfect for growing businesses"
            },
            {
                # For Enterprise, a limit of 0 means "unlimited" by convention.
                "name": "Enterprise",
                "tier": SubscriptionTier.ENTERPRISE,
                "price_monthly": 199.0,
                "price_yearly": 1990.0,
                "gemini_calls_limit": 0,  # Unlimited
                "openai_calls_limit": 0,
                "anthropic_calls_limit": 0,
                "mistral_calls_limit": 0,
                "tavily_calls_limit": 0,
                "serper_calls_limit": 0,
                "metaphor_calls_limit": 0,
                "firecrawl_calls_limit": 0,
                "stability_calls_limit": 0,
                "exa_calls_limit": 0,  # Unlimited
                "video_calls_limit": 0,  # Unlimited for enterprise
                "image_edit_calls_limit": 0,  # Unlimited image editing for enterprise
                "audio_calls_limit": 0,  # Unlimited audio generation for enterprise
                "gemini_tokens_limit": 0,
                "openai_tokens_limit": 0,
                "anthropic_tokens_limit": 0,
                "mistral_tokens_limit": 0,
                "monthly_cost_limit": 500.0,
                "features": ["unlimited_everything", "white_label", "dedicated_support", "custom_integrations"],
                "description": "For large organizations with high-volume needs"
            }
        ]

        for plan_data in plans:
            existing = self.db.query(SubscriptionPlan).filter(
                SubscriptionPlan.name == plan_data["name"]
            ).first()

            if not existing:
                plan = SubscriptionPlan(**plan_data)
                self.db.add(plan)
            else:
                # Update existing plan with new limits (e.g., image_edit_calls_limit)
                # This ensures existing plans get new columns like image_edit_calls_limit
                for key, value in plan_data.items():
                    if key not in ["name", "tier"]:  # Don't overwrite name/tier
                        try:
                            # Try to set the attribute (works even if column was just added)
                            setattr(existing, key, value)
                        except (AttributeError, Exception) as e:
                            # If attribute doesn't exist yet (column not migrated), skip it
                            # Schema migration will add it, then this will update it on next run
                            logger.debug(f"Could not set {key} on plan {existing.name}: {e}")
                existing.updated_at = datetime.utcnow()
                logger.debug(f"Updated existing plan: {existing.name}")

        self.db.commit()
        logger.debug("Default subscription plans initialized")
|
|
|
|
    def calculate_api_cost(self, provider: APIProvider, model_name: str,
                           tokens_input: int = 0, tokens_output: int = 0,
                           request_count: int = 1, **kwargs) -> Dict[str, float]:
        """Calculate cost for an API call.

        Looks up DB pricing by exact (provider, model), then "default" for the
        provider, then MISTRAL-specific aliases; if no row exists, falls back
        to env-var pricing (MISTRAL) or a flat $1/1M-token estimate.

        Args:
            provider: APIProvider enum (e.g., APIProvider.MISTRAL for HuggingFace)
            model_name: Model name (e.g., "openai/gpt-oss-120b:groq")
            tokens_input: Number of input tokens
            tokens_output: Number of output tokens
            request_count: Number of requests (default: 1)
            **kwargs: Additional parameters (search_count, image_count, page_count, etc.)

        Returns:
            Dict with cost_input, cost_output, and cost_total
        """

        # Get pricing for the provider and model
        # Try exact match first
        pricing = self.db.query(APIProviderPricing).filter(
            APIProviderPricing.provider == provider,
            APIProviderPricing.model_name == model_name,
            APIProviderPricing.is_active == True
        ).first()

        # If not found, try "default" model name for the provider
        if not pricing:
            pricing = self.db.query(APIProviderPricing).filter(
                APIProviderPricing.provider == provider,
                APIProviderPricing.model_name == "default",
                APIProviderPricing.is_active == True
            ).first()

        # If still not found, check for HuggingFace models (provider is MISTRAL)
        # Try alternative model name variations
        if not pricing and provider == APIProvider.MISTRAL:
            # Try with "gpt-oss-120b" (without full path) if model contains it
            if "gpt-oss-120b" in model_name.lower():
                pricing = self.db.query(APIProviderPricing).filter(
                    APIProviderPricing.provider == provider,
                    APIProviderPricing.model_name == "gpt-oss-120b",
                    APIProviderPricing.is_active == True
                ).first()

                # Also try with full model path
                if not pricing:
                    pricing = self.db.query(APIProviderPricing).filter(
                        APIProviderPricing.provider == provider,
                        APIProviderPricing.model_name == "openai/gpt-oss-120b:groq",
                        APIProviderPricing.is_active == True
                    ).first()

        if not pricing:
            # Check if we should use env vars for HuggingFace/Mistral
            # NOTE(review): on both fallback paths below, request_count and the
            # search/image/page kwargs are ignored (token cost only) — confirm intended.
            if provider == APIProvider.MISTRAL:
                # Use environment variables for HuggingFace pricing if available
                hf_input_cost = float(os.getenv('HUGGINGFACE_INPUT_TOKEN_COST', '0.000001'))
                hf_output_cost = float(os.getenv('HUGGINGFACE_OUTPUT_TOKEN_COST', '0.000003'))
                logger.info(f"Using HuggingFace pricing from env vars: input={hf_input_cost}, output={hf_output_cost} for model {model_name}")
                cost_input = tokens_input * hf_input_cost
                cost_output = tokens_output * hf_output_cost
                cost_total = cost_input + cost_output
            else:
                logger.warning(f"No pricing found for {provider.value}:{model_name}, using default estimates")
                # Use default estimates
                cost_input = tokens_input * 0.000001  # $1 per 1M tokens default
                cost_output = tokens_output * 0.000001
                cost_total = cost_input + cost_output
        else:
            # Calculate based on actual pricing from database
            logger.debug(f"Using pricing from DB for {provider.value}:{model_name} - input: {pricing.cost_per_input_token}, output: {pricing.cost_per_output_token}")
            cost_input = tokens_input * (pricing.cost_per_input_token or 0.0)
            cost_output = tokens_output * (pricing.cost_per_output_token or 0.0)
            cost_request = request_count * (pricing.cost_per_request or 0.0)

            # Handle special cases for non-LLM APIs
            cost_search = kwargs.get('search_count', 0) * (pricing.cost_per_search or 0.0)
            cost_image = kwargs.get('image_count', 0) * (pricing.cost_per_image or 0.0)
            cost_page = kwargs.get('page_count', 0) * (pricing.cost_per_page or 0.0)

            cost_total = cost_input + cost_output + cost_request + cost_search + cost_image + cost_page

        # Round to 6 decimal places for precision
        # (request/search/image/page costs are folded into cost_total only)
        return {
            'cost_input': round(cost_input, 6),
            'cost_output': round(cost_output, 6),
            'cost_total': round(cost_total, 6)
        }
|
|
|
|
    def get_user_limits(self, user_id: str) -> Optional[Dict[str, Any]]:
        """Get usage limits for a user based on their subscription.

        Falls back to the Free-tier plan when the user has no active
        subscription. Returns None when no applicable plan can be found.
        """

        # CRITICAL: Expire all objects first to ensure fresh data after renewal
        self.db.expire_all()

        subscription = self.db.query(UserSubscription).filter(
            UserSubscription.user_id == user_id,
            UserSubscription.is_active == True
        ).first()

        if not subscription:
            # Return free tier limits
            free_plan = self.db.query(SubscriptionPlan).filter(
                SubscriptionPlan.tier == SubscriptionTier.FREE
            ).first()
            if free_plan:
                return self._plan_to_limits_dict(free_plan)
            return None

        # Ensure current period before returning limits
        self._ensure_subscription_current(subscription)

        # CRITICAL: Refresh subscription to get latest plan_id, then refresh plan relationship
        self.db.refresh(subscription)

        # Re-query plan directly to ensure fresh data (bypass relationship cache)
        plan = self.db.query(SubscriptionPlan).filter(
            SubscriptionPlan.id == subscription.plan_id
        ).first()

        if not plan:
            logger.error(f"Plan not found for subscription plan_id={subscription.plan_id}")
            return None

        # Refresh plan to ensure fresh limits
        self.db.refresh(plan)

        return self._plan_to_limits_dict(plan)
|
|
|
|
def _ensure_ai_text_gen_column_detection(self) -> None:
|
|
"""Detect at runtime whether ai_text_generation_calls_limit column exists and cache the result."""
|
|
if self._ai_text_gen_col_checked:
|
|
return
|
|
try:
|
|
# Try to query the column - if it exists, this will work
|
|
self.db.execute(text('SELECT ai_text_generation_calls_limit FROM subscription_plans LIMIT 0'))
|
|
self._ai_text_gen_col_available = True
|
|
except Exception:
|
|
self._ai_text_gen_col_available = False
|
|
finally:
|
|
self._ai_text_gen_col_checked = True
|
|
|
|
def _plan_to_limits_dict(self, plan: SubscriptionPlan) -> Dict[str, Any]:
|
|
"""Convert subscription plan to limits dictionary."""
|
|
# Detect if unified AI text generation limit column exists
|
|
self._ensure_ai_text_gen_column_detection()
|
|
|
|
# Use unified AI text generation limit if column exists and is set
|
|
ai_text_gen_limit = None
|
|
if self._ai_text_gen_col_available:
|
|
try:
|
|
ai_text_gen_limit = getattr(plan, 'ai_text_generation_calls_limit', None)
|
|
# If 0, treat as not set (unlimited for Enterprise or use fallback)
|
|
if ai_text_gen_limit == 0:
|
|
ai_text_gen_limit = None
|
|
except (AttributeError, Exception):
|
|
# Column exists but access failed - use fallback
|
|
ai_text_gen_limit = None
|
|
|
|
return {
|
|
'plan_name': plan.name,
|
|
'tier': plan.tier.value,
|
|
'limits': {
|
|
# Unified AI text generation limit (applies to all LLM providers)
|
|
# If not set, fall back to first non-zero legacy limit for backwards compatibility
|
|
'ai_text_generation_calls': ai_text_gen_limit if ai_text_gen_limit is not None else (
|
|
plan.gemini_calls_limit if plan.gemini_calls_limit > 0 else
|
|
plan.openai_calls_limit if plan.openai_calls_limit > 0 else
|
|
plan.anthropic_calls_limit if plan.anthropic_calls_limit > 0 else
|
|
plan.mistral_calls_limit if plan.mistral_calls_limit > 0 else 0
|
|
),
|
|
# Legacy per-provider limits (for backwards compatibility and analytics)
|
|
'gemini_calls': plan.gemini_calls_limit,
|
|
'openai_calls': plan.openai_calls_limit,
|
|
'anthropic_calls': plan.anthropic_calls_limit,
|
|
'mistral_calls': plan.mistral_calls_limit,
|
|
# Other API limits
|
|
'tavily_calls': plan.tavily_calls_limit,
|
|
'serper_calls': plan.serper_calls_limit,
|
|
'metaphor_calls': plan.metaphor_calls_limit,
|
|
'firecrawl_calls': plan.firecrawl_calls_limit,
|
|
'stability_calls': plan.stability_calls_limit,
|
|
'video_calls': getattr(plan, 'video_calls_limit', 0), # Support missing column
|
|
'image_edit_calls': getattr(plan, 'image_edit_calls_limit', 0), # Support missing column
|
|
'audio_calls': getattr(plan, 'audio_calls_limit', 0), # Support missing column
|
|
# Token limits
|
|
'gemini_tokens': plan.gemini_tokens_limit,
|
|
'openai_tokens': plan.openai_tokens_limit,
|
|
'anthropic_tokens': plan.anthropic_tokens_limit,
|
|
'mistral_tokens': plan.mistral_tokens_limit,
|
|
'monthly_cost': plan.monthly_cost_limit
|
|
},
|
|
'features': plan.features or []
|
|
}
|
|
|
|
def check_usage_limits(self, user_id: str, provider: APIProvider,
|
|
tokens_requested: int = 0, actual_provider_name: Optional[str] = None) -> Tuple[bool, str, Dict[str, Any]]:
|
|
"""Check if user can make an API call within their limits.
|
|
|
|
Delegates to LimitValidator for actual validation logic.
|
|
|
|
Args:
|
|
user_id: User ID
|
|
provider: APIProvider enum (may be MISTRAL for HuggingFace)
|
|
tokens_requested: Estimated tokens for the request
|
|
actual_provider_name: Optional actual provider name (e.g., "huggingface" when provider is MISTRAL)
|
|
|
|
Returns:
|
|
(can_proceed, error_message, usage_info)
|
|
"""
|
|
from .limit_validation import LimitValidator
|
|
validator = LimitValidator(self)
|
|
return validator.check_usage_limits(user_id, provider, tokens_requested, actual_provider_name)
|
|
|
|
def estimate_tokens(self, text: str, provider: APIProvider) -> int:
|
|
"""Estimate token count for text based on provider."""
|
|
|
|
# Get pricing info for token estimation
|
|
pricing = self.db.query(APIProviderPricing).filter(
|
|
APIProviderPricing.provider == provider,
|
|
APIProviderPricing.is_active == True
|
|
).first()
|
|
|
|
if pricing and pricing.tokens_per_word:
|
|
# Use provider-specific conversion
|
|
word_count = len(text.split())
|
|
return int(word_count * pricing.tokens_per_word)
|
|
else:
|
|
# Use default estimation (roughly 1.3 tokens per word for most models)
|
|
word_count = len(text.split())
|
|
return int(word_count * 1.3)
|
|
|
|
def get_pricing_info(self, provider: APIProvider, model_name: str = None) -> Optional[Dict[str, Any]]:
|
|
"""Get pricing information for a provider/model."""
|
|
|
|
query = self.db.query(APIProviderPricing).filter(
|
|
APIProviderPricing.provider == provider,
|
|
APIProviderPricing.is_active == True
|
|
)
|
|
|
|
if model_name:
|
|
query = query.filter(APIProviderPricing.model_name == model_name)
|
|
|
|
pricing = query.first()
|
|
|
|
if not pricing:
|
|
return None
|
|
|
|
# Return pricing info as dict
|
|
return {
|
|
'provider': pricing.provider.value,
|
|
'model_name': pricing.model_name,
|
|
'cost_per_input_token': pricing.cost_per_input_token,
|
|
'cost_per_output_token': pricing.cost_per_output_token,
|
|
'cost_per_request': pricing.cost_per_request,
|
|
'description': pricing.description
|
|
}
|
|
|
|
def check_comprehensive_limits(
|
|
self,
|
|
user_id: str,
|
|
operations: List[Dict[str, Any]]
|
|
) -> Tuple[bool, Optional[str], Optional[Dict[str, Any]]]:
|
|
"""
|
|
Comprehensive pre-flight validation that checks ALL limits before making ANY API calls.
|
|
|
|
Delegates to LimitValidator for actual validation logic.
|
|
This prevents wasteful API calls by validating that ALL subsequent operations will succeed
|
|
before making the first external API call.
|
|
|
|
Args:
|
|
user_id: User ID
|
|
operations: List of operations to validate, each with:
|
|
- 'provider': APIProvider enum
|
|
- 'tokens_requested': int (estimated tokens for LLM calls, 0 for non-LLM)
|
|
- 'actual_provider_name': Optional[str] (e.g., "huggingface" when provider is MISTRAL)
|
|
- 'operation_type': str (e.g., "google_grounding", "llm_call", "image_generation")
|
|
|
|
Returns:
|
|
(can_proceed, error_message, error_details)
|
|
If can_proceed is False, error_message explains which limit would be exceeded
|
|
"""
|
|
from .limit_validation import LimitValidator
|
|
validator = LimitValidator(self)
|
|
return validator.check_comprehensive_limits(user_id, operations)
|
|
|
|
def get_pricing_for_provider_model(self, provider: APIProvider, model_name: str) -> Optional[Dict[str, Any]]:
|
|
"""Get pricing configuration for a specific provider and model."""
|
|
pricing = self.db.query(APIProviderPricing).filter(
|
|
APIProviderPricing.provider == provider,
|
|
APIProviderPricing.model_name == model_name
|
|
).first()
|
|
|
|
if not pricing:
|
|
return None
|
|
|
|
return {
|
|
'provider': pricing.provider.value,
|
|
'model_name': pricing.model_name,
|
|
'cost_per_input_token': pricing.cost_per_input_token,
|
|
'cost_per_output_token': pricing.cost_per_output_token,
|
|
'cost_per_request': pricing.cost_per_request,
|
|
'cost_per_search': pricing.cost_per_search,
|
|
'cost_per_image': pricing.cost_per_image,
|
|
'cost_per_page': pricing.cost_per_page,
|
|
'description': pricing.description
|
|
}
|