AI story writer enhancements, text to video and voice generation, subscription management, and more.

2025-11-19 09:55:32 +05:30
parent bf7493c366
commit e96525347b
64 changed files with 10367 additions and 400 deletions
--- a/backend/services/llm_providers/main_audio_generation.py
+++ b/backend/services/llm_providers/main_audio_generation.py
@@ -0,0 +1,301 @@
+"""
+Main Audio Generation Service for ALwrity Backend.
+
+This service provides AI-powered text-to-speech functionality using WaveSpeed Minimax Speech 02 HD.
+"""
+
+from __future__ import annotations
+
+import sys
+from typing import Optional, Dict, Any
+from datetime import datetime
+from loguru import logger
+from fastapi import HTTPException
+
+from services.wavespeed.client import WaveSpeedClient
+from services.onboarding.api_key_manager import APIKeyManager
+from utils.logger_utils import get_service_logger
+
+logger = get_service_logger("audio_generation")
+
+
+class AudioGenerationResult:
+    """Result of audio generation."""
+    
+    def __init__(
+        self,
+        audio_bytes: bytes,
+        provider: str,
+        model: str,
+        voice_id: str,
+        text_length: int,
+        file_size: int,
+    ):
+        self.audio_bytes = audio_bytes
+        self.provider = provider
+        self.model = model
+        self.voice_id = voice_id
+        self.text_length = text_length
+        self.file_size = file_size
+
+
+def generate_audio(
+    text: str,
+    voice_id: str = "Wise_Woman",
+    speed: float = 1.0,
+    volume: float = 1.0,
+    pitch: float = 0.0,
+    emotion: str = "happy",
+    user_id: Optional[str] = None,
+    **kwargs
+) -> AudioGenerationResult:
+    """
+    Generate audio using AI text-to-speech with subscription tracking.
+    
+    Args:
+        text: Text to convert to speech (max 10000 characters)
+        voice_id: Voice ID (default: "Wise_Woman")
+        speed: Speech speed (0.5-2.0, default: 1.0)
+        volume: Speech volume (0.1-10.0, default: 1.0)
+        pitch: Speech pitch (-12 to 12, default: 0.0)
+        emotion: Emotion (default: "happy")
+        user_id: User ID for subscription checking (required)
+        **kwargs: Additional parameters (sample_rate, bitrate, format, etc.)
+        
+    Returns:
+        AudioGenerationResult: Generated audio result
+        
+    Raises:
+        RuntimeError: If subscription limits are exceeded or user_id is missing.
+    """
+    try:
+        logger.info("[audio_gen] Starting audio generation")
+        logger.debug(f"[audio_gen] Text length: {len(text)} characters, voice: {voice_id}")
+        
+        # SUBSCRIPTION CHECK - Required and strict enforcement
+        if not user_id:
+            raise RuntimeError("user_id is required for subscription checking. Please provide Clerk user ID.")
+        
+        # Calculate cost based on character count (every character is 1 token)
+        # Pricing: $0.05 per 1,000 characters
+        character_count = len(text)
+        cost_per_1000_chars = 0.05
+        estimated_cost = (character_count / 1000.0) * cost_per_1000_chars
+        
+        try:
+            from services.database import get_db
+            from services.subscription import PricingService
+            from models.subscription_models import UsageSummary, APIProvider
+            
+            db = next(get_db())
+            try:
+                pricing_service = PricingService(db)
+                
+                # Check limits using sync method from pricing service (strict enforcement)
+                # Use AUDIO provider for audio generation
+                can_proceed, message, usage_info = pricing_service.check_usage_limits(
+                    user_id=user_id,
+                    provider=APIProvider.AUDIO,
+                    tokens_requested=character_count,  # Use character count as "tokens" for audio
+                    actual_provider_name="wavespeed"  # Actual provider is WaveSpeed
+                )
+                
+                if not can_proceed:
+                    logger.warning(f"[audio_gen] Subscription limit exceeded for user {user_id}: {message}")
+                    error_detail = {
+                        'error': message,
+                        'message': message,
+                        'provider': 'wavespeed',
+                        'usage_info': usage_info if usage_info else {}
+                    }
+                    raise HTTPException(status_code=429, detail=error_detail)
+                
+                # Get current usage for limit checking
+                current_period = pricing_service.get_current_billing_period(user_id) or datetime.now().strftime("%Y-%m")
+                usage = db.query(UsageSummary).filter(
+                    UsageSummary.user_id == user_id,
+                    UsageSummary.billing_period == current_period
+                ).first()
+                
+            finally:
+                db.close()
+        except HTTPException:
+            raise
+        except RuntimeError:
+            raise
+        except Exception as sub_error:
+            logger.error(f"[audio_gen] Subscription check failed for user {user_id}: {sub_error}")
+            raise RuntimeError(f"Subscription check failed: {str(sub_error)}")
+        
+        # Generate audio using WaveSpeed
+        try:
+            client = WaveSpeedClient()
+            audio_bytes = client.generate_speech(
+                text=text,
+                voice_id=voice_id,
+                speed=speed,
+                volume=volume,
+                pitch=pitch,
+                emotion=emotion,
+                enable_sync_mode=True,
+                **kwargs
+            )
+            
+            logger.info(f"[audio_gen] ✅ API call successful, generated {len(audio_bytes)} bytes")
+            
+        except HTTPException:
+            raise
+        except Exception as api_error:
+            logger.error(f"[audio_gen] Audio generation API failed: {api_error}")
+            raise HTTPException(
+                status_code=502,
+                detail={
+                    "error": "Audio generation failed",
+                    "message": str(api_error)
+                }
+            )
+        
+        # TRACK USAGE after successful API call
+        if audio_bytes:
+            logger.info(f"[audio_gen] ✅ API call successful, tracking usage for user {user_id}")
+            try:
+                db_track = next(get_db())
+                try:
+                    from models.subscription_models import UsageSummary, APIUsageLog, APIProvider
+                    from services.subscription import PricingService
+                    
+                    pricing = PricingService(db_track)
+                    current_period = pricing.get_current_billing_period(user_id) or datetime.now().strftime("%Y-%m")
+                    
+                    # Get or create usage summary
+                    summary = db_track.query(UsageSummary).filter(
+                        UsageSummary.user_id == user_id,
+                        UsageSummary.billing_period == current_period
+                    ).first()
+                    
+                    if not summary:
+                        summary = UsageSummary(
+                            user_id=user_id,
+                            billing_period=current_period
+                        )
+                        db_track.add(summary)
+                        db_track.flush()
+                    
+                    # Get current values before update
+                    current_calls_before = getattr(summary, "audio_calls", 0) or 0
+                    current_cost_before = getattr(summary, "audio_cost", 0.0) or 0.0
+                    
+                    # Update audio calls and cost
+                    new_calls = current_calls_before + 1
+                    new_cost = current_cost_before + estimated_cost
+                    
+                    # Use direct SQL UPDATE for dynamic attributes
+                    from sqlalchemy import text
+                    update_query = text("""
+                        UPDATE usage_summaries 
+                        SET audio_calls = :new_calls,
+                            audio_cost = :new_cost
+                        WHERE user_id = :user_id AND billing_period = :period
+                    """)
+                    db_track.execute(update_query, {
+                        'new_calls': new_calls,
+                        'new_cost': new_cost,
+                        'user_id': user_id,
+                        'period': current_period
+                    })
+                    
+                    # Update total cost
+                    summary.total_cost = (summary.total_cost or 0.0) + estimated_cost
+                    summary.total_calls = (summary.total_calls or 0) + 1
+                    summary.updated_at = datetime.utcnow()
+                    
+                    # Create usage log
+                    usage_log = APIUsageLog(
+                        user_id=user_id,
+                        provider=APIProvider.AUDIO,
+                        endpoint="/audio-generation/wavespeed",
+                        method="POST",
+                        model_used="minimax/speech-02-hd",
+                        tokens_input=character_count,
+                        tokens_output=0,
+                        tokens_total=character_count,
+                        cost_input=0.0,
+                        cost_output=0.0,
+                        cost_total=estimated_cost,
+                        response_time=0.0,
+                        status_code=200,
+                        request_size=len(text.encode("utf-8")),
+                        response_size=len(audio_bytes),
+                        billing_period=current_period,
+                    )
+                    db_track.add(usage_log)
+                    
+                    # Get plan details for unified log
+                    limits = pricing.get_user_limits(user_id)
+                    plan_name = limits.get('plan_name', 'unknown') if limits else 'unknown'
+                    tier = limits.get('tier', 'unknown') if limits else 'unknown'
+                    audio_limit = limits['limits'].get("audio_calls", 0) if limits else 0
+                    # Only show ∞ for Enterprise tier when limit is 0 (unlimited)
+                    audio_limit_display = audio_limit if (audio_limit > 0 or tier != 'enterprise') else '∞'
+                    
+                    # Get related stats for unified log
+                    current_image_calls = getattr(summary, "stability_calls", 0) or 0
+                    image_limit = limits['limits'].get("stability_calls", 0) if limits else 0
+                    current_image_edit_calls = getattr(summary, "image_edit_calls", 0) or 0
+                    image_edit_limit = limits['limits'].get("image_edit_calls", 0) if limits else 0
+                    current_video_calls = getattr(summary, "video_calls", 0) or 0
+                    video_limit = limits['limits'].get("video_calls", 0) if limits else 0
+                    
+                    db_track.commit()
+                    logger.info(f"[audio_gen] ✅ Successfully tracked usage: user {user_id} -> audio -> {new_calls} calls, ${estimated_cost:.4f}")
+                    
+                    # UNIFIED SUBSCRIPTION LOG - Shows before/after state in one message
+                    print(f"""
+[SUBSCRIPTION] Audio Generation
+├─ User: {user_id}
+├─ Plan: {plan_name} ({tier})
+├─ Provider: wavespeed
+├─ Actual Provider: wavespeed
+├─ Model: minimax/speech-02-hd
+├─ Voice: {voice_id}
+├─ Calls: {current_calls_before} → {new_calls} / {audio_limit_display}
+├─ Cost: ${current_cost_before:.4f} → ${new_cost:.4f}
+├─ Characters: {character_count}
+├─ Images: {current_image_calls} / {image_limit if image_limit > 0 else '∞'}
+├─ Image Editing: {current_image_edit_calls} / {image_edit_limit if image_edit_limit > 0 else '∞'}
+├─ Videos: {current_video_calls} / {video_limit if video_limit > 0 else '∞'}
+└─ Status: ✅ Allowed & Tracked
+""", flush=True)
+                    sys.stdout.flush()
+                    
+                except Exception as track_error:
+                    logger.error(f"[audio_gen] ❌ Error tracking usage (non-blocking): {track_error}", exc_info=True)
+                    db_track.rollback()
+                finally:
+                    db_track.close()
+            except Exception as usage_error:
+                logger.error(f"[audio_gen] ❌ Failed to track usage: {usage_error}", exc_info=True)
+        
+        return AudioGenerationResult(
+            audio_bytes=audio_bytes,
+            provider="wavespeed",
+            model="minimax/speech-02-hd",
+            voice_id=voice_id,
+            text_length=character_count,
+            file_size=len(audio_bytes),
+        )
+        
+    except HTTPException:
+        raise
+    except RuntimeError:
+        raise
+    except Exception as e:
+        logger.error(f"[audio_gen] Error generating audio: {e}")
+        raise HTTPException(
+            status_code=500,
+            detail={
+                "error": "Audio generation failed",
+                "message": str(e)
+            }
+        )
+