""" YouTube Video Planner Service Generates video plans, outlines, and insights using AI with persona integration. """ from typing import Dict, Any, Optional, List from loguru import logger from fastapi import HTTPException from services.llm_providers.main_text_generation import llm_text_gen from utils.logger_utils import get_service_logger logger = get_service_logger("youtube.planner") class YouTubePlannerService: """Service for planning YouTube videos with AI assistance.""" def __init__(self): """Initialize the planner service.""" logger.info("[YouTubePlanner] Service initialized") def generate_video_plan( self, user_idea: str, duration_type: str, # "shorts", "medium", "long" persona_data: Optional[Dict[str, Any]] = None, reference_image_description: Optional[str] = None, source_content_id: Optional[str] = None, # For blog/story conversion source_content_type: Optional[str] = None, # "blog", "story" user_id: str = None, include_scenes: bool = False, # For shorts: combine plan + scenes in one call ) -> Dict[str, Any]: """ Generate a comprehensive video plan from user input. Args: user_idea: User's video idea or topic duration_type: "shorts" (≤60s), "medium" (1-4min), "long" (4-10min) persona_data: Optional persona data for tone/style reference_image_description: Optional description of reference image source_content_id: Optional ID of source content (blog/story) source_content_type: Type of source content user_id: Clerk user ID for subscription checking Returns: Dictionary with video plan, outline, insights, and metadata """ try: logger.info( f"[YouTubePlanner] Generating plan: idea={user_idea[:50]}..., " f"duration={duration_type}, user={user_id}" ) # Build persona context persona_context = self._build_persona_context(persona_data) # Build duration context duration_context = self._get_duration_context(duration_type) # Build source content context if provided source_context = "" if source_content_id and source_content_type: source_context = f""" **Source Content:** - Type: {source_content_type} - ID: {source_content_id} - Note: This video should be based on the existing {source_content_type} content. """ # Build reference image context image_context = "" if reference_image_description: image_context = f""" **Reference Image:** {reference_image_description} - Use this as visual inspiration for the video """ # Generate comprehensive video plan planning_prompt = f"""You are an expert YouTube content strategist. Create a comprehensive video plan based on the user's idea. **User's Video Idea:** {user_idea} **Video Duration Type:** {duration_type} ({duration_context['description']}) **Duration Guidelines:** - Target length: {duration_context['target_seconds']} seconds - Hook duration: {duration_context['hook_seconds']} seconds - Main content: {duration_context['main_seconds']} seconds - CTA duration: {duration_context['cta_seconds']} seconds - Maximum scenes: {duration_context['max_scenes']} (for shorts, keep 2-4 scenes total) {persona_context} {source_context} {image_context} **Your Task:** Create a detailed video plan that includes: 1. **Video Summary**: A 2-3 sentence overview of what the video will cover 2. **Target Audience**: Who this video is for 3. **Video Goal**: Primary objective (educate, entertain, sell, inspire, etc.) 4. **Key Message**: The main takeaway viewers should remember 5. **Hook Strategy**: Attention-grabbing opening (first {duration_context['hook_seconds']} seconds) 6. **Content Outline**: High-level structure with 3-5 main sections 7. **Call-to-Action**: Clear CTA that fits the video goal 8. **Visual Style**: Recommended visual approach (cinematic, tutorial, vlog, etc.) 9. **Tone**: Recommended tone (professional, casual, energetic, etc.) 10. **SEO Keywords**: 5-7 relevant keywords for YouTube SEO **Format your response as JSON:** {{ "video_summary": "...", "target_audience": "...", "video_goal": "...", "key_message": "...", "hook_strategy": "...", "content_outline": [ {{"section": "Section 1", "description": "...", "duration_estimate": 30}}, {{"section": "Section 2", "description": "...", "duration_estimate": 45}} ], "call_to_action": "...", "visual_style": "...", "tone": "...", "seo_keywords": ["keyword1", "keyword2", ...] }} Make sure the content outline fits within the {duration_type} duration constraints. """ system_prompt = ( "You are an expert YouTube content strategist specializing in creating " "engaging, well-structured video plans. Your plans are data-driven, " "audience-focused, and optimized for YouTube's algorithm." ) # For shorts, combine plan + scenes in one call to save API calls if include_scenes and duration_type == "shorts": planning_prompt += f""" **IMPORTANT: Since this is a SHORTS video, also generate the complete scene breakdown in the same response.** **Additional Task - Generate Detailed Scenes:** Create detailed scenes (up to {duration_context['max_scenes']} scenes) that include: 1. Scene number and title 2. Narration text (what will be spoken) - keep it concise for shorts 3. Visual description (what viewers will see) 4. Duration estimate (2-8 seconds each) 5. Emphasis tags (hook, main_content, transition, cta) **Scene Format:** Each scene should be detailed enough for video generation. Total duration must fit within {duration_context['target_seconds']} seconds. **Update JSON structure to include "scenes" array:** Add a "scenes" field with the complete scene breakdown. """ json_struct = { "type": "object", "properties": { "video_summary": {"type": "string"}, "target_audience": {"type": "string"}, "video_goal": {"type": "string"}, "key_message": {"type": "string"}, "hook_strategy": {"type": "string"}, "content_outline": { "type": "array", "items": { "type": "object", "properties": { "section": {"type": "string"}, "description": {"type": "string"}, "duration_estimate": {"type": "number"} } } }, "call_to_action": {"type": "string"}, "visual_style": {"type": "string"}, "tone": {"type": "string"}, "seo_keywords": { "type": "array", "items": {"type": "string"} }, "scenes": { "type": "array", "items": { "type": "object", "properties": { "scene_number": {"type": "number"}, "title": {"type": "string"}, "narration": {"type": "string"}, "visual_description": {"type": "string"}, "duration_estimate": {"type": "number"}, "emphasis": {"type": "string"}, "visual_cues": { "type": "array", "items": {"type": "string"} } }, "required": [ "scene_number", "title", "narration", "visual_description", "duration_estimate", "emphasis" ] } } }, "required": [ "video_summary", "target_audience", "video_goal", "key_message", "hook_strategy", "content_outline", "call_to_action", "visual_style", "tone", "seo_keywords", "scenes" ] } else: json_struct = { "type": "object", "properties": { "video_summary": {"type": "string"}, "target_audience": {"type": "string"}, "video_goal": {"type": "string"}, "key_message": {"type": "string"}, "hook_strategy": {"type": "string"}, "content_outline": { "type": "array", "items": { "type": "object", "properties": { "section": {"type": "string"}, "description": {"type": "string"}, "duration_estimate": {"type": "number"} } } }, "call_to_action": {"type": "string"}, "visual_style": {"type": "string"}, "tone": {"type": "string"}, "seo_keywords": { "type": "array", "items": {"type": "string"} } }, "required": [ "video_summary", "target_audience", "video_goal", "key_message", "hook_strategy", "content_outline", "call_to_action", "visual_style", "tone", "seo_keywords" ] } # Generate plan using LLM response = llm_text_gen( prompt=planning_prompt, system_prompt=system_prompt, user_id=user_id, json_struct=json_struct ) # Parse response (handle both dict and JSON string) if isinstance(response, dict): plan_data = response else: import json plan_data = json.loads(response) # Add metadata plan_data["duration_type"] = duration_type plan_data["duration_metadata"] = duration_context plan_data["user_idea"] = user_idea # If scenes were included, mark them for scene builder if include_scenes and duration_type == "shorts" and "scenes" in plan_data: plan_data["_scenes_included"] = True logger.info( f"[YouTubePlanner] ✅ Plan + {len(plan_data.get('scenes', []))} scenes " f"generated in 1 AI call (optimized for shorts)" ) else: if include_scenes and duration_type == "shorts": # LLM did not return scenes; downstream will regenerate plan_data["_scenes_included"] = False logger.warning( "[YouTubePlanner] Shorts optimization requested but no scenes returned; " "scene builder will generate scenes separately." ) logger.info(f"[YouTubePlanner] ✅ Plan generated successfully") return plan_data except HTTPException: raise except Exception as e: logger.error(f"[YouTubePlanner] Error generating plan: {e}", exc_info=True) raise HTTPException( status_code=500, detail=f"Failed to generate video plan: {str(e)}" ) def _build_persona_context(self, persona_data: Optional[Dict[str, Any]]) -> str: """Build persona context string for prompts.""" if not persona_data: return """ **Persona Context:** - Using default professional tone - No specific persona constraints """ core_persona = persona_data.get("core_persona", {}) tone = core_persona.get("tone", "professional") voice = core_persona.get("voice_characteristics", {}) return f""" **Persona Context:** - Tone: {tone} - Voice Style: {voice.get('style', 'professional')} - Communication Style: {voice.get('communication_style', 'clear and direct')} - Brand Values: {core_persona.get('core_belief', 'value-driven content')} - Use this persona to guide the video's tone, style, and messaging approach. """ def _get_duration_context(self, duration_type: str) -> Dict[str, Any]: """Get duration-specific context and constraints.""" contexts = { "shorts": { "description": "YouTube Shorts (15-60 seconds)", "target_seconds": 30, "hook_seconds": 3, "main_seconds": 24, "cta_seconds": 3, # Keep scenes tight for shorts to control cost and pacing "max_scenes": 4, "scene_duration_range": (2, 8) }, "medium": { "description": "Medium-length video (1-4 minutes)", "target_seconds": 150, # 2.5 minutes "hook_seconds": 10, "main_seconds": 130, "cta_seconds": 10, "max_scenes": 12, "scene_duration_range": (5, 15) }, "long": { "description": "Long-form video (4-10 minutes)", "target_seconds": 420, # 7 minutes "hook_seconds": 15, "main_seconds": 380, "cta_seconds": 25, "max_scenes": 20, "scene_duration_range": (10, 30) } } return contexts.get(duration_type, contexts["medium"])