Base code

backend/services/youtube/__init__.py (new file, 2 lines)
@@ -0,0 +1,2 @@
"""YouTube Creator Studio services."""

backend/services/youtube/planner.py (new file, 853 lines)
@@ -0,0 +1,853 @@
"""
|
||||
YouTube Video Planner Service
|
||||
|
||||
Generates video plans, outlines, and insights using AI with persona integration.
|
||||
Supports optional Exa research for enhanced, data-driven plans.
|
||||
"""
|
||||

from typing import Dict, Any, Optional, List
from loguru import logger
from fastapi import HTTPException
import os

from services.llm_providers.main_text_generation import llm_text_gen
from utils.logger_utils import get_service_logger

logger = get_service_logger("youtube.planner")

# Video type configurations for optimization
|
||||
VIDEO_TYPE_CONFIGS = {
|
||||
"tutorial": {
|
||||
"hook_strategy": "Problem statement or quick preview of solution",
|
||||
"structure": "Problem → Steps → Result → Key Takeaways",
|
||||
"visual_style": "Clean, instructional, screen-recordings or clear demonstrations",
|
||||
"tone": "Clear, patient, instructional",
|
||||
"optimal_scenes": "2-6 scenes showing sequential steps",
|
||||
"avatar_style": "Approachable instructor, professional yet friendly",
|
||||
"cta_focus": "Subscribe for more tutorials, try it yourself"
|
||||
},
|
||||
"review": {
|
||||
"hook_strategy": "Product reveal or strong opinion statement",
|
||||
"structure": "Hook → Overview → Pros/Cons → Verdict → CTA",
|
||||
"visual_style": "Product-focused, close-ups, comparison shots",
|
||||
"tone": "Honest, engaging, opinionated but fair",
|
||||
"optimal_scenes": "4-8 scenes covering different aspects",
|
||||
"avatar_style": "Trustworthy reviewer, confident, credible",
|
||||
"cta_focus": "Check links in description, subscribe for reviews"
|
||||
},
|
||||
"educational": {
|
||||
"hook_strategy": "Intriguing question or surprising fact",
|
||||
"structure": "Question → Explanation → Examples → Conclusion",
|
||||
"visual_style": "Illustrative, concept visualization, animations",
|
||||
"tone": "Authoritative yet accessible, engaging",
|
||||
"optimal_scenes": "3-10 scenes breaking down concepts",
|
||||
"avatar_style": "Knowledgeable educator, professional, warm",
|
||||
"cta_focus": "Learn more, subscribe for educational content"
|
||||
},
|
||||
"entertainment": {
|
||||
"hook_strategy": "Grab attention immediately with energy/humor",
|
||||
"structure": "Hook → Setup → Payoff → Share/Subscribe",
|
||||
"visual_style": "Dynamic, energetic, varied angles, transitions",
|
||||
"tone": "High energy, funny, engaging, personality-driven",
|
||||
"optimal_scenes": "3-8 scenes with varied pacing",
|
||||
"avatar_style": "Energetic creator, expressive, relatable",
|
||||
"cta_focus": "Like, share, subscribe for more fun content"
|
||||
},
|
||||
"vlog": {
|
||||
"hook_strategy": "Preview of day/event or personal moment",
|
||||
"structure": "Introduction → Journey/Experience → Reflection → CTA",
|
||||
"visual_style": "Natural, personal, authentic moments",
|
||||
"tone": "Conversational, authentic, relatable",
|
||||
"optimal_scenes": "5-15 scenes following narrative",
|
||||
"avatar_style": "Authentic person, approachable, real",
|
||||
"cta_focus": "Follow my journey, subscribe for daily updates"
|
||||
},
|
||||
"product_demo": {
|
||||
"hook_strategy": "Product benefit or transformation",
|
||||
"structure": "Benefit → Features → Use Cases → CTA",
|
||||
"visual_style": "Product-focused, polished, commercial quality",
|
||||
"tone": "Enthusiastic, persuasive, benefit-focused",
|
||||
"optimal_scenes": "3-7 scenes highlighting features",
|
||||
"avatar_style": "Professional presenter, polished, confident",
|
||||
"cta_focus": "Get it now, learn more, special offer"
|
||||
},
|
||||
"reaction": {
|
||||
"hook_strategy": "Preview of reaction or content being reacted to",
|
||||
"structure": "Setup → Reaction → Commentary → CTA",
|
||||
"visual_style": "Split-screen or picture-in-picture, expressive",
|
||||
"tone": "Authentic reactions, engaging commentary",
|
||||
"optimal_scenes": "4-10 scenes with reactions",
|
||||
"avatar_style": "Expressive creator, authentic reactions",
|
||||
"cta_focus": "Watch full video, subscribe for reactions"
|
||||
},
|
||||
"storytelling": {
|
||||
"hook_strategy": "Intriguing opening or compelling question",
|
||||
"structure": "Hook → Setup → Conflict → Resolution → CTA",
|
||||
"visual_style": "Cinematic, narrative-driven, emotional",
|
||||
"tone": "Engaging, immersive, story-focused",
|
||||
"optimal_scenes": "6-15 scenes following narrative arc",
|
||||
"avatar_style": "Storyteller, warm, engaging narrator",
|
||||
"cta_focus": "Subscribe for more stories, share your thoughts"
|
||||
}
|
||||
}
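
# Example (illustrative only): how a video-type config is looked up further below in
# generate_video_plan(), with a safe fallback when no (or an unknown) video_type is given:
#
#     config = VIDEO_TYPE_CONFIGS.get("tutorial", {})
#     tone = config.get("tone", "Professional and engaging")
#     structure = config.get("structure", "")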


class YouTubePlannerService:
    """Service for planning YouTube videos with AI assistance."""

    def __init__(self):
        """Initialize the planner service."""
        logger.info("[YouTubePlanner] Service initialized")

    async def generate_video_plan(
        self,
        user_idea: str,
        duration_type: str,  # "shorts", "medium", "long"
        video_type: Optional[str] = None,  # "tutorial", "review", etc.
        target_audience: Optional[str] = None,
        video_goal: Optional[str] = None,
        brand_style: Optional[str] = None,
        persona_data: Optional[Dict[str, Any]] = None,
        reference_image_description: Optional[str] = None,
        source_content_id: Optional[str] = None,  # For blog/story conversion
        source_content_type: Optional[str] = None,  # "blog", "story"
        user_id: Optional[str] = None,
        include_scenes: bool = False,  # For shorts: combine plan + scenes in one call
        enable_research: bool = True,  # Always enable research by default for enhanced plans
    ) -> Dict[str, Any]:
        """
        Generate a comprehensive video plan from user input.

        Args:
            user_idea: User's video idea or topic
            duration_type: "shorts" (≤60s), "medium" (1-4min), "long" (4-10min)
            video_type: Optional video format type (tutorial, review, etc.)
            target_audience: Optional target audience description
            video_goal: Optional primary goal of the video
            brand_style: Optional brand aesthetic preferences
            persona_data: Optional persona data for tone/style
            reference_image_description: Optional description of reference image
            source_content_id: Optional ID of source content (blog/story)
            source_content_type: Type of source content
            user_id: Clerk user ID for subscription checking
            include_scenes: For shorts, generate the full scene breakdown in the same call
            enable_research: Whether to run Exa research to enrich the plan

        Returns:
            Dictionary with video plan, outline, insights, and metadata
        """
try:
|
||||
logger.info(
|
||||
f"[YouTubePlanner] Generating plan: idea={user_idea[:50]}..., "
|
||||
f"duration={duration_type}, video_type={video_type}, user={user_id}"
|
||||
)
|
||||
|
||||
# Get video type config
|
||||
video_type_config = {}
|
||||
if video_type and video_type in VIDEO_TYPE_CONFIGS:
|
||||
video_type_config = VIDEO_TYPE_CONFIGS[video_type]
|
||||
|
||||
# Build persona context
|
||||
persona_context = self._build_persona_context(persona_data)
|
||||
|
||||
# Build duration context
|
||||
duration_context = self._get_duration_context(duration_type)
|
||||
|
||||
# Build source content context if provided
|
||||
source_context = ""
|
||||
if source_content_id and source_content_type:
|
||||
source_context = f"""
|
||||
**Source Content:**
|
||||
- Type: {source_content_type}
|
||||
- ID: {source_content_id}
|
||||
- Note: This video should be based on the existing {source_content_type} content.
|
||||
"""
|
||||
|
||||
# Build reference image context
|
||||
image_context = ""
|
||||
if reference_image_description:
|
||||
image_context = f"""
|
||||
**Reference Image:**
|
||||
{reference_image_description}
|
||||
- Use this as visual inspiration for the video
|
||||
"""
|
||||
|
||||
# Generate smart defaults based on video type if selected
|
||||
# When video_type is selected, use its config for defaults; otherwise use user inputs or generic defaults
|
||||
if video_type_config:
|
||||
default_tone = video_type_config.get('tone', 'Professional and engaging')
|
||||
default_visual_style = video_type_config.get('visual_style', 'Professional and engaging')
|
||||
default_goal = video_goal or f"Create engaging {video_type} content"
|
||||
default_audience = target_audience or f"Viewers interested in {video_type} content"
|
||||
else:
|
||||
# No video type selected - use user inputs or generic defaults
|
||||
default_tone = 'Professional and engaging'
|
||||
default_visual_style = 'Professional and engaging'
|
||||
default_goal = video_goal or 'Engage and inform viewers'
|
||||
default_audience = target_audience or 'General YouTube audience'
|
||||
|
||||
# Perform Exa research if enabled (after defaults are set)
|
||||
research_context = ""
|
||||
research_sources = []
|
||||
research_enabled = False
|
||||
if enable_research:
|
||||
logger.info(f"[YouTubePlanner] 🔍 Starting Exa research for plan generation (idea: {user_idea[:50]}...)")
|
||||
research_enabled = True
|
||||
try:
|
||||
research_context, research_sources = await self._perform_exa_research(
|
||||
user_idea=user_idea,
|
||||
video_type=video_type,
|
||||
target_audience=default_audience,
|
||||
user_id=user_id
|
||||
)
|
||||
if research_sources:
|
||||
logger.info(
|
||||
f"[YouTubePlanner] ✅ Exa research completed successfully: "
|
||||
f"{len(research_sources)} sources found. Research context length: {len(research_context)} chars"
|
||||
)
|
||||
else:
|
||||
logger.warning(f"[YouTubePlanner] ⚠️ Exa research completed but no sources returned")
|
||||
except HTTPException as http_ex:
|
||||
# Subscription limit exceeded or other HTTP errors
|
||||
error_detail = http_ex.detail
|
||||
if isinstance(error_detail, dict):
|
||||
error_msg = error_detail.get("message", error_detail.get("error", str(http_ex)))
|
||||
else:
|
||||
error_msg = str(error_detail)
|
||||
logger.warning(
|
||||
f"[YouTubePlanner] ⚠️ Exa research skipped due to subscription limits or error: {error_msg} "
|
||||
f"(status={http_ex.status_code}). Continuing without research."
|
||||
)
|
||||
# Continue without research - non-critical failure
|
||||
except Exception as e:
|
||||
error_msg = str(e)
|
||||
logger.warning(
|
||||
f"[YouTubePlanner] ⚠️ Exa research failed (non-critical): {error_msg}. "
|
||||
f"Continuing without research."
|
||||
)
|
||||
# Continue without research - non-critical failure
|
||||
else:
|
||||
logger.info(f"[YouTubePlanner] ℹ️ Exa research disabled for this plan generation")
|
||||
|
||||
# Generate comprehensive video plan
|
||||
video_type_context = ""
|
||||
if video_type_config:
|
||||
video_type_context = f"""
|
||||
**Video Type: {video_type}**
|
||||
Follow these guidelines:
|
||||
- Structure: {video_type_config.get('structure', '')}
|
||||
- Hook: {video_type_config.get('hook_strategy', '')}
|
||||
- Visual: {video_type_config.get('visual_style', '')}
|
||||
- Tone: {video_type_config.get('tone', '')}
|
||||
- CTA: {video_type_config.get('cta_focus', '')}
|
||||
"""
|
||||
|
||||
planning_prompt = f"""Create a YouTube video plan for: "{user_idea}"
|
||||
|
||||
**Video Format:** {video_type or 'General'} | **Duration:** {duration_type} ({duration_context['target_seconds']}s target)
|
||||
**Audience:** {default_audience}
|
||||
**Goal:** {default_goal}
|
||||
**Style:** {brand_style or default_visual_style}
|
||||
|
||||
{video_type_context}
|
||||
|
||||
**Constraints:**
|
||||
- Duration: {duration_context['target_seconds']}s (Hook: {duration_context['hook_seconds']}s, Main: {duration_context['main_seconds']}s, CTA: {duration_context['cta_seconds']}s)
|
||||
- Max scenes: {duration_context['max_scenes']}
|
||||
|
||||
{persona_context if persona_data else ""}
|
||||
{source_context if source_content_id else ""}
|
||||
{image_context if reference_image_description else ""}
|
||||
{research_context if research_context else ""}
|
||||
|
||||
**Generate a plan with:**
|
||||
1. **Video Summary**: 2-3 sentences capturing the essence
|
||||
2. **Target Audience**: {f"Match: {target_audience}" if target_audience else f"Infer from video idea and {video_type or 'content type'}"}
|
||||
3. **Video Goal**: {f"Align with: {video_goal}" if video_goal else f"Infer appropriate goal for {video_type or 'this'} content"}
|
||||
4. **Key Message**: Single memorable takeaway
|
||||
5. **Hook Strategy**: Engaging opening for first {duration_context['hook_seconds']}s{f" ({video_type_config.get('hook_strategy', '')})" if video_type_config else ""}
|
||||
6. **Content Outline**: 3-5 sections totaling {duration_context['target_seconds']}s{f" following: {video_type_config.get('structure', '')}" if video_type_config else ""}
|
||||
7. **Call-to-Action**: Actionable CTA{f" ({video_type_config.get('cta_focus', '')})" if video_type_config else ""}
|
||||
8. **Visual Style**: Match {brand_style or default_visual_style}
|
||||
9. **Tone**: {default_tone}
|
||||
10. **SEO Keywords**: 5-7 relevant terms based on video idea
|
||||
11. **Avatar Recommendations**: {f"{video_type_config.get('avatar_style', '')} " if video_type_config else ""}matching audience and style
|
||||
|
||||
**Response Format (JSON):**
|
||||
{{
|
||||
"video_summary": "...",
|
||||
"target_audience": "...",
|
||||
"video_goal": "...",
|
||||
"key_message": "...",
|
||||
"hook_strategy": "...",
|
||||
"content_outline": [
|
||||
{{"section": "...", "description": "...", "duration_estimate": 30}},
|
||||
{{"section": "...", "description": "...", "duration_estimate": 45}}
|
||||
],
|
||||
"call_to_action": "...",
|
||||
"visual_style": "...",
|
||||
"tone": "...",
|
||||
"seo_keywords": ["keyword1", "keyword2", ...],
|
||||
"avatar_recommendations": {{
|
||||
"description": "...",
|
||||
"style": "...",
|
||||
"energy": "..."
|
||||
}}
|
||||
}}
|
||||
|
||||
**Critical:** Content outline durations must sum to {duration_context['target_seconds']}s (±20%).
|
||||
"""
|
||||
|
||||
system_prompt = (
|
||||
"You are an expert YouTube content strategist. Create clear, actionable video plans "
|
||||
"that are optimized for the specified video type and audience. Focus on accuracy and "
|
||||
"specificity - these plans will be used to generate actual video content."
|
||||
)
|
||||
|
||||
# For shorts, combine plan + scenes in one call to save API calls
|
||||
if include_scenes and duration_type == "shorts":
|
||||
planning_prompt += f"""
|
||||
|
||||
**IMPORTANT: Since this is a SHORTS video, also generate the complete scene breakdown in the same response.**
|
||||
|
||||
**Additional Task - Generate Detailed Scenes:**
|
||||
Create detailed scenes (up to {duration_context['max_scenes']} scenes) that include:
|
||||
1. Scene number and title
|
||||
2. Narration text (what will be spoken) - keep it concise for shorts
|
||||
3. Visual description (what viewers will see)
|
||||
4. Duration estimate (2-8 seconds each)
|
||||
5. Emphasis tags (hook, main_content, transition, cta)
|
||||
|
||||
**Scene Format:**
|
||||
Each scene should be detailed enough for video generation. Total duration must fit within {duration_context['target_seconds']} seconds.
|
||||
|
||||
**Update JSON structure to include "scenes" array and "avatar_recommendations":**
|
||||
Add a "scenes" field with the complete scene breakdown, and include "avatar_recommendations" with ideal presenter appearance, style, and energy.
|
||||
"""
|
||||
|
||||
json_struct = {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"video_summary": {"type": "string"},
|
||||
"target_audience": {"type": "string"},
|
||||
"video_goal": {"type": "string"},
|
||||
"key_message": {"type": "string"},
|
||||
"hook_strategy": {"type": "string"},
|
||||
"content_outline": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"section": {"type": "string"},
|
||||
"description": {"type": "string"},
|
||||
"duration_estimate": {"type": "number"}
|
||||
}
|
||||
}
|
||||
},
|
||||
"call_to_action": {"type": "string"},
|
||||
"visual_style": {"type": "string"},
|
||||
"tone": {"type": "string"},
|
||||
"seo_keywords": {
|
||||
"type": "array",
|
||||
"items": {"type": "string"}
|
||||
},
|
||||
"scenes": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"scene_number": {"type": "number"},
|
||||
"title": {"type": "string"},
|
||||
"narration": {"type": "string"},
|
||||
"visual_description": {"type": "string"},
|
||||
"duration_estimate": {"type": "number"},
|
||||
"emphasis": {"type": "string"},
|
||||
"visual_cues": {
|
||||
"type": "array",
|
||||
"items": {"type": "string"}
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"scene_number", "title", "narration", "visual_description",
|
||||
"duration_estimate", "emphasis"
|
||||
]
|
||||
}
|
||||
},
|
||||
"avatar_recommendations": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"description": {"type": "string"},
|
||||
"style": {"type": "string"},
|
||||
"energy": {"type": "string"}
|
||||
}
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"video_summary", "target_audience", "video_goal", "key_message",
|
||||
"hook_strategy", "content_outline", "call_to_action",
|
||||
"visual_style", "tone", "seo_keywords", "scenes", "avatar_recommendations"
|
||||
]
|
||||
}
|
||||
else:
|
||||
json_struct = {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"video_summary": {"type": "string"},
|
||||
"target_audience": {"type": "string"},
|
||||
"video_goal": {"type": "string"},
|
||||
"key_message": {"type": "string"},
|
||||
"hook_strategy": {"type": "string"},
|
||||
"content_outline": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"section": {"type": "string"},
|
||||
"description": {"type": "string"},
|
||||
"duration_estimate": {"type": "number"}
|
||||
}
|
||||
}
|
||||
},
|
||||
"call_to_action": {"type": "string"},
|
||||
"visual_style": {"type": "string"},
|
||||
"tone": {"type": "string"},
|
||||
"seo_keywords": {
|
||||
"type": "array",
|
||||
"items": {"type": "string"}
|
||||
},
|
||||
"avatar_recommendations": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"description": {"type": "string"},
|
||||
"style": {"type": "string"},
|
||||
"energy": {"type": "string"}
|
||||
}
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"video_summary", "target_audience", "video_goal", "key_message",
|
||||
"hook_strategy", "content_outline", "call_to_action",
|
||||
"visual_style", "tone", "seo_keywords", "avatar_recommendations"
|
||||
]
|
||||
}
|
||||
|
||||
# Generate plan using LLM with structured JSON response
|
||||
# llm_text_gen handles subscription checks and provider selection automatically
|
||||
# json_struct ensures deterministic structured response (returns dict, not string)
|
||||
response = llm_text_gen(
|
||||
prompt=planning_prompt,
|
||||
system_prompt=system_prompt,
|
||||
user_id=user_id,
|
||||
json_struct=json_struct
|
||||
)
|
||||
|
||||
# Parse response (structured responses return dict, text responses return string)
|
||||
if isinstance(response, dict):
|
||||
plan_data = response
|
||||
else:
|
||||
import json
|
||||
try:
|
||||
plan_data = json.loads(response)
|
||||
except json.JSONDecodeError as e:
|
||||
logger.error(f"[YouTubePlanner] Failed to parse JSON response: {e}")
|
||||
logger.debug(f"[YouTubePlanner] Raw response: {response[:500]}")
|
||||
raise HTTPException(
|
||||
status_code=500,
|
||||
detail="Failed to parse video plan response. Please try again."
|
||||
)
|
||||
|
||||
# Validate and enhance plan quality
|
||||
plan_data = self._validate_and_enhance_plan(
|
||||
plan_data, duration_context, video_type, video_type_config
|
||||
)
|
||||
|
||||
# Add metadata
|
||||
plan_data["duration_type"] = duration_type
|
||||
plan_data["duration_metadata"] = duration_context
|
||||
plan_data["user_idea"] = user_idea
|
||||
|
||||
# Add research metadata to plan
|
||||
plan_data["research_enabled"] = research_enabled
|
||||
if research_sources:
|
||||
plan_data["research_sources"] = research_sources
|
||||
plan_data["research_sources_count"] = len(research_sources)
|
||||
else:
|
||||
plan_data["research_sources"] = []
|
||||
plan_data["research_sources_count"] = 0
|
||||
|
||||
# Log research status in plan metadata for debugging
|
||||
if research_enabled:
|
||||
logger.info(
|
||||
f"[YouTubePlanner] 📊 Plan metadata: research_enabled=True, "
|
||||
f"research_sources_count={plan_data.get('research_sources_count', 0)}, "
|
||||
f"research_context_length={len(research_context)} chars"
|
||||
)
|
||||
|
||||
# Validate and process scenes if included (for shorts)
|
||||
if include_scenes and duration_type == "shorts":
|
||||
if "scenes" in plan_data and plan_data["scenes"]:
|
||||
# Validate scenes count and duration
|
||||
scenes = plan_data["scenes"]
|
||||
scene_count = len(scenes)
|
||||
total_scene_duration = sum(
|
||||
scene.get("duration_estimate", 0) for scene in scenes
|
||||
)
|
||||
|
||||
max_scenes = duration_context["max_scenes"]
|
||||
target_duration = duration_context["target_seconds"]
|
||||
|
||||
if scene_count > max_scenes:
|
||||
logger.warning(
|
||||
f"[YouTubePlanner] Scene count ({scene_count}) exceeds max ({max_scenes}). "
|
||||
f"Truncating to first {max_scenes} scenes."
|
||||
)
|
||||
plan_data["scenes"] = scenes[:max_scenes]
|
||||
|
||||
# Warn if total duration is off
|
||||
if abs(total_scene_duration - target_duration) > target_duration * 0.3:
|
||||
logger.warning(
|
||||
f"[YouTubePlanner] Total scene duration ({total_scene_duration}s) "
|
||||
f"differs significantly from target ({target_duration}s)"
|
||||
)
|
||||
|
||||
plan_data["_scenes_included"] = True
|
||||
logger.info(
|
||||
f"[YouTubePlanner] ✅ Plan + {len(plan_data['scenes'])} scenes "
|
||||
f"generated in 1 AI call (optimized for shorts)"
|
||||
)
|
||||
else:
|
||||
# LLM did not return scenes; downstream will regenerate
|
||||
plan_data["_scenes_included"] = False
|
||||
logger.warning(
|
||||
"[YouTubePlanner] Shorts optimization requested but no scenes returned; "
|
||||
"scene builder will generate scenes separately."
|
||||
)
|
||||
|
||||
logger.info(f"[YouTubePlanner] ✅ Plan generated successfully")
|
||||
|
||||
return plan_data
|
||||
|
||||
except HTTPException:
|
||||
raise
|
||||
except Exception as e:
|
||||
logger.error(f"[YouTubePlanner] Error generating plan: {e}", exc_info=True)
|
||||
raise HTTPException(
|
||||
status_code=500,
|
||||
detail=f"Failed to generate video plan: {str(e)}"
|
||||
)
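
# Shape of the returned plan (illustrative, trimmed): the fields requested in json_struct
# plus the metadata attached above.
#
#     {
#         "video_summary": "...",
#         "content_outline": [{"section": "...", "description": "...", "duration_estimate": 10}],
#         "seo_keywords": ["..."],
#         "avatar_recommendations": {"description": "...", "style": "...", "energy": "..."},
#         "duration_type": "shorts",
#         "duration_metadata": {"target_seconds": 30, "max_scenes": 4, ...},
#         "research_enabled": True,
#         "research_sources": [...],
#         "research_sources_count": 3,
#         "_scenes_included": True,    # only for shorts with include_scenes=True
#         "_quality_checks": {...},    # added by _validate_and_enhance_plan
#     }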
|
||||
|
||||
def _build_persona_context(self, persona_data: Optional[Dict[str, Any]]) -> str:
|
||||
"""Build persona context string for prompts."""
|
||||
if not persona_data:
|
||||
return """
|
||||
**Persona Context:**
|
||||
- Using default professional tone
|
||||
- No specific persona constraints
|
||||
"""
|
||||
|
||||
core_persona = persona_data.get("core_persona", {})
|
||||
tone = core_persona.get("tone", "professional")
|
||||
voice = core_persona.get("voice_characteristics", {})
|
||||
|
||||
return f"""
|
||||
**Persona Context:**
|
||||
- Tone: {tone}
|
||||
- Voice Style: {voice.get('style', 'professional')}
|
||||
- Communication Style: {voice.get('communication_style', 'clear and direct')}
|
||||
- Brand Values: {core_persona.get('core_belief', 'value-driven content')}
|
||||
- Use this persona to guide the video's tone, style, and messaging approach.
|
||||
"""
|
||||
|
||||
def _get_duration_context(self, duration_type: str) -> Dict[str, Any]:
|
||||
"""Get duration-specific context and constraints."""
|
||||
contexts = {
|
||||
"shorts": {
|
||||
"description": "YouTube Shorts (15-60 seconds)",
|
||||
"target_seconds": 30,
|
||||
"hook_seconds": 3,
|
||||
"main_seconds": 24,
|
||||
"cta_seconds": 3,
|
||||
# Keep scenes tight for shorts to control cost and pacing
|
||||
"max_scenes": 4,
|
||||
"scene_duration_range": (2, 8)
|
||||
},
|
||||
"medium": {
|
||||
"description": "Medium-length video (1-4 minutes)",
|
||||
"target_seconds": 150, # 2.5 minutes
|
||||
"hook_seconds": 10,
|
||||
"main_seconds": 130,
|
||||
"cta_seconds": 10,
|
||||
"max_scenes": 12,
|
||||
"scene_duration_range": (5, 15)
|
||||
},
|
||||
"long": {
|
||||
"description": "Long-form video (4-10 minutes)",
|
||||
"target_seconds": 420, # 7 minutes
|
||||
"hook_seconds": 15,
|
||||
"main_seconds": 380,
|
||||
"cta_seconds": 25,
|
||||
"max_scenes": 20,
|
||||
"scene_duration_range": (10, 30)
|
||||
}
|
||||
}
|
||||
|
||||
return contexts.get(duration_type, contexts["medium"])
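
# Example (illustrative): unknown duration types fall back to the "medium" profile.
#
#     ctx = self._get_duration_context("shorts")    # target_seconds=30, max_scenes=4
#     ctx = self._get_duration_context("weekly")    # unknown -> medium: 150s target, 12 scenes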
|
||||
|
||||
def _validate_and_enhance_plan(
|
||||
self,
|
||||
plan_data: Dict[str, Any],
|
||||
duration_context: Dict[str, Any],
|
||||
video_type: Optional[str],
|
||||
video_type_config: Dict[str, Any],
|
||||
) -> Dict[str, Any]:
|
||||
"""
|
||||
Validate and enhance plan quality before returning.
|
||||
|
||||
Performs quality checks:
|
||||
- Validates required fields
|
||||
- Validates content outline duration matches target
|
||||
- Ensures SEO keywords are present
|
||||
- Validates avatar recommendations
|
||||
- Adds quality metadata
|
||||
"""
|
||||
# Ensure required fields exist
|
||||
required_fields = [
|
||||
"video_summary", "target_audience", "video_goal", "key_message",
|
||||
"hook_strategy", "content_outline", "call_to_action",
|
||||
"visual_style", "tone", "seo_keywords"
|
||||
]
|
||||
|
||||
missing_fields = [field for field in required_fields if not plan_data.get(field)]
|
||||
if missing_fields:
|
||||
logger.warning(f"[YouTubePlanner] Missing required fields: {missing_fields}")
|
||||
# Fill with defaults to prevent errors
|
||||
for field in missing_fields:
|
||||
if field == "seo_keywords":
|
||||
plan_data[field] = []
|
||||
elif field == "content_outline":
|
||||
plan_data[field] = []
|
||||
else:
|
||||
plan_data[field] = f"[{field} not generated]"
|
||||
|
||||
# Validate content outline duration
|
||||
if plan_data.get("content_outline"):
|
||||
total_duration = sum(
|
||||
section.get("duration_estimate", 0)
|
||||
for section in plan_data["content_outline"]
|
||||
)
|
||||
target_duration = duration_context.get("target_seconds", 150)
|
||||
|
||||
# Allow 20% variance
|
||||
tolerance = target_duration * 0.2
|
||||
if abs(total_duration - target_duration) > tolerance:
|
||||
logger.warning(
|
||||
f"[YouTubePlanner] Content outline duration ({total_duration}s) "
|
||||
f"doesn't match target ({target_duration}s). Adjusting..."
|
||||
)
|
||||
# Normalize durations proportionally
|
||||
if total_duration > 0:
|
||||
scale_factor = target_duration / total_duration
|
||||
for section in plan_data["content_outline"]:
|
||||
if "duration_estimate" in section:
|
||||
section["duration_estimate"] = round(
|
||||
section["duration_estimate"] * scale_factor, 1
|
||||
)
|
||||
|
||||
# Validate SEO keywords
|
||||
if not plan_data.get("seo_keywords") or len(plan_data["seo_keywords"]) < 3:
|
||||
logger.warning(
|
||||
f"[YouTubePlanner] Insufficient SEO keywords ({len(plan_data.get('seo_keywords', []))}). "
|
||||
f"Plan may need enhancement."
|
||||
)
|
||||
|
||||
# Validate avatar recommendations
|
||||
if not plan_data.get("avatar_recommendations"):
|
||||
logger.warning("[YouTubePlanner] Avatar recommendations missing. Generating defaults...")
|
||||
plan_data["avatar_recommendations"] = {
|
||||
"description": video_type_config.get("avatar_style", "Professional YouTube creator"),
|
||||
"style": plan_data.get("visual_style", "Professional"),
|
||||
"energy": plan_data.get("tone", "Engaging")
|
||||
}
|
||||
else:
|
||||
# Ensure all avatar recommendation fields exist
|
||||
avatar_rec = plan_data["avatar_recommendations"]
|
||||
if not avatar_rec.get("description"):
|
||||
avatar_rec["description"] = video_type_config.get("avatar_style", "Professional YouTube creator")
|
||||
if not avatar_rec.get("style"):
|
||||
avatar_rec["style"] = plan_data.get("visual_style", "Professional")
|
||||
if not avatar_rec.get("energy"):
|
||||
avatar_rec["energy"] = plan_data.get("tone", "Engaging")
|
||||
|
||||
# Add quality metadata
|
||||
plan_data["_quality_checks"] = {
|
||||
"content_outline_validated": bool(plan_data.get("content_outline")),
|
||||
"seo_keywords_count": len(plan_data.get("seo_keywords", [])),
|
||||
"avatar_recommendations_present": bool(plan_data.get("avatar_recommendations")),
|
||||
"all_required_fields_present": len(missing_fields) == 0,
|
||||
}
|
||||
|
||||
logger.info(
|
||||
f"[YouTubePlanner] Plan quality validated: "
|
||||
f"outline_sections={len(plan_data.get('content_outline', []))}, "
|
||||
f"seo_keywords={len(plan_data.get('seo_keywords', []))}, "
|
||||
f"avatar_recs={'yes' if plan_data.get('avatar_recommendations') else 'no'}"
|
||||
)
|
||||
|
||||
return plan_data
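
# Worked example (illustrative) of the proportional rescaling above: with a 30s shorts
# target and an outline of 10s + 20s + 15s = 45s, the deviation (15s) exceeds the 20%
# tolerance (6s), so each section is scaled by 30 / 45 ≈ 0.667, giving 6.7s, 13.3s and
# 10.0s (sum = 30s).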
|
||||
|
||||
async def _perform_exa_research(
|
||||
self,
|
||||
user_idea: str,
|
||||
video_type: Optional[str],
|
||||
target_audience: str,
|
||||
user_id: str
|
||||
) -> tuple[str, List[Dict[str, Any]]]:
|
||||
"""
|
||||
Perform Exa research directly using ExaResearchProvider (common module).
|
||||
Uses the same pattern as podcast research with proper subscription checks.
|
||||
|
||||
Returns:
|
||||
Tuple of (research_context_string, research_sources_list)
|
||||
"""
|
||||
try:
|
||||
# Pre-flight validation for Exa search only (not full blog writer workflow)
|
||||
# We only need to validate Exa API calls, not LLM operations
|
||||
from services.database import get_db
|
||||
from services.subscription import PricingService
|
||||
from models.subscription_models import APIProvider
|
||||
|
||||
db = next(get_db())
|
||||
try:
|
||||
pricing_service = PricingService(db)
|
||||
# Only validate Exa API call, not the full research workflow
|
||||
operations_to_validate = [
|
||||
{
|
||||
'provider': APIProvider.EXA,
|
||||
'tokens_requested': 0,
|
||||
'actual_provider_name': 'exa',
|
||||
'operation_type': 'exa_neural_search'
|
||||
}
|
||||
]
|
||||
|
||||
can_proceed, message, error_details = pricing_service.check_comprehensive_limits(
|
||||
user_id=user_id,
|
||||
operations=operations_to_validate
|
||||
)
|
||||
|
||||
if not can_proceed:
|
||||
usage_info = error_details.get('usage_info', {}) if error_details else {}
|
||||
logger.warning(
|
||||
f"[YouTubePlanner] Exa search blocked for user {user_id}: {message}"
|
||||
)
|
||||
raise HTTPException(
|
||||
status_code=429,
|
||||
detail={
|
||||
'error': message,
|
||||
'message': message,
|
||||
'provider': 'exa',
|
||||
'usage_info': usage_info if usage_info else error_details
|
||||
}
|
||||
)
|
||||
|
||||
logger.info(f"[YouTubePlanner] Exa search pre-flight validation passed for user {user_id}")
|
||||
except HTTPException:
|
||||
raise
|
||||
except Exception as e:
|
||||
logger.warning(f"[YouTubePlanner] Exa search pre-flight validation failed: {e}")
|
||||
raise
|
||||
finally:
|
||||
db.close()
|
||||
|
||||
# Use ExaResearchProvider directly (common module, same as podcast)
|
||||
from services.blog_writer.research.exa_provider import ExaResearchProvider
|
||||
from types import SimpleNamespace
|
||||
|
||||
# Build research query
|
||||
query_parts = [user_idea]
|
||||
if video_type:
|
||||
query_parts.append(f"{video_type} video")
|
||||
if target_audience and target_audience != "General YouTube audience":
|
||||
query_parts.append(target_audience)
|
||||
|
||||
research_query = " ".join(query_parts)
|
||||
|
||||
# Configure Exa research (same pattern as podcast)
|
||||
cfg = SimpleNamespace(
|
||||
exa_search_type="neural",
|
||||
exa_category="web", # Focus on web content for YouTube
|
||||
exa_include_domains=[],
|
||||
exa_exclude_domains=[],
|
||||
max_sources=10, # Limit sources for cost efficiency
|
||||
source_types=[],
|
||||
)
|
||||
|
||||
# Perform research
|
||||
provider = ExaResearchProvider()
|
||||
result = await provider.search(
|
||||
prompt=research_query,
|
||||
topic=user_idea,
|
||||
industry="",
|
||||
target_audience=target_audience,
|
||||
config=cfg,
|
||||
user_id=user_id,
|
||||
)
|
||||
|
||||
# Track usage
|
||||
cost_total = 0.0
|
||||
if isinstance(result, dict):
|
||||
cost_total = result.get("cost", {}).get("total", 0.005) if result.get("cost") else 0.005
|
||||
provider.track_exa_usage(user_id, cost_total)
|
||||
|
||||
# Extract sources and content
|
||||
sources = result.get("sources", []) or []
|
||||
research_content = result.get("content", "")
|
||||
|
||||
# Build research context for prompt
|
||||
research_context = ""
|
||||
if research_content and sources:
|
||||
# Limit content to 2000 chars to avoid token bloat
|
||||
limited_content = research_content[:2000]
|
||||
research_context = f"""
|
||||
**Research & Current Information:**
|
||||
Based on current web research, here are relevant insights and trends:
|
||||
|
||||
{limited_content}
|
||||
|
||||
**Key Research Sources ({len(sources)} sources):**
|
||||
"""
|
||||
# Add top 5 sources for context
|
||||
for idx, source in enumerate(sources[:5], 1):
|
||||
title = source.get("title", "Untitled") or "Untitled"
|
||||
url = source.get("url", "") or ""
|
||||
excerpt = (source.get("excerpt", "") or "")[:200]
|
||||
if not excerpt:
|
||||
excerpt = (source.get("summary", "") or "")[:200]
|
||||
research_context += f"\n{idx}. {title}\n {excerpt}\n Source: {url}\n"
|
||||
|
||||
research_context += "\n**Use this research to:**\n"
|
||||
research_context += "- Identify current trends and popular angles\n"
|
||||
research_context += "- Enhance SEO keywords with real search data\n"
|
||||
research_context += "- Ensure content is relevant and up-to-date\n"
|
||||
research_context += "- Reference credible sources in the plan\n"
|
||||
research_context += "- Identify gaps or unique angles not covered by competitors\n"
|
||||
|
||||
# Format sources for response
|
||||
formatted_sources = []
|
||||
for source in sources:
|
||||
formatted_sources.append({
|
||||
"title": source.get("title", "") or "",
|
||||
"url": source.get("url", "") or "",
|
||||
"excerpt": (source.get("excerpt", "") or "")[:300],
|
||||
"published_at": source.get("published_at"),
|
||||
"credibility_score": source.get("credibility_score", 0.85) or 0.85,
|
||||
})
|
||||
|
||||
logger.info(f"[YouTubePlanner] Exa research completed: {len(formatted_sources)} sources found")
|
||||
return research_context, formatted_sources
|
||||
|
||||
except HTTPException:
|
||||
# Re-raise HTTPException (subscription limits, etc.)
|
||||
raise
|
||||
except Exception as e:
|
||||
logger.error(f"[YouTubePlanner] Research error: {e}", exc_info=True)
|
||||
# Non-critical failure - return empty research
|
||||
return "", []

backend/services/youtube/renderer.py (new file, 573 lines)
@@ -0,0 +1,573 @@
"""
|
||||
YouTube Video Renderer Service
|
||||
|
||||
Handles video rendering using WAN 2.5 text-to-video and audio generation.
|
||||
"""
|
||||
|
||||
from typing import Dict, Any, List, Optional
|
||||
from pathlib import Path
|
||||
import base64
|
||||
import uuid
|
||||
import requests
|
||||
from loguru import logger
|
||||
from fastapi import HTTPException
|
||||
|
||||
from services.wavespeed.client import WaveSpeedClient
|
||||
from services.llm_providers.main_audio_generation import generate_audio
|
||||
from services.story_writer.video_generation_service import StoryVideoGenerationService
|
||||
from services.subscription import PricingService
|
||||
from services.subscription.preflight_validator import validate_scene_animation_operation
|
||||
from services.llm_providers.main_video_generation import track_video_usage
|
||||
from utils.logger_utils import get_service_logger
|
||||
from utils.asset_tracker import save_asset_to_library
|
||||
|
||||
logger = get_service_logger("youtube.renderer")
|
||||
|
||||
|
||||

class YouTubeVideoRendererService:
    """Service for rendering YouTube videos from scenes."""

    def __init__(self):
        """Initialize the renderer service."""
        self.wavespeed_client = WaveSpeedClient()

        # Video output directory
        base_dir = Path(__file__).parent.parent.parent.parent
        self.output_dir = base_dir / "youtube_videos"
        self.output_dir.mkdir(parents=True, exist_ok=True)

        logger.info(f"[YouTubeRenderer] Initialized with output directory: {self.output_dir}")

def render_scene_video(
|
||||
self,
|
||||
scene: Dict[str, Any],
|
||||
video_plan: Dict[str, Any],
|
||||
user_id: str,
|
||||
resolution: str = "720p",
|
||||
generate_audio_enabled: bool = True,
|
||||
voice_id: str = "Wise_Woman",
|
||||
) -> Dict[str, Any]:
|
||||
"""
|
||||
Render a single scene into a video.
|
||||
|
||||
Args:
|
||||
scene: Scene data with narration and visual prompts
|
||||
video_plan: Original video plan for context
|
||||
user_id: Clerk user ID
|
||||
resolution: Video resolution (480p, 720p, 1080p)
|
||||
generate_audio_enabled: Whether to generate narration audio
|
||||
voice_id: Voice ID for audio generation
|
||||
|
||||
Returns:
|
||||
Dictionary with video metadata, bytes, and cost
|
||||
"""
|
||||
try:
|
||||
scene_number = scene.get("scene_number", 1)
|
||||
narration = scene.get("narration", "").strip()
|
||||
visual_prompt = (scene.get("enhanced_visual_prompt") or scene.get("visual_prompt", "")).strip()
|
||||
duration_estimate = scene.get("duration_estimate", 5)
|
||||
|
||||
# VALIDATION: Check inputs before making expensive API calls
|
||||
if not visual_prompt:
|
||||
raise HTTPException(
|
||||
status_code=400,
|
||||
detail={
|
||||
"error": f"Scene {scene_number} has no visual prompt",
|
||||
"scene_number": scene_number,
|
||||
"message": "Visual prompt is required for video generation",
|
||||
"user_action": "Please add a visual description for this scene before rendering.",
|
||||
}
|
||||
)
|
||||
|
||||
if len(visual_prompt) < 10:
|
||||
logger.warning(
|
||||
f"[YouTubeRenderer] Scene {scene_number} has very short visual prompt "
|
||||
f"({len(visual_prompt)} chars), may result in poor quality"
|
||||
)
|
||||
|
||||
# Clamp duration to valid WAN 2.5 values (5 or 10 seconds)
|
||||
duration = 5 if duration_estimate <= 7 else 10
|
||||
|
||||
# Log asset usage status
|
||||
has_existing_image = bool(scene.get("imageUrl"))
|
||||
has_existing_audio = bool(scene.get("audioUrl"))
|
||||
|
||||
logger.info(
|
||||
f"[YouTubeRenderer] Rendering scene {scene_number}: "
|
||||
f"resolution={resolution}, duration={duration}s, prompt_length={len(visual_prompt)}, "
|
||||
f"has_existing_image={has_existing_image}, has_existing_audio={has_existing_audio}"
|
||||
)
|
||||
|
||||
# Use existing audio if available, otherwise generate if requested
|
||||
audio_base64 = None
|
||||
scene_audio_url = scene.get("audioUrl")
|
||||
|
||||
if scene_audio_url:
|
||||
# Load existing audio from URL
|
||||
try:
|
||||
# Path and requests are already imported at module level; only urlparse is needed here.
from urllib.parse import urlparse
|
||||
|
||||
logger.info(f"[YouTubeRenderer] Attempting to load existing audio for scene {scene_number} from URL: {scene_audio_url}")
|
||||
|
||||
# Extract filename from URL (e.g., /api/youtube/audio/filename.mp3)
|
||||
parsed_url = urlparse(scene_audio_url)
|
||||
audio_filename = Path(parsed_url.path).name
|
||||
|
||||
# Try to load from local file system first
|
||||
base_dir = Path(__file__).parent.parent.parent.parent
|
||||
youtube_audio_dir = base_dir / "youtube_audio"
|
||||
audio_path = youtube_audio_dir / audio_filename
|
||||
|
||||
# Debug: If file not found, try to find it with flexible matching
|
||||
if not audio_path.exists():
|
||||
logger.debug(f"[YouTubeRenderer] Audio file not found at {audio_path}. Searching for alternative matches...")
|
||||
if youtube_audio_dir.exists():
|
||||
all_files = list(youtube_audio_dir.glob("*.mp3"))
|
||||
logger.debug(f"[YouTubeRenderer] Found {len(all_files)} MP3 files in directory")
|
||||
|
||||
# Try to find a file that matches the scene (by scene number or title pattern)
|
||||
# The filename format is: scene_{scene_number}_{clean_title}_{unique_id}.mp3
|
||||
# Extract components from expected filename
|
||||
expected_parts = audio_filename.replace('.mp3', '').split('_')
|
||||
if len(expected_parts) >= 3:
|
||||
scene_num_str = expected_parts[1] if expected_parts[0] == 'scene' else None
|
||||
title_part = expected_parts[2] if len(expected_parts) > 2 else None
|
||||
|
||||
# Try to find files matching scene number or title
|
||||
matching_files = []
|
||||
for f in all_files:
|
||||
file_parts = f.stem.split('_')
|
||||
if len(file_parts) >= 3 and file_parts[0] == 'scene':
|
||||
file_scene_num = file_parts[1]
|
||||
file_title = file_parts[2] if len(file_parts) > 2 else ''
|
||||
|
||||
# Match by scene number (try both 0-indexed and 1-indexed)
|
||||
if scene_num_str and scene_num_str.isdigit():
    scene_num_int = int(scene_num_str)
|
||||
file_scene_int = int(file_scene_num) if file_scene_num.isdigit() else None
|
||||
if file_scene_int == scene_num_int or file_scene_int == scene_num_int - 1 or file_scene_int == scene_num_int + 1:
|
||||
matching_files.append(f.name)
|
||||
# Or match by title
|
||||
elif title_part and title_part.lower() in file_title.lower():
|
||||
matching_files.append(f.name)
|
||||
|
||||
if matching_files:
|
||||
logger.info(
|
||||
f"[YouTubeRenderer] Found potential audio file matches for scene {scene_number}: {matching_files[:3]}. "
|
||||
f"Expected: {audio_filename}"
|
||||
)
|
||||
# Try using the first match
|
||||
alternative_path = youtube_audio_dir / matching_files[0]
|
||||
if alternative_path.exists() and alternative_path.is_file():
|
||||
logger.info(f"[YouTubeRenderer] Using alternative audio file: {matching_files[0]}")
|
||||
audio_path = alternative_path
|
||||
audio_filename = matching_files[0]
|
||||
else:
|
||||
logger.warning(f"[YouTubeRenderer] Alternative match found but file doesn't exist: {alternative_path}")
|
||||
else:
|
||||
# Show sample files for debugging
|
||||
sample_files = [f.name for f in all_files[:10] if f.name.startswith("scene_")]
|
||||
if sample_files:
|
||||
logger.debug(f"[YouTubeRenderer] Sample scene audio files in directory: {sample_files}")
|
||||
|
||||
if audio_path.exists() and audio_path.is_file():
|
||||
with open(audio_path, "rb") as f:
|
||||
audio_bytes = f.read()
|
||||
audio_base64 = base64.b64encode(audio_bytes).decode('utf-8')
|
||||
logger.info(f"[YouTubeRenderer] ✅ Using existing audio for scene {scene_number} from local file: {audio_filename} ({len(audio_bytes)} bytes)")
|
||||
else:
|
||||
# File not found locally - try loading from asset library
|
||||
logger.warning(
|
||||
f"[YouTubeRenderer] Audio file not found locally at {audio_path}. "
|
||||
f"Attempting to load from asset library (filename: {audio_filename})"
|
||||
)
|
||||
|
||||
try:
|
||||
from services.content_asset_service import ContentAssetService
|
||||
from services.database import get_db
|
||||
from models.content_asset_models import AssetType, AssetSource
|
||||
|
||||
db = next(get_db())
|
||||
try:
|
||||
asset_service = ContentAssetService(db)
|
||||
# Try to find the asset by filename and source
|
||||
assets = asset_service.get_assets(
|
||||
user_id=user_id,
|
||||
asset_type=AssetType.AUDIO,
|
||||
source_module=AssetSource.YOUTUBE_CREATOR,
|
||||
limit=100,
|
||||
)
|
||||
|
||||
# Find matching asset by filename
|
||||
matching_asset = None
|
||||
for asset in assets:
|
||||
if asset.filename == audio_filename:
|
||||
matching_asset = asset
|
||||
break
|
||||
|
||||
if matching_asset and matching_asset.file_path:
|
||||
asset_path = Path(matching_asset.file_path)
|
||||
if asset_path.exists() and asset_path.is_file():
|
||||
with open(asset_path, "rb") as f:
|
||||
audio_bytes = f.read()
|
||||
audio_base64 = base64.b64encode(audio_bytes).decode('utf-8')
|
||||
logger.info(
|
||||
f"[YouTubeRenderer] ✅ Loaded audio for scene {scene_number} from asset library: "
|
||||
f"{audio_filename} ({len(audio_bytes)} bytes)"
|
||||
)
|
||||
else:
|
||||
raise FileNotFoundError(f"Asset library file path does not exist: {asset_path}")
|
||||
else:
|
||||
raise FileNotFoundError(f"Audio asset not found in library for filename: {audio_filename}")
|
||||
finally:
|
||||
db.close()
|
||||
except Exception as asset_error:
|
||||
logger.warning(
|
||||
f"[YouTubeRenderer] Failed to load audio from asset library: {asset_error}. "
|
||||
f"Original path attempted: {audio_path}"
|
||||
)
|
||||
raise FileNotFoundError(
|
||||
f"Audio file not found at {audio_path} and not found in asset library: {asset_error}"
|
||||
)
|
||||
|
||||
except FileNotFoundError as e:
|
||||
logger.warning(f"[YouTubeRenderer] ❌ Audio file not found: {e}. Will generate new audio if enabled.")
|
||||
scene_audio_url = None # Fall back to generation
|
||||
except Exception as e:
|
||||
logger.warning(f"[YouTubeRenderer] ❌ Failed to load existing audio: {e}. Will generate new audio if enabled.", exc_info=True)
|
||||
scene_audio_url = None # Fall back to generation
|
||||
|
||||
# Generate audio if not available and generation is enabled
|
||||
if not audio_base64 and generate_audio_enabled and narration and len(narration.strip()) > 0:
|
||||
try:
|
||||
audio_result = generate_audio(
|
||||
text=narration,
|
||||
voice_id=voice_id,
|
||||
user_id=user_id,
|
||||
)
|
||||
# generate_audio may return raw bytes or AudioGenerationResult
|
||||
audio_bytes = audio_result.audio_bytes if hasattr(audio_result, "audio_bytes") else audio_result
|
||||
# Convert to base64 (just the base64 string, not data URI)
|
||||
audio_base64 = base64.b64encode(audio_bytes).decode('utf-8')
|
||||
logger.info(f"[YouTubeRenderer] Generated new audio for scene {scene_number}")
|
||||
except Exception as e:
|
||||
logger.warning(f"[YouTubeRenderer] Audio generation failed: {e}, continuing without audio")
|
||||
|
||||
# VALIDATION: Final check before expensive video API call
|
||||
if not visual_prompt or len(visual_prompt.strip()) < 5:
|
||||
raise HTTPException(
|
||||
status_code=400,
|
||||
detail={
|
||||
"error": f"Scene {scene_number} has invalid visual prompt",
|
||||
"scene_number": scene_number,
|
||||
"message": "Visual prompt must be at least 5 characters",
|
||||
"user_action": "Please provide a valid visual description for this scene.",
|
||||
}
|
||||
)
|
||||
|
||||
# Generate video using WAN 2.5 text-to-video
|
||||
# This is the expensive API call - all validation should be done before this
|
||||
# Use sync mode to wait for result directly (prevents timeout issues)
|
||||
try:
|
||||
video_result = self.wavespeed_client.generate_text_video(
|
||||
prompt=visual_prompt,
|
||||
resolution=resolution,
|
||||
duration=duration,
|
||||
audio_base64=audio_base64, # Optional: enables lip-sync if provided
|
||||
enable_prompt_expansion=True,
|
||||
enable_sync_mode=True, # Use sync mode to wait for result directly
|
||||
timeout=600, # Increased timeout for sync mode (10 minutes)
|
||||
)
|
||||
except requests.exceptions.Timeout as e:
|
||||
logger.error(f"[YouTubeRenderer] WaveSpeed API timed out for scene {scene_number}: {e}")
|
||||
raise HTTPException(
|
||||
status_code=504,
|
||||
detail={
|
||||
"error": "WaveSpeed request timed out",
|
||||
"scene_number": scene_number,
|
||||
"message": "The video generation request timed out.",
|
||||
"user_action": "Please retry. If it persists, try fewer scenes, lower resolution, or shorter durations.",
|
||||
},
|
||||
) from e
|
||||
except requests.exceptions.RequestException as e:
|
||||
logger.error(f"[YouTubeRenderer] WaveSpeed API request failed for scene {scene_number}: {e}")
|
||||
raise HTTPException(
|
||||
status_code=502,
|
||||
detail={
|
||||
"error": "WaveSpeed request failed",
|
||||
"scene_number": scene_number,
|
||||
"message": str(e),
|
||||
"user_action": "Please retry. If it persists, check network connectivity or try again later.",
|
||||
},
|
||||
) from e
|
||||
|
||||
# Save scene video
|
||||
video_service = StoryVideoGenerationService(output_dir=str(self.output_dir))
|
||||
save_result = video_service.save_scene_video(
|
||||
video_bytes=video_result["video_bytes"],
|
||||
scene_number=scene_number,
|
||||
user_id=user_id,
|
||||
)
|
||||
|
||||
# Update video URL to use YouTube API endpoint
|
||||
filename = save_result["video_filename"]
|
||||
save_result["video_url"] = f"/api/youtube/videos/{filename}"
|
||||
|
||||
# Track usage
|
||||
usage_info = track_video_usage(
|
||||
user_id=user_id,
|
||||
provider=video_result["provider"],
|
||||
model_name=video_result["model_name"],
|
||||
prompt=visual_prompt,
|
||||
video_bytes=video_result["video_bytes"],
|
||||
cost_override=video_result["cost"],
|
||||
)
|
||||
|
||||
logger.info(
|
||||
f"[YouTubeRenderer] ✅ Scene {scene_number} rendered: "
|
||||
f"cost=${video_result['cost']:.2f}, size={len(video_result['video_bytes'])} bytes"
|
||||
)
|
||||
|
||||
return {
|
||||
"scene_number": scene_number,
|
||||
"video_filename": save_result["video_filename"],
|
||||
"video_url": save_result["video_url"],
|
||||
"video_path": save_result["video_path"],
|
||||
"duration": video_result["duration"],
|
||||
"cost": video_result["cost"],
|
||||
"resolution": resolution,
|
||||
"width": video_result["width"],
|
||||
"height": video_result["height"],
|
||||
"file_size": save_result["file_size"],
|
||||
"prediction_id": video_result.get("prediction_id"),
|
||||
"usage_info": usage_info,
|
||||
}
|
||||
|
||||
except HTTPException as e:
|
||||
# Re-raise with better error message for UI
|
||||
error_detail = e.detail
|
||||
if isinstance(error_detail, dict):
|
||||
error_msg = error_detail.get("error", str(error_detail))
|
||||
else:
|
||||
error_msg = str(error_detail)
|
||||
|
||||
logger.error(
|
||||
f"[YouTubeRenderer] Scene {scene_number} failed: {error_msg}",
|
||||
exc_info=True
|
||||
)
|
||||
raise HTTPException(
|
||||
status_code=e.status_code,
|
||||
detail={
|
||||
"error": f"Failed to render scene {scene_number}",
|
||||
"scene_number": scene_number,
|
||||
"message": error_msg,
|
||||
"user_action": "Please try again. If the issue persists, check your scene content and try a different resolution.",
|
||||
}
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(f"[YouTubeRenderer] Error rendering scene {scene_number}: {e}", exc_info=True)
|
||||
raise HTTPException(
|
||||
status_code=500,
|
||||
detail={
|
||||
"error": f"Failed to render scene {scene_number}",
|
||||
"scene_number": scene_number,
|
||||
"message": str(e),
|
||||
"user_action": "Please try again. If the issue persists, check your scene content and try a different resolution.",
|
||||
}
|
||||
)
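
# Example (illustrative): minimal scene payload this method expects; field names follow
# the .get() calls above and the values are made up.
#
#     scene = {
#         "scene_number": 1,
#         "narration": "Here is the fastest way to brew pour-over coffee.",
#         "visual_prompt": "Close-up of hot water poured over coffee grounds, warm light",
#         "duration_estimate": 6,     # clamped to 5s or 10s for WAN 2.5
#     }
#     result = renderer.render_scene_video(
#         scene=scene, video_plan=plan, user_id="user_123",    # hypothetical id
#         resolution="720p", voice_id="Wise_Woman",
#     )
#     result["video_url"]    # "/api/youtube/videos/<filename>"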
|
||||
|
||||
def render_full_video(
|
||||
self,
|
||||
scenes: List[Dict[str, Any]],
|
||||
video_plan: Dict[str, Any],
|
||||
user_id: str,
|
||||
resolution: str = "720p",
|
||||
combine_scenes: bool = True,
|
||||
voice_id: str = "Wise_Woman",
|
||||
) -> Dict[str, Any]:
|
||||
"""
|
||||
Render a complete video from multiple scenes.
|
||||
|
||||
Args:
|
||||
scenes: List of scene data
|
||||
video_plan: Original video plan
|
||||
user_id: Clerk user ID
|
||||
resolution: Video resolution
|
||||
combine_scenes: Whether to combine scenes into single video
|
||||
voice_id: Voice ID for narration
|
||||
|
||||
Returns:
|
||||
Dictionary with video metadata and scene results
|
||||
"""
|
||||
try:
|
||||
logger.info(
|
||||
f"[YouTubeRenderer] Rendering full video: {len(scenes)} scenes, "
|
||||
f"resolution={resolution}, user={user_id}"
|
||||
)
|
||||
|
||||
# Filter enabled scenes
|
||||
enabled_scenes = [s for s in scenes if s.get("enabled", True)]
|
||||
if not enabled_scenes:
|
||||
raise HTTPException(status_code=400, detail="No enabled scenes to render")
|
||||
|
||||
scene_results = []
|
||||
total_cost = 0.0
|
||||
|
||||
# Render each scene
|
||||
for idx, scene in enumerate(enabled_scenes):
|
||||
logger.info(
|
||||
f"[YouTubeRenderer] Rendering scene {idx + 1}/{len(enabled_scenes)}: "
|
||||
f"Scene {scene.get('scene_number', idx + 1)}"
|
||||
)
|
||||
|
||||
scene_result = self.render_scene_video(
|
||||
scene=scene,
|
||||
video_plan=video_plan,
|
||||
user_id=user_id,
|
||||
resolution=resolution,
|
||||
generate_audio_enabled=True,
|
||||
voice_id=voice_id,
|
||||
)
|
||||
|
||||
scene_results.append(scene_result)
|
||||
total_cost += scene_result["cost"]
|
||||
|
||||
# Combine scenes if requested
|
||||
final_video_path = None
|
||||
final_video_url = None
|
||||
if combine_scenes and len(scene_results) > 1:
|
||||
logger.info("[YouTubeRenderer] Combining scenes into final video...")
|
||||
|
||||
# Prepare data for video concatenation
|
||||
scene_video_paths = [r["video_path"] for r in scene_results]
|
||||
scene_audio_paths = [r.get("audio_path") for r in scene_results if r.get("audio_path")]
|
||||
|
||||
# Use StoryVideoGenerationService to combine
|
||||
video_service = StoryVideoGenerationService(output_dir=str(self.output_dir))
|
||||
|
||||
# Create scene dicts for concatenation
|
||||
scene_dicts = [
|
||||
{
|
||||
"scene_number": r["scene_number"],
|
||||
"title": f"Scene {r['scene_number']}",
|
||||
}
|
||||
for r in scene_results
|
||||
]
|
||||
|
||||
combined_result = video_service.generate_story_video(
|
||||
scenes=scene_dicts,
|
||||
image_paths=[None] * len(scene_results), # No static images
|
||||
audio_paths=scene_audio_paths if scene_audio_paths else [],
|
||||
video_paths=scene_video_paths, # Use rendered videos
|
||||
user_id=user_id,
|
||||
story_title=video_plan.get("video_summary", "YouTube Video")[:50],
|
||||
fps=24,
|
||||
)
|
||||
|
||||
final_video_path = combined_result["video_path"]
|
||||
final_video_url = combined_result["video_url"]
|
||||
|
||||
logger.info(
|
||||
f"[YouTubeRenderer] ✅ Full video rendered: {len(scene_results)} scenes, "
|
||||
f"total_cost=${total_cost:.2f}"
|
||||
)
|
||||
|
||||
return {
|
||||
"success": True,
|
||||
"scene_results": scene_results,
|
||||
"total_cost": total_cost,
|
||||
"final_video_path": final_video_path,
|
||||
"final_video_url": final_video_url,
|
||||
"num_scenes": len(scene_results),
|
||||
"resolution": resolution,
|
||||
}
|
||||
|
||||
except HTTPException:
|
||||
raise
|
||||
except Exception as e:
|
||||
logger.error(f"[YouTubeRenderer] Error rendering full video: {e}", exc_info=True)
|
||||
raise HTTPException(
|
||||
status_code=500,
|
||||
detail=f"Failed to render video: {str(e)}"
|
||||
)
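
# Example (illustrative): rendering every enabled scene and stitching the results.
#
#     result = renderer.render_full_video(
#         scenes=plan["scenes"], video_plan=plan, user_id="user_123",    # hypothetical id
#         resolution="720p", combine_scenes=True,
#     )
#     result["total_cost"], result["final_video_url"]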
|
||||
|
||||
def estimate_render_cost(
|
||||
self,
|
||||
scenes: List[Dict[str, Any]],
|
||||
resolution: str = "720p",
|
||||
image_model: str = "ideogram-v3-turbo",
|
||||
) -> Dict[str, Any]:
|
||||
"""
|
||||
Estimate the cost of rendering a video before actually rendering it.
|
||||
|
||||
Args:
|
||||
scenes: List of scene data with duration estimates
|
||||
resolution: Video resolution (480p, 720p, 1080p)
image_model: Image model used for per-scene image cost estimates
|
||||
|
||||
Returns:
|
||||
Dictionary with cost breakdown and total estimate
|
||||
"""
|
||||
# Pricing per second (same as in WaveSpeedClient)
|
||||
pricing = {
|
||||
"480p": 0.05,
|
||||
"720p": 0.10,
|
||||
"1080p": 0.15,
|
||||
}
|
||||
|
||||
price_per_second = pricing.get(resolution, 0.10)
|
||||
|
||||
# Image generation pricing
|
||||
image_pricing = {
|
||||
"ideogram-v3-turbo": 0.10,
|
||||
"qwen-image": 0.05,
|
||||
}
|
||||
|
||||
image_cost_per_scene = image_pricing.get(image_model, 0.10)

        # Filter enabled scenes
        enabled_scenes = [s for s in scenes if s.get("enabled", True)]

        scene_costs = []
        total_cost = 0.0
        total_duration = 0.0
        total_image_cost = len(enabled_scenes) * image_cost_per_scene

        for scene in enabled_scenes:
            scene_number = scene.get("scene_number", 0)
            duration_estimate = scene.get("duration_estimate", 5)

            # Clamp duration to valid WAN 2.5 values (5 or 10 seconds)
            duration = 5 if duration_estimate <= 7 else 10

            scene_cost = price_per_second * duration
            scene_costs.append({
                "scene_number": scene_number,
                "duration_estimate": duration_estimate,
                "actual_duration": duration,
                "cost": round(scene_cost, 2),
            })

            total_cost += scene_cost
            total_duration += duration

        # Add image costs to total
        total_cost += total_image_cost

        return {
            "resolution": resolution,
            "price_per_second": price_per_second,
            "num_scenes": len(enabled_scenes),
            "total_duration_seconds": total_duration,
            "scene_costs": scene_costs,
            "total_cost": round(total_cost, 2),
            "estimated_cost_range": {
                "min": round(total_cost * 0.9, 2),  # 10% buffer
                "max": round(total_cost * 1.1, 2),  # 10% buffer
            },
            "image_model": image_model,
            "image_cost_per_scene": image_cost_per_scene,
            "total_image_cost": round(total_image_cost, 2),
        }
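
    # Usage sketch (hypothetical names; the renderer service class is defined
    # earlier in this file, and `scenes` comes from the scene builder):
    #   estimate = renderer.estimate_render_cost(scenes, resolution="720p")
    #   print(estimate["total_cost"], estimate["estimated_cost_range"])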
598
backend/services/youtube/scene_builder.py
Normal file
598
backend/services/youtube/scene_builder.py
Normal file
@@ -0,0 +1,598 @@
"""
YouTube Scene Builder Service

Converts video plans into structured scenes with narration, visual prompts, and timing.
"""

from typing import Dict, Any, Optional, List
from loguru import logger
from fastapi import HTTPException

from services.llm_providers.main_text_generation import llm_text_gen
from services.story_writer.prompt_enhancer_service import PromptEnhancerService
from utils.logger_utils import get_service_logger

logger = get_service_logger("youtube.scene_builder")


class YouTubeSceneBuilderService:
    """Service for building structured video scenes from plans."""

    def __init__(self):
        """Initialize the scene builder service."""
        self.prompt_enhancer = PromptEnhancerService()
        logger.info("[YouTubeSceneBuilder] Service initialized")

    def build_scenes_from_plan(
        self,
        video_plan: Dict[str, Any],
        user_id: str,
        custom_script: Optional[str] = None,
    ) -> List[Dict[str, Any]]:
        """
        Build structured scenes from a video plan.

        This method is optimized to minimize AI calls:
        - For shorts: Reuses scenes if already generated in plan (0 AI calls)
        - For medium/long: Generates scenes + batch enhances (1-3 AI calls total)
        - Custom script: Parses script without AI calls (0 AI calls)

        Args:
            video_plan: Video plan from planner service
            user_id: Clerk user ID for subscription checking
            custom_script: Optional custom script to use instead of generating

        Returns:
            List of scene dictionaries with narration, visual prompts, timing, etc.
        """
        try:
            duration_type = video_plan.get('duration_type', 'medium')
            logger.info(
                f"[YouTubeSceneBuilder] Building scenes from plan: "
                f"duration={duration_type}, "
                f"sections={len(video_plan.get('content_outline', []))}, "
                f"user={user_id}"
            )

            duration_metadata = video_plan.get("duration_metadata", {})
            max_scenes = duration_metadata.get("max_scenes", 10)

            # Optimization: Check if scenes already exist in plan (prevents duplicate generation)
            # This can happen if plan was generated with include_scenes=True for shorts
            existing_scenes = video_plan.get("scenes", [])
            if existing_scenes and video_plan.get("_scenes_included"):
                # Scenes already generated in plan - reuse them (0 AI calls)
                logger.info(
                    f"[YouTubeSceneBuilder] ♻️ Reusing {len(existing_scenes)} scenes from plan "
                    f"(duration={duration_type}) - skipping generation to save AI calls"
                )
                scenes = self._normalize_scenes_from_plan(video_plan, duration_metadata)
            # If custom script provided, parse it into scenes (0 AI calls for parsing)
            elif custom_script:
                logger.info(
                    f"[YouTubeSceneBuilder] Parsing custom script for scene generation "
                    f"(0 AI calls required)"
                )
                scenes = self._parse_custom_script(
                    custom_script, video_plan, duration_metadata, user_id
                )
            # For shorts, check if scenes were already generated in plan (optimization)
            elif video_plan.get("_scenes_included") and duration_type == "shorts":
                prebuilt = video_plan.get("scenes") or []
                if prebuilt:
                    logger.info(
                        f"[YouTubeSceneBuilder] Using scenes from optimized plan+scenes call "
                        f"({len(prebuilt)} scenes)"
                    )
                    scenes = self._normalize_scenes_from_plan(video_plan, duration_metadata)
                else:
                    logger.warning(
                        "[YouTubeSceneBuilder] Plan marked _scenes_included but no scenes present; "
                        "regenerating scenes normally."
                    )
                    scenes = self._generate_scenes_from_plan(
                        video_plan, duration_metadata, user_id
                    )
            else:
                # Generate scenes from plan
                scenes = self._generate_scenes_from_plan(
                    video_plan, duration_metadata, user_id
                )

            # Limit to max scenes
            if len(scenes) > max_scenes:
                logger.warning(
                    f"[YouTubeSceneBuilder] Truncating {len(scenes)} scenes to {max_scenes}"
                )
                scenes = scenes[:max_scenes]

            # Enhance visual prompts efficiently based on duration type
            duration_type = video_plan.get("duration_type", "medium")
            scenes = self._enhance_visual_prompts_batch(
                scenes, video_plan, user_id, duration_type
            )

            logger.info(f"[YouTubeSceneBuilder] ✅ Built {len(scenes)} scenes")
            return scenes

        except HTTPException:
            raise
        except Exception as e:
            logger.error(f"[YouTubeSceneBuilder] Error building scenes: {e}", exc_info=True)
            raise HTTPException(
                status_code=500,
                detail=f"Failed to build scenes: {str(e)}"
            )
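
    # Usage sketch (hypothetical `plan` and user ID values):
    #   builder = YouTubeSceneBuilderService()
    #   scenes = builder.build_scenes_from_plan(video_plan=plan, user_id="user_123")
    # For shorts plans generated with include_scenes=True this reuses the plan's
    # scenes (0 extra AI calls); otherwise scenes are generated and then batch-enhanced.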

    def _generate_scenes_from_plan(
        self,
        video_plan: Dict[str, Any],
        duration_metadata: Dict[str, Any],
        user_id: str,
    ) -> List[Dict[str, Any]]:
        """Generate scenes from video plan using AI."""

        content_outline = video_plan.get("content_outline", [])
        hook_strategy = video_plan.get("hook_strategy", "")
        call_to_action = video_plan.get("call_to_action", "")
        visual_style = video_plan.get("visual_style", "cinematic")
        tone = video_plan.get("tone", "professional")

        scene_duration_range = duration_metadata.get("scene_duration_range", (5, 15))

        scene_generation_prompt = f"""You are a top YouTube scriptwriter specializing in engaging, viral content. Create compelling scenes that captivate viewers and maximize watch time.

**VIDEO PLAN:**
📝 Summary: {video_plan.get('video_summary', '')}
🎯 Goal: {video_plan.get('video_goal', '')}
💡 Key Message: {video_plan.get('key_message', '')}
🎨 Visual Style: {visual_style}
🎭 Tone: {tone}

**🎣 HOOK STRATEGY:**
{hook_strategy}

**📋 CONTENT STRUCTURE:**
{chr(10).join([f"• {section.get('section', '')}: {section.get('description', '')} ({section.get('duration_estimate', 0)}s)" for section in content_outline])}

**🚀 CALL-TO-ACTION:**
{call_to_action}

**⏱️ TIMING CONSTRAINTS:**
• Scene duration: {scene_duration_range[0]}-{scene_duration_range[1]} seconds each
• Total target: {duration_metadata.get('target_seconds', 150)} seconds

**🎬 YOUR MISSION - CREATE VIRAL-WORTHY SCENES:**

Write narration that:
✨ **HOOKS IMMEDIATELY** - First {duration_metadata.get('hook_seconds', 10)}s must GRAB attention
🎭 **TELLS A STORY** - Each scene advances the narrative with emotional engagement
💡 **DELIVERS VALUE** - Provide insights, tips, or "aha!" moments in every scene
🔥 **BUILDS EXCITEMENT** - Use power words, questions, and cliffhangers
👥 **CONNECTS PERSONALLY** - Speak directly to the viewer's needs and desires
⚡ **MAINTAINS PACE** - Vary sentence length for natural rhythm
🎯 **DRIVES ACTION** - Build toward the CTA with increasing urgency

**REQUIRED SCENE ELEMENTS:**
1. **scene_number**: Sequential numbering
2. **title**: Catchy, descriptive title (5-8 words max)
3. **narration**: ENGAGING spoken script with:
   - Conversational language ("you know what I mean?")
   - Rhetorical questions ("Have you ever wondered...?")
   - Power transitions ("But here's the game-changer...")
   - Emotional hooks ("Imagine this...")
   - Action-oriented language ("Let's dive in...")
4. **visual_description**: Cinematic, professional YouTube visuals
5. **duration_estimate**: Realistic speaking time
6. **emphasis**: hook/main_content/transition/cta
7. **visual_cues**: ["dramatic_zoom", "text_overlay", "fast_cuts"]

**🎯 YOUTUBE OPTIMIZATION RULES:**
• **Hook Power**: First 3 seconds = make them stay or lose them
• **Value Density**: Every 10 seconds must deliver new insight
• **Emotional Arc**: Build curiosity → teach → inspire → convert
• **Natural Flow**: Scenes must connect seamlessly
• **CTA Momentum**: Final scene creates irresistible urge to act

**📊 FORMAT AS JSON ARRAY:**
[
  {{
    "scene_number": 1,
    "title": "The Shocking Truth They Hide",
    "narration": "You won't believe what just happened in my latest discovery! I was scrolling through the usual content when BAM - this completely changed everything I thought about [topic]. And get this - it could transform YOUR results too!",
    "visual_description": "Dynamic opening shot with shocking text overlay, fast cuts of social media feeds, energetic music swell, close-up of surprised reaction",
    "duration_estimate": 8,
    "emphasis": "hook",
    "visual_cues": ["shocking_text", "fast_cuts", "music_swell", "reaction_shot"]
  }},
  ...
]

**🔥 SUCCESS CRITERIA:**
✅ First scene hooks in 3 seconds
✅ Each scene delivers 1-2 key insights
✅ Narration feels like talking to a friend
✅ Total story arc creates emotional journey
✅ CTA feels like the natural next step
✅ Scenes fit duration perfectly"""

        system_prompt = (
            "You are a master YouTube scriptwriter who creates viral, engaging content that "
            "keeps viewers watching until the end. You understand YouTube algorithm optimization, "
            "emotional storytelling, and creating irresistible hooks that make viewers hit 'like' and 'subscribe'. "
            "Your scripts are conversational, valuable, and conversion-focused."
        )

        response = llm_text_gen(
            prompt=scene_generation_prompt,
            system_prompt=system_prompt,
            user_id=user_id,
            json_struct={
                "type": "array",
                "items": {
                    "type": "object",
                    "properties": {
                        "scene_number": {"type": "number"},
                        "title": {"type": "string"},
                        "narration": {"type": "string"},
                        "visual_description": {"type": "string"},
                        "duration_estimate": {"type": "number"},
                        "emphasis": {"type": "string"},
                        "visual_cues": {
                            "type": "array",
                            "items": {"type": "string"}
                        }
                    },
                    "required": [
                        "scene_number", "title", "narration", "visual_description",
                        "duration_estimate", "emphasis"
                    ]
                }
            }
        )

        # Parse response
        if isinstance(response, list):
            scenes = response
        elif isinstance(response, dict) and "scenes" in response:
            scenes = response["scenes"]
        else:
            import json
            scenes = json.loads(response) if isinstance(response, str) else response

        # Normalize scene data
        normalized_scenes = []
        for idx, scene in enumerate(scenes, 1):
            normalized_scenes.append({
                "scene_number": scene.get("scene_number", idx),
                "title": scene.get("title", f"Scene {idx}"),
                "narration": scene.get("narration", ""),
                "visual_description": scene.get("visual_description", ""),
                "duration_estimate": scene.get("duration_estimate", scene_duration_range[0]),
                "emphasis": scene.get("emphasis", "main_content"),
                "visual_cues": scene.get("visual_cues", []),
                "visual_prompt": scene.get("visual_description", ""),  # Initial prompt
            })

        return normalized_scenes
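
    # Shape of each normalized scene dict (illustrative values):
    #   {"scene_number": 1, "title": "Scene 1", "narration": "...",
    #    "visual_description": "...", "duration_estimate": 5,
    #    "emphasis": "main_content", "visual_cues": [], "visual_prompt": "..."}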

    def _normalize_scenes_from_plan(
        self,
        video_plan: Dict[str, Any],
        duration_metadata: Dict[str, Any],
    ) -> List[Dict[str, Any]]:
        """Normalize scenes that were generated as part of the plan (optimization for shorts)."""
        scenes = video_plan.get("scenes", [])
        scene_duration_range = duration_metadata.get("scene_duration_range", (2, 8))

        normalized_scenes = []
        for idx, scene in enumerate(scenes, 1):
            normalized_scenes.append({
                "scene_number": scene.get("scene_number", idx),
                "title": scene.get("title", f"Scene {idx}"),
                "narration": scene.get("narration", ""),
                "visual_description": scene.get("visual_description", ""),
                "duration_estimate": scene.get("duration_estimate", scene_duration_range[0]),
                "emphasis": scene.get("emphasis", "main_content"),
                "visual_cues": scene.get("visual_cues", []),
                "visual_prompt": scene.get("visual_description", ""),  # Initial prompt
            })

        logger.info(
            f"[YouTubeSceneBuilder] ✅ Normalized {len(normalized_scenes)} scenes "
            f"from optimized plan (saved 1 AI call)"
        )
        return normalized_scenes

    def _parse_custom_script(
        self,
        custom_script: str,
        video_plan: Dict[str, Any],
        duration_metadata: Dict[str, Any],
        user_id: str,
    ) -> List[Dict[str, Any]]:
        """Parse a custom script into structured scenes."""
        # Simple parsing: split by double newlines or scene markers
        import re

        # Try to detect scene markers
        scene_pattern = r'(?:Scene\s+\d+|#\s*\d+\.|^\d+\.)\s*(.+?)(?=(?:Scene\s+\d+|#\s*\d+\.|^\d+\.|$))'
        # Materialize the matches so the total count is available while iterating
        matches = list(re.finditer(scene_pattern, custom_script, re.MULTILINE | re.DOTALL))

        scenes = []
        for idx, match in enumerate(matches, 1):
            scene_text = match.group(1).strip()
            # Extract narration (first paragraph or before visual markers)
            narration_match = re.search(r'^(.*?)(?:\n\n|Visual:|Image:)', scene_text, re.DOTALL)
            narration = narration_match.group(1).strip() if narration_match else scene_text.split('\n')[0]

            # Extract visual description
            visual_match = re.search(r'(?:Visual:|Image:)\s*(.+?)(?:\n\n|$)', scene_text, re.DOTALL)
            visual_description = visual_match.group(1).strip() if visual_match else narration

            scenes.append({
                "scene_number": idx,
                "title": f"Scene {idx}",
                "narration": narration,
                "visual_description": visual_description,
                "duration_estimate": duration_metadata.get("scene_duration_range", [5, 15])[0],
                "emphasis": "hook" if idx == 1 else ("cta" if idx == len(matches) else "main_content"),
                "visual_cues": [],
                "visual_prompt": visual_description,
            })

        # Fallback: split by paragraphs if no scene markers
        if not scenes:
            paragraphs = [p.strip() for p in custom_script.split('\n\n') if p.strip()]
            for idx, para in enumerate(paragraphs[:duration_metadata.get("max_scenes", 10)], 1):
                scenes.append({
                    "scene_number": idx,
                    "title": f"Scene {idx}",
                    "narration": para,
                    "visual_description": para,
                    "duration_estimate": duration_metadata.get("scene_duration_range", [5, 15])[0],
                    "emphasis": "hook" if idx == 1 else ("cta" if idx == len(paragraphs) else "main_content"),
                    "visual_cues": [],
                    "visual_prompt": para,
                })

        return scenes
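
    # Example custom script layout the parser recognizes (hypothetical content):
    #   Scene 1: Welcome the viewer and tease the main result.
    #   Visual: Creator at a desk, energetic opening, bold text overlay.
    #
    #   Scene 2: Walk through the first step in detail.
    #   Visual: Screen recording with callouts.
    # Scripts without "Scene N" markers fall back to one scene per blank-line-separated paragraph.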

    def _enhance_visual_prompts_batch(
        self,
        scenes: List[Dict[str, Any]],
        video_plan: Dict[str, Any],
        user_id: str,
        duration_type: str,
    ) -> List[Dict[str, Any]]:
        """
        Efficiently enhance visual prompts based on video duration type.

        Strategy:
        - Shorts: Skip enhancement (use original descriptions) - 0 AI calls
        - Medium: Batch enhance all scenes in 1 call - 1 AI call
        - Long: Batch enhance in 2 calls (split scenes) - 2 AI calls max
        """
        # For shorts, skip enhancement to save API calls
        if duration_type == "shorts":
            logger.info(
                f"[YouTubeSceneBuilder] Skipping prompt enhancement for shorts "
                f"({len(scenes)} scenes) to save API calls"
            )
            for scene in scenes:
                scene["enhanced_visual_prompt"] = scene.get(
                    "visual_prompt", scene.get("visual_description", "")
                )
            return scenes

        # Build story context for prompt enhancer
        story_context = {
            "story_setting": video_plan.get("visual_style", "cinematic"),
            "story_tone": video_plan.get("tone", "professional"),
            "writing_style": video_plan.get("visual_style", "cinematic"),
        }

        # Convert scenes to format expected by enhancer
        scene_data_list = [
            {
                "scene_number": scene.get("scene_number", idx + 1),
                "title": scene.get("title", ""),
                "description": scene.get("visual_description", ""),
                "image_prompt": scene.get("visual_prompt", ""),
            }
            for idx, scene in enumerate(scenes)
        ]

        # For medium videos, enhance all scenes in one batch call
        if duration_type == "medium":
            logger.info(
                f"[YouTubeSceneBuilder] Batch enhancing {len(scenes)} scenes "
                f"for medium video in 1 AI call"
            )
            try:
                # Use a single batch enhancement call
                enhanced_prompts = self._batch_enhance_prompts(
                    scene_data_list, story_context, user_id
                )
                for idx, scene in enumerate(scenes):
                    scene["enhanced_visual_prompt"] = enhanced_prompts.get(
                        idx, scene.get("visual_prompt", scene.get("visual_description", ""))
                    )
            except Exception as e:
                logger.warning(
                    f"[YouTubeSceneBuilder] Batch enhancement failed: {e}, "
                    f"using original prompts"
                )
                for scene in scenes:
                    scene["enhanced_visual_prompt"] = scene.get(
                        "visual_prompt", scene.get("visual_description", "")
                    )
            return scenes

        # For long videos, split into 2 batches to avoid token limits
        if duration_type == "long":
            logger.info(
                f"[YouTubeSceneBuilder] Batch enhancing {len(scenes)} scenes "
                f"for long video in 2 AI calls"
            )
            mid_point = len(scenes) // 2
            batches = [
                scene_data_list[:mid_point],
                scene_data_list[mid_point:],
            ]

            all_enhanced = {}
            for batch_idx, batch in enumerate(batches):
                try:
                    enhanced = self._batch_enhance_prompts(
                        batch, story_context, user_id
                    )
                    start_idx = 0 if batch_idx == 0 else mid_point
                    for local_idx, enhanced_prompt in enhanced.items():
                        all_enhanced[start_idx + local_idx] = enhanced_prompt
                except Exception as e:
                    logger.warning(
                        f"[YouTubeSceneBuilder] Batch {batch_idx + 1} enhancement "
                        f"failed: {e}, using original prompts"
                    )
                    start_idx = 0 if batch_idx == 0 else mid_point
                    for local_idx, scene_data in enumerate(batch):
                        all_enhanced[start_idx + local_idx] = scene_data.get(
                            "image_prompt", scene_data.get("description", "")
                        )

            for idx, scene in enumerate(scenes):
                scene["enhanced_visual_prompt"] = all_enhanced.get(
                    idx, scene.get("visual_prompt", scene.get("visual_description", ""))
                )
            return scenes

        # Fallback: use original prompts
        logger.warning(
            f"[YouTubeSceneBuilder] Unknown duration type '{duration_type}', "
            f"using original prompts"
        )
        for scene in scenes:
            scene["enhanced_visual_prompt"] = scene.get(
                "visual_prompt", scene.get("visual_description", "")
            )
        return scenes

    def _batch_enhance_prompts(
        self,
        scene_data_list: List[Dict[str, Any]],
        story_context: Dict[str, Any],
        user_id: str,
    ) -> Dict[int, str]:
        """
        Enhance multiple scene prompts in a single AI call.

        Returns:
            Dictionary mapping scene index to enhanced prompt
        """
        try:
            # Build batch enhancement prompt
            scenes_text = "\n\n".join([
                f"Scene {scene.get('scene_number', idx + 1)}: {scene.get('title', '')}\n"
                f"Description: {scene.get('description', '')}\n"
                f"Current Prompt: {scene.get('image_prompt', '')}"
                for idx, scene in enumerate(scene_data_list)
            ])

            batch_prompt = f"""You are optimizing visual prompts for AI video generation. Enhance the following scenes to be more detailed and video-optimized.

**Video Style Context:**
- Setting: {story_context.get('story_setting', 'cinematic')}
- Tone: {story_context.get('story_tone', 'professional')}
- Style: {story_context.get('writing_style', 'cinematic')}

**Scenes to Enhance:**
{scenes_text}

**Your Task:**
For each scene, create an enhanced visual prompt (200-300 words) that:
1. Is detailed and specific for video generation
2. Includes camera movements, lighting, composition
3. Maintains consistency with the video style
4. Is optimized for WAN 2.5 text-to-video model

**Format as JSON array with enhanced prompts:**
[
{{"scene_index": 0, "enhanced_prompt": "detailed enhanced prompt for scene 1..."}},
{{"scene_index": 1, "enhanced_prompt": "detailed enhanced prompt for scene 2..."}},
...
]

Make sure the array length matches the number of scenes provided ({len(scene_data_list)}).
"""

            system_prompt = (
                "You are an expert at creating detailed visual prompts for AI video generation. "
                "Your prompts are specific, cinematic, and optimized for video models."
            )

            response = llm_text_gen(
                prompt=batch_prompt,
                system_prompt=system_prompt,
                user_id=user_id,
                json_struct={
                    "type": "array",
                    "items": {
                        "type": "object",
                        "properties": {
                            "scene_index": {"type": "number"},
                            "enhanced_prompt": {"type": "string"}
                        },
                        "required": ["scene_index", "enhanced_prompt"]
                    }
                }
            )

            # Parse response
            if isinstance(response, list):
                enhanced_list = response
            elif isinstance(response, str):
                import json
                enhanced_list = json.loads(response)
            else:
                enhanced_list = response

            # Build result dictionary
            result = {}
            for item in enhanced_list:
                idx = item.get("scene_index", 0)
                prompt = item.get("enhanced_prompt", "")
                if prompt:
                    result[idx] = prompt
                else:
                    # Fallback to original
                    original_scene = scene_data_list[idx] if idx < len(scene_data_list) else {}
                    result[idx] = original_scene.get(
                        "image_prompt", original_scene.get("description", "")
                    )

            # Fill in any missing scenes with original prompts
            for idx in range(len(scene_data_list)):
                if idx not in result:
                    original_scene = scene_data_list[idx]
                    result[idx] = original_scene.get(
                        "image_prompt", original_scene.get("description", "")
                    )

            logger.info(
                f"[YouTubeSceneBuilder] ✅ Batch enhanced {len(result)} prompts "
                f"in 1 AI call"
            )
            return result

        except Exception as e:
            logger.error(
                f"[YouTubeSceneBuilder] Batch enhancement failed: {e}",
                exc_info=True
            )
            # Return original prompts as fallback
            return {
                idx: scene.get("image_prompt", scene.get("description", ""))
                for idx, scene in enumerate(scene_data_list)
            }
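
    # The returned mapping is keyed by 0-based position in scene_data_list
    # (matching the "scene_index" values requested above), for example (illustrative):
    #   {0: "enhanced prompt for scene 1...", 1: "enhanced prompt for scene 2..."}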