"""
|
|
YouTube Scene Builder Service
|
|
|
|
Converts video plans into structured scenes with narration, visual prompts, and timing.
|
|
"""
|
|
|
|
from typing import Dict, Any, Optional, List
|
|
from loguru import logger
|
|
from fastapi import HTTPException
|
|
|
|
from services.llm_providers.main_text_generation import llm_text_gen
|
|
from services.story_writer.prompt_enhancer_service import PromptEnhancerService
|
|
from utils.logger_utils import get_service_logger
|
|
|
|
logger = get_service_logger("youtube.scene_builder")
|
|
|
|
|
|
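# Illustrative usage (sketch only -- the real video_plan dict comes from the
# planner service and "user_123" is a placeholder Clerk user ID):
#
#   builder = YouTubeSceneBuilderService()
#   scenes = builder.build_scenes_from_plan(
#       video_plan=plan,          # plan: dict produced by the YouTube planner service
#       user_id="user_123",
#       custom_script=None,       # or a pre-written script to parse instead
#   )
#   for scene in scenes:
#       print(scene["scene_number"], scene["title"], scene["duration_estimate"])
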
class YouTubeSceneBuilderService:
    """Service for building structured video scenes from plans."""

    def __init__(self):
        """Initialize the scene builder service."""
        self.prompt_enhancer = PromptEnhancerService()
        logger.info("[YouTubeSceneBuilder] Service initialized")

    def build_scenes_from_plan(
        self,
        video_plan: Dict[str, Any],
        user_id: str,
        custom_script: Optional[str] = None,
    ) -> List[Dict[str, Any]]:
        """
        Build structured scenes from a video plan.

        This method is optimized to minimize AI calls:
        - For shorts: Reuses scenes if already generated in plan (0 AI calls)
        - For medium/long: Generates scenes + batch enhances (1-3 AI calls total)
        - Custom script: Parses script without AI calls (0 AI calls)

        Args:
            video_plan: Video plan from planner service
            user_id: Clerk user ID for subscription checking
            custom_script: Optional custom script to use instead of generating

        Returns:
            List of scene dictionaries with narration, visual prompts, timing, etc.
        """
        try:
            duration_type = video_plan.get('duration_type', 'medium')
            logger.info(
                f"[YouTubeSceneBuilder] Building scenes from plan: "
                f"duration={duration_type}, "
                f"sections={len(video_plan.get('content_outline', []))}, "
                f"user={user_id}"
            )

            duration_metadata = video_plan.get("duration_metadata", {})
            max_scenes = duration_metadata.get("max_scenes", 10)

            # Optimization: Check if scenes already exist in plan (prevents duplicate generation)
            # This can happen if plan was generated with include_scenes=True for shorts
            existing_scenes = video_plan.get("scenes", [])
            if existing_scenes and video_plan.get("_scenes_included"):
                # Scenes already generated in plan - reuse them (0 AI calls)
                logger.info(
                    f"[YouTubeSceneBuilder] ♻️ Reusing {len(existing_scenes)} scenes from plan "
                    f"(duration={duration_type}) - skipping generation to save AI calls"
                )
                scenes = self._normalize_scenes_from_plan(video_plan, duration_metadata)
            # If custom script provided, parse it into scenes (0 AI calls for parsing)
            elif custom_script:
                logger.info(
                    f"[YouTubeSceneBuilder] Parsing custom script for scene generation "
                    f"(0 AI calls required)"
                )
                scenes = self._parse_custom_script(
                    custom_script, video_plan, duration_metadata, user_id
                )
            # For shorts, check if scenes were already generated in plan (optimization)
            elif video_plan.get("_scenes_included") and duration_type == "shorts":
                prebuilt = video_plan.get("scenes") or []
                if prebuilt:
                    logger.info(
                        f"[YouTubeSceneBuilder] Using scenes from optimized plan+scenes call "
                        f"({len(prebuilt)} scenes)"
                    )
                    scenes = self._normalize_scenes_from_plan(video_plan, duration_metadata)
                else:
                    logger.warning(
                        "[YouTubeSceneBuilder] Plan marked _scenes_included but no scenes present; "
                        "regenerating scenes normally."
                    )
                    scenes = self._generate_scenes_from_plan(
                        video_plan, duration_metadata, user_id
                    )
            else:
                # Generate scenes from plan
                scenes = self._generate_scenes_from_plan(
                    video_plan, duration_metadata, user_id
                )

            # Limit to max scenes
            if len(scenes) > max_scenes:
                logger.warning(
                    f"[YouTubeSceneBuilder] Truncating {len(scenes)} scenes to {max_scenes}"
                )
                scenes = scenes[:max_scenes]

            # Enhance visual prompts efficiently based on duration type
            duration_type = video_plan.get("duration_type", "medium")
            scenes = self._enhance_visual_prompts_batch(
                scenes, video_plan, user_id, duration_type
            )

            logger.info(f"[YouTubeSceneBuilder] ✅ Built {len(scenes)} scenes")
            return scenes

        except HTTPException:
            raise
        except Exception as e:
            logger.error(f"[YouTubeSceneBuilder] Error building scenes: {e}", exc_info=True)
            raise HTTPException(
                status_code=500,
                detail=f"Failed to build scenes: {str(e)}"
            )

    def _generate_scenes_from_plan(
        self,
        video_plan: Dict[str, Any],
        duration_metadata: Dict[str, Any],
        user_id: str,
    ) -> List[Dict[str, Any]]:
        """Generate scenes from video plan using AI."""

        content_outline = video_plan.get("content_outline", [])
        hook_strategy = video_plan.get("hook_strategy", "")
        call_to_action = video_plan.get("call_to_action", "")
        visual_style = video_plan.get("visual_style", "cinematic")
        tone = video_plan.get("tone", "professional")

        scene_duration_range = duration_metadata.get("scene_duration_range", (5, 15))

        scene_generation_prompt = f"""You are a top YouTube scriptwriter specializing in engaging, viral content. Create compelling scenes that captivate viewers and maximize watch time.

**VIDEO PLAN:**
📝 Summary: {video_plan.get('video_summary', '')}
🎯 Goal: {video_plan.get('video_goal', '')}
💡 Key Message: {video_plan.get('key_message', '')}
🎨 Visual Style: {visual_style}
🎭 Tone: {tone}

**🎣 HOOK STRATEGY:**
{hook_strategy}

**📋 CONTENT STRUCTURE:**
{chr(10).join([f"• {section.get('section', '')}: {section.get('description', '')} ({section.get('duration_estimate', 0)}s)" for section in content_outline])}

**🚀 CALL-TO-ACTION:**
{call_to_action}

**⏱️ TIMING CONSTRAINTS:**
• Scene duration: {scene_duration_range[0]}-{scene_duration_range[1]} seconds each
• Total target: {duration_metadata.get('target_seconds', 150)} seconds

**🎬 YOUR MISSION - CREATE VIRAL-WORTHY SCENES:**

Write narration that:
✨ **HOOKS IMMEDIATELY** - First {duration_metadata.get('hook_seconds', 10)}s must GRAB attention
🎭 **TELLS A STORY** - Each scene advances the narrative with emotional engagement
💡 **DELIVERS VALUE** - Provide insights, tips, or "aha!" moments in every scene
🔥 **BUILDS EXCITEMENT** - Use power words, questions, and cliffhangers
👥 **CONNECTS PERSONALLY** - Speak directly to the viewer's needs and desires
⚡ **MAINTAINS PACE** - Vary sentence length for natural rhythm
🎯 **DRIVES ACTION** - Build toward the CTA with increasing urgency

**REQUIRED SCENE ELEMENTS:**
1. **scene_number**: Sequential numbering
2. **title**: Catchy, descriptive title (5-8 words max)
3. **narration**: ENGAGING spoken script with:
   - Conversational language ("you know what I mean?")
   - Rhetorical questions ("Have you ever wondered...?")
   - Power transitions ("But here's the game-changer...")
   - Emotional hooks ("Imagine this...")
   - Action-oriented language ("Let's dive in...")
4. **visual_description**: Cinematic, professional YouTube visuals
5. **duration_estimate**: Realistic speaking time
6. **emphasis**: hook/main_content/transition/cta
7. **visual_cues**: ["dramatic_zoom", "text_overlay", "fast_cuts"]

**🎯 YOUTUBE OPTIMIZATION RULES:**
• **Hook Power**: First 3 seconds = make them stay or lose them
• **Value Density**: Every 10 seconds must deliver new insight
• **Emotional Arc**: Build curiosity → teach → inspire → convert
• **Natural Flow**: Scenes must connect seamlessly
• **CTA Momentum**: Final scene creates irresistible urge to act

**📊 FORMAT AS JSON ARRAY:**
[
  {{
    "scene_number": 1,
    "title": "The Shocking Truth They Hide",
    "narration": "You won't believe what just happened in my latest discovery! I was scrolling through the usual content when BAM - this completely changed everything I thought about [topic]. And get this - it could transform YOUR results too!",
    "visual_description": "Dynamic opening shot with shocking text overlay, fast cuts of social media feeds, energetic music swell, close-up of surprised reaction",
    "duration_estimate": 8,
    "emphasis": "hook",
    "visual_cues": ["shocking_text", "fast_cuts", "music_swell", "reaction_shot"]
  }},
  ...
]

**🔥 SUCCESS CRITERIA:**
✅ First scene hooks in 3 seconds
✅ Each scene delivers 1-2 key insights
✅ Narration feels like talking to a friend
✅ Total story arc creates emotional journey
✅ CTA feels like the natural next step
✅ Scenes fit duration perfectly"""

        system_prompt = (
            "You are a master YouTube scriptwriter who creates viral, engaging content that "
            "keeps viewers watching until the end. You understand YouTube algorithm optimization, "
            "emotional storytelling, and creating irresistible hooks that make viewers hit 'like' and 'subscribe'. "
            "Your scripts are conversational, valuable, and conversion-focused."
        )

        response = llm_text_gen(
            prompt=scene_generation_prompt,
            system_prompt=system_prompt,
            user_id=user_id,
            json_struct={
                "type": "array",
                "items": {
                    "type": "object",
                    "properties": {
                        "scene_number": {"type": "number"},
                        "title": {"type": "string"},
                        "narration": {"type": "string"},
                        "visual_description": {"type": "string"},
                        "duration_estimate": {"type": "number"},
                        "emphasis": {"type": "string"},
                        "visual_cues": {
                            "type": "array",
                            "items": {"type": "string"}
                        }
                    },
                    "required": [
                        "scene_number", "title", "narration", "visual_description",
                        "duration_estimate", "emphasis"
                    ]
                }
            }
        )

        # Parse response
        if isinstance(response, list):
            scenes = response
        elif isinstance(response, dict) and "scenes" in response:
            scenes = response["scenes"]
        else:
            import json
            scenes = json.loads(response) if isinstance(response, str) else response

        # Normalize scene data
        normalized_scenes = []
        for idx, scene in enumerate(scenes, 1):
            normalized_scenes.append({
                "scene_number": scene.get("scene_number", idx),
                "title": scene.get("title", f"Scene {idx}"),
                "narration": scene.get("narration", ""),
                "visual_description": scene.get("visual_description", ""),
                "duration_estimate": scene.get("duration_estimate", scene_duration_range[0]),
                "emphasis": scene.get("emphasis", "main_content"),
                "visual_cues": scene.get("visual_cues", []),
                "visual_prompt": scene.get("visual_description", ""),  # Initial prompt
            })

        return normalized_scenes

    def _normalize_scenes_from_plan(
        self,
        video_plan: Dict[str, Any],
        duration_metadata: Dict[str, Any],
    ) -> List[Dict[str, Any]]:
        """Normalize scenes that were generated as part of the plan (optimization for shorts)."""
        scenes = video_plan.get("scenes", [])
        scene_duration_range = duration_metadata.get("scene_duration_range", (2, 8))

        normalized_scenes = []
        for idx, scene in enumerate(scenes, 1):
            normalized_scenes.append({
                "scene_number": scene.get("scene_number", idx),
                "title": scene.get("title", f"Scene {idx}"),
                "narration": scene.get("narration", ""),
                "visual_description": scene.get("visual_description", ""),
                "duration_estimate": scene.get("duration_estimate", scene_duration_range[0]),
                "emphasis": scene.get("emphasis", "main_content"),
                "visual_cues": scene.get("visual_cues", []),
                "visual_prompt": scene.get("visual_description", ""),  # Initial prompt
            })

        logger.info(
            f"[YouTubeSceneBuilder] ✅ Normalized {len(normalized_scenes)} scenes "
            f"from optimized plan (saved 1 AI call)"
        )
        return normalized_scenes

    def _parse_custom_script(
        self,
        custom_script: str,
        video_plan: Dict[str, Any],
        duration_metadata: Dict[str, Any],
        user_id: str,
    ) -> List[Dict[str, Any]]:
        """Parse a custom script into structured scenes."""
        # Simple parsing: split by double newlines or scene markers
        import re
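
        # Illustrative custom-script format this parser can handle (hypothetical
        # example; the "Scene N" and "Visual:" markers are what the regexes below match):
        #
        #   Scene 1
        #   Most people quit before they ever see results...
        #   Visual: Close-up of a frustrated creator at a desk
        #
        #   Scene 2
        #   Here's the one change that actually works...
        #   Visual: Split-screen before/after comparison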

        # Try to detect scene markers
        scene_pattern = r'(?:Scene\s+\d+|#\s*\d+\.|^\d+\.)\s*(.+?)(?=(?:Scene\s+\d+|#\s*\d+\.|^\d+\.|$))'
        # Materialize the matches so they can be both iterated and counted
        # (re.finditer returns a one-shot iterator, which would break the CTA check below)
        matches = list(re.finditer(scene_pattern, custom_script, re.MULTILINE | re.DOTALL))

        scenes = []
        for idx, match in enumerate(matches, 1):
            scene_text = match.group(1).strip()
            # Extract narration (first paragraph or before visual markers)
            narration_match = re.search(r'^(.*?)(?:\n\n|Visual:|Image:)', scene_text, re.DOTALL)
            narration = narration_match.group(1).strip() if narration_match else scene_text.split('\n')[0]

            # Extract visual description
            visual_match = re.search(r'(?:Visual:|Image:)\s*(.+?)(?:\n\n|$)', scene_text, re.DOTALL)
            visual_description = visual_match.group(1).strip() if visual_match else narration

            scenes.append({
                "scene_number": idx,
                "title": f"Scene {idx}",
                "narration": narration,
                "visual_description": visual_description,
                "duration_estimate": duration_metadata.get("scene_duration_range", [5, 15])[0],
                "emphasis": "hook" if idx == 1 else ("cta" if idx == len(matches) else "main_content"),
                "visual_cues": [],
                "visual_prompt": visual_description,
            })

        # Fallback: split by paragraphs if no scene markers
        if not scenes:
            paragraphs = [p.strip() for p in custom_script.split('\n\n') if p.strip()]
            # Cap the list first so the last processed paragraph is tagged as the CTA
            paragraphs = paragraphs[:duration_metadata.get("max_scenes", 10)]
            for idx, para in enumerate(paragraphs, 1):
                scenes.append({
                    "scene_number": idx,
                    "title": f"Scene {idx}",
                    "narration": para,
                    "visual_description": para,
                    "duration_estimate": duration_metadata.get("scene_duration_range", [5, 15])[0],
                    "emphasis": "hook" if idx == 1 else ("cta" if idx == len(paragraphs) else "main_content"),
                    "visual_cues": [],
                    "visual_prompt": para,
                })

        return scenes

    def _enhance_visual_prompts_batch(
        self,
        scenes: List[Dict[str, Any]],
        video_plan: Dict[str, Any],
        user_id: str,
        duration_type: str,
    ) -> List[Dict[str, Any]]:
        """
        Efficiently enhance visual prompts based on video duration type.

        Strategy:
        - Shorts: Skip enhancement (use original descriptions) - 0 AI calls
        - Medium: Batch enhance all scenes in 1 call - 1 AI call
        - Long: Batch enhance in 2 calls (split scenes) - 2 AI calls max
        """
        # For shorts, skip enhancement to save API calls
        if duration_type == "shorts":
            logger.info(
                f"[YouTubeSceneBuilder] Skipping prompt enhancement for shorts "
                f"({len(scenes)} scenes) to save API calls"
            )
            for scene in scenes:
                scene["enhanced_visual_prompt"] = scene.get(
                    "visual_prompt", scene.get("visual_description", "")
                )
            return scenes

        # Build story context for prompt enhancer
        story_context = {
            "story_setting": video_plan.get("visual_style", "cinematic"),
            "story_tone": video_plan.get("tone", "professional"),
            "writing_style": video_plan.get("visual_style", "cinematic"),
        }

        # Convert scenes to format expected by enhancer
        scene_data_list = [
            {
                "scene_number": scene.get("scene_number", idx + 1),
                "title": scene.get("title", ""),
                "description": scene.get("visual_description", ""),
                "image_prompt": scene.get("visual_prompt", ""),
            }
            for idx, scene in enumerate(scenes)
        ]

        # For medium videos, enhance all scenes in one batch call
        if duration_type == "medium":
            logger.info(
                f"[YouTubeSceneBuilder] Batch enhancing {len(scenes)} scenes "
                f"for medium video in 1 AI call"
            )
            try:
                # Use a single batch enhancement call
                enhanced_prompts = self._batch_enhance_prompts(
                    scene_data_list, story_context, user_id
                )
                for idx, scene in enumerate(scenes):
                    scene["enhanced_visual_prompt"] = enhanced_prompts.get(
                        idx, scene.get("visual_prompt", scene.get("visual_description", ""))
                    )
            except Exception as e:
                logger.warning(
                    f"[YouTubeSceneBuilder] Batch enhancement failed: {e}, "
                    f"using original prompts"
                )
                for scene in scenes:
                    scene["enhanced_visual_prompt"] = scene.get(
                        "visual_prompt", scene.get("visual_description", "")
                    )
            return scenes

        # For long videos, split into 2 batches to avoid token limits
        if duration_type == "long":
            logger.info(
                f"[YouTubeSceneBuilder] Batch enhancing {len(scenes)} scenes "
                f"for long video in 2 AI calls"
            )
            mid_point = len(scenes) // 2
            batches = [
                scene_data_list[:mid_point],
                scene_data_list[mid_point:],
            ]

            all_enhanced = {}
            for batch_idx, batch in enumerate(batches):
                try:
                    enhanced = self._batch_enhance_prompts(
                        batch, story_context, user_id
                    )
                    start_idx = 0 if batch_idx == 0 else mid_point
                    for local_idx, enhanced_prompt in enhanced.items():
                        all_enhanced[start_idx + local_idx] = enhanced_prompt
                except Exception as e:
                    logger.warning(
                        f"[YouTubeSceneBuilder] Batch {batch_idx + 1} enhancement "
                        f"failed: {e}, using original prompts"
                    )
                    start_idx = 0 if batch_idx == 0 else mid_point
                    for local_idx, scene_data in enumerate(batch):
                        all_enhanced[start_idx + local_idx] = scene_data.get(
                            "image_prompt", scene_data.get("description", "")
                        )

            for idx, scene in enumerate(scenes):
                scene["enhanced_visual_prompt"] = all_enhanced.get(
                    idx, scene.get("visual_prompt", scene.get("visual_description", ""))
                )
            return scenes

        # Fallback: use original prompts
        logger.warning(
            f"[YouTubeSceneBuilder] Unknown duration type '{duration_type}', "
            f"using original prompts"
        )
        for scene in scenes:
            scene["enhanced_visual_prompt"] = scene.get(
                "visual_prompt", scene.get("visual_description", "")
            )
        return scenes

    def _batch_enhance_prompts(
        self,
        scene_data_list: List[Dict[str, Any]],
        story_context: Dict[str, Any],
        user_id: str,
    ) -> Dict[int, str]:
        """
        Enhance multiple scene prompts in a single AI call.

        Returns:
            Dictionary mapping scene index to enhanced prompt
        """
        try:
            # Build batch enhancement prompt
            scenes_text = "\n\n".join([
                f"Scene {scene.get('scene_number', idx + 1)}: {scene.get('title', '')}\n"
                f"Description: {scene.get('description', '')}\n"
                f"Current Prompt: {scene.get('image_prompt', '')}"
                for idx, scene in enumerate(scene_data_list)
            ])

            batch_prompt = f"""You are optimizing visual prompts for AI video generation. Enhance the following scenes to be more detailed and video-optimized.

**Video Style Context:**
- Setting: {story_context.get('story_setting', 'cinematic')}
- Tone: {story_context.get('story_tone', 'professional')}
- Style: {story_context.get('writing_style', 'cinematic')}

**Scenes to Enhance:**
{scenes_text}

**Your Task:**
For each scene, create an enhanced visual prompt (200-300 words) that:
1. Is detailed and specific for video generation
2. Includes camera movements, lighting, composition
3. Maintains consistency with the video style
4. Is optimized for WAN 2.5 text-to-video model

**Format as JSON array with enhanced prompts:**
[
  {{"scene_index": 0, "enhanced_prompt": "detailed enhanced prompt for scene 1..."}},
  {{"scene_index": 1, "enhanced_prompt": "detailed enhanced prompt for scene 2..."}},
  ...
]

Make sure the array length matches the number of scenes provided ({len(scene_data_list)}).
"""

            system_prompt = (
                "You are an expert at creating detailed visual prompts for AI video generation. "
                "Your prompts are specific, cinematic, and optimized for video models."
            )

            response = llm_text_gen(
                prompt=batch_prompt,
                system_prompt=system_prompt,
                user_id=user_id,
                json_struct={
                    "type": "array",
                    "items": {
                        "type": "object",
                        "properties": {
                            "scene_index": {"type": "number"},
                            "enhanced_prompt": {"type": "string"}
                        },
                        "required": ["scene_index", "enhanced_prompt"]
                    }
                }
            )

            # Parse response
            if isinstance(response, list):
                enhanced_list = response
            elif isinstance(response, str):
                import json
                enhanced_list = json.loads(response)
            else:
                enhanced_list = response

            # Build result dictionary
            result = {}
            for item in enhanced_list:
                idx = item.get("scene_index", 0)
                prompt = item.get("enhanced_prompt", "")
                if prompt:
                    result[idx] = prompt
                else:
                    # Fallback to original
                    original_scene = scene_data_list[idx] if idx < len(scene_data_list) else {}
                    result[idx] = original_scene.get(
                        "image_prompt", original_scene.get("description", "")
                    )

            # Fill in any missing scenes with original prompts
            for idx in range(len(scene_data_list)):
                if idx not in result:
                    original_scene = scene_data_list[idx]
                    result[idx] = original_scene.get(
                        "image_prompt", original_scene.get("description", "")
                    )

            logger.info(
                f"[YouTubeSceneBuilder] ✅ Batch enhanced {len(result)} prompts "
                f"in 1 AI call"
            )
            return result

        except Exception as e:
            logger.error(
                f"[YouTubeSceneBuilder] Batch enhancement failed: {e}",
                exc_info=True
            )
            # Return original prompts as fallback
            return {
                idx: scene.get("image_prompt", scene.get("description", ""))
                for idx, scene in enumerate(scene_data_list)
            }