WIP: AI Podcast Maker and YouTube Creator Studio integration
This commit is contained in:
358
backend/services/youtube/planner.py
Normal file
358
backend/services/youtube/planner.py
Normal file
@@ -0,0 +1,358 @@
|
||||
"""
|
||||
YouTube Video Planner Service
|
||||
|
||||
Generates video plans, outlines, and insights using AI with persona integration.
|
||||
"""
|
||||
|
||||
from typing import Dict, Any, Optional, List
|
||||
from loguru import logger
|
||||
from fastapi import HTTPException
|
||||
|
||||
from services.llm_providers.main_text_generation import llm_text_gen
|
||||
from utils.logger_utils import get_service_logger
|
||||
|
||||
logger = get_service_logger("youtube.planner")
|
||||
|
||||
|
||||
class YouTubePlannerService:
|
||||
"""Service for planning YouTube videos with AI assistance."""
|
||||
|
||||
def __init__(self):
|
||||
"""Initialize the planner service."""
|
||||
logger.info("[YouTubePlanner] Service initialized")
|
||||
|
||||
def generate_video_plan(
|
||||
self,
|
||||
user_idea: str,
|
||||
duration_type: str, # "shorts", "medium", "long"
|
||||
persona_data: Optional[Dict[str, Any]] = None,
|
||||
reference_image_description: Optional[str] = None,
|
||||
source_content_id: Optional[str] = None, # For blog/story conversion
|
||||
source_content_type: Optional[str] = None, # "blog", "story"
|
||||
user_id: str = None,
|
||||
include_scenes: bool = False, # For shorts: combine plan + scenes in one call
|
||||
) -> Dict[str, Any]:
|
||||
"""
|
||||
Generate a comprehensive video plan from user input.
|
||||
|
||||
Args:
|
||||
user_idea: User's video idea or topic
|
||||
duration_type: "shorts" (≤60s), "medium" (1-4min), "long" (4-10min)
|
||||
persona_data: Optional persona data for tone/style
|
||||
reference_image_description: Optional description of reference image
|
||||
source_content_id: Optional ID of source content (blog/story)
|
||||
source_content_type: Type of source content
|
||||
user_id: Clerk user ID for subscription checking
|
||||
|
||||
Returns:
|
||||
Dictionary with video plan, outline, insights, and metadata
|
||||
"""
|
||||
try:
|
||||
logger.info(
|
||||
f"[YouTubePlanner] Generating plan: idea={user_idea[:50]}..., "
|
||||
f"duration={duration_type}, user={user_id}"
|
||||
)
|
||||
|
||||
# Build persona context
|
||||
persona_context = self._build_persona_context(persona_data)
|
||||
|
||||
# Build duration context
|
||||
duration_context = self._get_duration_context(duration_type)
|
||||
|
||||
# Build source content context if provided
|
||||
source_context = ""
|
||||
if source_content_id and source_content_type:
|
||||
source_context = f"""
|
||||
**Source Content:**
|
||||
- Type: {source_content_type}
|
||||
- ID: {source_content_id}
|
||||
- Note: This video should be based on the existing {source_content_type} content.
|
||||
"""
|
||||
|
||||
# Build reference image context
|
||||
image_context = ""
|
||||
if reference_image_description:
|
||||
image_context = f"""
|
||||
**Reference Image:**
|
||||
{reference_image_description}
|
||||
- Use this as visual inspiration for the video
|
||||
"""
|
||||
|
||||
# Generate comprehensive video plan
|
||||
planning_prompt = f"""You are an expert YouTube content strategist. Create a comprehensive video plan based on the user's idea.
|
||||
|
||||
**User's Video Idea:**
|
||||
{user_idea}
|
||||
|
||||
**Video Duration Type:**
|
||||
{duration_type} ({duration_context['description']})
|
||||
|
||||
**Duration Guidelines:**
|
||||
- Target length: {duration_context['target_seconds']} seconds
|
||||
- Hook duration: {duration_context['hook_seconds']} seconds
|
||||
- Main content: {duration_context['main_seconds']} seconds
|
||||
- CTA duration: {duration_context['cta_seconds']} seconds
|
||||
- Maximum scenes: {duration_context['max_scenes']} (for shorts, keep 2-4 scenes total)
|
||||
|
||||
{persona_context}
|
||||
|
||||
{source_context}
|
||||
|
||||
{image_context}
|
||||
|
||||
**Your Task:**
|
||||
Create a detailed video plan that includes:
|
||||
|
||||
1. **Video Summary**: A 2-3 sentence overview of what the video will cover
|
||||
2. **Target Audience**: Who this video is for
|
||||
3. **Video Goal**: Primary objective (educate, entertain, sell, inspire, etc.)
|
||||
4. **Key Message**: The main takeaway viewers should remember
|
||||
5. **Hook Strategy**: Attention-grabbing opening (first {duration_context['hook_seconds']} seconds)
|
||||
6. **Content Outline**: High-level structure with 3-5 main sections
|
||||
7. **Call-to-Action**: Clear CTA that fits the video goal
|
||||
8. **Visual Style**: Recommended visual approach (cinematic, tutorial, vlog, etc.)
|
||||
9. **Tone**: Recommended tone (professional, casual, energetic, etc.)
|
||||
10. **SEO Keywords**: 5-7 relevant keywords for YouTube SEO
|
||||
|
||||
**Format your response as JSON:**
|
||||
{{
|
||||
"video_summary": "...",
|
||||
"target_audience": "...",
|
||||
"video_goal": "...",
|
||||
"key_message": "...",
|
||||
"hook_strategy": "...",
|
||||
"content_outline": [
|
||||
{{"section": "Section 1", "description": "...", "duration_estimate": 30}},
|
||||
{{"section": "Section 2", "description": "...", "duration_estimate": 45}}
|
||||
],
|
||||
"call_to_action": "...",
|
||||
"visual_style": "...",
|
||||
"tone": "...",
|
||||
"seo_keywords": ["keyword1", "keyword2", ...]
|
||||
}}
|
||||
|
||||
Make sure the content outline fits within the {duration_type} duration constraints.
|
||||
"""
|
||||
|
||||
system_prompt = (
|
||||
"You are an expert YouTube content strategist specializing in creating "
|
||||
"engaging, well-structured video plans. Your plans are data-driven, "
|
||||
"audience-focused, and optimized for YouTube's algorithm."
|
||||
)
|
||||
|
||||
# For shorts, combine plan + scenes in one call to save API calls
|
||||
if include_scenes and duration_type == "shorts":
|
||||
planning_prompt += f"""
|
||||
|
||||
**IMPORTANT: Since this is a SHORTS video, also generate the complete scene breakdown in the same response.**
|
||||
|
||||
**Additional Task - Generate Detailed Scenes:**
|
||||
Create detailed scenes (up to {duration_context['max_scenes']} scenes) that include:
|
||||
1. Scene number and title
|
||||
2. Narration text (what will be spoken) - keep it concise for shorts
|
||||
3. Visual description (what viewers will see)
|
||||
4. Duration estimate (2-8 seconds each)
|
||||
5. Emphasis tags (hook, main_content, transition, cta)
|
||||
|
||||
**Scene Format:**
|
||||
Each scene should be detailed enough for video generation. Total duration must fit within {duration_context['target_seconds']} seconds.
|
||||
|
||||
**Update JSON structure to include "scenes" array:**
|
||||
Add a "scenes" field with the complete scene breakdown.
|
||||
"""
|
||||
|
||||
json_struct = {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"video_summary": {"type": "string"},
|
||||
"target_audience": {"type": "string"},
|
||||
"video_goal": {"type": "string"},
|
||||
"key_message": {"type": "string"},
|
||||
"hook_strategy": {"type": "string"},
|
||||
"content_outline": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"section": {"type": "string"},
|
||||
"description": {"type": "string"},
|
||||
"duration_estimate": {"type": "number"}
|
||||
}
|
||||
}
|
||||
},
|
||||
"call_to_action": {"type": "string"},
|
||||
"visual_style": {"type": "string"},
|
||||
"tone": {"type": "string"},
|
||||
"seo_keywords": {
|
||||
"type": "array",
|
||||
"items": {"type": "string"}
|
||||
},
|
||||
"scenes": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"scene_number": {"type": "number"},
|
||||
"title": {"type": "string"},
|
||||
"narration": {"type": "string"},
|
||||
"visual_description": {"type": "string"},
|
||||
"duration_estimate": {"type": "number"},
|
||||
"emphasis": {"type": "string"},
|
||||
"visual_cues": {
|
||||
"type": "array",
|
||||
"items": {"type": "string"}
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"scene_number", "title", "narration", "visual_description",
|
||||
"duration_estimate", "emphasis"
|
||||
]
|
||||
}
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"video_summary", "target_audience", "video_goal", "key_message",
|
||||
"hook_strategy", "content_outline", "call_to_action",
|
||||
"visual_style", "tone", "seo_keywords", "scenes"
|
||||
]
|
||||
}
|
||||
else:
|
||||
json_struct = {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"video_summary": {"type": "string"},
|
||||
"target_audience": {"type": "string"},
|
||||
"video_goal": {"type": "string"},
|
||||
"key_message": {"type": "string"},
|
||||
"hook_strategy": {"type": "string"},
|
||||
"content_outline": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"section": {"type": "string"},
|
||||
"description": {"type": "string"},
|
||||
"duration_estimate": {"type": "number"}
|
||||
}
|
||||
}
|
||||
},
|
||||
"call_to_action": {"type": "string"},
|
||||
"visual_style": {"type": "string"},
|
||||
"tone": {"type": "string"},
|
||||
"seo_keywords": {
|
||||
"type": "array",
|
||||
"items": {"type": "string"}
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"video_summary", "target_audience", "video_goal", "key_message",
|
||||
"hook_strategy", "content_outline", "call_to_action",
|
||||
"visual_style", "tone", "seo_keywords"
|
||||
]
|
||||
}
|
||||
|
||||
# Generate plan using LLM
|
||||
response = llm_text_gen(
|
||||
prompt=planning_prompt,
|
||||
system_prompt=system_prompt,
|
||||
user_id=user_id,
|
||||
json_struct=json_struct
|
||||
)
|
||||
|
||||
# Parse response (handle both dict and JSON string)
|
||||
if isinstance(response, dict):
|
||||
plan_data = response
|
||||
else:
|
||||
import json
|
||||
plan_data = json.loads(response)
|
||||
|
||||
# Add metadata
|
||||
plan_data["duration_type"] = duration_type
|
||||
plan_data["duration_metadata"] = duration_context
|
||||
plan_data["user_idea"] = user_idea
|
||||
|
||||
# If scenes were included, mark them for scene builder
|
||||
if include_scenes and duration_type == "shorts" and "scenes" in plan_data:
|
||||
plan_data["_scenes_included"] = True
|
||||
logger.info(
|
||||
f"[YouTubePlanner] ✅ Plan + {len(plan_data.get('scenes', []))} scenes "
|
||||
f"generated in 1 AI call (optimized for shorts)"
|
||||
)
|
||||
else:
|
||||
if include_scenes and duration_type == "shorts":
|
||||
# LLM did not return scenes; downstream will regenerate
|
||||
plan_data["_scenes_included"] = False
|
||||
logger.warning(
|
||||
"[YouTubePlanner] Shorts optimization requested but no scenes returned; "
|
||||
"scene builder will generate scenes separately."
|
||||
)
|
||||
logger.info(f"[YouTubePlanner] ✅ Plan generated successfully")
|
||||
|
||||
return plan_data
|
||||
|
||||
except HTTPException:
|
||||
raise
|
||||
except Exception as e:
|
||||
logger.error(f"[YouTubePlanner] Error generating plan: {e}", exc_info=True)
|
||||
raise HTTPException(
|
||||
status_code=500,
|
||||
detail=f"Failed to generate video plan: {str(e)}"
|
||||
)
|
||||
|
||||
def _build_persona_context(self, persona_data: Optional[Dict[str, Any]]) -> str:
|
||||
"""Build persona context string for prompts."""
|
||||
if not persona_data:
|
||||
return """
|
||||
**Persona Context:**
|
||||
- Using default professional tone
|
||||
- No specific persona constraints
|
||||
"""
|
||||
|
||||
core_persona = persona_data.get("core_persona", {})
|
||||
tone = core_persona.get("tone", "professional")
|
||||
voice = core_persona.get("voice_characteristics", {})
|
||||
|
||||
return f"""
|
||||
**Persona Context:**
|
||||
- Tone: {tone}
|
||||
- Voice Style: {voice.get('style', 'professional')}
|
||||
- Communication Style: {voice.get('communication_style', 'clear and direct')}
|
||||
- Brand Values: {core_persona.get('core_belief', 'value-driven content')}
|
||||
- Use this persona to guide the video's tone, style, and messaging approach.
|
||||
"""
|
||||
|
||||
def _get_duration_context(self, duration_type: str) -> Dict[str, Any]:
|
||||
"""Get duration-specific context and constraints."""
|
||||
contexts = {
|
||||
"shorts": {
|
||||
"description": "YouTube Shorts (15-60 seconds)",
|
||||
"target_seconds": 30,
|
||||
"hook_seconds": 3,
|
||||
"main_seconds": 24,
|
||||
"cta_seconds": 3,
|
||||
# Keep scenes tight for shorts to control cost and pacing
|
||||
"max_scenes": 4,
|
||||
"scene_duration_range": (2, 8)
|
||||
},
|
||||
"medium": {
|
||||
"description": "Medium-length video (1-4 minutes)",
|
||||
"target_seconds": 150, # 2.5 minutes
|
||||
"hook_seconds": 10,
|
||||
"main_seconds": 130,
|
||||
"cta_seconds": 10,
|
||||
"max_scenes": 12,
|
||||
"scene_duration_range": (5, 15)
|
||||
},
|
||||
"long": {
|
||||
"description": "Long-form video (4-10 minutes)",
|
||||
"target_seconds": 420, # 7 minutes
|
||||
"hook_seconds": 15,
|
||||
"main_seconds": 380,
|
||||
"cta_seconds": 25,
|
||||
"max_scenes": 20,
|
||||
"scene_duration_range": (10, 30)
|
||||
}
|
||||
}
|
||||
|
||||
return contexts.get(duration_type, contexts["medium"])
|
||||
|
||||
Reference in New Issue
Block a user