WIP: AI Podcast Maker and YouTube Creator Studio integration
This commit is contained in:
@@ -29,17 +29,15 @@ class ExaResearchProvider(BaseProvider):
|
||||
# Determine category: use exa_category if set, otherwise map from source_types
|
||||
category = config.exa_category if config.exa_category else self._map_source_type_to_category(config.source_types)
|
||||
|
||||
# Build search kwargs
|
||||
# Build search kwargs - use correct Exa API format
|
||||
search_kwargs = {
|
||||
'type': config.exa_search_type or "auto",
|
||||
'num_results': min(config.max_sources, 25),
|
||||
'contents': {
|
||||
'text': {'max_characters': 1000},
|
||||
'summary': {'query': f"Key insights about {topic}"},
|
||||
'highlights': {
|
||||
'num_sentences': 2,
|
||||
'highlights_per_url': 3
|
||||
}
|
||||
'text': {'max_characters': 1000},
|
||||
'summary': {'query': f"Key insights about {topic}"},
|
||||
'highlights': {
|
||||
'num_sentences': 2,
|
||||
'highlights_per_url': 3
|
||||
}
|
||||
}
|
||||
|
||||
@@ -53,8 +51,39 @@ class ExaResearchProvider(BaseProvider):
|
||||
|
||||
logger.info(f"[Exa Research] Executing search: {query}")
|
||||
|
||||
# Execute Exa search
|
||||
results = self.exa.search_and_contents(query, **search_kwargs)
|
||||
# Execute Exa search - pass contents parameters directly, not nested
|
||||
try:
|
||||
results = self.exa.search_and_contents(
|
||||
query,
|
||||
text={'max_characters': 1000},
|
||||
summary={'query': f"Key insights about {topic}"},
|
||||
highlights={'num_sentences': 2, 'highlights_per_url': 3},
|
||||
type=config.exa_search_type or "auto",
|
||||
num_results=min(config.max_sources, 25),
|
||||
**({k: v for k, v in {
|
||||
'category': category,
|
||||
'include_domains': config.exa_include_domains,
|
||||
'exclude_domains': config.exa_exclude_domains
|
||||
}.items() if v})
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(f"[Exa Research] API call failed: {e}")
|
||||
# Try simpler call without contents if the above fails
|
||||
try:
|
||||
logger.info("[Exa Research] Retrying with simplified parameters")
|
||||
results = self.exa.search_and_contents(
|
||||
query,
|
||||
type=config.exa_search_type or "auto",
|
||||
num_results=min(config.max_sources, 25),
|
||||
**({k: v for k, v in {
|
||||
'category': category,
|
||||
'include_domains': config.exa_include_domains,
|
||||
'exclude_domains': config.exa_exclude_domains
|
||||
}.items() if v})
|
||||
)
|
||||
except Exception as retry_error:
|
||||
logger.error(f"[Exa Research] Retry also failed: {retry_error}")
|
||||
raise RuntimeError(f"Exa search failed: {str(retry_error)}") from retry_error
|
||||
|
||||
# Transform to standardized format
|
||||
sources = self._transform_sources(results.results)
|
||||
|
||||
@@ -52,45 +52,44 @@ class BasicResearchStrategy(ResearchStrategy):
|
||||
target_audience: str,
|
||||
config: ResearchConfig
|
||||
) -> str:
|
||||
"""Build basic research prompt focused on keywords and quick insights."""
|
||||
prompt = f"""You are a professional blog content strategist researching for a {industry} blog targeting {target_audience}.
|
||||
"""Build basic research prompt focused on podcast-ready, actionable insights."""
|
||||
prompt = f"""You are a podcast researcher creating TALKING POINTS and FACT CARDS for a {industry} audience of {target_audience}.
|
||||
|
||||
Research Topic: "{topic}"
|
||||
|
||||
Provide analysis in this EXACT format:
|
||||
|
||||
## CURRENT TRENDS (2024-2025)
|
||||
- [Trend 1 with specific data and source URL]
|
||||
- [Trend 2 with specific data and source URL]
|
||||
- [Trend 3 with specific data and source URL]
|
||||
## PODCAST HOOKS (3)
|
||||
- [Hook line with tension + data point + source URL]
|
||||
|
||||
## KEY STATISTICS
|
||||
- [Statistic 1: specific number/percentage with source URL]
|
||||
- [Statistic 2: specific number/percentage with source URL]
|
||||
- [Statistic 3: specific number/percentage with source URL]
|
||||
- [Statistic 4: specific number/percentage with source URL]
|
||||
- [Statistic 5: specific number/percentage with source URL]
|
||||
## OBJECTIONS & COUNTERS (3)
|
||||
- Objection: [common listener objection]
|
||||
Counter: [concise rebuttal with stat + source URL]
|
||||
|
||||
## PRIMARY KEYWORDS
|
||||
1. "{topic}" (main keyword)
|
||||
2. [Variation 1]
|
||||
3. [Variation 2]
|
||||
## KEY STATS & PROOF (6)
|
||||
- [Specific metric with %/number, date, and source URL]
|
||||
|
||||
## SECONDARY KEYWORDS
|
||||
[5 related keywords for blog content]
|
||||
## MINI CASE SNAPS (3)
|
||||
- [Brand/company], [what they did], [outcome metric], [source URL]
|
||||
|
||||
## CONTENT ANGLES (Top 5)
|
||||
1. [Angle 1: specific unique approach]
|
||||
2. [Angle 2: specific unique approach]
|
||||
3. [Angle 3: specific unique approach]
|
||||
4. [Angle 4: specific unique approach]
|
||||
5. [Angle 5: specific unique approach]
|
||||
## KEYWORDS TO MENTION (Primary + 5 Secondary)
|
||||
- Primary: "{topic}"
|
||||
- Secondary: [5 related keywords]
|
||||
|
||||
## 5 CONTENT ANGLES
|
||||
1. [Angle with audience benefit + why-now]
|
||||
2. [Angle ...]
|
||||
3. [Angle ...]
|
||||
4. [Angle ...]
|
||||
5. [Angle ...]
|
||||
|
||||
## FACT CARD LIST (8)
|
||||
- For each: Quote/claim, source URL, published date, metric/context.
|
||||
|
||||
REQUIREMENTS:
|
||||
- Cite EVERY claim with authoritative source URLs
|
||||
- Use 2024-2025 data when available
|
||||
- Include specific numbers, dates, examples
|
||||
- Focus on actionable blog insights for {target_audience}"""
|
||||
- Every claim MUST include a source URL (authoritative, recent: 2024-2025 preferred).
|
||||
- Use concrete numbers, dates, outcomes; avoid generic advice.
|
||||
- Keep bullets tight and scannable for spoken narration."""
|
||||
return prompt.strip()
|
||||
|
||||
|
||||
@@ -107,57 +106,54 @@ class ComprehensiveResearchStrategy(ResearchStrategy):
|
||||
target_audience: str,
|
||||
config: ResearchConfig
|
||||
) -> str:
|
||||
"""Build comprehensive research prompt with all analysis components."""
|
||||
"""Build comprehensive research prompt with podcast-focused, high-value insights."""
|
||||
date_filter = f"\nDate Focus: {config.date_range.value.replace('_', ' ')}" if config.date_range else ""
|
||||
source_filter = f"\nPriority Sources: {', '.join([s.value for s in config.source_types])}" if config.source_types else ""
|
||||
|
||||
prompt = f"""You are a senior blog content strategist conducting comprehensive research for a {industry} blog targeting {target_audience}.
|
||||
prompt = f"""You are a senior podcast researcher creating deeply sourced talking points for a {industry} audience of {target_audience}.
|
||||
|
||||
Research Topic: "{topic}"{date_filter}{source_filter}
|
||||
|
||||
Provide COMPLETE analysis in this EXACT format:
|
||||
|
||||
## TRENDS AND INSIGHTS (2024-2025)
|
||||
[5-7 trends with specific data, numbers, and source URLs]
|
||||
## WHAT'S CHANGED (2024-2025)
|
||||
[5-7 concise trend bullets with numbers + source URLs]
|
||||
|
||||
## KEY STATISTICS
|
||||
[7-10 statistics with exact numbers, percentages, dates, and source URLs]
|
||||
## PROOF & NUMBERS
|
||||
[10 stats with metric, date, sample size/method, and source URL]
|
||||
|
||||
## EXPERT OPINIONS
|
||||
[4-5 expert quotes with full attribution and source URLs]
|
||||
## EXPERT SIGNALS
|
||||
[5 expert quotes with name, title/company, source URL]
|
||||
|
||||
## RECENT DEVELOPMENTS
|
||||
[5-7 recent news/developments with dates and source URLs]
|
||||
## RECENT MOVES
|
||||
[5-7 news items or launches with dates and source URLs]
|
||||
|
||||
## MARKET ANALYSIS
|
||||
[3-5 market insights with data points and source URLs]
|
||||
## MARKET SNAPSHOTS
|
||||
[3-5 insights with TAM/SAM/SOM or adoption metrics, source URLs]
|
||||
|
||||
## BEST PRACTICES & CASE STUDIES
|
||||
[3-5 examples with specific outcomes/metrics and source URLs]
|
||||
## CASE SNAPS
|
||||
[3-5 cases: who, what they did, outcome metric, source URL]
|
||||
|
||||
## KEYWORD ANALYSIS
|
||||
Primary Keywords: [3 main variations]
|
||||
Secondary Keywords: [7-10 related keywords]
|
||||
Long-Tail Opportunities: [5-7 specific search phrases]
|
||||
## KEYWORD PLAN
|
||||
Primary (3), Secondary (8-10), Long-tail (5-7) with intent hints.
|
||||
|
||||
## COMPETITOR ANALYSIS
|
||||
Top Competitors: [5 competitors with brief descriptions]
|
||||
Content Gaps: [5 topics competitors are missing]
|
||||
Competitive Advantages: [5 unique angles we can own]
|
||||
## COMPETITOR GAPS
|
||||
- Top 5 competitors (URL) + 1-line strength
|
||||
- 5 content gaps we can own
|
||||
- 3 unique angles to differentiate
|
||||
|
||||
## CONTENT ANGLES (Exactly 5)
|
||||
1. [Unique angle with reasoning and target benefit]
|
||||
2. [Unique angle with reasoning and target benefit]
|
||||
3. [Unique angle with reasoning and target benefit]
|
||||
4. [Unique angle with reasoning and target benefit]
|
||||
5. [Unique angle with reasoning and target benefit]
|
||||
## PODCAST-READY ANGLES (5)
|
||||
- Each: Hook, promised takeaway, data or example, source URL.
|
||||
|
||||
## FACT CARD LIST (10)
|
||||
- Each: Quote/claim, source URL, published date, metric/context, suggested angle tag.
|
||||
|
||||
VERIFICATION REQUIREMENTS:
|
||||
- Minimum 2 authoritative sources per major claim
|
||||
- Prioritize: Industry publications > Research papers > News > Blogs
|
||||
- 2024-2025 data strongly preferred
|
||||
- All numbers must include context (timeframe, sample size, methodology)
|
||||
- Every recommendation must be actionable for {target_audience}"""
|
||||
- Minimum 2 authoritative sources per major claim.
|
||||
- Prefer industry reports > research papers > news > blogs.
|
||||
- 2024-2025 data strongly preferred.
|
||||
- All numbers must include timeframe and methodology.
|
||||
- Every bullet must be concise for spoken narration and actionable for {target_audience}."""
|
||||
return prompt.strip()
|
||||
|
||||
|
||||
|
||||
@@ -78,6 +78,23 @@ class DailyScheduleGenerator:
|
||||
try:
|
||||
logger.info("🚀 Starting daily schedule generation")
|
||||
|
||||
# CRITICAL VALIDATION: Ensure weekly_themes is a list of dictionaries
|
||||
if not isinstance(weekly_themes, list):
|
||||
raise TypeError(f"weekly_themes must be a list, got {type(weekly_themes)}")
|
||||
|
||||
if not weekly_themes:
|
||||
raise ValueError("weekly_themes cannot be empty")
|
||||
|
||||
for i, theme in enumerate(weekly_themes):
|
||||
if not isinstance(theme, dict):
|
||||
raise TypeError(f"weekly_themes[{i}] must be a dictionary, got {type(theme)}. Value: {theme}")
|
||||
|
||||
# Validate required fields
|
||||
if "week_number" not in theme:
|
||||
raise ValueError(f"weekly_themes[{i}] missing required 'week_number' field")
|
||||
|
||||
logger.info(f"✅ Validated {len(weekly_themes)} weekly themes")
|
||||
|
||||
daily_schedules = []
|
||||
current_date = datetime.now()
|
||||
|
||||
@@ -153,12 +170,22 @@ class DailyScheduleGenerator:
|
||||
def _get_weekly_theme(self, weekly_themes: List[Dict], week_number: int) -> Dict:
|
||||
"""Get weekly theme for specific week number."""
|
||||
try:
|
||||
# Additional validation
|
||||
if not isinstance(weekly_themes, list):
|
||||
raise TypeError(f"weekly_themes must be a list, got {type(weekly_themes)}")
|
||||
|
||||
for theme in weekly_themes:
|
||||
if not isinstance(theme, dict):
|
||||
raise TypeError(f"Theme must be a dictionary, got {type(theme)}: {theme}")
|
||||
|
||||
if theme.get("week_number") == week_number:
|
||||
return theme
|
||||
|
||||
# If no theme found, fail with clear error
|
||||
raise ValueError(f"No weekly theme found for week {week_number}")
|
||||
raise ValueError(
|
||||
f"No weekly theme found for week {week_number}. "
|
||||
f"Available weeks: {[t.get('week_number') for t in weekly_themes if isinstance(t, dict)]}"
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error getting weekly theme: {str(e)}")
|
||||
@@ -205,9 +232,21 @@ class DailyScheduleGenerator:
|
||||
# Call AI service - NO FALLBACKS
|
||||
ai_response = await self.ai_engine.generate_content_recommendations(analysis_data)
|
||||
|
||||
# Validate AI response - NO FALLBACKS
|
||||
# ENHANCED VALIDATION: Check for unexpected types (including float)
|
||||
if ai_response is None:
|
||||
raise ValueError("AI service returned None")
|
||||
|
||||
if isinstance(ai_response, (int, float, str, bool)):
|
||||
raise TypeError(
|
||||
f"AI service returned primitive type {type(ai_response).__name__}: {ai_response}. "
|
||||
f"Expected list of dictionaries. This indicates an AI service error."
|
||||
)
|
||||
|
||||
if not isinstance(ai_response, list):
|
||||
raise ValueError(f"AI service returned unexpected type: {type(ai_response)}. Expected list, got {type(ai_response)}")
|
||||
raise TypeError(
|
||||
f"AI service returned unexpected type: {type(ai_response).__name__}. "
|
||||
f"Expected list, got {type(ai_response)}. Value: {str(ai_response)[:200]}"
|
||||
)
|
||||
|
||||
if not ai_response:
|
||||
raise ValueError("AI service returned empty list of recommendations")
|
||||
|
||||
@@ -25,6 +25,8 @@ from models.content_asset_models import Base as ContentAssetBase
|
||||
from models.product_marketing_models import Campaign, CampaignProposal, CampaignAsset
|
||||
# Product Asset models (Product Marketing Suite - product assets, not campaigns)
|
||||
from models.product_asset_models import ProductAsset, ProductStyleTemplate, EcommerceExport
|
||||
# Podcast Maker models use SubscriptionBase, but import to ensure models are registered
|
||||
from models.podcast_models import PodcastProject
|
||||
|
||||
# Database configuration
|
||||
DATABASE_URL = os.getenv('DATABASE_URL', 'sqlite:///./alwrity.db')
|
||||
|
||||
@@ -69,13 +69,21 @@ def generate_audio(
|
||||
RuntimeError: If subscription limits are exceeded or user_id is missing.
|
||||
"""
|
||||
try:
|
||||
logger.info("[audio_gen] Starting audio generation")
|
||||
logger.debug(f"[audio_gen] Text length: {len(text)} characters, voice: {voice_id}")
|
||||
# VALIDATION: Check inputs before any processing or API calls
|
||||
if not text or not isinstance(text, str) or len(text.strip()) == 0:
|
||||
raise ValueError("Text input is required and cannot be empty")
|
||||
|
||||
text = text.strip() # Normalize whitespace
|
||||
|
||||
if len(text) > 10000:
|
||||
raise ValueError(f"Text is too long ({len(text)} characters). Maximum is 10,000 characters.")
|
||||
|
||||
# SUBSCRIPTION CHECK - Required and strict enforcement
|
||||
if not user_id:
|
||||
raise RuntimeError("user_id is required for subscription checking. Please provide Clerk user ID.")
|
||||
|
||||
logger.info("[audio_gen] Starting audio generation")
|
||||
logger.debug(f"[audio_gen] Text length: {len(text)} characters, voice: {voice_id}")
|
||||
|
||||
# Calculate cost based on character count (every character is 1 token)
|
||||
# Pricing: $0.05 per 1,000 characters
|
||||
character_count = len(text)
|
||||
@@ -190,8 +198,9 @@ def generate_audio(
|
||||
new_cost = current_cost_before + estimated_cost
|
||||
|
||||
# Use direct SQL UPDATE for dynamic attributes
|
||||
from sqlalchemy import text
|
||||
update_query = text("""
|
||||
# Import sqlalchemy.text with alias to avoid shadowing the 'text' parameter
|
||||
from sqlalchemy import text as sql_text
|
||||
update_query = sql_text("""
|
||||
UPDATE usage_summaries
|
||||
SET audio_calls = :new_calls,
|
||||
audio_cost = :new_cost
|
||||
@@ -210,6 +219,8 @@ def generate_audio(
|
||||
summary.updated_at = datetime.utcnow()
|
||||
|
||||
# Create usage log
|
||||
# Store the text parameter in a local variable before any imports to prevent shadowing
|
||||
text_param = text # Capture function parameter before any potential shadowing
|
||||
usage_log = APIUsageLog(
|
||||
user_id=user_id,
|
||||
provider=APIProvider.AUDIO,
|
||||
@@ -224,7 +235,7 @@ def generate_audio(
|
||||
cost_total=estimated_cost,
|
||||
response_time=0.0,
|
||||
status_code=200,
|
||||
request_size=len(text.encode("utf-8")),
|
||||
request_size=len(text_param.encode("utf-8")), # Use captured parameter
|
||||
response_size=len(audio_bytes),
|
||||
billing_period=current_period,
|
||||
)
|
||||
|
||||
139
backend/services/podcast_service.py
Normal file
139
backend/services/podcast_service.py
Normal file
@@ -0,0 +1,139 @@
|
||||
"""
|
||||
Podcast Service
|
||||
|
||||
Service layer for managing podcast project persistence.
|
||||
"""
|
||||
|
||||
from sqlalchemy.orm import Session
|
||||
from sqlalchemy import desc, and_, or_
|
||||
from typing import Optional, List, Dict, Any
|
||||
from datetime import datetime
|
||||
import uuid
|
||||
|
||||
from models.podcast_models import PodcastProject
|
||||
|
||||
|
||||
class PodcastService:
    """Service layer for persisting and querying podcast projects.

    All reads and writes go through the SQLAlchemy session supplied at
    construction time. Every lookup is filtered by ``user_id`` in addition to
    ``project_id``, so a caller can only see or modify projects stored under
    the given user.
    """

    def __init__(self, db: Session):
        """Bind the service to the session used for every operation."""
        self.db = db

    def _commit(self) -> None:
        """Commit the session; on failure roll back and re-raise.

        Without the rollback, a failed commit leaves the session in a failed
        transaction state and later use of the same session keeps erroring
        until someone calls ``rollback()``. The original exception is
        re-raised unchanged so callers see the same error type as before.
        """
        try:
            self.db.commit()
        except Exception:
            self.db.rollback()
            raise

    def create_project(
        self,
        user_id: str,
        project_id: str,
        idea: str,
        duration: int,
        speakers: int,
        budget_cap: float,
        **kwargs
    ) -> PodcastProject:
        """Create, persist and return a new podcast project.

        The project always starts with ``status="draft"`` and
        ``current_step="create"``. Any extra keyword arguments are forwarded
        unchanged to the ``PodcastProject`` constructor.

        Raises:
            Exception: whatever the session raises on commit; the session is
                rolled back before the error propagates.
        """
        project = PodcastProject(
            project_id=project_id,
            user_id=user_id,
            idea=idea,
            duration=duration,
            speakers=speakers,
            budget_cap=budget_cap,
            status="draft",
            current_step="create",
            **kwargs
        )
        self.db.add(project)
        self._commit()
        # Reload so the returned instance reflects the committed row.
        self.db.refresh(project)
        return project

    def get_project(self, user_id: str, project_id: str) -> Optional[PodcastProject]:
        """Return the project matching both ``project_id`` and ``user_id``.

        Returns ``None`` when no row matches, which includes the case where
        the project exists but is stored under a different user.
        """
        return self.db.query(PodcastProject).filter(
            and_(
                PodcastProject.project_id == project_id,
                PodcastProject.user_id == user_id
            )
        ).first()

    def update_project(
        self,
        user_id: str,
        project_id: str,
        **updates
    ) -> Optional[PodcastProject]:
        """Apply ``updates`` to a project and persist the change.

        Only keys that already exist as attributes on the project are
        assigned; unknown keys are silently ignored. ``updated_at`` is set to
        the current UTC time on every successful call.

        Returns:
            The refreshed project, or ``None`` if ``get_project`` finds no
            match for this user.
        """
        project = self.get_project(user_id, project_id)
        if not project:
            return None

        for key, value in updates.items():
            if hasattr(project, key):
                setattr(project, key, value)

        project.updated_at = datetime.utcnow()
        self._commit()
        self.db.refresh(project)
        return project

    def list_projects(
        self,
        user_id: str,
        status: Optional[str] = None,
        favorites_only: bool = False,
        limit: int = 50,
        offset: int = 0,
        order_by: str = "updated_at"  # "updated_at" or "created_at"
    ) -> tuple[List[PodcastProject], int]:
        """List a user's projects, newest first, with optional filtering.

        Args:
            user_id: Owner whose projects are listed.
            status: When truthy, only projects with this exact status.
            favorites_only: When true, only projects flagged as favorite.
            limit: Maximum number of rows returned.
            offset: Number of rows skipped before the returned page.
            order_by: ``"created_at"`` sorts by creation time; any other
                value sorts by ``updated_at``. Both are descending.

        Returns:
            ``(projects, total)`` where ``total`` is the number of rows
            matching the filters before pagination is applied.
        """
        query = self.db.query(PodcastProject).filter(
            PodcastProject.user_id == user_id
        )

        if status:
            query = query.filter(PodcastProject.status == status)

        if favorites_only:
            # SQLAlchemy column comparison, not a Python truth test.
            query = query.filter(PodcastProject.is_favorite == True)  # noqa: E712

        # Count before ordering/pagination so the total covers every page.
        total = query.count()

        if order_by == "created_at":
            query = query.order_by(desc(PodcastProject.created_at))
        else:
            query = query.order_by(desc(PodcastProject.updated_at))

        projects = query.offset(offset).limit(limit).all()

        return projects, total

    def delete_project(self, user_id: str, project_id: str) -> bool:
        """Delete a project; return ``False`` if this user has no such project."""
        project = self.get_project(user_id, project_id)
        if not project:
            return False

        self.db.delete(project)
        self._commit()
        return True

    def toggle_favorite(self, user_id: str, project_id: str) -> Optional[PodcastProject]:
        """Flip ``is_favorite`` on a project and persist the change.

        Returns the refreshed project, or ``None`` if ``get_project`` finds
        no match for this user.
        """
        project = self.get_project(user_id, project_id)
        if not project:
            return None

        project.is_favorite = not project.is_favorite
        project.updated_at = datetime.utcnow()
        self._commit()
        self.db.refresh(project)
        return project

    def update_status(self, user_id: str, project_id: str, status: str) -> Optional[PodcastProject]:
        """Set a project's status; thin wrapper around ``update_project``."""
        return self.update_project(user_id, project_id, status=status)
|
||||
|
||||
@@ -8,6 +8,8 @@ from typing import Any, Dict, List
|
||||
from fastapi import HTTPException
|
||||
from loguru import logger
|
||||
|
||||
from services.llm_providers.main_text_generation import llm_text_gen
|
||||
|
||||
from .base import StoryServiceBase
|
||||
|
||||
|
||||
|
||||
@@ -545,6 +545,188 @@ def validate_video_generation_operations(
|
||||
)
|
||||
|
||||
|
||||
def validate_scene_animation_operation(
    pricing_service: PricingService,
    user_id: str,
) -> None:
    """
    Pre-flight check for the per-scene animation workflow.

    Asks ``pricing_service.check_comprehensive_limits`` whether ``user_id``
    may run a single video operation (provider name ``wavespeed``, operation
    type ``scene_animation``). Returns ``None`` when the check allows it.

    Raises:
        HTTPException: 429 carrying the limit message and usage details when
            the check refuses the operation; 500 when the check itself fails
            with any other exception.
    """
    try:
        animation_op = {
            'provider': APIProvider.VIDEO,
            'tokens_requested': 0,
            'actual_provider_name': 'wavespeed',
            'operation_type': 'scene_animation',
        }
        allowed, reason, details = pricing_service.check_comprehensive_limits(
            user_id=user_id,
            operations=[animation_op],
        )

        # Happy path first: nothing to return, the absence of an exception is the result.
        if allowed:
            logger.info(f"[Pre-flight Validator] ✅ Scene animation validated for user {user_id}")
            return

        logger.error(f"[Pre-flight Validator] Scene animation blocked for user {user_id}: {reason}")

        usage = {}
        if details:
            usage = details.get('usage_info', {})
        blocked_provider = 'video'
        if usage:
            blocked_provider = usage.get('provider', 'video')

        raise HTTPException(
            status_code=429,
            detail={
                'error': reason,
                'message': reason,
                'provider': blocked_provider,
                # Fall back to the raw error details when no usage info was supplied.
                'usage_info': usage or details,
            }
        )

    except HTTPException:
        # The 429 above must reach the caller untouched.
        raise
    except Exception as e:
        logger.error(f"[Pre-flight Validator] Error validating scene animation: {e}", exc_info=True)
        failure = f"Failed to validate scene animation: {str(e)}"
        raise HTTPException(
            status_code=500,
            detail={'error': failure, 'message': failure}
        )
|
||||
|
||||
|
||||
def validate_image_control_operations(
    pricing_service: PricingService,
    user_id: str,
    num_images: int = 1
) -> None:
    """
    Pre-flight check for image control operations (sketch-to-image,
    structure control, style transfer).

    One Stability ``image_generation`` operation is submitted to
    ``pricing_service.check_comprehensive_limits`` per requested image, so
    control operations are counted against the image generation limits.

    Args:
        pricing_service: PricingService instance
        user_id: User ID for subscription checking
        num_images: Number of images to generate (for multiple variations)

    Returns:
        None when the check allows the operations.

    Raises:
        HTTPException: 429 when the limit check refuses the operations; 500
            when the check itself fails with any other exception.
    """
    try:
        control_op = {
            'provider': APIProvider.STABILITY,
            'tokens_requested': 0,
            'actual_provider_name': 'stability',
            'operation_type': 'image_generation'  # Control ops use image generation limits
        }
        # A separate dict per image, so entries are never shared objects.
        requested_ops = [dict(control_op) for _ in range(num_images)]

        logger.info(f"[Pre-flight Validator] 🚀 Validating {num_images} image control operation(s) for user {user_id}")

        allowed, reason, details = pricing_service.check_comprehensive_limits(
            user_id=user_id,
            operations=requested_ops
        )

        if allowed:
            logger.info(f"[Pre-flight Validator] ✅ Image control validated for user {user_id}")
            return

        logger.error(f"[Pre-flight Validator] Image control blocked for user {user_id}: {reason}")

        usage = {}
        if details:
            usage = details.get('usage_info', {})
        blocked_provider = 'stability'
        if usage:
            blocked_provider = usage.get('provider', 'stability')

        raise HTTPException(
            status_code=429,
            detail={
                'error': reason,
                'message': reason,
                'provider': blocked_provider,
                # Fall back to the raw error details when no usage info was supplied.
                'usage_info': usage or details
            }
        )

    except HTTPException:
        # The 429 above must reach the caller untouched.
        raise
    except Exception as e:
        logger.error(f"[Pre-flight Validator] Error validating image control: {e}", exc_info=True)
        failure = f"Failed to validate image control: {str(e)}"
        raise HTTPException(
            status_code=500,
            detail={'error': failure, 'message': failure}
        )
|
||||
|
||||
|
||||
def validate_video_generation_operations(
    pricing_service: PricingService,
    user_id: str
) -> None:
    """
    Pre-flight check for a video generation request.

    Submits a single ``video_generation`` operation for the video provider
    to ``pricing_service.check_comprehensive_limits``.

    Args:
        pricing_service: PricingService instance
        user_id: User ID for subscription checking

    Returns:
        None when the check allows the operation.

    Raises:
        HTTPException: 429 when the limit check refuses the operation; 500
            when the check itself fails with any other exception.
    """
    try:
        video_op = {
            'provider': APIProvider.VIDEO,
            'tokens_requested': 0,
            'actual_provider_name': 'video',
            'operation_type': 'video_generation'
        }
        allowed, reason, details = pricing_service.check_comprehensive_limits(
            user_id=user_id,
            operations=[video_op]
        )

        # Happy path first: nothing to return, the absence of an exception is the result.
        if allowed:
            logger.info(f"[Pre-flight Validator] ✅ Video generation validated for user {user_id}")
            return

        logger.error(f"[Pre-flight Validator] Video generation blocked for user {user_id}: {reason}")

        usage = {}
        if details:
            usage = details.get('usage_info', {})
        blocked_provider = 'video'
        if usage:
            blocked_provider = usage.get('provider', 'video')

        raise HTTPException(
            status_code=429,
            detail={
                'error': reason,
                'message': reason,
                'provider': blocked_provider,
                # Fall back to the raw error details when no usage info was supplied.
                'usage_info': usage or details
            }
        )

    except HTTPException:
        # The 429 above must reach the caller untouched.
        raise
    except Exception as e:
        logger.error(f"[Pre-flight Validator] Error validating video generation: {e}", exc_info=True)
        failure = f"Failed to validate video generation: {str(e)}"
        raise HTTPException(
            status_code=500,
            detail={'error': failure, 'message': failure}
        )
|
||||
|
||||
|
||||
def validate_scene_animation_operation(
|
||||
pricing_service: PricingService,
|
||||
user_id: str,
|
||||
@@ -593,4 +775,79 @@ def validate_scene_animation_operation(
|
||||
'error': f"Failed to validate scene animation: {str(e)}",
|
||||
'message': f"Failed to validate scene animation: {str(e)}",
|
||||
},
|
||||
)
|
||||
|
||||
|
||||
def validate_calendar_generation_operations(
    pricing_service: PricingService,
    user_id: str,
    gpt_provider: str = "google"
) -> None:
    """
    Validate calendar generation operations before making API calls.

    A single ``calendar_generation`` operation with a 20,000-token estimate
    is submitted to ``pricing_service.check_comprehensive_limits``. The LLM
    provider checked depends on ``gpt_provider``: ``"huggingface"``
    (case-insensitive) is checked against ``APIProvider.MISTRAL``; every
    other value is checked against ``APIProvider.GEMINI``.

    Args:
        pricing_service: PricingService instance
        user_id: User ID for subscription checking
        gpt_provider: GPT provider from env var (defaults to "google").
            ``None`` or an empty string is treated as the default, so a
            caller may forward an unset environment variable directly.

    Returns:
        None when the check allows the operation.

    Raises:
        HTTPException: 429 when the limit check refuses the operation; 500
            when the check itself fails with any other exception.
    """
    try:
        # Determine actual provider for LLM calls based on GPT_PROVIDER env var.
        # Guard against None: calling .lower() on an unset env var value would
        # otherwise surface as a misleading 500 "failed to validate" error.
        gpt_provider_lower = (gpt_provider or "google").lower()
        if gpt_provider_lower == "huggingface":
            llm_provider_enum = APIProvider.MISTRAL
            llm_provider_name = "huggingface"
        else:
            llm_provider_enum = APIProvider.GEMINI
            llm_provider_name = "gemini"

        # Estimate tokens for 12-step process
        # This is a heavy operation involving multiple steps and analysis
        operations_to_validate = [
            {
                'provider': llm_provider_enum,
                'tokens_requested': 20000,  # Conservative estimate for full calendar generation
                'actual_provider_name': llm_provider_name,
                'operation_type': 'calendar_generation'
            }
        ]

        logger.info(f"[Pre-flight Validator] 🚀 Validating Calendar Generation for user {user_id}")

        can_proceed, message, error_details = pricing_service.check_comprehensive_limits(
            user_id=user_id,
            operations=operations_to_validate
        )

        if not can_proceed:
            usage_info = error_details.get('usage_info', {}) if error_details else {}
            provider = usage_info.get('provider', llm_provider_name) if usage_info else llm_provider_name

            logger.warning(f"[Pre-flight Validator] Calendar generation blocked for user {user_id}: {message}")

            raise HTTPException(
                status_code=429,
                detail={
                    'error': message,
                    'message': message,
                    'provider': provider,
                    # Fall back to the raw error details when no usage info was supplied.
                    'usage_info': usage_info if usage_info else error_details
                }
            )

        logger.info(f"[Pre-flight Validator] ✅ Calendar Generation validated for user {user_id}")

    except HTTPException:
        # The 429 above must reach the caller untouched.
        raise
    except Exception as e:
        logger.error(f"[Pre-flight Validator] Error validating calendar generation: {e}", exc_info=True)
        raise HTTPException(
            status_code=500,
            detail={
                'error': f"Failed to validate calendar generation: {str(e)}",
                'message': f"Failed to validate calendar generation: {str(e)}"
            }
        )
|
||||
@@ -637,4 +637,260 @@ class WaveSpeedClient:
|
||||
status_code=502,
|
||||
detail="Failed to fetch generated audio from WaveSpeed URL",
|
||||
)
|
||||
|
||||
def submit_text_to_video(
    self,
    model_path: str,
    payload: Dict[str, Any],
    timeout: int = 60,
) -> str:
    """
    Submit a text-to-video generation request to WaveSpeed.

    POSTs ``payload`` as JSON to ``{BASE_URL}/{model_path}`` and returns the
    ``id`` found under the ``data`` key of the JSON response.

    Args:
        model_path: Model path (e.g., "alibaba/wan-2.5/text-to-video")
        payload: Request payload with prompt, resolution, duration, optional audio
        timeout: Request timeout in seconds

    Returns:
        Prediction ID for polling

    Raises:
        HTTPException: 502 when WaveSpeed answers with a non-200 status, or
            when the response body is not JSON, is not a JSON object, or
            carries no ``data.id``.
    """
    url = f"{self.BASE_URL}/{model_path}"
    logger.info(f"[WaveSpeed] Submitting text-to-video request to {url}")
    response = requests.post(url, headers=self._headers(), json=payload, timeout=timeout)

    if response.status_code != 200:
        logger.error(f"[WaveSpeed] Text-to-video submission failed: {response.status_code} {response.text}")
        raise HTTPException(
            status_code=502,
            detail={
                "error": "WaveSpeed text-to-video submission failed",
                "status_code": response.status_code,
                "response": response.text,
            },
        )

    # A 200 with a non-JSON body makes response.json() raise ValueError.
    # Treat that, and any unexpected JSON shape, like a missing prediction id
    # so the caller always gets a 502 rather than an unhandled exception.
    try:
        body = response.json()
    except ValueError:
        body = None
    data = body.get("data") if isinstance(body, dict) else None

    if not isinstance(data, dict) or "id" not in data:
        logger.error(f"[WaveSpeed] Unexpected text-to-video response: {response.text}")
        raise HTTPException(
            status_code=502,
            detail={"error": "WaveSpeed response missing prediction id"},
        )

    prediction_id = data["id"]
    logger.info(f"[WaveSpeed] Submitted text-to-video request: {prediction_id}")
    return prediction_id
|
||||
|
||||
def _download_text_video_output(
    self,
    video_url: Any,
    failure_label: str,
    download_timeout: int = 180,
) -> bytes:
    """Validate a WaveSpeed output URL and return the bytes of the video behind it."""
    if not isinstance(video_url, str) or not video_url.startswith("http"):
        logger.error(f"[WaveSpeed] Invalid video URL format: {video_url}")
        raise HTTPException(
            status_code=502,
            detail=f"Invalid video URL format: {video_url}",
        )

    logger.info(f"[WaveSpeed] Downloading video from: {video_url}")
    video_response = requests.get(video_url, timeout=download_timeout)
    if video_response.status_code != 200:
        raise HTTPException(
            status_code=502,
            detail={
                "error": failure_label,
                "status_code": video_response.status_code,
                "response": video_response.text[:200],
            },
        )
    return video_response.content

def generate_text_video(
    self,
    prompt: str,
    resolution: str = "720p",  # 480p, 720p, 1080p
    duration: int = 5,  # 5 or 10 seconds
    audio_base64: Optional[str] = None,  # Optional audio for lip-sync
    negative_prompt: Optional[str] = None,
    seed: Optional[int] = None,
    enable_prompt_expansion: bool = True,
    enable_sync_mode: bool = False,
    timeout: int = 180,
) -> Dict[str, Any]:
    """
    Generate video from text prompt using WAN 2.5 text-to-video.

    Args:
        prompt: Text prompt describing the video
        resolution: Output resolution (480p, 720p, 1080p)
        duration: Video duration in seconds (5 or 10)
        audio_base64: Optional audio file (wav/mp3, 3-30s, ≤15MB) for lip-sync
        negative_prompt: Optional negative prompt
        seed: Optional random seed for reproducibility
        enable_prompt_expansion: Enable prompt optimizer
        enable_sync_mode: If True, wait for result and return it directly
        timeout: Request timeout in seconds (also used as the polling budget
            in async mode)

    Returns:
        Dictionary with video bytes, metadata, and cost

    Raises:
        HTTPException: 400 for an unsupported resolution or duration; 502 when
            WaveSpeed rejects the request, returns an unusable body or output
            URL, or the video download fails. Polling failures are re-raised
            with ``prediction_id`` and ``resume_available`` added to a dict detail.
    """
    model_path = "alibaba/wan-2.5/text-to-video"

    # Validate resolution
    valid_resolutions = ["480p", "720p", "1080p"]
    if resolution not in valid_resolutions:
        raise HTTPException(
            status_code=400,
            detail=f"Invalid resolution: {resolution}. Must be one of: {valid_resolutions}"
        )

    # Validate duration
    if duration not in [5, 10]:
        raise HTTPException(
            status_code=400,
            detail="Duration must be 5 or 10 seconds"
        )

    # Build payload; optional fields are only sent when the caller supplied them
    payload = {
        "prompt": prompt,
        "resolution": resolution,
        "duration": duration,
        "enable_prompt_expansion": enable_prompt_expansion,
        "enable_sync_mode": enable_sync_mode,
    }
    if audio_base64:
        payload["audio"] = audio_base64
    if negative_prompt:
        payload["negative_prompt"] = negative_prompt
    if seed is not None:
        payload["seed"] = seed

    logger.info(
        f"[WaveSpeed] Generating text-to-video: resolution={resolution}, "
        f"duration={duration}s, prompt_length={len(prompt)}, sync_mode={enable_sync_mode}"
    )

    if enable_sync_mode:
        # Sync mode: a single POST is expected to answer with the finished outputs
        url = f"{self.BASE_URL}/{model_path}"
        response = requests.post(url, headers=self._headers(), json=payload, timeout=timeout)

        if response.status_code != 200:
            logger.error(f"[WaveSpeed] Text-to-video submission failed: {response.status_code} {response.text}")
            raise HTTPException(
                status_code=502,
                detail={
                    "error": "WaveSpeed text-to-video submission failed",
                    "status_code": response.status_code,
                    "response": response.text[:500],
                },
            )

        # Treat a non-JSON / non-object body like "no outputs" (502) rather
        # than letting the parse error surface as an unhandled exception.
        try:
            response_json = response.json()
        except ValueError:
            response_json = {}
        if not isinstance(response_json, dict):
            response_json = {}
        data = response_json.get("data") or response_json
        if not isinstance(data, dict):
            data = {}

        outputs = data.get("outputs") or []
        if not outputs:
            logger.error(f"[WaveSpeed] No outputs in sync mode response: {response.text[:500]}")
            raise HTTPException(
                status_code=502,
                detail="WaveSpeed text-to-video returned no outputs in sync mode",
            )

        video_url = outputs[0]
        video_bytes = self._download_text_video_output(
            video_url, "Failed to download WAN 2.5 video from sync mode"
        )
        prediction_id = data.get("id", "sync_mode")
        metadata = data.get("metadata") or {}
    else:
        # Async mode - submit, then poll until the prediction finishes
        prediction_id = self.submit_text_to_video(model_path, payload, timeout=timeout)

        try:
            result = self.poll_until_complete(
                prediction_id,
                timeout_seconds=timeout,
                interval_seconds=2.0
            )
        except HTTPException as e:
            detail = e.detail or {}
            if isinstance(detail, dict):
                # Copy before enriching so the caught exception is not mutated
                detail = dict(detail)
                detail.setdefault("prediction_id", prediction_id)
                detail.setdefault("resume_available", True)
            raise HTTPException(status_code=e.status_code, detail=detail) from e

        outputs = result.get("outputs") or []
        if not outputs:
            raise HTTPException(
                status_code=502,
                detail="WAN 2.5 text-to-video completed but returned no outputs"
            )

        video_url = outputs[0]
        video_bytes = self._download_text_video_output(
            video_url, "Failed to download WAN 2.5 video"
        )
        metadata = result.get("metadata") or {}

    # Calculate cost (same pricing as image-to-video): USD per second of video
    pricing = {
        "480p": 0.05,
        "720p": 0.10,
        "1080p": 0.15,
    }
    cost = pricing.get(resolution, 0.10) * duration

    # Nominal frame size reported for each resolution label
    resolution_dims = {
        "480p": (854, 480),
        "720p": (1280, 720),
        "1080p": (1920, 1080),
    }
    width, height = resolution_dims.get(resolution, (1280, 720))

    logger.info(
        f"[WaveSpeed] ✅ Generated text-to-video: {len(video_bytes)} bytes, "
        f"resolution={resolution}, duration={duration}s, cost=${cost:.2f}"
    )

    return {
        "video_bytes": video_bytes,
        "prompt": prompt,
        "duration": float(duration),
        "model_name": model_path,
        "cost": cost,
        "provider": "wavespeed",
        "source_video_url": video_url,
        "prediction_id": prediction_id,
        "resolution": resolution,
        "width": width,
        "height": height,
        "metadata": metadata,
    }
|
||||
|
||||
|
||||
2
backend/services/youtube/__init__.py
Normal file
2
backend/services/youtube/__init__.py
Normal file
@@ -0,0 +1,2 @@
|
||||
"""YouTube Creator Studio services."""
|
||||
|
||||
358
backend/services/youtube/planner.py
Normal file
358
backend/services/youtube/planner.py
Normal file
@@ -0,0 +1,358 @@
|
||||
"""
|
||||
YouTube Video Planner Service
|
||||
|
||||
Generates video plans, outlines, and insights using AI with persona integration.
|
||||
"""
|
||||
|
||||
from typing import Dict, Any, Optional, List
|
||||
from loguru import logger
|
||||
from fastapi import HTTPException
|
||||
|
||||
from services.llm_providers.main_text_generation import llm_text_gen
|
||||
from utils.logger_utils import get_service_logger
|
||||
|
||||
logger = get_service_logger("youtube.planner")
|
||||
|
||||
|
||||
class YouTubePlannerService:
    """Service for planning YouTube videos with AI assistance."""

    # Pacing budget per video format. Unknown formats fall back to "medium".
    _DURATION_CONTEXTS: Dict[str, Dict[str, Any]] = {
        "shorts": {
            "description": "YouTube Shorts (15-60 seconds)",
            "target_seconds": 30,
            "hook_seconds": 3,
            "main_seconds": 24,
            "cta_seconds": 3,
            # Keep scenes tight for shorts to control cost and pacing
            "max_scenes": 4,
            "scene_duration_range": (2, 8)
        },
        "medium": {
            "description": "Medium-length video (1-4 minutes)",
            "target_seconds": 150,  # 2.5 minutes
            "hook_seconds": 10,
            "main_seconds": 130,
            "cta_seconds": 10,
            "max_scenes": 12,
            "scene_duration_range": (5, 15)
        },
        "long": {
            "description": "Long-form video (4-10 minutes)",
            "target_seconds": 420,  # 7 minutes
            "hook_seconds": 15,
            "main_seconds": 380,
            "cta_seconds": 25,
            "max_scenes": 20,
            "scene_duration_range": (10, 30)
        }
    }

    def __init__(self):
        """Initialize the planner service."""
        logger.info("[YouTubePlanner] Service initialized")

    def generate_video_plan(
        self,
        user_idea: str,
        duration_type: str,  # "shorts", "medium", "long"
        persona_data: Optional[Dict[str, Any]] = None,
        reference_image_description: Optional[str] = None,
        source_content_id: Optional[str] = None,  # For blog/story conversion
        source_content_type: Optional[str] = None,  # "blog", "story"
        user_id: Optional[str] = None,
        include_scenes: bool = False,  # For shorts: combine plan + scenes in one call
    ) -> Dict[str, Any]:
        """
        Generate a comprehensive video plan from user input.

        Args:
            user_idea: User's video idea or topic
            duration_type: "shorts" (≤60s), "medium" (1-4min), "long" (4-10min)
            persona_data: Optional persona data for tone/style
            reference_image_description: Optional description of reference image
            source_content_id: Optional ID of source content (blog/story)
            source_content_type: Type of source content
            user_id: Clerk user ID for subscription checking
            include_scenes: When True and ``duration_type`` is "shorts", ask the
                LLM for the scene breakdown in the same call. The result then
                carries ``_scenes_included`` (True only if a non-empty scene
                list came back).

        Returns:
            Dictionary with video plan, outline, insights, and metadata

        Raises:
            HTTPException: re-raised unchanged if raised during generation;
                any other failure is reported as a 500.
        """
        try:
            logger.info(
                f"[YouTubePlanner] Generating plan: idea={user_idea[:50]}..., "
                f"duration={duration_type}, user={user_id}"
            )

            persona_context = self._build_persona_context(persona_data)
            duration_context = self._get_duration_context(duration_type)
            wants_scenes = include_scenes and duration_type == "shorts"

            # Build source content context if provided
            source_context = ""
            if source_content_id and source_content_type:
                source_context = f"""
**Source Content:**
- Type: {source_content_type}
- ID: {source_content_id}
- Note: This video should be based on the existing {source_content_type} content.
"""

            # Build reference image context
            image_context = ""
            if reference_image_description:
                image_context = f"""
**Reference Image:**
{reference_image_description}
- Use this as visual inspiration for the video
"""

            # Generate comprehensive video plan
            planning_prompt = f"""You are an expert YouTube content strategist. Create a comprehensive video plan based on the user's idea.

**User's Video Idea:**
{user_idea}

**Video Duration Type:**
{duration_type} ({duration_context['description']})

**Duration Guidelines:**
- Target length: {duration_context['target_seconds']} seconds
- Hook duration: {duration_context['hook_seconds']} seconds
- Main content: {duration_context['main_seconds']} seconds
- CTA duration: {duration_context['cta_seconds']} seconds
- Maximum scenes: {duration_context['max_scenes']} (for shorts, keep 2-4 scenes total)

{persona_context}

{source_context}

{image_context}

**Your Task:**
Create a detailed video plan that includes:

1. **Video Summary**: A 2-3 sentence overview of what the video will cover
2. **Target Audience**: Who this video is for
3. **Video Goal**: Primary objective (educate, entertain, sell, inspire, etc.)
4. **Key Message**: The main takeaway viewers should remember
5. **Hook Strategy**: Attention-grabbing opening (first {duration_context['hook_seconds']} seconds)
6. **Content Outline**: High-level structure with 3-5 main sections
7. **Call-to-Action**: Clear CTA that fits the video goal
8. **Visual Style**: Recommended visual approach (cinematic, tutorial, vlog, etc.)
9. **Tone**: Recommended tone (professional, casual, energetic, etc.)
10. **SEO Keywords**: 5-7 relevant keywords for YouTube SEO

**Format your response as JSON:**
{{
"video_summary": "...",
"target_audience": "...",
"video_goal": "...",
"key_message": "...",
"hook_strategy": "...",
"content_outline": [
{{"section": "Section 1", "description": "...", "duration_estimate": 30}},
{{"section": "Section 2", "description": "...", "duration_estimate": 45}}
],
"call_to_action": "...",
"visual_style": "...",
"tone": "...",
"seo_keywords": ["keyword1", "keyword2", ...]
}}

Make sure the content outline fits within the {duration_type} duration constraints.
"""

            system_prompt = (
                "You are an expert YouTube content strategist specializing in creating "
                "engaging, well-structured video plans. Your plans are data-driven, "
                "audience-focused, and optimized for YouTube's algorithm."
            )

            # For shorts, combine plan + scenes in one call to save API calls
            if wants_scenes:
                planning_prompt += f"""

**IMPORTANT: Since this is a SHORTS video, also generate the complete scene breakdown in the same response.**

**Additional Task - Generate Detailed Scenes:**
Create detailed scenes (up to {duration_context['max_scenes']} scenes) that include:
1. Scene number and title
2. Narration text (what will be spoken) - keep it concise for shorts
3. Visual description (what viewers will see)
4. Duration estimate (2-8 seconds each)
5. Emphasis tags (hook, main_content, transition, cta)

**Scene Format:**
Each scene should be detailed enough for video generation. Total duration must fit within {duration_context['target_seconds']} seconds.

**Update JSON structure to include "scenes" array:**
Add a "scenes" field with the complete scene breakdown.
"""

            json_struct = self._build_plan_schema(with_scenes=wants_scenes)

            # Generate plan using LLM
            response = llm_text_gen(
                prompt=planning_prompt,
                system_prompt=system_prompt,
                user_id=user_id,
                json_struct=json_struct
            )

            # Parse response (handle both dict and JSON string)
            if isinstance(response, dict):
                plan_data = response
            else:
                import json
                plan_data = json.loads(response)
            if not isinstance(plan_data, dict):
                # The metadata assignments below need a mapping; fail with a
                # clear message instead of an opaque TypeError.
                raise ValueError("LLM response is not a JSON object")

            # Add metadata
            plan_data["duration_type"] = duration_type
            plan_data["duration_metadata"] = duration_context
            plan_data["user_idea"] = user_idea

            if wants_scenes:
                scenes = plan_data.get("scenes")
                # Only a non-empty list counts; an empty "scenes" key must not
                # stop the scene builder from generating scenes itself.
                if isinstance(scenes, list) and scenes:
                    plan_data["_scenes_included"] = True
                    logger.info(
                        f"[YouTubePlanner] ✅ Plan + {len(scenes)} scenes "
                        f"generated in 1 AI call (optimized for shorts)"
                    )
                else:
                    # LLM did not return scenes; downstream will regenerate
                    plan_data["_scenes_included"] = False
                    logger.warning(
                        "[YouTubePlanner] Shorts optimization requested but no scenes returned; "
                        "scene builder will generate scenes separately."
                    )
                    logger.info("[YouTubePlanner] ✅ Plan generated successfully")
            else:
                logger.info("[YouTubePlanner] ✅ Plan generated successfully")

            return plan_data

        except HTTPException:
            raise
        except Exception as e:
            logger.error(f"[YouTubePlanner] Error generating plan: {e}", exc_info=True)
            raise HTTPException(
                status_code=500,
                detail=f"Failed to generate video plan: {str(e)}"
            )

    @staticmethod
    def _build_plan_schema(with_scenes: bool) -> Dict[str, Any]:
        """Return the JSON schema for the plan, optionally requiring a "scenes" array."""
        schema: Dict[str, Any] = {
            "type": "object",
            "properties": {
                "video_summary": {"type": "string"},
                "target_audience": {"type": "string"},
                "video_goal": {"type": "string"},
                "key_message": {"type": "string"},
                "hook_strategy": {"type": "string"},
                "content_outline": {
                    "type": "array",
                    "items": {
                        "type": "object",
                        "properties": {
                            "section": {"type": "string"},
                            "description": {"type": "string"},
                            "duration_estimate": {"type": "number"}
                        }
                    }
                },
                "call_to_action": {"type": "string"},
                "visual_style": {"type": "string"},
                "tone": {"type": "string"},
                "seo_keywords": {
                    "type": "array",
                    "items": {"type": "string"}
                }
            },
            "required": [
                "video_summary", "target_audience", "video_goal", "key_message",
                "hook_strategy", "content_outline", "call_to_action",
                "visual_style", "tone", "seo_keywords"
            ]
        }
        if with_scenes:
            schema["properties"]["scenes"] = {
                "type": "array",
                "items": {
                    "type": "object",
                    "properties": {
                        "scene_number": {"type": "number"},
                        "title": {"type": "string"},
                        "narration": {"type": "string"},
                        "visual_description": {"type": "string"},
                        "duration_estimate": {"type": "number"},
                        "emphasis": {"type": "string"},
                        "visual_cues": {
                            "type": "array",
                            "items": {"type": "string"}
                        }
                    },
                    "required": [
                        "scene_number", "title", "narration", "visual_description",
                        "duration_estimate", "emphasis"
                    ]
                }
            }
            schema["required"].append("scenes")
        return schema

    def _build_persona_context(self, persona_data: Optional[Dict[str, Any]]) -> str:
        """Build persona context string for prompts.

        Falls back to the documented defaults when ``persona_data`` is empty or
        when ``core_persona`` / ``voice_characteristics`` are missing, ``None``
        or not dictionaries.
        """
        if not persona_data:
            return """
**Persona Context:**
- Using default professional tone
- No specific persona constraints
"""

        # Stored personas may carry explicit nulls; `.get(key, {})` would hand
        # back None in that case, so normalise to a dict before reading fields.
        core_persona = persona_data.get("core_persona")
        if not isinstance(core_persona, dict):
            core_persona = {}
        tone = core_persona.get("tone", "professional")
        voice = core_persona.get("voice_characteristics")
        if not isinstance(voice, dict):
            voice = {}

        return f"""
**Persona Context:**
- Tone: {tone}
- Voice Style: {voice.get('style', 'professional')}
- Communication Style: {voice.get('communication_style', 'clear and direct')}
- Brand Values: {core_persona.get('core_belief', 'value-driven content')}
- Use this persona to guide the video's tone, style, and messaging approach.
"""

    def _get_duration_context(self, duration_type: str) -> Dict[str, Any]:
        """Get duration-specific context and constraints.

        Returns a fresh copy so callers can attach it to a plan without
        sharing state with the class-level table.
        """
        contexts = self._DURATION_CONTEXTS
        return dict(contexts.get(duration_type, contexts["medium"]))
|
||||
|
||||
412
backend/services/youtube/renderer.py
Normal file
412
backend/services/youtube/renderer.py
Normal file
@@ -0,0 +1,412 @@
|
||||
"""
|
||||
YouTube Video Renderer Service
|
||||
|
||||
Handles video rendering using WAN 2.5 text-to-video and audio generation.
|
||||
"""
|
||||
|
||||
from typing import Dict, Any, List, Optional
|
||||
from pathlib import Path
|
||||
import base64
|
||||
import uuid
|
||||
import requests
|
||||
from loguru import logger
|
||||
from fastapi import HTTPException
|
||||
|
||||
from services.wavespeed.client import WaveSpeedClient
|
||||
from services.llm_providers.main_audio_generation import generate_audio
|
||||
from services.story_writer.video_generation_service import StoryVideoGenerationService
|
||||
from services.subscription import PricingService
|
||||
from services.subscription.preflight_validator import validate_scene_animation_operation
|
||||
from services.llm_providers.main_video_generation import track_video_usage
|
||||
from utils.logger_utils import get_service_logger
|
||||
from utils.asset_tracker import save_asset_to_library
|
||||
|
||||
logger = get_service_logger("youtube.renderer")
|
||||
|
||||
|
||||
class YouTubeVideoRendererService:
|
||||
"""Service for rendering YouTube videos from scenes."""
|
||||
|
||||
def __init__(self):
|
||||
"""Initialize the renderer service."""
|
||||
self.wavespeed_client = WaveSpeedClient()
|
||||
|
||||
# Video output directory
|
||||
base_dir = Path(__file__).parent.parent.parent.parent
|
||||
self.output_dir = base_dir / "youtube_videos"
|
||||
self.output_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
logger.info(f"[YouTubeRenderer] Initialized with output directory: {self.output_dir}")
|
||||
|
||||
def render_scene_video(
|
||||
self,
|
||||
scene: Dict[str, Any],
|
||||
video_plan: Dict[str, Any],
|
||||
user_id: str,
|
||||
resolution: str = "720p",
|
||||
generate_audio_enabled: bool = True,
|
||||
voice_id: str = "Wise_Woman",
|
||||
) -> Dict[str, Any]:
|
||||
"""
|
||||
Render a single scene into a video.
|
||||
|
||||
Args:
|
||||
scene: Scene data with narration and visual prompts
|
||||
video_plan: Original video plan for context
|
||||
user_id: Clerk user ID
|
||||
resolution: Video resolution (480p, 720p, 1080p)
|
||||
generate_audio: Whether to generate narration audio
|
||||
voice_id: Voice ID for audio generation
|
||||
|
||||
Returns:
|
||||
Dictionary with video metadata, bytes, and cost
|
||||
"""
|
||||
try:
|
||||
scene_number = scene.get("scene_number", 1)
|
||||
narration = scene.get("narration", "").strip()
|
||||
visual_prompt = (scene.get("enhanced_visual_prompt") or scene.get("visual_prompt", "")).strip()
|
||||
duration_estimate = scene.get("duration_estimate", 5)
|
||||
|
||||
# VALIDATION: Check inputs before making expensive API calls
|
||||
if not visual_prompt:
|
||||
raise HTTPException(
|
||||
status_code=400,
|
||||
detail={
|
||||
"error": f"Scene {scene_number} has no visual prompt",
|
||||
"scene_number": scene_number,
|
||||
"message": "Visual prompt is required for video generation",
|
||||
"user_action": "Please add a visual description for this scene before rendering.",
|
||||
}
|
||||
)
|
||||
|
||||
if len(visual_prompt) < 10:
|
||||
logger.warning(
|
||||
f"[YouTubeRenderer] Scene {scene_number} has very short visual prompt "
|
||||
f"({len(visual_prompt)} chars), may result in poor quality"
|
||||
)
|
||||
|
||||
# Clamp duration to valid WAN 2.5 values (5 or 10 seconds)
|
||||
duration = 5 if duration_estimate <= 7 else 10
|
||||
|
||||
logger.info(
|
||||
f"[YouTubeRenderer] Rendering scene {scene_number}: "
|
||||
f"resolution={resolution}, duration={duration}s, prompt_length={len(visual_prompt)}"
|
||||
)
|
||||
|
||||
# Generate audio if requested - only if narration is not empty
|
||||
audio_base64 = None
|
||||
if generate_audio_enabled and narration and len(narration.strip()) > 0:
|
||||
try:
|
||||
audio_result = generate_audio(
|
||||
text=narration,
|
||||
voice_id=voice_id,
|
||||
user_id=user_id,
|
||||
)
|
||||
# generate_audio may return raw bytes or AudioGenerationResult
|
||||
audio_bytes = audio_result.audio_bytes if hasattr(audio_result, "audio_bytes") else audio_result
|
||||
# Convert to base64 (just the base64 string, not data URI)
|
||||
audio_base64 = base64.b64encode(audio_bytes).decode('utf-8')
|
||||
logger.info(f"[YouTubeRenderer] Generated audio for scene {scene_number}")
|
||||
except Exception as e:
|
||||
logger.warning(f"[YouTubeRenderer] Audio generation failed: {e}, continuing without audio")
|
||||
|
||||
# VALIDATION: Final check before expensive video API call
|
||||
if not visual_prompt or len(visual_prompt.strip()) < 5:
|
||||
raise HTTPException(
|
||||
status_code=400,
|
||||
detail={
|
||||
"error": f"Scene {scene_number} has invalid visual prompt",
|
||||
"scene_number": scene_number,
|
||||
"message": "Visual prompt must be at least 5 characters",
|
||||
"user_action": "Please provide a valid visual description for this scene.",
|
||||
}
|
||||
)
|
||||
|
||||
# Generate video using WAN 2.5 text-to-video
|
||||
# This is the expensive API call - all validation should be done before this
|
||||
# Use sync mode to wait for result directly (prevents timeout issues)
|
||||
try:
|
||||
video_result = self.wavespeed_client.generate_text_video(
|
||||
prompt=visual_prompt,
|
||||
resolution=resolution,
|
||||
duration=duration,
|
||||
audio_base64=audio_base64, # Optional: enables lip-sync if provided
|
||||
enable_prompt_expansion=True,
|
||||
enable_sync_mode=True, # Use sync mode to wait for result directly
|
||||
timeout=600, # Increased timeout for sync mode (10 minutes)
|
||||
)
|
||||
except requests.exceptions.Timeout as e:
|
||||
logger.error(f"[YouTubeRenderer] WaveSpeed API timed out for scene {scene_number}: {e}")
|
||||
raise HTTPException(
|
||||
status_code=504,
|
||||
detail={
|
||||
"error": "WaveSpeed request timed out",
|
||||
"scene_number": scene_number,
|
||||
"message": "The video generation request timed out.",
|
||||
"user_action": "Please retry. If it persists, try fewer scenes, lower resolution, or shorter durations.",
|
||||
},
|
||||
) from e
|
||||
except requests.exceptions.RequestException as e:
|
||||
logger.error(f"[YouTubeRenderer] WaveSpeed API request failed for scene {scene_number}: {e}")
|
||||
raise HTTPException(
|
||||
status_code=502,
|
||||
detail={
|
||||
"error": "WaveSpeed request failed",
|
||||
"scene_number": scene_number,
|
||||
"message": str(e),
|
||||
"user_action": "Please retry. If it persists, check network connectivity or try again later.",
|
||||
},
|
||||
) from e
|
||||
|
||||
# Save scene video
|
||||
video_service = StoryVideoGenerationService(output_dir=str(self.output_dir))
|
||||
save_result = video_service.save_scene_video(
|
||||
video_bytes=video_result["video_bytes"],
|
||||
scene_number=scene_number,
|
||||
user_id=user_id,
|
||||
)
|
||||
|
||||
# Update video URL to use YouTube API endpoint
|
||||
filename = save_result["video_filename"]
|
||||
save_result["video_url"] = f"/api/youtube/videos/{filename}"
|
||||
|
||||
# Track usage
|
||||
usage_info = track_video_usage(
|
||||
user_id=user_id,
|
||||
provider=video_result["provider"],
|
||||
model_name=video_result["model_name"],
|
||||
prompt=visual_prompt,
|
||||
video_bytes=video_result["video_bytes"],
|
||||
cost_override=video_result["cost"],
|
||||
)
|
||||
|
||||
logger.info(
|
||||
f"[YouTubeRenderer] ✅ Scene {scene_number} rendered: "
|
||||
f"cost=${video_result['cost']:.2f}, size={len(video_result['video_bytes'])} bytes"
|
||||
)
|
||||
|
||||
return {
|
||||
"scene_number": scene_number,
|
||||
"video_filename": save_result["video_filename"],
|
||||
"video_url": save_result["video_url"],
|
||||
"video_path": save_result["video_path"],
|
||||
"duration": video_result["duration"],
|
||||
"cost": video_result["cost"],
|
||||
"resolution": resolution,
|
||||
"width": video_result["width"],
|
||||
"height": video_result["height"],
|
||||
"file_size": save_result["file_size"],
|
||||
"prediction_id": video_result.get("prediction_id"),
|
||||
"usage_info": usage_info,
|
||||
}
|
||||
|
||||
except HTTPException as e:
|
||||
# Re-raise with better error message for UI
|
||||
error_detail = e.detail
|
||||
if isinstance(error_detail, dict):
|
||||
error_msg = error_detail.get("error", str(error_detail))
|
||||
else:
|
||||
error_msg = str(error_detail)
|
||||
|
||||
logger.error(
|
||||
f"[YouTubeRenderer] Scene {scene_number} failed: {error_msg}",
|
||||
exc_info=True
|
||||
)
|
||||
raise HTTPException(
|
||||
status_code=e.status_code,
|
||||
detail={
|
||||
"error": f"Failed to render scene {scene_number}",
|
||||
"scene_number": scene_number,
|
||||
"message": error_msg,
|
||||
"user_action": "Please try again. If the issue persists, check your scene content and try a different resolution.",
|
||||
}
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(f"[YouTubeRenderer] Error rendering scene {scene_number}: {e}", exc_info=True)
|
||||
raise HTTPException(
|
||||
status_code=500,
|
||||
detail={
|
||||
"error": f"Failed to render scene {scene_number}",
|
||||
"scene_number": scene_number,
|
||||
"message": str(e),
|
||||
"user_action": "Please try again. If the issue persists, check your scene content and try a different resolution.",
|
||||
}
|
||||
)
|
||||
|
||||
def render_full_video(
|
||||
self,
|
||||
scenes: List[Dict[str, Any]],
|
||||
video_plan: Dict[str, Any],
|
||||
user_id: str,
|
||||
resolution: str = "720p",
|
||||
combine_scenes: bool = True,
|
||||
voice_id: str = "Wise_Woman",
|
||||
) -> Dict[str, Any]:
|
||||
"""
|
||||
Render a complete video from multiple scenes.
|
||||
|
||||
Args:
|
||||
scenes: List of scene data
|
||||
video_plan: Original video plan
|
||||
user_id: Clerk user ID
|
||||
resolution: Video resolution
|
||||
combine_scenes: Whether to combine scenes into single video
|
||||
voice_id: Voice ID for narration
|
||||
|
||||
Returns:
|
||||
Dictionary with video metadata and scene results
|
||||
"""
|
||||
try:
|
||||
logger.info(
|
||||
f"[YouTubeRenderer] Rendering full video: {len(scenes)} scenes, "
|
||||
f"resolution={resolution}, user={user_id}"
|
||||
)
|
||||
|
||||
# Filter enabled scenes
|
||||
enabled_scenes = [s for s in scenes if s.get("enabled", True)]
|
||||
if not enabled_scenes:
|
||||
raise HTTPException(status_code=400, detail="No enabled scenes to render")
|
||||
|
||||
scene_results = []
|
||||
total_cost = 0.0
|
||||
|
||||
# Render each scene
|
||||
for idx, scene in enumerate(enabled_scenes):
|
||||
logger.info(
|
||||
f"[YouTubeRenderer] Rendering scene {idx + 1}/{len(enabled_scenes)}: "
|
||||
f"Scene {scene.get('scene_number', idx + 1)}"
|
||||
)
|
||||
|
||||
scene_result = self.render_scene_video(
|
||||
scene=scene,
|
||||
video_plan=video_plan,
|
||||
user_id=user_id,
|
||||
resolution=resolution,
|
||||
generate_audio_enabled=True,
|
||||
voice_id=voice_id,
|
||||
)
|
||||
|
||||
scene_results.append(scene_result)
|
||||
total_cost += scene_result["cost"]
|
||||
|
||||
# Combine scenes if requested
|
||||
final_video_path = None
|
||||
final_video_url = None
|
||||
if combine_scenes and len(scene_results) > 1:
|
||||
logger.info("[YouTubeRenderer] Combining scenes into final video...")
|
||||
|
||||
# Prepare data for video concatenation
|
||||
scene_video_paths = [r["video_path"] for r in scene_results]
|
||||
scene_audio_paths = [r.get("audio_path") for r in scene_results if r.get("audio_path")]
|
||||
|
||||
# Use StoryVideoGenerationService to combine
|
||||
video_service = StoryVideoGenerationService(output_dir=str(self.output_dir))
|
||||
|
||||
# Create scene dicts for concatenation
|
||||
scene_dicts = [
|
||||
{
|
||||
"scene_number": r["scene_number"],
|
||||
"title": f"Scene {r['scene_number']}",
|
||||
}
|
||||
for r in scene_results
|
||||
]
|
||||
|
||||
combined_result = video_service.generate_story_video(
|
||||
scenes=scene_dicts,
|
||||
image_paths=[None] * len(scene_results), # No static images
|
||||
audio_paths=scene_audio_paths if scene_audio_paths else [],
|
||||
video_paths=scene_video_paths, # Use rendered videos
|
||||
user_id=user_id,
|
||||
story_title=video_plan.get("video_summary", "YouTube Video")[:50],
|
||||
fps=24,
|
||||
)
|
||||
|
||||
final_video_path = combined_result["video_path"]
|
||||
final_video_url = combined_result["video_url"]
|
||||
|
||||
logger.info(
|
||||
f"[YouTubeRenderer] ✅ Full video rendered: {len(scene_results)} scenes, "
|
||||
f"total_cost=${total_cost:.2f}"
|
||||
)
|
||||
|
||||
return {
|
||||
"success": True,
|
||||
"scene_results": scene_results,
|
||||
"total_cost": total_cost,
|
||||
"final_video_path": final_video_path,
|
||||
"final_video_url": final_video_url,
|
||||
"num_scenes": len(scene_results),
|
||||
"resolution": resolution,
|
||||
}
|
||||
|
||||
except HTTPException:
|
||||
raise
|
||||
except Exception as e:
|
||||
logger.error(f"[YouTubeRenderer] Error rendering full video: {e}", exc_info=True)
|
||||
raise HTTPException(
|
||||
status_code=500,
|
||||
detail=f"Failed to render video: {str(e)}"
|
||||
)
|
||||
|
||||
def estimate_render_cost(
    self,
    scenes: List[Dict[str, Any]],
    resolution: str = "720p",
) -> Dict[str, Any]:
    """
    Produce a pre-render cost estimate for a list of scenes.

    Scenes whose ``enabled`` flag is falsy are ignored (a missing flag
    counts as enabled). Each remaining scene is billed for either 5 or
    10 seconds depending on its ``duration_estimate``.

    Args:
        scenes: Scene dictionaries; ``scene_number``, ``duration_estimate``
            and ``enabled`` are read, all optional.
        resolution: Video resolution key (480p, 720p, 1080p). Any other
            value is priced at the 720p rate.

    Returns:
        Dictionary with the per-scene breakdown, total duration, total
        cost and a +/-10% cost range.
    """
    # Per-second rates (the original comment states these mirror WaveSpeedClient).
    rate_table = {
        "480p": 0.05,
        "720p": 0.10,
        "1080p": 0.15,
    }
    price_per_second = rate_table.get(resolution, 0.10)

    # Only scenes that are not explicitly disabled are billed.
    active_scenes = [entry for entry in scenes if entry.get("enabled", True)]

    breakdown = []
    running_cost = 0.0
    running_seconds = 0.0

    for entry in active_scenes:
        requested = entry.get("duration_estimate", 5)

        # Snap to the two clip lengths the renderer accepts (5s or 10s).
        if requested <= 7:
            billed_seconds = 5
        else:
            billed_seconds = 10

        cost_for_scene = price_per_second * billed_seconds
        running_cost += cost_for_scene
        running_seconds += billed_seconds

        breakdown.append({
            "scene_number": entry.get("scene_number", 0),
            "duration_estimate": requested,
            "actual_duration": billed_seconds,
            "cost": round(cost_for_scene, 2),
        })

    cost_range = {
        "min": round(running_cost * 0.9, 2),  # 10% below the estimate
        "max": round(running_cost * 1.1, 2),  # 10% above the estimate
    }

    return {
        "resolution": resolution,
        "price_per_second": price_per_second,
        "num_scenes": len(active_scenes),
        "total_duration_seconds": running_seconds,
        "scene_costs": breakdown,
        "total_cost": round(running_cost, 2),
        "estimated_cost_range": cost_range,
    }
|
||||
|
||||
551
backend/services/youtube/scene_builder.py
Normal file
551
backend/services/youtube/scene_builder.py
Normal file
@@ -0,0 +1,551 @@
|
||||
"""
|
||||
YouTube Scene Builder Service
|
||||
|
||||
Converts video plans into structured scenes with narration, visual prompts, and timing.
|
||||
"""
|
||||
|
||||
from typing import Dict, Any, Optional, List
|
||||
from loguru import logger
|
||||
from fastapi import HTTPException
|
||||
|
||||
from services.llm_providers.main_text_generation import llm_text_gen
|
||||
from services.story_writer.prompt_enhancer_service import PromptEnhancerService
|
||||
from utils.logger_utils import get_service_logger
|
||||
|
||||
logger = get_service_logger("youtube.scene_builder")
|
||||
|
||||
|
||||
class YouTubeSceneBuilderService:
    """Service for building structured video scenes from plans.

    Scenes come from one of three sources (a user-supplied script, scenes
    already embedded in a "shorts" plan, or an AI generation call) and are
    then given an ``enhanced_visual_prompt`` whose cost in AI calls depends
    on the plan's duration type.
    """

    def __init__(self):
        """Initialize the scene builder service."""
        self.prompt_enhancer = PromptEnhancerService()
        logger.info("[YouTubeSceneBuilder] Service initialized")

    # ------------------------------------------------------------------
    # Small shared helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _normalize_scene(
        scene: Dict[str, Any],
        idx: int,
        default_duration: Any,
    ) -> Dict[str, Any]:
        """Return a scene dict with every expected key filled in with a default."""
        return {
            "scene_number": scene.get("scene_number", idx),
            "title": scene.get("title", f"Scene {idx}"),
            "narration": scene.get("narration", ""),
            "visual_description": scene.get("visual_description", ""),
            "duration_estimate": scene.get("duration_estimate", default_duration),
            "emphasis": scene.get("emphasis", "main_content"),
            "visual_cues": scene.get("visual_cues", []),
            "visual_prompt": scene.get("visual_description", ""),  # Initial prompt
        }

    @staticmethod
    def _original_prompt(scene: Dict[str, Any]) -> str:
        """Return the un-enhanced prompt for a scene (visual_prompt, else visual_description)."""
        return scene.get("visual_prompt", scene.get("visual_description", ""))

    def _use_original_prompts(self, scenes: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """Copy each scene's original prompt into ``enhanced_visual_prompt`` (mutates in place)."""
        for scene in scenes:
            scene["enhanced_visual_prompt"] = self._original_prompt(scene)
        return scenes

    # ------------------------------------------------------------------
    # Public entry point
    # ------------------------------------------------------------------

    def build_scenes_from_plan(
        self,
        video_plan: Dict[str, Any],
        user_id: str,
        custom_script: Optional[str] = None,
    ) -> List[Dict[str, Any]]:
        """
        Build structured scenes from a video plan.

        Args:
            video_plan: Video plan from planner service
            user_id: Clerk user ID for subscription checking
            custom_script: Optional custom script to use instead of generating

        Returns:
            List of scene dictionaries with narration, visual prompts, timing, etc.

        Raises:
            HTTPException: Re-raised unchanged if raised downstream; any other
                failure is logged and wrapped in a 500 response.
        """
        try:
            logger.info(
                f"[YouTubeSceneBuilder] Building scenes from plan: "
                f"duration={video_plan.get('duration_type')}, "
                f"sections={len(video_plan.get('content_outline', []))}"
            )

            duration_metadata = video_plan.get("duration_metadata", {})
            max_scenes = duration_metadata.get("max_scenes", 10)

            if custom_script:
                # A user-provided script always wins over generated content.
                scenes = self._parse_custom_script(
                    custom_script, video_plan, duration_metadata, user_id
                )
            elif video_plan.get("_scenes_included") and video_plan.get("duration_type") == "shorts":
                # For shorts the planner may already have produced scenes (optimization).
                prebuilt = video_plan.get("scenes") or []
                if prebuilt:
                    logger.info(
                        f"[YouTubeSceneBuilder] Using scenes from optimized plan+scenes call "
                        f"({len(prebuilt)} scenes)"
                    )
                    scenes = self._normalize_scenes_from_plan(video_plan, duration_metadata)
                else:
                    logger.warning(
                        "[YouTubeSceneBuilder] Plan marked _scenes_included but no scenes present; "
                        "regenerating scenes normally."
                    )
                    scenes = self._generate_scenes_from_plan(
                        video_plan, duration_metadata, user_id
                    )
            else:
                # Generate scenes from plan
                scenes = self._generate_scenes_from_plan(
                    video_plan, duration_metadata, user_id
                )

            # Limit to max scenes
            if len(scenes) > max_scenes:
                logger.warning(
                    f"[YouTubeSceneBuilder] Truncating {len(scenes)} scenes to {max_scenes}"
                )
                scenes = scenes[:max_scenes]

            # Enhance visual prompts efficiently based on duration type
            duration_type = video_plan.get("duration_type", "medium")
            scenes = self._enhance_visual_prompts_batch(
                scenes, video_plan, user_id, duration_type
            )

            logger.info(f"[YouTubeSceneBuilder] ✅ Built {len(scenes)} scenes")
            return scenes

        except HTTPException:
            raise
        except Exception as e:
            logger.error(f"[YouTubeSceneBuilder] Error building scenes: {e}", exc_info=True)
            raise HTTPException(
                status_code=500,
                detail=f"Failed to build scenes: {str(e)}"
            )

    # ------------------------------------------------------------------
    # Scene sources
    # ------------------------------------------------------------------

    def _generate_scenes_from_plan(
        self,
        video_plan: Dict[str, Any],
        duration_metadata: Dict[str, Any],
        user_id: str,
    ) -> List[Dict[str, Any]]:
        """Generate scenes from video plan using AI.

        Args:
            video_plan: Plan dictionary; summary, goal, key message, outline,
                hook, CTA, visual style and tone are read from it.
            duration_metadata: Timing constraints (scene_duration_range,
                target_seconds, hook_seconds, cta_seconds).
            user_id: Passed through to the LLM call.

        Returns:
            List of normalized scene dictionaries.

        Raises:
            ValueError: If the model response cannot be reduced to a list.
        """
        content_outline = video_plan.get("content_outline", [])
        hook_strategy = video_plan.get("hook_strategy", "")
        call_to_action = video_plan.get("call_to_action", "")
        visual_style = video_plan.get("visual_style", "cinematic")
        tone = video_plan.get("tone", "professional")

        scene_duration_range = duration_metadata.get("scene_duration_range", (5, 15))

        # One bullet per outline section, rendered ahead of the prompt for readability.
        outline_text = "\n".join([
            f"- {section.get('section', '')}: {section.get('description', '')} "
            f"({section.get('duration_estimate', 0)}s)"
            for section in content_outline
        ])

        scene_generation_prompt = f"""You are an expert video scriptwriter. Create detailed scenes for a YouTube video based on this plan.

**Video Plan:**
- Summary: {video_plan.get('video_summary', '')}
- Goal: {video_plan.get('video_goal', '')}
- Key Message: {video_plan.get('key_message', '')}
- Visual Style: {visual_style}
- Tone: {tone}

**Hook Strategy:**
{hook_strategy}

**Content Outline:**
{outline_text}

**Call-to-Action:**
{call_to_action}

**Duration Constraints:**
- Scene duration: {scene_duration_range[0]}-{scene_duration_range[1]} seconds each
- Total target: {duration_metadata.get('target_seconds', 150)} seconds

**Your Task:**
Create detailed scenes that include:
1. Scene number and title
2. Narration text (what will be spoken)
3. Visual description (what viewers will see)
4. Duration estimate
5. Emphasis tags (hook, main_content, transition, cta)

**Format as JSON array:**
[
{{
"scene_number": 1,
"title": "Hook - Attention Grabber",
"narration": "The spoken text for this scene...",
"visual_description": "Detailed description of what viewers see...",
"duration_estimate": 5,
"emphasis": "hook",
"visual_cues": ["close-up", "dynamic", "bright"]
}},
...
]

Make sure:
- First scene is a strong hook ({duration_metadata.get('hook_seconds', 10)}s)
- Last scene includes the CTA ({duration_metadata.get('cta_seconds', 10)}s)
- Each scene has clear narration and visual description
- Total duration fits within {duration_metadata.get('target_seconds', 150)} seconds
- Scenes flow naturally from one to the next
"""

        system_prompt = (
            "You are an expert video scriptwriter specializing in YouTube content. "
            "Your scenes are engaging, well-paced, and optimized for viewer retention."
        )

        response = llm_text_gen(
            prompt=scene_generation_prompt,
            system_prompt=system_prompt,
            user_id=user_id,
            json_struct={
                "type": "array",
                "items": {
                    "type": "object",
                    "properties": {
                        "scene_number": {"type": "number"},
                        "title": {"type": "string"},
                        "narration": {"type": "string"},
                        "visual_description": {"type": "string"},
                        "duration_estimate": {"type": "number"},
                        "emphasis": {"type": "string"},
                        "visual_cues": {
                            "type": "array",
                            "items": {"type": "string"}
                        }
                    },
                    "required": [
                        "scene_number", "title", "narration", "visual_description",
                        "duration_estimate", "emphasis"
                    ]
                }
            }
        )

        # Parse response: decode text first so a JSON string wrapping
        # {"scenes": [...]} is unwrapped the same way as a dict response.
        if isinstance(response, str):
            import json
            response = json.loads(response)
        if isinstance(response, dict) and "scenes" in response:
            scenes = response["scenes"]
        else:
            scenes = response
        if not isinstance(scenes, list):
            raise ValueError(
                f"Scene generation returned {type(scenes).__name__}, expected a list of scenes"
            )

        # Normalize scene data
        return [
            self._normalize_scene(scene, idx, scene_duration_range[0])
            for idx, scene in enumerate(scenes, 1)
        ]

    def _normalize_scenes_from_plan(
        self,
        video_plan: Dict[str, Any],
        duration_metadata: Dict[str, Any],
    ) -> List[Dict[str, Any]]:
        """Normalize scenes that were generated as part of the plan (optimization for shorts)."""
        scenes = video_plan.get("scenes", [])
        scene_duration_range = duration_metadata.get("scene_duration_range", (2, 8))

        normalized_scenes = [
            self._normalize_scene(scene, idx, scene_duration_range[0])
            for idx, scene in enumerate(scenes, 1)
        ]

        logger.info(
            f"[YouTubeSceneBuilder] ✅ Normalized {len(normalized_scenes)} scenes "
            f"from optimized plan (saved 1 AI call)"
        )
        return normalized_scenes

    def _parse_custom_script(
        self,
        custom_script: str,
        video_plan: Dict[str, Any],
        duration_metadata: Dict[str, Any],
        user_id: str,
    ) -> List[Dict[str, Any]]:
        """Parse a custom script into structured scenes.

        Scene markers ("Scene N", "# N." or a line starting with "N.") split
        the script; everything up to the next marker belongs to that scene.
        Inside a scene, text before a blank line or a "Visual:" / "Image:"
        marker is the narration, and text after such a marker is the visual
        description. Without any markers the script is split on blank lines,
        capped at ``max_scenes`` paragraphs.

        Args:
            custom_script: Raw script text supplied by the user.
            video_plan: Unused here; kept for a uniform helper signature.
            duration_metadata: Supplies scene_duration_range and max_scenes.
            user_id: Unused here; kept for a uniform helper signature.

        Returns:
            List of scene dictionaries; the first is tagged "hook" and the
            last "cta".
        """
        import re

        # \Z (not $) ends the look-ahead: under re.MULTILINE `$` matches at
        # every line end, which would cut each scene body to a single line.
        scene_pattern = (
            r'(?:Scene\s+\d+|#\s*\d+\.|^\d+\.)\s*(.+?)'
            r'(?=(?:Scene\s+\d+|#\s*\d+\.|^\d+\.|\Z))'
        )
        # Materialize the matches: the total count is needed inside the loop
        # and re.finditer returns a one-shot iterator.
        matches = list(re.finditer(scene_pattern, custom_script, re.MULTILINE | re.DOTALL))
        total_matches = len(matches)

        scenes = []
        for idx, match in enumerate(matches, 1):
            scene_text = match.group(1).strip()

            # Extract narration (first paragraph or before visual markers)
            narration_match = re.search(r'^(.*?)(?:\n\n|Visual:|Image:)', scene_text, re.DOTALL)
            if narration_match:
                narration = narration_match.group(1).strip()
            else:
                narration = scene_text.split('\n')[0]

            # Extract visual description; fall back to the narration itself
            visual_match = re.search(r'(?:Visual:|Image:)\s*(.+?)(?:\n\n|$)', scene_text, re.DOTALL)
            visual_description = visual_match.group(1).strip() if visual_match else narration

            if idx == 1:
                emphasis = "hook"
            elif idx == total_matches:
                emphasis = "cta"
            else:
                emphasis = "main_content"

            scenes.append({
                "scene_number": idx,
                "title": f"Scene {idx}",
                "narration": narration,
                "visual_description": visual_description,
                "duration_estimate": duration_metadata.get("scene_duration_range", [5, 15])[0],
                "emphasis": emphasis,
                "visual_cues": [],
                "visual_prompt": visual_description,
            })

        # Fallback: split by paragraphs if no scene markers
        if not scenes:
            paragraphs = [p.strip() for p in custom_script.split('\n\n') if p.strip()]
            for idx, para in enumerate(paragraphs[:duration_metadata.get("max_scenes", 10)], 1):
                if idx == 1:
                    emphasis = "hook"
                elif idx == len(paragraphs):
                    emphasis = "cta"
                else:
                    emphasis = "main_content"
                scenes.append({
                    "scene_number": idx,
                    "title": f"Scene {idx}",
                    "narration": para,
                    "visual_description": para,
                    "duration_estimate": duration_metadata.get("scene_duration_range", [5, 15])[0],
                    "emphasis": emphasis,
                    "visual_cues": [],
                    "visual_prompt": para,
                })

        return scenes

    # ------------------------------------------------------------------
    # Prompt enhancement
    # ------------------------------------------------------------------

    def _enhance_visual_prompts_batch(
        self,
        scenes: List[Dict[str, Any]],
        video_plan: Dict[str, Any],
        user_id: str,
        duration_type: str,
    ) -> List[Dict[str, Any]]:
        """
        Efficiently enhance visual prompts based on video duration type.

        Strategy:
        - Shorts: Skip enhancement (use original descriptions) - 0 AI calls
        - Medium: Batch enhance all scenes in 1 call - 1 AI call
        - Long: Batch enhance in 2 calls (split scenes) - 2 AI calls max
        - Anything else: use original prompts and log a warning

        Args:
            scenes: Scene dictionaries; mutated in place.
            video_plan: Supplies visual_style and tone for the style context.
            user_id: Passed through to the LLM call.
            duration_type: "shorts", "medium" or "long".

        Returns:
            The same list, each scene carrying ``enhanced_visual_prompt``.
        """
        # For shorts, skip enhancement to save API calls
        if duration_type == "shorts":
            logger.info(
                f"[YouTubeSceneBuilder] Skipping prompt enhancement for shorts "
                f"({len(scenes)} scenes) to save API calls"
            )
            return self._use_original_prompts(scenes)

        # Build story context for prompt enhancer
        story_context = {
            "story_setting": video_plan.get("visual_style", "cinematic"),
            "story_tone": video_plan.get("tone", "professional"),
            "writing_style": video_plan.get("visual_style", "cinematic"),
        }

        # Convert scenes to format expected by enhancer
        scene_data_list = [
            {
                "scene_number": scene.get("scene_number", idx + 1),
                "title": scene.get("title", ""),
                "description": scene.get("visual_description", ""),
                "image_prompt": scene.get("visual_prompt", ""),
            }
            for idx, scene in enumerate(scenes)
        ]

        # For medium videos, enhance all scenes in one batch call
        if duration_type == "medium":
            logger.info(
                f"[YouTubeSceneBuilder] Batch enhancing {len(scenes)} scenes "
                f"for medium video in 1 AI call"
            )
            try:
                enhanced_prompts = self._batch_enhance_prompts(
                    scene_data_list, story_context, user_id
                )
                for idx, scene in enumerate(scenes):
                    scene["enhanced_visual_prompt"] = enhanced_prompts.get(
                        idx, self._original_prompt(scene)
                    )
            except Exception as e:
                logger.warning(
                    f"[YouTubeSceneBuilder] Batch enhancement failed: {e}, "
                    f"using original prompts"
                )
                self._use_original_prompts(scenes)
            return scenes

        # For long videos, split into 2 batches to avoid token limits
        if duration_type == "long":
            logger.info(
                f"[YouTubeSceneBuilder] Batch enhancing {len(scenes)} scenes "
                f"for long video in 2 AI calls"
            )
            mid_point = len(scenes) // 2
            # Each batch travels with the global index of its first scene.
            batches = [
                (0, scene_data_list[:mid_point]),
                (mid_point, scene_data_list[mid_point:]),
            ]

            all_enhanced = {}
            for batch_idx, (start_idx, batch) in enumerate(batches):
                if not batch:
                    # With fewer than two scenes one half is empty; do not
                    # spend an AI call on it.
                    continue
                try:
                    enhanced = self._batch_enhance_prompts(
                        batch, story_context, user_id
                    )
                    for local_idx, enhanced_prompt in enhanced.items():
                        all_enhanced[start_idx + local_idx] = enhanced_prompt
                except Exception as e:
                    logger.warning(
                        f"[YouTubeSceneBuilder] Batch {batch_idx + 1} enhancement "
                        f"failed: {e}, using original prompts"
                    )
                    for local_idx, scene_data in enumerate(batch):
                        all_enhanced[start_idx + local_idx] = scene_data.get(
                            "image_prompt", scene_data.get("description", "")
                        )

            for idx, scene in enumerate(scenes):
                scene["enhanced_visual_prompt"] = all_enhanced.get(
                    idx, self._original_prompt(scene)
                )
            return scenes

        # Fallback: use original prompts
        logger.warning(
            f"[YouTubeSceneBuilder] Unknown duration type '{duration_type}', "
            f"using original prompts"
        )
        return self._use_original_prompts(scenes)

    def _batch_enhance_prompts(
        self,
        scene_data_list: List[Dict[str, Any]],
        story_context: Dict[str, Any],
        user_id: str,
    ) -> Dict[int, str]:
        """
        Enhance multiple scene prompts in a single AI call.

        Never raises: any failure is logged and the original prompts are
        returned instead.

        Args:
            scene_data_list: Dicts with scene_number, title, description and
                image_prompt, in scene order.
            story_context: Style context (story_setting, story_tone, writing_style).
            user_id: Passed through to the LLM call.

        Returns:
            Dictionary mapping scene index (position in ``scene_data_list``)
            to enhanced prompt; scenes the model skipped keep their original
            prompt. Empty input yields an empty dict without an AI call.
        """
        if not scene_data_list:
            return {}

        try:
            # Build batch enhancement prompt
            scenes_text = "\n\n".join([
                f"Scene {scene.get('scene_number', idx + 1)}: {scene.get('title', '')}\n"
                f"Description: {scene.get('description', '')}\n"
                f"Current Prompt: {scene.get('image_prompt', '')}"
                for idx, scene in enumerate(scene_data_list)
            ])

            batch_prompt = f"""You are optimizing visual prompts for AI video generation. Enhance the following scenes to be more detailed and video-optimized.

**Video Style Context:**
- Setting: {story_context.get('story_setting', 'cinematic')}
- Tone: {story_context.get('story_tone', 'professional')}
- Style: {story_context.get('writing_style', 'cinematic')}

**Scenes to Enhance:**
{scenes_text}

**Your Task:**
For each scene, create an enhanced visual prompt (200-300 words) that:
1. Is detailed and specific for video generation
2. Includes camera movements, lighting, composition
3. Maintains consistency with the video style
4. Is optimized for WAN 2.5 text-to-video model

**Format as JSON array with enhanced prompts:**
[
{{"scene_index": 0, "enhanced_prompt": "detailed enhanced prompt for scene 1..."}},
{{"scene_index": 1, "enhanced_prompt": "detailed enhanced prompt for scene 2..."}},
...
]

Make sure the array length matches the number of scenes provided ({len(scene_data_list)}).
"""

            system_prompt = (
                "You are an expert at creating detailed visual prompts for AI video generation. "
                "Your prompts are specific, cinematic, and optimized for video models."
            )

            response = llm_text_gen(
                prompt=batch_prompt,
                system_prompt=system_prompt,
                user_id=user_id,
                json_struct={
                    "type": "array",
                    "items": {
                        "type": "object",
                        "properties": {
                            "scene_index": {"type": "number"},
                            "enhanced_prompt": {"type": "string"}
                        },
                        "required": ["scene_index", "enhanced_prompt"]
                    }
                }
            )

            # Parse response
            if isinstance(response, str):
                import json
                enhanced_list = json.loads(response)
            else:
                enhanced_list = response
            if not isinstance(enhanced_list, list):
                raise ValueError(
                    f"Batch enhancement returned {type(enhanced_list).__name__}, expected a list"
                )

            # Build result dictionary. The schema types scene_index as
            # "number", so it may arrive as a float, be out of range or be
            # missing; only in-range integer indices with non-empty prompts
            # are accepted, and a missing index falls back to list position.
            result: Dict[int, str] = {}
            for position, item in enumerate(enhanced_list):
                if not isinstance(item, dict):
                    continue
                try:
                    scene_idx = int(item.get("scene_index", position))
                except (TypeError, ValueError, OverflowError):
                    continue
                prompt = item.get("enhanced_prompt", "")
                if prompt and 0 <= scene_idx < len(scene_data_list):
                    result[scene_idx] = prompt

            # Fill in any missing scenes with original prompts
            for idx, original_scene in enumerate(scene_data_list):
                if idx not in result:
                    result[idx] = original_scene.get(
                        "image_prompt", original_scene.get("description", "")
                    )

            logger.info(
                f"[YouTubeSceneBuilder] ✅ Batch enhanced {len(result)} prompts "
                f"in 1 AI call"
            )
            return result

        except Exception as e:
            logger.error(
                f"[YouTubeSceneBuilder] Batch enhancement failed: {e}",
                exc_info=True
            )
            # Return original prompts as fallback
            return {
                idx: scene.get("image_prompt", scene.get("description", ""))
                for idx, scene in enumerate(scene_data_list)
            }
|
||||
|
||||
Reference in New Issue
Block a user