""" Podcast Maker API Router API endpoints for podcast project persistence and management. """ from fastapi import APIRouter, Depends, HTTPException, Query, Body, BackgroundTasks, Request from sqlalchemy.orm import Session from typing import List, Optional, Dict, Any from pydantic import BaseModel, Field, model_validator from datetime import datetime from pathlib import Path from urllib.parse import quote from services.database import get_db from middleware.auth_middleware import get_current_user, get_current_user_with_query_token from services.podcast_service import PodcastService from models.podcast_models import PodcastProject from services.wavespeed.infinitetalk import animate_scene_with_voiceover from services.story_writer.video_generation_service import StoryVideoGenerationService from services.llm_providers.main_image_generation import generate_image from services.llm_providers.main_video_generation import track_video_usage from services.subscription import PricingService from services.subscription.preflight_validator import validate_scene_animation_operation from api.story_writer.task_manager import task_manager from api.story_writer.utils.auth import require_authenticated_user # Podcast-specific media loading functions (no dependency on story_writer) from services.llm_providers.main_text_generation import llm_text_gen from services.story_writer.audio_generation_service import StoryAudioGenerationService from utils.asset_tracker import save_asset_to_library from models.story_models import StoryAudioResult from loguru import logger from services.blog_writer.research.exa_provider import ExaResearchProvider from types import SimpleNamespace import tempfile import os import uuid router = APIRouter(prefix="/api/podcast", tags=["Podcast Maker"]) AI_VIDEO_SUBDIR = Path("AI_Videos") # Initialize audio service with podcast_audio directory # router.py is at: backend/api/podcast/router.py # parents[0] = backend/api/podcast/ # parents[1] = backend/api/ # parents[2] = backend/ BASE_DIR = Path(__file__).resolve().parents[2] # backend/ PODCAST_AUDIO_DIR = (BASE_DIR / "podcast_audio").resolve() PODCAST_AUDIO_DIR.mkdir(parents=True, exist_ok=True) PODCAST_IMAGES_DIR = (BASE_DIR / "podcast_images").resolve() PODCAST_IMAGES_DIR.mkdir(parents=True, exist_ok=True) audio_service = StoryAudioGenerationService(output_dir=str(PODCAST_AUDIO_DIR)) class PodcastProjectResponse(BaseModel): """Response model for podcast project.""" id: int project_id: str user_id: str idea: str duration: int speakers: int budget_cap: float analysis: Optional[Dict[str, Any]] = None queries: Optional[List[Dict[str, Any]]] = None selected_queries: Optional[List[str]] = None research: Optional[Dict[str, Any]] = None raw_research: Optional[Dict[str, Any]] = None estimate: Optional[Dict[str, Any]] = None script_data: Optional[Dict[str, Any]] = None render_jobs: Optional[List[Dict[str, Any]]] = None knobs: Optional[Dict[str, Any]] = None research_provider: Optional[str] = None show_script_editor: bool = False show_render_queue: bool = False current_step: Optional[str] = None status: str = "draft" is_favorite: bool = False created_at: datetime updated_at: datetime class Config: from_attributes = True class PodcastAnalyzeRequest(BaseModel): """Request model for podcast idea analysis.""" idea: str = Field(..., description="Podcast topic or idea") duration: int = Field(default=10, description="Target duration in minutes") speakers: int = Field(default=1, description="Number of speakers") class PodcastAnalyzeResponse(BaseModel): """Response model for podcast idea analysis.""" audience: str content_type: str top_keywords: list[str] suggested_outlines: list[Dict[str, Any]] title_suggestions: list[str] exa_suggested_config: Optional[Dict[str, Any]] = None class PodcastScriptRequest(BaseModel): """Request model for podcast script generation.""" idea: str = Field(..., description="Podcast idea or topic") duration_minutes: int = Field(default=10, description="Target duration in minutes") speakers: int = Field(default=1, description="Number of speakers") research: Optional[Dict[str, Any]] = Field(None, description="Optional research payload to ground the script") class PodcastSceneLine(BaseModel): speaker: str text: str emphasis: Optional[bool] = False class PodcastScene(BaseModel): id: str title: str duration: int lines: list[PodcastSceneLine] approved: bool = False emotion: Optional[str] = None imageUrl: Optional[str] = None # Generated image URL for video generation class PodcastExaConfig(BaseModel): """Exa config for podcast research.""" exa_search_type: Optional[str] = Field(default="auto", description="auto | keyword | neural") exa_category: Optional[str] = None exa_include_domains: List[str] = [] exa_exclude_domains: List[str] = [] max_sources: int = 8 include_statistics: Optional[bool] = False date_range: Optional[str] = Field(default=None, description="last_month | last_3_months | last_year | all_time") @model_validator(mode="after") def validate_domains(self): if self.exa_include_domains and self.exa_exclude_domains: # Exa API does not allow both include and exclude domains together with contents # Prefer include_domains and drop exclude_domains self.exa_exclude_domains = [] return self class PodcastExaResearchRequest(BaseModel): """Request for podcast research using Exa directly (no blog writer).""" topic: str queries: List[str] exa_config: Optional[PodcastExaConfig] = None class PodcastExaSource(BaseModel): title: str = "" url: str = "" excerpt: str = "" published_at: Optional[str] = None highlights: Optional[List[str]] = None summary: Optional[str] = None source_type: Optional[str] = None index: Optional[int] = None class PodcastExaResearchResponse(BaseModel): sources: List[PodcastExaSource] search_queries: List[str] = [] cost: Optional[Dict[str, Any]] = None search_type: Optional[str] = None provider: str = "exa" content: Optional[str] = None class PodcastScriptResponse(BaseModel): scenes: list[PodcastScene] class PodcastAudioRequest(BaseModel): """Generate TTS for a podcast scene.""" scene_id: str scene_title: str text: str voice_id: Optional[str] = "Wise_Woman" speed: Optional[float] = 1.0 volume: Optional[float] = 1.0 pitch: Optional[float] = 0.0 emotion: Optional[str] = "neutral" english_normalization: Optional[bool] = False # Better number reading for statistics class PodcastAudioResponse(BaseModel): scene_id: str scene_title: str audio_filename: str audio_url: str provider: str model: str voice_id: str text_length: int file_size: int cost: float class PodcastProjectListResponse(BaseModel): """Response model for project list.""" projects: List[PodcastProjectResponse] total: int limit: int offset: int class CreateProjectRequest(BaseModel): """Request model for creating a project.""" project_id: str = Field(..., description="Unique project ID") idea: str = Field(..., description="Episode idea or URL") duration: int = Field(..., description="Duration in minutes") speakers: int = Field(default=1, description="Number of speakers") budget_cap: float = Field(default=50.0, description="Budget cap in USD") class UpdateProjectRequest(BaseModel): """Request model for updating project state.""" analysis: Optional[Dict[str, Any]] = None queries: Optional[List[Dict[str, Any]]] = None selected_queries: Optional[List[str]] = None research: Optional[Dict[str, Any]] = None raw_research: Optional[Dict[str, Any]] = None estimate: Optional[Dict[str, Any]] = None script_data: Optional[Dict[str, Any]] = None render_jobs: Optional[List[Dict[str, Any]]] = None knobs: Optional[Dict[str, Any]] = None research_provider: Optional[str] = None show_script_editor: Optional[bool] = None show_render_queue: Optional[bool] = None current_step: Optional[str] = None status: Optional[str] = None @router.post("/projects", response_model=PodcastProjectResponse, status_code=201) async def create_project( request: CreateProjectRequest, db: Session = Depends(get_db), current_user: Dict[str, Any] = Depends(get_current_user), ): """Create a new podcast project.""" try: user_id = current_user.get("user_id") or current_user.get("id") if not user_id: raise HTTPException(status_code=401, detail="User ID not found") service = PodcastService(db) # Check if project_id already exists for this user existing = service.get_project(user_id, request.project_id) if existing: raise HTTPException(status_code=400, detail="Project ID already exists") project = service.create_project( user_id=user_id, project_id=request.project_id, idea=request.idea, duration=request.duration, speakers=request.speakers, budget_cap=request.budget_cap, ) return PodcastProjectResponse.model_validate(project) except HTTPException: raise except Exception as e: raise HTTPException(status_code=500, detail=f"Error creating project: {str(e)}") @router.post("/analyze", response_model=PodcastAnalyzeResponse) async def analyze_podcast_idea( request: PodcastAnalyzeRequest, current_user: Dict[str, Any] = Depends(get_current_user), ): """ Analyze a podcast idea and return podcast-oriented outlines, keywords, and titles. This uses the shared LLM provider but with a podcast-specific prompt (not story format). """ user_id = require_authenticated_user(current_user) prompt = f""" You are an expert podcast producer. Given a podcast idea, craft concise podcast-ready assets that sound like episode plans (not fiction stories). Podcast Idea: "{request.idea}" Duration: ~{request.duration} minutes Speakers: {request.speakers} (host + optional guest) Return JSON with: - audience: short target audience description - content_type: podcast style/format - top_keywords: 5 podcast-relevant keywords/phrases - suggested_outlines: 2 items, each with title (<=60 chars) and 4-6 short segments (bullet-friendly, factual) - title_suggestions: 3 concise episode titles (no cliffhanger storytelling) - exa_suggested_config: suggested Exa search options to power research (keep conservative defaults to control cost), with: - exa_search_type: "auto" | "neural" | "keyword" (prefer "auto" unless clearly news-heavy) - exa_category: one of ["research paper","news","company","github","tweet","personal site","pdf","financial report","linkedin profile"] - exa_include_domains: up to 3 reputable domains to prioritize (optional) - exa_exclude_domains: up to 3 domains to avoid (optional) - max_sources: 6-10 - include_statistics: boolean (true if topic needs fresh stats) - date_range: one of ["last_month","last_3_months","last_year","all_time"] (pick recent if time-sensitive) Requirements: - Keep language factual, actionable, and suited for spoken audio. - Avoid narrative fiction tone; focus on insights, hooks, objections, and takeaways. - Prefer 2024-2025 context when relevant. """ try: raw = llm_text_gen(prompt=prompt, user_id=user_id, json_struct=None) except HTTPException: # Re-raise HTTPExceptions (e.g., 429 subscription limit) - preserve error details raise except Exception as exc: logger.error(f"[Podcast Analyze] Analysis failed for user {user_id}: {exc}") raise HTTPException(status_code=500, detail=f"Analysis failed: {exc}") # Normalize response (accept dict or JSON string) import json if isinstance(raw, str): try: data = json.loads(raw) except json.JSONDecodeError: raise HTTPException(status_code=500, detail="LLM returned non-JSON output") elif isinstance(raw, dict): data = raw else: raise HTTPException(status_code=500, detail="Unexpected LLM response format") audience = data.get("audience") or "Growth-focused professionals" content_type = data.get("content_type") or "Interview + insights" top_keywords = data.get("top_keywords") or [] suggested_outlines = data.get("suggested_outlines") or [] title_suggestions = data.get("title_suggestions") or [] exa_suggested_config = data.get("exa_suggested_config") or None return PodcastAnalyzeResponse( audience=audience, content_type=content_type, top_keywords=top_keywords, suggested_outlines=suggested_outlines, title_suggestions=title_suggestions, exa_suggested_config=exa_suggested_config, ) @router.post("/research/exa", response_model=PodcastExaResearchResponse) async def podcast_research_exa( request: PodcastExaResearchRequest, current_user: Dict[str, Any] = Depends(get_current_user), ): """ Run podcast research directly via Exa (no blog writer pipeline). """ user_id = require_authenticated_user(current_user) queries = [q.strip() for q in request.queries if q and q.strip()] if not queries: raise HTTPException(status_code=400, detail="At least one query is required for research.") exa_cfg = request.exa_config or PodcastExaConfig() cfg = SimpleNamespace( exa_search_type=exa_cfg.exa_search_type or "auto", exa_category=exa_cfg.exa_category, exa_include_domains=exa_cfg.exa_include_domains or [], exa_exclude_domains=exa_cfg.exa_exclude_domains or [], max_sources=exa_cfg.max_sources or 8, source_types=[], ) provider = ExaResearchProvider() prompt = request.topic try: result = await provider.search( prompt=prompt, topic=request.topic, industry="", target_audience="", config=cfg, user_id=user_id, ) except Exception as exc: logger.error(f"[Podcast Exa Research] Failed for user {user_id}: {exc}") raise HTTPException(status_code=500, detail=f"Exa research failed: {exc}") # Track usage if available try: cost_total = 0.0 if isinstance(result, dict): cost_total = result.get("cost", {}).get("total", 0.005) if result.get("cost") else 0.005 provider.track_exa_usage(user_id, cost_total) except Exception as track_err: logger.warning(f"[Podcast Exa Research] Failed to track usage: {track_err}") sources_payload = [] if isinstance(result, dict): for src in result.get("sources", []) or []: try: sources_payload.append(PodcastExaSource(**src)) except Exception: sources_payload.append(PodcastExaSource(**{ "title": src.get("title", ""), "url": src.get("url", ""), "excerpt": src.get("excerpt", ""), "published_at": src.get("published_at"), "highlights": src.get("highlights"), "summary": src.get("summary"), "source_type": src.get("source_type"), "index": src.get("index"), })) return PodcastExaResearchResponse( sources=sources_payload, search_queries=result.get("search_queries", queries) if isinstance(result, dict) else queries, cost=result.get("cost") if isinstance(result, dict) else None, search_type=result.get("search_type") if isinstance(result, dict) else None, provider=result.get("provider", "exa") if isinstance(result, dict) else "exa", content=result.get("content") if isinstance(result, dict) else None, ) @router.post("/script", response_model=PodcastScriptResponse) async def generate_podcast_script( request: PodcastScriptRequest, current_user: Dict[str, Any] = Depends(get_current_user), ): """ Generate a podcast script outline (scenes + lines) using podcast-oriented prompting. """ user_id = require_authenticated_user(current_user) # Build comprehensive research context for higher-quality scripts research_context = "" if request.research: try: key_insights = request.research.get("keyword_analysis", {}).get("key_insights") or [] fact_cards = request.research.get("factCards", []) or [] mapped_angles = request.research.get("mappedAngles", []) or [] sources = request.research.get("sources", []) or [] top_facts = [f.get("quote", "") for f in fact_cards[:5] if f.get("quote")] angles_summary = [ f"{a.get('title', '')}: {a.get('why', '')}" for a in mapped_angles[:3] if a.get("title") or a.get("why") ] top_sources = [s.get("url") for s in sources[:3] if s.get("url")] research_parts = [] if key_insights: research_parts.append(f"Key Insights: {', '.join(key_insights[:5])}") if top_facts: research_parts.append(f"Key Facts: {', '.join(top_facts)}") if angles_summary: research_parts.append(f"Research Angles: {' | '.join(angles_summary)}") if top_sources: research_parts.append(f"Top Sources: {', '.join(top_sources)}") research_context = "\n".join(research_parts) except Exception as exc: logger.warning(f"Failed to parse research context: {exc}") research_context = "" prompt = f"""You are an expert podcast script planner. Create natural, conversational podcast scenes. Podcast Idea: "{request.idea}" Duration: ~{request.duration_minutes} minutes Speakers: {request.speakers} (Host + optional Guest) {f"RESEARCH CONTEXT:\n{research_context}\n" if research_context else ""} Return JSON with: - scenes: array of scenes. Each scene has: - id: string - title: short scene title (<= 60 chars) - duration: duration in seconds (evenly split across total duration) - emotion: string (one of: "neutral", "happy", "excited", "serious", "curious", "confident") - lines: array of {{"speaker": "...", "text": "...", "emphasis": boolean}} * Write natural, conversational dialogue * Each line can be a sentence or a few sentences that flow together * Use plain text only - no markdown formatting (no asterisks, underscores, etc.) * Mark "emphasis": true for key statistics or important points Guidelines: - Write for spoken delivery: conversational, natural, with contractions - Use research insights naturally - weave statistics into dialogue, don't just list them - Vary emotion per scene based on content - Ensure scenes match target duration: aim for ~2.5 words per second of audio - Keep it engaging and informative, like a real podcast conversation """ try: raw = llm_text_gen(prompt=prompt, user_id=user_id, json_struct=None) except Exception as exc: raise HTTPException(status_code=500, detail=f"Script generation failed: {exc}") import json if isinstance(raw, str): try: data = json.loads(raw) except json.JSONDecodeError: raise HTTPException(status_code=500, detail="LLM returned non-JSON output") elif isinstance(raw, dict): data = raw else: raise HTTPException(status_code=500, detail="Unexpected LLM response format") scenes_data = data.get("scenes") or [] if not isinstance(scenes_data, list): raise HTTPException(status_code=500, detail="LLM response missing scenes array") valid_emotions = {"neutral", "happy", "excited", "serious", "curious", "confident"} # Normalize scenes scenes: list[PodcastScene] = [] for idx, scene in enumerate(scenes_data): title = scene.get("title") or f"Scene {idx + 1}" duration = int(scene.get("duration") or max(30, (request.duration_minutes * 60) // max(1, len(scenes_data)))) emotion = scene.get("emotion") or "neutral" if emotion not in valid_emotions: emotion = "neutral" lines_raw = scene.get("lines") or [] lines: list[PodcastSceneLine] = [] for line in lines_raw: speaker = line.get("speaker") or ("Host" if len(lines) % request.speakers == 0 else "Guest") text = line.get("text") or "" emphasis = line.get("emphasis", False) if text: lines.append(PodcastSceneLine(speaker=speaker, text=text, emphasis=emphasis)) scenes.append( PodcastScene( id=scene.get("id") or f"scene-{idx + 1}", title=title, duration=duration, lines=lines, approved=False, emotion=emotion, ) ) return PodcastScriptResponse(scenes=scenes) @router.post("/audio", response_model=PodcastAudioResponse) async def generate_podcast_audio( request: PodcastAudioRequest, current_user: Dict[str, Any] = Depends(get_current_user), db: Session = Depends(get_db), ): """ Generate AI audio for a podcast scene using shared audio service. """ user_id = require_authenticated_user(current_user) if not request.text or not request.text.strip(): raise HTTPException(status_code=400, detail="Text is required") try: result: StoryAudioResult = audio_service.generate_ai_audio( scene_number=0, scene_title=request.scene_title, text=request.text.strip(), user_id=user_id, voice_id=request.voice_id or "Wise_Woman", speed=request.speed or 1.0, # Normal speed (was 0.9, but too slow - causing duration issues) volume=request.volume or 1.0, pitch=request.pitch or 0.0, # Normal pitch (0.0 = neutral) emotion=request.emotion or "neutral", english_normalization=request.english_normalization or False, ) # Override URL to use podcast endpoint instead of story endpoint if result.get("audio_url") and "/api/story/audio/" in result.get("audio_url", ""): audio_filename = result.get("audio_filename", "") result["audio_url"] = f"/api/podcast/audio/{audio_filename}" except Exception as exc: raise HTTPException(status_code=500, detail=f"Audio generation failed: {exc}") # Save to asset library (podcast module) try: if result.get("audio_url"): save_asset_to_library( db=db, user_id=user_id, asset_type="audio", source_module="podcast_maker", filename=result.get("audio_filename", ""), file_url=result.get("audio_url", ""), file_path=result.get("audio_path"), file_size=result.get("file_size"), mime_type="audio/mpeg", title=f"{request.scene_title} - Podcast", description="Podcast scene narration", tags=["podcast", "audio", request.scene_id], provider=result.get("provider"), model=result.get("model"), cost=result.get("cost"), asset_metadata={ "scene_id": request.scene_id, "scene_title": request.scene_title, "status": "completed", }, ) except Exception as e: logger.warning(f"[Podcast] Failed to save audio asset: {e}") return PodcastAudioResponse( scene_id=request.scene_id, scene_title=request.scene_title, audio_filename=result.get("audio_filename", ""), audio_url=result.get("audio_url", ""), provider=result.get("provider", "wavespeed"), model=result.get("model", "minimax/speech-02-hd"), voice_id=result.get("voice_id", request.voice_id or "Wise_Woman"), text_length=result.get("text_length", len(request.text)), file_size=result.get("file_size", 0), cost=result.get("cost", 0.0), ) class PodcastCombineAudioRequest(BaseModel): """Request model for combining podcast audio files.""" project_id: str scene_ids: List[str] = Field(..., description="List of scene IDs to combine") scene_audio_urls: List[str] = Field(..., description="List of audio URLs for each scene") class PodcastCombineAudioResponse(BaseModel): """Response model for combined podcast audio.""" combined_audio_url: str combined_audio_filename: str total_duration: float file_size: int scene_count: int @router.post("/combine-audio", response_model=PodcastCombineAudioResponse) async def combine_podcast_audio( request: PodcastCombineAudioRequest, current_user: Dict[str, Any] = Depends(get_current_user), db: Session = Depends(get_db), ): """ Combine multiple scene audio files into a single podcast audio file. """ user_id = require_authenticated_user(current_user) if not request.scene_ids or not request.scene_audio_urls: raise HTTPException(status_code=400, detail="Scene IDs and audio URLs are required") if len(request.scene_ids) != len(request.scene_audio_urls): raise HTTPException(status_code=400, detail="Scene IDs and audio URLs count must match") try: # Import moviepy for audio concatenation try: from moviepy import AudioFileClip, concatenate_audioclips except ImportError: logger.error("[Podcast] MoviePy not available for audio combination") raise HTTPException( status_code=500, detail="Audio combination requires MoviePy. Please install: pip install moviepy" ) # Create temporary directory for audio processing temp_dir = Path(tempfile.gettempdir()) / f"podcast_combine_{uuid.uuid4().hex[:8]}" temp_dir.mkdir(parents=True, exist_ok=True) audio_clips = [] total_duration = 0.0 try: # Log incoming request for debugging logger.info(f"[Podcast] Combining audio: {len(request.scene_audio_urls)} URLs received") for idx, url in enumerate(request.scene_audio_urls): logger.info(f"[Podcast] URL {idx+1}: {url}") # Download and load each audio file from podcast_audio directory for idx, audio_url in enumerate(request.scene_audio_urls): try: # Normalize audio URL - handle both absolute and relative paths if audio_url.startswith("http"): # External URL - would need to download logger.error(f"[Podcast] External URLs not supported: {audio_url}") raise HTTPException( status_code=400, detail=f"External URLs not supported. Please use local file paths." ) # Handle relative paths - only /api/podcast/audio/... URLs are supported audio_path = None if audio_url.startswith("/api/"): # Extract filename from URL from urllib.parse import urlparse parsed = urlparse(audio_url) path = parsed.path if parsed.scheme else audio_url # Handle both /api/podcast/audio/ and /api/story/audio/ URLs (for backward compatibility) if "/api/podcast/audio/" in path: filename = path.split("/api/podcast/audio/", 1)[1].split("?", 1)[0].strip() elif "/api/story/audio/" in path: # Convert story audio URLs to podcast audio (they're in the same directory now) filename = path.split("/api/story/audio/", 1)[1].split("?", 1)[0].strip() logger.info(f"[Podcast] Converting story audio URL to podcast: {audio_url} -> {filename}") else: logger.error(f"[Podcast] Unsupported audio URL format: {audio_url}. Expected /api/podcast/audio/ or /api/story/audio/ URLs.") continue if not filename: logger.error(f"[Podcast] Could not extract filename from URL: {audio_url}") continue # Podcast audio files are stored in podcast_audio directory audio_path = (PODCAST_AUDIO_DIR / filename).resolve() # Security check: ensure path is within PODCAST_AUDIO_DIR if not str(audio_path).startswith(str(PODCAST_AUDIO_DIR)): logger.error(f"[Podcast] Attempted path traversal when resolving audio: {audio_url}") continue else: logger.warning(f"[Podcast] Non-API URL format, treating as direct path: {audio_url}") audio_path = Path(audio_url) if not audio_path or not audio_path.exists(): logger.error(f"[Podcast] Audio file not found: {audio_path} (from URL: {audio_url})") continue # Load audio clip audio_clip = AudioFileClip(str(audio_path)) audio_clips.append(audio_clip) total_duration += audio_clip.duration logger.info(f"[Podcast] Loaded audio {idx+1}/{len(request.scene_audio_urls)}: {audio_path.name} ({audio_clip.duration:.2f}s)") except HTTPException: raise except Exception as e: logger.error(f"[Podcast] Failed to load audio {idx+1}: {e}", exc_info=True) # Continue with other audio files continue if not audio_clips: raise HTTPException(status_code=400, detail="No valid audio files found to combine") # Concatenate all audio clips logger.info(f"[Podcast] Combining {len(audio_clips)} audio clips (total duration: {total_duration:.2f}s)") combined_audio = concatenate_audioclips(audio_clips) # Generate output filename output_filename = f"podcast_combined_{request.project_id}_{uuid.uuid4().hex[:8]}.mp3" output_path = PODCAST_AUDIO_DIR / output_filename # Write combined audio file combined_audio.write_audiofile( str(output_path), codec="mp3", bitrate="192k", logger=None, # Suppress moviepy logging ) # Close audio clips to free resources for clip in audio_clips: clip.close() combined_audio.close() file_size = output_path.stat().st_size audio_url = f"/api/podcast/audio/{output_filename}" logger.info(f"[Podcast] Combined audio saved: {output_path} ({file_size} bytes)") # Save to asset library try: save_asset_to_library( db=db, user_id=user_id, asset_type="audio", source_module="podcast_maker", filename=output_filename, file_url=audio_url, file_path=str(output_path), file_size=file_size, mime_type="audio/mpeg", title=f"Combined Podcast - {request.project_id}", description=f"Combined podcast audio from {len(request.scene_ids)} scenes", tags=["podcast", "audio", "combined", request.project_id], asset_metadata={ "project_id": request.project_id, "scene_ids": request.scene_ids, "scene_count": len(request.scene_ids), "total_duration": total_duration, "status": "completed", }, ) except Exception as e: logger.warning(f"[Podcast] Failed to save combined audio asset: {e}") return PodcastCombineAudioResponse( combined_audio_url=audio_url, combined_audio_filename=output_filename, total_duration=total_duration, file_size=file_size, scene_count=len(request.scene_ids), ) finally: # Cleanup temporary directory try: import shutil if temp_dir.exists(): shutil.rmtree(temp_dir) except Exception as e: logger.warning(f"[Podcast] Failed to cleanup temp directory: {e}") except HTTPException: raise except Exception as exc: logger.error(f"[Podcast] Audio combination failed: {exc}", exc_info=True) raise HTTPException(status_code=500, detail=f"Audio combination failed: {exc}") @router.get("/task/{task_id}/status") async def podcast_task_status(task_id: str, current_user: Dict[str, Any] = Depends(get_current_user)): """Expose task status under podcast namespace (reuses shared task manager).""" require_authenticated_user(current_user) return task_manager.get_task_status(task_id) class PodcastImageRequest(BaseModel): """Request for generating an image for a podcast scene.""" scene_id: str scene_title: str scene_content: Optional[str] = None # Optional: scene lines text for context idea: Optional[str] = None # Optional: podcast idea for context width: int = 1024 height: int = 1024 class PodcastImageResponse(BaseModel): """Response for podcast scene image generation.""" scene_id: str scene_title: str image_filename: str image_url: str width: int height: int provider: str model: Optional[str] = None cost: float @router.post("/image", response_model=PodcastImageResponse) async def generate_podcast_scene_image( request: PodcastImageRequest, current_user: Dict[str, Any] = Depends(get_current_user), db: Session = Depends(get_db), ): """ Generate an AI image for a podcast scene. Creates a professional, podcast-appropriate image based on scene title and content. """ user_id = require_authenticated_user(current_user) if not request.scene_title: raise HTTPException(status_code=400, detail="Scene title is required") try: # Build image prompt from scene context prompt_parts = [ "Professional podcast studio setting, modern and clean", f"Scene topic: {request.scene_title}", ] if request.scene_content: # Extract key themes from scene content (first 200 chars) content_preview = request.scene_content[:200].replace("\n", " ") prompt_parts.append(f"Content context: {content_preview}") if request.idea: prompt_parts.append(f"Podcast theme: {request.idea[:100]}") prompt_parts.extend([ "Professional lighting, podcast microphone visible", "Modern podcast studio aesthetic, clean background", "High quality, professional photography style", "Suitable for video generation with talking avatar" ]) image_prompt = ", ".join(prompt_parts) logger.info(f"[Podcast] Generating image for scene {request.scene_id}: {request.scene_title}") # Generate image using main_image_generation service image_options = { "provider": None, # Auto-select provider "width": request.width, "height": request.height, } result = generate_image( prompt=image_prompt, options=image_options, user_id=user_id ) # Save image to podcast images directory base_dir = Path(__file__).parent.parent.parent.parent podcast_images_dir = base_dir / "podcast_images" podcast_images_dir.mkdir(parents=True, exist_ok=True) # Generate filename clean_title = "".join(c if c.isalnum() or c in ('-', '_') else '_' for c in request.scene_title[:30]) unique_id = str(uuid.uuid4())[:8] image_filename = f"scene_{request.scene_id}_{clean_title}_{unique_id}.png" image_path = podcast_images_dir / image_filename # Save image with open(image_path, "wb") as f: f.write(result.image_bytes) logger.info(f"[Podcast] Saved image to: {image_path}") # Create image URL (served via API endpoint) image_url = f"/api/podcast/images/{image_filename}" # Save to asset library try: save_asset_to_library( db=db, user_id=user_id, asset_type="image", source_module="podcast_maker", filename=image_filename, file_url=image_url, file_path=str(image_path), file_size=len(result.image_bytes), mime_type="image/png", title=f"{request.scene_title} - Podcast Scene", description=f"Podcast scene image: {request.scene_title}", prompt=image_prompt, tags=["podcast", "scene", request.scene_id], provider=result.provider, model=result.model, asset_metadata={ "scene_id": request.scene_id, "scene_title": request.scene_title, "status": "completed", }, ) except Exception as e: logger.warning(f"[Podcast] Failed to save image asset: {e}") # Estimate cost (rough estimate: ~$0.04 per image for most providers) cost = 0.04 return PodcastImageResponse( scene_id=request.scene_id, scene_title=request.scene_title, image_filename=image_filename, image_url=image_url, width=result.width, height=result.height, provider=result.provider, model=result.model, cost=cost, ) except HTTPException: raise except Exception as exc: logger.error(f"[Podcast] Image generation failed: {exc}", exc_info=True) raise HTTPException(status_code=500, detail=f"Image generation failed: {str(exc)}") @router.get("/audio/{filename}") async def serve_podcast_audio( filename: str, current_user: Dict[str, Any] = Depends(get_current_user_with_query_token), ): """Serve generated podcast scene audio files. Supports authentication via Authorization header or token query parameter. Query parameter is useful for HTML elements like