From eba5210577e95b1e1ca8fbdede4755f87181f959 Mon Sep 17 00:00:00 2001 From: ajaysi Date: Fri, 12 Dec 2025 21:43:09 +0530 Subject: [PATCH] AI podcast maker performance optimizations --- .gitignore | 6 + backend/alwrity_utils/frontend_serving.py | 88 +- backend/api/podcast/router.py | 877 ++++++++++++++++-- .../story_writer/audio_generation_service.py | 3 + frontend/BUILD_OPTIMIZATION.md | 197 ++++ frontend/OPTIMIZATION_PROGRESS.md | 114 +++ frontend/PERFORMANCE_OPTIMIZATIONS.md | 162 ++++ frontend/UNUSED_JAVASCRIPT_OPTIMIZATION.md | 231 +++++ frontend/package.json | 7 +- frontend/public/index.html | 12 + .../MainDashboard/ContentLifecyclePillars.tsx | 1 + .../MainDashboard/MainDashboard.tsx | 85 +- .../components/WorkflowHeroSection.tsx | 11 +- .../components/PodcastMaker/AnalysisPanel.tsx | 78 +- .../components/PodcastMaker/CreateModal.tsx | 686 ++++++++++++-- .../PodcastMaker/InlineAudioPlayer.tsx | 159 +++- .../PodcastMaker/PodcastDashboard.tsx | 740 +++------------ .../PodcastDashboard/EstimateCard.tsx | 56 ++ .../PodcastMaker/PodcastDashboard/Header.tsx | 68 ++ .../PodcastDashboard/ProgressStepper.tsx | 105 +++ .../PodcastDashboard/QuerySelection.tsx | 169 ++++ .../PodcastDashboard/ResearchSummary.tsx | 148 +++ .../PodcastMaker/PodcastDashboard/index.ts | 8 + .../PodcastDashboard/usePodcastWorkflow.ts | 302 ++++++ .../PodcastMaker/PodcastDashboard/utils.ts | 76 ++ .../components/PodcastMaker/RenderQueue.tsx | 637 +++---------- .../RenderQueue/GuidancePanel.tsx | 84 ++ .../RenderQueue/SceneActionButtons.tsx | 179 ++++ .../PodcastMaker/RenderQueue/SceneCard.tsx | 415 +++++++++ .../PodcastMaker/RenderQueue/SummaryStats.tsx | 71 ++ .../PodcastMaker/RenderQueue/index.ts | 6 + .../RenderQueue/useRenderQueue.ts | 376 ++++++++ .../PodcastMaker/ScriptEditor/LineEditor.tsx | 133 ++- .../PodcastMaker/ScriptEditor/SceneEditor.tsx | 349 ++++++- .../ScriptEditor/ScriptEditor.tsx | 646 +++++++++++-- frontend/src/components/PodcastMaker/types.ts | 19 + 
.../components/PodcastMaker/ui/GlassyCard.tsx | 14 +- .../PodcastMaker/ui/PrimaryButton.tsx | 5 +- .../PodcastMaker/ui/SecondaryButton.tsx | 8 +- .../src/components/billing/UsageTrends.tsx | 44 +- frontend/src/hooks/usePodcastProjectState.ts | 4 +- frontend/src/services/podcastApi.ts | 357 ++++--- frontend/src/styles/global.css | 1 + frontend/src/utils/lazyRecharts.tsx | 77 ++ frontend/src/utils/lazyWix.ts | 10 + .../scene_scene1_Opening_Hook_ec1e050b.png | Bin 0 -> 806750 bytes 46 files changed, 6176 insertions(+), 1648 deletions(-) create mode 100644 frontend/BUILD_OPTIMIZATION.md create mode 100644 frontend/OPTIMIZATION_PROGRESS.md create mode 100644 frontend/PERFORMANCE_OPTIMIZATIONS.md create mode 100644 frontend/UNUSED_JAVASCRIPT_OPTIMIZATION.md create mode 100644 frontend/src/components/PodcastMaker/PodcastDashboard/EstimateCard.tsx create mode 100644 frontend/src/components/PodcastMaker/PodcastDashboard/Header.tsx create mode 100644 frontend/src/components/PodcastMaker/PodcastDashboard/ProgressStepper.tsx create mode 100644 frontend/src/components/PodcastMaker/PodcastDashboard/QuerySelection.tsx create mode 100644 frontend/src/components/PodcastMaker/PodcastDashboard/ResearchSummary.tsx create mode 100644 frontend/src/components/PodcastMaker/PodcastDashboard/index.ts create mode 100644 frontend/src/components/PodcastMaker/PodcastDashboard/usePodcastWorkflow.ts create mode 100644 frontend/src/components/PodcastMaker/PodcastDashboard/utils.ts create mode 100644 frontend/src/components/PodcastMaker/RenderQueue/GuidancePanel.tsx create mode 100644 frontend/src/components/PodcastMaker/RenderQueue/SceneActionButtons.tsx create mode 100644 frontend/src/components/PodcastMaker/RenderQueue/SceneCard.tsx create mode 100644 frontend/src/components/PodcastMaker/RenderQueue/SummaryStats.tsx create mode 100644 frontend/src/components/PodcastMaker/RenderQueue/index.ts create mode 100644 frontend/src/components/PodcastMaker/RenderQueue/useRenderQueue.ts create mode 100644 
frontend/src/utils/lazyRecharts.tsx create mode 100644 frontend/src/utils/lazyWix.ts create mode 100644 podcast_images/scene_scene1_Opening_Hook_ec1e050b.png diff --git a/.gitignore b/.gitignore index 5c7b001b..6e396a7c 100644 --- a/.gitignore +++ b/.gitignore @@ -9,6 +9,12 @@ __pycache__/ backend/.onboarding_progress.json backend/database/migrations/* +*.mp3 +podcast_audio/* +backend/podcast_audio/ + + + .cursorignore story_videos story_videos/* diff --git a/backend/alwrity_utils/frontend_serving.py b/backend/alwrity_utils/frontend_serving.py index 4d63c08e..6b60c203 100644 --- a/backend/alwrity_utils/frontend_serving.py +++ b/backend/alwrity_utils/frontend_serving.py @@ -1,19 +1,67 @@ """ Frontend Serving Module -Handles React frontend serving and static file mounting. +Handles React frontend serving and static file mounting with cache headers. """ import os from pathlib import Path -from fastapi import FastAPI +from fastapi import FastAPI, Request from fastapi.staticfiles import StaticFiles -from fastapi.responses import FileResponse +from fastapi.responses import FileResponse, Response +from starlette.middleware.base import BaseHTTPMiddleware from loguru import logger from typing import Dict, Any +class CacheHeadersMiddleware(BaseHTTPMiddleware): + """ + Middleware to add cache headers to static files. + + This improves performance by allowing browsers to cache static assets + (JS, CSS, images) for 1 year, reducing repeat visit load times. + """ + + async def dispatch(self, request: Request, call_next): + response = await call_next(request) + + # Only add cache headers to static files + if request.url.path.startswith("/static/"): + path = request.url.path.lower() + + # Check if file has a hash in its name (React build pattern: filename.hash.ext) + # Examples: bundle.abc123.js, main.def456.chunk.js, vendors.789abc.js + import re + # Pattern matches: filename.hexhash.ext or filename.hexhash.chunk.ext + hash_pattern = r'\.[a-f0-9]{8,}\.' 
+ has_hash = bool(re.search(hash_pattern, path)) + + # File extensions that should be cached + cacheable_extensions = ['.js', '.css', '.woff', '.woff2', '.ttf', '.otf', + '.png', '.jpg', '.jpeg', '.webp', '.svg', '.ico', '.gif'] + is_cacheable_file = any(path.endswith(ext) for ext in cacheable_extensions) + + if is_cacheable_file: + if has_hash: + # Immutable files (with hash) - cache for 1 year + # These files never change (new hash = new file) + response.headers["Cache-Control"] = "public, max-age=31536000, immutable" + response.headers["Expires"] = "Thu, 31 Dec 2025 23:59:59 GMT" + else: + # Non-hashed files - shorter cache (1 hour) + # These might be updated, so cache for shorter time + response.headers["Cache-Control"] = "public, max-age=3600" + + # Never cache HTML files (index.html) + elif request.url.path == "/" or request.url.path.endswith(".html"): + response.headers["Cache-Control"] = "no-cache, no-store, must-revalidate" + response.headers["Pragma"] = "no-cache" + response.headers["Expires"] = "0" + + return response + + class FrontendServing: - """Manages React frontend serving and static file mounting.""" + """Manages React frontend serving and static file mounting with cache headers.""" def __init__(self, app: FastAPI): self.app = app @@ -21,14 +69,26 @@ class FrontendServing: self.static_path = os.path.join(self.frontend_build_path, "static") def setup_frontend_serving(self) -> bool: - """Set up React frontend serving and static file mounting.""" + """ + Set up React frontend serving and static file mounting with cache headers. + + This method: + 1. Adds cache headers middleware for static files + 2. Mounts static files directory + 3. 
Configures proper caching for performance + """ try: - logger.info("Setting up frontend serving...") + logger.info("Setting up frontend serving with cache headers...") + + # Add cache headers middleware BEFORE mounting static files + self.app.add_middleware(CacheHeadersMiddleware) + logger.info("Cache headers middleware added") # Mount static files for React app (only if directory exists) if os.path.exists(self.static_path): self.app.mount("/static", StaticFiles(directory=self.static_path), name="static") - logger.info("Frontend static files mounted successfully") + logger.info("Frontend static files mounted successfully with cache headers") + logger.info("Static files will be cached for 1 year (immutable files) or 1 hour (others)") return True else: logger.info("Frontend build directory not found. Static files not mounted.") @@ -39,13 +99,23 @@ class FrontendServing: return False def serve_frontend(self) -> FileResponse | Dict[str, Any]: - """Serve the React frontend.""" + """ + Serve the React frontend index.html. + + Note: index.html is never cached to ensure users always get the latest version. + Static assets (JS/CSS) are cached separately via middleware. + """ try: # Check if frontend build exists index_html = os.path.join(self.frontend_build_path, "index.html") if os.path.exists(index_html): - return FileResponse(index_html) + # Return FileResponse with no-cache headers for HTML + response = FileResponse(index_html) + response.headers["Cache-Control"] = "no-cache, no-store, must-revalidate" + response.headers["Pragma"] = "no-cache" + response.headers["Expires"] = "0" + return response else: return { "message": "Frontend not built. Please run 'npm run build' in the frontend directory.", diff --git a/backend/api/podcast/router.py b/backend/api/podcast/router.py index 1739e877..2a77c255 100644 --- a/backend/api/podcast/router.py +++ b/backend/api/podcast/router.py @@ -7,32 +7,48 @@ API endpoints for podcast project persistence and management. 
from fastapi import APIRouter, Depends, HTTPException, Query, Body, BackgroundTasks, Request from sqlalchemy.orm import Session from typing import List, Optional, Dict, Any -from pydantic import BaseModel, Field +from pydantic import BaseModel, Field, model_validator from datetime import datetime from pathlib import Path from urllib.parse import quote from services.database import get_db -from middleware.auth_middleware import get_current_user +from middleware.auth_middleware import get_current_user, get_current_user_with_query_token from services.podcast_service import PodcastService from models.podcast_models import PodcastProject from services.wavespeed.infinitetalk import animate_scene_with_voiceover from services.story_writer.video_generation_service import StoryVideoGenerationService +from services.llm_providers.main_image_generation import generate_image from services.llm_providers.main_video_generation import track_video_usage from services.subscription import PricingService from services.subscription.preflight_validator import validate_scene_animation_operation from api.story_writer.task_manager import task_manager from api.story_writer.utils.auth import require_authenticated_user -from api.story_writer.utils.media_utils import load_story_image_bytes, load_story_audio_bytes +# Podcast-specific media loading functions (no dependency on story_writer) from services.llm_providers.main_text_generation import llm_text_gen from services.story_writer.audio_generation_service import StoryAudioGenerationService from utils.asset_tracker import save_asset_to_library from models.story_models import StoryAudioResult from loguru import logger +from services.blog_writer.research.exa_provider import ExaResearchProvider +from types import SimpleNamespace +import tempfile +import os +import uuid router = APIRouter(prefix="/api/podcast", tags=["Podcast Maker"]) AI_VIDEO_SUBDIR = Path("AI_Videos") -audio_service = StoryAudioGenerationService() +# Initialize audio service with 
podcast_audio directory +# router.py is at: backend/api/podcast/router.py +# parents[0] = backend/api/podcast/ +# parents[1] = backend/api/ +# parents[2] = backend/ +BASE_DIR = Path(__file__).resolve().parents[2] # backend/ +PODCAST_AUDIO_DIR = (BASE_DIR / "podcast_audio").resolve() +PODCAST_AUDIO_DIR.mkdir(parents=True, exist_ok=True) +PODCAST_IMAGES_DIR = (BASE_DIR / "podcast_images").resolve() +PODCAST_IMAGES_DIR.mkdir(parents=True, exist_ok=True) +audio_service = StoryAudioGenerationService(output_dir=str(PODCAST_AUDIO_DIR)) class PodcastProjectResponse(BaseModel): @@ -80,6 +96,7 @@ class PodcastAnalyzeResponse(BaseModel): top_keywords: list[str] suggested_outlines: list[Dict[str, Any]] title_suggestions: list[str] + exa_suggested_config: Optional[Dict[str, Any]] = None class PodcastScriptRequest(BaseModel): @@ -93,6 +110,7 @@ class PodcastScriptRequest(BaseModel): class PodcastSceneLine(BaseModel): speaker: str text: str + emphasis: Optional[bool] = False class PodcastScene(BaseModel): @@ -101,6 +119,54 @@ class PodcastScene(BaseModel): duration: int lines: list[PodcastSceneLine] approved: bool = False + emotion: Optional[str] = None + imageUrl: Optional[str] = None # Generated image URL for video generation + + +class PodcastExaConfig(BaseModel): + """Exa config for podcast research.""" + exa_search_type: Optional[str] = Field(default="auto", description="auto | keyword | neural") + exa_category: Optional[str] = None + exa_include_domains: List[str] = [] + exa_exclude_domains: List[str] = [] + max_sources: int = 8 + include_statistics: Optional[bool] = False + date_range: Optional[str] = Field(default=None, description="last_month | last_3_months | last_year | all_time") + + @model_validator(mode="after") + def validate_domains(self): + if self.exa_include_domains and self.exa_exclude_domains: + # Exa API does not allow both include and exclude domains together with contents + # Prefer include_domains and drop exclude_domains + self.exa_exclude_domains = [] + 
return self + + +class PodcastExaResearchRequest(BaseModel): + """Request for podcast research using Exa directly (no blog writer).""" + topic: str + queries: List[str] + exa_config: Optional[PodcastExaConfig] = None + + +class PodcastExaSource(BaseModel): + title: str = "" + url: str = "" + excerpt: str = "" + published_at: Optional[str] = None + highlights: Optional[List[str]] = None + summary: Optional[str] = None + source_type: Optional[str] = None + index: Optional[int] = None + + +class PodcastExaResearchResponse(BaseModel): + sources: List[PodcastExaSource] + search_queries: List[str] = [] + cost: Optional[Dict[str, Any]] = None + search_type: Optional[str] = None + provider: str = "exa" + content: Optional[str] = None class PodcastScriptResponse(BaseModel): @@ -117,6 +183,7 @@ class PodcastAudioRequest(BaseModel): volume: Optional[float] = 1.0 pitch: Optional[float] = 0.0 emotion: Optional[str] = "neutral" + english_normalization: Optional[bool] = False # Better number reading for statistics class PodcastAudioResponse(BaseModel): @@ -227,6 +294,14 @@ Return JSON with: - top_keywords: 5 podcast-relevant keywords/phrases - suggested_outlines: 2 items, each with title (<=60 chars) and 4-6 short segments (bullet-friendly, factual) - title_suggestions: 3 concise episode titles (no cliffhanger storytelling) +- exa_suggested_config: suggested Exa search options to power research (keep conservative defaults to control cost), with: + - exa_search_type: "auto" | "neural" | "keyword" (prefer "auto" unless clearly news-heavy) + - exa_category: one of ["research paper","news","company","github","tweet","personal site","pdf","financial report","linkedin profile"] + - exa_include_domains: up to 3 reputable domains to prioritize (optional) + - exa_exclude_domains: up to 3 domains to avoid (optional) + - max_sources: 6-10 + - include_statistics: boolean (true if topic needs fresh stats) + - date_range: one of ["last_month","last_3_months","last_year","all_time"] (pick 
recent if time-sensitive) Requirements: - Keep language factual, actionable, and suited for spoken audio. @@ -236,7 +311,11 @@ Requirements: try: raw = llm_text_gen(prompt=prompt, user_id=user_id, json_struct=None) + except HTTPException: + # Re-raise HTTPExceptions (e.g., 429 subscription limit) - preserve error details + raise except Exception as exc: + logger.error(f"[Podcast Analyze] Analysis failed for user {user_id}: {exc}") raise HTTPException(status_code=500, detail=f"Analysis failed: {exc}") # Normalize response (accept dict or JSON string) @@ -258,12 +337,91 @@ Requirements: suggested_outlines = data.get("suggested_outlines") or [] title_suggestions = data.get("title_suggestions") or [] + exa_suggested_config = data.get("exa_suggested_config") or None + return PodcastAnalyzeResponse( audience=audience, content_type=content_type, top_keywords=top_keywords, suggested_outlines=suggested_outlines, title_suggestions=title_suggestions, + exa_suggested_config=exa_suggested_config, + ) + + +@router.post("/research/exa", response_model=PodcastExaResearchResponse) +async def podcast_research_exa( + request: PodcastExaResearchRequest, + current_user: Dict[str, Any] = Depends(get_current_user), +): + """ + Run podcast research directly via Exa (no blog writer pipeline). 
+ """ + user_id = require_authenticated_user(current_user) + + queries = [q.strip() for q in request.queries if q and q.strip()] + if not queries: + raise HTTPException(status_code=400, detail="At least one query is required for research.") + + exa_cfg = request.exa_config or PodcastExaConfig() + cfg = SimpleNamespace( + exa_search_type=exa_cfg.exa_search_type or "auto", + exa_category=exa_cfg.exa_category, + exa_include_domains=exa_cfg.exa_include_domains or [], + exa_exclude_domains=exa_cfg.exa_exclude_domains or [], + max_sources=exa_cfg.max_sources or 8, + source_types=[], + ) + + provider = ExaResearchProvider() + prompt = request.topic + + try: + result = await provider.search( + prompt=prompt, + topic=request.topic, + industry="", + target_audience="", + config=cfg, + user_id=user_id, + ) + except Exception as exc: + logger.error(f"[Podcast Exa Research] Failed for user {user_id}: {exc}") + raise HTTPException(status_code=500, detail=f"Exa research failed: {exc}") + + # Track usage if available + try: + cost_total = 0.0 + if isinstance(result, dict): + cost_total = result.get("cost", {}).get("total", 0.005) if result.get("cost") else 0.005 + provider.track_exa_usage(user_id, cost_total) + except Exception as track_err: + logger.warning(f"[Podcast Exa Research] Failed to track usage: {track_err}") + + sources_payload = [] + if isinstance(result, dict): + for src in result.get("sources", []) or []: + try: + sources_payload.append(PodcastExaSource(**src)) + except Exception: + sources_payload.append(PodcastExaSource(**{ + "title": src.get("title", ""), + "url": src.get("url", ""), + "excerpt": src.get("excerpt", ""), + "published_at": src.get("published_at"), + "highlights": src.get("highlights"), + "summary": src.get("summary"), + "source_type": src.get("source_type"), + "index": src.get("index"), + })) + + return PodcastExaResearchResponse( + sources=sources_payload, + search_queries=result.get("search_queries", queries) if isinstance(result, dict) else 
queries, + cost=result.get("cost") if isinstance(result, dict) else None, + search_type=result.get("search_type") if isinstance(result, dict) else None, + provider=result.get("provider", "exa") if isinstance(result, dict) else "exa", + content=result.get("content") if isinstance(result, dict) else None, ) @@ -277,36 +435,62 @@ async def generate_podcast_script( """ user_id = require_authenticated_user(current_user) - research_snippet = "" + # Build comprehensive research context for higher-quality scripts + research_context = "" if request.research: try: key_insights = request.research.get("keyword_analysis", {}).get("key_insights") or [] + fact_cards = request.research.get("factCards", []) or [] + mapped_angles = request.research.get("mappedAngles", []) or [] sources = request.research.get("sources", []) or [] - top_sources = [s.get("url") for s in sources[:3] if s.get("url")] - research_snippet = f"Key insights: {key_insights}. Top sources: {top_sources}" - except Exception: - research_snippet = "" - prompt = f""" -You are an expert podcast script planner. Create concise, podcast-ready scenes (not narrative fiction). 
+ top_facts = [f.get("quote", "") for f in fact_cards[:5] if f.get("quote")] + angles_summary = [ + f"{a.get('title', '')}: {a.get('why', '')}" for a in mapped_angles[:3] if a.get("title") or a.get("why") + ] + top_sources = [s.get("url") for s in sources[:3] if s.get("url")] + + research_parts = [] + if key_insights: + research_parts.append(f"Key Insights: {', '.join(key_insights[:5])}") + if top_facts: + research_parts.append(f"Key Facts: {', '.join(top_facts)}") + if angles_summary: + research_parts.append(f"Research Angles: {' | '.join(angles_summary)}") + if top_sources: + research_parts.append(f"Top Sources: {', '.join(top_sources)}") + + research_context = "\n".join(research_parts) + except Exception as exc: + logger.warning(f"Failed to parse research context: {exc}") + research_context = "" + + prompt = f"""You are an expert podcast script planner. Create natural, conversational podcast scenes. Podcast Idea: "{request.idea}" Duration: ~{request.duration_minutes} minutes Speakers: {request.speakers} (Host + optional Guest) -Research (if any): {research_snippet} + +{f"RESEARCH CONTEXT:\n{research_context}\n" if research_context else ""} Return JSON with: - scenes: array of scenes. Each scene has: - id: string - title: short scene title (<= 60 chars) - - duration: duration in seconds (aim for evenly split across total duration) - - lines: array of {{"speaker": "...", "text": "..."}}, 3-6 lines per scene, succinct and spoken-friendly. + - duration: duration in seconds (evenly split across total duration) + - emotion: string (one of: "neutral", "happy", "excited", "serious", "curious", "confident") + - lines: array of {{"speaker": "...", "text": "...", "emphasis": boolean}} + * Write natural, conversational dialogue + * Each line can be a sentence or a few sentences that flow together + * Use plain text only - no markdown formatting (no asterisks, underscores, etc.) 
+ * Mark "emphasis": true for key statistics or important points -Requirements: -- Keep language conversational, factual, and action-oriented (no cliffhangers or fictional storytelling). -- Include hooks, objections, counters, and takeaways where relevant. -- Cite no URLs in the lines; keep them clean for narration. -- Ensure total duration aligns with ~{request.duration_minutes} minutes across all scenes. +Guidelines: +- Write for spoken delivery: conversational, natural, with contractions +- Use research insights naturally - weave statistics into dialogue, don't just list them +- Vary emotion per scene based on content +- Ensure scenes match target duration: aim for ~2.5 words per second of audio +- Keep it engaging and informative, like a real podcast conversation """ try: @@ -330,18 +514,24 @@ Requirements: if not isinstance(scenes_data, list): raise HTTPException(status_code=500, detail="LLM response missing scenes array") + valid_emotions = {"neutral", "happy", "excited", "serious", "curious", "confident"} + # Normalize scenes scenes: list[PodcastScene] = [] for idx, scene in enumerate(scenes_data): title = scene.get("title") or f"Scene {idx + 1}" duration = int(scene.get("duration") or max(30, (request.duration_minutes * 60) // max(1, len(scenes_data)))) + emotion = scene.get("emotion") or "neutral" + if emotion not in valid_emotions: + emotion = "neutral" lines_raw = scene.get("lines") or [] lines: list[PodcastSceneLine] = [] for line in lines_raw: speaker = line.get("speaker") or ("Host" if len(lines) % request.speakers == 0 else "Guest") text = line.get("text") or "" + emphasis = line.get("emphasis", False) if text: - lines.append(PodcastSceneLine(speaker=speaker, text=text)) + lines.append(PodcastSceneLine(speaker=speaker, text=text, emphasis=emphasis)) scenes.append( PodcastScene( id=scene.get("id") or f"scene-{idx + 1}", @@ -349,6 +539,7 @@ Requirements: duration=duration, lines=lines, approved=False, + emotion=emotion, ) ) @@ -376,11 +567,17 @@ async 
def generate_podcast_audio( text=request.text.strip(), user_id=user_id, voice_id=request.voice_id or "Wise_Woman", - speed=request.speed or 1.0, + speed=request.speed or 1.0, # Normal speed (was 0.9, but too slow - causing duration issues) volume=request.volume or 1.0, - pitch=request.pitch or 0.0, + pitch=request.pitch or 0.0, # Normal pitch (0.0 = neutral) emotion=request.emotion or "neutral", + english_normalization=request.english_normalization or False, ) + + # Override URL to use podcast endpoint instead of story endpoint + if result.get("audio_url") and "/api/story/audio/" in result.get("audio_url", ""): + audio_filename = result.get("audio_filename", "") + result["audio_url"] = f"/api/podcast/audio/{audio_filename}" except Exception as exc: raise HTTPException(status_code=500, detail=f"Audio generation failed: {exc}") @@ -426,12 +623,410 @@ async def generate_podcast_audio( ) +class PodcastCombineAudioRequest(BaseModel): + """Request model for combining podcast audio files.""" + project_id: str + scene_ids: List[str] = Field(..., description="List of scene IDs to combine") + scene_audio_urls: List[str] = Field(..., description="List of audio URLs for each scene") + + +class PodcastCombineAudioResponse(BaseModel): + """Response model for combined podcast audio.""" + combined_audio_url: str + combined_audio_filename: str + total_duration: float + file_size: int + scene_count: int + + +@router.post("/combine-audio", response_model=PodcastCombineAudioResponse) +async def combine_podcast_audio( + request: PodcastCombineAudioRequest, + current_user: Dict[str, Any] = Depends(get_current_user), + db: Session = Depends(get_db), +): + """ + Combine multiple scene audio files into a single podcast audio file. 
+ """ + user_id = require_authenticated_user(current_user) + + if not request.scene_ids or not request.scene_audio_urls: + raise HTTPException(status_code=400, detail="Scene IDs and audio URLs are required") + + if len(request.scene_ids) != len(request.scene_audio_urls): + raise HTTPException(status_code=400, detail="Scene IDs and audio URLs count must match") + + try: + # Import moviepy for audio concatenation + try: + from moviepy import AudioFileClip, concatenate_audioclips + except ImportError: + logger.error("[Podcast] MoviePy not available for audio combination") + raise HTTPException( + status_code=500, + detail="Audio combination requires MoviePy. Please install: pip install moviepy" + ) + + # Create temporary directory for audio processing + temp_dir = Path(tempfile.gettempdir()) / f"podcast_combine_{uuid.uuid4().hex[:8]}" + temp_dir.mkdir(parents=True, exist_ok=True) + + audio_clips = [] + total_duration = 0.0 + + try: + # Log incoming request for debugging + logger.info(f"[Podcast] Combining audio: {len(request.scene_audio_urls)} URLs received") + for idx, url in enumerate(request.scene_audio_urls): + logger.info(f"[Podcast] URL {idx+1}: {url}") + + # Download and load each audio file from podcast_audio directory + for idx, audio_url in enumerate(request.scene_audio_urls): + try: + # Normalize audio URL - handle both absolute and relative paths + if audio_url.startswith("http"): + # External URL - would need to download + logger.error(f"[Podcast] External URLs not supported: {audio_url}") + raise HTTPException( + status_code=400, + detail=f"External URLs not supported. Please use local file paths." + ) + + # Handle relative paths - only /api/podcast/audio/... 
URLs are supported + audio_path = None + if audio_url.startswith("/api/"): + # Extract filename from URL + from urllib.parse import urlparse + parsed = urlparse(audio_url) + path = parsed.path if parsed.scheme else audio_url + + # Handle both /api/podcast/audio/ and /api/story/audio/ URLs (for backward compatibility) + if "/api/podcast/audio/" in path: + filename = path.split("/api/podcast/audio/", 1)[1].split("?", 1)[0].strip() + elif "/api/story/audio/" in path: + # Convert story audio URLs to podcast audio (they're in the same directory now) + filename = path.split("/api/story/audio/", 1)[1].split("?", 1)[0].strip() + logger.info(f"[Podcast] Converting story audio URL to podcast: {audio_url} -> {filename}") + else: + logger.error(f"[Podcast] Unsupported audio URL format: {audio_url}. Expected /api/podcast/audio/ or /api/story/audio/ URLs.") + continue + + if not filename: + logger.error(f"[Podcast] Could not extract filename from URL: {audio_url}") + continue + + # Podcast audio files are stored in podcast_audio directory + audio_path = (PODCAST_AUDIO_DIR / filename).resolve() + + # Security check: ensure path is within PODCAST_AUDIO_DIR + if not str(audio_path).startswith(str(PODCAST_AUDIO_DIR)): + logger.error(f"[Podcast] Attempted path traversal when resolving audio: {audio_url}") + continue + else: + logger.warning(f"[Podcast] Non-API URL format, treating as direct path: {audio_url}") + audio_path = Path(audio_url) + + if not audio_path or not audio_path.exists(): + logger.error(f"[Podcast] Audio file not found: {audio_path} (from URL: {audio_url})") + continue + + # Load audio clip + audio_clip = AudioFileClip(str(audio_path)) + audio_clips.append(audio_clip) + total_duration += audio_clip.duration + logger.info(f"[Podcast] Loaded audio {idx+1}/{len(request.scene_audio_urls)}: {audio_path.name} ({audio_clip.duration:.2f}s)") + + except HTTPException: + raise + except Exception as e: + logger.error(f"[Podcast] Failed to load audio {idx+1}: {e}", 
exc_info=True) + # Continue with other audio files + continue + + if not audio_clips: + raise HTTPException(status_code=400, detail="No valid audio files found to combine") + + # Concatenate all audio clips + logger.info(f"[Podcast] Combining {len(audio_clips)} audio clips (total duration: {total_duration:.2f}s)") + combined_audio = concatenate_audioclips(audio_clips) + + # Generate output filename + output_filename = f"podcast_combined_{request.project_id}_{uuid.uuid4().hex[:8]}.mp3" + output_path = PODCAST_AUDIO_DIR / output_filename + + # Write combined audio file + combined_audio.write_audiofile( + str(output_path), + codec="mp3", + bitrate="192k", + logger=None, # Suppress moviepy logging + ) + + # Close audio clips to free resources + for clip in audio_clips: + clip.close() + combined_audio.close() + + file_size = output_path.stat().st_size + audio_url = f"/api/podcast/audio/{output_filename}" + + logger.info(f"[Podcast] Combined audio saved: {output_path} ({file_size} bytes)") + + # Save to asset library + try: + save_asset_to_library( + db=db, + user_id=user_id, + asset_type="audio", + source_module="podcast_maker", + filename=output_filename, + file_url=audio_url, + file_path=str(output_path), + file_size=file_size, + mime_type="audio/mpeg", + title=f"Combined Podcast - {request.project_id}", + description=f"Combined podcast audio from {len(request.scene_ids)} scenes", + tags=["podcast", "audio", "combined", request.project_id], + asset_metadata={ + "project_id": request.project_id, + "scene_ids": request.scene_ids, + "scene_count": len(request.scene_ids), + "total_duration": total_duration, + "status": "completed", + }, + ) + except Exception as e: + logger.warning(f"[Podcast] Failed to save combined audio asset: {e}") + + return PodcastCombineAudioResponse( + combined_audio_url=audio_url, + combined_audio_filename=output_filename, + total_duration=total_duration, + file_size=file_size, + scene_count=len(request.scene_ids), + ) + + finally: + # Cleanup 
@router.get("/task/{task_id}/status")
async def podcast_task_status(task_id: str, current_user: Dict[str, Any] = Depends(get_current_user)):
    """Expose task status under podcast namespace (reuses shared task manager).

    Rejects unauthenticated callers, then returns whatever the shared
    ``task_manager`` reports for ``task_id``.
    """
    require_authenticated_user(current_user)
    return task_manager.get_task_status(task_id)


class PodcastImageRequest(BaseModel):
    """Request for generating an image for a podcast scene."""
    # Identifier of the scene; used in the generated filename and asset tags.
    scene_id: str
    # Scene title; required (an empty/blank title is rejected with HTTP 400).
    scene_title: str
    scene_content: Optional[str] = None  # Optional: scene lines text for context
    idea: Optional[str] = None  # Optional: podcast idea for context
    # Requested output dimensions, forwarded to the image generator.
    width: int = 1024
    height: int = 1024


class PodcastImageResponse(BaseModel):
    """Response for podcast scene image generation."""
    scene_id: str
    scene_title: str
    # Name of the file written to the podcast images directory.
    image_filename: str
    # API path under which the image is exposed (/api/podcast/images/<filename>).
    image_url: str
    # Dimensions, provider and model as reported by the generation result.
    width: int
    height: int
    provider: str
    model: Optional[str] = None
    # Rough per-image cost estimate, not a metered value.
    cost: float


# Rough estimate: ~$0.04 per image for most providers.
_ESTIMATED_IMAGE_COST_USD = 0.04


def _safe_filename_part(value: str, max_length: int = 30) -> str:
    """Return ``value`` truncated and reduced to filename-safe characters.

    Every character that is not alphanumeric, ``-`` or ``_`` is replaced by
    ``_``, so path separators and ``.`` can never reach the filesystem path
    or the served URL.
    """
    return "".join(
        ch if ch.isalnum() or ch in ("-", "_") else "_"
        for ch in value[:max_length]
    )


def _build_scene_image_prompt(
    scene_title: str,
    scene_content: Optional[str] = None,
    idea: Optional[str] = None,
) -> str:
    """Assemble the comma-separated image prompt for a podcast scene."""
    prompt_parts = [
        "Professional podcast studio setting, modern and clean",
        f"Scene topic: {scene_title}",
    ]

    if scene_content:
        # Only the first 200 characters are used as context; newlines are
        # flattened so the prompt stays on a single line.
        content_preview = scene_content[:200].replace("\n", " ")
        prompt_parts.append(f"Content context: {content_preview}")

    if idea:
        prompt_parts.append(f"Podcast theme: {idea[:100]}")

    prompt_parts.extend([
        "Professional lighting, podcast microphone visible",
        "Modern podcast studio aesthetic, clean background",
        "High quality, professional photography style",
        "Suitable for video generation with talking avatar",
    ])

    return ", ".join(prompt_parts)


@router.post("/image", response_model=PodcastImageResponse)
async def generate_podcast_scene_image(
    request: PodcastImageRequest,
    current_user: Dict[str, Any] = Depends(get_current_user),
    db: Session = Depends(get_db),
):
    """
    Generate an AI image for a podcast scene.

    Creates a professional, podcast-appropriate image based on scene title and content.
    The image bytes are written to the ``podcast_images`` directory and the
    asset is recorded in the asset library (best effort: a failure there is
    logged and does not fail the request).

    Raises:
        HTTPException: 400 when the scene title is empty or blank; 500 when
            image generation or saving the file fails.
    """
    user_id = require_authenticated_user(current_user)

    # A whitespace-only title carries no information for the prompt either.
    if not request.scene_title or not request.scene_title.strip():
        raise HTTPException(status_code=400, detail="Scene title is required")

    try:
        # Build image prompt from scene context
        image_prompt = _build_scene_image_prompt(
            request.scene_title,
            scene_content=request.scene_content,
            idea=request.idea,
        )

        logger.info(f"[Podcast] Generating image for scene {request.scene_id}: {request.scene_title}")

        # Generate image using main_image_generation service
        image_options = {
            "provider": None,  # Auto-select provider
            "width": request.width,
            "height": request.height,
        }

        # NOTE(review): generate_image is called synchronously inside an async
        # handler; if it blocks on network I/O it will hold the event loop.
        # Consider offloading to a threadpool once its thread-safety is confirmed.
        result = generate_image(
            prompt=image_prompt,
            options=image_options,
            user_id=user_id,
        )

        # Save image to podcast images directory
        base_dir = Path(__file__).parent.parent.parent.parent
        podcast_images_dir = base_dir / "podcast_images"
        podcast_images_dir.mkdir(parents=True, exist_ok=True)

        # Generate filename. Both client-supplied parts are sanitized: the
        # scene id is untrusted input and must not be able to introduce path
        # separators or ".." into the path or the served URL.
        safe_scene_id = _safe_filename_part(request.scene_id, max_length=64)
        clean_title = _safe_filename_part(request.scene_title, max_length=30)
        unique_id = uuid.uuid4().hex[:8]
        image_filename = f"scene_{safe_scene_id}_{clean_title}_{unique_id}.png"
        image_path = podcast_images_dir / image_filename

        # Save image
        image_path.write_bytes(result.image_bytes)

        logger.info(f"[Podcast] Saved image to: {image_path}")

        # Create image URL (served via API endpoint)
        image_url = f"/api/podcast/images/{image_filename}"

        # Save to asset library
        try:
            save_asset_to_library(
                db=db,
                user_id=user_id,
                asset_type="image",
                source_module="podcast_maker",
                filename=image_filename,
                file_url=image_url,
                file_path=str(image_path),
                file_size=len(result.image_bytes),
                mime_type="image/png",
                title=f"{request.scene_title} - Podcast Scene",
                description=f"Podcast scene image: {request.scene_title}",
                prompt=image_prompt,
                tags=["podcast", "scene", request.scene_id],
                provider=result.provider,
                model=result.model,
                asset_metadata={
                    "scene_id": request.scene_id,
                    "scene_title": request.scene_title,
                    "status": "completed",
                },
            )
        except Exception as e:
            # Deliberate best effort: the image itself was generated and
            # stored, so a library bookkeeping failure must not fail the call.
            logger.warning(f"[Podcast] Failed to save image asset: {e}")

        return PodcastImageResponse(
            scene_id=request.scene_id,
            scene_title=request.scene_title,
            image_filename=image_filename,
            image_url=image_url,
            width=result.width,
            height=result.height,
            provider=result.provider,
            model=result.model,
            cost=_ESTIMATED_IMAGE_COST_USD,
        )

    except HTTPException:
        raise
    except Exception as exc:
        logger.error(f"[Podcast] Image generation failed: {exc}", exc_info=True)
        raise HTTPException(status_code=500, detail=f"Image generation failed: {str(exc)}")