From 1d745c9bc8575ea14136af299f8b225cd027bbe8 Mon Sep 17 00:00:00 2001 From: ajaysi Date: Tue, 16 Dec 2025 16:25:52 +0530 Subject: [PATCH] AI podcast project --- .gitignore | 5 + backend/alwrity_utils/frontend_serving.py | 6 +- backend/api/podcast/constants.py | 28 + backend/api/podcast/handlers/__init__.py | 6 + backend/api/podcast/handlers/analysis.py | 96 + backend/api/podcast/handlers/audio.py | 324 ++++ backend/api/podcast/handlers/avatar.py | 381 ++++ backend/api/podcast/handlers/images.py | 431 +++++ backend/api/podcast/handlers/projects.py | 203 ++ backend/api/podcast/handlers/research.py | 99 + backend/api/podcast/handlers/script.py | 142 ++ backend/api/podcast/handlers/video.py | 585 ++++++ backend/api/podcast/models.py | 280 +++ backend/api/podcast/presenter_personas.py | 143 ++ backend/api/podcast/router.py | 1699 +---------------- backend/api/podcast/utils.py | 105 + .../story_writer/routes/video_generation.py | 3 +- backend/models/podcast_models.py | 3 + .../scripts/run_final_video_url_migration.py | 91 + .../llm_providers/main_audio_generation.py | 5 +- backend/services/podcast/__init__.py | 11 + .../podcast/video_combination_service.py | 382 ++++ .../story_writer/audio_generation_service.py | 12 + .../story_writer/video_generation_service.py | 94 +- backend/services/wavespeed/client.py | 376 +++- backend/services/wavespeed/infinitetalk.py | 77 +- frontend/BUILD_OPTIMIZATION.md | 197 -- frontend/OPTIMIZATION_PROGRESS.md | 114 -- frontend/PERFORMANCE_OPTIMIZATIONS.md | 162 -- frontend/UNUSED_JAVASCRIPT_OPTIMIZATION.md | 231 --- .../components/PodcastMaker/AnalysisPanel.tsx | 312 ++- .../components/PodcastMaker/CreateModal.tsx | 603 ++++-- .../src/components/PodcastMaker/FactCard.tsx | 107 +- .../PodcastMaker/PodcastDashboard.tsx | 6 + .../PodcastMaker/PodcastDashboard/Header.tsx | 76 +- .../PodcastDashboard/ResearchSummary.tsx | 80 +- .../PodcastDashboard/usePodcastWorkflow.ts | 55 +- .../components/PodcastMaker/RenderQueue.tsx | 307 ++- 
.../RenderQueue/SceneActionButtons.tsx | 19 +- .../PodcastMaker/RenderQueue/SceneCard.tsx | 156 +- .../RenderQueue/VideoRegenerateModal.tsx | 228 +++ .../RenderQueue/useRenderQueue.ts | 525 ++++- .../ScriptEditor/AudioRegenerateModal.tsx | 464 +++++ .../ScriptEditor/ImageRegenerateModal.tsx | 563 ++++++ .../PodcastMaker/ScriptEditor/SceneEditor.tsx | 398 +++- .../ScriptEditor/ScriptEditor.tsx | 17 +- frontend/src/components/PodcastMaker/types.ts | 8 + .../PodcastMaker/ui/SecondaryButton.tsx | 27 +- frontend/src/hooks/usePodcastProjectState.ts | 65 +- frontend/src/services/podcastApi.ts | 143 +- 50 files changed, 7637 insertions(+), 2813 deletions(-) create mode 100644 backend/api/podcast/constants.py create mode 100644 backend/api/podcast/handlers/__init__.py create mode 100644 backend/api/podcast/handlers/analysis.py create mode 100644 backend/api/podcast/handlers/audio.py create mode 100644 backend/api/podcast/handlers/avatar.py create mode 100644 backend/api/podcast/handlers/images.py create mode 100644 backend/api/podcast/handlers/projects.py create mode 100644 backend/api/podcast/handlers/research.py create mode 100644 backend/api/podcast/handlers/script.py create mode 100644 backend/api/podcast/handlers/video.py create mode 100644 backend/api/podcast/models.py create mode 100644 backend/api/podcast/presenter_personas.py create mode 100644 backend/api/podcast/utils.py create mode 100644 backend/scripts/run_final_video_url_migration.py create mode 100644 backend/services/podcast/__init__.py create mode 100644 backend/services/podcast/video_combination_service.py delete mode 100644 frontend/BUILD_OPTIMIZATION.md delete mode 100644 frontend/OPTIMIZATION_PROGRESS.md delete mode 100644 frontend/PERFORMANCE_OPTIMIZATIONS.md delete mode 100644 frontend/UNUSED_JAVASCRIPT_OPTIMIZATION.md create mode 100644 frontend/src/components/PodcastMaker/RenderQueue/VideoRegenerateModal.tsx create mode 100644 
frontend/src/components/PodcastMaker/ScriptEditor/AudioRegenerateModal.tsx create mode 100644 frontend/src/components/PodcastMaker/ScriptEditor/ImageRegenerateModal.tsx diff --git a/.gitignore b/.gitignore index 6e396a7c..c8f45096 100644 --- a/.gitignore +++ b/.gitignore @@ -14,6 +14,11 @@ podcast_audio/* backend/podcast_audio/ +podcast_audio/ +podcast_images/ +youtube_videos/ +backend/podcast_images/ +backend/podcast_videos/ .cursorignore story_videos diff --git a/backend/alwrity_utils/frontend_serving.py b/backend/alwrity_utils/frontend_serving.py index 6b60c203..5b46a477 100644 --- a/backend/alwrity_utils/frontend_serving.py +++ b/backend/alwrity_utils/frontend_serving.py @@ -45,7 +45,11 @@ class CacheHeadersMiddleware(BaseHTTPMiddleware): # Immutable files (with hash) - cache for 1 year # These files never change (new hash = new file) response.headers["Cache-Control"] = "public, max-age=31536000, immutable" - response.headers["Expires"] = "Thu, 31 Dec 2025 23:59:59 GMT" + # Expires header calculated dynamically to match max-age + # Modern browsers prefer Cache-Control, but Expires provides compatibility + from datetime import datetime, timedelta + expires_date = datetime.utcnow() + timedelta(seconds=31536000) + response.headers["Expires"] = expires_date.strftime("%a, %d %b %Y %H:%M:%S GMT") else: # Non-hashed files - shorter cache (1 hour) # These might be updated, so cache for shorter time diff --git a/backend/api/podcast/constants.py b/backend/api/podcast/constants.py new file mode 100644 index 00000000..31f9863d --- /dev/null +++ b/backend/api/podcast/constants.py @@ -0,0 +1,28 @@ +""" +Podcast API Constants + +Centralized constants and directory configuration for podcast module. 
+"""
+
+from pathlib import Path
+from services.story_writer.audio_generation_service import StoryAudioGenerationService
+
+# Directory paths (each directory is created at import time if it is missing)
+# constants.py is at: backend/api/podcast/constants.py
+# parents[0] = backend/api/podcast/
+# parents[1] = backend/api/
+# parents[2] = backend/
+BASE_DIR = Path(__file__).resolve().parents[2]  # backend/
+PODCAST_AUDIO_DIR = (BASE_DIR / "podcast_audio").resolve()
+PODCAST_AUDIO_DIR.mkdir(parents=True, exist_ok=True)
+PODCAST_IMAGES_DIR = (BASE_DIR / "podcast_images").resolve()
+PODCAST_IMAGES_DIR.mkdir(parents=True, exist_ok=True)
+PODCAST_VIDEOS_DIR = (BASE_DIR / "podcast_videos").resolve()
+PODCAST_VIDEOS_DIR.mkdir(parents=True, exist_ok=True)
+
+# Video subdirectory (relative path; how it is joined is decided by callers)
+AI_VIDEO_SUBDIR = Path("AI_Videos")
+
+# Shared audio service instance, writing its output into PODCAST_AUDIO_DIR
+audio_service = StoryAudioGenerationService(output_dir=str(PODCAST_AUDIO_DIR))
+
diff --git a/backend/api/podcast/handlers/__init__.py b/backend/api/podcast/handlers/__init__.py
new file mode 100644
index 00000000..c0306a19
--- /dev/null
+++ b/backend/api/podcast/handlers/__init__.py
@@ -0,0 +1,6 @@
+"""
+Podcast API Handlers
+
+Handler modules for different podcast operations.
+"""
+
diff --git a/backend/api/podcast/handlers/analysis.py b/backend/api/podcast/handlers/analysis.py
new file mode 100644
index 00000000..cb3558fd
--- /dev/null
+++ b/backend/api/podcast/handlers/analysis.py
@@ -0,0 +1,96 @@
+"""
+Podcast Analysis Handlers
+
+Analysis endpoint for podcast ideas.
+"""
+
+from fastapi import APIRouter, Depends, HTTPException
+from typing import Dict, Any
+import json
+
+from middleware.auth_middleware import get_current_user
+from api.story_writer.utils.auth import require_authenticated_user
+from services.llm_providers.main_text_generation import llm_text_gen
+from loguru import logger
+from ..models import PodcastAnalyzeRequest, PodcastAnalyzeResponse
+
+router = APIRouter()
+
+
+@router.post("/analyze", response_model=PodcastAnalyzeResponse)
+async def analyze_podcast_idea(
+    request: PodcastAnalyzeRequest,
+    current_user: Dict[str, Any] = Depends(get_current_user),
+):
+    """
+    Analyze a podcast idea and return podcast-oriented outlines, keywords, and titles.
+    This uses the shared LLM provider but with a podcast-specific prompt (not story format).
+
+    The LLM reply may be a dict or a JSON string; either way it must describe a
+    JSON object. Missing or empty fields fall back to the defaults below.
+
+    Raises:
+        HTTPException: passed through unchanged when the LLM layer raises one;
+            500 when the LLM call fails for any other reason, when a string
+            reply is not valid JSON, or when the reply is not a JSON object.
+    """
+    user_id = require_authenticated_user(current_user)
+
+    prompt = f"""
+You are an expert podcast producer. Given a podcast idea, craft concise podcast-ready assets
+that sound like episode plans (not fiction stories).
+
+Podcast Idea: "{request.idea}"
+Duration: ~{request.duration} minutes
+Speakers: {request.speakers} (host + optional guest)
+
+Return JSON with:
+- audience: short target audience description
+- content_type: podcast style/format
+- top_keywords: 5 podcast-relevant keywords/phrases
+- suggested_outlines: 2 items, each with title (<=60 chars) and 4-6 short segments (bullet-friendly, factual)
+- title_suggestions: 3 concise episode titles (no cliffhanger storytelling)
+- exa_suggested_config: suggested Exa search options to power research (keep conservative defaults to control cost), with:
+  - exa_search_type: "auto" | "neural" | "keyword" (prefer "auto" unless clearly news-heavy)
+  - exa_category: one of ["research paper","news","company","github","tweet","personal site","pdf","financial report","linkedin profile"]
+  - exa_include_domains: up to 3 reputable domains to prioritize (optional)
+  - exa_exclude_domains: up to 3 domains to avoid (optional)
+  - max_sources: 6-10
+  - include_statistics: boolean (true if topic needs fresh stats)
+  - date_range: one of ["last_month","last_3_months","last_year","all_time"] (pick recent if time-sensitive)
+
+Requirements:
+- Keep language factual, actionable, and suited for spoken audio.
+- Avoid narrative fiction tone; focus on insights, hooks, objections, and takeaways.
+- Prefer 2024-2025 context when relevant.
+"""
+
+    try:
+        raw = llm_text_gen(prompt=prompt, user_id=user_id, json_struct=None)
+    except HTTPException:
+        # Re-raise HTTPExceptions (e.g., 429 subscription limit) - preserve error details
+        raise
+    except Exception as exc:
+        logger.error(f"[Podcast Analyze] Analysis failed for user {user_id}: {exc}")
+        raise HTTPException(status_code=500, detail=f"Analysis failed: {exc}") from exc
+
+    # Normalize response (accept dict or JSON string)
+    if isinstance(raw, str):
+        try:
+            data = json.loads(raw)
+        except json.JSONDecodeError as exc:
+            raise HTTPException(status_code=500, detail="LLM returned non-JSON output") from exc
+    else:
+        data = raw
+
+    # A JSON string can decode to a list, number or null; those have no .get(),
+    # so reject anything that is not an object before reading fields from it.
+    if not isinstance(data, dict):
+        raise HTTPException(status_code=500, detail="Unexpected LLM response format")
+
+    audience = data.get("audience") or "Growth-focused professionals"
+    content_type = data.get("content_type") or "Interview + insights"
+    top_keywords = data.get("top_keywords") or []
+    suggested_outlines = data.get("suggested_outlines") or []
+    title_suggestions = data.get("title_suggestions") or []
+
+    exa_suggested_config = data.get("exa_suggested_config") or None
+
+    return PodcastAnalyzeResponse(
+        audience=audience,
+        content_type=content_type,
+        top_keywords=top_keywords,
+        suggested_outlines=suggested_outlines,
+        title_suggestions=title_suggestions,
+        exa_suggested_config=exa_suggested_config,
+    )
+
diff --git a/backend/api/podcast/handlers/audio.py b/backend/api/podcast/handlers/audio.py
new file mode 100644
index 00000000..40955fdb
--- /dev/null
+++ b/backend/api/podcast/handlers/audio.py
@@ -0,0 +1,324 @@
+"""
+Podcast Audio Handlers
+
+Audio generation, combining, and serving endpoints.
+"""
+
+from fastapi import APIRouter, Depends, HTTPException
+from fastapi.responses import FileResponse
+from sqlalchemy.orm import Session
+from typing import Dict, Any
+from pathlib import Path
+from urllib.parse import urlparse
+import tempfile
+import uuid
+import shutil
+
+from services.database import get_db
+from middleware.auth_middleware import get_current_user, get_current_user_with_query_token
+from api.story_writer.utils.auth import require_authenticated_user
+from utils.asset_tracker import save_asset_to_library
+from models.story_models import StoryAudioResult
+from loguru import logger
+from ..constants import PODCAST_AUDIO_DIR, audio_service
+from ..models import (
+    PodcastAudioRequest,
+    PodcastAudioResponse,
+    PodcastCombineAudioRequest,
+    PodcastCombineAudioResponse,
+)
+
+router = APIRouter()
+
+
+@router.post("/audio", response_model=PodcastAudioResponse)
+async def generate_podcast_audio(
+    request: PodcastAudioRequest,
+    current_user: Dict[str, Any] = Depends(get_current_user),
+    db: Session = Depends(get_db),
+):
+    """
+    Generate AI audio for a podcast scene using shared audio service.
+
+    The result is also registered in the asset library on a best-effort basis:
+    a failure there is logged as a warning and does not fail the request.
+
+    Raises:
+        HTTPException: 400 when the text is empty or whitespace; any
+            HTTPException raised during generation is passed through
+            unchanged; every other generation error becomes a 500.
+    """
+    user_id = require_authenticated_user(current_user)
+
+    if not request.text or not request.text.strip():
+        raise HTTPException(status_code=400, detail="Text is required")
+
+    try:
+        result: StoryAudioResult = audio_service.generate_ai_audio(
+            scene_number=0,
+            scene_title=request.scene_title,
+            text=request.text.strip(),
+            user_id=user_id,
+            voice_id=request.voice_id or "Wise_Woman",
+            speed=request.speed or 1.0,  # Normal speed (was 0.9, but too slow - causing duration issues)
+            volume=request.volume or 1.0,
+            pitch=request.pitch or 0.0,  # Normal pitch (0.0 = neutral)
+            emotion=request.emotion or "neutral",
+            english_normalization=request.english_normalization or False,
+            sample_rate=request.sample_rate,
+            bitrate=request.bitrate,
+            channel=request.channel,
+            format=request.format,
+            language_boost=request.language_boost,
+            enable_sync_mode=request.enable_sync_mode,
+        )
+
+        # Override URL to use podcast endpoint instead of story endpoint
+        if result.get("audio_url") and "/api/story/audio/" in result.get("audio_url", ""):
+            audio_filename = result.get("audio_filename", "")
+            result["audio_url"] = f"/api/podcast/audio/{audio_filename}"
+    except HTTPException:
+        # Keep the original status code and detail instead of masking it as a
+        # generic 500 (same policy as the analysis handler).
+        # NOTE(review): presumably the audio service can raise HTTPException
+        # (e.g. subscription limits) - confirm against the service.
+        raise
+    except Exception as exc:
+        raise HTTPException(status_code=500, detail=f"Audio generation failed: {exc}") from exc
+
+    # Save to asset library (podcast module)
+    try:
+        if result.get("audio_url"):
+            save_asset_to_library(
+                db=db,
+                user_id=user_id,
+                asset_type="audio",
+                source_module="podcast_maker",
+                filename=result.get("audio_filename", ""),
+                file_url=result.get("audio_url", ""),
+                file_path=result.get("audio_path"),
+                file_size=result.get("file_size"),
+                mime_type="audio/mpeg",
+                title=f"{request.scene_title} - Podcast",
+                description="Podcast scene narration",
+                tags=["podcast", "audio", request.scene_id],
+                provider=result.get("provider"),
+                model=result.get("model"),
+                cost=result.get("cost"),
+                asset_metadata={
+                    "scene_id": request.scene_id,
+                    "scene_title": request.scene_title,
+                    "status": "completed",
+                },
+            )
+    except Exception as e:
+        logger.warning(f"[Podcast] Failed to save audio asset: {e}")
+
+    return PodcastAudioResponse(
+        scene_id=request.scene_id,
+        scene_title=request.scene_title,
+        audio_filename=result.get("audio_filename", ""),
+        audio_url=result.get("audio_url", ""),
+        provider=result.get("provider", "wavespeed"),
+        model=result.get("model", "minimax/speech-02-hd"),
+        voice_id=result.get("voice_id", request.voice_id or "Wise_Woman"),
+        text_length=result.get("text_length", len(request.text)),
+        file_size=result.get("file_size", 0),
+        cost=result.get("cost", 0.0),
+    )
+
+
+def _is_within_audio_dir(path: Path) -> bool:
+    """Return True when the already-resolved *path* lies inside PODCAST_AUDIO_DIR."""
+    try:
+        # Component-wise check; a plain string-prefix test would also accept
+        # sibling directories such as "<...>/podcast_audio_other".
+        path.relative_to(PODCAST_AUDIO_DIR)
+    except ValueError:
+        return False
+    return True
+
+
+def _resolve_scene_audio_path(audio_url: str):
+    """Map a scene audio URL to an existing file inside PODCAST_AUDIO_DIR.
+
+    Returns the resolved Path, or None when the URL cannot be mapped to an
+    existing file in the podcast audio directory (the reason is logged).
+
+    Raises:
+        HTTPException: 400 when *audio_url* starts with "http".
+    """
+    if audio_url.startswith("http"):
+        # External URL - would need to download
+        logger.error(f"[Podcast] External URLs not supported: {audio_url}")
+        raise HTTPException(
+            status_code=400,
+            detail="External URLs not supported. Please use local file paths."
+        )
+
+    if audio_url.startswith("/api/"):
+        # Extract filename from URL
+        parsed = urlparse(audio_url)
+        path = parsed.path if parsed.scheme else audio_url
+
+        # Handle both /api/podcast/audio/ and /api/story/audio/ URLs (for backward compatibility)
+        if "/api/podcast/audio/" in path:
+            filename = path.split("/api/podcast/audio/", 1)[1].split("?", 1)[0].strip()
+        elif "/api/story/audio/" in path:
+            # Convert story audio URLs to podcast audio (they're in the same directory now)
+            filename = path.split("/api/story/audio/", 1)[1].split("?", 1)[0].strip()
+            logger.info(f"[Podcast] Converting story audio URL to podcast: {audio_url} -> {filename}")
+        else:
+            logger.error(f"[Podcast] Unsupported audio URL format: {audio_url}. Expected /api/podcast/audio/ or /api/story/audio/ URLs.")
+            return None
+
+        if not filename:
+            logger.error(f"[Podcast] Could not extract filename from URL: {audio_url}")
+            return None
+
+        # Podcast audio files are stored in podcast_audio directory
+        audio_path = (PODCAST_AUDIO_DIR / filename).resolve()
+    else:
+        logger.warning(f"[Podcast] Non-API URL format, treating as direct path: {audio_url}")
+        audio_path = Path(audio_url).resolve()
+
+    # Security check: the value comes from the request body, so both branches
+    # must end up inside PODCAST_AUDIO_DIR before the file is opened.
+    if not _is_within_audio_dir(audio_path):
+        logger.error(f"[Podcast] Attempted path traversal when resolving audio: {audio_url}")
+        return None
+
+    if not audio_path.exists():
+        logger.error(f"[Podcast] Audio file not found: {audio_path} (from URL: {audio_url})")
+        return None
+
+    return audio_path
+
+
+@router.post("/combine-audio", response_model=PodcastCombineAudioResponse)
+async def combine_podcast_audio(
+    request: PodcastCombineAudioRequest,
+    current_user: Dict[str, Any] = Depends(get_current_user),
+    db: Session = Depends(get_db),
+):
+    """
+    Combine multiple scene audio files into a single podcast audio file.
+
+    Scene URLs that cannot be resolved or loaded are logged and skipped; the
+    request only fails when no clip at all could be loaded. The combined file
+    is written to PODCAST_AUDIO_DIR and registered in the asset library on a
+    best-effort basis.
+
+    Raises:
+        HTTPException: 400 when scene IDs / URLs are missing or their counts
+            differ, when a URL is external, or when no valid audio file was
+            found; 500 when MoviePy is unavailable or combining fails.
+    """
+    user_id = require_authenticated_user(current_user)
+
+    if not request.scene_ids or not request.scene_audio_urls:
+        raise HTTPException(status_code=400, detail="Scene IDs and audio URLs are required")
+
+    if len(request.scene_ids) != len(request.scene_audio_urls):
+        raise HTTPException(status_code=400, detail="Scene IDs and audio URLs count must match")
+
+    # Import moviepy for audio concatenation
+    try:
+        from moviepy import AudioFileClip, concatenate_audioclips
+    except ImportError:
+        logger.error("[Podcast] MoviePy not available for audio combination")
+        raise HTTPException(
+            status_code=500,
+            detail="Audio combination requires MoviePy. Please install: pip install moviepy"
+        )
+
+    audio_clips = []
+    combined_audio = None
+    total_duration = 0.0
+
+    try:
+        # Log incoming request for debugging
+        logger.info(f"[Podcast] Combining audio: {len(request.scene_audio_urls)} URLs received")
+        for idx, url in enumerate(request.scene_audio_urls):
+            logger.info(f"[Podcast] URL {idx+1}: {url}")
+
+        # Load each audio file from podcast_audio directory
+        for idx, audio_url in enumerate(request.scene_audio_urls):
+            try:
+                audio_path = _resolve_scene_audio_path(audio_url)
+                if audio_path is None:
+                    continue
+
+                # Load audio clip
+                audio_clip = AudioFileClip(str(audio_path))
+                audio_clips.append(audio_clip)
+                total_duration += audio_clip.duration
+                logger.info(f"[Podcast] Loaded audio {idx+1}/{len(request.scene_audio_urls)}: {audio_path.name} ({audio_clip.duration:.2f}s)")
+            except HTTPException:
+                raise
+            except Exception as e:
+                # logger.exception records the traceback; loguru does not
+                # interpret an exc_info keyword.
+                logger.exception(f"[Podcast] Failed to load audio {idx+1}: {e}")
+                # Continue with other audio files
+                continue
+
+        if not audio_clips:
+            raise HTTPException(status_code=400, detail="No valid audio files found to combine")
+
+        # Concatenate all audio clips
+        logger.info(f"[Podcast] Combining {len(audio_clips)} audio clips (total duration: {total_duration:.2f}s)")
+        combined_audio = concatenate_audioclips(audio_clips)
+
+        # Generate output filename. project_id comes from the request, so keep
+        # only filename-safe characters to stop it from escaping the directory.
+        safe_project_id = "".join(
+            ch if ch.isalnum() or ch in "-_" else "_" for ch in str(request.project_id)
+        )
+        output_filename = f"podcast_combined_{safe_project_id}_{uuid.uuid4().hex[:8]}.mp3"
+        output_path = PODCAST_AUDIO_DIR / output_filename
+
+        # Write combined audio file
+        combined_audio.write_audiofile(
+            str(output_path),
+            codec="mp3",
+            bitrate="192k",
+            logger=None,  # Suppress moviepy logging
+        )
+
+        file_size = output_path.stat().st_size
+        audio_url = f"/api/podcast/audio/{output_filename}"
+
+        logger.info(f"[Podcast] Combined audio saved: {output_path} ({file_size} bytes)")
+
+        # Save to asset library
+        try:
+            save_asset_to_library(
+                db=db,
+                user_id=user_id,
+                asset_type="audio",
+                source_module="podcast_maker",
+                filename=output_filename,
+                file_url=audio_url,
+                file_path=str(output_path),
+                file_size=file_size,
+                mime_type="audio/mpeg",
+                title=f"Combined Podcast - {request.project_id}",
+                description=f"Combined podcast audio from {len(request.scene_ids)} scenes",
+                tags=["podcast", "audio", "combined", request.project_id],
+                asset_metadata={
+                    "project_id": request.project_id,
+                    "scene_ids": request.scene_ids,
+                    "scene_count": len(request.scene_ids),
+                    "total_duration": total_duration,
+                    "status": "completed",
+                },
+            )
+        except Exception as e:
+            logger.warning(f"[Podcast] Failed to save combined audio asset: {e}")
+
+        return PodcastCombineAudioResponse(
+            combined_audio_url=audio_url,
+            combined_audio_filename=output_filename,
+            total_duration=total_duration,
+            file_size=file_size,
+            scene_count=len(request.scene_ids),
+        )
+
+    except HTTPException:
+        raise
+    except Exception as exc:
+        logger.exception(f"[Podcast] Audio combination failed: {exc}")
+        raise HTTPException(status_code=500, detail=f"Audio combination failed: {exc}") from exc
+    finally:
+        # Close audio clips to free resources on success and on every failure path
+        for clip in audio_clips:
+            try:
+                clip.close()
+            except Exception as e:
+                logger.warning(f"[Podcast] Failed to close audio clip: {e}")
+        if combined_audio is not None:
+            try:
+                combined_audio.close()
+            except Exception as e:
+                logger.warning(f"[Podcast] Failed to close combined audio clip: {e}")
+
+
+@router.get("/audio/{filename}")
+async def serve_podcast_audio(
+    filename: str,
+    current_user: Dict[str, Any] = Depends(get_current_user_with_query_token),
+):
+    """Serve generated podcast scene audio files.
+
+    Supports authentication via Authorization header or token query parameter.
+    Query parameter is useful for HTML elements like