From 3f984e8d0c23f925e54a01d15d6e70a26a0bae5a Mon Sep 17 00:00:00 2001 From: ajaysi Date: Wed, 6 May 2026 15:29:12 +0530 Subject: [PATCH] feat(podcast): add pre-estimate endpoint, enhance cost estimator with multi-model support, cleanup alpha pricing seeding - Add POST /podcast/pre-estimate endpoint for cost estimation before analysis - Enhance cost_estimator.py with multi-model support (gemini, audio, voice clone, image, video) - Add detailed cost breakdown (llm, audio, media costs + per-phase breakdown) - Remove redundant pricing seeding from init_alpha_subscription_tiers.py - Add SSOT pricing via PricingService.initialize_default_pricing() - Update TopicUrlInput tooltip to show estimate details - Add debug logging for pricing seeding and pre-estimate - Clean up verbose podcast mode debug logs in app.py --- backend/api/podcast/cost_estimator.py | 145 +++- backend/api/podcast/handlers/analysis.py | 445 ++++++++++- .../handlers/tavily_category_research.py | 251 ++++++ backend/api/podcast/handlers/trends.py | 28 + backend/api/podcast/models.py | 64 ++ backend/api/podcast/prompts/__init__.py | 24 + .../prompts/website_enhance_prompts.py | 187 +++++ backend/api/podcast/router.py | 3 +- backend/app.py | 5 +- backend/models/podcast_models.py | 3 + .../scripts/init_alpha_subscription_tiers.py | 189 ++--- .../gemini_audio_text.py | 3 +- backend/services/podcast_context_builder.py | 281 +++++++ .../research/trends/google_trends_service.py | 644 +++++++++------ .../services/subscription/pricing_service.py | 11 +- backend/services/user_data_service.py | 86 +- .../components/PodcastMaker/CreateModal.tsx | 421 +++++++++- .../CreateStep/AvatarSelector.tsx | 173 ++-- .../CreateStep/CategoryResearchModal.tsx | 602 ++++++++++++++ .../CreateStep/PodcastConfiguration.tsx | 48 +- .../PodcastMaker/CreateStep/TopicUrlInput.tsx | 738 ++++++++++++++---- .../CreateStep/TrendingTopicsModal.tsx | 39 +- .../CreateStep/WebsitePreviewModal.tsx | 533 +++++++++++++ .../components/Research/types/intent.types.ts | 1 + .../src/components/shared/VoiceClonePanel.tsx | 145 ++++ .../src/components/shared/VoiceSelector.tsx | 335 +------- .../components/shared/useVoiceFiltering.ts | 56 ++ .../src/components/shared/useVoicePreview.ts | 102 +++ .../src/components/shared/voiceConstants.ts | 81 ++ frontend/src/hooks/useSpeechToText.ts | 150 ++++ frontend/src/services/podcastApi.ts | 144 +++- 31 files changed, 4926 insertions(+), 1011 deletions(-) create mode 100644 backend/api/podcast/handlers/tavily_category_research.py create mode 100644 backend/api/podcast/prompts/__init__.py create mode 100644 backend/api/podcast/prompts/website_enhance_prompts.py create mode 100644 backend/services/podcast_context_builder.py create mode 100644 frontend/src/components/PodcastMaker/CreateStep/CategoryResearchModal.tsx create mode 100644 frontend/src/components/PodcastMaker/CreateStep/WebsitePreviewModal.tsx create mode 100644 frontend/src/components/shared/VoiceClonePanel.tsx create mode 100644 frontend/src/components/shared/useVoiceFiltering.ts create mode 100644 frontend/src/components/shared/useVoicePreview.ts create mode 100644 frontend/src/components/shared/voiceConstants.ts create mode 100644 frontend/src/hooks/useSpeechToText.ts diff --git a/backend/api/podcast/cost_estimator.py b/backend/api/podcast/cost_estimator.py index 42afd952..313a3f18 100644 --- a/backend/api/podcast/cost_estimator.py +++ b/backend/api/podcast/cost_estimator.py @@ -3,6 +3,13 @@ Podcast cost estimation helpers. Builds user-facing podcast estimates from the subscription pricing catalog instead of hard-coded frontend heuristics. + +Supports multiple models for each component: +- Audio TTS: minimax/speech-02-hd (default), qwen3-tts, cosyvoice-tts +- Voice Clone: qwen3, cosyvoice, minimax +- Image: qwen-image (default), ideogram-v3-turbo +- Video: wan-2.5 (default), kling-v2.5, infinitetalk +- LLM: gemini-2.5-flash (default) """ from __future__ import annotations @@ -23,6 +30,7 @@ def _load_pricing( provider: APIProvider, preferred_model: str, ) -> Optional[Dict[str, Any]]: + """Load pricing for a provider and model, with fallback to default.""" pricing = pricing_service.get_pricing_for_provider_model(provider, preferred_model) if pricing: return pricing @@ -30,6 +38,17 @@ def _load_pricing( return pricing_service.get_pricing_for_provider_model(provider, "default") +# Default models used in podcast generation +DEFAULT_MODELS = { + "gemini": "gemini-2.5-flash", + "exa": "exa-search", + "audio_tts": "minimax/speech-02-hd", + "voice_clone": "wavespeed-ai/qwen3-tts/voice-clone", + "image": "qwen-image", + "video": "wan-2.5", +} + + def estimate_podcast_cost( *, db: Session, @@ -37,88 +56,150 @@ def estimate_podcast_cost( speakers: int, query_count: int, include_avatar_phase: bool = True, + # Optional model overrides + gemini_model: str = "gemini-2.5-flash", + audio_tts_model: str = "minimax/speech-02-hd", + voice_clone_engine: str = "qwen3", + image_model: str = "qwen-image", + video_model: str = "wan-2.5", ) -> Optional[Dict[str, Any]]: """ Compute a backend estimate for podcast creation. - - Returns None when pricing rows are unavailable so UI can display "Unavailable". + + Supports customizable models for each component. + Uses pricing_catalog for accurate cost calculation. """ pricing_service = PricingService(db) - gemini_pricing = _load_pricing(pricing_service, APIProvider.GEMINI, "gemini-2.5-flash") + # Load pricing for each component and model + gemini_pricing = _load_pricing(pricing_service, APIProvider.GEMINI, gemini_model) exa_pricing = _load_pricing(pricing_service, APIProvider.EXA, "exa-search") - audio_pricing = _load_pricing(pricing_service, APIProvider.AUDIO, "minimax/speech-02-hd") - video_pricing = _load_pricing(pricing_service, APIProvider.VIDEO, "default") - image_pricing = _load_pricing(pricing_service, APIProvider.STABILITY, "qwen-image") + + # Audio TTS pricing (minimax/speech-02-hd) + audio_pricing = _load_pricing(pricing_service, APIProvider.AUDIO, audio_tts_model) + + # Voice clone pricing (different engines) + voice_clone_model = f"wavespeed-ai/{voice_clone_engine}-tts/voice-clone" + voice_clone_pricing = _load_pricing(pricing_service, APIProvider.AUDIO, voice_clone_model) + if not voice_clone_pricing: + # Try alternate model names + voice_clone_pricing = _load_pricing(pricing_service, APIProvider.AUDIO, f"{voice_clone_engine}/voice-clone") + + # Image pricing (qwen-image or ideogram) + image_pricing = _load_pricing(pricing_service, APIProvider.STABILITY, image_model) + + # Video pricing (wan-2.5, kling, or infinitetalk) + video_pricing = _load_pricing(pricing_service, APIProvider.VIDEO, video_model) + # Return None if critical pricing unavailable (fail fast) if not gemini_pricing: return None + # Configuration minutes = max(1, int(duration_minutes or 1)) speaker_count = max(1, int(speakers or 1)) research_queries = max(1, int(query_count or 1)) - # Phase-level usage assumptions (token/request proxies for pre-creation estimate). + # Token usage assumptions per phase analysis_input_tokens = 1800 analysis_output_tokens = 1000 research_synthesis_input_tokens = 2200 research_synthesis_output_tokens = 900 script_input_tokens = max(1800, minutes * 300) script_output_tokens = max(2200, minutes * 700) - - # TTS token proxy: ~900 chars per minute per speaker. + + # TTS: ~900 chars per minute per speaker estimated_tts_tokens = max(900, minutes * 900 * speaker_count) + + # Voice clone: 1 clone operation per speaker + voice_clone_count = speaker_count + # ===== COST CALCULATIONS ===== + + # 1. Analysis phase (LLM) analysis_cost = ( analysis_input_tokens * float(gemini_pricing.get("cost_per_input_token") or 0.0) + analysis_output_tokens * float(gemini_pricing.get("cost_per_output_token") or 0.0) - + float(gemini_pricing.get("cost_per_request") or 0.0) ) + + # 2. Research phase + # 2a. LLM for research synthesis research_llm_cost = ( research_synthesis_input_tokens * float(gemini_pricing.get("cost_per_input_token") or 0.0) + research_synthesis_output_tokens * float(gemini_pricing.get("cost_per_output_token") or 0.0) - + float(gemini_pricing.get("cost_per_request") or 0.0) ) - script_cost = ( - script_input_tokens * float(gemini_pricing.get("cost_per_input_token") or 0.0) - + script_output_tokens * float(gemini_pricing.get("cost_per_output_token") or 0.0) - + float(gemini_pricing.get("cost_per_request") or 0.0) - ) - + # 2b. Search API (Exa) research_search_cost = 0.0 if exa_pricing: research_search_cost = research_queries * float(exa_pricing.get("cost_per_request") or 0.0) + research_cost = research_search_cost + research_llm_cost + # 3. Script generation (LLM) + script_cost = ( + script_input_tokens * float(gemini_pricing.get("cost_per_input_token") or 0.0) + + script_output_tokens * float(gemini_pricing.get("cost_per_output_token") or 0.0) + ) + + # 4. Audio TTS tts_cost = 0.0 if audio_pricing: - tts_cost = ( - estimated_tts_tokens * float(audio_pricing.get("cost_per_input_token") or 0.0) - + float(audio_pricing.get("cost_per_request") or 0.0) + tts_cost = estimated_tts_tokens * float(audio_pricing.get("cost_per_input_token") or 0.0) + + # 5. Voice cloning (if needed) + voice_clone_cost = 0.0 + if voice_clone_pricing: + voice_clone_cost = voice_clone_count * ( + float(voice_clone_pricing.get("cost_per_request") or 0.0) + + estimated_tts_tokens * float(voice_clone_pricing.get("cost_per_input_token") or 0.0) ) - # Assume one video render request per minute (upper-bound planning estimate). - video_cost = 0.0 - if video_pricing: - video_cost = minutes * float(video_pricing.get("cost_per_request") or 0.0) - + # 6. Avatar image generation avatar_cost = 0.0 if include_avatar_phase and image_pricing: image_unit = float(image_pricing.get("cost_per_image") or image_pricing.get("cost_per_request") or 0.0) avatar_cost = speaker_count * image_unit - research_cost = research_search_cost + research_llm_cost - total = analysis_cost + research_cost + script_cost + tts_cost + video_cost + avatar_cost + # 7. Video rendering + video_cost = 0.0 + if video_pricing: + # Assume 1 video render per minute (upper bound) + video_cost = minutes * float(video_pricing.get("cost_per_request") or 0.0) + + # ===== TOTALS ===== + llm_total = analysis_cost + research_llm_cost + script_cost + audio_total = tts_cost + voice_clone_cost + media_total = avatar_cost + video_cost + total = llm_total + research_search_cost + audio_total + media_total return { + # Cost breakdown + "analysisCost": _round_money(analysis_cost), + "researchCost": _round_money(research_cost), + "researchSearchCost": _round_money(research_search_cost), + "researchLlmCost": _round_money(research_llm_cost), + "scriptCost": _round_money(script_cost), "ttsCost": _round_money(tts_cost), + "voiceCloneCost": _round_money(voice_clone_cost), "avatarCost": _round_money(avatar_cost), "videoCost": _round_money(video_cost), - "researchCost": _round_money(research_cost), - "analysisCost": _round_money(analysis_cost), - "scriptCost": _round_money(script_cost), "total": _round_money(total), + # Totals by category + "llmCost": _round_money(llm_total), + "audioCost": _round_money(audio_total), + "mediaCost": _round_money(media_total), + # Currency "currency": "USD", "source": "pricing_catalog", + # Models used for this estimate + "models": { + "llm": gemini_model, + "research": "exa-search", + "audio_tts": audio_tts_model, + "voice_clone": voice_clone_model, + "image": image_model, + "video": video_model, + }, + # Assumptions used "assumptions": { "analysis_input_tokens": analysis_input_tokens, "analysis_output_tokens": analysis_output_tokens, @@ -128,6 +209,8 @@ def estimate_podcast_cost( "script_output_tokens": script_output_tokens, "estimated_tts_tokens": estimated_tts_tokens, "research_queries": research_queries, + "voice_clone_count": voice_clone_count, "video_requests": minutes, + "avatar_requests": speaker_count if include_avatar_phase else 0, }, - } + } \ No newline at end of file diff --git a/backend/api/podcast/handlers/analysis.py b/backend/api/podcast/handlers/analysis.py index 6ba0743c..97815b1f 100644 --- a/backend/api/podcast/handlers/analysis.py +++ b/backend/api/podcast/handlers/analysis.py @@ -4,8 +4,9 @@ Podcast Analysis Handlers Analysis endpoint for podcast ideas. """ -from fastapi import APIRouter, Depends, HTTPException +from fastapi import APIRouter, Depends, HTTPException, Request from typing import Dict, Any, Optional, List +from datetime import datetime import json import uuid from sqlalchemy.orm import Session @@ -21,11 +22,18 @@ from utils.asset_tracker import save_asset_to_library from loguru import logger import os from ..constants import get_podcast_media_dir +from ..prompts import get_enhance_topic_prompt, format_website_context from ..models import ( PodcastAnalyzeRequest, PodcastAnalyzeResponse, PodcastEnhanceIdeaRequest, - PodcastEnhanceIdeaResponse + PodcastEnhanceIdeaResponse, + ExtractUrlRequest, + ExtractUrlResponse, + WebsiteAnalysisRequest, + WebsiteAnalysisResponse, + PodcastPreEstimateRequest, + PodcastPreEstimateResponse, ) from ..cost_estimator import estimate_podcast_cost @@ -37,6 +45,74 @@ def _is_podcast_only_mode() -> bool: router = APIRouter() +@router.post("/pre-estimate", response_model=PodcastPreEstimateResponse) +async def pre_estimate_cost( + request: PodcastPreEstimateRequest, + db: Session = Depends(get_db), +): + """ + Lightweight endpoint to estimate podcast creation cost before analysis. + + Takes user configuration (duration, speakers, query_count, podcast_mode) and returns + a cost estimate WITHOUT running full analysis. + + Optional model overrides can be specified to estimate with different models. + """ + try: + include_avatar_phase = request.podcast_mode != "audio_only" + + estimate = estimate_podcast_cost( + db=db, + duration_minutes=request.duration, + speakers=request.speakers, + query_count=request.query_count, + include_avatar_phase=include_avatar_phase, + # Model overrides if provided + gemini_model=request.gemini_model or "gemini-2.5-flash", + audio_tts_model=request.audio_tts_model or "minimax/speech-02-hd", + voice_clone_engine=request.voice_clone_engine or "qwen3", + image_model=request.image_model or "qwen-image", + video_model=request.video_model or "wan-2.5", + ) + + # Debug: get pricing row count and providers + from models.subscription_models import APIProviderPricing + pricing_count = db.query(APIProviderPricing).count() + providers = db.query(APIProviderPricing.provider).distinct().all() + provider_list = sorted([p[0].value for p in providers]) if providers else [] + + debug_info = { + "pricing_rows": pricing_count, + "providers": provider_list, + } + + # Log pricing debug info at warning level + logger.warning(f"[PRE-ESTIMATE] Pricing debug: rows={pricing_count}, providers={provider_list}") + logger.warning(f"[PRE-ESTIMATE] Models: llm={request.gemini_model}, tts={request.audio_tts_model}, video={request.video_model}") + + if estimate is None: + return PodcastPreEstimateResponse( + estimate=None, + error="Pricing data unavailable. Please try again later.", + pricing_available=False, + debug=debug_info, + ) + + return PodcastPreEstimateResponse( + estimate=estimate, + error=None, + pricing_available=True, + debug=debug_info, + ) + + except Exception as e: + logger.error(f"Pre-estimate error: {e}") + return PodcastPreEstimateResponse( + estimate=None, + error=str(e), + ) + + @router.post("/idea/enhance", response_model=PodcastEnhanceIdeaResponse) async def enhance_podcast_idea( request: PodcastEnhanceIdeaRequest, @@ -77,39 +153,27 @@ async def enhance_podcast_idea( except Exception as exc: logger.debug(f"[Podcast Enhance] Bible parsing skipped in podcast mode: {exc}") - prompt = f""" -You are a creative podcast producer. Generate 3 distinct, compelling podcast episode concepts from the raw idea. + # Log what's being used for context + context_used = [] + if bible_context: + context_used.append("Podcast Bible") + if request.website_data: + context_used.append("Website Extraction") + if request.topic_context: + category = request.topic_context.get("category", "unknown") + context_used.append(f"Category Research ({category})") + + logger.warning(f"[Podcast Enhance] Generating with context: {', '.join(context_used) if context_used else 'basic idea only'}") -{f"USER PERSONALIZATION CONTEXT (Podcast Bible):\n{bible_context}\n" if bible_context else ""} - -RAW IDEA/KEYWORDS: "{request.idea}" - -TASK: -Generate 3 different enhanced versions, each with a unique angle: -1. Professional & Expert-led angle (focus on authority, insights, and expertise) -2. Storytelling & Human interest angle (focus on narratives, emotions, and personal connections) -3. Trendy & Contemporary angle (focus on current trends, modern perspectives, and relevance) - -Each version should be 2-3 sentences, audience-focused, and align with host persona if provided. - -Return JSON with: -- enhanced_ideas: array of 3 strings, each string being a complete episode pitch (NOT objects, just plain strings) -- rationales: array of 3 strings explaining the approach for each version - -IMPORTANT: enhanced_ideas must be an array of plain strings, NOT objects. Example: -{{ - "enhanced_ideas": [ - "Your expert guide to AI advancement: A practical look at how AI is transforming industries...", - "The human stories behind AI innovation: From Silicon Valley to your daily life...", - "AI in 2026: What's trending and what's next in artificial intelligence..." - ], - "rationales": [ - "Professional approach focusing on expertise and authority", - "Storytelling approach emphasizing human connection", - "Contemporary approach highlighting current relevance" - ] -}} -""" + # Use new context builder for prompt generation + from services.podcast_context_builder import context_builder + context_result = context_builder.build_enhance_context( + idea=request.idea, + bible_context=bible_context, + website_data=request.website_data, + topic_context=request.topic_context, + ) + prompt = context_result["prompt"] try: raw = llm_text_gen( @@ -502,3 +566,316 @@ Requirements: except Exception as exc: logger.error(f"[Regenerate Queries] Failed for user {user_id}: {exc}") raise HTTPException(status_code=500, detail=f"Regenerate queries failed: {exc}") + + +@router.post("/extract-url", response_model=ExtractUrlResponse) +async def extract_url_content( + request: ExtractUrlRequest, + current_user: Dict[str, Any] = Depends(get_current_user), +): + """ + Extract content from a URL using Exa's get_contents API. + + This allows users to paste a blog post or article URL as their podcast topic, + and we'll extract the content to use as the podcast idea. + """ + user_id = require_authenticated_user(current_user) + + from exa_py import Exa + import os + + api_key = os.getenv("EXA_API_KEY") + if not api_key: + raise HTTPException(status_code=500, detail="EXA_API_KEY not configured") + + exa = Exa(api_key) + + logger.warning(f"[ExtractUrl] Extracting content from: {request.url} for user {user_id}") + + try: + result = exa.get_contents( + urls=[request.url], + text=True, + highlights=True, + summary=True, + subpages=2, + ) + except Exception as exa_error: + logger.error(f"[ExtractUrl] Exa call error: {exa_error}") + return ExtractUrlResponse( + success=False, + url=request.url, + error=f"Exa API error: {str(exa_error)}" + ) + + # Check for errors using the correct attribute (statuses is array of status objects) + if hasattr(result, 'statuses') and result.statuses: + for status in result.statuses: + if status.status == "error": + logger.error(f"[ExtractUrl] Failed to extract {status.id}: {status.error.tag if hasattr(status.error, 'tag') else 'unknown'}") + return ExtractUrlResponse( + success=False, + url=request.url, + error=f"Failed to extract content: {status.error.tag if hasattr(status.error, 'tag') else 'unknown error'}" + ) + + if not result.results: + return ExtractUrlResponse( + success=False, + url=request.url, + error="No content found at the provided URL" + ) + + # Extract content - safe to access result now + content = result.results[0] + + # Extract all available fields from Exa response + extracted_text = content.text or "" + extracted_summary = getattr(content, 'summary', "") or "" + extracted_title = content.title or "" + + # Highlights - extract from content.highlights array if available + highlights = [] + if hasattr(content, 'highlights') and content.highlights: + highlights = [h for h in content.highlights if h] + + # Additional fields from Exa response + image = getattr(content, 'image', None) + favicon = getattr(content, 'favicon', None) + + # Subpages - extract with their own content + subpages = [] + if hasattr(content, 'subpages') and content.subpages: + for sp in content.subpages: + subpages.append({ + 'id': sp.get('id', ''), + 'title': sp.get('title', ''), + 'url': sp.get('url', ''), + 'summary': sp.get('summary', ''), + 'text': sp.get('text', '')[:500] if sp.get('text') else '', # First 500 chars + }) + + logger.warning(f"[ExtractUrl] Successfully extracted {len(extracted_text)} chars from {request.url}") + logger.warning(f"[ExtractUrl] title={extracted_title[:50]}, summary={extracted_summary[:50]}, highlights={len(highlights)}, subpages={len(subpages)}") + + return ExtractUrlResponse( + success=True, + title=extracted_title, + text=extracted_text, + summary=extracted_summary, + author=getattr(content, 'author', None), + highlights=highlights, + url=request.url, + image=image, + favicon=favicon, + subpages=subpages, + ) + + +@router.post("/website-analysis", response_model=WebsiteAnalysisResponse) +async def save_website_analysis( + request: WebsiteAnalysisRequest, + current_user: Dict[str, Any] = Depends(get_current_user), +): + """Save the user's website analysis for reuse in future podcasts.""" + user_id = require_authenticated_user(current_user) + + try: + from services.user_data_service import user_data_service + + website_data = { + "website_url": request.website_url, + "extracted_at": datetime.now().isoformat(), + "exa_content": request.exa_content, + "full_analysis": None, + "analysis_status": "pending", + } + + success = user_data_service.save_user_data( + user_id=user_id, + data_key="website_analysis", + data_value=website_data, + ) + + if success: + logger.warning(f"[WebsiteAnalysis] Saved analysis for user {user_id}: {request.website_url}") + return WebsiteAnalysisResponse( + success=True, + website_url=request.website_url, + message="Website analysis saved successfully", + ) + else: + return WebsiteAnalysisResponse( + success=False, + error="Failed to save website analysis", + ) + + except Exception as exc: + logger.error(f"[WebsiteAnalysis] Failed to save for user {user_id}: {exc}") + return WebsiteAnalysisResponse( + success=False, + error=f"Failed to save: {str(exc)}" + ) + + +@router.get("/website-extraction") +async def get_saved_website_extraction(request: Request = None): + """Get previously saved website extraction data for this user.""" + try: + # Safely get current_user from Depends + if request is None or not hasattr(request, 'state'): + logger.warning("[WebsiteExtraction] No request or state - user not authenticated") + return {"success": False, "data": None, "error": "Not authenticated"} + + current_user = getattr(request.state, 'user', None) + if not current_user: + logger.warning("[WebsiteExtraction] No user in request state") + return {"success": False, "data": None, "error": "Not authenticated"} + + user_id = require_authenticated_user(current_user) + + from services.user_data_service import UserDataService + from services.database import get_db + db = next(get_db()) + + user_service = UserDataService(db) + extraction = user_service.get_website_extraction(user_id) + + if extraction: + logger.info(f"[WebsiteExtraction] Found saved data for user {user_id}") + return { + "success": True, + "data": extraction + } + else: + logger.info(f"[WebsiteExtraction] No saved data for user {user_id}") + return { + "success": False, + "data": None + } + + except Exception as exc: + logger.error(f"[WebsiteExtraction] Failed for user: {exc}", exc_info=True) + return { + "success": False, + "error": str(exc) + } + + +@router.post("/website-extraction") +async def save_website_extraction( + extraction: Dict[str, Any], + current_user: Dict[str, Any] = Depends(get_current_user), +): + """Save website extraction data for future use.""" + user_id = require_authenticated_user(current_user) + + try: + from services.user_data_service import UserDataService + from services.database import get_db + db = next(get_db()) + + user_service = UserDataService(db) + success = user_service.save_website_extraction(user_id, extraction) + + if success: + logger.info(f"[WebsiteExtraction] Saved for user {user_id}") + return { + "success": True, + "message": "Website extraction saved" + } + else: + return { + "success": False, + "error": "Failed to save" + } + + except Exception as exc: + logger.error(f"[WebsiteExtraction] Save failed: {exc}") + return { + "success": False, + "error": str(exc) + } + + +@router.post("/project/{project_id}/topic-context") +async def save_topic_context( + project_id: str, + topic_context: Dict[str, Any], + current_user: Dict[str, Any] = Depends(get_current_user), +): + """Save topic context (category research) to a podcast project.""" + user_id = require_authenticated_user(current_user) + + try: + from services.database import get_db + from models.podcast_models import PodcastProject + + db = next(get_db()) + + # Find the project + project = db.query(PodcastProject).filter( + PodcastProject.project_id == project_id, + PodcastProject.user_id == user_id + ).first() + + if not project: + return { + "success": False, + "error": "Project not found" + } + + # Update topic context + project.topic_context = topic_context + db.commit() + + logger.info(f"[TopicContext] Saved for project {project_id}") + return { + "success": True, + "message": "Topic context saved" + } + + except Exception as exc: + logger.error(f"[TopicContext] Save failed: {exc}") + return { + "success": False, + "error": str(exc) + } + + +@router.get("/project/{project_id}/topic-context") +async def get_topic_context( + project_id: str, + current_user: Dict[str, Any] = Depends(get_current_user), +): + """Get topic context from a podcast project.""" + user_id = require_authenticated_user(current_user) + + try: + from services.database import get_db + from models.podcast_models import PodcastProject + + db = next(get_db()) + + project = db.query(PodcastProject).filter( + PodcastProject.project_id == project_id, + PodcastProject.user_id == user_id + ).first() + + if not project: + return { + "success": False, + "error": "Project not found" + } + + return { + "success": True, + "data": project.topic_context + } + + except Exception as exc: + logger.error(f"[TopicContext] Get failed: {exc}") + return { + "success": False, + "error": str(exc) + } diff --git a/backend/api/podcast/handlers/tavily_category_research.py b/backend/api/podcast/handlers/tavily_category_research.py new file mode 100644 index 00000000..be99ade6 --- /dev/null +++ b/backend/api/podcast/handlers/tavily_category_research.py @@ -0,0 +1,251 @@ +""" +Category Research Handlers + +Research endpoints using Tavily or Exa for category-based topic discovery. +""" + +from fastapi import APIRouter, Depends, HTTPException +from typing import Dict, Any, List, Optional +from pydantic import BaseModel +from loguru import logger +from types import SimpleNamespace + +from middleware.auth_middleware import get_current_user +from services.research.tavily_service import TavilyService +from services.blog_writer.research.exa_provider import ExaResearchProvider + +router = APIRouter(prefix="/research", tags=["Podcast Category Research"]) + +CATEGORY_PROVIDER_MAP = { + "news": "tavily", + "finance": "tavily", + "research-paper": "exa", + "personal-site": "exa", +} + +EXA_CATEGORY_MAP = { + "research-paper": "research paper", + "personal-site": "personal site", +} + + +class CategoryResearchRequest(BaseModel): + category: str + keyword: Optional[str] = None + max_results: Optional[int] = 8 + website_url: Optional[str] = None + + +class CategoryTopic(BaseModel): + title: str + url: str + snippet: str + score: float + favicon: Optional[str] = None + + +class CategoryResearchResponse(BaseModel): + success: bool + category: str + provider: str + topics: List[CategoryTopic] + query: Optional[str] = None + error: Optional[str] = None + + +def _normalize_tavily_results(results: List[Dict]) -> List[CategoryTopic]: + topics = [] + for item in results: + topics.append(CategoryTopic( + title=item.get("title", ""), + url=item.get("url", ""), + snippet=item.get("content", ""), + score=item.get("score", 0.0), + favicon=item.get("favicon"), + )) + return topics + + +def _normalize_exa_results(results: List[Dict], query: str) -> List[CategoryTopic]: + topics = [] + for idx, item in enumerate(results): + score = 1.0 - (idx * 0.1) + topics.append(CategoryTopic( + title=item.get("title", "") or f"Result {idx + 1}", + url=item.get("url", ""), + snippet=item.get("summary", "") or item.get("text", "") or "", + score=max(0.5, score), + favicon=None, + )) + return topics + + +async def _search_tavily(category: str, keyword: str, max_results: int) -> CategoryResearchResponse: + logger.info(f"[CategoryResearch] Using Tavily for category={category}, keyword={keyword}") + + try: + tavily = TavilyService() + result = await tavily.search( + query=keyword, + topic=category, + search_depth="basic", + max_results=max_results, + include_favicon=True, + ) + + if not result.get("success"): + raise HTTPException( + status_code=500, + detail=result.get("error", "Tavily search failed") + ) + + topics = _normalize_tavily_results(result.get("results", [])) + logger.info(f"[CategoryResearch] Tavily found {len(topics)} topics") + + return CategoryResearchResponse( + success=True, + category=category, + provider="tavily", + topics=topics, + query=keyword, + ) + + except HTTPException: + raise + except Exception as e: + logger.error(f"[CategoryResearch] Tavily error: {e}", exc_info=True) + raise HTTPException(status_code=500, detail=str(e)) + + +async def _search_exa(category: str, keyword: str, max_results: int, website_url: Optional[str] = None) -> CategoryResearchResponse: + exa_category = EXA_CATEGORY_MAP.get(category, category) + + logger.info(f"[CategoryResearch] Exa: category={category}, exa_category={exa_category}, keyword={keyword}, website_url={website_url}") + + try: + # Import exa directly for more control + import os + from urllib.parse import urlparse + exa_api_key = os.getenv("EXA_API_KEY") + if not exa_api_key: + raise HTTPException(status_code=500, detail="EXA_API_KEY not configured") + + from exa_py import Exa + exa = Exa(exa_api_key) + logger.info(f"[CategoryResearch] Exa client initialized") + + # Build search parameters + search_params = { + "num_results": max_results, + "category": exa_category, + } + + # For personal-site, extract domain from URL if provided + include_domains = None + if category == "personal-site" and website_url: + try: + parsed = urlparse(website_url) + if parsed.netloc: + include_domains = [parsed.netloc] + logger.info(f"[CategoryResearch] Personal site - limiting to domain: {parsed.netloc}") + elif parsed.path and "." in parsed.path: + # Could be domain without protocol + include_domains = [parsed.path] + logger.info(f"[CategoryResearch] Personal site - using as domain: {parsed.path}") + except Exception as url_err: + logger.warning(f"[CategoryResearch] Failed to parse website_url: {url_err}") + + logger.info(f"[CategoryResearch] Calling Exa with params: {search_params}, include_domains={include_domains}") + + # Make the search call + results = exa.search_and_contents( + query=keyword, + type="auto" if category != "personal-site" else "neural", + num_results=max_results, + category=exa_category, + text=True, + summary=True, + include_domains=include_domains, + ) + + logger.info(f"[CategoryResearch] Exa search completed, got results") + + # Transform results to our format + topics = [] + if results and hasattr(results, 'results'): + for item in results.results: + title = getattr(item, 'title', 'Untitled') + url = getattr(item, 'url', '') + snippet = getattr(item, 'summary', '') or getattr(item, 'text', '') or '' + score = 0.8 # Default score for Exa results + + topics.append(CategoryTopic( + title=title, + url=url, + snippet=snippet[:300] if snippet else '', + score=score, + favicon=None, + )) + + logger.info(f"[CategoryResearch] Exa found {len(topics)} topics") + + return CategoryResearchResponse( + success=True, + category=category, + provider="exa", + topics=topics, + query=keyword, + ) + + except HTTPException: + raise + except Exception as e: + import traceback + logger.error(f"[CategoryResearch] Exa error: {type(e).__name__}: {e}") + logger.error(f"[CategoryResearch] Stack: {traceback.format_exc()}") + raise HTTPException(status_code=500, detail=f"Exa search failed: {str(e)}") + + +@router.post("/tavily-category", response_model=CategoryResearchResponse) +async def research_by_category( + request: CategoryResearchRequest, + current_user: Dict[str, Any] = Depends(get_current_user), +): + """ + Research topics by category using Tavily or Exa. + + Categories: + - news, finance: Uses Tavily + - research-paper, personal-site: Uses Exa + """ + category = request.category.lower() + valid_categories = list(CATEGORY_PROVIDER_MAP.keys()) + + logger.info(f"[CategoryResearch] Full request payload: category={request.category}, keyword={request.keyword}, website_url={request.website_url}") + + if category not in valid_categories: + logger.error(f"[CategoryResearch] Invalid category: {category}, valid: {valid_categories}") + raise HTTPException( + status_code=400, + detail=f"Category must be one of: {', '.join(valid_categories)}" + ) + + keyword = request.keyword or category + max_results = min(max(request.max_results or 8, 5), 10) + website_url = request.website_url + + logger.info(f"[CategoryResearch] Processing: category={category}, keyword={keyword}, max_results={max_results}, website_url={website_url}") + + provider = CATEGORY_PROVIDER_MAP.get(category, "tavily") + logger.info(f"[CategoryResearch] Selected provider: {provider} for category: {category}") + + try: + if provider == "tavily": + return await _search_tavily(category, keyword, max_results) + elif provider == "exa": + return await _search_exa(category, keyword, max_results, website_url) + else: + raise HTTPException(status_code=500, detail="Unknown provider") + except Exception as e: + logger.error(f"[CategoryResearch] Outer error: {type(e).__name__}: {e}", exc_info=True) + raise \ No newline at end of file diff --git a/backend/api/podcast/handlers/trends.py b/backend/api/podcast/handlers/trends.py index 038c81a6..588104ad 100644 --- a/backend/api/podcast/handlers/trends.py +++ b/backend/api/podcast/handlers/trends.py @@ -18,6 +18,7 @@ class PodcastTrendsRequest(BaseModel): keywords: List[str] = Field(..., min_length=1, max_length=5, description="1-5 keywords to analyze") timeframe: str = Field(default="today 12-m", description="Timeframe: 'today 3-m', 'today 12-m', 'today 5-y', 'all'") geo: str = Field(default="US", description="Country code: 'US', 'GB', 'IN', etc.") + source: str = Field(default="web", description="Data source: 'web' (Google), 'podcast' (YouTube)") class PodcastTrendsResponse(BaseModel): @@ -47,12 +48,39 @@ async def get_podcast_trends( try: service = GoogleTrendsService() + # Map 'source' to 'gprop' - 'podcast' uses YouTube for video/podcast relevance + gprop_map = {"": "", "web": "", "podcast": "youtube", "news": "news", "images": "images", "shopping": "froogle"} + gprop = gprop_map.get(request.source, "") + result = await service.analyze_trends( keywords=request.keywords, timeframe=request.timeframe, geo=request.geo, + gprop=gprop, user_id=user_id, ) + + has_error = result.get("error") + has_data = ( + len(result.get("interest_over_time", [])) > 0 + or len(result.get("interest_by_region", [])) > 0 + or len(result.get("related_topics", {}).get("top", [])) > 0 + or len(result.get("related_topics", {}).get("rising", [])) > 0 + or len(result.get("related_queries", {}).get("top", [])) > 0 + or len(result.get("related_queries", {}).get("rising", [])) > 0 + ) + + # Return error if: has error OR no data (meaning blocked/empty) + if has_error and not has_data: + error_msg = result.get("error", "") + logger.warning(f"[Trends] No data or error: {error_msg[:100]}") + return PodcastTrendsResponse(success=False, data=result, error=error_msg or "No trends data available. Google may be blocking requests.") + + # Even if no error but empty data - return error + if not has_data: + logger.warning("[Trends] Empty data returned") + return PodcastTrendsResponse(success=False, data=result, error="No trends data available. Please try different keywords.") + return PodcastTrendsResponse(success=True, data=result) except ValueError as e: raise HTTPException(status_code=400, detail=str(e)) diff --git a/backend/api/podcast/models.py b/backend/api/podcast/models.py index 25e141b4..789092b3 100644 --- a/backend/api/podcast/models.py +++ b/backend/api/podcast/models.py @@ -80,6 +80,14 @@ class PodcastEnhanceIdeaRequest(BaseModel): """Request model for enhancing a podcast idea with AI.""" idea: str = Field(..., description="The raw podcast idea or keywords") bible: Optional[Dict[str, Any]] = Field(None, description="Optional Podcast Bible for context") + website_data: Optional[Dict[str, Any]] = Field( + None, + description="Optional website extraction data for enriched context (title, summary, highlights, subpages, url)" + ) + topic_context: Optional[Dict[str, Any]] = Field( + None, + description="Optional category research context (category, topics, selected_topic)" + ) class PodcastEnhanceIdeaResponse(BaseModel): @@ -470,3 +478,59 @@ class VoiceCloneResult(BaseModel): file_size: int task_id: str status: str = "completed" + + +class ExtractUrlRequest(BaseModel): + """Request to extract content from a URL using Exa.""" + url: str = Field(..., description="URL to extract content from") + + +class ExtractUrlResponse(BaseModel): + """Response with extracted content from URL.""" + success: bool + title: Optional[str] = None + text: Optional[str] = None + summary: Optional[str] = None + author: Optional[str] = None + highlights: Optional[List[str]] = Field(default_factory=list, description="Key highlights from the content") + url: str + image: Optional[str] = None + favicon: Optional[str] = None + subpages: Optional[List[Dict[str, Any]]] = Field(default_factory=list, description="Subpages with their own content") + error: Optional[str] = None + + +class WebsiteAnalysisRequest(BaseModel): + """Request to save user's website analysis.""" + website_url: str = Field(..., description="The website URL") + exa_content: Dict[str, Any] = Field(default_factory=dict, description="Exa extracted content") + + +class WebsiteAnalysisResponse(BaseModel): + """Response for website analysis.""" + success: bool + website_url: Optional[str] = None + message: Optional[str] = None + error: Optional[str] = None + + +class PodcastPreEstimateRequest(BaseModel): + """Request model for pre-analysis cost estimate.""" + duration: int = Field(default=10, description="Target duration in minutes") + speakers: int = Field(default=1, description="Number of speakers") + query_count: int = Field(default=3, description="Number of research queries") + podcast_mode: str = Field(default="audio_video", description="Podcast mode: audio_only, video_only, or audio_video") + # Optional model overrides for cost estimation + gemini_model: Optional[str] = Field(default=None, description="LLM model: gemini-2.5-flash, gemini-1.5-flash, etc.") + audio_tts_model: Optional[str] = Field(default=None, description="Audio TTS model: minimax/speech-02-hd") + voice_clone_engine: Optional[str] = Field(default=None, description="Voice clone engine: qwen3, cosyvoice, minimax") + image_model: Optional[str] = Field(default=None, description="Image model: qwen-image, ideogram-v3-turbo") + video_model: Optional[str] = Field(default=None, description="Video model: wan-2.5, kling-v2.5-turbo-std-5s, wavespeed-ai/infinitetalk") + + +class PodcastPreEstimateResponse(BaseModel): + """Response model for pre-analysis cost estimate.""" + estimate: Optional[Dict[str, Any]] = None + error: Optional[str] = None + pricing_available: bool = Field(default=False, description="Whether pricing data is available in DB") + debug: Optional[Dict[str, Any]] = Field(default=None, description="Debug info: pricing rows count, providers") diff --git a/backend/api/podcast/prompts/__init__.py b/backend/api/podcast/prompts/__init__.py new file mode 100644 index 00000000..9004a115 --- /dev/null +++ b/backend/api/podcast/prompts/__init__.py @@ -0,0 +1,24 @@ +""" +Prompts module for podcast topic enhancement. +""" + +from .website_enhance_prompts import ( + get_enhance_topic_prompt, + format_website_context, + STANDARD_ENHANCE_PROMPT, + WEBSITE_AWARE_ENHANCE_PROMPT, +) + +from services.podcast_context_builder import ( + PodcastContextBuilder, + context_builder, +) + +__all__ = [ + "get_enhance_topic_prompt", + "format_website_context", + "STANDARD_ENHANCE_PROMPT", + "WEBSITE_AWARE_ENHANCE_PROMPT", + "PodcastContextBuilder", + "context_builder", +] \ No newline at end of file diff --git a/backend/api/podcast/prompts/website_enhance_prompts.py b/backend/api/podcast/prompts/website_enhance_prompts.py new file mode 100644 index 00000000..51f7ccb4 --- /dev/null +++ b/backend/api/podcast/prompts/website_enhance_prompts.py @@ -0,0 +1,187 @@ +""" +Website-aware prompts for podcast topic enhancement. + +This module provides prompts for enhancing podcast topics with optional +website extraction data for richer context. +""" + +from typing import Dict, Any, Optional +from string import Template + + +# Standard prompt for when no website data is available +STANDARD_ENHANCE_PROMPT = Template("""">You are a creative podcast producer. Generate 3 distinct, compelling podcast episode concepts from the raw idea. + +${bible_context} + +RAW IDEA/KEYWORDS: "$idea" + +TASK: +Generate 3 different enhanced versions, each with a unique angle: +1. Professional & Expert-led angle (focus on authority, insights, and expertise) +2. Storytelling & Human interest angle (focus on narratives, emotions, and personal connections) +3. Trendy & Contemporary angle (focus on current trends, modern perspectives, and relevance) + +Each version should be 2-3 sentences, audience-focused, and align with host persona if provided. + +Return JSON with: +- enhanced_ideas: array of 3 strings, each string being a complete episode pitch (NOT objects, just plain strings) +- rationales: array of 3 strings explaining the approach for each version + +IMPORTANT: enhanced_ideas must be an array of plain strings, NOT objects. Example: +{ + "enhanced_ideas": [ + "Your expert guide to AI advancement: A practical look at how AI is transforming industries...", + "The human stories behind AI innovation: From Silicon Valley to your daily life...", + "AI in 2026: What's trending and what's next in artificial intelligence..." + ], + "rationales": [ + "Professional approach focusing on expertise and authority", + "Storytelling approach emphasizing human connection", + "Contemporary approach highlighting current relevance" + ] +} +""") + + +# Website-aware prompt for when website data is available +WEBSITE_AWARE_ENHANCE_PROMPT = Template("""">You are a creative podcast producer. Generate 3 distinct, compelling podcast episode concepts from the raw idea, enriched with website content analysis. + +${bible_context} + +WEBSITE CONTENT ANALYSIS: +${website_context} + +RAW IDEA/KEYWORDS: "$idea" + +TASK: +Generate 3 different enhanced versions, each with a unique angle, that INCORPORATE the website content context: +1. Professional & Expert-led angle (focus on authority, insights, and expertise from the website) +2. Storytelling & Human interest angle (focus on narratives, emotions, and personal connections tied to the brand) +3. Trendy & Contemporary angle (focus on current trends, modern perspectives, and relevance leveraging the site's focus areas) + +Each version should: +- Be 2-3 sentences +- Reference specific elements from the website content when relevant +- Be audience-focused and align with host persona if provided +- NOT just repeat the website summary - create fresh podcast angles + +Return JSON with: +- enhanced_ideas: array of 3 strings, each string being a complete episode pitch (NOT objects, just plain strings) +- rationales: array of 3 strings explaining the approach for each version + +IMPORTANT: enhanced_ideas must be an array of plain strings, NOT objects. Example: +{ + "enhanced_ideas": [ + "Your expert guide to AI advancement: A practical look at how AI is transforming industries...", + "The human stories behind AI innovation: From Silicon Valley to your daily life...", + "AI in 2026: What's trending and what's next in artificial intelligence..." + ], + "rationales": [ + "Professional approach focusing on expertise and authority", + "Storytelling approach emphasizing human connection", + "Contemporary approach highlighting current relevance" + ] +} +""") + + +def get_enhance_topic_prompt( + idea: str, + bible_context: str = "", + website_data: Optional[Dict[str, Any]] = None +) -> str: + """ + Returns the appropriate prompt based on available context. + + Args: + idea: The raw podcast idea or keywords + bible_context: Optional Podcast Bible context string + website_data: Optional website extraction data + + Returns: + Formatted prompt string with appropriate context + """ + # Build bible context section + bible_section = f"USER PERSONALIZATION CONTEXT (Podcast Bible):\n{bible_context}\n" if bible_context else "" + + if website_data: + # Build website context section + website_context_parts = [] + if website_data.get('url'): + website_context_parts.append(f"Source: {website_data.get('url')}") + if website_data.get('title'): + website_context_parts.append(f"Company/Organization: {website_data.get('title')}") + if website_data.get('summary'): + website_context_parts.append(f"About: {website_data.get('summary')}") + if website_data.get('highlights'): + highlights_str = ', '.join(website_data.get('highlights', [])[:3]) + website_context_parts.append(f"Key Highlights: {highlights_str}") + if website_data.get('subpages'): + subpages_str = ', '.join([ + sp.get('title', sp.get('url', '')) + for sp in website_data.get('subpages', [])[:3] + ]) + website_context_parts.append(f"Subpages: {subpages_str}") + + website_context_str = "\n".join(website_context_parts) + + return WEBSITE_AWARE_ENHANCE_PROMPT.substitute( + idea=idea, + bible_context=bible_section, + website_context=website_context_str + ) + else: + return STANDARD_ENHANCE_PROMPT.substitute( + idea=idea, + bible_context=bible_section + ) + + +def format_website_context(website_data: Dict[str, Any]) -> str: + """ + Format website data for inclusion in progress messages. + + Args: + website_data: Website extraction data + + Returns: + Formatted string describing what's being used + """ + parts = [] + + if website_data.get('title'): + parts.append(f"• {website_data['title']}") + + if website_data.get('summary'): + summary_preview = website_data['summary'][:100] + parts.append(f"• Summary: {summary_preview}...") + + if website_data.get('highlights'): + parts.append(f"• {len(website_data['highlights'])} key highlights") + + if website_data.get('subpages'): + parts.append(f"• {len(website_data['subpages'])} subpages analyzed") + + if website_data.get('url'): + parts.append(f"• Source: {website_data['url']}") + + return "\n".join(parts) if parts else "Basic website analysis" + + if website_data.get('title'): + parts.append(f"• {website_data['title']}") + + if website_data.get('summary'): + summary_preview = website_data['summary'][:100] + parts.append(f"• Summary: {summary_preview}...") + + if website_data.get('highlights'): + parts.append(f"• {len(website_data['highlights'])} key highlights") + + if website_data.get('subpages'): + parts.append(f"• {len(website_data['subpages'])} subpages analyzed") + + if website_data.get('url'): + parts.append(f"• Source: {website_data['url']}") + + return "\n".join(parts) if parts else "Basic website analysis" \ No newline at end of file diff --git a/backend/api/podcast/router.py b/backend/api/podcast/router.py index 4257599b..43839caf 100644 --- a/backend/api/podcast/router.py +++ b/backend/api/podcast/router.py @@ -12,7 +12,7 @@ from api.story_writer.utils.auth import require_authenticated_user from api.story_writer.task_manager import task_manager # Import all handler routers -from .handlers import projects, analysis, research, script, audio, images, video, avatar, dubbing, broll, trends +from .handlers import projects, analysis, research, script, audio, images, video, avatar, dubbing, broll, trends, tavily_category_research # Create main router router = APIRouter(prefix="/api/podcast", tags=["Podcast Maker"]) @@ -29,6 +29,7 @@ router.include_router(avatar.router) router.include_router(dubbing.router) router.include_router(broll.router) router.include_router(trends.router) +router.include_router(tavily_category_research.router) @router.get("/task/{task_id}/status") diff --git a/backend/app.py b/backend/app.py index fad59c5a..db6e427d 100644 --- a/backend/app.py +++ b/backend/app.py @@ -52,7 +52,7 @@ def is_podcast_only_demo_mode() -> bool: env_val = os.getenv("ALWRITY_ENABLED_FEATURES", "all") enabled = get_enabled_features() result = "podcast" in enabled and "all" not in enabled - print(f"[DEBUG] is_podcast_only_demo_mode: ALWRITY_ENABLED_FEATURES={env_val}, enabled={enabled}, result={result}", flush=True) + # Removed debug print - too verbose during startup return result @@ -712,6 +712,9 @@ async def startup_event(): try: _log_memory_usage() + # Note: Pricing is initialized per-user in services/database.py:init_user_database() + # which runs on first database access for each user. No global seeding needed at startup. + # Skip startup health checks in podcast-only mode to avoid unnecessary DB errors if not is_podcast_only_demo_mode(): startup_report = run_startup_health_routine(app) diff --git a/backend/models/podcast_models.py b/backend/models/podcast_models.py index 7f87a700..7ecf76fc 100644 --- a/backend/models/podcast_models.py +++ b/backend/models/podcast_models.py @@ -45,6 +45,9 @@ class PodcastProject(Base): knobs = Column(JSON, nullable=True) # Knobs settings research_provider = Column(String(50), nullable=True, default="google") # Research provider + # Project-specific topic context (category research, selected topics) + topic_context = Column(JSON, nullable=True) # { category: "news"|"finance", topics: [...], selected_topic: {...} } + # UI state show_script_editor = Column(Boolean, default=False) show_render_queue = Column(Boolean, default=False) diff --git a/backend/scripts/init_alpha_subscription_tiers.py b/backend/scripts/init_alpha_subscription_tiers.py index 41229d4e..213f7310 100644 --- a/backend/scripts/init_alpha_subscription_tiers.py +++ b/backend/scripts/init_alpha_subscription_tiers.py @@ -2,6 +2,10 @@ """ Initialize Alpha Tester Subscription Tiers Creates subscription plans for alpha testing with appropriate limits. + +NOTE: Pricing is seeded via PricingService.initialize_default_pricing() +which runs in services/database.py:init_user_database() +NOT via this script. """ import sys @@ -10,7 +14,7 @@ sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) from sqlalchemy.orm import Session from models.subscription_models import ( - SubscriptionPlan, SubscriptionTier, APIProviderPricing, APIProvider + SubscriptionPlan, SubscriptionTier ) from services.database import get_db_session from datetime import datetime @@ -24,7 +28,7 @@ def create_alpha_subscription_tiers(): db = get_db_session() if not db: - logger.error("❌ Could not get database session") + logger.error("Could not get database session") return False try: @@ -38,12 +42,12 @@ def create_alpha_subscription_tiers(): "description": "Free tier for alpha testing - Limited usage", "features": ["blog_writer", "basic_seo", "content_planning"], "limits": { - "gemini_calls_limit": 50, # 50 calls per day - "gemini_tokens_limit": 10000, # 10k tokens per day - "tavily_calls_limit": 20, # 20 searches per day - "serper_calls_limit": 10, # 10 SEO searches per day - "stability_calls_limit": 5, # 5 images per day - "monthly_cost_limit": 5.0 # $5 monthly limit + "gemini_calls_limit": 50, + "gemini_tokens_limit": 10000, + "tavily_calls_limit": 20, + "serper_calls_limit": 10, + "stability_calls_limit": 5, + "monthly_cost_limit": 5.0 } }, { @@ -54,12 +58,12 @@ def create_alpha_subscription_tiers(): "description": "Basic alpha tier - Moderate usage for testing", "features": ["blog_writer", "seo_analysis", "content_planning", "strategy_copilot"], "limits": { - "gemini_calls_limit": 200, # 200 calls per day - "gemini_tokens_limit": 50000, # 50k tokens per day - "tavily_calls_limit": 100, # 100 searches per day - "serper_calls_limit": 50, # 50 SEO searches per day - "stability_calls_limit": 25, # 25 images per day - "monthly_cost_limit": 25.0 # $25 monthly limit + "gemini_calls_limit": 200, + "gemini_tokens_limit": 50000, + "tavily_calls_limit": 100, + "serper_calls_limit": 50, + "stability_calls_limit": 25, + "monthly_cost_limit": 25.0 } }, { @@ -70,12 +74,12 @@ def create_alpha_subscription_tiers(): "description": "Pro alpha tier - High usage for power users", "features": ["blog_writer", "seo_analysis", "content_planning", "strategy_copilot", "advanced_analytics"], "limits": { - "gemini_calls_limit": 500, # 500 calls per day - "gemini_tokens_limit": 150000, # 150k tokens per day - "tavily_calls_limit": 300, # 300 searches per day - "serper_calls_limit": 150, # 150 SEO searches per day - "stability_calls_limit": 100, # 100 images per day - "monthly_cost_limit": 100.0 # $100 monthly limit + "gemini_calls_limit": 500, + "gemini_tokens_limit": 150000, + "tavily_calls_limit": 300, + "serper_calls_limit": 150, + "stability_calls_limit": 100, + "monthly_cost_limit": 100.0 } }, { @@ -86,34 +90,31 @@ def create_alpha_subscription_tiers(): "description": "Enterprise alpha tier - Unlimited usage for enterprise testing", "features": ["blog_writer", "seo_analysis", "content_planning", "strategy_copilot", "advanced_analytics", "custom_integrations"], "limits": { - "gemini_calls_limit": 0, # Unlimited calls - "gemini_tokens_limit": 0, # Unlimited tokens - "tavily_calls_limit": 0, # Unlimited searches - "serper_calls_limit": 0, # Unlimited SEO searches - "stability_calls_limit": 0, # Unlimited images - "monthly_cost_limit": 500.0 # $500 monthly limit + "gemini_calls_limit": 0, + "gemini_tokens_limit": 0, + "tavily_calls_limit": 0, + "serper_calls_limit": 0, + "stability_calls_limit": 0, + "monthly_cost_limit": 500.0 } } ] # Create subscription plans for tier_data in alpha_tiers: - # Check if plan already exists existing_plan = db.query(SubscriptionPlan).filter( SubscriptionPlan.name == tier_data["name"] ).first() if existing_plan: - logger.info(f"✅ Plan '{tier_data['name']}' already exists, updating...") - # Update existing plan + logger.info(f"Plan '{tier_data['name']}' already exists, updating...") for key, value in tier_data["limits"].items(): setattr(existing_plan, key, value) existing_plan.description = tier_data["description"] existing_plan.features = tier_data["features"] existing_plan.updated_at = datetime.utcnow() else: - logger.info(f"🆕 Creating new plan: {tier_data['name']}") - # Create new plan + logger.info(f"Creating new plan: {tier_data['name']}") plan = SubscriptionPlan( name=tier_data["name"], tier=tier_data["tier"], @@ -126,106 +127,17 @@ def create_alpha_subscription_tiers(): db.add(plan) db.commit() - logger.info("✅ Alpha subscription tiers created/updated successfully!") - - # Create API provider pricing - create_api_pricing(db) + logger.info("Alpha subscription tiers created/updated successfully!") return True except Exception as e: - logger.error(f"❌ Error creating alpha subscription tiers: {e}") + logger.error(f"Error creating alpha subscription tiers: {e}") db.rollback() return False finally: db.close() -def create_api_pricing(db: Session): - """Create API provider pricing configuration.""" - - try: - # Gemini pricing (based on current Google AI pricing) - gemini_pricing = [ - { - "model_name": "gemini-2.0-flash-exp", - "cost_per_input_token": 0.00000075, # $0.75 per 1M tokens - "cost_per_output_token": 0.000003, # $3 per 1M tokens - "description": "Gemini 2.0 Flash Experimental" - }, - { - "model_name": "gemini-1.5-flash", - "cost_per_input_token": 0.00000075, # $0.75 per 1M tokens - "cost_per_output_token": 0.000003, # $3 per 1M tokens - "description": "Gemini 1.5 Flash" - }, - { - "model_name": "gemini-1.5-pro", - "cost_per_input_token": 0.00000125, # $1.25 per 1M tokens - "cost_per_output_token": 0.000005, # $5 per 1M tokens - "description": "Gemini 1.5 Pro" - } - ] - - # Tavily pricing - tavily_pricing = [ - { - "model_name": "search", - "cost_per_search": 0.001, # $0.001 per search - "description": "Tavily Search API" - } - ] - - # Serper pricing - serper_pricing = [ - { - "model_name": "search", - "cost_per_search": 0.001, # $0.001 per search - "description": "Serper Google Search API" - } - ] - - # Stability AI pricing - stability_pricing = [ - { - "model_name": "stable-diffusion-xl", - "cost_per_image": 0.01, # $0.01 per image - "description": "Stable Diffusion XL" - } - ] - - # Create pricing records - pricing_configs = [ - (APIProvider.GEMINI, gemini_pricing), - (APIProvider.TAVILY, tavily_pricing), - (APIProvider.SERPER, serper_pricing), - (APIProvider.STABILITY, stability_pricing) - ] - - for provider, pricing_list in pricing_configs: - for pricing_data in pricing_list: - # Check if pricing already exists - existing_pricing = db.query(APIProviderPricing).filter( - APIProviderPricing.provider == provider, - APIProviderPricing.model_name == pricing_data["model_name"] - ).first() - - if existing_pricing: - logger.info(f"✅ Pricing for {provider.value}/{pricing_data['model_name']} already exists") - else: - logger.info(f"🆕 Creating pricing for {provider.value}/{pricing_data['model_name']}") - pricing = APIProviderPricing( - provider=provider, - **pricing_data - ) - db.add(pricing) - - db.commit() - logger.info("✅ API provider pricing created successfully!") - - except Exception as e: - logger.error(f"❌ Error creating API pricing: {e}") - db.rollback() - def assign_default_plan_to_users(): """Assign Free Alpha plan to all existing users.""" if os.getenv('ENABLE_ALPHA', 'false').lower() not in {'1','true','yes','on'}: @@ -234,32 +146,28 @@ def assign_default_plan_to_users(): db = get_db_session() if not db: - logger.error("❌ Could not get database session") + logger.error("Could not get database session") return False try: - # Get Free Alpha plan free_plan = db.query(SubscriptionPlan).filter( SubscriptionPlan.name == "Free Alpha" ).first() if not free_plan: - logger.error("❌ Free Alpha plan not found") + logger.error("Free Alpha plan not found") return False - - # For now, we'll create a default user subscription - # In a real system, you'd query actual users + from models.subscription_models import UserSubscription, BillingCycle, UsageStatus - from datetime import datetime, timedelta + from datetime import timedelta - # Create default user subscription for testing default_user_id = "default_user" existing_subscription = db.query(UserSubscription).filter( UserSubscription.user_id == default_user_id ).first() if not existing_subscription: - logger.info(f"🆕 Creating default subscription for {default_user_id}") + logger.info(f"Creating default subscription for {default_user_id}") subscription = UserSubscription( user_id=default_user_id, plan_id=free_plan.id, @@ -272,33 +180,32 @@ def assign_default_plan_to_users(): ) db.add(subscription) db.commit() - logger.info(f"✅ Default subscription created for {default_user_id}") + logger.info(f"Default subscription created for {default_user_id}") else: - logger.info(f"✅ Default subscription already exists for {default_user_id}") + logger.info(f"Default subscription already exists for {default_user_id}") return True except Exception as e: - logger.error(f"❌ Error assigning default plan: {e}") + logger.error(f"Error assigning default plan: {e}") db.rollback() return False finally: db.close() if __name__ == "__main__": - logger.info("🚀 Initializing Alpha Subscription Tiers...") + logger.info("Initializing Alpha Subscription Tiers...") success = create_alpha_subscription_tiers() if success: - logger.info("✅ Subscription tiers created successfully!") + logger.info("Subscription tiers created successfully!") - # Assign default plan assign_success = assign_default_plan_to_users() if assign_success: - logger.info("✅ Default plan assigned successfully!") + logger.info("Default plan assigned successfully!") else: - logger.error("❌ Failed to assign default plan") + logger.error("Failed to assign default plan") else: - logger.error("❌ Failed to create subscription tiers") + logger.error("Failed to create subscription tiers") - logger.info("🎉 Alpha subscription system initialization complete!") + logger.info("Alpha subscription system initialization complete!") \ No newline at end of file diff --git a/backend/services/llm_providers/audio_to_text_generation/gemini_audio_text.py b/backend/services/llm_providers/audio_to_text_generation/gemini_audio_text.py index b1dd9181..48e5033a 100644 --- a/backend/services/llm_providers/audio_to_text_generation/gemini_audio_text.py +++ b/backend/services/llm_providers/audio_to_text_generation/gemini_audio_text.py @@ -67,10 +67,11 @@ import sys from pathlib import Path import google.genai as genai from google.genai import types - +from dotenv import load_dotenv from loguru import logger from utils.logger_utils import get_service_logger +from services.api_key_manager import APIKeyManager # Use service-specific logger to avoid conflicts logger = get_service_logger("gemini_audio_text") diff --git a/backend/services/podcast_context_builder.py b/backend/services/podcast_context_builder.py new file mode 100644 index 00000000..dac52869 --- /dev/null +++ b/backend/services/podcast_context_builder.py @@ -0,0 +1,281 @@ +""" +Podcast Context Builder Service + +Builds unified context for AI prompts from multiple sources: +- Podcast Bible (user personalization) +- Website Extraction (from Exa) +- Topic Context (category research: News/Finance) +""" + +from typing import Dict, Any, Optional, List +from loguru import logger + + +class PodcastContextBuilder: + """Builds unified context for AI prompt enhancements.""" + + def build_enhance_context( + self, + idea: str, + bible_context: str = "", + website_data: Optional[Dict[str, Any]] = None, + topic_context: Optional[Dict[str, Any]] = None, + ) -> Dict[str, Any]: + """ + Build context for topic enhancement prompt. + + Args: + idea: Raw podcast idea/keywords + bible_context: Serialized Podcast Bible string + website_data: Website extraction data (title, summary, highlights, url, subpages) + topic_context: Category research data (category, topics, selected_topic) + + Returns: + Dict with: + - prompt: The formatted prompt + - contexts_used: List of context types being used + - context_description: Human-readable description for logging + """ + contexts_used = [] + context_parts = [] + + # Track what contexts are available + if bible_context: + contexts_used.append("Podcast Bible") + + if website_data: + contexts_used.append("Website Analysis") + + if topic_context: + category = topic_context.get("category", "unknown") + contexts_used.append(f"Category Research ({category})") + + # Build Bible section + if bible_context: + context_parts.append(f"USER PERSONALIZATION CONTEXT (Podcast Bible):\n{bible_context}") + + # Build Website section + if website_data: + website_section = self._format_website_section(website_data) + context_parts.append(website_section) + + # Build Topic/Category section + if topic_context: + topic_section = self._format_topic_section(topic_context) + context_parts.append(topic_section) + + # Select appropriate prompt template based on available context + prompt = self._select_prompt(idea, context_parts, website_data, topic_context) + + return { + "prompt": prompt, + "contexts_used": contexts_used, + "context_description": ", ".join(contexts_used) if contexts_used else "basic idea only", + } + + def _format_website_section(self, website_data: Dict[str, Any]) -> str: + """Format website data for prompt inclusion.""" + parts = [] + + if website_data.get("url"): + parts.append(f"Source URL: {website_data['url']}") + + if website_data.get("title"): + parts.append(f"Company/Organization: {website_data['title']}") + + if website_data.get("summary"): + parts.append(f"About: {website_data['summary']}") + + if website_data.get("highlights"): + highlights = website_data.get("highlights", []) + if highlights: + parts.append(f"Key Highlights: {', '.join(highlights[:3])}") + + if website_data.get("subpages"): + subpages = website_data.get("subpages", []) + if subpages: + subpage_titles = [sp.get("title", sp.get("url", "")) for sp in subpages[:3]] + parts.append(f"Subpages: {', '.join(subpage_titles)}") + + return "WEBSITE CONTENT ANALYSIS:\n" + "\n".join(parts) + + def _format_topic_section(self, topic_context: Dict[str, Any]) -> str: + """Format category research data for prompt inclusion.""" + parts = [] + + category = topic_context.get("category", "") + if category: + parts.append(f"Research Category: {category.upper()}") + + # Include selected topic details + selected = topic_context.get("selected_topic", {}) + if selected: + if selected.get("title"): + parts.append(f"Selected Topic: {selected['title']}") + if selected.get("snippet"): + parts.append(f"Context: {selected['snippet']}") + if selected.get("url"): + parts.append(f"Source: {selected['url']}") + + # Include some alternative topics for reference + topics = topic_context.get("topics", []) + if topics: + alt_titles = [t.get("title", "") for t in topics[:3] if t.get("title")] + if alt_titles: + parts.append(f"Related Topics: {', '.join(alt_titles)}") + + return "CATEGORY RESEARCH CONTEXT:\n" + "\n".join(parts) + + def _select_prompt( + self, + idea: str, + context_parts: List[str], + website_data: Optional[Dict[str, Any]], + topic_context: Optional[Dict[str, Any]], + ) -> str: + """Select and format the appropriate prompt based on available context.""" + + context_str = "\n\n".join(context_parts) + + # Full context prompt (all sources available) + if website_data and topic_context: + return f"""You are a creative podcast producer. Generate 3 distinct, compelling podcast episode concepts from the raw idea, enriched with website content analysis AND category research. + +{context_str} + +RAW IDEA/KEYWORDS: "{idea}" + +TASK: +Generate 3 different enhanced versions that INCORPORATE both the website content AND category research context: +1. Professional & Expert-led angle (leverage website authority + research insights) +2. Storytelling & Human interest angle (brand narratives + research findings) +3. Trendy & Contemporary angle (current trends + research relevance) + +Each version should: +- Be 2-3 sentences +- Reference specific elements from both website AND research when relevant +- Be audience-focused and align with host persona if provided +- NOT just repeat summaries - create fresh podcast angles + +Return JSON with: +- enhanced_ideas: array of 3 strings (each a complete episode pitch) +- rationales: array of 3 strings explaining each approach + +Example format: +{{ + "enhanced_ideas": ["Pitch 1...", "Pitch 2...", "Pitch 3..."], + "rationales": ["Reason 1", "Reason 2", "Reason 3"] +}} +""" + + # Website-only context + elif website_data: + return f"""You are a creative podcast producer. Generate 3 distinct, compelling podcast episode concepts from the raw idea, enriched with website content analysis. + +{context_str} + +RAW IDEA/KEYWORDS: "{idea}" + +TASK: +Generate 3 different enhanced versions that INCORPORATE the website content: +1. Professional & Expert-led angle (focus on authority, insights from website) +2. Storytelling & Human interest angle (brand narratives, personal connections) +3. Trendy & Contemporary angle (modern perspectives, current relevance) + +Each version should: +- Be 2-3 sentences +- Reference specific elements from the website when relevant +- Be audience-focused and align with host persona if provided + +Return JSON with: +- enhanced_ideas: array of 3 strings +- rationales: array of 3 strings + +Example format: +{{ + "enhanced_ideas": ["Pitch 1...", "Pitch 2...", "Pitch 3..."], + "rationales": ["Reason 1", "Reason 2", "Reason 3"] +}} +""" + + # Category research only context + elif topic_context: + category = topic_context.get("category", "research").upper() + return f"""You are a creative podcast producer. Generate 3 distinct, compelling podcast episode concepts from the raw idea, enriched with {category} category research. + +{context_str} + +RAW IDEA/KEYWORDS: "{idea}" + +TASK: +Generate 3 different enhanced versions that INCORPORATE the {category} research: +1. Professional & Expert-led angle (leverage research insights and data) +2. Storytelling & Human interest angle (real-world applications, human impact) +3. Trendy & Contemporary angle (cutting-edge trends, future outlook) + +Each version should: +- Be 2-3 sentences +- Reference specific elements from the research when relevant +- Connect the research to the raw idea meaningfully + +Return JSON with: +- enhanced_ideas: array of 3 strings +- rationales: array of 3 strings + +Example format: +{{ + "enhanced_ideas": ["Pitch 1...", "Pitch 2...", "Pitch 3..."], + "rationales": ["Reason 1", "Reason 2", "Reason 3"] +}} +""" + + # Standard context (no additional context) + else: + return f"""You are a creative podcast producer. Generate 3 distinct, compelling podcast episode concepts from the raw idea. + +{context_str} + +RAW IDEA/KEYWORDS: "{idea}" + +TASK: +Generate 3 different enhanced versions with unique angles: +1. Professional & Expert-led angle (focus on authority, insights) +2. Storytelling & Human interest angle (focus on narratives, emotions) +3. Trendy & Contemporary angle (focus on trends, modern relevance) + +Each version should be 2-3 sentences, audience-focused. + +Return JSON with: +- enhanced_ideas: array of 3 strings +- rationales: array of 3 strings + +Example format: +{{ + "enhanced_ideas": ["Pitch 1...", "Pitch 2...", "Pitch 3..."], + "rationales": ["Reason 1", "Reason 2", "Reason 3"] +}} +""" + + def format_context_for_logging( + self, + website_data: Optional[Dict] = None, + topic_context: Optional[Dict] = None, + ) -> str: + """Format context description for logging.""" + contexts = [] + + if website_data: + title = website_data.get("title", "Unknown") + contexts.append(f"Website: {title[:30]}...") + + if topic_context: + category = topic_context.get("category", "unknown") + selected = topic_context.get("selected_topic", {}) + topic_title = selected.get("title", "Not selected") + contexts.append(f"Category: {category} ({topic_title[:20]}...)") + + return " | ".join(contexts) if contexts else "No extended context" + + +# Singleton instance for reuse +context_builder = PodcastContextBuilder() \ No newline at end of file diff --git a/backend/services/research/trends/google_trends_service.py b/backend/services/research/trends/google_trends_service.py index 902b9e4a..2fcedccd 100644 --- a/backend/services/research/trends/google_trends_service.py +++ b/backend/services/research/trends/google_trends_service.py @@ -4,147 +4,273 @@ Google Trends Service Provides Google Trends data integration for the Research Engine. Handles rate limiting, caching, error handling, and data serialization. +Key design decisions: +- Monkey-patches urllib3 Retry to fix method_whitelist→allowed_methods (urllib3 2.x) +- Monkey-patches pytrends related_topics/related_queries to catch IndexError bug +- Uses TrendReq built-in retries (3 retries, 1s backoff) for automatic 429 handling +- Random user-agent rotation per instance to reduce fingerprinting +- 1-second delays between sequential requests to respect rate limits +- 24-hour in-memory cache to avoid redundant API calls + Author: ALwrity Team -Version: 1.0 +Version: 2.0 """ import asyncio +import random +import time from typing import List, Dict, Any, Optional from datetime import datetime, timedelta from loguru import logger import pandas as pd +# --------------------------------------------------------------------------- +# Monkey-patches: fix compatibility issues before importing/using pytrends +# --------------------------------------------------------------------------- + +# Patch 1: urllib3 2.x renamed Retry's `method_whitelist` to `allowed_methods`. +# pytrends 4.9.2 still uses `method_whitelist`, which crashes with urllib3 2.x. +# We patch Retry.__init__ to accept `method_whitelist` and remap it. try: - from pytrends.request import TrendReq + from urllib3.util.retry import Retry as _OrigRetry + + _orig_retry_init = _OrigRetry.__init__ + + def _patched_retry_init(self, *args, **kwargs): + if 'method_whitelist' in kwargs and 'allowed_methods' not in kwargs: + kwargs['allowed_methods'] = kwargs.pop('method_whitelist') + _orig_retry_init(self, *args, **kwargs) + + _OrigRetry.__init__ = _patched_retry_init + logger.debug("[Trends] Patched urllib3 Retry.__init__ for method_whitelist→allowed_methods") +except Exception as _patch_err: + logger.warning(f"[Trends] Could not patch urllib3 Retry: {_patch_err}") + +# Now safe to import pytrends +try: + from pytrends.request import TrendReq as _TrendReq PYTrends_AVAILABLE = True except ImportError: PYTrends_AVAILABLE = False logger.warning("pytrends not installed. Google Trends features will be unavailable.") +# Patch 2: pytrends related_topics() and related_queries() use keyword[0] +# which raises IndexError on empty lists, but only catch KeyError. +# We fix this by catching (KeyError, IndexError) for the keyword extraction. +if PYTrends_AVAILABLE: + import json as _json + import pandas as _pd + + def _fixed_related_topics(self): + result_dict = {} + related_payload = {} + for request_json in self.related_topics_widget_list: + try: + kw = request_json['request']['restriction'][ + 'complexKeywordsRestriction']['keyword'][0]['value'] + except (KeyError, IndexError): + kw = '' + related_payload['req'] = _json.dumps(request_json['request']) + related_payload['token'] = request_json['token'] + related_payload['tz'] = self.tz + req_json = self._get_data( + url=_TrendReq.RELATED_QUERIES_URL, + method=_TrendReq.GET_METHOD, + trim_chars=5, + params=related_payload, + ) + try: + top_list = req_json['default']['rankedList'][0]['rankedKeyword'] + df_top = _pd.json_normalize(top_list, sep='_') + except (KeyError, IndexError): + df_top = None + try: + rising_list = req_json['default']['rankedList'][1]['rankedKeyword'] + df_rising = _pd.json_normalize(rising_list, sep='_') + except (KeyError, IndexError): + df_rising = None + result_dict[kw] = {'rising': df_rising, 'top': df_top} + return result_dict + + def _fixed_related_queries(self): + result_dict = {} + related_payload = {} + for request_json in self.related_queries_widget_list: + try: + kw = request_json['request']['restriction'][ + 'complexKeywordsRestriction']['keyword'][0]['value'] + except (KeyError, IndexError): + kw = '' + related_payload['req'] = _json.dumps(request_json['request']) + related_payload['token'] = request_json['token'] + related_payload['tz'] = self.tz + req_json = self._get_data( + url=_TrendReq.RELATED_QUERIES_URL, + method=_TrendReq.GET_METHOD, + trim_chars=5, + params=related_payload, + ) + try: + top_df = _pd.DataFrame( + req_json['default']['rankedList'][0]['rankedKeyword']) + top_df = top_df[['query', 'value']] + except (KeyError, IndexError): + top_df = None + try: + rising_df = _pd.DataFrame( + req_json['default']['rankedList'][1]['rankedKeyword']) + rising_df = rising_df[['query', 'value']] + except (KeyError, IndexError): + rising_df = None + result_dict[kw] = {'top': top_df, 'rising': rising_df} + return result_dict + + _TrendReq.related_topics = _fixed_related_topics + _TrendReq.related_queries = _fixed_related_queries + logger.debug("[Trends] Patched TrendReq.related_topics/related_queries for IndexError") + from .rate_limiter import RateLimiter class GoogleTrendsService: """ Service for fetching and analyzing Google Trends data. - - Features: - - Interest over time - - Interest by region - - Related topics - - Related queries - - Rate limiting (1 req/sec) - - Caching (24-hour TTL) - - Async support - - Error handling with retry logic + + Uses TrendReq with no retries (fail-fast) to avoid hitting CAPTCHA on blocks. + 429 retry handling (1s, 2s, 4s backoff). Random user-agent is set + per instance to reduce fingerprinting. """ - + + USER_AGENTS = [ + "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36", + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36", + "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:125.0) Gecko/20100101 Firefox/125.0", + "Mozilla/5.0 (Macintosh; Intel Mac OS X 14_4) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.3 Safari/605.1.15", + "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36", + "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36 Edg/124.0.0.0", + ] + def __init__(self): - """Initialize the Google Trends service.""" if not PYTrends_AVAILABLE: raise RuntimeError("pytrends library is required. Install with: pip install pytrends") - - self.rate_limiter = RateLimiter(max_calls=1, period=1.0) # 1 request per second - self.cache: Dict[str, Dict[str, Any]] = {} # Simple in-memory cache - self.cache_ttl = timedelta(hours=24) # 24-hour cache - - logger.info("GoogleTrendsService initialized") - + + self.rate_limiter = RateLimiter(max_calls=1, period=1.0) + self.cache: Dict[str, Any] = {} + self.cache_ttl = timedelta(hours=24) + + logger.info("GoogleTrendsService initialized (pytrends 4.9.2, fail-fast, 2s delays)") + + # ----------------------------------------------------------------------- + # Public API + # ----------------------------------------------------------------------- + async def analyze_trends( self, keywords: List[str], timeframe: str = "today 12-m", geo: str = "US", + gprop: str = "", user_id: Optional[str] = None, ) -> Dict[str, Any]: """ Comprehensive trends analysis. - - Fetches all trends data in a single optimized call: - - Interest over time - - Interest by region - - Related topics (top & rising) - - Related queries (top & rising) - + Args: - keywords: List of keywords to analyze (1-5 keywords recommended) - timeframe: Timeframe string (e.g., "today 12-m", "today 1-y", "all") + keywords: List of keywords to analyze (1-5) + timeframe: Timeframe (e.g., "today 12-m", "today 3-m", "today 5-y") geo: Country code (e.g., "US", "GB", "IN") - user_id: User ID for subscription checks (optional for now) - - Returns: - Dict containing all trends data in serializable format - - Raises: - ValueError: If keywords list is empty or too long - RuntimeError: If pytrends is not available or API fails + gprop: Google property filter - '' for web, 'youtube' for YouTube, 'news', 'images', 'froogle' + user_id: Optional user ID for tracking + + Fetches: interest over time, interest by region, related topics, + and related queries using a single TrendReq session. """ if not keywords: raise ValueError("Keywords list cannot be empty") - + if len(keywords) > 5: logger.warning(f"Too many keywords ({len(keywords)}), using first 5") keywords = keywords[:5] - - # Check cache first + cache_key = self._build_cache_key(keywords, timeframe, geo) cached_data = self._get_from_cache(cache_key) if cached_data: logger.info(f"Returning cached trends data for: {keywords}") return {**cached_data, "cached": True} - - # Rate limit + await self.rate_limiter.acquire() - + + total_start = time.monotonic() + + interest_over_time: List[Dict[str, Any]] = [] + interest_by_region: List[Dict[str, Any]] = [] + related_topics: Dict[str, List[Dict[str, Any]]] = {"top": [], "rising": []} + related_queries: Dict[str, List[Dict[str, Any]]] = {"top": [], "rising": []} + try: - logger.info(f"Fetching Google Trends data for: {keywords} (timeframe: {timeframe}, geo: {geo})") - - # Initialize pytrends (sync operation, run in thread) + logger.info(f"[Trends] ===== START analyze_trends ===== keywords={keywords} timeframe={timeframe} geo={geo}") + + # Initialize TrendReq with gprop (youtube for video/podcast relevance) + init_start = time.monotonic() pytrends = await asyncio.to_thread( - self._initialize_pytrends, + self._create_pytrends, keywords, timeframe, - geo + geo, + gprop, ) - - # Fetch all data in parallel (pytrends methods are sync, so use to_thread) - interest_over_time_task = asyncio.to_thread( - lambda: self._safe_interest_over_time(pytrends) + init_ms = int((time.monotonic() - init_start) * 1000) + logger.info(f"[Trends] TrendReq init + build_payload took {init_ms}ms") + + # --- Interest Over Time --- + iot_start = time.monotonic() + interest_over_time = await asyncio.to_thread( + lambda: self._fetch_interest_over_time(pytrends) ) - interest_by_region_task = asyncio.to_thread( - lambda: self._safe_interest_by_region(pytrends) + iot_ms = int((time.monotonic() - iot_start) * 1000) + logger.info(f"[Trends] interest_over_time took {iot_ms}ms, returned {len(interest_over_time)} points") + + await asyncio.sleep(2) + + # --- Interest By Region --- + ibr_start = time.monotonic() + interest_by_region = await asyncio.to_thread( + lambda: self._fetch_interest_by_region(pytrends) ) - related_topics_task = asyncio.to_thread( - lambda: self._safe_related_topics(pytrends, keywords) + ibr_ms = int((time.monotonic() - ibr_start) * 1000) + logger.info(f"[Trends] interest_by_region took {ibr_ms}ms, returned {len(interest_by_region)} regions") + + await asyncio.sleep(2) + + # --- Related Topics --- + rt_start = time.monotonic() + related_topics = await asyncio.to_thread( + lambda: self._fetch_related_topics(pytrends) ) - related_queries_task = asyncio.to_thread( - lambda: self._safe_related_queries(pytrends, keywords) + rt_ms = int((time.monotonic() - rt_start) * 1000) + rt_top = len(related_topics.get("top", [])) + rt_rising = len(related_topics.get("rising", [])) + logger.info(f"[Trends] related_topics took {rt_ms}ms, top={rt_top} rising={rt_rising}") + + await asyncio.sleep(2) + + # --- Related Queries --- + rq_start = time.monotonic() + related_queries = await asyncio.to_thread( + lambda: self._fetch_related_queries(pytrends) ) - - # Wait for all tasks - interest_over_time, interest_by_region, related_topics, related_queries = await asyncio.gather( - interest_over_time_task, - interest_by_region_task, - related_topics_task, - related_queries_task, - return_exceptions=True + rq_ms = int((time.monotonic() - rq_start) * 1000) + rq_top = len(related_queries.get("top", [])) + rq_rising = len(related_queries.get("rising", [])) + logger.info(f"[Trends] related_queries took {rq_ms}ms, top={rq_top} rising={rq_rising}") + + total_ms = int((time.monotonic() - total_start) * 1000) + logger.info( + f"[Trends] ===== DONE analyze_trends ===== total={total_ms}ms " + f"iot={len(interest_over_time)} ibr={len(interest_by_region)} " + f"rt_top={rt_top} rq_top={rq_top}" ) - - # Handle exceptions - if isinstance(interest_over_time, Exception): - logger.error(f"Interest over time failed: {interest_over_time}") - interest_over_time = [] - if isinstance(interest_by_region, Exception): - logger.error(f"Interest by region failed: {interest_by_region}") - interest_by_region = [] - if isinstance(related_topics, Exception): - logger.error(f"Related topics failed: {related_topics}") - related_topics = {"top": [], "rising": []} - if isinstance(related_queries, Exception): - logger.error(f"Related queries failed: {related_queries}") - related_queries = {"top": [], "rising": []} - - # Build result + result = { "interest_over_time": interest_over_time, "interest_by_region": interest_by_region, @@ -153,186 +279,268 @@ class GoogleTrendsService: "timeframe": timeframe, "geo": geo, "keywords": keywords, + "source": "web" if gprop == "" else "podcast" if gprop == "youtube" else gprop, "timestamp": datetime.utcnow().isoformat(), - "cached": False + "cached": False, } - - # Cache result + self._save_to_cache(cache_key, result) - - logger.info(f"Google Trends data fetched successfully: {len(interest_over_time)} time points, {len(interest_by_region)} regions") - + + logger.info( + f"Google Trends data fetched successfully: " + f"{len(interest_over_time)} time points, {len(interest_by_region)} regions" + ) + return result - + except Exception as e: logger.error(f"Google Trends analysis failed: {e}") - # Return fallback response - return self._create_fallback_response(keywords, timeframe, geo, str(e)) - - def _initialize_pytrends( + return self._create_fallback_response(keywords, timeframe, geo, gprop, str(e)) + + # ----------------------------------------------------------------------- + # TrendReq factory + # ----------------------------------------------------------------------- + + def _create_pytrends( self, keywords: List[str], timeframe: str, - geo: str - ) -> TrendReq: - """Initialize pytrends and build payload (sync operation).""" - pytrends = TrendReq(hl='en-US', tz=360) - pytrends.build_payload(kw_list=keywords, timeframe=timeframe, geo=geo) + geo: str, + gprop: str = "", + ) -> _TrendReq: + """Create TrendReq with optional gprop (e.g., 'youtube' for video trends).""" + start = time.monotonic() + ua = random.choice(self.USER_AGENTS) + logger.info(f"[Trends] Creating TrendReq (fail-fast, gprop='{gprop}', UA={ua[:40]}...)") + pytrends = _TrendReq( + hl='en-US', + tz=360, + timeout=(10, 30), + retries=0, + backoff_factor=0, + requests_args={'headers': {'User-Agent': ua}}, + ) + # gprop: '' = web, 'youtube' = YouTube, 'news', 'images', 'froogle' + pytrends.build_payload(kw_list=keywords, timeframe=timeframe, geo=geo, gprop=gprop) + elapsed = int((time.monotonic() - start) * 1000) + logger.info(f"[Trends] TrendReq init + build_payload completed in {elapsed}ms (gprop={gprop})") return pytrends - - def _safe_interest_over_time(self, pytrends: TrendReq) -> List[Dict[str, Any]]: - """Safely fetch interest over time data.""" + + # ----------------------------------------------------------------------- + # Data fetchers — each catches all exceptions and returns defaults + # ----------------------------------------------------------------------- + + def _fetch_interest_over_time(self, pytrends: _TrendReq, keywords: List[str] = None) -> List[Dict[str, Any]]: + """Fetch interest over time data.""" + start = time.monotonic() try: df = pytrends.interest_over_time() - if df.empty: + elapsed = int((time.monotonic() - start) * 1000) + if df is None or (hasattr(df, 'empty') and df.empty): + logger.info(f"[Trends] interest_over_time returned empty in {elapsed}ms") return [] - return self._format_dataframe(df.reset_index()) + # Use pytrends.kw_list if keywords not provided + kw = keywords or pytrends.kw_list + result = self._format_dataframe(df.reset_index(), kw) + logger.info(f"[Trends] interest_over_time returned {len(result)} points in {elapsed}ms") + return result except Exception as e: - logger.error(f"Error fetching interest over time: {e}") + elapsed = int((time.monotonic() - start) * 1000) + logger.error(f"[Trends] interest_over_time failed in {elapsed}ms: {e}") return [] - - def _safe_interest_by_region(self, pytrends: TrendReq) -> List[Dict[str, Any]]: - """Safely fetch interest by region data.""" + + def _fetch_interest_by_region(self, pytrends: _TrendReq, keywords: List[str] = None) -> List[Dict[str, Any]]: + """Fetch interest by region data.""" + start = time.monotonic() try: df = pytrends.interest_by_region(resolution='COUNTRY', inc_low_vol=True, inc_geo_code=False) - if df.empty: + elapsed = int((time.monotonic() - start) * 1000) + if df is None or (hasattr(df, 'empty') and df.empty): + logger.info(f"[Trends] interest_by_region returned empty in {elapsed}ms") return [] - return self._format_dataframe(df.reset_index()) + result = self._format_dataframe(df.reset_index(), keywords or pytrends.kw_list) + logger.info(f"[Trends] interest_by_region returned {len(result)} regions in {elapsed}ms") + return result except Exception as e: - logger.error(f"Error fetching interest by region: {e}") + elapsed = int((time.monotonic() - start) * 1000) + logger.error(f"[Trends] interest_by_region failed in {elapsed}ms: {e}") return [] - - def _safe_related_topics( - self, - pytrends: TrendReq, - keywords: List[str] - ) -> Dict[str, List[Dict[str, Any]]]: - """Safely fetch related topics.""" + + def _fetch_related_topics(self, pytrends: _TrendReq) -> Dict[str, List[Dict[str, Any]]]: + """Fetch related topics. Patches catch IndexError from pytrends bug.""" + start = time.monotonic() + result = {"top": [], "rising": []} try: topics_data = pytrends.related_topics() - result = {"top": [], "rising": []} - - for keyword in keywords: - if keyword in topics_data and isinstance(topics_data[keyword], dict): - keyword_topics = topics_data[keyword] - - if "top" in keyword_topics and not keyword_topics["top"].empty: - top_df = keyword_topics["top"] - # Select relevant columns - if "topic_title" in top_df.columns and "value" in top_df.columns: - top_data = top_df[["topic_title", "value"]].to_dict('records') - result["top"].extend(top_data) - - if "rising" in keyword_topics and not keyword_topics["rising"].empty: - rising_df = keyword_topics["rising"] - if "topic_title" in rising_df.columns and "value" in rising_df.columns: - rising_data = rising_df[["topic_title", "value"]].to_dict('records') - result["rising"].extend(rising_data) - + elapsed = int((time.monotonic() - start) * 1000) + + if topics_data is None: + logger.info(f"[Trends] related_topics returned None in {elapsed}ms") + return result + + if not isinstance(topics_data, dict): + logger.info(f"[Trends] related_topics returned {type(topics_data).__name__}, expected dict") + return result + + for key, keyword_data in topics_data.items(): + if keyword_data is None or not isinstance(keyword_data, dict): + continue + + for section in ["top", "rising"]: + section_df = keyword_data.get(section) + if section_df is None: + continue + if hasattr(section_df, 'empty') and section_df.empty: + continue + if not hasattr(section_df, 'to_dict'): + continue + + try: + if "topic_title" in section_df.columns and "value" in section_df.columns: + data = section_df[["topic_title", "value"]].to_dict('records') + else: + data = section_df.to_dict('records') + result[section].extend(data) + except Exception as e: + logger.debug(f"Error parsing {section} topics for key '{key}': {e}") + continue + + logger.info(f"[Trends] related_topics completed in {elapsed}ms, top={len(result['top'])} rising={len(result['rising'])}") return result except Exception as e: - logger.error(f"Error fetching related topics: {e}") - return {"top": [], "rising": []} - - def _safe_related_queries( - self, - pytrends: TrendReq, - keywords: List[str] - ) -> Dict[str, List[Dict[str, Any]]]: - """Safely fetch related queries.""" + elapsed = int((time.monotonic() - start) * 1000) + logger.error(f"[Trends] related_topics failed in {elapsed}ms: {e}") + return result + + def _fetch_related_queries(self, pytrends: _TrendReq) -> Dict[str, List[Dict[str, Any]]]: + """Fetch related queries. Patches catch IndexError from pytrends bug.""" + start = time.monotonic() + result = {"top": [], "rising": []} try: queries_data = pytrends.related_queries() - result = {"top": [], "rising": []} - - for keyword in keywords: - if keyword in queries_data and isinstance(queries_data[keyword], dict): - keyword_queries = queries_data[keyword] - - if "top" in keyword_queries and not keyword_queries["top"].empty: - top_df = keyword_queries["top"] - result["top"].extend(top_df.to_dict('records')) - - if "rising" in keyword_queries and not keyword_queries["rising"].empty: - rising_df = keyword_queries["rising"] - result["rising"].extend(rising_df.to_dict('records')) - + elapsed = int((time.monotonic() - start) * 1000) + + if queries_data is None: + logger.info(f"[Trends] related_queries returned None in {elapsed}ms") + return result + + if not isinstance(queries_data, dict): + logger.info(f"[Trends] related_queries returned {type(queries_data).__name__}, expected dict") + return result + + for key, keyword_data in queries_data.items(): + if keyword_data is None or not isinstance(keyword_data, dict): + continue + + for section in ["top", "rising"]: + section_df = keyword_data.get(section) + if section_df is None: + continue + if hasattr(section_df, 'empty') and section_df.empty: + continue + if not hasattr(section_df, 'to_dict'): + continue + + try: + data = section_df.to_dict('records') + result[section].extend(data) + except Exception as e: + logger.debug(f"Error parsing {section} queries for key '{key}': {e}") + continue + + logger.info(f"[Trends] related_queries completed in {elapsed}ms, top={len(result['top'])} rising={len(result['rising'])}") return result except Exception as e: - logger.error(f"Error fetching related queries: {e}") - return {"top": [], "rising": []} - - def _format_dataframe(self, df: pd.DataFrame) -> List[Dict[str, Any]]: - """Convert DataFrame to list of dicts (serializable format).""" + elapsed = int((time.monotonic() - start) * 1000) + logger.error(f"[Trends] related_queries failed in {elapsed}ms: {e}") + return result + + # ----------------------------------------------------------------------- + # Helpers + # ----------------------------------------------------------------------- + + def _format_dataframe(self, df: pd.DataFrame, keywords: List[str] = None) -> List[Dict[str, Any]]: + """Convert DataFrame to list of dicts. Handles both pytrends and SerpAPI formats.""" if df.empty: return [] - # Convert datetime columns to strings - for col in df.columns: - if pd.api.types.is_datetime64_any_dtype(df[col]): - df[col] = df[col].astype(str) + # Try to detect and handle SerpAPI-style nested data + # Check if the dataframe has 'date' column and 'values' array column + records = df.to_dict('records') - # Convert to dict records - return df.to_dict('records') - + # Check first record for nested values pattern (SerpAPI format) + if records and 'values' in records[0] and isinstance(records[0]['values'], list): + # SerpAPI-style: need to flatten + flat_records = [] + for record in records: + date_str = record.get('date', '') + timestamp = record.get('timestamp', '') + is_partial = record.get('partial_data', False) + + # Extract values from nested array + for val_entry in record['values']: + keyword_name = val_entry.get('query', '') + value = val_entry.get('value', val_entry.get('extracted_value', 0)) + flat_record = { + 'date': date_str, + 'timestamp': timestamp, + keyword_name: int(value) if value else 0, + } + if is_partial: + flat_record['isPartial'] = True + flat_records.append(flat_record) + records = flat_records + + # Convert datetime columns to strings + for record in records: + for key, value in record.items(): + if hasattr(value, 'year'): # datetime-like + record[key] = str(value) + + return records + def _build_cache_key(self, keywords: List[str], timeframe: str, geo: str) -> str: - """Build cache key from parameters.""" keywords_str = ":".join(sorted(keywords)) return f"google_trends:{keywords_str}:{timeframe}:{geo}" - + def _get_from_cache(self, cache_key: str) -> Optional[Dict[str, Any]]: - """Get data from cache if not expired.""" if cache_key not in self.cache: return None - cached_entry = self.cache[cache_key] cached_time = datetime.fromisoformat(cached_entry.get("timestamp", "")) - if datetime.utcnow() - cached_time > self.cache_ttl: - # Expired, remove from cache del self.cache[cache_key] return None - - # Return cached data (without cached flag) result = {**cached_entry} result.pop("cached", None) return result - + def _save_to_cache(self, cache_key: str, data: Dict[str, Any]): - """Save data to cache.""" - # Store with timestamp - cache_entry = { - **data, - "cached_at": datetime.utcnow().isoformat() - } + cache_entry = {**data, "cached_at": datetime.utcnow().isoformat()} self.cache[cache_key] = cache_entry - - # Clean up old cache entries periodically - if len(self.cache) > 100: # Limit cache size + if len(self.cache) > 100: self._cleanup_cache() - + def _cleanup_cache(self): - """Remove expired cache entries.""" now = datetime.utcnow() expired_keys = [] - for key, entry in self.cache.items(): cached_time = datetime.fromisoformat(entry.get("cached_at", entry.get("timestamp", ""))) if now - cached_time > self.cache_ttl: expired_keys.append(key) - for key in expired_keys: del self.cache[key] - logger.debug(f"Cleaned up {len(expired_keys)} expired cache entries") - + def _create_fallback_response( self, keywords: List[str], timeframe: str, geo: str, - error_message: str + gprop: str = "", + error_message: str = "", ) -> Dict[str, Any]: - """Create fallback response when trends analysis fails.""" + source = "web" if gprop == "" else "podcast" if gprop == "youtube" else gprop return { "interest_over_time": [], "interest_by_region": [], @@ -341,40 +549,38 @@ class GoogleTrendsService: "timeframe": timeframe, "geo": geo, "keywords": keywords, + "source": source, "timestamp": datetime.utcnow().isoformat(), "cached": False, - "error": error_message + "error": error_message, } - + async def get_trending_searches( self, country: str = "united_states", - user_id: Optional[str] = None + user_id: Optional[str] = None, ) -> List[str]: - """ - Get current trending searches for a country. - - Args: - country: Country name (e.g., "united_states", "united_kingdom") - user_id: User ID for subscription checks - - Returns: - List of trending search terms - """ await self.rate_limiter.acquire() - + try: - pytrends = TrendReq(hl='en-US', tz=360) + ua = random.choice(self.USER_AGENTS) + pytrends = _TrendReq( + hl='en-US', + tz=360, + timeout=(10, 30), + retries=0, + backoff_factor=0, + requests_args={'headers': {'User-Agent': ua}}, + ) trending_df = await asyncio.to_thread( lambda: pytrends.trending_searches(pn=country) ) - - if trending_df.empty: + + if trending_df is None or (hasattr(trending_df, 'empty') and trending_df.empty): return [] - - # Return as list of strings + return trending_df[0].tolist() if len(trending_df.columns) > 0 else [] - + except Exception as e: logger.error(f"Error fetching trending searches: {e}") - return [] + return [] \ No newline at end of file diff --git a/backend/services/subscription/pricing_service.py b/backend/services/subscription/pricing_service.py index 6134ec3a..a29874d1 100644 --- a/backend/services/subscription/pricing_service.py +++ b/backend/services/subscription/pricing_service.py @@ -494,7 +494,16 @@ class PricingService: logger.debug(f"Added new pricing for {pricing_data['provider'].value}:{pricing_data['model_name']}") self.db.commit() - logger.info("Default API pricing initialized/updated. HuggingFace pricing loaded from env vars if available.") + + # Debug: count pricing rows seeded + total_rows = self.db.query(APIProviderPricing).count() + providers = self.db.query(APIProviderPricing.provider).distinct().all() + provider_list = sorted([p[0].value for p in providers]) if providers else [] + logger.info(f"[PRICING_INIT] Default API pricing initialized: {len(all_pricing)} rows configured, {total_rows} rows in DB, providers: {provider_list}") + + # Warning-level log that will be visible + logger.warning(f"[PRICING_INIT] Pricing ready: {total_rows} rows for {len(provider_list)} providers") + logger.warning("Default API pricing initialized/updated. HuggingFace pricing loaded from env vars if available.") def initialize_default_plans(self): """Initialize default subscription plans.""" diff --git a/backend/services/user_data_service.py b/backend/services/user_data_service.py index a96616cc..f3f7f589 100644 --- a/backend/services/user_data_service.py +++ b/backend/services/user_data_service.py @@ -4,6 +4,7 @@ Handles fetching user data from the onboarding database. """ from typing import Optional, List, Dict, Any +from datetime import datetime from sqlalchemy.orm import Session from loguru import logger @@ -92,5 +93,88 @@ class UserDataService: return integrated_data.get('website_analysis') except Exception as e: - logger.error(f"Error getting user website analysis: {str(e)}") + logger.error(f"Error getting user website analysis: {e}") + return None + + def save_website_extraction(self, user_id: str, extraction_data: Dict[str, Any]) -> bool: + """ + Save website extraction data for future use. + + Args: + user_id: The user ID + extraction_data: Website extraction data (title, summary, highlights, url, subpages) + + Returns: + True if saved successfully + """ + try: + # Clean data - remove images/favicon + clean_data = { + k: v for k, v in extraction_data.items() + if k not in ('image', 'favicon') + } + clean_data['saved_at'] = datetime.now().isoformat() + + # Find or create user session for storing + onboarding = self.db.query(OnboardingSession).filter( + OnboardingSession.user_id == user_id + ).first() + + if not onboarding: + # Create new session if not exists + onboarding = OnboardingSession(user_id=user_id) + self.db.add(onboarding) + + # Try to update website_analysis field + # The field might be JSON in the model + try: + existing = onboarding.website_analysis + if isinstance(existing, dict): + existing.update(clean_data) + onboarding.website_analysis = existing + else: + onboarding.website_analysis = clean_data + except Exception as ex: + logger.warning(f"Could not update website_analysis: {ex}") + onboarding.website_analysis = clean_data + + self.db.commit() + logger.info(f"Saved website extraction for user {user_id}") + return True + + except Exception as e: + logger.error(f"Error saving website extraction: {str(e)}") + self.db.rollback() + return False + + def get_website_extraction(self, user_id: str) -> Optional[Dict[str, Any]]: + """ + Get saved website extraction data. + + Args: + user_id: The user ID + + Returns: + Website extraction data or None + """ + try: + onboarding = self.db.query(OnboardingSession).filter( + OnboardingSession.user_id == user_id + ).first() + + if not onboarding: + return None + + extraction = onboarding.website_analysis + if isinstance(extraction, dict): + # Return clean data without internal fields + return { + k: v for k, v in extraction.items() + if k not in ('saved_at', 'full_analysis', 'analysis_status') + } + + return None + + except Exception as e: + logger.error(f"Error getting website extraction: {str(e)}") return None diff --git a/frontend/src/components/PodcastMaker/CreateModal.tsx b/frontend/src/components/PodcastMaker/CreateModal.tsx index 3c0d077e..c640ba9f 100644 --- a/frontend/src/components/PodcastMaker/CreateModal.tsx +++ b/frontend/src/components/PodcastMaker/CreateModal.tsx @@ -1,5 +1,6 @@ import React, { useState, useEffect, useMemo, useCallback } from "react"; -import { Stack, Paper, Box, Chip, Typography } from "@mui/material"; +import { Stack, Paper, Box, Chip, Typography, Dialog, DialogTitle, DialogContent, DialogActions, CircularProgress } from "@mui/material"; +import { AutoAwesome as AutoAwesomeIcon } from "@mui/icons-material"; import { CreateProjectPayload, Knobs, PodcastMode } from "./types"; import { useSubscription } from "../../contexts/SubscriptionContext"; import { podcastApi } from "../../services/podcastApi"; @@ -16,6 +17,7 @@ import { AvatarSelector } from "./CreateStep/AvatarSelector"; import { CreateActions } from "./CreateStep/CreateActions"; import { EnhancedTopicChoicesModal } from "./EnhancedTopicChoicesModal"; import { TrendingTopicsModal } from "./CreateStep/TrendingTopicsModal"; +import { CategoryResearchModal } from "./CreateStep/CategoryResearchModal"; const ENHANCE_TOPIC_PROGRESS_MESSAGES = [ "Analyzing your topic idea...", @@ -23,6 +25,53 @@ const ENHANCE_TOPIC_PROGRESS_MESSAGES = [ "Aligning language for podcast listeners...", ]; +// Dynamic progress messages based on context +const getEnhanceProgressMessage = (index: number, hasWebsite: boolean, hasTopicContext: boolean): string => { + const messagesWithAll = [ + "Analyzing your topic with website and category research...", + "Incorporating website insights and research findings...", + "Generating podcast angles based on all available context...", + "Creating personalized episode concepts...", + "Finalizing enhanced pitch options...", + ]; + + const messagesWithWebsite = [ + "Analyzing your topic with website content...", + "Incorporating website insights and company details...", + "Generating podcast angles based on your website analysis...", + "Creating personalized episode concepts...", + "Finalizing enhanced pitch options...", + ]; + + const messagesWithTopic = [ + "Analyzing your topic with category research...", + "Incorporating research insights and trends...", + "Generating podcast angles based on your research...", + "Creating personalized episode concepts...", + "Finalizing enhanced pitch options...", + ]; + + const messagesBasic = [ + "Analyzing your topic idea...", + "Enhancing clarity and hook...", + "Aligning language for podcast listeners...", + "Crafting compelling angles...", + "Finalizing recommendations...", + ]; + + let messages; + if (hasWebsite && hasTopicContext) { + messages = messagesWithAll; + } else if (hasWebsite) { + messages = messagesWithWebsite; + } else if (hasTopicContext) { + messages = messagesWithTopic; + } else { + messages = messagesBasic; + } + return messages[index % messages.length]; +}; + interface CreateModalProps { onCreate: (payload: CreateProjectPayload) => void; open: boolean; @@ -61,20 +110,96 @@ export const CreateModal: React.FC = ({ onCreate, open, defaul const [enhancedRationales, setEnhancedRationales] = useState([]); const [choicesModalOpen, setChoicesModalOpen] = useState(false); const [editedChoices, setEditedChoices] = useState([]); + + // Website extraction data for AI enhance + const [websiteData, setWebsiteData] = useState<{ + title?: string; + text?: string; + summary?: string; + highlights?: string[]; + url: string; + subpages?: Array<{id?: string; title?: string; url?: string; summary?: string; text?: string}>; + } | null>(null); + + // Category research context for AI enhance + const [topicContext, setTopicContext] = useState<{ + category: string; + topics: Array<{title: string; url: string; snippet: string; score: number}>; + selected_topic: {title: string; url: string; snippet: string}; + } | null>(null); + + // Enhance topic progress modal state + const [showEnhanceProgressModal, setShowEnhanceProgressModal] = useState(false); // Trending topics state const [trendingModalOpen, setTrendingModalOpen] = useState(false); const [trendingLoading, setTrendingLoading] = useState(false); - // Rotate placeholder every 3 seconds - useEffect(() => { - if (!topicInput) { - const interval = setInterval(() => { - setPlaceholderIndex((prev) => (prev + 1) % TOPIC_PLACEHOLDERS.length); - }, 3000); - return () => clearInterval(interval); + // Category research state + const [categoryResearchOpen, setCategoryResearchOpen] = useState(false); + const [selectedCategory, setSelectedCategory] = useState<"news" | "finance" | "research-paper" | "personal-site">("news"); + const [categoryLoading, setCategoryLoading] = useState(false); + const [categoryTopics, setCategoryTopics] = useState>([]); + const [categoryError, setCategoryError] = useState(null); + const [categoryCached, setCategoryCached] = useState(false); + const [lastSearchedTopic, setLastSearchedTopic] = useState(""); + const [lastSearchedCategory, setLastSearchedCategory] = useState<"news" | "finance" | "research-paper" | "personal-site">("news"); + +// Rotate placeholder every 3 seconds +useEffect(() => { + if (!topicInput) { + const interval = setInterval(() => { + setPlaceholderIndex((prev) => (prev + 1) % TOPIC_PLACEHOLDERS.length); + }, 3000); + return () => clearInterval(interval); + } +}, [topicInput]); + +// Cost estimate state - compatible with TopicUrlInput props +type EstimateType = number | { ttsCost: number; avatarCost: number; videoCost: number; researchCost: number; total: number; } | null; +const [estimatedCost, setEstimatedCost] = useState(null); +const [costEstimateLoading, setCostEstimateLoading] = useState(false); + +// Fetch cost estimate when config changes +useEffect(() => { + const fetchEstimate = async () => { + if (!duration || !speakers || !podcastMode) return; + + setCostEstimateLoading(true); + try { + const result = await podcastApi.preEstimateCost({ + duration, + speakers, + queryCount: 3, // Default to 3 queries + podcastMode, + }); + + console.log('[Cost Estimate] Response:', result); + console.log('[Cost Estimate] Total:', result.estimate?.total); + console.log('[Cost Estimate] Full breakdown:', result.estimate); + + if (result.estimate?.total !== undefined) { + // Store full estimate object for tooltip + setEstimatedCost(result.estimate); + } else { + setEstimatedCost(null); + } + } catch (error) { + console.error("Cost estimate error:", error); + setEstimatedCost(null); + } finally { + setCostEstimateLoading(false); } - }, [topicInput]); + }; + + fetchEstimate(); +}, [duration, speakers, podcastMode]); // Fetch Brand Avatar on mount but don't select it useEffect(() => { @@ -94,6 +219,28 @@ export const CreateModal: React.FC = ({ onCreate, open, defaul fetchBrandAvatar(); }, []); + // Load saved website extraction on mount + useEffect(() => { + const loadSavedWebsiteExtraction = async () => { + try { + const result = await podcastApi.getWebsiteExtraction(); + if (result.success && result.data) { + setWebsiteData({ + title: result.data.title, + text: result.data.text, + summary: result.data.summary, + highlights: result.data.highlights, + url: result.data.url, + subpages: result.data.subpages, + }); + } + } catch (error) { + console.warn("Failed to load saved website extraction:", error); + } + }; + loadSavedWebsiteExtraction(); + }, []); + useEffect(() => { if (!avatarPreview) { setAvatarPreviewBlobUrl(null); @@ -204,7 +351,7 @@ export const CreateModal: React.FC = ({ onCreate, open, defaul }; const isUrl = useMemo(() => detectUrl(topicInput), [topicInput]); - const enhanceTopicMessage = enhancingTopic ? ENHANCE_TOPIC_PROGRESS_MESSAGES[enhanceTopicProgressIndex] : undefined; + const enhanceTopicMessage = enhancingTopic ? getEnhanceProgressMessage(enhanceTopicProgressIndex, !!websiteData, !!topicContext) : undefined; useEffect(() => { if (!enhancingTopic) { @@ -213,22 +360,39 @@ export const CreateModal: React.FC = ({ onCreate, open, defaul } const interval = setInterval(() => { - setEnhanceTopicProgressIndex((prev) => (prev + 1) % ENHANCE_TOPIC_PROGRESS_MESSAGES.length); + setEnhanceTopicProgressIndex((prev) => { + const maxMessages = (websiteData || topicContext) ? 5 : 3; + return (prev + 1) % maxMessages; + }); }, 1200); return () => clearInterval(interval); - }, [enhancingTopic]); + }, [enhancingTopic, websiteData, topicContext]); // Handle AI Details button click const handleAIDetailsClick = async () => { if (!topicInput.trim() || enhancingTopic) return; + // Show progress modal + setShowEnhanceProgressModal(true); + try { setEnhancingTopic(true); - // We pass the current Bible context if we have it (unlikely here as it's generated in analysis) - // But the backend will generate it from onboarding data if missing + + // Build website data (excluding images/favicon) + const websiteDataForApi = websiteData ? { + title: websiteData.title, + text: websiteData.text, + summary: websiteData.summary, + highlights: websiteData.highlights, + url: websiteData.url, + subpages: websiteData.subpages, + } : undefined; + const result = await podcastApi.enhanceIdea({ idea: topicInput, + website_data: websiteDataForApi, + topic_context: topicContext || undefined, }); if (result.enhanced_ideas && result.enhanced_ideas.length === 3) { @@ -241,9 +405,67 @@ export const CreateModal: React.FC = ({ onCreate, open, defaul console.error("Failed to enhance idea with AI:", error); } finally { setEnhancingTopic(false); + setShowEnhanceProgressModal(false); } }; + // Handle Category Research (News/Finance/Research Papers/Personal Website) click + const handleCategoryResearchClick = async (category: "news" | "finance" | "research-paper" | "personal-site", websiteUrl?: string, forceRefresh: boolean = false, overrideKeyword?: string) => { + const currentTopic = (overrideKeyword || topicInput.trim()); + + // Check if we have cached results for the same topic + category combination (only if not force refresh) + if (!forceRefresh && !overrideKeyword && currentTopic === lastSearchedTopic && category === lastSearchedCategory && categoryTopics.length > 0) { + setSelectedCategory(category); + setCategoryResearchOpen(true); + setCategoryCached(true); + setCategoryLoading(false); + return; + } + + setSelectedCategory(category); + setCategoryResearchOpen(true); + setCategoryLoading(true); + setCategoryError(null); + setCategoryCached(false); + setCategoryTopics([]); + + // For personal-site, check if topic input looks like a URL + let websiteUrlToUse: string | undefined; + if (category === "personal-site" && topicInput.trim()) { + const topicText = topicInput.trim(); + // Check if it looks like a URL + if (topicText.startsWith('http://') || topicText.startsWith('https://') || topicText.includes('://') || (topicText.includes('.') && !topicText.includes(' '))) { + websiteUrlToUse = topicText; + } + } + + try { + const result = await podcastApi.researchByCategory({ + category, + keyword: currentTopic || undefined, + maxResults: 8, + websiteUrl: websiteUrlToUse, + }); + + if (result.success) { + setCategoryTopics(result.topics || []); + setLastSearchedTopic(currentTopic); + setLastSearchedCategory(category); + } else { + setCategoryError(result.error || `Failed to fetch ${category} topics`); + } + } catch (error: any) { + setCategoryError(error?.message || `Failed to fetch ${category} topics`); + } finally { + setCategoryLoading(false); + } + }; + + // Handle Redo Search for category research + const handleCategoryRedoSearch = (keyword: string, websiteUrl?: string) => { + handleCategoryResearchClick(selectedCategory, websiteUrl, true, keyword); + }; + // Handle enhanced topic choice selection const handleChoiceSelection = (selectedIndex: number, editedChoice: string) => { const selectedTopic = editedChoice; @@ -290,20 +512,39 @@ export const CreateModal: React.FC = ({ onCreate, open, defaul // Determine if input is idea or URL // For URL, we extract the first URL found or use the whole string if it's a direct URL let finalIdea = ""; - let finalUrl = ""; if (isUrl) { - // Simple extraction: if the input contains a URL, we treat the input as the URL (or extract it) - // For now, let's assume the user pasted a URL. - // If there's mixed text, we might want to just send the whole thing as 'url' if the backend handles extraction, - // or extract it here. - // The previous logic used specific 'url' state. + // Extract the URL from the input const urlMatch = topicInput.match(/(https?:\/\/[^\s]+)/); - if (urlMatch) { - finalUrl = urlMatch[0]; - } else { - // Fallback - finalUrl = topicInput; + const detectedUrl = urlMatch ? urlMatch[0] : topicInput; + + // Extract content from the URL using Exa + try { + setEnhancingTopic(true); + setEnhanceTopicProgressIndex(0); + + const { podcastApi } = await import("../../services/podcastApi"); + const extractResult = await podcastApi.extractUrl({ url: detectedUrl }); + + if (extractResult.success && extractResult.summary) { + // Use extracted content as the podcast topic + finalIdea = extractResult.summary; + if (extractResult.title) { + finalIdea = `${extractResult.title}: ${finalIdea}`; + } + } else if (extractResult.success && extractResult.text) { + // Fallback to text if no summary + finalIdea = extractResult.text.substring(0, 500); + } else { + // Fallback: use the URL itself if extraction fails + finalIdea = detectedUrl; + console.warn("[CreateModal] URL extraction failed:", extractResult.error); + } + } catch (error) { + console.error("[CreateModal] URL extraction error:", error); + finalIdea = detectedUrl; // Fallback to URL + } finally { + setEnhancingTopic(false); } } else { finalIdea = topicInput; @@ -370,7 +611,7 @@ export const CreateModal: React.FC = ({ onCreate, open, defaul try { await onCreate({ - ideaOrUrl: finalUrl || finalIdea, + ideaOrUrl: finalIdea, speakers, duration, knobs: finalKnobs, @@ -588,13 +829,18 @@ export const CreateModal: React.FC = ({ onCreate, open, defaul showAIDetailsButton={showAIDetailsButton} onAIDetailsClick={handleAIDetailsClick} onTrendingTopicsClick={() => setTrendingModalOpen(true)} + onCategoryResearchClick={handleCategoryResearchClick} placeholderIndex={placeholderIndex} loading={enhancingTopic} loadingMessage={enhanceTopicMessage} + extractedData={websiteData} + setExtractedData={setWebsiteData} trendingLoading={trendingLoading} - estimatedCost={null} + categoryResearchLoading={categoryLoading} + estimatedCost={estimatedCost} duration={duration} speakers={speakers} + podcastMode={podcastMode} knobs={knobs} /> @@ -666,6 +912,127 @@ export const CreateModal: React.FC = ({ onCreate, open, defaul onSelectTopic={(topic) => setTopicInput(topic)} initialKeywords={topicInput} /> + + {/* Category Research Modal */} + setCategoryResearchOpen(false)} + category={selectedCategory} + keyword={topicInput} + websiteUrl={selectedCategory === "personal-site" ? topicInput : undefined} + loading={categoryLoading} + topics={categoryTopics} + error={categoryError} + onSelectTopic={(topic) => { + // Save topic context + const selectedTopicData = categoryTopics.find(t => t.title === topic); + if (selectedTopicData) { + setTopicContext({ + category: selectedCategory, + topics: categoryTopics.map(t => ({title: t.title, url: t.url, snippet: t.snippet, score: t.score})), + selected_topic: { + title: selectedTopicData.title, + url: selectedTopicData.url, + snippet: selectedTopicData.snippet, + }, + }); + } + setTopicInput(topic); + setCategoryResearchOpen(false); + }} + onRedoSearch={handleCategoryRedoSearch} + onConfirmSelection={(selectedTopics) => { + if (selectedTopics.length > 0) { + // Save topic context + const firstSelected = categoryTopics.find(t => t.title === selectedTopics[0]); + if (firstSelected) { + setTopicContext({ + category: selectedCategory, + topics: categoryTopics.map(t => ({title: t.title, url: t.url, snippet: t.snippet, score: t.score})), + selected_topic: { + title: firstSelected.title, + url: firstSelected.url, + snippet: firstSelected.snippet, + }, + }); + } + setTopicInput(selectedTopics[0]); + } + setCategoryResearchOpen(false); + }} + isCached={categoryCached} + /> + + {/* Enhance Topic Progress Modal */} + setShowEnhanceProgressModal(false)} + maxWidth="sm" + fullWidth + PaperProps={{ + sx: { + background: "linear-gradient(135deg, #1e1b4b 0%, #312e81 100%)", + backgroundColor: "#1e1b4b", + color: "#fff", + borderRadius: 3, + boxShadow: "0 8px 40px rgba(49, 46, 129, 0.4)", + }, + }} + > + + + Enhancing Your Topic + + + + + + + {enhanceTopicMessage || "Processing your topic..."} + + + This may take a few seconds + + + {/* Context info */} + + + Using context from: + + + {websiteData && ( + + )} + {topicContext && ( + + )} + {(!websiteData && !topicContext) && ( + + )} + + + + ); diff --git a/frontend/src/components/PodcastMaker/CreateStep/AvatarSelector.tsx b/frontend/src/components/PodcastMaker/CreateStep/AvatarSelector.tsx index ba833507..d5a14da9 100644 --- a/frontend/src/components/PodcastMaker/CreateStep/AvatarSelector.tsx +++ b/frontend/src/components/PodcastMaker/CreateStep/AvatarSelector.tsx @@ -102,80 +102,76 @@ export const AvatarSelector: React.FC = ({ sx={{ flex: 1, minWidth: 0, - p: { xs: 1.5, sm: 2.5 }, - borderRadius: 2, + borderRadius: 3, background: "#ffffff", - border: "1px solid rgba(15, 23, 42, 0.08)", - boxShadow: "0 1px 2px rgba(15, 23, 42, 0.04)", + border: "1px solid", + borderColor: "#e2e8f0", + boxShadow: "0 8px 30px rgba(15, 23, 42, 0.12)", + position: "relative", + overflow: "hidden", + "&::before": { + content: '""', + position: "absolute", + top: 0, + left: 0, + right: 0, + height: "3px", + background: "linear-gradient(90deg, #667eea 0%, #764ba2 50%, #667eea 100%)", + }, }} > - - - - 3 - - - - - - Podcast Presenter Avatar - - - + + + + 3 + + + + - - Avatar Options: + + Podcast Presenter Avatar - - Brand Avatar: Use your configured brand avatar for consistency.

- Asset Library: Choose from your previously uploaded images.

- Take a Selfie: Use your camera to capture a photo instantly for your podcast presenter.

- Upload your photo: We'll enhance it into a professional podcast presenter using AI. + + Select or upload an image for your presenter
- } - arrow - placement="top" - > - -
-
- - - {/* Left Side: Tabs & Content */} - + + + {/* Tabs in header - Mobile Responsive */} = ({ display: "none", }, "& .MuiTabs-flexContainer": { - gap: { xs: 0.5, sm: 1.5 }, + gap: 0.5, }, "& .MuiTab-root": { textTransform: "none", - minHeight: { xs: 32, sm: 44 }, + minHeight: { xs: 28, sm: 36 }, fontWeight: 600, - fontSize: { xs: "0.7rem", sm: "0.875rem" }, - borderRadius: { xs: "6px", sm: "12px" }, - px: { xs: 1, sm: 2.5 }, - minWidth: { xs: "auto", sm: 0 }, + fontSize: { xs: "0.65rem", sm: "0.8rem" }, + borderRadius: 1, + px: { xs: 1, sm: 1.5 }, + py: 0.5, + minWidth: "auto", color: "#64748b", - border: "1.5px solid #e2e8f0", - transition: "all 0.2s cubic-bezier(0.4, 0, 0.2, 1)", + border: "1px solid #e2e8f0", backgroundColor: "#ffffff", + transition: "all 0.2s ease", "&:hover": { - borderColor: "#cbd5e1", - backgroundColor: "#f8fafc", - transform: { xs: "none", sm: "translateY(-1px)" }, + borderColor: "#c7d2fe", + backgroundColor: "#eef2ff", }, "&.Mui-selected": { color: "#ffffff", borderColor: "transparent", - background: "linear-gradient(135deg, #667eea 0%, #764ba2 100%)", - boxShadow: "0 4px 12px rgba(102, 126, 234, 0.25)", + background: "linear-gradient(135deg, #6366f1 0%, #8b5cf6 100%)", + boxShadow: "0 2px 8px rgba(99, 102, 241, 0.3)", }, }, }} @@ -216,6 +212,33 @@ export const AvatarSelector: React.FC = ({ ))} + + + + Avatar Options: + + + Brand Avatar: Use your configured brand avatar for consistency.
+ Asset Library: Choose from your previously uploaded images.
+ Take a Selfie: Use your camera to capture a photo instantly.
+ Upload your photo: We'll enhance it into a professional presenter. +
+ + } + arrow + placement="top" + > + +
+ + + + {/* Content Area */} + + {/* Left Side: Content based on selected tab */} + {avatarTab === 0 && ( diff --git a/frontend/src/components/PodcastMaker/CreateStep/CategoryResearchModal.tsx b/frontend/src/components/PodcastMaker/CreateStep/CategoryResearchModal.tsx new file mode 100644 index 00000000..0539ba56 --- /dev/null +++ b/frontend/src/components/PodcastMaker/CreateStep/CategoryResearchModal.tsx @@ -0,0 +1,602 @@ +import React, { useState, useEffect } from "react"; +import { + Dialog, + DialogTitle, + DialogContent, + DialogActions, + Button, + Box, + Typography, + Stack, + CircularProgress, + Alert, + Chip, + IconButton, + TextField, + Tooltip, + Checkbox, +} from "@mui/material"; +import { + Newspaper as NewspaperIcon, + ShowChart as ShowChartIcon, + School as SchoolIcon, + Public as PublicIcon, + Close as CloseIcon, + OpenInNew as OpenInNewIcon, + Refresh as RefreshIcon, + CheckCircle as CheckCircleIcon, + Lightbulb as LightbulbIcon, + Search as SearchIcon, + Language as LanguageIcon, +} from "@mui/icons-material"; + +interface CategoryTopic { + title: string; + url: string; + snippet: string; + score: number; + favicon?: string; +} + +type CategoryType = "news" | "finance" | "research-paper" | "personal-site"; + +interface CategoryResearchModalProps { + open: boolean; + onClose: () => void; + category: CategoryType; + keyword?: string; + websiteUrl?: string; + loading?: boolean; + topics?: CategoryTopic[]; + error?: string | null; + onSelectTopic: (topic: string) => void; + onRedoSearch?: (keyword: string, websiteUrl?: string) => void; + onConfirmSelection?: (selectedTopics: string[]) => void; + isCached?: boolean; +} + +const CATEGORY_CONFIG: Record = { + "news": { label: "News", icon: , color: "#4F46E5", bgLight: "#EEF2FF" }, + "finance": { label: "Finance", icon: , color: "#059669", bgLight: "#ECFDF5" }, + "research-paper": { label: "Research Papers", icon: , color: "#7C3AED", bgLight: "#F3E8FF" }, + "personal-site": { label: "Personal Website", icon: , color: "#D97706", bgLight: "#FEF3C7" }, +}; + +const BEST_PRACTICES: Record = { + "news": [ + "Use specific, focused keywords for better results", + "Include relevant industry or niche terms", + "Add location or timeframe for localized news", + "Avoid very general terms like 'news' or 'updates'", + ], + "finance": [ + "Use specific, focused keywords for better results", + "Include asset class (stocks, crypto, forex, bonds)", + "Add timeframe (q1 2024, last month, etc.)", + "Include market or sector names for targeted results", + ], + "research-paper": [ + "Use academic keywords and terminology", + "Include specific topics or research areas", + "Add field of study (AI, medicine, climate, etc.)", + "Works best with technical or scientific topics", + ], + "personal-site": [ + "Enter the website URL in the input field below", + "The search will find content within that domain", + "Use specific page or topic keywords for best results", + "Leave keyword empty to get all pages from the site", + ], +}; + +export const CategoryResearchModal: React.FC = ({ + open, + onClose, + category, + keyword, + websiteUrl = "", + loading = false, + topics = [], + error = null, + onSelectTopic, + onRedoSearch, + onConfirmSelection, + isCached = false, +}) => { + const config = CATEGORY_CONFIG[category]; + const categoryLabel = config.label; + const categoryIcon = config.icon; + const categoryColor = config.color; + const categoryBgLight = config.bgLight; + + const [redoKeyword, setRedoKeyword] = useState(keyword || ""); + const [localWebsiteUrl, setLocalWebsiteUrl] = useState(websiteUrl); + const [selectedTopics, setSelectedTopics] = useState>(new Set()); + + useEffect(() => { + if (open) { + setRedoKeyword(keyword || ""); + setLocalWebsiteUrl(websiteUrl || ""); + setSelectedTopics(new Set()); + } + }, [open, keyword, websiteUrl]); + + const handleSelectTopic = (topic: CategoryTopic) => { + onSelectTopic(topic.title); + }; + + const handleClose = () => { + onClose(); + }; + + const handleRedoClick = () => { + if (onRedoSearch && redoKeyword.trim()) { + onRedoSearch(redoKeyword.trim(), category === "personal-site" ? localWebsiteUrl : undefined); + } + }; + + const handleToggleSelectTopic = (title: string) => { + const newSelected = new Set(selectedTopics); + if (newSelected.has(title)) { + newSelected.delete(title); + } else { + newSelected.add(title); + } + setSelectedTopics(newSelected); + }; + + const handleSelectAll = () => { + const allTitles = new Set(topics.map(t => t.title)); + setSelectedTopics(allTitles); + }; + + const handleDeselectAll = () => { + setSelectedTopics(new Set()); + }; + + const handleConfirm = () => { + if (onConfirmSelection && selectedTopics.size > 0) { + onConfirmSelection(Array.from(selectedTopics)); + onClose(); + } + }; + + const getDomain = (url: string) => { + try { + return new URL(url).hostname.replace("www.", ""); + } catch { + return url; + } + }; + + const isPersonalSite = category === "personal-site"; + + return ( + + + + + {categoryIcon} + + + + {categoryLabel} + + {keyword && ( + + Searching: {keyword} + + )} + + + + + + Best Practices for Search + + {BEST_PRACTICES[category].map((tip, idx) => ( + + • {tip} + + ))} + + } + arrow + placement="bottom-end" + > + } + label="For best results" + size="small" + sx={{ + background: categoryBgLight, + color: categoryColor, + border: `1px solid ${categoryColor}25`, + fontWeight: 600, + fontSize: "0.75rem", + cursor: "help", + "& .MuiChip-icon": { color: categoryColor }, + "&:hover": { + background: `${categoryColor}15`, + }, + }} + /> + + + + + + + + + {loading && ( + + + + Searching {categoryLabel.toLowerCase()}... + + + {isPersonalSite + ? `Searching within ${localWebsiteUrl || "your website"}` + : `Finding relevant ${categoryLabel.toLowerCase()} for your podcast`} + + + )} + + {error && ( + + {error} + + )} + + {!loading && !error && topics.length === 0 && ( + + + {React.cloneElement(categoryIcon as React.ReactElement, { sx: { fontSize: 32, color: "#d1d5db" } })} + + + No results found + + + {isPersonalSite + ? "Enter a website URL and try different keywords" + : "Try different search terms or redo search"} + + + )} + + {!loading && !error && topics.length > 0 && ( + <> + {/* Redo Search Bar */} + + + + Search again + + + {/* Website URL input for Personal Site */} + {isPersonalSite && ( + setLocalWebsiteUrl(e.target.value)} + sx={{ + width: 260, + "& .MuiOutlinedInput-root": { + background: "#fff", + fontSize: "0.8rem", + height: 34, + }, + "& .MuiOutlinedInput-notchedOutline": { + borderColor: "#d1d5db", + }, + }} + /> + )} + + setRedoKeyword(e.target.value)} + onKeyDown={(e) => e.key === "Enter" && handleRedoClick()} + sx={{ + flex: 1, + minWidth: 150, + maxWidth: 280, + "& .MuiOutlinedInput-root": { + background: "#fff", + fontSize: "0.8rem", + height: 34, + }, + "& .MuiOutlinedInput-notchedOutline": { + borderColor: "#d1d5db", + }, + }} + /> + + + + {/* Select All / Deselect All */} + {topics.length > 0 && ( + + + {selectedTopics.size} of {topics.length} selected + + + | + + + )} + + + {topics.map((topic, idx) => ( + + + handleToggleSelectTopic(topic.title)} + sx={{ + p: 0, + mt: 0.25, + color: "#d1d5db", + "&.Mui-checked": { color: categoryColor }, + }} + /> + handleSelectTopic(topic)}> + + {topic.title} + + + {topic.snippet} + + + {topic.favicon && ( + { + (e.target as HTMLImageElement).style.display = "none"; + }} + /> + )} + + {getDomain(topic.url)} + + + + + + + + ))} + + + )} + + + + + {topics.length} results • {category === "news" || category === "finance" ? "Powered by Tavily" : "Powered by Exa"} + + + + + + + + ); +}; \ No newline at end of file diff --git a/frontend/src/components/PodcastMaker/CreateStep/PodcastConfiguration.tsx b/frontend/src/components/PodcastMaker/CreateStep/PodcastConfiguration.tsx index 3a921186..aeeec8a6 100644 --- a/frontend/src/components/PodcastMaker/CreateStep/PodcastConfiguration.tsx +++ b/frontend/src/components/PodcastMaker/CreateStep/PodcastConfiguration.tsx @@ -57,14 +57,14 @@ export const PodcastConfiguration: React.FC = ({ sx={{ flex: { xs: "1 1 auto", lg: "0 0 320px" }, width: { xs: "100%", lg: "320px" }, - p: 3, borderRadius: 3, background: "#ffffff", - border: "1px solid rgba(102, 126, 234, 0.15)", + border: "1px solid", + borderColor: "#e2e8f0", height: "100%", display: "flex", flexDirection: "column", - boxShadow: "0 4px 20px rgba(102, 126, 234, 0.08)", + boxShadow: "0 8px 30px rgba(15, 23, 42, 0.12)", position: "relative", overflow: "hidden", "&::before": { @@ -78,49 +78,47 @@ export const PodcastConfiguration: React.FC = ({ }, }} > - + {/* Header with gradient background */} + 2 - + + + + + Basic Configuration + + + Set duration, speakers, and podcast mode + - - Basic Configuration - - + {/* Podcast Mode */} diff --git a/frontend/src/components/PodcastMaker/CreateStep/TopicUrlInput.tsx b/frontend/src/components/PodcastMaker/CreateStep/TopicUrlInput.tsx index 08b9fa5f..6c9a5776 100644 --- a/frontend/src/components/PodcastMaker/CreateStep/TopicUrlInput.tsx +++ b/frontend/src/components/PodcastMaker/CreateStep/TopicUrlInput.tsx @@ -1,7 +1,9 @@ -import React from "react"; -import { Box, Typography, TextField, Tooltip, Button, CircularProgress, alpha, Stack, Chip } from "@mui/material"; -import { AutoAwesome as AutoAwesomeIcon, AttachMoney as AttachMoneyIcon, TrendingUp as TrendingUpIcon } from "@mui/icons-material"; +import React, { useState, useCallback, useEffect, useRef } from "react"; +import { Box, Typography, TextField, Tooltip, Button, CircularProgress, alpha, Stack, Chip, IconButton, Collapse } from "@mui/material"; +import { AutoAwesome as AutoAwesomeIcon, AttachMoney as AttachMoneyIcon, TrendingUp as TrendingUpIcon, Mic as MicIcon, Stop as StopIcon, Language as LanguageIcon, Newspaper as NewspaperIcon, ShowChart as ShowChartIcon, School as SchoolIcon, Public as PublicIcon, Lightbulb as LightbulbIcon } from "@mui/icons-material"; import { Knobs } from "../types"; +import { podcastApi } from "../../../services/podcastApi"; +import { WebsitePreviewModal } from "./WebsitePreviewModal"; export const TOPIC_PLACEHOLDERS = [ "Industry insights: Latest trends in AI for Content Marketing", @@ -19,11 +21,14 @@ interface TopicUrlInputProps { showAIDetailsButton: boolean; onAIDetailsClick?: () => void; onTrendingTopicsClick?: () => void; + onCategoryResearchClick?: (category: "news" | "finance" | "research-paper" | "personal-site", websiteUrl?: string) => void; placeholderIndex: number; loading?: boolean; loadingMessage?: string; trendingLoading?: boolean; - estimatedCost?: { + categoryResearchLoading?: boolean; + // Estimated cost - can be a number (from pre-estimate) or object (from analyze response) + estimatedCost?: number | { ttsCost: number; avatarCost: number; videoCost: number; @@ -33,6 +38,40 @@ interface TopicUrlInputProps { duration?: number; speakers?: number; knobs?: Knobs; + podcastMode?: string; + // Website extraction data - passed from parent for use with AI enhance + extractedData?: { + title?: string; + text?: string; + summary?: string; + highlights?: string[]; + url: string; + image?: string; + favicon?: string; + subpages?: Array<{id?: string; title?: string; url?: string; summary?: string; text?: string}>; + } | null; + setExtractedData?: (data: any) => void; +} + +interface SpeechRecognitionType { + lang: string; + continuous: boolean; + interimResults: boolean; + maxAlternatives: number; + onresult: ((event: { results: { isFinal: boolean; [index: number]: { transcript: string } }[], resultIndex: number }) => void) | null; + onerror: ((event: { error: string }) => void) | null; + onend: (() => void) | null; + onstart: (() => void) | null; + start: () => void; + stop: () => void; + abort: () => void; +} + +declare global { + interface Window { + SpeechRecognition: new () => SpeechRecognitionType; + webkitSpeechRecognition: new () => SpeechRecognitionType; + } } export const TopicUrlInput: React.FC = ({ @@ -42,26 +81,161 @@ export const TopicUrlInput: React.FC = ({ showAIDetailsButton, onAIDetailsClick, onTrendingTopicsClick, + onCategoryResearchClick, placeholderIndex, loading = false, loadingMessage, trendingLoading = false, + categoryResearchLoading = false, estimatedCost, duration = 1, speakers = 1, knobs, + podcastMode = "audio_video", + extractedData: extractedDataProp, + setExtractedData: setExtractedDataProp, }) => { + // Helper to get total cost from various estimate formats (number | object | null) + const getTotalCost = (cost: number | { total: number } | null | undefined): number | null => { + if (cost === null || cost === undefined) return null; + if (typeof cost === "number") return cost; + if (typeof cost === "object" && "total" in cost) return cost.total; + return null; + }; + + const totalCost = getTotalCost(estimatedCost); + + const [isListening, setIsListening] = useState(false); + const [error, setError] = useState(null); + const recognitionRef = useRef(null); + + // Use props if provided, otherwise use local state (for backward compatibility) + const [localExtractedData, setLocalExtractedData] = useState(null); + const _extractedData = extractedDataProp !== undefined ? extractedDataProp : localExtractedData; + const _setExtractedData = setExtractedDataProp || setLocalExtractedData; + + // Website extraction state + const [showWebsiteInput, setShowWebsiteInput] = useState(false); + const [websiteUrl, setWebsiteUrl] = useState(""); + const [isExtracting, setIsExtracting] = useState(false); + const [extractedData, setExtractedData] = useState<{title?: string; text?: string; summary?: string; highlights?: string[]; url: string; image?: string; favicon?: string; subpages?: Array<{id?: string; title?: string; url?: string; summary?: string; text?: string}>} | null>(null); + const [showPreviewModal, setShowPreviewModal] = useState(false); + const [websiteError, setWebsiteError] = useState(null); + + const isSupported = typeof window !== 'undefined' && (window.SpeechRecognition !== undefined || window.webkitSpeechRecognition !== undefined); + + const getBrowserLanguage = (): string => { + const lang = (navigator.language || '').toLowerCase(); + if (lang.startsWith('en')) return 'en-US'; + if (lang.startsWith('hi')) return 'hi-IN'; + if (lang.startsWith('es')) return 'es-ES'; + if (lang.startsWith('fr')) return 'fr-FR'; + if (lang.startsWith('de')) return 'de-DE'; + if (lang.startsWith('zh')) return 'zh-CN'; + if (lang.startsWith('ja')) return 'ja-JP'; + if (lang.startsWith('ko')) return 'ko-KR'; + return 'en-US'; + }; + + const startListening = useCallback(() => { + if (!isSupported) { + setError('Speech recognition is not supported in this browser. Try Chrome or Edge.'); + return; + } + + setError(null); + + const SpeechRecognitionAPI = window.SpeechRecognition || (window as any).webkitSpeechRecognition; + if (!recognitionRef.current) { + const recognition = new SpeechRecognitionAPI() as SpeechRecognitionType; + recognition.lang = getBrowserLanguage(); + recognition.continuous = false; + recognition.interimResults = true; + recognition.maxAlternatives = 1; + + recognition.onresult = (event) => { + let transcript = ''; + let isFinal = false; + + for (let i = 0; i < event.results.length; i++) { + transcript += event.results[i][0].transcript; + if (event.results[i].isFinal) { + isFinal = true; + } + } + + if (isFinal) { + const newValue = value ? `${value} ${transcript.trim()}`.trim() : transcript.trim(); + onChange(newValue); + } + }; + + recognition.onerror = (event) => { + console.error('[Speech] Error:', event.error); + if (event.error === 'not-allowed') { + setError('Microphone access denied. Please allow microphone access in your browser settings.'); + } else if (event.error === 'network') { + setError('Network error. Please check your internet connection.'); + } else if (event.error !== 'aborted') { + setError(`Speech recognition error: ${event.error}`); + } + setIsListening(false); + }; + + recognition.onend = () => { + setIsListening(false); + }; + + recognitionRef.current = recognition; + } + + recognitionRef.current.onstart = () => { + setIsListening(true); + }; + + try { + recognitionRef.current.start(); + } catch (e) { + console.error('[Speech] Start error:', e); + setError('Failed to start speech recognition. Please try again.'); + } + }, [isSupported, onChange, value]); + + const stopListening = useCallback(() => { + if (recognitionRef.current) { + recognitionRef.current.stop(); + } + setIsListening(false); + }, []); + + const handleMicClick = useCallback(() => { + if (isListening) { + stopListening(); + } else { + startListening(); + } + }, [isListening, stopListening, startListening]); + + useEffect(() => { + return () => { + if (recognitionRef.current) { + recognitionRef.current.abort(); + } + }; + }, []); + return ( = ({ }, }} > - - - +{/* Header with gradient background */} + + + 1 - + - - Enter Podcast Topic or Blog URL - - - - Estimated Cost Breakdown: - - - • Audio Generation: ${estimatedCost.ttsCost}
- • Avatar Creation: ${estimatedCost.avatarCost}
- • Video Rendering: ${estimatedCost.videoCost}
- • Research: ${estimatedCost.researchCost}
- - Total: ${estimatedCost.total} + + {!showWebsiteInput && ( + } + label="Your Website" + onClick={() => setShowWebsiteInput(true)} + disabled={loading} + size="small" + sx={{ + background: "rgba(102, 126, 234, 0.08)", + color: "#667eea", + border: "1px solid rgba(102, 126, 234, 0.25)", + fontWeight: 600, + fontSize: "0.75rem", + height: 26, + "&:hover": { + background: "rgba(102, 126, 234, 0.15)", + transform: "scale(1.02)", + }, + }} + /> + )} + + + + Estimated Cost: + + + Total: ${totalCost} - Based on {duration} min, {speakers} speaker{speakers > 1 ? "s" : ""}, {knobs?.bitrate === "hd" ? "HD" : "standard"} quality + Based on {duration} min, {speakers} speaker{speakers > 1 ? "s" : ""}, {podcastMode} mode - -
- ) : ( - "Estimate unavailable until returned by the server." - ) +
+ ) : ( + "Estimate unavailable. Pricing data not found." + ) + } + arrow + placement="top" + > + } + label={totalCost ? `Est. $${totalCost}` : "Est. Unavailable"} + size="small" + sx={{ + background: totalCost ? "linear-gradient(135deg, rgba(16, 185, 129, 0.12) 0%, rgba(5, 150, 105, 0.12) 100%)" : "rgba(100, 116, 139, 0.12)", + color: totalCost ? "#059669" : "#475569", + fontWeight: 600, + border: totalCost ? "1px solid rgba(16, 185, 129, 0.2)" : "1px solid rgba(100, 116, 139, 0.25)", + fontSize: "0.75rem", + height: 26, + cursor: "help", + }} + /> + +
+
+ + {/* Website input row - appears when user clicks "Your Website" chip */} + + + setWebsiteUrl(e.target.value)} + disabled={isExtracting} + error={!!websiteError} + helperText={websiteError} + sx={{ + "& .MuiOutlinedInput-root": { + backgroundColor: "#f8fafc", + fontSize: "0.875rem", + "&.Mui-focused": { + backgroundColor: "#ffffff", + }, + }, + }} + /> + + + + + + - } - label={estimatedCost ? `Est. $${estimatedCost.total}` : "Est. Unavailable"} + onChange(e.target.value)} size="small" + disabled={isListening} + helperText={ + error + ? error + : isListening + ? "Listening... Speak your topic now." + : isUrl + ? "URL detected. We'll analyze this page content." + : "Enter a clear, concise topic. You can also click the mic to speak." + } sx={{ - background: estimatedCost - ? "linear-gradient(135deg, rgba(16, 185, 129, 0.12) 0%, rgba(5, 150, 105, 0.12) 100%)" - : "rgba(100, 116, 139, 0.12)", - color: estimatedCost ? "#059669" : "#475569", - fontWeight: 600, - border: estimatedCost - ? "1px solid rgba(16, 185, 129, 0.2)" - : "1px solid rgba(100, 116, 139, 0.25)", - fontSize: "0.75rem", - height: 26, - cursor: "help", + "& .MuiOutlinedInput-root": { + backgroundColor: isListening ? "rgba(16, 185, 129, 0.04)" : "#f8fafc", + border: isListening ? "2px solid rgba(16, 185, 129, 0.5)" : "2px solid rgba(102, 126, 234, 0.2)", + borderRadius: 2, + fontSize: "1rem", + transition: "all 0.2s ease", + "&:hover": { + backgroundColor: "#ffffff", + borderColor: isListening ? "rgba(16, 185, 129, 0.7)" : "rgba(102, 126, 234, 0.4)", + boxShadow: isListening ? "0 2px 8px rgba(16, 185, 129, 0.15)" : "0 2px 8px rgba(102, 126, 234, 0.1)", + }, + "&.Mui-focused": { + backgroundColor: "#ffffff", + borderColor: isListening ? "#10b981" : isUrl ? "#10b981" : "#667eea", + borderWidth: 2, + boxShadow: isListening + ? "0 0 0 4px rgba(16, 185, 129, 0.1)" + : isUrl + ? "0 0 0 4px rgba(16, 185, 129, 0.1)" + : "0 0 0 4px rgba(102, 126, 234, 0.1)", + }, + }, + "& .MuiOutlinedInput-input": { + fontSize: "1rem", + lineHeight: 1.7, + color: "#1e293b", + fontWeight: 500, + "&::placeholder": { + color: "#64748b", + opacity: 1, + fontWeight: 400, + }, + }, + "& .MuiFormHelperText-root": { + color: error ? "#ef4444" : isListening ? "#059669" : isUrl ? "#059669" : "#64748b", + fontSize: "0.8125rem", + fontWeight: 500, + mt: 1, + }, }} /> -
- - onChange(e.target.value)} - size="small" - helperText={ - isUrl - ? "URL detected. We'll analyze this page content." - : "Enter a clear, concise topic. We'll expand it into a full script after you click Analyze." - } - sx={{ - "& .MuiOutlinedInput-root": { - backgroundColor: "#f8fafc", - border: "2px solid rgba(102, 126, 234, 0.2)", - borderRadius: 2, - fontSize: "1rem", - transition: "all 0.2s ease", - "&:hover": { - backgroundColor: "#ffffff", - borderColor: "rgba(102, 126, 234, 0.4)", - boxShadow: "0 2px 8px rgba(102, 126, 234, 0.1)", - }, - "&.Mui-focused": { - backgroundColor: "#ffffff", - borderColor: isUrl ? "#10b981" : "#667eea", - borderWidth: 2, - boxShadow: isUrl - ? "0 0 0 4px rgba(16, 185, 129, 0.1)" - : "0 0 0 4px rgba(102, 126, 234, 0.1)", - }, - }, - "& .MuiOutlinedInput-input": { - fontSize: "1rem", - lineHeight: 1.7, - color: "#1e293b", - fontWeight: 500, - "&::placeholder": { - color: "#64748b", - opacity: 1, - fontWeight: 400, - }, - }, - "& .MuiFormHelperText-root": { - color: isUrl ? "#059669" : "#64748b", + + {/* Mic button with listening indicator - positioned inside the textarea bottom-right */} + {isSupported && !loading && ( + + {isListening && ( + + Listening... + + )} + + {isListening ? ( + + ) : ( + + )} + + + )} + + + {/* Category Research Chips - News + Finance + Research Papers + Personal Website */} + {showAIDetailsButton && !isUrl && onCategoryResearchClick && ( + + : } + label="News" + onClick={() => onCategoryResearchClick("news")} + disabled={categoryResearchLoading || loading} + size="small" + sx={{ + background: "linear-gradient(135deg, rgba(102, 126, 234, 0.1) 0%, rgba(118, 75, 162, 0.1) 100%)", + color: "#667eea", + border: "1px solid rgba(102, 126, 234, 0.3)", + fontWeight: 600, fontSize: "0.8125rem", - fontWeight: 500, - mt: 1, - }, - }} - /> - + "&:hover": { + background: "linear-gradient(135deg, rgba(102, 126, 234, 0.2) 0%, rgba(118, 75, 162, 0.2) 100%)", + transform: "scale(1.02)", + }, + }} + /> + : } + label="Finance" + onClick={() => onCategoryResearchClick("finance")} + disabled={categoryResearchLoading || loading} + size="small" + sx={{ + background: "linear-gradient(135deg, rgba(16, 185, 129, 0.1) 0%, rgba(5, 150, 105, 0.1) 100%)", + color: "#10b981", + border: "1px solid rgba(16, 185, 129, 0.3)", + fontWeight: 600, + fontSize: "0.8125rem", + "&:hover": { + background: "linear-gradient(135deg, rgba(16, 185, 129, 0.2) 0%, rgba(5, 150, 105, 0.2) 100%)", + transform: "scale(1.02)", + }, + }} + /> + : } + label="Research Papers" + onClick={() => onCategoryResearchClick("research-paper")} + disabled={categoryResearchLoading || loading} + size="small" + sx={{ + background: "linear-gradient(135deg, rgba(139, 92, 246, 0.1) 0%, rgba(124, 58, 237, 0.1) 100%)", + color: "#8b5cf6", + border: "1px solid rgba(139, 92, 246, 0.3)", + fontWeight: 600, + fontSize: "0.8125rem", + "&:hover": { + background: "linear-gradient(135deg, rgba(139, 92, 246, 0.2) 0%, rgba(124, 58, 237, 0.2) 100%)", + transform: "scale(1.02)", + }, + }} + /> + : } + label="Personal Site" + onClick={() => onCategoryResearchClick("personal-site", value)} + disabled={categoryResearchLoading || loading} + size="small" + sx={{ + background: "linear-gradient(135deg, rgba(245, 158, 11, 0.1) 0%, rgba(217, 119, 6, 0.1) 100%)", + color: "#f59e0b", + border: "1px solid rgba(245, 158, 11, 0.3)", + fontWeight: 600, + fontSize: "0.8125rem", + "&:hover": { + background: "linear-gradient(135deg, rgba(245, 158, 11, 0.2) 0%, rgba(217, 119, 6, 0.2) 100%)", + transform: "scale(1.02)", + }, + }} + /> + + )} {/* Enhance topic with AI button + Get Trending Topics - appears when user types (and not a URL) */} {showAIDetailsButton && !isUrl && ( @@ -340,6 +750,32 @@ export const TopicUrlInput: React.FC = ({ )} + + {/* Website Preview Modal */} + { + setShowPreviewModal(false); + setShowWebsiteInput(false); + setWebsiteUrl(""); + }} + onUseTextOnly={() => { + if (extractedData?.summary) { + const newValue = extractedData.title + ? `${extractedData.title}: ${extractedData.summary}` + : extractedData.summary; + onChange(newValue); + } + setShowPreviewModal(false); + setShowWebsiteInput(false); + setWebsiteUrl(""); + }} + onAnalyzeContent={() => { + // Phase 2: Will trigger full website analysis + console.log("[TopicUrlInput] Analyze Content clicked - Phase 2 feature"); + }} + /> ); -}; +}; \ No newline at end of file diff --git a/frontend/src/components/PodcastMaker/CreateStep/TrendingTopicsModal.tsx b/frontend/src/components/PodcastMaker/CreateStep/TrendingTopicsModal.tsx index 52f76324..feaa2759 100644 --- a/frontend/src/components/PodcastMaker/CreateStep/TrendingTopicsModal.tsx +++ b/frontend/src/components/PodcastMaker/CreateStep/TrendingTopicsModal.tsx @@ -83,10 +83,11 @@ export const TrendingTopicsModal: React.FC = ({ if (result.success && result.data) { setTrendsData(result.data as GoogleTrendsData); } else { - setError(result.error || "Failed to fetch trends data"); + setError(result.error || "Failed to fetch trends data. Google may be rate-limiting requests — please try again in a few minutes."); } } catch (err: any) { - setError(err?.response?.data?.detail || err?.message || "Failed to fetch trending topics"); + const msg = err?.response?.data?.detail || err?.message || "Failed to fetch trending topics. Please try again later."; + setError(msg); } finally { setLoading(false); } @@ -113,6 +114,15 @@ export const TrendingTopicsModal: React.FC = ({ const regions = trendsData?.interest_by_region || []; const relatedTopics = trendsData?.related_topics || { top: [], rising: [] }; const relatedQueries = trendsData?.related_queries || { top: [], rising: [] }; + const hasAnyData = trendsData + && ( + trendsData.interest_over_time?.length > 0 + || trendsData.interest_by_region?.length > 0 + || trendsData.related_topics?.top?.length > 0 + || trendsData.related_topics?.rising?.length > 0 + || trendsData.related_queries?.top?.length > 0 + || trendsData.related_queries?.rising?.length > 0 + ); return ( = ({ )} - {!loading && trendsData && ( + {!loading && trendsData && !hasAnyData && ( + + + + No trends data available + + + Google Trends could not find data for “{initialKeywords}”. + {trendsData.error + ? " This may be due to rate limiting — please try again in a few minutes." + : " The topic may be too specific. Try a broader keyword."} + + + + )} + + {!loading && trendsData && hasAnyData && ( <> { + try { + const urlObj = new URL(url); + const hostname = urlObj.hostname.replace(/^www\./, ''); + return hostname; + } catch { + return "Website"; + } +}; + +interface ExtractedData { + title?: string; + text?: string; + summary?: string; + highlights?: string[]; + url: string; + image?: string; + favicon?: string; + subpages?: Array<{ + id?: string; + title?: string; + url?: string; + summary?: string; + text?: string; + }>; +} + +interface WebsitePreviewModalProps { + open: boolean; + extractedData: ExtractedData | null; + onClose: () => void; + onUseTextOnly: () => void; + onAnalyzeContent: () => void; +} + +export const WebsitePreviewModal: React.FC = ({ + open, + extractedData, + onClose, + onUseTextOnly, + onAnalyzeContent, +}) => { + if (!extractedData) return null; + + const rootDomain = extractRootDomain(extractedData.url); + + return ( + + + + {(extractedData.favicon || extractedData.image) ? ( + { + (e.target as HTMLImageElement).style.display = 'none'; + }} + /> + ) : ( + + + + )} + + + {rootDomain} Content Analysis + + + Extracted content from your website + + + + + + + + + + {/* Title */} + {extractedData.title && ( + + + Company / Organization + + + {extractedData.title} + + + )} + + {/* Summary */} + {extractedData.summary && ( + + + About + + + + {extractedData.summary.length > 800 + ? extractedData.summary.substring(0, 800) + "..." + : extractedData.summary} + + + + )} + + {/* Highlights */} + {extractedData.highlights && extractedData.highlights.length > 0 && ( + + + Key Highlights + + + {extractedData.highlights.slice(0, 6).map((highlight, index) => ( + + + + {highlight} + + + ))} + + + )} + + + + {/* URL */} + + + + + + + Source URL + + + {extractedData.url} + + + + + {/* Image / Favicon Display */} + {(extractedData.image || extractedData.favicon) && ( + + + Site Image + + + {extractedData.favicon && ( + { + (e.target as HTMLImageElement).style.display = 'none'; + }} + /> + )} + {extractedData.image && ( + { + (e.target as HTMLImageElement).style.display = 'none'; + }} + /> + )} + + + )} + + {/* Subpages Display */} + {extractedData.subpages && extractedData.subpages.length > 0 && ( + + + Subpages ({extractedData.subpages.length}) + + + {extractedData.subpages.slice(0, 4).map((subpage, index) => ( + + + {subpage.title || subpage.url || `Page ${index + 1}`} + + {subpage.summary && ( + + {subpage.summary} + + )} + {subpage.url && ( + + {subpage.url} + + )} + + ))} + + + )} + + + + + + + + + + + ); +}; \ No newline at end of file diff --git a/frontend/src/components/Research/types/intent.types.ts b/frontend/src/components/Research/types/intent.types.ts index 8a66a70b..ab4b478f 100644 --- a/frontend/src/components/Research/types/intent.types.ts +++ b/frontend/src/components/Research/types/intent.types.ts @@ -153,6 +153,7 @@ export interface GoogleTrendsData { timeframe: string; geo: string; keywords: string[]; + source?: string; timestamp: string; cached?: boolean; error?: string; diff --git a/frontend/src/components/shared/VoiceClonePanel.tsx b/frontend/src/components/shared/VoiceClonePanel.tsx new file mode 100644 index 00000000..81d3b221 --- /dev/null +++ b/frontend/src/components/shared/VoiceClonePanel.tsx @@ -0,0 +1,145 @@ +import React from "react"; +import { + Box, + Button, + Stack, + Typography, + Collapse, + IconButton, +} from "@mui/material"; +import { + ExpandLess, + ExpandMore, + AutoAwesome, + RestartAlt, + CheckCircle, + Close, +} from "@mui/icons-material"; +import { VoiceAvatarPlaceholder } from "../OnboardingWizard/PersonalizationStep/components/VoiceAvatarPlaceholder"; + +export interface VoiceClonePanelProps { + showVoiceClonePanel: boolean; + voiceCreated: boolean; + redoingClone: boolean; + onTogglePanel: () => void; + onVoiceSet: () => void; + onCancelRedo: () => void; + onDoneWithVoice: () => void; +} + +export const VoiceClonePanel: React.FC = ({ + showVoiceClonePanel, + voiceCreated, + redoingClone, + onTogglePanel, + onVoiceSet, + onCancelRedo, + onDoneWithVoice, +}) => { + return ( + + + + + + + + {voiceCreated && ( + + + + + {redoingClone ? "Voice Clone Updated!" : "Voice Clone Created Successfully!"} + + + + + {redoingClone ? "Your voice clone has been updated and will be used for your podcast." : "Your custom voice clone is ready and will be used for your podcast."} + + + + + + + + )} + + + + ); +}; \ No newline at end of file diff --git a/frontend/src/components/shared/VoiceSelector.tsx b/frontend/src/components/shared/VoiceSelector.tsx index 8d412905..da20715b 100644 --- a/frontend/src/components/shared/VoiceSelector.tsx +++ b/frontend/src/components/shared/VoiceSelector.tsx @@ -42,35 +42,20 @@ import { import { getLatestVoiceClone, VoiceCloneResponse } from "../../api/brandAssets"; import { getAuthTokenGetter, getApiUrl } from "../../api/client"; import { VoiceAvatarPlaceholder } from "../OnboardingWizard/PersonalizationStep/components/VoiceAvatarPlaceholder"; - -export type VoiceOption = { - id: string; - name: string; - personality?: string; - isCustom?: boolean; - previewUrl?: string; - gender?: "male" | "female"; - category?: string; -}; - -export type VoiceAudioSettings = { - speed: number; - volume: number; - pitch: number; - emotion: string; -}; - -const DEFAULT_AUDIO_SETTINGS: VoiceAudioSettings = { - speed: 1.0, - volume: 1.0, - pitch: 0, - emotion: "neutral", -}; - -const EMOTION_OPTIONS = ["neutral", "happy", "sad", "angry", "fearful", "disgusted", "surprised"]; - -type GenderFilter = "all" | "male" | "female"; -type CategoryFilter = string; +import { useVoicePreview } from "./useVoicePreview"; +import { useVoiceFiltering } from "./useVoiceFiltering"; +import { VoiceClonePanel } from "./VoiceClonePanel"; +import { + VoiceOption, + VoiceAudioSettings, + DEFAULT_AUDIO_SETTINGS, + EMOTION_OPTIONS, + VOICE_PREVIEW_MAP, + CATEGORY_OPTIONS, + PREDEFINED_VOICES, + CategoryFilter, + VoiceSelectorGenderFilter, +} from "./voiceConstants"; interface VoiceSelectorProps { value: string; @@ -82,58 +67,6 @@ interface VoiceSelectorProps { onAudioSettingsChange?: (settings: VoiceAudioSettings) => void; } -const VOICE_SAMPLE_BASE = "/assets/voice-samples"; - -const VOICE_PREVIEW_MAP: Record = { - Wise_Woman: `${VOICE_SAMPLE_BASE}/wise_woman.mp3`, - Friendly_Person: `${VOICE_SAMPLE_BASE}/friendly_person.mp3`, - Inspirational_girl: `${VOICE_SAMPLE_BASE}/inspirational_girl.mp3`, - Deep_Voice_Man: `${VOICE_SAMPLE_BASE}/deep_voice_man.mp3`, - Calm_Woman: `${VOICE_SAMPLE_BASE}/calm_woman.mp3`, - Casual_Guy: `${VOICE_SAMPLE_BASE}/casual_guy.mp3`, - Lively_Girl: `${VOICE_SAMPLE_BASE}/lively_girl.mp3`, - Patient_Man: `${VOICE_SAMPLE_BASE}/patient_man.mp3`, - Young_Knight: `${VOICE_SAMPLE_BASE}/young_knight.mp3`, - Determined_Man: `${VOICE_SAMPLE_BASE}/determined_man.mp3`, - Lovely_Girl: `${VOICE_SAMPLE_BASE}/lovely_girl.mp3`, - Decent_Boy: `${VOICE_SAMPLE_BASE}/decent_boy.mp3`, - Imposing_Manner: `${VOICE_SAMPLE_BASE}/imposing_manner.mp3`, - Elegant_Man: `${VOICE_SAMPLE_BASE}/elegant_man.mp3`, - Abbess: `${VOICE_SAMPLE_BASE}/abbess.mp3`, - Sweet_Girl_2: `${VOICE_SAMPLE_BASE}/sweet_girl.mp3`, - Exuberant_Girl: `${VOICE_SAMPLE_BASE}/exuberant_girl.mp3`, -}; - -const CATEGORY_OPTIONS: { value: CategoryFilter; label: string }[] = [ - { value: "all", label: "All" }, - { value: "educational", label: "Educational" }, - { value: "marketing", label: "Marketing" }, - { value: "professional", label: "Professional" }, - { value: "creative", label: "Creative" }, - { value: "calming", label: "Calming" }, - { value: "motivational", label: "Motivational" }, -]; - -const PREDEFINED_VOICES: VoiceOption[] = [ - { id: "Wise_Woman", name: "Wise Woman", personality: "Authoritative, trustworthy female voice - perfect for educational content", previewUrl: VOICE_PREVIEW_MAP.Wise_Woman, gender: "female", category: "educational" }, - { id: "Friendly_Person", name: "Friendly Person", personality: "Warm, approachable voice - great for welcoming introductions", previewUrl: VOICE_PREVIEW_MAP.Friendly_Person, category: "marketing" }, - { id: "Inspirational_girl", name: "Inspirational Girl", personality: "Motivational, uplifting female voice - ideal for inspiration", previewUrl: VOICE_PREVIEW_MAP.Inspirational_girl, gender: "female", category: "motivational" }, - { id: "Deep_Voice_Man", name: "Deep Voice Man", personality: "Powerful, commanding male voice - excellent for serious topics", previewUrl: VOICE_PREVIEW_MAP.Deep_Voice_Man, gender: "male", category: "professional" }, - { id: "Calm_Woman", name: "Calm Woman", personality: "Soothing, composed female voice - perfect for meditation or sensitive topics", previewUrl: VOICE_PREVIEW_MAP.Calm_Woman, gender: "female", category: "calming" }, - { id: "Casual_Guy", name: "Casual Guy", personality: "Relaxed, conversational male voice - great for vlogs and tutorials", previewUrl: VOICE_PREVIEW_MAP.Casual_Guy, gender: "male", category: "marketing" }, - { id: "Lively_Girl", name: "Lively Girl", personality: "Energetic, enthusiastic female voice - ideal for exciting announcements", previewUrl: VOICE_PREVIEW_MAP.Lively_Girl, gender: "female", category: "marketing" }, - { id: "Patient_Man", name: "Patient Man", personality: "Gentle, understanding male voice - perfect for explanations", previewUrl: VOICE_PREVIEW_MAP.Patient_Man, gender: "male", category: "educational" }, - { id: "Young_Knight", name: "Young Knight", personality: "Brave, confident male voice - great for adventure and gaming", previewUrl: VOICE_PREVIEW_MAP.Young_Knight, gender: "male", category: "creative" }, - { id: "Determined_Man", name: "Determined Man", personality: "Strong, resolute male voice - excellent for motivational speeches", previewUrl: VOICE_PREVIEW_MAP.Determined_Man, gender: "male", category: "motivational" }, - { id: "Lovely_Girl", name: "Lovely Girl", personality: "Sweet, charming female voice - ideal for storytelling", previewUrl: VOICE_PREVIEW_MAP.Lovely_Girl, gender: "female", category: "creative" }, - { id: "Decent_Boy", name: "Decent Boy", personality: "Honest, sincere male voice - perfect for testimonials", previewUrl: VOICE_PREVIEW_MAP.Decent_Boy, gender: "male", category: "marketing" }, - { id: "Imposing_Manner", name: "Imposing Manner", personality: "Formal, dignified male voice - great for corporate content", previewUrl: VOICE_PREVIEW_MAP.Imposing_Manner, gender: "male", category: "professional" }, - { id: "Elegant_Man", name: "Elegant Man", personality: "Refined, sophisticated male voice - ideal for luxury content", previewUrl: VOICE_PREVIEW_MAP.Elegant_Man, gender: "male", category: "professional" }, - { id: "Abbess", name: "Abbess", personality: "Spiritual, serene female voice - perfect for meditation", previewUrl: VOICE_PREVIEW_MAP.Abbess, gender: "female", category: "calming" }, - { id: "Sweet_Girl_2", name: "Sweet Girl 2", personality: "Gentle, melodic female voice - excellent for children's content", previewUrl: VOICE_PREVIEW_MAP.Sweet_Girl_2, gender: "female", category: "creative" }, - { id: "Exuberant_Girl", name: "Exuberant Girl", personality: "Joyful, expressive female voice - ideal for celebrations", previewUrl: VOICE_PREVIEW_MAP.Exuberant_Girl, gender: "female", category: "creative" }, -]; - export const VOICE_CLONE_ID = "MY_VOICE_CLONE"; export const VoiceSelector: React.FC = ({ @@ -147,7 +80,6 @@ export const VoiceSelector: React.FC = ({ }) => { const [voiceClone, setVoiceClone] = useState(null); const [loadingVoiceClone, setLoadingVoiceClone] = useState(false); - const [playingPreview, setPlayingPreview] = useState(null); const [showVoiceClonePanel, setShowVoiceClonePanel] = useState(false); const [voiceCreated, setVoiceCreated] = useState(false); const [redoingClone, setRedoingClone] = useState(false); @@ -157,12 +89,23 @@ export const VoiceSelector: React.FC = ({ const [localAudioSettings, setLocalAudioSettings] = useState( externalAudioSettings || { ...DEFAULT_AUDIO_SETTINGS } ); - const [genderFilter, setGenderFilter] = useState("all"); + const [genderFilter, setGenderFilter] = useState("all"); const [categoryFilter, setCategoryFilter] = useState("all"); - const audioRef = useRef(null); const prevVoiceCloneIdRef = useRef(null); - const fetchVoiceClone = async () => { + const { playingPreview, handlePreview, stopCurrentAudio } = useVoicePreview(); + + const isPreviewing = playingPreview !== null; + + const { voiceOptions, filteredVoices } = useVoiceFiltering({ + showVoiceClone, + voiceClone, + value, + genderFilter, + categoryFilter, + }); + + const fetchVoiceClone = useCallback(async () => { try { setLoadingVoiceClone(true); const result = await getLatestVoiceClone(); @@ -174,36 +117,7 @@ export const VoiceSelector: React.FC = ({ } finally { setLoadingVoiceClone(false); } - }; - - const voiceOptions = useMemo(() => { - const options: VoiceOption[] = [...PREDEFINED_VOICES]; - - if (showVoiceClone && voiceClone?.success && voiceClone.custom_voice_id) { - options.unshift({ - id: VOICE_CLONE_ID, - name: voiceClone.voice_name || voiceClone.custom_voice_id || "My Voice Clone", - personality: "Your own voice - cloned from audio sample", - isCustom: true, - previewUrl: voiceClone.preview_audio_url, - }); - } - - return options; - }, [showVoiceClone, voiceClone]); - - const filteredVoices = useMemo(() => { - const filtered = PREDEFINED_VOICES.filter(v => { - if (genderFilter !== "all" && v.gender !== genderFilter) return false; - if (categoryFilter !== "all" && v.category !== categoryFilter) return false; - return true; - }); - if (value && value !== VOICE_CLONE_ID && !filtered.some(v => v.id === value)) { - const selected = PREDEFINED_VOICES.find(v => v.id === value); - if (selected) filtered.unshift(selected); - } - return filtered; - }, [genderFilter, categoryFilter, value]); + }, []); useEffect(() => { if (!showVoiceClone) return; @@ -222,80 +136,6 @@ export const VoiceSelector: React.FC = ({ } }, [voiceClone]); - const stopCurrentAudio = useCallback(() => { - if (audioRef.current) { - audioRef.current.pause(); - audioRef.current.currentTime = 0; - audioRef.current.onended = null; - audioRef.current.onerror = null; - audioRef.current = null; - } - }, []); - - const handlePreview = useCallback(async (voice: VoiceOption) => { - if (!voice.previewUrl) return; - - if (playingPreview === voice.id) { - stopCurrentAudio(); - setPlayingPreview(null); - return; - } - - stopCurrentAudio(); - setPlayingPreview(voice.id); - - // Append auth token for endpoints that require it (e.g. /api/assets/) - let previewUrl = voice.previewUrl; - // Convert relative URLs to absolute (pointing to backend, not Vercel) - if (previewUrl.startsWith('/')) { - previewUrl = `${getApiUrl()}${previewUrl}`; - } - try { - const tokenGetter = getAuthTokenGetter(); - if (tokenGetter) { - const token = await tokenGetter(); - if (token && previewUrl.includes('/api/')) { - const separator = previewUrl.includes('?') ? '&' : '?'; - previewUrl = `${previewUrl}${separator}token=${encodeURIComponent(token)}`; - } - } - } catch (e) { - // Token retrieval failed — try URL without token - } - - const audio = new Audio(previewUrl); - audioRef.current = audio; - - audio.onerror = () => { - console.error("Failed to load voice preview audio:", voice.previewUrl); - if (audioRef.current === audio) { - audioRef.current = null; - } - setPlayingPreview(null); - }; - - audio.onended = () => { - if (audioRef.current === audio) { - audioRef.current = null; - } - setPlayingPreview(null); - }; - - audio.play().catch((err) => { - console.error("Failed to play voice preview:", err); - if (audioRef.current === audio) { - audioRef.current = null; - } - setPlayingPreview(null); - }); - }, [playingPreview, stopCurrentAudio]); - - useEffect(() => { - return () => { - stopCurrentAudio(); - }; - }, [stopCurrentAudio]); - const handleChange = (newValue: string) => { if (newValue === VOICE_CLONE_ID && voiceClone?.success) { onChange(voiceClone.custom_voice_id || VOICE_CLONE_ID); @@ -358,8 +198,6 @@ export const VoiceSelector: React.FC = ({ } }, [showVoiceClonePanel]); - const isPreviewing = playingPreview !== null; - useEffect(() => { if (externalAudioSettings) { setLocalAudioSettings(externalAudioSettings); @@ -738,7 +576,7 @@ export const VoiceSelector: React.FC = ({ key={val} label={label} size="small" - onClick={() => setGenderFilter(val as GenderFilter)} + onClick={() => setGenderFilter(val as VoiceSelectorGenderFilter)} variant={genderFilter === val ? "filled" : "outlined"} sx={{ height: 22, @@ -987,110 +825,15 @@ export const VoiceSelector: React.FC = ({ )} {(showVoiceClone && !voiceClone?.success) || redoingClone ? ( - - - - - - - - {voiceCreated && ( - - - - - {redoingClone ? "Voice Clone Updated!" : "Voice Clone Created Successfully!"} - - - - - {redoingClone ? "Your voice clone has been updated and will be used for your podcast." : "Your custom voice clone is ready and will be used for your podcast."} - - - - - - - - )} - - - + ) : null} {/* Voice Fine-tune Modal */} diff --git a/frontend/src/components/shared/useVoiceFiltering.ts b/frontend/src/components/shared/useVoiceFiltering.ts new file mode 100644 index 00000000..5eb4c280 --- /dev/null +++ b/frontend/src/components/shared/useVoiceFiltering.ts @@ -0,0 +1,56 @@ +import { useMemo } from "react"; +import { VoiceOption, PREDEFINED_VOICES, VoiceSelectorGenderFilter, CategoryFilter } from "./voiceConstants"; +import { VoiceCloneResponse } from "../../api/brandAssets"; +import { VOICE_CLONE_ID } from "./VoiceSelector"; + +export interface UseVoiceFilteringParams { + showVoiceClone: boolean; + voiceClone: VoiceCloneResponse | null; + value: string; + genderFilter: VoiceSelectorGenderFilter; + categoryFilter: CategoryFilter; +} + +export interface UseVoiceFilteringReturn { + voiceOptions: VoiceOption[]; + filteredVoices: VoiceOption[]; +} + +export const useVoiceFiltering = ({ + showVoiceClone, + voiceClone, + value, + genderFilter, + categoryFilter, +}: UseVoiceFilteringParams): UseVoiceFilteringReturn => { + const voiceOptions = useMemo(() => { + const options: VoiceOption[] = [...PREDEFINED_VOICES]; + + if (showVoiceClone && voiceClone?.success && voiceClone.custom_voice_id) { + options.unshift({ + id: VOICE_CLONE_ID, + name: voiceClone.voice_name || voiceClone.custom_voice_id || "My Voice Clone", + personality: "Your own voice - cloned from audio sample", + isCustom: true, + previewUrl: voiceClone.preview_audio_url, + }); + } + + return options; + }, [showVoiceClone, voiceClone]); + + const filteredVoices = useMemo(() => { + const filtered = PREDEFINED_VOICES.filter(v => { + if (genderFilter !== "all" && v.gender !== genderFilter) return false; + if (categoryFilter !== "all" && v.category !== categoryFilter) return false; + return true; + }); + if (value && value !== VOICE_CLONE_ID && !filtered.some(v => v.id === value)) { + const selected = PREDEFINED_VOICES.find(v => v.id === value); + if (selected) filtered.unshift(selected); + } + return filtered; + }, [genderFilter, categoryFilter, value]); + + return { voiceOptions, filteredVoices }; +}; \ No newline at end of file diff --git a/frontend/src/components/shared/useVoicePreview.ts b/frontend/src/components/shared/useVoicePreview.ts new file mode 100644 index 00000000..0e1cbb15 --- /dev/null +++ b/frontend/src/components/shared/useVoicePreview.ts @@ -0,0 +1,102 @@ +import { useState, useCallback, useRef, useEffect } from "react"; +import { VoiceOption } from "./voiceConstants"; +import { getAuthTokenGetter, getApiUrl } from "../../api/client"; + +export interface UseVoicePreviewReturn { + playingPreview: string | null; + handlePreview: (voice: VoiceOption) => Promise; + stopCurrentAudio: () => void; +} + +export const useVoicePreview = (): UseVoicePreviewReturn => { + const [playingPreview, setPlayingPreview] = useState(null); + const audioRef = useRef(null); + + const stopCurrentAudio = useCallback(() => { + if (audioRef.current) { + audioRef.current.pause(); + audioRef.current.currentTime = 0; + audioRef.current.onended = null; + audioRef.current.onerror = null; + audioRef.current = null; + } + }, []); + + const handlePreview = useCallback(async (voice: VoiceOption) => { + if (!voice.previewUrl) return; + + if (playingPreview === voice.id) { + stopCurrentAudio(); + setPlayingPreview(null); + return; + } + + stopCurrentAudio(); + setPlayingPreview(voice.id); + + let previewUrl = voice.previewUrl; + + // For local development with frontend dev server, don't prepend API URL + // The frontend serves static files from /public/ through webpack dev server + const isLocalDev = window.location.hostname === 'localhost' && !previewUrl.includes('/api/'); + if (!isLocalDev && previewUrl.startsWith('/')) { + previewUrl = `${getApiUrl()}${previewUrl}`; + } + + if (isLocalDev) { + console.log("[VoicePreview] Local dev - using relative URL:", previewUrl); + } else { + console.log("[VoicePreview] Full URL:", previewUrl); + } + try { + const tokenGetter = getAuthTokenGetter(); + if (tokenGetter) { + const token = await tokenGetter(); + if (token && previewUrl.includes('/api/')) { + const separator = previewUrl.includes('?') ? '&' : '?'; + previewUrl = `${previewUrl}${separator}token=${encodeURIComponent(token)}`; + } + } + } catch (e) { + // Token retrieval failed — try URL without token + } + + const audio = new Audio(previewUrl); + audioRef.current = audio; + + audio.onerror = () => { + console.error("Failed to load voice preview audio:", voice.previewUrl); + if (audioRef.current === audio) { + audioRef.current = null; + } + setPlayingPreview(null); + }; + + audio.onended = () => { + if (audioRef.current === audio) { + audioRef.current = null; + } + setPlayingPreview(null); + }; + + audio.play().catch((err) => { + console.error("Failed to play voice preview:", err); + if (audioRef.current === audio) { + audioRef.current = null; + } + setPlayingPreview(null); + }); + }, [playingPreview, stopCurrentAudio]); + + useEffect(() => { + return () => { + stopCurrentAudio(); + }; + }, [stopCurrentAudio]); + + return { + playingPreview, + handlePreview, + stopCurrentAudio, + }; +}; \ No newline at end of file diff --git a/frontend/src/components/shared/voiceConstants.ts b/frontend/src/components/shared/voiceConstants.ts new file mode 100644 index 00000000..ff51b707 --- /dev/null +++ b/frontend/src/components/shared/voiceConstants.ts @@ -0,0 +1,81 @@ +export type VoiceOption = { + id: string; + name: string; + personality?: string; + isCustom?: boolean; + previewUrl?: string; + gender?: "male" | "female"; + category?: string; +}; + +export type VoiceAudioSettings = { + speed: number; + volume: number; + pitch: number; + emotion: string; +}; + +export const DEFAULT_AUDIO_SETTINGS: VoiceAudioSettings = { + speed: 1.0, + volume: 1.0, + pitch: 0, + emotion: "neutral", +}; + +export const EMOTION_OPTIONS = ["neutral", "happy", "sad", "angry", "fearful", "disgusted", "surprised"]; + +export const VOICE_SAMPLE_BASE = "/assets/voice-samples"; + +export const VOICE_PREVIEW_MAP: Record = { + Wise_Woman: `${VOICE_SAMPLE_BASE}/wise_woman.mp3`, + Friendly_Person: `${VOICE_SAMPLE_BASE}/friendly_person.mp3`, + Inspirational_girl: `${VOICE_SAMPLE_BASE}/inspirational_girl.mp3`, + Deep_Voice_Man: `${VOICE_SAMPLE_BASE}/deep_voice_man.mp3`, + Calm_Woman: `${VOICE_SAMPLE_BASE}/calm_woman.mp3`, + Casual_Guy: `${VOICE_SAMPLE_BASE}/casual_guy.mp3`, + Lively_Girl: `${VOICE_SAMPLE_BASE}/lively_girl.mp3`, + Patient_Man: `${VOICE_SAMPLE_BASE}/patient_man.mp3`, + Young_Knight: `${VOICE_SAMPLE_BASE}/young_knight.mp3`, + Determined_Man: `${VOICE_SAMPLE_BASE}/determined_man.mp3`, + Lovely_Girl: `${VOICE_SAMPLE_BASE}/lovely_girl.mp3`, + Decent_Boy: `${VOICE_SAMPLE_BASE}/decent_boy.mp3`, + Imposing_Manner: `${VOICE_SAMPLE_BASE}/imposing_manner.mp3`, + Elegant_Man: `${VOICE_SAMPLE_BASE}/elegant_man.mp3`, + Abbess: `${VOICE_SAMPLE_BASE}/abbess.mp3`, + Sweet_Girl_2: `${VOICE_SAMPLE_BASE}/sweet_girl.mp3`, + Exuberant_Girl: `${VOICE_SAMPLE_BASE}/exuberant_girl.mp3`, +}; + +export type CategoryFilter = string; + +export const CATEGORY_OPTIONS: { value: CategoryFilter; label: string }[] = [ + { value: "all", label: "All" }, + { value: "educational", label: "Educational" }, + { value: "marketing", label: "Marketing" }, + { value: "professional", label: "Professional" }, + { value: "creative", label: "Creative" }, + { value: "calming", label: "Calming" }, + { value: "motivational", label: "Motivational" }, +]; + +export const PREDEFINED_VOICES: VoiceOption[] = [ + { id: "Wise_Woman", name: "Wise Woman", personality: "Authoritative, trustworthy female voice - perfect for educational content", previewUrl: VOICE_PREVIEW_MAP.Wise_Woman, gender: "female", category: "educational" }, + { id: "Friendly_Person", name: "Friendly Person", personality: "Warm, approachable voice - great for welcoming introductions", previewUrl: VOICE_PREVIEW_MAP.Friendly_Person, category: "marketing" }, + { id: "Inspirational_girl", name: "Inspirational Girl", personality: "Motivational, uplifting female voice - ideal for inspiration", previewUrl: VOICE_PREVIEW_MAP.Inspirational_girl, gender: "female", category: "motivational" }, + { id: "Deep_Voice_Man", name: "Deep Voice Man", personality: "Powerful, commanding male voice - excellent for serious topics", previewUrl: VOICE_PREVIEW_MAP.Deep_Voice_Man, gender: "male", category: "professional" }, + { id: "Calm_Woman", name: "Calm Woman", personality: "Soothing, composed female voice - perfect for meditation or sensitive topics", previewUrl: VOICE_PREVIEW_MAP.Calm_Woman, gender: "female", category: "calming" }, + { id: "Casual_Guy", name: "Casual Guy", personality: "Relaxed, conversational male voice - great for vlogs and tutorials", previewUrl: VOICE_PREVIEW_MAP.Casual_Guy, gender: "male", category: "marketing" }, + { id: "Lively_Girl", name: "Lively Girl", personality: "Energetic, enthusiastic female voice - ideal for exciting announcements", previewUrl: VOICE_PREVIEW_MAP.Lively_Girl, gender: "female", category: "marketing" }, + { id: "Patient_Man", name: "Patient Man", personality: "Gentle, understanding male voice - perfect for explanations", previewUrl: VOICE_PREVIEW_MAP.Patient_Man, gender: "male", category: "educational" }, + { id: "Young_Knight", name: "Young Knight", personality: "Brave, confident male voice - great for adventure and gaming", previewUrl: VOICE_PREVIEW_MAP.Young_Knight, gender: "male", category: "creative" }, + { id: "Determined_Man", name: "Determined Man", personality: "Strong, resolute male voice - excellent for motivational speeches", previewUrl: VOICE_PREVIEW_MAP.Determined_Man, gender: "male", category: "motivational" }, + { id: "Lovely_Girl", name: "Lovely Girl", personality: "Sweet, charming female voice - ideal for storytelling", previewUrl: VOICE_PREVIEW_MAP.Lovely_Girl, gender: "female", category: "creative" }, + { id: "Decent_Boy", name: "Decent Boy", personality: "Honest, sincere male voice - perfect for testimonials", previewUrl: VOICE_PREVIEW_MAP.Decent_Boy, gender: "male", category: "marketing" }, + { id: "Imposing_Manner", name: "Imposing Manner", personality: "Formal, dignified male voice - great for corporate content", previewUrl: VOICE_PREVIEW_MAP.Imposing_Manner, gender: "male", category: "professional" }, + { id: "Elegant_Man", name: "Elegant Man", personality: "Refined, sophisticated male voice - ideal for luxury content", previewUrl: VOICE_PREVIEW_MAP.Elegant_Man, gender: "male", category: "professional" }, + { id: "Abbess", name: "Abbess", personality: "Spiritual, serene female voice - perfect for meditation", previewUrl: VOICE_PREVIEW_MAP.Abbess, gender: "female", category: "calming" }, + { id: "Sweet_Girl_2", name: "Sweet Girl 2", personality: "Gentle, melodic female voice - excellent for children's content", previewUrl: VOICE_PREVIEW_MAP.Sweet_Girl_2, gender: "female", category: "creative" }, + { id: "Exuberant_Girl", name: "Exuberant Girl", personality: "Joyful, expressive female voice - ideal for celebrations", previewUrl: VOICE_PREVIEW_MAP.Exuberant_Girl, gender: "female", category: "creative" }, +]; + +export type VoiceSelectorGenderFilter = "all" | "male" | "female"; diff --git a/frontend/src/hooks/useSpeechToText.ts b/frontend/src/hooks/useSpeechToText.ts new file mode 100644 index 00000000..49bef961 --- /dev/null +++ b/frontend/src/hooks/useSpeechToText.ts @@ -0,0 +1,150 @@ +import { useState, useRef, useCallback, useEffect } from 'react'; + +export interface UseSpeechToTextReturn { + isRecording: boolean; + recordingSeconds: number; + audioBlob: Blob | null; + error: string | null; + isSupported: boolean; + startRecording: () => Promise; + stopRecording: () => void; + reset: () => void; +} + +const MAX_RECORDING_SECONDS = 60; + +/** + * Reusable hook for recording audio from the browser microphone. + * Extracted and generalized from VoiceAvatarPlaceholder.tsx recording logic. + */ +export const useSpeechToText = (): UseSpeechToTextReturn => { + const [isRecording, setIsRecording] = useState(false); + const [recordingSeconds, setRecordingSeconds] = useState(0); + const [audioBlob, setAudioBlob] = useState(null); + const [error, setError] = useState(null); + + const streamRef = useRef(null); + const recorderRef = useRef(null); + const chunksRef = useRef([]); + const timerRef = useRef(null); + + const isSupported = typeof window !== 'undefined' && !!navigator.mediaDevices?.getUserMedia && typeof MediaRecorder !== 'undefined'; + + const cleanup = useCallback(() => { + if (timerRef.current) { + window.clearInterval(timerRef.current); + timerRef.current = null; + } + if (streamRef.current) { + streamRef.current.getTracks().forEach((t) => t.stop()); + streamRef.current = null; + } + recorderRef.current = null; + chunksRef.current = []; + setIsRecording(false); + setRecordingSeconds(0); + }, []); + + const stopRecording = useCallback(() => { + try { + if (recorderRef.current && recorderRef.current.state !== 'inactive') { + recorderRef.current.stop(); + } else { + cleanup(); + } + } catch { + cleanup(); + } + }, [cleanup]); + + const startRecording = useCallback(async () => { + if (!isSupported) { + setError('Microphone is not supported in this browser.'); + return; + } + + setError(null); + setAudioBlob(null); + cleanup(); + + try { + const stream = await navigator.mediaDevices.getUserMedia({ audio: true }); + streamRef.current = stream; + + const mimeType = MediaRecorder.isTypeSupported('audio/webm;codecs=opus') + ? 'audio/webm;codecs=opus' + : MediaRecorder.isTypeSupported('audio/webm') + ? 'audio/webm' + : 'audio/mp4'; + + const recorder = new MediaRecorder(stream, { mimeType }); + recorderRef.current = recorder; + chunksRef.current = []; + + recorder.ondataavailable = (e) => { + if (e.data && e.data.size > 0) { + chunksRef.current.push(e.data); + } + }; + + recorder.onstop = () => { + try { + const chunks = [...chunksRef.current]; + const blob = new Blob(chunks, { type: mimeType }); + setAudioBlob(blob); + } catch (err: any) { + setError('Failed to create audio recording. Please try again.'); + } finally { + cleanup(); + } + }; + + recorder.onerror = () => { + setError('Recording error occurred. Please try again.'); + cleanup(); + }; + + recorder.start(); + setIsRecording(true); + setRecordingSeconds(0); + + timerRef.current = window.setInterval(() => { + setRecordingSeconds((s) => { + const next = s + 1; + if (next >= MAX_RECORDING_SECONDS) { + stopRecording(); + } + return next; + }); + }, 1000); + } catch (e: any) { + setError(e?.message || 'Failed to access microphone'); + cleanup(); + } + }, [isSupported, cleanup, stopRecording]); + + const reset = useCallback(() => { + setAudioBlob(null); + setError(null); + cleanup(); + }, [cleanup]); + + // Cleanup on unmount + useEffect(() => { + return () => { + if (timerRef.current) window.clearInterval(timerRef.current); + if (streamRef.current) streamRef.current.getTracks().forEach((t) => t.stop()); + }; + }, []); + + return { + isRecording, + recordingSeconds, + audioBlob, + error, + isSupported, + startRecording, + stopRecording, +reset, + }; +}; diff --git a/frontend/src/services/podcastApi.ts b/frontend/src/services/podcastApi.ts index 6a34cb22..8b695ec5 100644 --- a/frontend/src/services/podcastApi.ts +++ b/frontend/src/services/podcastApi.ts @@ -392,7 +392,27 @@ export const podcastApi = { }; }, - async enhanceIdea(params: { idea: string; bible?: any }): Promise<{ enhanced_ideas: string[]; rationales: string[] }> { + async getWebsiteExtraction(): Promise<{ success: boolean; data?: any; error?: string }> { + const response = await aiApiClient.get("/api/podcast/website-extraction"); + return response.data; + }, + + async saveWebsiteExtraction(data: any): Promise<{ success: boolean; message?: string; error?: string }> { + const response = await aiApiClient.post("/api/podcast/website-extraction", data); + return response.data; + }, + + async saveTopicContext(projectId: string, topicContext: any): Promise<{ success: boolean; message?: string; error?: string }> { + const response = await aiApiClient.post(`/api/podcast/project/${projectId}/topic-context`, topicContext); + return response.data; + }, + + async getTopicContext(projectId: string): Promise<{ success: boolean; data?: any; error?: string }> { + const response = await aiApiClient.get(`/api/podcast/project/${projectId}/topic-context`); + return response.data; + }, + + async enhanceIdea(params: { idea: string; bible?: any; website_data?: any; topic_context?: any }): Promise<{ enhanced_ideas: string[]; rationales: string[] }> { const response = await aiApiClient.post("/api/podcast/idea/enhance", params); return response.data; }, @@ -401,6 +421,7 @@ export const podcastApi = { keywords: string[]; timeframe?: string; geo?: string; + source?: string; }): Promise<{ success: boolean; data?: { @@ -411,6 +432,7 @@ export const podcastApi = { timeframe: string; geo: string; keywords: string[]; + source: string; cached: boolean; }; error?: string; @@ -419,6 +441,33 @@ export const podcastApi = { keywords: params.keywords, timeframe: params.timeframe || "today 12-m", geo: params.geo || "US", + source: params.source || "web", // 'web' = Google, 'podcast' = YouTube + }); + return response.data; + }, + + async extractUrl(params: { url: string }): Promise<{ + success: boolean; + title?: string; + text?: string; + summary?: string; + highlights?: string[]; + author?: string; + url: string; + image?: string; + favicon?: string; + subpages?: Array<{id: string; title: string; url: string; summary: string; text: string}>; + error?: string; + }> { + const response = await aiApiClient.post("/api/podcast/extract-url", params); + return response.data; + }, + + async transcribeAudio(audioBlob: Blob): Promise<{ text: string; error?: string }> { + const formData = new FormData(); + formData.append("audio", audioBlob, `recording_${Date.now()}.webm`); + const response = await aiApiClient.post("/api/podcast/transcribe", formData, { + headers: { "Content-Type": "multipart/form-data" }, }); return response.data; }, @@ -1085,16 +1134,103 @@ export const podcastApi = { return response.data; }, - async generateChartPreview(params: { +async generateChartPreview(params: { chart_data: Record; chart_type: string; title: string; }): Promise<{ preview_url: string; chart_id: string }> { - // Canonical backend endpoint from api/podcast/handlers/broll.py after router prefix composition: - // /api/podcast (main router) + /broll (handler prefix) + /preview/chart (endpoint) const response = await aiApiClient.post('/api/podcast/broll/preview/chart', params); return response.data; }, + + async researchByCategory(params: { + category: "news" | "finance" | "research-paper" | "personal-site"; + keyword?: string; + maxResults?: number; + websiteUrl?: string; + }): Promise<{ + success: boolean; + category: string; + provider: string; + topics: Array<{ + title: string; + url: string; + snippet: string; + score: number; + favicon?: string; + }>; + query?: string; + error?: string; + }> { + const response = await aiApiClient.post('/api/podcast/research/tavily-category', { + category: params.category, + keyword: params.keyword, + max_results: params.maxResults, + website_url: params.websiteUrl, + }); +return response.data; + }, + + async preEstimateCost(params: { + duration: number; + speakers: number; + queryCount: number; + podcastMode: string; + gemini_model?: string; + audio_tts_model?: string; + voice_clone_engine?: string; + image_model?: string; + video_model?: string; + }): Promise<{ + estimate?: { + // Individual costs + analysisCost: number; + researchCost: number; + researchSearchCost: number; + researchLlmCost: number; + scriptCost: number; + ttsCost: number; + voiceCloneCost: number; + avatarCost: number; + videoCost: number; + total: number; + // Category totals + llmCost: number; + audioCost: number; + mediaCost: number; + // Metadata + currency: string; + source: string; + models: { + llm: string; + research: string; + audio_tts: string; + voice_clone: string; + image: string; + video: string; + }; + assumptions: Record; + } | null; + error?: string | null; + pricing_available?: boolean; + debug?: { + pricing_rows: number; + providers: string[]; + }; + }> { + const response = await aiApiClient.post('/api/podcast/pre-estimate', { + duration: params.duration, + speakers: params.speakers, + query_count: params.queryCount, + podcast_mode: params.podcastMode, + gemini_model: params.gemini_model, + audio_tts_model: params.audio_tts_model, + voice_clone_engine: params.voice_clone_engine, + image_model: params.image_model, + video_model: params.video_model, + }); + return response.data; + }, }; export type PodcastApi = typeof podcastApi;