AI Researcher and Video Studio implementation complete

2026-01-05 15:49:51 +05:30
parent b134e9dc7e
commit 0b63ae7fc1
200 changed files with 39535 additions and 1375 deletions
--- a/backend/services/research/intent/intent_aware_analyzer.py
+++ b/backend/services/research/intent/intent_aware_analyzer.py
@@ -50,6 +50,7 @@ class IntentAwareAnalyzer:
        raw_results: Dict[str, Any],
        intent: ResearchIntent,
        research_persona: Optional[ResearchPersona] = None,
+        user_id: Optional[str] = None,
    ) -> IntentDrivenResearchResult:
        """
        Analyze raw research results based on user intent.
@@ -84,7 +85,7 @@ class IntentAwareAnalyzer:
            result = llm_text_gen(
                prompt=prompt,
                json_struct=analysis_schema,
-                user_id=None
+                user_id=user_id  # Required for subscription checking
            )
            
            if isinstance(result, dict) and "error" in result:
--- a/backend/services/research/intent/intent_prompt_builder.py
+++ b/backend/services/research/intent/intent_prompt_builder.py
@@ -151,6 +151,8 @@ Analyze the user's input and infer their research intent. Determine:

 11. **CONFIDENCE**: How confident are you in this inference? (0.0-1.0)
    - If < 0.7, set needs_clarification to true and provide clarifying_questions
+    - Provide a brief reason for your confidence level
+    - If confidence is low, provide an example of what a great input would look like

 ## OUTPUT FORMAT

@@ -168,6 +170,8 @@ Return a JSON object:
    "perspective": "target perspective or null",
    "time_sensitivity": "real_time|recent|historical|evergreen",
    "confidence": 0.85,
+    "confidence_reason": "Brief explanation of why this confidence level (e.g., 'User provided clear keywords and context' or 'Input is vague, missing specific goals')",
+    "great_example": "Example of what a great input would look like for this research (only if confidence < 0.8)",
    "needs_clarification": false,
    "clarifying_questions": [],
    "analysis_summary": "Brief summary of what the user wants"
--- a/backend/services/research/intent/intent_query_generator.py
+++ b/backend/services/research/intent/intent_query_generator.py
@@ -39,6 +39,7 @@ class IntentQueryGenerator:
        self,
        intent: ResearchIntent,
        research_persona: Optional[ResearchPersona] = None,
+        user_id: Optional[str] = None,
    ) -> Dict[str, Any]:
        """
        Generate targeted research queries based on intent.
@@ -89,7 +90,7 @@ class IntentQueryGenerator:
            result = llm_text_gen(
                prompt=prompt,
                json_struct=query_schema,
-                user_id=None
+                user_id=user_id
            )
            
            if isinstance(result, dict) and "error" in result:
--- a/backend/services/research/intent/research_intent_inference.py
+++ b/backend/services/research/intent/research_intent_inference.py
@@ -51,6 +51,7 @@ class ResearchIntentInference:
        competitor_data: Optional[List[Dict]] = None,
        industry: Optional[str] = None,
        target_audience: Optional[str] = None,
+        user_id: Optional[str] = None,
    ) -> IntentInferenceResponse:
        """
        Analyze user input and infer their research intent.
@@ -96,13 +97,15 @@ class ResearchIntentInference:
                    "perspective": {"type": "string"},
                    "time_sensitivity": {"type": "string"},
                    "confidence": {"type": "number"},
+                    "confidence_reason": {"type": "string"},
+                    "great_example": {"type": "string"},
                    "needs_clarification": {"type": "boolean"},
                    "clarifying_questions": {"type": "array", "items": {"type": "string"}},
                    "analysis_summary": {"type": "string"}
                },
                "required": [
                    "input_type", "primary_question", "purpose", "content_output",
-                    "expected_deliverables", "depth", "confidence", "analysis_summary"
+                    "expected_deliverables", "depth", "confidence", "confidence_reason", "analysis_summary"
                ]
            }
            
@@ -112,7 +115,7 @@ class ResearchIntentInference:
            result = llm_text_gen(
                prompt=prompt,
                json_struct=intent_schema,
-                user_id=None
+                user_id=user_id
            )
            
            if isinstance(result, dict) and "error" in result:
@@ -134,6 +137,8 @@ class ResearchIntentInference:
                suggested_keywords=self._extract_keywords_from_input(user_input, keywords),
                suggested_angles=result.get("focus_areas", []),
                quick_options=quick_options,
+                confidence_reason=result.get("confidence_reason", ""),
+                great_example=result.get("great_example", ""),
            )
            
            logger.info(f"Intent inferred: purpose={intent.purpose}, confidence={intent.confidence}")
@@ -166,7 +171,7 @@ class ResearchIntentInference:
        if not expected_deliverables:
            expected_deliverables = self._infer_deliverables_from_purpose(purpose)
        
-        return ResearchIntent(
+        intent = ResearchIntent(
            primary_question=result.get("primary_question", user_input),
            secondary_questions=result.get("secondary_questions", []),
            purpose=purpose.value,
@@ -179,9 +184,13 @@ class ResearchIntentInference:
            input_type=input_type.value,
            original_input=user_input,
            confidence=float(result.get("confidence", 0.7)),
+            confidence_reason=result.get("confidence_reason"),
+            great_example=result.get("great_example"),
            needs_clarification=result.get("needs_clarification", False),
            clarifying_questions=result.get("clarifying_questions", []),
        )
+        
+        return intent
    
    def _safe_enum(self, enum_class, value: str, default):
        """Safely convert string to enum, returning default if invalid."""
--- a/backend/services/research/intent/unified_research_analyzer.py
+++ b/backend/services/research/intent/unified_research_analyzer.py
@@ -0,0 +1,559 @@
+"""
+Unified Research Analyzer
+
+Combines intent inference, query generation, and parameter optimization
+into a single AI call with justifications for each decision.
+
+This reduces 2 LLM calls to 1, improves coherence, and provides
+user-friendly justifications for all settings.
+
+Author: ALwrity Team
+Version: 1.0
+"""
+
+import json
+from typing import Dict, Any, List, Optional, Tuple
+from loguru import logger
+
+from models.research_intent_models import (
+    ResearchIntent,
+    ResearchQuery,
+    IntentInferenceResponse,
+    ResearchPurpose,
+    ContentOutput,
+    ExpectedDeliverable,
+    ResearchDepthLevel,
+    InputType,
+)
+from models.research_persona_models import ResearchPersona
+
+
+class UnifiedResearchAnalyzer:
+    """
+    Unified AI-driven analyzer that performs:
+    1. Intent inference (what user wants)
+    2. Query generation (how to search)
+    3. Parameter optimization (Exa/Tavily settings)
+    
+    All in a single LLM call with justifications.
+    """
+    
+    def __init__(self):
+        """Create the analyzer; no state is stored, only a log line is emitted."""
+        logger.info("UnifiedResearchAnalyzer initialized")
+    
+    async def analyze(
+        self,
+        user_input: str,
+        keywords: Optional[List[str]] = None,
+        research_persona: Optional[ResearchPersona] = None,
+        competitor_data: Optional[List[Dict]] = None,
+        industry: Optional[str] = None,
+        target_audience: Optional[str] = None,
+        user_id: Optional[str] = None,
+    ) -> Dict[str, Any]:
+        """
+        Run the single-call research analysis for a user request.
+
+        Builds the unified prompt and JSON schema, sends them to
+        ``llm_text_gen`` once, and converts the answer into the structured
+        response. Any failure (an ``"error"`` key in the LLM result or an
+        exception anywhere in the pipeline) yields the fallback response
+        instead of propagating.
+
+        Args:
+            user_input: Raw research request typed by the user.
+            keywords: Optional keywords supplied alongside the request.
+            research_persona: Optional persona used for prompt context.
+            competitor_data: Optional competitor records used for prompt context.
+            industry: Optional industry used for prompt context.
+            target_audience: Optional audience used for prompt context.
+            user_id: Forwarded unchanged to ``llm_text_gen``.
+
+        Returns:
+            Dict containing:
+            - intent: ResearchIntent
+            - queries: List[ResearchQuery]
+            - exa_config: Dict with settings and justifications
+            - tavily_config: Dict with settings and justifications
+            - recommended_provider: str
+            - provider_justification: str
+        """
+        try:
+            logger.info(f"Unified analysis for: {user_input[:100]}...")
+
+            if not keywords:
+                keywords = []
+
+            # One prompt covering intent, queries and provider parameters
+            plan_prompt = self._build_unified_prompt(
+                user_input=user_input,
+                keywords=keywords,
+                research_persona=research_persona,
+                competitor_data=competitor_data,
+                industry=industry,
+                target_audience=target_audience,
+            )
+
+            # Schema the LLM response is expected to follow
+            response_schema = self._build_unified_schema()
+
+            # Resolved at call time, inside the try so an import failure
+            # also ends in the fallback response
+            from services.llm_providers.main_text_generation import llm_text_gen
+
+            llm_output = llm_text_gen(
+                prompt=plan_prompt,
+                json_struct=response_schema,
+                user_id=user_id
+            )
+
+            reported_error = isinstance(llm_output, dict) and "error" in llm_output
+            if not reported_error:
+                return self._parse_unified_result(llm_output, user_input)
+
+            logger.error(f"Unified analysis failed: {llm_output.get('error')}")
+            return self._create_fallback_response(user_input, keywords)
+
+        except Exception as exc:
+            logger.error(f"Error in unified analysis: {exc}")
+            # keywords may still be None if the failure happened before normalisation
+            return self._create_fallback_response(user_input, keywords or [])
+    
+    def _build_unified_prompt(
+        self,
+        user_input: str,
+        keywords: List[str],
+        research_persona: Optional[ResearchPersona] = None,
+        competitor_data: Optional[List[Dict]] = None,
+        industry: Optional[str] = None,
+        target_audience: Optional[str] = None,
+    ) -> str:
+        """
+        Build the unified prompt for intent + queries + parameters.
+
+        Args:
+            user_input: Raw research request; interpolated verbatim inside
+                double quotes under "USER INPUT".
+            keywords: Keywords rendered as a comma-separated "KEYWORDS:" line;
+                the line is left empty when the list is empty.
+            research_persona: Passed to ``_build_persona_context``.
+            competitor_data: Passed to ``_build_competitor_context``.
+            industry: Passed to ``_build_persona_context``.
+            target_audience: Passed to ``_build_persona_context``.
+
+        Returns:
+            The complete prompt text, including the provider option tables,
+            the expected JSON output structure, and the decision rules.
+        """
+        
+        # Build persona context
+        persona_context = self._build_persona_context(research_persona, industry, target_audience)
+        
+        # Build competitor context
+        competitor_context = self._build_competitor_context(competitor_data)
+        
+        # NOTE: this is an f-string, so the literal braces of the JSON example
+        # below are written doubled ({{ and }}); single braces are reserved for
+        # the interpolated values (user input, keywords, persona/competitor context).
+        prompt = f'''You are an expert AI research strategist. Analyze the user's research request and provide a complete research plan including intent understanding, search queries, and optimal API settings.
+
+## USER INPUT
+"{user_input}"
+{f"KEYWORDS: {', '.join(keywords)}" if keywords else ""}
+
+## USER CONTEXT
+{persona_context}
+{competitor_context}
+
+## YOUR TASK: Provide a Complete Research Plan
+
+### PART 1: INTENT ANALYSIS
+Understand what the user really wants from their research.
+
+### PART 2: SEARCH QUERIES
+Generate 4-8 targeted search queries optimized for semantic search.
+
+### PART 3: PROVIDER SETTINGS
+Configure Exa and Tavily API parameters with justifications.
+
+### PART 4: GOOGLE TRENDS KEYWORDS (if trends in deliverables)
+If "trends" is in expected_deliverables OR purpose is "explore_trends":
+- Suggest 1-3 optimized keywords for Google Trends analysis
+- These may differ from research queries (trends need broader, searchable terms)
+- Consider: What keywords will show meaningful trends over time?
+- Consider: What timeframe will show relevant trends? (1 year, 12 months, etc.)
+- Consider: What geographic region is most relevant for the user?
+- Explain what insights trends will uncover for content generation:
+  * Search interest trends over time (optimal publication timing)
+  * Regional interest distribution (audience targeting)
+  * Related topics for content expansion
+  * Related queries for FAQ sections
+  * Rising topics for timely content opportunities
+
+---
+
+## AVAILABLE PROVIDER OPTIONS
+
+### EXA API OPTIONS (Semantic Search Engine)
+| Parameter | Options | Description |
+|-----------|---------|-------------|
+| type | "auto", "neural", "fast", "deep" | "neural" = semantic understanding, "deep" = comprehensive with query expansion |
+| category | "company", "research paper", "news", "github", "tweet", "personal site", "pdf", "financial report", "people" | Focus on specific content types |
+| numResults | 5-25 | Number of results (10 recommended) |
+| includeDomains | string[] | Domains to include (e.g., ["arxiv.org", "nature.com"]) |
+| excludeDomains | string[] | Domains to exclude |
+| startPublishedDate | ISO date | Filter by publish date (e.g., "2024-01-01T00:00:00.000Z") |
+| text | boolean | Include full text content |
+| highlights | boolean | Extract key highlights |
+| context | boolean | Return as single context string for RAG |
+
+**WHEN TO USE EXA:**
+- Semantic understanding needed (finding similar content)
+- Academic/research papers
+- Company/competitor research
+- Deep, comprehensive results
+- Historical content
+
+### TAVILY API OPTIONS (AI-Powered Search)
+| Parameter | Options | Description |
+|-----------|---------|-------------|
+| topic | "general", "news", "finance" | Search topic category |
+| search_depth | "basic", "advanced" | "advanced" = multiple semantic snippets per URL |
+| include_answer | false, true, "basic", "advanced" | AI-generated answer from results |
+| include_raw_content | false, true, "markdown", "text" | Raw page content format |
+| time_range | "day", "week", "month", "year" | Filter by recency |
+| max_results | 5-20 | Number of results |
+| include_domains | string[] | Domains to include |
+| exclude_domains | string[] | Domains to exclude |
+
+**WHEN TO USE TAVILY:**
+- Real-time/current events
+- News and trending topics
+- Quick facts with AI answers
+- Financial data
+- Recent time-sensitive content
+
+---
+
+## OUTPUT FORMAT
+
+Return a JSON object with this exact structure:
+
+```json
+{{
+    "intent": {{
+        "input_type": "keywords|question|goal|mixed",
+        "primary_question": "The main question to answer",
+        "secondary_questions": ["question 1", "question 2"],
+        "purpose": "learn|create_content|make_decision|compare|solve_problem|find_data|explore_trends|validate|generate_ideas",
+        "content_output": "blog|podcast|video|social_post|newsletter|presentation|report|whitepaper|email|general",
+        "expected_deliverables": ["key_statistics", "expert_quotes", "case_studies", "trends", "best_practices"],
+        "depth": "overview|detailed|expert",
+        "focus_areas": ["area1", "area2"],
+        "perspective": "target perspective or null",
+        "time_sensitivity": "real_time|recent|historical|evergreen",
+        "confidence": 0.85,
+        "confidence_reason": "Why this confidence level",
+        "great_example": "Example of better input if confidence < 0.8",
+        "needs_clarification": false,
+        "clarifying_questions": [],
+        "analysis_summary": "Brief summary of research plan"
+    }},
+    "queries": [
+        {{
+            "query": "Optimized search query string",
+            "purpose": "key_statistics|expert_quotes|case_studies|trends|etc",
+            "provider": "exa|tavily",
+            "priority": 5,
+            "expected_results": "What we expect to find",
+            "justification": "Why this query and provider"
+        }}
+    ],
+    "enhanced_keywords": ["expanded", "related", "keywords"],
+    "research_angles": ["Angle 1: ...", "Angle 2: ..."],
+    "recommended_provider": "exa|tavily",
+    "provider_justification": "Why this provider is best for this research",
+    "exa_config": {{
+        "enabled": true,
+        "type": "auto|neural|fast|deep",
+        "type_justification": "Why this search type",
+        "category": "news|research paper|company|etc or null",
+        "category_justification": "Why this category or null",
+        "numResults": 10,
+        "numResults_justification": "Why this number",
+        "includeDomains": [],
+        "includeDomains_justification": "Why these domains or empty",
+        "startPublishedDate": "2024-01-01T00:00:00.000Z or null",
+        "date_justification": "Why this date filter or null",
+        "highlights": true,
+        "highlights_justification": "Why enable/disable highlights",
+        "context": true,
+        "context_justification": "Why enable/disable context string"
+    }},
+    "tavily_config": {{
+        "enabled": true,
+        "topic": "general|news|finance",
+        "topic_justification": "Why this topic",
+        "search_depth": "basic|advanced",
+        "search_depth_justification": "Why this depth",
+        "include_answer": "true|false|basic|advanced",
+        "include_answer_justification": "Why this answer mode",
+        "time_range": "day|week|month|year|null",
+        "time_range_justification": "Why this time range or null",
+        "max_results": 10,
+        "max_results_justification": "Why this number",
+        "include_raw_content": "false|true|markdown|text",
+        "include_raw_content_justification": "Why this content mode"
+    }},
+    "trends_config": {{
+        "enabled": true|false,
+        "keywords": ["keyword1", "keyword2"],
+        "keywords_justification": "Why these keywords for trends analysis",
+        "timeframe": "today 1-y|today 12-m|all",
+        "timeframe_justification": "Why this timeframe",
+        "geo": "US|GB|IN|etc",
+        "geo_justification": "Why this geographic region",
+        "expected_insights": [
+            "Search interest trends over the past year",
+            "Regional interest distribution",
+            "Related topics for content expansion",
+            "Related queries for FAQ sections",
+            "Optimal publication timing based on interest peaks"
+        ]
+    }}
+}}
+```
+
+## DECISION RULES
+
+1. **Provider Selection:**
+   - Use EXA for: academic research, competitor analysis, deep understanding, finding similar content
+   - Use TAVILY for: news, current events, quick facts, financial data, real-time info
+
+2. **Query Optimization:**
+   - Include relevant keywords for semantic matching
+   - Add context words based on deliverables (e.g., "statistics 2024" for key_statistics)
+   - Match query style to provider (natural language for Exa, keyword-rich for Tavily)
+
+3. **Parameter Selection:**
+   - ALWAYS provide justification for each parameter choice
+   - Consider time sensitivity when setting date filters
+   - Match category/topic to content type
+   - Use "advanced" depth when quality matters more than speed
+
+4. **Google Trends Keywords (if trends enabled):**
+   - Suggest 1-3 keywords optimized for trends analysis
+   - Keywords should be broader than research queries (e.g., "AI marketing" vs "AI marketing tools for small businesses")
+   - Consider what will show meaningful search interest trends
+   - Choose timeframe based on content type (12 months for blogs, 1 year for comprehensive)
+   - Select geo based on user's target audience or industry
+   - List specific insights trends will uncover
+
+5. **Justifications:**
+   - Keep justifications concise (1 sentence)
+   - Explain the "why" not the "what"
+   - Reference user's intent when relevant
+'''
+
+        return prompt
+    
+    def _build_unified_schema(self) -> Dict[str, Any]:
+        """
+        Assemble the JSON schema describing the unified LLM response.
+
+        Returns:
+            A freshly built schema dict with the top-level sections ``intent``,
+            ``queries``, ``enhanced_keywords``, ``research_angles``,
+            ``recommended_provider``, ``provider_justification``,
+            ``exa_config``, ``tavily_config`` and ``trends_config``.
+        """
+        # Each constructor returns a new dict so no node is shared between fields.
+        def typed(kind, **extra):
+            return {"type": kind, **extra}
+
+        def text():
+            return typed("string")
+
+        def flag():
+            return typed("boolean")
+
+        def whole():
+            return typed("integer")
+
+        def text_list():
+            return typed("array", items=text())
+
+        def record(properties, required=None):
+            node = typed("object", properties=properties)
+            if required is not None:
+                node["required"] = required
+            return node
+
+        intent_schema = record(
+            {
+                "input_type": typed("string", enum=["keywords", "question", "goal", "mixed"]),
+                "primary_question": text(),
+                "secondary_questions": text_list(),
+                "purpose": text(),
+                "content_output": text(),
+                "expected_deliverables": text_list(),
+                "depth": typed("string", enum=["overview", "detailed", "expert"]),
+                "focus_areas": text_list(),
+                "perspective": text(),
+                "time_sensitivity": text(),
+                "confidence": typed("number"),
+                "confidence_reason": text(),
+                "great_example": text(),
+                "needs_clarification": flag(),
+                "clarifying_questions": text_list(),
+                "analysis_summary": text(),
+            },
+            required=["primary_question", "purpose", "expected_deliverables", "confidence"],
+        )
+
+        query_schema = record(
+            {
+                "query": text(),
+                "purpose": text(),
+                "provider": text(),
+                "priority": whole(),
+                "expected_results": text(),
+                "justification": text(),
+            },
+            required=["query", "purpose", "provider", "priority"],
+        )
+
+        exa_schema = record({
+            "enabled": flag(),
+            "type": text(),
+            "type_justification": text(),
+            "category": text(),
+            "category_justification": text(),
+            "numResults": whole(),
+            "numResults_justification": text(),
+            "includeDomains": text_list(),
+            "includeDomains_justification": text(),
+            "startPublishedDate": text(),
+            "date_justification": text(),
+            "highlights": flag(),
+            "highlights_justification": text(),
+            "context": flag(),
+            "context_justification": text(),
+        })
+
+        tavily_schema = record({
+            "enabled": flag(),
+            "topic": text(),
+            "topic_justification": text(),
+            "search_depth": text(),
+            "search_depth_justification": text(),
+            "include_answer": text(),
+            "include_answer_justification": text(),
+            "time_range": text(),
+            "time_range_justification": text(),
+            "max_results": whole(),
+            "max_results_justification": text(),
+            "include_raw_content": text(),
+            "include_raw_content_justification": text(),
+        })
+
+        trends_schema = record({
+            "enabled": flag(),
+            "keywords": text_list(),
+            "keywords_justification": text(),
+            "timeframe": text(),
+            "timeframe_justification": text(),
+            "geo": text(),
+            "geo_justification": text(),
+            "expected_insights": text_list(),
+        })
+
+        return record(
+            {
+                "intent": intent_schema,
+                "queries": typed("array", items=query_schema),
+                "enhanced_keywords": text_list(),
+                "research_angles": text_list(),
+                "recommended_provider": text(),
+                "provider_justification": text(),
+                "exa_config": exa_schema,
+                "tavily_config": tavily_schema,
+                "trends_config": trends_schema,
+            },
+            required=["intent", "queries", "recommended_provider", "exa_config", "tavily_config"],
+        )
+    
+    def _build_persona_context(
+        self,
+        research_persona: Optional[ResearchPersona],
+        industry: Optional[str],
+        target_audience: Optional[str],
+    ) -> str:
+        """
+        Build the persona context section of the prompt.
+
+        Persona values take precedence. The explicit ``industry`` and
+        ``target_audience`` arguments are used whenever the persona is absent
+        or does not define the corresponding field, so caller-supplied
+        context is never silently dropped.
+
+        Args:
+            research_persona: Optional persona; its ``default_industry``,
+                ``default_target_audience``, ``research_angles`` (first 3) and
+                ``suggested_keywords`` (first 5) are read.
+            industry: Industry used when the persona provides none.
+            target_audience: Audience used when the persona provides none.
+
+        Returns:
+            Newline-separated context lines, or a generic sentence when no
+            context is available at all.
+        """
+        persona_industry = research_persona.default_industry if research_persona else None
+        persona_audience = research_persona.default_target_audience if research_persona else None
+
+        # Persona first, explicit arguments as the gap-filler
+        effective_industry = persona_industry or industry
+        effective_audience = persona_audience or target_audience
+
+        parts = []
+        if effective_industry:
+            parts.append(f"Industry: {effective_industry}")
+        if effective_audience:
+            parts.append(f"Target Audience: {effective_audience}")
+
+        if research_persona:
+            if research_persona.research_angles:
+                parts.append(f"Preferred Research Angles: {', '.join(research_persona.research_angles[:3])}")
+            if research_persona.suggested_keywords:
+                parts.append(f"Relevant Keywords: {', '.join(research_persona.suggested_keywords[:5])}")
+
+        if not parts:
+            return "No specific user context available. Use general best practices."
+
+        return "\n".join(parts)
+    
+    def _build_competitor_context(self, competitor_data: Optional[List[Dict]]) -> str:
+        """
+        Build the competitor context section of the prompt.
+
+        Only the first five entries are considered. Each entry is labelled by
+        its ``name``, falling back to its ``url``; entries that are not dicts
+        or that have neither value are skipped so the list never contains
+        blanks and ``None`` never reaches ``str.join``.
+
+        Args:
+            competitor_data: Optional list of competitor records.
+
+        Returns:
+            ``"\\nKnown Competitors: a, b"`` when at least one label exists,
+            otherwise an empty string.
+        """
+        if not competitor_data:
+            return ""
+
+        competitor_names = []
+        for competitor in competitor_data[:5]:
+            if not isinstance(competitor, dict):
+                continue
+            # `or` (not a .get default) so a present-but-null name falls back to the url
+            label = competitor.get("name") or competitor.get("url")
+            if label:
+                competitor_names.append(str(label))
+
+        if not competitor_names:
+            return ""
+        return f"\nKnown Competitors: {', '.join(competitor_names)}"
+    
+    def _parse_unified_result(self, result: Dict[str, Any], user_input: str) -> Dict[str, Any]:
+        """
+        Parse the unified LLM result into the structured response.
+
+        Missing sections and sections returned as JSON ``null`` are both
+        replaced by defaults, and a non-numeric ``confidence`` falls back to
+        0.7, so one malformed field does not discard an otherwise usable plan.
+        Queries that cannot be converted are logged and skipped.
+
+        Args:
+            result: Decoded LLM response (expected to follow the unified schema).
+            user_input: Original user request; stored on the intent and used
+                as the primary question when the LLM supplies none.
+
+        Returns:
+            Dict with ``success`` set to True plus ``intent``, ``queries``,
+            ``enhanced_keywords``, ``research_angles``,
+            ``recommended_provider``, ``provider_justification``,
+            ``exa_config``, ``tavily_config``, ``trends_config`` and
+            ``analysis_summary``.
+        """
+        # `x.get(key) or default` is used throughout instead of
+        # `x.get(key, default)`: the latter returns None when the key is
+        # present with a null value.
+        intent_data = result.get("intent") or {}
+        if not isinstance(intent_data, dict):
+            intent_data = {}
+
+        raw_confidence = intent_data.get("confidence", 0.7)
+        try:
+            confidence = float(raw_confidence)
+        except (TypeError, ValueError):
+            logger.warning(f"Invalid confidence value {raw_confidence!r}; using 0.7")
+            confidence = 0.7
+
+        # Build ResearchIntent
+        intent = ResearchIntent(
+            primary_question=intent_data.get("primary_question") or user_input,
+            secondary_questions=intent_data.get("secondary_questions") or [],
+            purpose=intent_data.get("purpose") or "learn",
+            content_output=intent_data.get("content_output") or "general",
+            expected_deliverables=intent_data.get("expected_deliverables") or ["key_statistics"],
+            depth=intent_data.get("depth") or "detailed",
+            focus_areas=intent_data.get("focus_areas") or [],
+            perspective=intent_data.get("perspective"),
+            time_sensitivity=intent_data.get("time_sensitivity"),
+            input_type=intent_data.get("input_type") or "keywords",
+            original_input=user_input,
+            confidence=confidence,
+            confidence_reason=intent_data.get("confidence_reason"),
+            great_example=intent_data.get("great_example"),
+            needs_clarification=intent_data.get("needs_clarification") or False,
+            clarifying_questions=intent_data.get("clarifying_questions") or [],
+        )
+
+        # Build queries; a bad entry is skipped rather than failing the whole parse
+        queries = []
+        for q in result.get("queries") or []:
+            try:
+                queries.append(ResearchQuery(
+                    query=q.get("query", ""),
+                    purpose=q.get("purpose", "key_statistics"),
+                    provider=q.get("provider", "exa"),
+                    priority=int(q.get("priority", 3)),
+                    expected_results=q.get("expected_results", ""),
+                ))
+            except Exception as e:
+                logger.warning(f"Failed to parse query: {e}")
+
+        return {
+            "success": True,
+            "intent": intent,
+            "queries": queries,
+            "enhanced_keywords": result.get("enhanced_keywords") or [],
+            "research_angles": result.get("research_angles") or [],
+            "recommended_provider": result.get("recommended_provider") or "exa",
+            "provider_justification": result.get("provider_justification") or "",
+            "exa_config": result.get("exa_config") or {},
+            "tavily_config": result.get("tavily_config") or {},
+            "trends_config": result.get("trends_config") or {},  # Google Trends configuration
+            "analysis_summary": intent_data.get("analysis_summary") or "",
+        }
+    
+    def _create_fallback_response(self, user_input: str, keywords: List[str]) -> Dict[str, Any]:
+        """
+        Create the fallback response used when analysis fails.
+
+        The dict carries the same keys as the successful response (including
+        ``analysis_summary``) so callers can read either result the same way;
+        ``success`` is False and trends analysis is disabled.
+
+        Args:
+            user_input: Original user request; used as the single search query
+                and embedded in the generic primary question.
+            keywords: Caller-supplied keywords, returned as ``enhanced_keywords``.
+
+        Returns:
+            Response dict with a generic intent, one Exa query built from the
+            raw input, and default Exa/Tavily settings.
+        """
+        return {
+            "success": False,
+            "intent": ResearchIntent(
+                primary_question=f"What are the key insights about: {user_input}?",
+                purpose="learn",
+                content_output="general",
+                expected_deliverables=["key_statistics", "best_practices"],
+                depth="detailed",
+                original_input=user_input,
+                confidence=0.5,
+            ),
+            "queries": [
+                ResearchQuery(
+                    query=user_input,
+                    purpose="key_statistics",
+                    provider="exa",
+                    priority=5,
+                    expected_results="General research results",
+                )
+            ],
+            # Copy so later mutation of the response cannot alter the caller's list
+            "enhanced_keywords": list(keywords or []),
+            "research_angles": [],
+            "recommended_provider": "exa",
+            "provider_justification": "Default fallback to Exa for semantic search",
+            "exa_config": {
+                "enabled": True,
+                "type": "auto",
+                "type_justification": "Auto mode for balanced results",
+                "numResults": 10,
+                "highlights": True,
+            },
+            "tavily_config": {
+                "enabled": True,
+                "topic": "general",
+                "search_depth": "advanced",
+                "include_answer": True,
+            },
+            "trends_config": {
+                "enabled": False,  # Disabled in fallback
+            },
+            # Present on the success path; kept here so the key always exists
+            "analysis_summary": "",
+        }
--- a/backend/services/research/research_persona_service.py
+++ b/backend/services/research/research_persona_service.py
@@ -34,39 +34,81 @@ class ResearchPersonaService:
        user_id: str
    ) -> Optional[ResearchPersona]:
        """
-        Get research persona for user ONLY if it exists in cache.
-        This method NEVER generates - it only returns cached personas.
+        Get research persona for user if it exists in database (regardless of cache validity).
+        This method NEVER generates - it only returns existing personas.
        Use this for config endpoints to avoid triggering rate limit checks.
        
+        Note: Returns persona even if cache is expired - cache validity only matters for regeneration.
+        
        Args:
            user_id: User ID (Clerk string)
            
        Returns:
-            ResearchPersona if cached and valid, None otherwise
+            ResearchPersona if exists in database, None otherwise
        """
        try:
            # Get persona data record
            persona_data = self._get_persona_data_record(user_id)
            
            if not persona_data:
-                logger.debug(f"No persona data found for user {user_id}")
+                logger.debug(f"[get_cached_only] No persona data record found for user {user_id}")
                return None
            
-            # Only return if cache is valid and persona exists
-            if self.is_cache_valid(persona_data) and persona_data.research_persona:
+            # Check if research_persona field exists and is not None/empty
+            # Handle cases where it might be None, empty dict {}, or empty string ""
+            research_persona_raw = persona_data.research_persona
+            has_persona = (
+                research_persona_raw is not None 
+                and research_persona_raw != {}
+                and research_persona_raw != ""
+                and (isinstance(research_persona_raw, dict) and len(research_persona_raw) > 0)
+            )
+            
+            logger.info(
+                f"[get_cached_only] Checking research persona for user {user_id}: "
+                f"persona_data exists=True, research_persona_raw={research_persona_raw is not None}, "
+                f"research_persona type={type(research_persona_raw)}, "
+                f"has_persona={has_persona}, "
+                f"generated_at={persona_data.research_persona_generated_at}"
+            )
+            
+            # Return persona if it exists, regardless of cache validity
+            # Cache validity only matters when deciding whether to regenerate
+            if has_persona:
                try:
-                    logger.debug(f"Returning cached research persona for user {user_id}")
-                    return ResearchPersona(**persona_data.research_persona)
+                    cache_valid = self.is_cache_valid(persona_data)
+                    cache_status = "valid" if cache_valid else "expired"
+                    logger.info(
+                        f"[get_cached_only] ✅ Returning research persona for user {user_id} "
+                        f"(cache: {cache_status}, generated_at: {persona_data.research_persona_generated_at})"
+                    )
+                    # Ensure we're passing a dict to ResearchPersona
+                    if not isinstance(research_persona_raw, dict):
+                        logger.error(f"[get_cached_only] research_persona_raw is not a dict: {type(research_persona_raw)}")
+                        return None
+                    parsed_persona = ResearchPersona(**research_persona_raw)
+                    logger.info(
+                        f"[get_cached_only] ✅ Successfully parsed persona for user {user_id}: "
+                        f"industry={parsed_persona.default_industry}, "
+                        f"target_audience={parsed_persona.default_target_audience}"
+                    )
+                    return parsed_persona
                except Exception as e:
-                    logger.warning(f"Failed to parse cached research persona: {e}")
+                    logger.error(f"[get_cached_only] ❌ Failed to parse research persona for user {user_id}: {e}", exc_info=True)
+                    logger.debug(
+                        f"[get_cached_only] Persona data details: "
+                        f"type={type(research_persona_raw)}, "
+                        f"is_dict={isinstance(research_persona_raw, dict)}, "
+                        f"value sample: {str(research_persona_raw)[:500] if research_persona_raw else 'None'}"
+                    )
                    return None
            
-            # Cache invalid or persona missing - return None (don't generate)
-            logger.debug(f"No valid cached research persona for user {user_id}")
+            # Persona doesn't exist in database
+            logger.info(f"[get_cached_only] ⚠️ No research persona found in database for user {user_id}")
            return None
                
        except Exception as e:
-            logger.error(f"Error getting cached research persona for user {user_id}: {e}")
+            logger.error(f"[get_cached_only] ❌ Error getting research persona for user {user_id}: {e}", exc_info=True)
            return None

    def get_or_generate(
@@ -92,25 +134,40 @@ class ResearchPersonaService:
                logger.warning(f"No persona data found for user {user_id}, cannot generate research persona")
                return None
            
-            # Check cache if not forcing refresh
-            if not force_refresh and self.is_cache_valid(persona_data):
-                if persona_data.research_persona:
+            # Check if persona exists in database
+            if persona_data.research_persona:
+                # Persona exists - check if we should return it or regenerate
+                cache_valid = self.is_cache_valid(persona_data)
+                
+                if not force_refresh and cache_valid:
+                    # Cache is valid - return existing persona
                    logger.info(f"Using cached research persona for user {user_id}")
                    try:
                        return ResearchPersona(**persona_data.research_persona)
                    except Exception as e:
                        logger.warning(f"Failed to parse cached research persona: {e}, regenerating...")
-                        # Fall through to regeneration
+                        # Fall through to regeneration if parsing fails
+                elif not force_refresh:
+                    # Persona exists but cache expired - return it anyway (don't regenerate unless forced)
+                    logger.info(f"Research persona exists for user {user_id} but cache expired - returning existing persona (use force_refresh=true to regenerate)")
+                    try:
+                        return ResearchPersona(**persona_data.research_persona)
+                    except Exception as e:
+                        logger.warning(f"Failed to parse existing research persona: {e}, regenerating...")
+                        # Fall through to regeneration if parsing fails
                else:
-                    logger.info(f"Research persona missing for user {user_id}, generating...")
-            else:
-                if force_refresh:
+                    # force_refresh=True - regenerate even though persona exists
                    logger.info(f"Forcing refresh of research persona for user {user_id}")
-                else:
-                    logger.info(f"Cache expired for user {user_id}, regenerating...")
+            else:
+                # Persona doesn't exist - generate new one
+                logger.info(f"Research persona missing for user {user_id}, generating...")
            
-            # Generate new research persona
+            # Generate new research persona (only reaches here if:
+            # 1. Persona doesn't exist, OR
+            # 2. force_refresh=True, OR
+            # 3. Parsing of existing persona failed
            try:
+                logger.info(f"Generating research persona for user {user_id}")
                research_persona = self.generate_research_persona(user_id)
            except HTTPException:
                # Re-raise HTTPExceptions (e.g., 429 subscription limit) so they propagate to API
--- a/backend/services/research/trends/__init__.py
+++ b/backend/services/research/trends/__init__.py
@@ -0,0 +1,9 @@
+"""
+Google Trends Research Service
+
+Provides Google Trends data integration for the Research Engine.
+"""
+
+from .google_trends_service import GoogleTrendsService
+
+__all__ = ['GoogleTrendsService']
--- a/backend/services/research/trends/google_trends_service.py
+++ b/backend/services/research/trends/google_trends_service.py
@@ -0,0 +1,380 @@
+"""
+Google Trends Service
+
+Provides Google Trends data integration for the Research Engine.
+Handles rate limiting, caching, error handling, and data serialization.
+
+Author: ALwrity Team
+Version: 1.0
+"""
+
+import asyncio
+from typing import List, Dict, Any, Optional
+from datetime import datetime, timedelta
+from loguru import logger
+import pandas as pd
+
+try:
+    from pytrends.request import TrendReq
+    PYTrends_AVAILABLE = True
+except ImportError:
+    PYTrends_AVAILABLE = False
+    logger.warning("pytrends not installed. Google Trends features will be unavailable.")
+
+from .rate_limiter import RateLimiter
+
+
+class GoogleTrendsService:
+    """
+    Service for fetching and analyzing Google Trends data.
+
+    Features:
+    - Interest over time
+    - Interest by region
+    - Related topics
+    - Related queries
+    - Rate limiting (one rate-limiter slot per public call, 1 call/sec)
+    - Caching (in-memory, 24-hour TTL, bounded number of entries)
+    - Async support (blocking pytrends calls run in worker threads)
+    - Error handling: failed sections degrade to empty data and a total
+      failure returns a fallback response carrying an "error" message
+    """
+
+    # Upper bound on the number of entries kept in the in-memory cache.
+    MAX_CACHE_ENTRIES = 100
+
+    def __init__(self):
+        """
+        Initialize the Google Trends service.
+
+        Raises:
+            RuntimeError: If the pytrends library is not installed.
+        """
+        if not PYTrends_AVAILABLE:
+            raise RuntimeError("pytrends library is required. Install with: pip install pytrends")
+
+        self.rate_limiter = RateLimiter(max_calls=1, period=1.0)  # 1 request per second
+        self.cache: Dict[str, Dict[str, Any]] = {}  # Simple in-memory cache
+        self.cache_ttl = timedelta(hours=24)  # 24-hour cache
+
+        logger.info("GoogleTrendsService initialized")
+
+    async def analyze_trends(
+        self,
+        keywords: List[str],
+        timeframe: str = "today 12-m",
+        geo: str = "US",
+        user_id: Optional[str] = None,
+    ) -> Dict[str, Any]:
+        """
+        Comprehensive trends analysis.
+
+        Fetches all trends data for one payload:
+        - Interest over time
+        - Interest by region
+        - Related topics (top & rising)
+        - Related queries (top & rising)
+
+        Args:
+            keywords: List of keywords to analyze; only the first 5 are used
+            timeframe: Timeframe string (e.g., "today 12-m", "today 1-y", "all")
+            geo: Country code (e.g., "US", "GB", "IN")
+            user_id: User ID for subscription checks (currently unused)
+
+        Returns:
+            Dict containing all trends data in serializable format. On
+            failure a fallback dict with empty sections and an "error" key
+            is returned instead of raising.
+
+        Raises:
+            ValueError: If keywords list is empty
+        """
+        if not keywords:
+            raise ValueError("Keywords list cannot be empty")
+
+        if len(keywords) > 5:
+            logger.warning(f"Too many keywords ({len(keywords)}), using first 5")
+            keywords = keywords[:5]
+
+        # Check cache first
+        cache_key = self._build_cache_key(keywords, timeframe, geo)
+        cached_data = self._get_from_cache(cache_key)
+        if cached_data is not None:
+            logger.info(f"Returning cached trends data for: {keywords}")
+            return {**cached_data, "cached": True}
+
+        # Rate limit
+        await self.rate_limiter.acquire()
+
+        try:
+            logger.info(f"Fetching Google Trends data for: {keywords} (timeframe: {timeframe}, geo: {geo})")
+
+            # Initialize pytrends (sync operation, run in thread)
+            pytrends = await asyncio.to_thread(
+                self._initialize_pytrends,
+                keywords,
+                timeframe,
+                geo
+            )
+
+            # Fetch all sections concurrently (pytrends methods are sync, so use to_thread)
+            interest_over_time, interest_by_region, related_topics, related_queries = await asyncio.gather(
+                asyncio.to_thread(self._safe_interest_over_time, pytrends),
+                asyncio.to_thread(self._safe_interest_by_region, pytrends),
+                asyncio.to_thread(self._safe_related_topics, pytrends, keywords),
+                asyncio.to_thread(self._safe_related_queries, pytrends, keywords),
+                return_exceptions=True
+            )
+
+            # Handle exceptions: a failed section becomes empty data
+            if isinstance(interest_over_time, Exception):
+                logger.error(f"Interest over time failed: {interest_over_time}")
+                interest_over_time = []
+            if isinstance(interest_by_region, Exception):
+                logger.error(f"Interest by region failed: {interest_by_region}")
+                interest_by_region = []
+            if isinstance(related_topics, Exception):
+                logger.error(f"Related topics failed: {related_topics}")
+                related_topics = {"top": [], "rising": []}
+            if isinstance(related_queries, Exception):
+                logger.error(f"Related queries failed: {related_queries}")
+                related_queries = {"top": [], "rising": []}
+
+            # Build result
+            result = {
+                "interest_over_time": interest_over_time,
+                "interest_by_region": interest_by_region,
+                "related_topics": related_topics,
+                "related_queries": related_queries,
+                "timeframe": timeframe,
+                "geo": geo,
+                "keywords": keywords,
+                "timestamp": datetime.utcnow().isoformat(),
+                "cached": False
+            }
+
+            # Cache only results that carry data. The _safe_* helpers swallow
+            # errors and return empty sections, so an all-empty result may be
+            # a transient failure that must not be served for the whole TTL.
+            if self._has_trend_data(result):
+                self._save_to_cache(cache_key, result)
+            else:
+                logger.warning(f"Google Trends returned no data for: {keywords}; result not cached")
+
+            logger.info(f"Google Trends data fetched successfully: {len(interest_over_time)} time points, {len(interest_by_region)} regions")
+
+            return result
+
+        except Exception as e:
+            logger.error(f"Google Trends analysis failed: {e}")
+            # Return fallback response
+            return self._create_fallback_response(keywords, timeframe, geo, str(e))
+
+    @staticmethod
+    def _has_trend_data(result: Dict[str, Any]) -> bool:
+        """Return True if at least one section of a result holds data."""
+        return bool(
+            result.get("interest_over_time")
+            or result.get("interest_by_region")
+            or any(result.get("related_topics", {}).values())
+            or any(result.get("related_queries", {}).values())
+        )
+
+    # NOTE: TrendReq annotations are quoted on purpose. The name is undefined
+    # when pytrends is missing, and an unquoted annotation would raise
+    # NameError while this class body is being defined.
+    def _initialize_pytrends(
+        self,
+        keywords: List[str],
+        timeframe: str,
+        geo: str
+    ) -> "TrendReq":
+        """Initialize pytrends and build payload (sync operation)."""
+        pytrends = TrendReq(hl='en-US', tz=360)
+        pytrends.build_payload(kw_list=keywords, timeframe=timeframe, geo=geo)
+        return pytrends
+
+    def _safe_interest_over_time(self, pytrends: "TrendReq") -> List[Dict[str, Any]]:
+        """Safely fetch interest over time data; returns [] on error or no data."""
+        try:
+            df = pytrends.interest_over_time()
+            if df.empty:
+                return []
+            return self._format_dataframe(df.reset_index())
+        except Exception as e:
+            logger.error(f"Error fetching interest over time: {e}")
+            return []
+
+    def _safe_interest_by_region(self, pytrends: "TrendReq") -> List[Dict[str, Any]]:
+        """Safely fetch interest by region data; returns [] on error or no data."""
+        try:
+            df = pytrends.interest_by_region(resolution='COUNTRY', inc_low_vol=True, inc_geo_code=False)
+            if df.empty:
+                return []
+            return self._format_dataframe(df.reset_index())
+        except Exception as e:
+            logger.error(f"Error fetching interest by region: {e}")
+            return []
+
+    def _safe_related_topics(
+        self,
+        pytrends: "TrendReq",
+        keywords: List[str]
+    ) -> Dict[str, List[Dict[str, Any]]]:
+        """
+        Safely fetch related topics.
+
+        Merges the "top" and "rising" tables of every keyword into two flat
+        lists of {"topic_title", "value"} records. Returns empty lists on error.
+        """
+        try:
+            topics_data = pytrends.related_topics()
+            result = {"top": [], "rising": []}
+
+            for keyword in keywords:
+                keyword_topics = topics_data.get(keyword)
+                if not isinstance(keyword_topics, dict):
+                    continue
+
+                for bucket in ("top", "rising"):
+                    bucket_df = keyword_topics.get(bucket)
+                    # pytrends may supply None instead of a DataFrame
+                    if bucket_df is None or bucket_df.empty:
+                        continue
+                    # Select relevant columns
+                    if "topic_title" in bucket_df.columns and "value" in bucket_df.columns:
+                        result[bucket].extend(bucket_df[["topic_title", "value"]].to_dict('records'))
+
+            return result
+        except Exception as e:
+            logger.error(f"Error fetching related topics: {e}")
+            return {"top": [], "rising": []}
+
+    def _safe_related_queries(
+        self,
+        pytrends: "TrendReq",
+        keywords: List[str]
+    ) -> Dict[str, List[Dict[str, Any]]]:
+        """
+        Safely fetch related queries.
+
+        Merges the "top" and "rising" tables of every keyword into two flat
+        lists of records. Returns empty lists on error.
+        """
+        try:
+            queries_data = pytrends.related_queries()
+            result = {"top": [], "rising": []}
+
+            for keyword in keywords:
+                keyword_queries = queries_data.get(keyword)
+                if not isinstance(keyword_queries, dict):
+                    continue
+
+                for bucket in ("top", "rising"):
+                    bucket_df = keyword_queries.get(bucket)
+                    # pytrends may supply None instead of a DataFrame
+                    if bucket_df is None or bucket_df.empty:
+                        continue
+                    result[bucket].extend(bucket_df.to_dict('records'))
+
+            return result
+        except Exception as e:
+            logger.error(f"Error fetching related queries: {e}")
+            return {"top": [], "rising": []}
+
+    def _format_dataframe(self, df: pd.DataFrame) -> List[Dict[str, Any]]:
+        """
+        Convert DataFrame to list of dicts (serializable format).
+
+        Datetime columns are converted to strings in place on the DataFrame
+        that is passed in.
+        """
+        if df.empty:
+            return []
+
+        # Convert datetime columns to strings
+        for col in df.columns:
+            if pd.api.types.is_datetime64_any_dtype(df[col]):
+                df[col] = df[col].astype(str)
+
+        # Convert to dict records
+        return df.to_dict('records')
+
+    def _build_cache_key(self, keywords: List[str], timeframe: str, geo: str) -> str:
+        """Build cache key from parameters (keyword order does not matter)."""
+        keywords_str = ":".join(sorted(keywords))
+        return f"google_trends:{keywords_str}:{timeframe}:{geo}"
+
+    def _is_expired(self, entry: Dict[str, Any], now: datetime) -> bool:
+        """
+        Return True if a cache entry is older than the TTL.
+
+        Entries whose timestamp is missing or unparsable count as expired so
+        that a malformed entry can never raise out of a cache lookup.
+        """
+        stamp = entry.get("cached_at") or entry.get("timestamp")
+        try:
+            return now - datetime.fromisoformat(stamp) > self.cache_ttl
+        except (TypeError, ValueError):
+            return True
+
+    def _get_from_cache(self, cache_key: str) -> Optional[Dict[str, Any]]:
+        """Get data from cache if present and not expired, else None."""
+        cached_entry = self.cache.get(cache_key)
+        if cached_entry is None:
+            return None
+
+        if self._is_expired(cached_entry, datetime.utcnow()):
+            # Expired, remove from cache
+            del self.cache[cache_key]
+            return None
+
+        # Return a shallow copy of the cached data (without cached flag)
+        result = {**cached_entry}
+        result.pop("cached", None)
+        return result
+
+    def _save_to_cache(self, cache_key: str, data: Dict[str, Any]):
+        """Save data to cache, stamping it with the time it was stored."""
+        # Drop any previous entry first so the key is re-inserted as newest
+        # (dicts keep insertion order, which eviction relies on).
+        self.cache.pop(cache_key, None)
+        self.cache[cache_key] = {
+            **data,
+            "cached_at": datetime.utcnow().isoformat()
+        }
+
+        # Enforce the size limit
+        if len(self.cache) > self.MAX_CACHE_ENTRIES:
+            self._cleanup_cache()
+
+    def _cleanup_cache(self):
+        """
+        Remove expired cache entries, then evict the oldest remaining ones
+        until the cache holds at most MAX_CACHE_ENTRIES entries.
+        """
+        now = datetime.utcnow()
+        expired_keys = [
+            key for key, entry in self.cache.items()
+            if self._is_expired(entry, now)
+        ]
+
+        for key in expired_keys:
+            del self.cache[key]
+
+        logger.debug(f"Cleaned up {len(expired_keys)} expired cache entries")
+
+        # Purging expired entries alone does not bound the cache when every
+        # entry is still fresh, so drop the oldest insertions as well.
+        overflow = len(self.cache) - self.MAX_CACHE_ENTRIES
+        if overflow > 0:
+            for key in list(self.cache)[:overflow]:
+                del self.cache[key]
+            logger.debug(f"Evicted {overflow} oldest cache entries to enforce size limit")
+
+    def _create_fallback_response(
+        self,
+        keywords: List[str],
+        timeframe: str,
+        geo: str,
+        error_message: str
+    ) -> Dict[str, Any]:
+        """Create fallback response (empty sections plus "error") when trends analysis fails."""
+        return {
+            "interest_over_time": [],
+            "interest_by_region": [],
+            "related_topics": {"top": [], "rising": []},
+            "related_queries": {"top": [], "rising": []},
+            "timeframe": timeframe,
+            "geo": geo,
+            "keywords": keywords,
+            "timestamp": datetime.utcnow().isoformat(),
+            "cached": False,
+            "error": error_message
+        }
+
+    async def get_trending_searches(
+        self,
+        country: str = "united_states",
+        user_id: Optional[str] = None
+    ) -> List[str]:
+        """
+        Get current trending searches for a country.
+
+        Args:
+            country: Country name (e.g., "united_states", "united_kingdom")
+            user_id: User ID for subscription checks (currently unused)
+
+        Returns:
+            List of trending search terms; empty list on error or no data
+        """
+        await self.rate_limiter.acquire()
+
+        def _fetch_trending():
+            # Build the client inside the worker thread as well, matching
+            # analyze_trends, so no pytrends work runs on the event loop.
+            pytrends = TrendReq(hl='en-US', tz=360)
+            return pytrends.trending_searches(pn=country)
+
+        try:
+            trending_df = await asyncio.to_thread(_fetch_trending)
+
+            if trending_df.empty:
+                return []
+
+            # Return as list of strings
+            return trending_df[0].tolist() if len(trending_df.columns) > 0 else []
+
+        except Exception as e:
+            logger.error(f"Error fetching trending searches: {e}")
+            return []
--- a/backend/services/research/trends/rate_limiter.py
+++ b/backend/services/research/trends/rate_limiter.py
@@ -0,0 +1,57 @@
+"""
+Rate Limiter for Google Trends API
+
+Ensures we don't exceed Google Trends rate limits (1 request per second).
+"""
+
+import asyncio
+from time import time
+from collections import deque
+from loguru import logger
+
+
+class RateLimiter:
+    """
+    Simple rate limiter for Google Trends API.
+
+    Limits requests to max_calls per period (in seconds) using a sliding
+    window of recent call timestamps.
+    """
+
+    def __init__(self, max_calls: int = 1, period: float = 1.0):
+        """
+        Initialize rate limiter.
+
+        Args:
+            max_calls: Maximum number of calls allowed per period (>= 1)
+            period: Time period in seconds
+
+        Raises:
+            ValueError: If max_calls is less than 1
+        """
+        if max_calls < 1:
+            raise ValueError("max_calls must be at least 1")
+
+        self.max_calls = max_calls
+        self.period = period
+        self.calls = deque()  # timestamps of calls inside the current window
+        self._lock = asyncio.Lock()
+
+    async def acquire(self):
+        """
+        Acquire permission to make a request.
+
+        Will wait if rate limit would be exceeded. The lock is held while
+        waiting, so concurrent callers are admitted one at a time.
+        """
+        async with self._lock:
+            # Loop instead of recursing: asyncio.Lock is not reentrant, so a
+            # recursive acquire() from inside this block would wait forever
+            # on the lock this task already holds.
+            while True:
+                now = time()
+
+                # Remove old calls outside the period. "<=" also drops a call
+                # exactly one period old, so it cannot slip past both this
+                # purge and the wait below.
+                while self.calls and self.calls[0] <= now - self.period:
+                    self.calls.popleft()
+
+                if len(self.calls) < self.max_calls:
+                    break
+
+                # At limit: wait until the oldest call leaves the window,
+                # then re-check.
+                sleep_time = self.period - (now - self.calls[0])
+                logger.debug(f"Rate limit reached, waiting {sleep_time:.2f}s")
+                await asyncio.sleep(sleep_time)
+
+            # Record this call
+            self.calls.append(time())
+            logger.debug(f"Rate limit check passed, {len(self.calls)}/{self.max_calls} calls in period")