feat: image generation overhaul (model-aware text, dim clamping, $0.30 pricing), event-driven dashboard cache invalidation, SEO insights (AI visibility, GSC, keyword gap), YouTube OAuth/publish, blog writer & content planning improvements, scheduler monitoring updates

2026-05-30 07:58:22 +05:30
parent aaf94049da
commit 64f1f88cdd
129 changed files with 8796 additions and 8755 deletions
--- a/backend/services/blog_writer/outline/keyword_curator.py
+++ b/backend/services/blog_writer/outline/keyword_curator.py
@@ -0,0 +1,194 @@
+"""
+Keyword Curator - Smart keyword selection engine for SEO-optimized outline generation.
+
+Instead of dumping all discovered keywords into the LLM prompt (which causes
+keyword stuffing and dilutes topical focus), this module selects a highly
+curated subset based on SEO best practices and assigns each keyword a
+specific structural role in the outline.
+"""
+
+from typing import Dict, Any, List, Optional
+
+
+class KeywordCurator:
+    """
+    Curates a strict, minimal keyword set for outline generation.
+
+    Selection Rules (SEO Best Practice):
+    1. Primary (H1 Focus)   → top 2 — brand name + core topic
+    2. Secondary (H2 Focus) → top 2 — feature/benefit anchors
+    3. Long-tail (H3 Focus) → top 2 — informational intent phrases
+    4. Semantic (Body Context) → top 4 — prevent topical drift
+    5. Trending (Mention)   → top 2 — brief contextual mentions
+    6. Content Gap (Edge)   → top 1 — competitive differentiator
+    """
+
+    # How many keywords to select from each category (13 in total)
+    SLOTS: Dict[str, int] = {
+        "primary": 2,
+        "secondary": 2,
+        "long_tail": 2,
+        "semantic": 4,
+        "trending": 2,
+        "content_gap": 1,
+    }
+
+    # Slots whose field in the raw keyword_analysis dict has a different name
+    _SOURCE_KEYS: Dict[str, str] = {
+        "semantic": "semantic_keywords",
+        "trending": "trending_terms",
+        "content_gap": "content_gaps",
+    }
+
+    def curate(self, keyword_analysis: Dict[str, Any]) -> Dict[str, Any]:
+        """
+        Apply selection rules and return a structured, minimal keyword payload.
+
+        Args:
+            keyword_analysis: Raw keyword_analysis dict from research
+                             (keys: primary, secondary, long_tail,
+                              semantic_keywords, trending_terms, content_gaps, ...).
+                             None or a non-dict value is treated as empty.
+
+        Returns:
+            Dict with one keyword list per slot in SLOTS, "locked_keywords" (all
+            selected keywords flattened in slot order), "stats" (total_raw,
+            total_curated, reduction_pct) and whichever of search_intent,
+            difficulty and analysis_insights the input contained.
+        """
+        # Research may carry no keyword analysis; curate an empty payload
+        # rather than failing on a missing .get().
+        if not isinstance(keyword_analysis, dict):
+            keyword_analysis = {}
+
+        curated: Dict[str, Any] = {}
+        total_raw = 0
+        total_curated = 0
+        for slot in self.SLOTS:
+            source_key = self._source_key(slot)
+            curated[slot] = self._pick(keyword_analysis, source_key, slot_key=slot)
+            raw_list = keyword_analysis.get(source_key, [])
+            total_raw += len(raw_list) if isinstance(raw_list, list) else 0
+            total_curated += len(curated[slot])
+
+        # Flat "locked" list of every selected keyword, in slot order
+        curated["locked_keywords"] = [kw for slot in self.SLOTS for kw in curated[slot]]
+
+        # Counts for transparency; max(..., 1) guards the empty-research case
+        curated["stats"] = {
+            "total_raw": total_raw,
+            "total_curated": total_curated,
+            "reduction_pct": round((1 - total_curated / max(total_raw, 1)) * 100, 1),
+        }
+
+        # Preserve non-keyword analysis fields
+        for field in ("search_intent", "difficulty", "analysis_insights"):
+            if field in keyword_analysis:
+                curated[field] = keyword_analysis[field]
+
+        return curated
+
+    def format_for_prompt(self, curated: Dict[str, Any]) -> str:
+        """
+        Format the curated keyword payload into a strict structural prompt section.
+
+        Args:
+            curated: Payload produced by curate(). Empty or missing groups are
+                     skipped, except primary, which gets a fallback H1 line.
+
+        Returns:
+            A string ready to be injected into the outline prompt.
+        """
+        lines: List[str] = ["## KEYWORD PLACEMENT DIRECTIVES\n"]
+
+        # H1 — primary (the only group that emits a line even when empty)
+        primary = curated.get("primary", [])
+        if primary:
+            h1_text = " | ".join(primary)
+            lines.append(f"### H1 (must contain, in order of priority): {h1_text}")
+            lines.append("   → Anchor the title and main heading on these terms.")
+        else:
+            lines.append("### H1: No primary keywords provided — derive from topic context.")
+
+        # H2 / H3 / body / trending share one shape: "<heading>: kw, kw" + usage hint
+        joined_groups = (
+            ("secondary", "### H2 sections must anchor on (one per major section)",
+             "   → Each secondary keyword should map to a distinct H2 section."),
+            ("long_tail", "### H3 / Subsection anchors for informational intent",
+             "   → Use these as deeper-dive subsections under the relevant H2."),
+            ("semantic", "### Body-level semantic signals (use naturally, max 1-2 mentions each)",
+             "   → These prevent topical drift. Weave into paragraph text, not headings."),
+            ("trending", "### Trending context (mention subtly if relevant)",
+             "   → Optional. Only include if it strengthens timeliness/narrative."),
+        )
+        for group, heading, hint in joined_groups:
+            keywords = curated.get(group, [])
+            if keywords:
+                lines.append(f"{heading}: {', '.join(keywords)}")
+                lines.append(hint)
+
+        # Content gap — competitive edge (only the first gap is surfaced)
+        content_gap = curated.get("content_gap", [])
+        if content_gap:
+            lines.append(f"### Competitive advantage signal (must weave into narrative): {content_gap[0]}")
+            lines.append("   → This is your primary differentiation hook. Surface it prominently in the unique value section.")
+
+        lines.append("")
+        lines.append("GUIDELINE: Treat these as the primary keyword anchors. You may include closely related")
+        lines.append("intent-matching variations where natural, but avoid inserting every raw research keyword.")
+        lines.append("Quality over density — each keyword earns its place by serving a clear structural purpose.")
+
+        stats = curated.get("stats", {})
+        if stats:
+            lines.append(
+                f"\n[From {stats.get('total_raw', '?')} raw research keywords "
+                f"→ curated to {stats.get('total_curated', '?')} locked keywords "
+                f"({stats.get('reduction_pct', '?')}% reduction)]"
+            )
+
+        return "\n".join(lines)
+
+    # ------------------------------------------------------------------
+    # Internal helpers
+    # ------------------------------------------------------------------
+
+    @staticmethod
+    def _source_key(slot_key: str) -> str:
+        """Map internal slot key to the actual field name in keyword_analysis."""
+        return KeywordCurator._SOURCE_KEYS.get(slot_key, slot_key)
+
+    def _pick(
+        self,
+        data: Dict[str, Any],
+        source_key: str,
+        slot_key: Optional[str] = None,
+    ) -> List[str]:
+        """
+        Pick up to N usable keywords from a keyword list.
+
+        Args:
+            data: The raw keyword_analysis dict.
+            source_key: The actual key in the dict (e.g. 'semantic_keywords').
+            slot_key: The internal slot name for looking up the limit.
+                      Falls back to source_key if not provided.
+        Returns:
+            At most N strings in their original order. Non-string and blank
+            entries and case-insensitive repeats are skipped so they cannot
+            take a slot; [] when the field is missing or is not a list.
+        """
+        limit = self.SLOTS.get(slot_key or source_key, 5)
+        raw: Any = data.get(source_key, [])
+        if not isinstance(raw, list):
+            return []
+
+        # Keep research order: the earliest usable entries fill the slots
+        picked: List[str] = []
+        seen = set()
+        for item in raw:
+            if len(picked) >= limit:
+                break
+            key = item.strip().casefold() if isinstance(item, str) else ""
+            if key and key not in seen:
+                seen.add(key)
+                picked.append(item)
+        return picked
--- a/backend/services/blog_writer/outline/metadata_collector.py
+++ b/backend/services/blog_writer/outline/metadata_collector.py
@@ -1,7 +1,7 @@
 """
 Metadata Collector - Handles collection and formatting of outline metadata.

-Collects source mapping stats, grounding insights, optimization results, and research coverage.
+Collects source mapping stats, grounding insights, and research coverage.
 """

 from typing import Dict, Any, List
@@ -54,31 +54,6 @@ class MetadataCollector:
            quality_indicators=grounding_insights.get('quality_indicators')
        )
    
-    def collect_optimization_results(self, optimized_sections, focus):
-        """Collect optimization results for UI display."""
-        from models.blog_models import OptimizationResults
-        
-        # Calculate a quality score based on section completeness
-        total_sections = len(optimized_sections)
-        complete_sections = sum(1 for section in optimized_sections 
-                              if section.heading and section.subheadings and section.key_points)
-        
-        quality_score = (complete_sections / total_sections * 10) if total_sections > 0 else 0.0
-        
-        improvements_made = [
-            "Enhanced section headings for better SEO",
-            "Optimized keyword distribution across sections",
-            "Improved content flow and logical progression",
-            "Balanced word count distribution",
-            "Enhanced subheadings for better readability"
-        ]
-        
-        return OptimizationResults(
-            overall_quality_score=round(quality_score, 1),
-            improvements_made=improvements_made,
-            optimization_focus=focus
-        )
-    
    def collect_research_coverage(self, research):
        """Collect research coverage metrics for UI display."""
        from models.blog_models import ResearchCoverage
--- a/backend/services/blog_writer/outline/outline_generator.py
+++ b/backend/services/blog_writer/outline/outline_generator.py
@@ -1,7 +1,8 @@
 """
 Outline Generator - AI-powered outline generation from research data.

-Generates comprehensive, SEO-optimized outlines using research intelligence.
+Generates comprehensive, SEO-optimized outlines using research intelligence
+and a keyword-curation engine that prevents keyword stuffing.
 """

 from typing import Dict, Any, List, Tuple
@@ -23,6 +24,7 @@ from .metadata_collector import MetadataCollector
 from .prompt_builder import PromptBuilder
 from .response_processor import ResponseProcessor
 from .parallel_processor import ParallelProcessor
+from .keyword_curator import KeywordCurator


 class OutlineGenerator:
@@ -41,6 +43,14 @@ class OutlineGenerator:
        self.prompt_builder = PromptBuilder()
        self.response_processor = ResponseProcessor()
        self.parallel_processor = ParallelProcessor(self.source_mapper, self.grounding_engine)
+        
+        # Keyword curation engine
+        self.keyword_curator = KeywordCurator()
+    
+    def _curate_keywords(self, research) -> Dict[str, Any]:
+        """Curate research.keyword_analysis; a missing research or analysis is curated as empty."""
+        raw_analysis = getattr(research, "keyword_analysis", None) or {}
+        return self.keyword_curator.curate(raw_analysis)
    
    async def generate(self, request: BlogOutlineRequest, user_id: str) -> BlogOutlineResponse:
        """
@@ -59,18 +69,24 @@ class OutlineGenerator:
        # Extract research insights
        research = request.research
        primary_keywords = research.keyword_analysis.get('primary', [])
-        secondary_keywords = research.keyword_analysis.get('secondary', [])
        content_angles = research.suggested_angles
        sources = research.sources
        search_intent = research.keyword_analysis.get('search_intent', 'informational')
        
+        # Curate keywords — reduces 40+ raw keywords to ~13 locked, role-assigned keywords
+        curated_keywords = self._curate_keywords(research)
+        
        # Check for custom instructions
        custom_instructions = getattr(request, 'custom_instructions', None)
+        # Selected (prioritized) content angle and competitive advantage, if any
+        selected_content_angle = getattr(request, 'selected_content_angle', None)
+        selected_competitive_advantage = getattr(request, 'selected_competitive_advantage', None)
        
-        # Build comprehensive outline generation prompt with rich research data
+        # Build comprehensive outline generation prompt with curated keyword payload
        outline_prompt = self.prompt_builder.build_outline_prompt(
-            primary_keywords, secondary_keywords, content_angles, sources,
-            search_intent, request, custom_instructions
+            curated_keywords, content_angles, sources,
+            search_intent, request, custom_instructions, selected_content_angle,
+            selected_competitive_advantage
        )
        
        logger.info("Generating AI-powered outline using research results")
@@ -107,7 +123,7 @@ class OutlineGenerator:
        ai_title_options = outline_data.get('title_options', [])
        content_angle_titles = self.title_generator.extract_content_angle_titles(research)
        
-        # Combine AI-generated titles with content angles
+        # Combine AI-generated titles with content angles (full primary keywords for title variety)
        title_options = self.title_generator.combine_title_options(ai_title_options, content_angle_titles, primary_keywords)
        
        logger.info(f"Generated optimized outline with {len(balanced_sections)} sections and {len(title_options)} title options")
@@ -115,7 +131,6 @@ class OutlineGenerator:
        # Collect metadata for enhanced UI
        source_mapping_stats = self.metadata_collector.collect_source_mapping_stats(mapped_sections, research)
        grounding_insights_data = self.metadata_collector.collect_grounding_insights(grounding_insights)
-        optimization_results = self.metadata_collector.collect_optimization_results(optimized_sections, "comprehensive optimization")
        research_coverage = self.metadata_collector.collect_research_coverage(research)
        
        return BlogOutlineResponse(
@@ -124,7 +139,6 @@ class OutlineGenerator:
            outline=balanced_sections,
            source_mapping_stats=source_mapping_stats,
            grounding_insights=grounding_insights_data,
-            optimization_results=optimization_results,
            research_coverage=research_coverage
        )
    
@@ -148,20 +162,26 @@ class OutlineGenerator:
        # Extract research insights
        research = request.research
        primary_keywords = research.keyword_analysis.get('primary', [])
-        secondary_keywords = research.keyword_analysis.get('secondary', [])
        content_angles = research.suggested_angles
        sources = research.sources
        search_intent = research.keyword_analysis.get('search_intent', 'informational')
        
+        # Curate keywords — reduces 40+ raw keywords to ~13 locked, role-assigned keywords
+        curated_keywords = self._curate_keywords(research)
+        
        # Check for custom instructions
        custom_instructions = getattr(request, 'custom_instructions', None)
+        # Selected (prioritized) content angle and competitive advantage, if any
+        selected_content_angle = getattr(request, 'selected_content_angle', None)
+        selected_competitive_advantage = getattr(request, 'selected_competitive_advantage', None)
        
        await task_manager.update_progress(task_id, "📊 Analyzing research data and building content strategy...")
        
-        # Build comprehensive outline generation prompt with rich research data
+        # Build comprehensive outline generation prompt with curated keyword payload
        outline_prompt = self.prompt_builder.build_outline_prompt(
-            primary_keywords, secondary_keywords, content_angles, sources,
-            search_intent, request, custom_instructions
+            curated_keywords, content_angles, sources,
+            search_intent, request, custom_instructions, selected_content_angle,
+            selected_competitive_advantage
        )
        
        await task_manager.update_progress(task_id, "🤖 Generating AI-powered outline with research insights...")
@@ -203,7 +223,7 @@ class OutlineGenerator:
        ai_title_options = outline_data.get('title_options', [])
        content_angle_titles = self.title_generator.extract_content_angle_titles(research)
        
-        # Combine AI-generated titles with content angles
+        # Combine AI-generated titles with content angles (full primary keywords for title variety)
        title_options = self.title_generator.combine_title_options(ai_title_options, content_angle_titles, primary_keywords)
        
        await task_manager.update_progress(task_id, "✅ Outline generation and optimization completed successfully!")
@@ -211,7 +231,6 @@ class OutlineGenerator:
        # Collect metadata for enhanced UI
        source_mapping_stats = self.metadata_collector.collect_source_mapping_stats(mapped_sections, research)
        grounding_insights_data = self.metadata_collector.collect_grounding_insights(grounding_insights)
-        optimization_results = self.metadata_collector.collect_optimization_results(optimized_sections, "comprehensive optimization")
        research_coverage = self.metadata_collector.collect_research_coverage(research)
        
        return BlogOutlineResponse(
@@ -220,7 +239,6 @@ class OutlineGenerator:
            outline=balanced_sections,
            source_mapping_stats=source_mapping_stats,
            grounding_insights=grounding_insights_data,
-            optimization_results=optimization_results,
            research_coverage=research_coverage
        )
    
@@ -320,4 +338,3 @@ class OutlineGenerator:
        return insights
    
    
-    
--- a/backend/services/blog_writer/outline/prompt_builder.py
+++ b/backend/services/blog_writer/outline/prompt_builder.py
@@ -1,10 +1,12 @@
 """
 Prompt Builder - Handles building of AI prompts for outline generation.

-Constructs comprehensive prompts with research data, keywords, and strategic requirements.
+Constructs comprehensive prompts using curated keyword payloads,
+research data, and strategic requirements.
 """

 from typing import Dict, Any, List
+from datetime import datetime


 class PromptBuilder:
@@ -14,53 +16,105 @@ class PromptBuilder:
        """Initialize the prompt builder."""
        pass
    
-    def build_outline_prompt(self, primary_keywords: List[str], secondary_keywords: List[str], 
+    def build_outline_prompt(self, curated_keywords: Dict[str, Any],
                           content_angles: List[str], sources: List, search_intent: str,
-                           request, custom_instructions: str = None) -> str:
-        """Build the comprehensive outline generation prompt using filtered research data."""
+                           request, custom_instructions: str = None,
+                           selected_content_angle: str = None,
+                           selected_competitive_advantage: str = None) -> str:
+        """Build the comprehensive outline generation prompt using curated keyword payload."""
        
-        # Use the filtered research data (already cleaned by ResearchDataFilter)
        research = request.research
        
-        primary_kw_text = ', '.join(primary_keywords) if primary_keywords else (request.topic or ', '.join(getattr(request.research, 'original_keywords', []) or ['the target topic']))
-        secondary_kw_text = ', '.join(secondary_keywords) if secondary_keywords else "None provided"
-        long_tail_text = ', '.join(research.keyword_analysis.get('long_tail', [])) if research and research.keyword_analysis else "None discovered"
-        semantic_text = ', '.join(research.keyword_analysis.get('semantic_keywords', [])) if research and research.keyword_analysis else "None discovered"
-        trending_text = ', '.join(research.keyword_analysis.get('trending_terms', [])) if research and research.keyword_analysis else "None discovered"
-        content_gap_text = ', '.join(research.keyword_analysis.get('content_gaps', [])) if research and research.keyword_analysis else "None identified"
+        primary_kw_text = ', '.join(curated_keywords.get('primary', [])) if curated_keywords.get('primary') else (request.topic or ', '.join(getattr(request.research, 'original_keywords', []) or ['the target topic']))
+        secondary_kw_text = ', '.join(curated_keywords.get('secondary', [])) if curated_keywords.get('secondary') else "None provided"
+        long_tail_text = ', '.join(curated_keywords.get('long_tail', [])) if curated_keywords.get('long_tail') else "None discovered"
+        semantic_text = ', '.join(curated_keywords.get('semantic', [])) if curated_keywords.get('semantic') else "None discovered"
+        trending_text = ', '.join(curated_keywords.get('trending', [])) if curated_keywords.get('trending') else "None discovered"
+        content_gap_text = ', '.join(curated_keywords.get('content_gap', [])) if curated_keywords.get('content_gap') else "None identified"
+        
        content_angle_text = ', '.join(content_angles) if content_angles else "No explicit angles provided; infer compelling angles from research insights."
        competitor_text = ', '.join(research.competitor_analysis.get('top_competitors', [])) if research and research.competitor_analysis else "Not available"
        opportunity_text = ', '.join(research.competitor_analysis.get('opportunities', [])) if research and research.competitor_analysis else "Not available"
        advantages_text = ', '.join(research.competitor_analysis.get('competitive_advantages', [])) if research and research.competitor_analysis else "Not available"
+        
+        # Extract additional UI-mapped context fields
+        analysis_insights_text = (research.keyword_analysis.get('analysis_insights', '') or '') if research and research.keyword_analysis else ''
+        market_positioning_text = (research.competitor_analysis.get('market_positioning', '') or '') if research and research.competitor_analysis else ''
+        difficulty_score = research.keyword_analysis.get('difficulty', None) if research and research.keyword_analysis else None
+
+        # Build selected angle prominence section
+        if selected_content_angle and selected_content_angle.strip():
+            selected_angle_section = f"""
+PRIORITY CONTENT ANGLE (MUST PRIORITIZE):
+- This outline MUST be built around the following selected content angle as its primary lens and narrative framework:
+  "{selected_content_angle}"
+- Every major section should connect back to this angle
+- Title options should reflect this angle
+- The overall narrative arc should follow this angle's implied storyline
+"""
+        else:
+            selected_angle_section = ""
+
+        # Build selected competitive advantage prominence section
+        if selected_competitive_advantage and selected_competitive_advantage.strip():
+            selected_advantage_section = f"""
+PRIORITY COMPETITIVE ADVANTAGE (MUST LEVERAGE):
+- This outline MUST prominently feature and leverage the following competitive advantage throughout the content:
+  "{selected_competitive_advantage}"
+- Weave this advantage into key sections as a differentiator
+- Frame the solutions and recommendations around this advantage
+- Use this advantage to counter competitor weaknesses mentioned in research
+"""
+        else:
+            selected_advantage_section = ""
+
+        # Import and use the KeywordCurator for the directive section
+        from .keyword_curator import KeywordCurator
+        keyword_directives = KeywordCurator().format_for_prompt(curated_keywords)
+
+        current_date = datetime.now().strftime("%B %d, %Y")
+        current_year = datetime.now().year

        return f"""Create a comprehensive blog outline for: {primary_kw_text}

 CONTEXT:
+Current Date: {current_date}
 Search Intent: {search_intent}
+{f"Keyword Difficulty: {difficulty_score}/10" if difficulty_score is not None else ""}
 Target: {request.word_count or 1500} words
 Industry: {getattr(request.persona, 'industry', 'General') if request.persona else 'General'}
 Audience: {getattr(request.persona, 'target_audience', 'General') if request.persona else 'General'}

-KEYWORDS:
-Primary: {primary_kw_text}
-Secondary: {secondary_kw_text}
-Long-tail: {long_tail_text}
-Semantic: {semantic_text}
-Trending: {trending_text}
-Content Gaps: {content_gap_text}
+OVERVIEW KEYWORD SUMMARY:
+- Primary: {primary_kw_text}
+- Secondary: {secondary_kw_text}
+- Long-tail: {long_tail_text}
+- Semantic: {semantic_text}
+- Trending: {trending_text}
+- Content Gap: {content_gap_text}
+
+{keyword_directives}
+
+RESEARCH INSIGHTS SYNTHESIS:
+{analysis_insights_text}

 CONTENT ANGLES / STORYLINES: {content_angle_text}
-
+{selected_angle_section}
+{selected_advantage_section}
 COMPETITIVE INTELLIGENCE:
 Top Competitors: {competitor_text}
 Market Opportunities: {opportunity_text}
 Competitive Advantages: {advantages_text}
+{f"Market Positioning: {market_positioning_text}" if market_positioning_text else ""}

 RESEARCH SOURCES: {len(sources)} authoritative sources available

 {f"CUSTOM INSTRUCTIONS: {custom_instructions}" if custom_instructions else ""}

 STRATEGIC REQUIREMENTS:
+- MUST prioritize and anchor the outline around the selected content angle above all others
+- MUST highlight and leverage the selected competitive advantage as a key differentiator
+- Follow the KEYWORD PLACEMENT DIRECTIVES — treat the locked keywords as the minimum anchor set; you MAY include closely related intent-matching variations where natural
 - Create SEO-optimized headings with natural keyword integration
 - Surface the strongest research-backed angles within the outline
 - Build logical narrative flow from problem to solution
@@ -78,11 +132,11 @@ Return JSON format:
    ],
    "outline": [
        {{
-            "heading": "Section heading with primary keyword",
+            "heading": "Section heading",
            "subheadings": ["Subheading 1", "Subheading 2", "Subheading 3"],
            "key_points": ["Key point 1", "Key point 2", "Key point 3"],
            "target_words": 300,
-            "keywords": ["primary keyword", "secondary keyword"]
+            "keywords": ["keyword 1", "keyword 2"]
        }}
    ]
 }}"""
--- a/backend/services/blog_writer/outline/title_generator.py
+++ b/backend/services/blog_writer/outline/title_generator.py
@@ -76,8 +76,8 @@ class TitleGenerator:
            formatted_title += '.'
        
        # Limit length to reasonable blog title size
-        if len(formatted_title) > 100:
-            formatted_title = formatted_title[:97] + "..."
+        if len(formatted_title) > 200:
+            formatted_title = formatted_title[:197] + "..."
        
        return formatted_title
    
--- a/backend/services/blog_writer/research/research_service.py
+++ b/backend/services/blog_writer/research/research_service.py
@@ -155,7 +155,7 @@ class ResearchService:
                    sources = raw_result.get('sources', [])
                    search_widget = ""  # Exa doesn't provide search widgets
                    search_queries = raw_result.get('search_queries', [])
-                    grounding_metadata = None  # Exa doesn't provide grounding metadata
+                    grounding_metadata = self._build_grounding_metadata_from_sources(sources, search_queries)
                    
                except RuntimeError as e:
                    # Fail fast - no fallback for testing/debugging
@@ -239,7 +239,7 @@ class ResearchService:
                    sources = raw_result.get('sources', [])
                    search_widget = ""  # Tavily doesn't provide search widgets
                    search_queries = raw_result.get('search_queries', [])
-                    grounding_metadata = None  # Tavily doesn't provide grounding metadata
+                    grounding_metadata = self._build_grounding_metadata_from_sources(sources, search_queries)
                    
                except RuntimeError as e:
                    # Fail fast - no fallback for testing/debugging
@@ -482,7 +482,7 @@ class ResearchService:
                    sources = raw_result.get('sources', []) or []
                    search_widget = ""  # Exa doesn't provide search widgets
                    search_queries = raw_result.get('search_queries', []) or []
-                    grounding_metadata = None  # Exa doesn't provide grounding metadata
+                    grounding_metadata = self._build_grounding_metadata_from_sources(sources, search_queries)
                    
                except RuntimeError as e:
                    # Fail fast - no fallback for testing/debugging
@@ -568,7 +568,7 @@ class ResearchService:
                    sources = raw_result.get('sources', []) or []
                    search_widget = ""  # Tavily doesn't provide search widgets
                    search_queries = raw_result.get('search_queries', []) or []
-                    grounding_metadata = None  # Tavily doesn't provide grounding metadata
+                    grounding_metadata = self._build_grounding_metadata_from_sources(sources, search_queries)
                    
                except RuntimeError as e:
                    # Fail fast - no fallback for testing/debugging
@@ -728,6 +728,58 @@ class ResearchService:
        
        return sources

+    def _build_grounding_metadata_from_sources(self, sources: List[Dict[str, Any]], search_queries: List[str]) -> Optional[GroundingMetadata]:
+        """Build GroundingMetadata from Exa/Tavily sources (which lack native Google grounding).
+
+        Each source yields one GroundingChunk and one Citation; each highlight (or the
+        excerpt when there are none) yields a GroundingSupport. Returns None if no sources.
+        """
+        if not sources:
+            return None
+
+        grounding_chunks = []
+        grounding_supports = []
+        citations = []
+
+        for i, source in enumerate(sources):
+            # Fields may be present but None; .get() defaults only cover missing
+            # keys, so fall back explicitly to keep scores numeric and text sliceable.
+            score = source.get('credibility_score')
+            if score is None:
+                score = 0.85
+            title = source.get('title') or ''
+            excerpt = source.get('excerpt') or ''
+            highlights = [h for h in (source.get('highlights') or []) if h]
+            grounding_chunks.append(GroundingChunk(
+                title=title or 'Untitled',
+                url=source.get('url') or '',
+                confidence_score=score,
+            ))
+
+            # One support per highlight; the excerpt stands in when there are none.
+            for segment in highlights or ([excerpt] if excerpt else []):
+                grounding_supports.append(GroundingSupport(
+                    confidence_scores=[score],
+                    grounding_chunk_indices=[i],
+                    segment_text=segment,
+                ))
+
+            citations.append(Citation(
+                citation_type='inline',
+                start_index=0,
+                end_index=0,
+                text=str(highlights[0] if highlights else (excerpt or title))[:200],
+                source_indices=[i],
+                reference=f'Source {i + 1}',
+            ))
+
+        return GroundingMetadata(
+            grounding_chunks=grounding_chunks,
+            grounding_supports=grounding_supports,
+            citations=citations,
+            web_search_queries=search_queries or [],
+        )
+
    def _normalize_cached_research_data(self, cached_data: Dict[str, Any]) -> Dict[str, Any]:
        """
        Normalize cached research data to fix None values in confidence_scores.