chore: push all remaining changes

- Blog writer enhancements and bug fixes
- Wix integration improvements
- Frontend UI updates
- GSC dashboard docs cleanup
- Image studio assets
- LinkedIn requirements file
- Various dependency updates
2026-06-12 20:32:03 +05:30
parent 63a0df2536
commit d90d441019
78 changed files with 3963 additions and 2899 deletions
--- a/backend/services/blog_writer/outline/grounding_engine.py
+++ b/backend/services/blog_writer/outline/grounding_engine.py
@@ -241,9 +241,23 @@ class GroundingContextEngine:
            else:
                authority_distribution['low'] += 1
        
+        # Extract actual high-authority sources from chunks
+        high_authority_sources = []
+        for chunk in grounding_metadata.grounding_chunks:
+            chunk_authority = self._calculate_chunk_authority(chunk)
+            if chunk_authority >= 0.8:
+                high_authority_sources.append({
+                    'title': chunk.title if chunk.title else 'Unknown Source',
+                    'url': chunk.url if chunk.url else '',
+                    'score': round(chunk_authority, 3)
+                })
+        # Sort by authority score descending, keep top 5
+        high_authority_sources.sort(key=lambda x: x['score'], reverse=True)
+        high_authority_sources = high_authority_sources[:5]
+
        return {
            'average_authority_score': sum(authority_scores) / len(authority_scores) if authority_scores else 0.0,
-            'high_authority_sources': [{'title': 'High Authority Source', 'url': 'example.com', 'score': 0.9}],  # Placeholder
+            'high_authority_sources': high_authority_sources,
            'authority_distribution': dict(authority_distribution)
        }
    
--- a/backend/services/blog_writer/outline/outline_generator.py
+++ b/backend/services/blog_writer/outline/outline_generator.py
@@ -52,6 +52,44 @@ class OutlineGenerator:
        raw_analysis = research.keyword_analysis if research else {}
        return self.keyword_curator.curate(raw_analysis)
    
+    def _build_optimization_context(self, research) -> str:
+        """Build a compact research context for the outline optimizer.
+        Provides keywords, competitor data, and top source summaries so
+        the optimizer doesn't run blind to the research."""
+        if not research:
+            return ""
+        parts = []
+        kw = research.keyword_analysis if research.keyword_analysis else {}
+        primary = kw.get('primary', [])
+        if primary:
+            parts.append(f"Primary keywords: {', '.join(primary[:5])}")
+        search_intent = kw.get('search_intent', '')
+        if search_intent:
+            parts.append(f"Search intent: {search_intent}")
+        comp = research.competitor_analysis if research.competitor_analysis else {}
+        top_competitors = comp.get('top_competitors', [])
+        if top_competitors:
+            parts.append(f"Top competitors: {', '.join(str(c) for c in top_competitors[:5])}")
+        content_gaps = kw.get('content_gaps', [])
+        if content_gaps:
+            parts.append(f"Content gaps: {'; '.join(str(g) for g in content_gaps[:5])}")
+        opportunities = comp.get('opportunities', [])
+        if opportunities:
+            parts.append(f"Opportunities: {'; '.join(str(o) for o in opportunities[:5])}")
+        sources = research.sources if research.sources else []
+        if sources:
+            top_sources = sorted(sources, key=lambda s: s.credibility_score or 0.8, reverse=True)[:5]
+            source_lines = []
+            for s in top_sources:
+                line = f"- {s.title}"
+                if s.summary:
+                    line += f": {s.summary[:150]}"
+                elif s.excerpt:
+                    line += f": {s.excerpt[:150]}"
+                source_lines.append(line)
+            parts.append("Key research sources:\n" + "\n".join(source_lines))
+        return "\n".join(parts)
+    
    async def generate(self, request: BlogOutlineRequest, user_id: str) -> BlogOutlineResponse:
        """
        Generate AI-powered outline using research results.
@@ -102,7 +140,7 @@ class OutlineGenerator:
        
        # Run parallel processing for speed optimization (user_id required)
        mapped_sections, grounding_insights = await self.parallel_processor.run_parallel_processing_async(
-            outline_sections, research, user_id
+            outline_sections, research, user_id, competitive_advantage=selected_competitive_advantage or ""
        )
        
        # Enhance sections with grounding insights
@@ -113,7 +151,8 @@ class OutlineGenerator:
        
        # Optimize outline for better flow, SEO, and engagement (user_id required)
        logger.info("Optimizing outline for better flow and engagement...")
-        optimized_sections = await self.outline_optimizer.optimize(grounding_enhanced_sections, "comprehensive optimization", user_id)
+        optimization_context = self._build_optimization_context(research)
+        optimized_sections = await self.outline_optimizer.optimize(grounding_enhanced_sections, "comprehensive optimization", user_id, research_context=optimization_context)
        
        # Rebalance word counts for optimal distribution
        target_words = request.word_count or 1500
@@ -202,7 +241,7 @@ class OutlineGenerator:
        
        # Run parallel processing for speed optimization (user_id required for subscription checks)
        mapped_sections, grounding_insights = await self.parallel_processor.run_parallel_processing(
-            outline_sections, research, user_id, task_id
+            outline_sections, research, user_id, task_id, competitive_advantage=selected_competitive_advantage or ""
        )
        
        # Enhance sections with grounding insights (depends on both previous tasks)
@@ -213,7 +252,8 @@ class OutlineGenerator:
        
        # Optimize outline for better flow, SEO, and engagement (user_id required for subscription checks)
        await task_manager.update_progress(task_id, "🎯 Optimizing outline for better flow and engagement...")
-        optimized_sections = await self.outline_optimizer.optimize(grounding_enhanced_sections, "comprehensive optimization", user_id)
+        optimization_context = self._build_optimization_context(research)
+        optimized_sections = await self.outline_optimizer.optimize(grounding_enhanced_sections, "comprehensive optimization", user_id, research_context=optimization_context)
        
        # Rebalance word counts for optimal distribution
        await task_manager.update_progress(task_id, "⚖️ Rebalancing word count distribution...")
--- a/backend/services/blog_writer/outline/outline_optimizer.py
+++ b/backend/services/blog_writer/outline/outline_optimizer.py
@@ -4,7 +4,7 @@ Outline Optimizer - AI-powered outline optimization and rebalancing.
 Optimizes outlines for better flow, SEO, and engagement.
 """

-from typing import List
+from typing import List, Dict, Any, Optional
 from loguru import logger

 from models.blog_models import BlogOutlineSection
@@ -13,13 +13,14 @@ from models.blog_models import BlogOutlineSection
 class OutlineOptimizer:
    """Optimizes outlines for better flow, SEO, and engagement."""
    
-    async def optimize(self, outline: List[BlogOutlineSection], focus: str, user_id: str) -> List[BlogOutlineSection]:
+    async def optimize(self, outline: List[BlogOutlineSection], focus: str, user_id: str, research_context: str = "") -> List[BlogOutlineSection]:
        """Optimize entire outline for better flow, SEO, and engagement.
        
        Args:
            outline: List of outline sections to optimize
            focus: Optimization focus (e.g., "general optimization")
            user_id: User ID (required for subscription checks and usage tracking)
+            research_context: Optional research context to ground optimization
            
        Returns:
            List of optimized outline sections
@@ -40,19 +41,28 @@ Current Outline:
 Optimization Focus: {focus}

 Goals: Improve narrative flow, enhance SEO, increase engagement, ensure comprehensive coverage.
+"""
+        if research_context:
+            optimization_prompt += f"""
+Research Context (use this to ground your optimization in real data):
+{research_context}

+Ensure the optimized outline reflects the research insights above — headings should address the key topics, keywords should align with search intent, and sections should cover the most important angles from the research.
+"""
+
+        optimization_prompt += """
 Return JSON format:
-{{
+{
    "outline": [
-        {{
+        {
            "heading": "Optimized heading",
            "subheadings": ["subheading 1", "subheading 2"],
            "key_points": ["point 1", "point 2"],
            "target_words": 300,
            "keywords": ["keyword1", "keyword2"]
-        }}
+        }
    ]
-}}"""
+}"""
        
        try:
            from services.llm_providers.main_text_generation import llm_text_gen
@@ -112,26 +122,34 @@ Return JSON format:
        return outline
    
    def rebalance_word_counts(self, outline: List[BlogOutlineSection], target_words: int) -> List[BlogOutlineSection]:
-        """Rebalance word count distribution across sections."""
+        """Rebalance word count distribution across sections, weighting by source count."""
        total_sections = len(outline)
        if total_sections == 0:
            return outline
        
-        # Calculate target distribution
-        intro_words = int(target_words * 0.12)  # 12% for intro
-        conclusion_words = int(target_words * 0.12)  # 12% for conclusion
+        intro_words = int(target_words * 0.12)
+        conclusion_words = int(target_words * 0.12)
        main_content_words = target_words - intro_words - conclusion_words
        
-        # Distribute main content words across sections
-        words_per_section = main_content_words // total_sections
-        remainder = main_content_words % total_sections
+        # Weight sections by research density (sections with more sources get more words)
+        main_sections = outline[1:-1] if total_sections > 2 else outline
+        source_weights = []
+        for section in main_sections:
+            ref_count = len(getattr(section, 'references', []) or [])
+            source_weights.append(1.0 + ref_count * 0.5)
+        
+        total_weight = sum(source_weights) if source_weights else len(main_sections)
        
        for i, section in enumerate(outline):
-            if i == 0:  # First section (intro)
+            if i == 0 and total_sections > 2:
                section.target_words = intro_words
-            elif i == total_sections - 1:  # Last section (conclusion)
+            elif i == total_sections - 1 and total_sections > 2:
                section.target_words = conclusion_words
-            else:  # Main content sections
-                section.target_words = words_per_section + (1 if i < remainder else 0)
+            else:
+                main_idx = i - 1 if total_sections > 2 else i
+                if main_idx < len(source_weights):
+                    section.target_words = int(main_content_words * source_weights[main_idx] / total_weight)
+                else:
+                    section.target_words = main_content_words // max(len(main_sections), 1)
        
        return outline
--- a/backend/services/blog_writer/outline/outline_service.py
+++ b/backend/services/blog_writer/outline/outline_service.py
@@ -233,9 +233,9 @@ class OutlineService:
        """Enhance a section using AI with research context."""
        return await self.section_enhancer.enhance(section, focus)
    
-    async def optimize_outline_with_ai(self, outline: List[BlogOutlineSection], focus: str = "general optimization") -> List[BlogOutlineSection]:
+    async def optimize_outline_with_ai(self, outline: List[BlogOutlineSection], focus: str = "general optimization", research_context: str = "") -> List[BlogOutlineSection]:
        """Optimize entire outline for better flow, SEO, and engagement."""
-        return await self.outline_optimizer.optimize(outline, focus)
+        return await self.outline_optimizer.optimize(outline, focus, research_context=research_context)
    
    def rebalance_word_counts(self, outline: List[BlogOutlineSection], target_words: int) -> List[BlogOutlineSection]:
        """Rebalance word count distribution across sections."""
--- a/backend/services/blog_writer/outline/parallel_processor.py
+++ b/backend/services/blog_writer/outline/parallel_processor.py
@@ -17,7 +17,7 @@ class ParallelProcessor:
        self.source_mapper = source_mapper
        self.grounding_engine = grounding_engine
    
-    async def run_parallel_processing(self, outline_sections, research, user_id: str, task_id: str = None) -> Tuple[Any, Any]:
+    async def run_parallel_processing(self, outline_sections, research, user_id: str, task_id: str = None, competitive_advantage: str = "") -> Tuple[Any, Any]:
        """
        Run source mapping and grounding insights extraction in parallel.
        
@@ -26,6 +26,7 @@ class ParallelProcessor:
            research: Research data object
            user_id: User ID (required for subscription checks and usage tracking)
            task_id: Optional task ID for progress updates
+            competitive_advantage: Selected competitive advantage for preferential source matching
            
        Returns:
            Tuple of (mapped_sections, grounding_insights)
@@ -44,7 +45,7 @@ class ParallelProcessor:
        
        # Run these tasks in parallel to save time
        source_mapping_task = asyncio.create_task(
-            self._run_source_mapping(outline_sections, research, task_id, user_id)
+            self._run_source_mapping(outline_sections, research, task_id, user_id, competitive_advantage)
        )
        
        grounding_insights_task = asyncio.create_task(
@@ -59,7 +60,7 @@ class ParallelProcessor:
        
        return mapped_sections, grounding_insights
    
-    async def run_parallel_processing_async(self, outline_sections, research, user_id: str) -> Tuple[Any, Any]:
+    async def run_parallel_processing_async(self, outline_sections, research, user_id: str, competitive_advantage: str = "") -> Tuple[Any, Any]:
        """
        Run parallel processing without progress updates (for non-progress methods).
        
@@ -67,6 +68,7 @@ class ParallelProcessor:
            outline_sections: List of outline sections to process
            research: Research data object
            user_id: User ID (required for subscription checks and usage tracking)
+            competitive_advantage: Selected competitive advantage for preferential source matching
            
        Returns:
            Tuple of (mapped_sections, grounding_insights)
@@ -81,7 +83,7 @@ class ParallelProcessor:
        
        # Run these tasks in parallel to save time
        source_mapping_task = asyncio.create_task(
-            self._run_source_mapping_async(outline_sections, research, user_id)
+            self._run_source_mapping_async(outline_sections, research, user_id, competitive_advantage)
        )
        
        grounding_insights_task = asyncio.create_task(
@@ -96,12 +98,12 @@ class ParallelProcessor:
        
        return mapped_sections, grounding_insights
    
-    async def _run_source_mapping(self, outline_sections, research, task_id, user_id: str):
+    async def _run_source_mapping(self, outline_sections, research, task_id, user_id: str, competitive_advantage: str = ""):
        """Run source mapping in parallel."""
        if task_id:
            from api.blog_writer.task_manager import task_manager
            await task_manager.update_progress(task_id, "🔗 Applying intelligent source-to-section mapping...")
-        return self.source_mapper.map_sources_to_sections(outline_sections, research, user_id)
+        return self.source_mapper.map_sources_to_sections(outline_sections, research, user_id, competitive_advantage=competitive_advantage)
    
    async def _run_grounding_insights_extraction(self, research, task_id):
        """Run grounding insights extraction in parallel."""
@@ -110,10 +112,10 @@ class ParallelProcessor:
            await task_manager.update_progress(task_id, "🧠 Extracting grounding metadata insights...")
        return self.grounding_engine.extract_contextual_insights(research.grounding_metadata)
    
-    async def _run_source_mapping_async(self, outline_sections, research, user_id: str):
+    async def _run_source_mapping_async(self, outline_sections, research, user_id: str, competitive_advantage: str = ""):
        """Run source mapping in parallel (async version without progress updates)."""
        logger.info("Applying intelligent source-to-section mapping...")
-        return self.source_mapper.map_sources_to_sections(outline_sections, research, user_id)
+        return self.source_mapper.map_sources_to_sections(outline_sections, research, user_id, competitive_advantage=competitive_advantage)
    
    async def _run_grounding_insights_extraction_async(self, research):
        """Run grounding insights extraction in parallel (async version without progress updates)."""
--- a/backend/services/blog_writer/outline/prompt_builder.py
+++ b/backend/services/blog_writer/outline/prompt_builder.py
@@ -37,27 +37,60 @@ class PromptBuilder:
        opportunity_text = ', '.join(research.competitor_analysis.get('opportunities', [])) if research and research.competitor_analysis else "Not available"
        advantages_text = ', '.join(research.competitor_analysis.get('competitive_advantages', [])) if research and research.competitor_analysis else "Not available"
        competitor_headings_text = ', '.join(research.competitor_analysis.get('competitor_headings', [])[:3]) if research and research.competitor_analysis and research.competitor_analysis.get('competitor_headings') else ""
+        content_gaps_text = ', '.join(research.competitor_analysis.get('content_gaps', [])) if research and research.competitor_analysis and research.competitor_analysis.get('content_gaps') else ""
+        industry_leaders_text = ', '.join(research.competitor_analysis.get('industry_leaders', [])) if research and research.competitor_analysis and research.competitor_analysis.get('industry_leaders') else ""
        
        # Extract additional UI-mapped context fields
        analysis_insights_text = (research.keyword_analysis.get('analysis_insights', '') or '') if research and research.keyword_analysis else ''
        market_positioning_text = (research.competitor_analysis.get('market_positioning', '') or '') if research and research.competitor_analysis else ''
        difficulty_score = research.keyword_analysis.get('difficulty', None) if research and research.keyword_analysis else None
+        
+        # Extract search queries as intent signals
+        search_queries_text = ', '.join(research.search_queries) if research and hasattr(research, 'search_queries') and research.search_queries else ""

-        # Extract top 3 authoritative source excerpts as factual data points
+        # Build numbered source list — all sources with index, title, excerpt, and highlights
+        # The LLM will reference these indices when assigning sources to sections
+        source_list_text = ""
+        if sources:
+            source_lines = []
+            for i, src in enumerate(sources, 1):
+                title = getattr(src, 'title', '') or ''
+                excerpt = getattr(src, 'excerpt', '') or ''
+                highlights = getattr(src, 'highlights', []) or []
+                summary = getattr(src, 'summary', '') or ''
+                source_type = getattr(src, 'source_type', '') or ''
+                author = getattr(src, 'author', '') or ''
+                
+                line = f"  [{i}] {title}"
+                if source_type:
+                    line += f" [{source_type}]"
+                if author:
+                    line += f" by {author}"
+                if summary:
+                    line += f" — {summary[:1000]}"
+                elif excerpt:
+                    line += f" — {excerpt[:1000]}"
+                if highlights:
+                    line += f" | Key findings: {'; '.join(h[:250] for h in highlights[:3])}"
+                source_lines.append(line)
+            if source_lines:
+                source_list_text = "RESEARCH SOURCES (numbered for reference):\n" + "\n".join(source_lines)
+        
+        # Top factual excerpts for depth (keep as supplement)
        source_excerpts_text = ""
        if sources:
            sorted_sources = sorted(
                [s for s in sources if (s.excerpt or s.summary)],
                key=lambda s: s.credibility_score or 0.8, reverse=True
-            )[:3]
+            )[:5]
            excerpts = []
            for i, src in enumerate(sorted_sources, 1):
                excerpt = src.excerpt or src.summary or ""
-                if len(excerpt) > 300:
-                    excerpt = excerpt[:297] + "..."
+                if len(excerpt) > 500:
+                    excerpt = excerpt[:497] + "..."
                excerpts.append(f"  {i}. \"{src.title}\" — {excerpt}")
            if excerpts:
-                source_excerpts_text = "FACTUAL DATA POINTS FROM RESEARCH:\n" + "\n".join(excerpts)
+                source_excerpts_text = "DETAILED FACTS FROM TOP SOURCES:\n" + "\n".join(excerpts)

        # Extract recency: newest source publication date
        newest_date_str = ""
@@ -76,12 +109,12 @@ class PromptBuilder:
        grounding_evidence_text = ""
        if research and research.grounding_metadata and research.grounding_metadata.grounding_supports:
            supports = research.grounding_metadata.grounding_supports
-            top_supports = [s for s in supports if s.segment_text and len(s.segment_text) > 20][:3]
+            top_supports = [s for s in supports if s.segment_text and len(s.segment_text) > 20][:5]
            if top_supports:
                evidence_parts = []
                for i, s in enumerate(top_supports, 1):
-                    text = s.segment_text[:250]
-                    if len(s.segment_text) > 250:
+                    text = s.segment_text[:400]
+                    if len(s.segment_text) > 400:
                        text += "..."
                    evidence_parts.append(f"  {i}. {text}")
                grounding_evidence_text = "VERIFIED EVIDENCE (high-confidence snippets):\n" + "\n".join(evidence_parts)
@@ -151,8 +184,11 @@ Market Opportunities: {opportunity_text}
 Competitive Advantages: {advantages_text}
 {f"Market Positioning: {market_positioning_text}" if market_positioning_text else ""}
 {f"Competitor Headings (AVOID duplicating): {competitor_headings_text}" if competitor_headings_text else ""}
+{f"Content Gaps (MUST address these gaps): {content_gaps_text}" if content_gaps_text else ""}
+{f"Industry Leaders: {industry_leaders_text}" if industry_leaders_text else ""}
+{f"Search Intent Signals: {search_queries_text}" if search_queries_text else ""}

-RESEARCH SOURCES: {len(sources)} authoritative sources available
+{source_list_text}
 {newest_date_str}

 {source_excerpts_text}
@@ -168,8 +204,9 @@ STRATEGIC REQUIREMENTS:
 - Create SEO-optimized headings with natural keyword integration
 - Surface the strongest research-backed angles within the outline
 - Build logical narrative flow from problem to solution
- Include data-driven insights from research sources
- Address content gaps and market opportunities
+- Include data-driven insights from research sources — use the numbered sources above
+- For each section, assign the most relevant source indices using the [N] numbers above
+- Address content gaps and market opportunities — if content gaps are listed, dedicate sections to fill those gaps
 - Optimize for search intent and user questions
 - Ensure engaging, actionable content throughout

@@ -186,7 +223,8 @@ Return JSON format:
            "subheadings": ["Subheading 1", "Subheading 2", "Subheading 3"],
            "key_points": ["Key point 1", "Key point 2", "Key point 3"],
            "target_words": 300,
-            "keywords": ["keyword 1", "keyword 2"]
+            "keywords": ["keyword 1", "keyword 2"],
+            "source_indices": [1, 3, 5]
        }}
    ]
 }}"""
@@ -220,9 +258,14 @@ Return JSON format:
                            "keywords": {
                                "type": "array",
                                "items": {"type": "string"}
+                            },
+                            "source_indices": {
+                                "type": "array",
+                                "items": {"type": "integer"},
+                                "description": "Indices of research sources (from the numbered list above) that support this section"
                            }
                        },
-                        "required": ["heading", "subheadings", "key_points", "target_words", "keywords"]
+                        "required": ["heading", "subheadings", "key_points", "target_words", "keywords", "source_indices"]
                    }
                }
            },
--- a/backend/services/blog_writer/outline/response_processor.py
+++ b/backend/services/blog_writer/outline/response_processor.py
@@ -100,18 +100,37 @@ class ResponseProcessor:
                    raise ValueError(f"AI outline generation failed: {error_str}")
    
    def convert_to_sections(self, outline_data: Dict[str, Any], sources: List) -> List[BlogOutlineSection]:
-        """Convert outline data to BlogOutlineSection objects."""
+        """Convert outline data to BlogOutlineSection objects.
+        
+        If the LLM assigned source_indices to sections, populate references
+        directly from those indices. Indices are 1-based (matching the [N] 
+        labels in the prompt) — converted to 0-based for list access.
+        Sections without source_indices will be populated by the algorithmic
+        source mapper in a later step.
+        """
        outline_sections = []
        for i, section_data in enumerate(outline_data.get('outline', [])):
            if not isinstance(section_data, dict) or 'heading' not in section_data:
                continue
-                
+            
+            # Parse LLM-assigned source indices (1-based)
+            raw_indices = section_data.get('source_indices', [])
+            section_refs = []
+            if raw_indices and sources:
+                for idx in raw_indices:
+                    try:
+                        source_idx = int(idx) - 1  # Convert 1-based → 0-based
+                        if 0 <= source_idx < len(sources):
+                            section_refs.append(sources[source_idx])
+                    except (ValueError, TypeError):
+                        pass
+            
            section = BlogOutlineSection(
                id=f"s{i+1}",
                heading=section_data.get('heading', f'Section {i+1}'),
                subheadings=section_data.get('subheadings', []),
                key_points=section_data.get('key_points', []),
-                references=[],  # Will be populated by intelligent mapping
+                references=section_refs,  # LLM-assigned if provided, else []
                target_words=section_data.get('target_words', 200),
                keywords=section_data.get('keywords', [])
            )
--- a/backend/services/blog_writer/outline/source_mapper.py
+++ b/backend/services/blog_writer/outline/source_mapper.py
@@ -41,10 +41,33 @@ class SourceToSectionMapper:
            'the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for', 'of', 'with', 'by',
            'is', 'are', 'was', 'were', 'be', 'been', 'being', 'have', 'has', 'had', 'do', 'does', 'did',
            'will', 'would', 'could', 'should', 'may', 'might', 'must', 'can', 'this', 'that', 'these', 'those',
-            'how', 'what', 'when', 'where', 'why', 'who', 'which', 'how', 'much', 'many', 'more', 'most',
+            'how', 'what', 'when', 'where', 'why', 'who', 'which', 'much', 'many', 'more', 'most',
            'some', 'any', 'all', 'each', 'every', 'other', 'another', 'such', 'no', 'not', 'only', 'own',
            'same', 'so', 'than', 'too', 'very', 'just', 'now', 'here', 'there', 'up', 'down', 'out', 'off',
-            'over', 'under', 'again', 'further', 'then', 'once'
+            'over', 'under', 'again', 'further', 'then', 'once', 'also', 'into', 'about', 'between',
+            'through', 'during', 'before', 'after', 'above', 'below', 'from', 'since', 'until', 'while',
+            'because', 'however', 'therefore', 'thus', 'hence', 'yet', 'still', 'already', 'even'
+        }
+        
+        # Common abbreviation/synonym pairs for fuzzy matching
+        self._synonym_map = {
+            'ai': ['artificial intelligence', 'machine intelligence'],
+            'ml': ['machine learning'],
+            'dl': ['deep learning'],
+            'nlp': ['natural language processing'],
+            'iot': ['internet of things'],
+            'saas': ['software as a service'],
+            'b2b': ['business to business'],
+            'b2c': ['business to consumer'],
+            'cx': ['customer experience'],
+            'ux': ['user experience'],
+            'roi': ['return on investment'],
+            'kpi': ['key performance indicator'],
+            'crm': ['customer relationship management'],
+            'erp': ['enterprise resource planning'],
+            'seo': ['search engine optimization'],
+            'cto': ['chief technology officer'],
+            'vp': ['vice president'],
        }
        
        logger.info("✅ SourceToSectionMapper initialized with intelligent mapping algorithms")
@@ -53,15 +76,21 @@ class SourceToSectionMapper:
        self, 
        sections: List[BlogOutlineSection], 
        research_data: BlogResearchResponse,
-        user_id: str
+        user_id: str,
+        competitive_advantage: str = ""
    ) -> List[BlogOutlineSection]:
        """
        Map research sources to outline sections using intelligent algorithms.
        
+        Sections that already have LLM-assigned references (from source_indices
+        in the outline prompt) are preserved. Algorithmic mapping fills gaps
+        for sections without LLM-assigned sources.
+        
        Args:
            sections: List of outline sections to map sources to
            research_data: Research data containing sources and metadata
            user_id: User ID (required for subscription checks and usage tracking)
+            competitive_advantage: Selected competitive advantage to preferentially match
            
        Returns:
            List of outline sections with intelligently mapped sources
@@ -76,16 +105,39 @@ class SourceToSectionMapper:
            logger.warning("No sections or sources to map")
            return sections
        
-        logger.info(f"Mapping {len(research_data.sources)} sources to {len(sections)} sections")
+        # Separate sections with LLM-assigned references from those without
+        sections_with_refs = [s for s in sections if s.references]
+        sections_without_refs = [s for s in sections if not s.references]
        
-        # Step 1: Algorithmic mapping
-        mapping_results = self._algorithmic_source_mapping(sections, research_data)
+        logger.info(
+            f"Mapping {len(research_data.sources)} sources to {len(sections)} sections "
+            f"({len(sections_with_refs)} with LLM-assigned references, "
+            f"{len(sections_without_refs)} need algorithmic mapping)"
+        )
        
-        # Step 2: AI validation and improvement (single prompt, user_id required for subscription checks)
-        validated_mapping = self._ai_validate_mapping(mapping_results, research_data, user_id)
+        if sections_without_refs:
+            # Step 1: Algorithmic mapping for sections without LLM-assigned references
+            mapping_results = self._algorithmic_source_mapping(sections_without_refs, research_data, competitive_advantage)
+            
+            # Step 2: AI validation and improvement
+            validated_mapping = self._ai_validate_mapping(mapping_results, research_data, user_id)
+            
+            # Step 3: Apply mapping only to sections that need it
+            mapped_sections_with = self._apply_mapping_to_sections(sections_without_refs, validated_mapping)
+        else:
+            mapped_sections_with = []
        
-        # Step 3: Apply validated mapping to sections
-        mapped_sections = self._apply_mapping_to_sections(sections, validated_mapping)
+        # Combine: keep LLM-assigned sections as-is, add algorithmically mapped ones
+        mapped_sections = list(sections_with_refs) + mapped_sections_with
+        
+        # Preserve original ordering
+        original_ids = [s.id for s in sections]
+        mapped_sections.sort(key=lambda s: original_ids.index(s.id) if s.id in original_ids else 999)
+        
+        # Warn if any section still has zero references
+        for s in mapped_sections:
+            if not s.references:
+                logger.warning(f"Section '{s.heading}' (id={s.id}) has ZERO sources — content generator will use keyword-based fallback")
        
        logger.info("✅ Source-to-section mapping completed successfully")
        return mapped_sections
@@ -93,7 +145,8 @@ class SourceToSectionMapper:
    def _algorithmic_source_mapping(
        self, 
        sections: List[BlogOutlineSection], 
-        research_data: BlogResearchResponse
+        research_data: BlogResearchResponse,
+        competitive_advantage: str = ""
    ) -> Dict[str, List[Tuple[ResearchSource, float]]]:
        """
        Perform algorithmic mapping of sources to sections.
@@ -101,6 +154,7 @@ class SourceToSectionMapper:
        Args:
            sections: List of outline sections
            research_data: Research data with sources
+            competitive_advantage: Selected competitive advantage to boost matching
            
        Returns:
            Dictionary mapping section IDs to list of (source, score) tuples
@@ -114,7 +168,7 @@ class SourceToSectionMapper:
                # Calculate multi-dimensional relevance score
                semantic_score = self._calculate_semantic_similarity(section, source)
                keyword_score = self._calculate_keyword_relevance(section, source, research_data)
-                contextual_score = self._calculate_contextual_relevance(section, source, research_data)
+                contextual_score = self._calculate_contextual_relevance(section, source, research_data, competitive_advantage)
                
                # Weighted total score
                total_score = (
@@ -140,38 +194,54 @@ class SourceToSectionMapper:
    def _calculate_semantic_similarity(self, section: BlogOutlineSection, source: ResearchSource) -> float:
        """
        Calculate semantic similarity between section and source.
-        
-        Args:
-            section: Outline section
-            source: Research source
-            
-        Returns:
-            Semantic similarity score (0.0 to 1.0)
+        Returns max(word Jaccard, stem Jaccard) plus bigram, title and synonym boosts, capped at 1.0.
        """
-        # Extract text content for comparison
        section_text = self._extract_section_text(section)
        source_text = self._extract_source_text(source)
        
-        # Calculate word overlap
        section_words = self._extract_meaningful_words(section_text)
        source_words = self._extract_meaningful_words(source_text)
        
        if not section_words or not source_words:
            return 0.0
        
-        # Calculate Jaccard similarity
-        intersection = len(set(section_words) & set(source_words))
-        union = len(set(section_words) | set(source_words))
+        section_set = set(section_words)
+        source_set = set(source_words)
        
-        jaccard_similarity = intersection / union if union > 0 else 0.0
+        # 1. Jaccard similarity on the raw meaningful words
+        intersection = len(section_set & source_set)
+        union = len(section_set | source_set)
+        jaccard = intersection / union if union > 0 else 0.0
        
-        # Boost score for exact phrase matches
-        phrase_boost = self._calculate_phrase_similarity(section_text, source_text)
+        # 2. Stem matching — Jaccard on stemmed words catches variants (e.g., "learning" vs "learns")
+        section_stems = {self._stem_word(w) for w in section_words}
+        source_stems = {self._stem_word(w) for w in source_words}
+        stem_intersection = len(section_stems & source_stems)
+        stem_union = len(section_stems | source_stems)
+        stem_similarity = stem_intersection / stem_union if stem_union > 0 else 0.0
        
-        # Combine Jaccard similarity with phrase boost
-        semantic_score = min(1.0, jaccard_similarity + phrase_boost)
+        # 3. Bigram overlap — catches multi-word concepts (e.g., "machine learning")
+        section_bigrams = set(self._extract_bigrams(section_text))
+        source_bigrams = set(self._extract_bigrams(source_text))
+        bigram_overlap = len(section_bigrams & source_bigrams)
+        bigram_score = min(0.3, bigram_overlap * 0.1)  # 0.1 per shared bigram, capped at 0.3
        
-        return semantic_score
+        # 4. Title-boost — half the heading/title word Jaccard, capped at 0.3
+        heading = (section.heading or '').lower()
+        source_title = (source.title or '').lower()
+        heading_words = set(self._extract_meaningful_words(heading))
+        title_words = set(self._extract_meaningful_words(source_title))
+        title_overlap = len(heading_words & title_words) / len(heading_words | title_words) if (heading_words or title_words) else 0.0
+        title_boost = min(0.3, title_overlap * 0.5)
+        
+        # 5. Synonym expansion — expand abbreviations and match across synonym pairs
+        synonym_score = self._calculate_synonym_overlap(section_words, source_words)
+        
+        # Combine: the better of Jaccard/stem is the base; the three boosts are additive
+        base_similarity = max(jaccard, stem_similarity)
+        combined = min(1.0, base_similarity + bigram_score + title_boost + synonym_score)
+        
+        return combined
    
    def _calculate_keyword_relevance(
        self, 
@@ -219,7 +289,8 @@ class SourceToSectionMapper:
        self, 
        section: BlogOutlineSection, 
        source: ResearchSource, 
-        research_data: BlogResearchResponse
+        research_data: BlogResearchResponse,
+        competitive_advantage: str = ""
    ) -> float:
        """
        Calculate contextual relevance based on section content and source context.
@@ -228,6 +299,7 @@ class SourceToSectionMapper:
            section: Outline section
            source: Research source
            research_data: Research data with context
+            competitive_advantage: Selected competitive advantage to boost matching
            
        Returns:
            Contextual relevance score (0.0 to 1.0)
@@ -264,6 +336,15 @@ class SourceToSectionMapper:
            industry_score = sum(1 for word in industry_words if word in source_text) / len(industry_words) if industry_words else 0.0
            contextual_score += industry_score * 0.2
        
+        # 4. Competitive advantage boost — sources that match the advantage get a score lift
+        if competitive_advantage:
+            advantage_words = set(self._extract_meaningful_words(competitive_advantage.lower()))
+            if advantage_words:
+                advantage_in_section = sum(1 for w in advantage_words if w in section_text) / len(advantage_words)
+                advantage_in_source = sum(1 for w in advantage_words if w in source_text) / len(advantage_words)
+                if advantage_in_section > 0.3 and advantage_in_source > 0.3:
+                    contextual_score += 0.25 * (advantage_in_section + advantage_in_source)
+        
        return min(1.0, contextual_score)
    
    def _ai_validate_mapping(
@@ -360,10 +441,15 @@ class SourceToSectionMapper:
        return " ".join(text_parts)
    
    def _extract_source_text(self, source: ResearchSource) -> str:
-        """Extract all text content from a source."""
-        text_parts = [source.title]
+        """Join a source's title, summary, excerpt and the first 500 characters of its content."""
+        text_parts = [source.title or '']  # a None title would make " ".join() raise TypeError
+        if getattr(source, 'summary', None):  # tolerate sources without the field, as done for 'content'
+            text_parts.append(source.summary)
        if source.excerpt:
            text_parts.append(source.excerpt)
+        content = getattr(source, 'content', '') or ''
+        if content:
+            text_parts.append(content[:500])  # only a prefix, to bound the text being compared
        return " ".join(text_parts)
    
    def _extract_meaningful_words(self, text: str) -> List[str]:
@@ -382,6 +468,41 @@ class SourceToSectionMapper:
        
        return meaningful_words
    
+    def _stem_word(self, word: str) -> str:
+        """Strip the first listed suffix that leaves a stem of >= 3 chars, then undouble a final consonant."""
+        suffixes = ('ization', 'ation', 'tion', 'sion', 'ment', 'ness', 'ity', 'ing', 'able', 'ible', 'ful', 'less', 'ous', 'ive', 'ally', 'ly', 'er', 'ed', 'es', 's')
+        suffix = next((s for s in suffixes if word.endswith(s) and len(word) - len(s) >= 3), '')
+        stem = word[:-len(suffix)] if suffix else word
+        # "running" -> "runn" -> "run" so it meets "runs" -> "run"; l/s/z stay doubled ("falling" -> "fall").
+        doubled = suffix in ('ing', 'ed', 'er') and len(stem) > 3 and stem[-1] == stem[-2] and stem[-1] not in 'aeiouslz'
+        return stem[:-1] if doubled else stem
+    
+    def _extract_bigrams(self, text: str) -> List[str]:
+        """Return 'first second' strings for each consecutive pair in the meaningful-word list of text."""
+        tokens = self._extract_meaningful_words(text)
+        # zip() stops at the shorter input, so fewer than two words
+        # yields an empty list without a separate length guard.
+        return [f"{first} {second}" for first, second in zip(tokens, tokens[1:])]
+    
+    def _calculate_synonym_overlap(self, section_words: List[str], source_words: List[str]) -> float:
+        """
+        Score abbreviation/expansion matches between a section and a source.
+
+        Adds 0.05 for each synonym-map pair where one side has the abbreviation and
+        the other has every word of an expansion; the result is capped at 0.2.
+        """
+        section_set = set(section_words)
+        source_set = set(source_words)
+        # NOTE(review): needs short tokens ('ai') and stop-words ('of', 'as') to survive word extraction — confirm.
+        extra_matches = 0
+        for abbr, expansions in self._synonym_map.items():
+            for expansion in expansions:
+                exp_words = set(expansion.split())
+                # Cross-match only: abbreviation on one side, full expansion on the other.
+                if (abbr in section_set and exp_words <= source_set) or (abbr in source_set and exp_words <= section_set):
+                    extra_matches += 1
+        return min(0.2, extra_matches * 0.05)
+    
    def _calculate_phrase_similarity(self, text1: str, text2: str) -> float:
        """Calculate phrase similarity boost score."""
        if not text1 or not text2: