ALwrity AI Blog Writer - Added Google Grounding UI Implementation

2025-09-18 18:45:53 +05:30
parent 9f13daf443
commit 4d153b292d
72 changed files with 11944 additions and 1526 deletions
--- a/backend/services/blog_writer/outline/init.py
+++ b/backend/services/blog_writer/outline/init.py
@@ -12,10 +12,14 @@ from .outline_service import OutlineService
 from .outline_generator import OutlineGenerator
 from .outline_optimizer import OutlineOptimizer
 from .section_enhancer import SectionEnhancer
+from .source_mapper import SourceToSectionMapper
+from .grounding_engine import GroundingContextEngine

 __all__ = [
    'OutlineService',
    'OutlineGenerator',
    'OutlineOptimizer', 
-    'SectionEnhancer'
+    'SectionEnhancer',
+    'SourceToSectionMapper',
+    'GroundingContextEngine'
 ]
--- a/backend/services/blog_writer/outline/grounding_engine.py
+++ b/backend/services/blog_writer/outline/grounding_engine.py
@@ -0,0 +1,644 @@
+"""
+Grounding Context Engine - Enhanced utilization of grounding metadata.
+
+This module extracts and utilizes rich contextual information from Google Search
+grounding metadata to enhance outline generation with authoritative insights,
+temporal relevance, and content relationships.
+"""
+
+from typing import Dict, Any, List, Tuple, Optional
+from collections import Counter, defaultdict
+from datetime import datetime, timedelta
+import re
+from loguru import logger
+
+from models.blog_models import (
+    GroundingMetadata,
+    GroundingChunk,
+    GroundingSupport,
+    Citation,
+    BlogOutlineSection,
+    ResearchSource,
+)
+
+
+class GroundingContextEngine:
+    """Extract and utilize rich context from grounding metadata."""
+    
+    def __init__(self) -> None:
+        """Set the scoring thresholds, result caps and keyword tables used by the analyses."""
+        self.min_confidence_threshold = 0.7  # NOTE(review): not read by any method in this module
+        self.high_confidence_threshold = 0.9  # scores at or above this count as high confidence
+        self.max_contextual_insights = 10  # cap applied by get_high_confidence_insights
+        self.max_authority_sources = 5  # cap applied by get_authority_sources
+        
+        # Authority indicators: substrings looked up in a chunk's lower-cased "title url" text
+        self.authority_indicators = {
+            'high_authority': ['research', 'study', 'analysis', 'report', 'journal', 'academic', 'university', 'institute'],
+            'medium_authority': ['guide', 'tutorial', 'best practices', 'expert', 'professional', 'industry'],
+            'low_authority': ['blog', 'opinion', 'personal', 'review', 'commentary']
+        }
+        
+        # Temporal relevance patterns: substrings looked up by _analyze_temporal_relevance
+        self.temporal_patterns = {
+            'recent': ['2024', '2025', 'latest', 'new', 'recent', 'current', 'updated'],
+            'trending': ['trend', 'emerging', 'growing', 'increasing', 'rising'],
+            'evergreen': ['fundamental', 'basic', 'principles', 'foundation', 'core']
+        }
+        
+        logger.info("✅ GroundingContextEngine initialized with contextual analysis capabilities")
+    
+    def extract_contextual_insights(self, grounding_metadata: Optional[GroundingMetadata]) -> Dict[str, Any]:
+        """
+        Run every grounding analyzer and collect the results under fixed keys.
+
+        Args:
+            grounding_metadata: Google Search grounding metadata, or None.
+        Returns:
+            One entry per analysis category; the zeroed structure if metadata is falsy.
+        """
+        if not grounding_metadata:
+            return self._get_empty_insights()
+
+        logger.info("Extracting contextual insights from grounding metadata...")
+        # (result key, analyzer) pairs; the analyzers run in this order.
+        analyzers = (
+            ('confidence_analysis', self._analyze_confidence_patterns),
+            ('authority_analysis', self._analyze_source_authority),
+            ('temporal_analysis', self._analyze_temporal_relevance),
+            ('content_relationships', self._analyze_content_relationships),
+            ('citation_insights', self._analyze_citation_patterns),
+            ('search_intent_insights', self._analyze_search_intent),
+            ('quality_indicators', self._assess_quality_indicators),
+        )
+        insights = {key: analyze(grounding_metadata) for key, analyze in analyzers}
+
+        logger.info(f"✅ Extracted {len(insights)} contextual insight categories")
+        return insights
+    
+    def enhance_sections_with_grounding(
+        self,
+        sections: List[BlogOutlineSection],
+        grounding_metadata: Optional[GroundingMetadata],
+        insights: Dict[str, Any]
+    ) -> List[BlogOutlineSection]:
+        """
+        Produce grounding-enriched versions of the given outline sections.
+
+        Args:
+            sections: Outline sections to enhance.
+            grounding_metadata: Google Search grounding metadata, or None.
+            insights: Contextual insights extracted from that metadata.
+
+        Returns:
+            The input list itself when metadata or insights are falsy;
+            otherwise a new list with one enhanced section per input section.
+        """
+        if not (grounding_metadata and insights):
+            return sections
+
+        logger.info(f"Enhancing {len(sections)} sections with grounding insights...")
+        # One enhanced section per input section, in the original order.
+        enhanced_sections = [
+            self._enhance_single_section(section, grounding_metadata, insights)
+            for section in sections
+        ]
+        logger.info("✅ Section enhancement with grounding insights completed")
+        return enhanced_sections
+    
+    def get_authority_sources(self, grounding_metadata: Optional[GroundingMetadata]) -> List[Tuple[GroundingChunk, float]]:
+        """
+        Rank grounding chunks by authority score and keep the strongest ones.
+
+        Args:
+            grounding_metadata: Google Search grounding metadata, or None.
+
+        Returns:
+            (chunk, authority_score) tuples with score >= 0.6, highest score
+            first, truncated to ``self.max_authority_sources`` entries.
+        """
+        if not grounding_metadata:
+            return []
+
+        # Score lazily, then drop anything below the 0.6 authority floor.
+        scored = (
+            (chunk, self._calculate_chunk_authority(chunk))
+            for chunk in grounding_metadata.grounding_chunks
+        )
+        qualifying = [pair for pair in scored if pair[1] >= 0.6]
+
+        ranked = sorted(qualifying, key=lambda pair: pair[1], reverse=True)
+        return ranked[:self.max_authority_sources]
+    
+    def get_high_confidence_insights(self, grounding_metadata: Optional[GroundingMetadata]) -> List[str]:
+        """
+        Return trimmed segment texts that are backed by a high-confidence support.
+
+        Segments shorter than 20 characters after stripping are skipped, and
+        segments longer than 200 characters are cut and suffixed with "...".
+
+        Args:
+            grounding_metadata: Google Search grounding metadata, or None.
+
+        Returns:
+            Up to ``self.max_contextual_insights`` insights, in support order.
+        """
+        if not grounding_metadata:
+            return []
+
+        # A support qualifies when its best confidence score reaches
+        # ``self.high_confidence_threshold``; the sibling helper applies that filter.
+        collected = self._get_high_confidence_insights_from_supports(
+            grounding_metadata.grounding_supports
+        )
+        return collected[:self.max_contextual_insights]
+    
+    # Private helper methods
+    
+    def _get_empty_insights(self) -> Dict[str, Any]:
+        """
+        Build the zeroed insights structure used when there is no grounding metadata.
+
+        A new dict (with new nested containers) is created on every call.
+        """
+
+        # Category names match the keys filled in by extract_contextual_insights.
+        confidence = dict(
+            average_confidence=0.0,
+            high_confidence_sources_count=0,
+            confidence_distribution=dict(high=0, medium=0, low=0),
+        )
+        authority = dict(
+            average_authority_score=0.0,
+            high_authority_sources=[],
+            authority_distribution=dict(high=0, medium=0, low=0),
+        )
+        temporal = dict(
+            recent_content=0,
+            trending_topics=[],
+            evergreen_content=0,
+        )
+        relationships = dict(related_concepts=[], content_gaps=[], concept_coverage_score=0.0)
+        citations = dict(citation_types={}, citation_density=0.0)
+        intent = dict(primary_intent='informational', intent_signals=[], user_questions=[])
+        quality = dict(overall_quality=0.0, quality_factors=[])
+
+        return {
+            'confidence_analysis': confidence,
+            'authority_analysis': authority,
+            'temporal_analysis': temporal,
+            'content_relationships': relationships,
+            'citation_insights': citations,
+            'search_intent_insights': intent,
+            'quality_indicators': quality,
+        }
+    
+    def _analyze_confidence_patterns(self, grounding_metadata: GroundingMetadata) -> Dict[str, Any]:
+        """
+        Summarize the confidence scores found on chunks and supports.
+
+        Chunk scores that are falsy (None or 0) are ignored. A support whose
+        ``confidence_scores`` is None or empty contributes nothing instead of
+        raising, matching the guard used by the high-confidence helpers.
+
+        Returns:
+            Dict with ``average_confidence``, ``high_confidence_sources_count``
+            (scores >= ``self.high_confidence_threshold``) and
+            ``confidence_distribution``.
+        """
+        scores = [
+            chunk.confidence_score
+            for chunk in grounding_metadata.grounding_chunks
+            if chunk.confidence_score
+        ]
+        for support in grounding_metadata.grounding_supports:
+            scores.extend(support.confidence_scores or [])
+
+        # With no scores at all every figure below evaluates to zero.
+        high_count = sum(1 for score in scores if score >= self.high_confidence_threshold)
+        return {
+            'average_confidence': sum(scores) / len(scores) if scores else 0.0,
+            'high_confidence_sources_count': high_count,
+            'confidence_distribution': self._get_confidence_distribution(scores),
+        }
+    
+    def _analyze_source_authority(self, grounding_metadata: GroundingMetadata) -> Dict[str, Any]:
+        """
+        Score every chunk's authority and summarize the results.
+
+        Returns:
+            Dict with ``average_authority_score``, ``high_authority_sources``
+            (title/url/score of chunks scoring >= 0.8, best first, at most
+            ``self.max_authority_sources``) and ``authority_distribution``,
+            which always carries the 'high', 'medium' and 'low' keys.
+        """
+        scored = [(c, self._calculate_chunk_authority(c)) for c in grounding_metadata.grounding_chunks]
+        distribution = {'high': 0, 'medium': 0, 'low': 0}
+        for _, score in scored:
+            level = 'high' if score >= 0.8 else 'medium' if score >= 0.6 else 'low'
+            distribution[level] += 1
+        # Report the real top sources rather than a fabricated placeholder entry.
+        top = sorted((p for p in scored if p[1] >= 0.8), key=lambda p: p[1], reverse=True)
+        return {
+            'average_authority_score': sum(s for _, s in scored) / len(scored) if scored else 0.0,
+            'high_authority_sources': [{'title': c.title, 'url': c.url, 'score': s} for c, s in top[:self.max_authority_sources]],
+            'authority_distribution': distribution
+        }
+    
+    def _analyze_temporal_relevance(self, grounding_metadata: GroundingMetadata) -> Dict[str, Any]:
+        """
+        Count recency, trend and evergreen signals across grounding chunks.
+
+        Each chunk's lower-cased "title url" string is checked for the
+        substrings listed in ``self.temporal_patterns``.
+        """
+        recent_hits = 0
+        evergreen_hits = 0
+        trending_titles = []
+
+        for chunk in grounding_metadata.grounding_chunks:
+            haystack = f"{chunk.title} {chunk.url}".lower()
+            def mentions(category: str) -> bool:
+                return any(token in haystack for token in self.temporal_patterns[category])
+
+            recent_hits += mentions('recent')
+            if mentions('trending'):
+                trending_titles.append(chunk.title)
+            evergreen_hits += mentions('evergreen')
+
+        return {
+            'recent_content': recent_hits,
+            'trending_topics': trending_titles[:5],  # at most five titles
+            'evergreen_content': evergreen_hits,
+            'temporal_balance': self._calculate_temporal_balance(recent_hits, evergreen_hits)
+        }
+    
+    def _analyze_content_relationships(self, grounding_metadata: GroundingMetadata) -> Dict[str, Any]:
+        """
+        Derive related concepts and content gaps from chunk titles and support text.
+
+        Returns:
+            Dict with ``related_concepts``, ``content_gaps``, ``gap_count`` and
+            ``concept_coverage_score`` (concept count / 10, capped at 1.0).
+        """
+
+        # Corpus order: all chunk titles first, then all support segments.
+        corpus = [chunk.title for chunk in grounding_metadata.grounding_chunks]
+        corpus.extend(support.segment_text for support in grounding_metadata.grounding_supports)
+
+        concepts = self._extract_related_concepts(corpus)
+        gaps = self._identify_content_gaps(corpus)
+
+        if concepts:
+            coverage = min(1.0, len(concepts) / 10.0)
+        else:
+            coverage = 0.0
+
+        return {
+            'related_concepts': concepts,
+            'content_gaps': gaps,
+            'concept_coverage_score': coverage,
+            'gap_count': len(gaps)
+        }
+    
+    def _analyze_citation_patterns(self, grounding_metadata: GroundingMetadata) -> Dict[str, Any]:
+        """
+        Summarize citation types, volume, density and quality.
+
+        ``citation_density`` is citations per 1000 *characters* of support
+        segment text (segment length is measured with ``len``), and 0.0 when
+        the supports contain no text.
+        """
+        citations = grounding_metadata.citations
+        type_counts = Counter(citation.citation_type for citation in citations)
+        text_chars = sum(len(support.segment_text) for support in grounding_metadata.grounding_supports)
+        density = len(citations) / text_chars * 1000 if text_chars > 0 else 0.0
+        return {
+            'citation_types': dict(type_counts),
+            'total_citations': len(citations),
+            'citation_density': density,
+            'citation_quality': self._assess_citation_quality(citations)
+        }
+    
+    def _analyze_search_intent(self, grounding_metadata: GroundingMetadata) -> Dict[str, Any]:
+        """
+        Classify the intent of each web search query and collect question-style queries.
+
+        Keywords are compared with whole words of the query (so "show" is not "how");
+        a query yields at most one signal, tested in the order of ``intent_keywords``.
+        """
+        intent_keywords = (
+            ('informational', {'how', 'what', 'why', 'when', 'where'}),
+            ('comparison', {'best', 'top', 'compare', 'vs'}),
+            ('transactional', {'buy', 'price', 'cost', 'deal'}),
+        )
+        intent_signals, user_questions = [], []
+        for query in grounding_metadata.web_search_queries:
+            query_lower = query.lower()
+            words = set(re.findall(r'[a-z0-9]+', query_lower))
+            matched = next((name for name, keys in intent_keywords if words & keys), None)
+            if matched:
+                intent_signals.append(matched)
+            if query_lower.startswith(('how to', 'what is', 'why does', 'when should')):
+                user_questions.append(query)
+        return {
+            'intent_signals': list(dict.fromkeys(intent_signals)),  # unique, first-seen order
+            'user_questions': user_questions[:5],  # Limit to top 5
+            'primary_intent': self._determine_primary_intent(intent_signals)
+        }
+    
+    def _assess_quality_indicators(self, grounding_metadata: GroundingMetadata) -> Dict[str, Any]:
+        """
+        Score the grounding data on four weighted factors and grade the total.
+
+        Weights: average chunk confidence 0.3, source diversity 0.2 (unique domains / 5),
+        content depth 0.2 (support characters / 5000), citation quality 0.3. Chunks whose
+        url has no ``split`` method (e.g. None) or yields an empty host are not counted.
+        """
+        quality_factors = []
+        quality_score = 0.0
+        # Factor 1: Confidence levels (falsy chunk scores are ignored)
+        confidences = [chunk.confidence_score for chunk in grounding_metadata.grounding_chunks if chunk.confidence_score]
+        if confidences:
+            avg_confidence = sum(confidences) / len(confidences)
+            quality_score += avg_confidence * 0.3
+            quality_factors.append(f"Average confidence: {avg_confidence:.2f}")
+        # Factor 2: Source diversity
+        unique_domains = set()
+        for chunk in grounding_metadata.grounding_chunks:
+            try:
+                parts = chunk.url.split('/')
+                domain = parts[2] if '://' in chunk.url else parts[0]
+            except AttributeError:
+                continue  # url is None or otherwise not string-like
+            if domain:
+                unique_domains.add(domain)
+        quality_score += min(len(unique_domains) / 5.0, 1.0) * 0.2  # Normalize to 0-1
+        quality_factors.append(f"Source diversity: {len(unique_domains)} unique domains")
+        # Factor 3: Content depth
+        total_content_length = sum(len(support.segment_text) for support in grounding_metadata.grounding_supports)
+        quality_score += min(total_content_length / 5000.0, 1.0) * 0.2  # Normalize to 0-1
+        quality_factors.append(f"Content depth: {total_content_length} characters")
+        # Factor 4: Citation quality
+        citation_quality = self._assess_citation_quality(grounding_metadata.citations)
+        quality_score += citation_quality * 0.3
+        quality_factors.append(f"Citation quality: {citation_quality:.2f}")
+        return {
+            'overall_quality': min(quality_score, 1.0),
+            'quality_factors': quality_factors,
+            'quality_grade': self._get_quality_grade(quality_score)
+        }
+    
+    def _enhance_single_section(
+        self,
+        section: BlogOutlineSection,
+        grounding_metadata: GroundingMetadata,
+        insights: Dict[str, Any]
+    ) -> BlogOutlineSection:
+        """
+        Build a new section with enriched subheadings, key points and keywords.
+
+        ``id``, ``heading``, ``references`` and ``target_words`` are taken from
+        the input section unchanged.
+        """
+        # Grounding data that shares a word with this section's text.
+        supports = self._find_relevant_supports(section, grounding_metadata)
+        chunks = self._find_relevant_chunks(section, grounding_metadata)
+
+        # Supports feed the subheadings, chunks feed the key points and the
+        # insights dict feeds the keywords.
+        subheadings = self._enhance_subheadings(section, supports, insights)
+        key_points = self._enhance_key_points(section, chunks, insights)
+        return BlogOutlineSection(
+            id=section.id,
+            heading=section.heading,
+            subheadings=subheadings,
+            key_points=key_points,
+            references=section.references,
+            target_words=section.target_words,
+            keywords=self._enhance_keywords(section, insights)
+        )
+    
+    def _calculate_chunk_authority(self, chunk: GroundingChunk) -> float:
+        """
+        Score a chunk's authority on a 0.0-1.0 scale.
+
+        Starts at 0.5, adds 0.3 / 0.2 or subtracts 0.1 for every high / medium /
+        low indicator found as a substring of the lower-cased "title url" text,
+        adds 20% of a truthy confidence score, then clamps the result.
+        """
+        level_weights = {'high_authority': 0.3, 'medium_authority': 0.2}
+        haystack = f"{chunk.title} {chunk.url}".lower()
+        score = 0.5
+        for level, indicators in self.authority_indicators.items():
+            # Any level other than high/medium is treated as low authority.
+            delta = level_weights.get(level, -0.1)
+            for indicator in indicators:
+                if indicator in haystack:
+                    score += delta
+
+        if chunk.confidence_score:
+            score += chunk.confidence_score * 0.2
+
+        return min(max(score, 0.0), 1.0)
+    
+    def _extract_insight_from_segment(self, segment_text: str) -> Optional[str]:
+        """Return the stripped segment, cut to 200 chars plus "..." if longer; None if under 20 chars."""
+        if not segment_text:
+            return None
+
+        cleaned = segment_text.strip()
+        if len(cleaned) < 20:
+            return None
+
+        # Long segments keep their first 200 characters followed by an ellipsis.
+        return cleaned[:200] + "..." if len(cleaned) > 200 else cleaned
+    
+    def _get_confidence_distribution(self, confidences: List[float]) -> Dict[str, int]:
+        """Count scores per bucket: 'high' (>= 0.8), 'medium' (>= 0.6) and 'low' (the rest)."""
+        def bucket(score: float) -> str:
+            if score >= 0.8:
+                return 'high'
+            return 'medium' if score >= 0.6 else 'low'
+
+        # Start from explicit zeros so every bucket is present in the result.
+        tally = {'high': 0, 'medium': 0, 'low': 0}
+        for score in confidences:
+            tally[bucket(score)] += 1
+
+        return tally
+    
+    def _calculate_temporal_balance(self, recent: int, evergreen: int) -> str:
+        """Label the recent/evergreen mix from the share of recent items ('unknown' when both are 0)."""
+        combined = recent + evergreen
+        if combined == 0:
+            return 'unknown'
+
+        share_recent = recent / combined
+        # Above 70% recent or below 30% recent is lopsided; anything else is balanced.
+        if share_recent > 0.7:
+            return 'recent_heavy'
+        if share_recent < 0.3:
+            return 'evergreen_heavy'
+        return 'balanced'
+    
+    def _extract_related_concepts(self, text_list: List[str]) -> List[str]:
+        """Return up to 10 distinct capitalized words, most frequent first.
+
+        Ties keep first-seen order, so the result is stable across runs;
+        falsy entries (None, "") are skipped instead of raising.
+        """
+        counts = Counter()
+        for text in text_list:
+            if text:
+                counts.update(re.findall(r'\b[A-Z][a-z]+\b', text))
+        return [word for word, _ in counts.most_common(10)]
+    
+    def _identify_content_gaps(self, text_list: List[str]) -> List[str]:
+        """Collect up to five distinct sentences that contain a gap-indicator substring.
+
+        A sentence matching several indicators is reported once; falsy texts are skipped.
+        """
+        gap_indicators = ['missing', 'lack of', 'not covered', 'gap', 'unclear', 'unexplained']
+        gaps = []
+        for text in text_list:
+            if not text:
+                continue
+            text_lower = text.lower()
+            for indicator in gap_indicators:
+                if indicator in text_lower:
+                    gap = self._extract_gap_from_text(text, indicator)
+                    if gap and gap not in gaps:
+                        gaps.append(gap)
+
+        return gaps[:5]  # Limit to top 5
+    
+    def _extract_gap_from_text(self, text: str, indicator: str) -> Optional[str]:
+        """Return the first '.'-delimited sentence (stripped) whose lower-cased form contains ``indicator``."""
+        matches = (
+            sentence.strip()
+            for sentence in text.split('.')
+            if indicator in sentence.lower()
+        )
+        return next(matches, None)
+    
+    def _assess_citation_quality(self, citations: List[Citation]) -> float:
+        """
+        Average a per-citation score, capped at 1.0 (0.0 when there are no citations).
+
+        A citation earns 0.3, 0.2 or 0.1 depending on its type, plus 0.1
+        when its text is longer than 20 characters.
+        """
+        if not citations:
+            return 0.0
+
+        strong_types = ['expert_opinion', 'statistical_data', 'research_study']
+        moderate_types = ['recent_news', 'case_study']
+
+        total = 0.0
+        for citation in citations:
+            kind = citation.citation_type
+            total += 0.3 if kind in strong_types else 0.2 if kind in moderate_types else 0.1
+            if len(citation.text) > 20:
+                total += 0.1
+
+        return min(total / len(citations), 1.0)
+    
+    def _determine_primary_intent(self, intent_signals: List[str]) -> str:
+        """Return the most frequent signal, or 'informational' when the list is empty."""
+        if intent_signals:
+            # most_common(1) yields a single (signal, count) pair.
+            (winner, _count), = Counter(intent_signals).most_common(1)
+            return winner
+        return 'informational'
+    
+    def _get_quality_grade(self, quality_score: float) -> str:
+        """Map a quality score to a letter: A >= 0.9, B >= 0.8, C >= 0.7, D >= 0.6, otherwise F."""
+        grade_floors = (
+            (0.9, 'A'),
+            (0.8, 'B'),
+            (0.7, 'C'),
+            (0.6, 'D'),
+        )
+        for floor, letter in grade_floors:
+            if quality_score >= floor:
+                return letter
+        return 'F'
+    
+    def _find_relevant_chunks(self, section: BlogOutlineSection, grounding_metadata: GroundingMetadata) -> List[GroundingChunk]:
+        """Return chunks whose lower-cased title contains any section word longer than 3 characters."""
+
+        section_text = f"{section.heading} {' '.join(section.subheadings)} {' '.join(section.key_points)}".lower()
+        # Words come from the section heading, subheadings and key points.
+        probe_words = [word for word in section_text.split() if len(word) > 3]
+
+        def is_relevant(chunk):
+            title = chunk.title.lower()
+            return any(word in title for word in probe_words)
+
+        return [chunk for chunk in grounding_metadata.grounding_chunks if is_relevant(chunk)]
+    
+    def _find_relevant_supports(self, section: BlogOutlineSection, grounding_metadata: GroundingMetadata) -> List[GroundingSupport]:
+        """Return supports whose lower-cased segment text contains any section word longer than 3 characters."""
+        section_text = f"{section.heading} {' '.join(section.subheadings)} {' '.join(section.key_points)}".lower()
+        keywords = [word for word in section_text.split() if len(word) > 3]
+        matches = []
+        for support in grounding_metadata.grounding_supports:
+            segment = support.segment_text.lower()
+            # Plain substring containment, so a keyword may match inside a longer word.
+            if not any(keyword in segment for keyword in keywords):
+                continue
+            matches.append(support)
+        return matches
+    
+    def _enhance_subheadings(self, section: BlogOutlineSection, relevant_supports: List[GroundingSupport], insights: Dict[str, Any]) -> List[str]:
+        """Return a copy of the section's subheadings extended with up to two high-confidence insights."""
+        result = list(section.subheadings)
+
+        # Only the first two insights are considered; ones already present are skipped.
+        candidates = self._get_high_confidence_insights_from_supports(relevant_supports)[:2]
+        for candidate in candidates:
+            if candidate in result:
+                continue
+            result.append(candidate)
+        return result
+    
+    def _enhance_key_points(self, section: BlogOutlineSection, relevant_chunks: List[GroundingChunk], insights: Dict[str, Any]) -> List[str]:
+        """Return a copy of the section's key points plus one note per high-confidence relevant chunk."""
+        result = list(section.key_points)
+
+        for chunk in relevant_chunks:
+            score = chunk.confidence_score
+            if not (score and score >= self.high_confidence_threshold):
+                continue  # missing or below-threshold confidence
+            note = f"Based on {chunk.title}: {self._extract_key_insight(chunk)}"
+            if note not in result:
+                result.append(note)
+        return result
+    
+    def _enhance_keywords(self, section: BlogOutlineSection, insights: Dict[str, Any]) -> List[str]:
+        """Return a copy of the section's keywords plus new (case-insensitive) concepts from the first three related concepts."""
+        result = list(section.keywords)
+        seen = {keyword.lower() for keyword in result}
+        concepts = insights.get('content_relationships', {}).get('related_concepts', [])
+        for concept in concepts[:3]:
+            folded = concept.lower()
+            if folded not in seen:
+                seen.add(folded)
+                result.append(concept)
+        return result
+    
+    def _get_high_confidence_insights_from_supports(self, supports: List[GroundingSupport]) -> List[str]:
+        """Return trimmed segment texts of supports whose best confidence score meets the high threshold."""
+        def qualifies(support) -> bool:
+            scores = support.confidence_scores
+            return bool(scores) and max(scores) >= self.high_confidence_threshold
+
+        candidates = (self._extract_insight_from_segment(s.segment_text) for s in supports if qualifies(s))
+        # Segments the extractor rejects (it returns None for them) are dropped.
+        return [text for text in candidates if text]
+    
+    def _extract_key_insight(self, chunk: GroundingChunk) -> str:
+        """Describe the chunk by its confidence score, formatted to two decimals."""
+        score_text = format(chunk.confidence_score, '.2f')
+        return f"High-confidence source with {score_text} confidence score"
--- a/backend/services/blog_writer/outline/metadata_collector.py
+++ b/backend/services/blog_writer/outline/metadata_collector.py
@@ -0,0 +1,94 @@
+"""
+Metadata Collector - Handles collection and formatting of outline metadata.
+
+Collects source mapping stats, grounding insights, optimization results, and research coverage.
+"""
+
+from typing import Dict, Any, List
+from loguru import logger
+
+
+class MetadataCollector:
+    """Handles collection and formatting of various metadata types for UI display."""
+    
+    def __init__(self) -> None:
+        """Initialize the metadata collector; no instance state is set up here."""
+        pass
+    
+    def collect_source_mapping_stats(self, mapped_sections, research):
+        """
+        Collect source mapping statistics for UI display.
+
+        Coverage is the share of research sources referenced by at least one
+        section, so a source reused across sections is counted once and the
+        percentage cannot exceed 100. ``total_sources_mapped`` still counts
+        every section reference.
+        """
+        from models.blog_models import SourceMappingStats
+
+        references = [ref for section in mapped_sections for ref in section.references]
+        # NOTE(review): assumes references expose a ``url``; falls back to object identity.
+        unique_refs = {getattr(ref, 'url', None) or id(ref) for ref in references}
+        total_sources = len(research.sources)
+        coverage = min(len(unique_refs) / total_sources * 100, 100.0) if total_sources > 0 else 0.0
+        # "Relevance" is approximated by each reference's truthy credibility_score.
+        scores = [ref.credibility_score for ref in references if getattr(ref, 'credibility_score', None)]
+        return SourceMappingStats(
+            total_sources_mapped=len(references),
+            coverage_percentage=round(coverage, 1),
+            average_relevance_score=round(sum(scores) / len(scores) if scores else 0.0, 3),
+            high_confidence_mappings=sum(1 for score in scores if score >= 0.8)
+        )
+    
+    def collect_grounding_insights(self, grounding_insights):
+        """Wrap the seven insight categories in a GroundingInsights model for UI display."""
+        from models.blog_models import GroundingInsights
+
+        categories = (
+            'confidence_analysis', 'authority_analysis', 'temporal_analysis',
+            'content_relationships', 'citation_insights', 'search_intent_insights',
+            'quality_indicators',
+        )
+        # Each category is looked up with .get(), so a missing key is passed as None.
+        fields = {name: grounding_insights.get(name) for name in categories}
+
+        return GroundingInsights(**fields)
+    
+    def collect_optimization_results(self, optimized_sections, focus):
+        """Collect optimization results for UI display.
+
+        Quality is the share of complete sections (heading, subheadings and key
+        points all truthy) on a 0-10 scale. NOTE(review): improvements_made is a
+        fixed list, not derived from the sections.
+        """
+        from models.blog_models import OptimizationResults
+        section_count = len(optimized_sections)
+        complete_count = sum(
+            1 for section in optimized_sections
+            if section.heading and section.subheadings and section.key_points
+        )
+        score = (complete_count / section_count * 10) if section_count > 0 else 0.0
+        return OptimizationResults(
+            overall_quality_score=round(score, 1),
+            improvements_made=[
+                "Enhanced section headings for better SEO",
+                "Optimized keyword distribution across sections",
+                "Improved content flow and logical progression",
+                "Balanced word count distribution",
+                "Enhanced subheadings for better readability"
+            ],
+            optimization_focus=focus)
+    
+    def collect_research_coverage(self, research):
+        """Summarize the source count, the number of content gaps and up to five competitive advantages."""
+        from models.blog_models import ResearchCoverage
+
+        gaps = research.keyword_analysis.get('content_gaps', [])
+        advantages = research.competitor_analysis.get('competitive_advantages', [])
+
+        # Only the first five advantages are forwarded to the UI model.
+        return ResearchCoverage(
+            sources_utilized=len(research.sources),
+            content_gaps_identified=len(gaps),
+            competitive_advantages=advantages[:5]
+        )
--- a/backend/services/blog_writer/outline/outline_generator.py
+++ b/backend/services/blog_writer/outline/outline_generator.py
@@ -4,7 +4,7 @@ Outline Generator - AI-powered outline generation from research data.
 Generates comprehensive, SEO-optimized outlines using research intelligence.
 """

-from typing import Dict, Any, List
+from typing import Dict, Any, List, Tuple
 import asyncio
 from loguru import logger

@@ -14,10 +14,34 @@ from models.blog_models import (
    BlogOutlineSection,
 )

+from .source_mapper import SourceToSectionMapper
+from .section_enhancer import SectionEnhancer
+from .outline_optimizer import OutlineOptimizer
+from .grounding_engine import GroundingContextEngine
+from .title_generator import TitleGenerator
+from .metadata_collector import MetadataCollector
+from .prompt_builder import PromptBuilder
+from .response_processor import ResponseProcessor
+from .parallel_processor import ParallelProcessor
+

 class OutlineGenerator:
    """Generates AI-powered outlines from research data."""
    
+    def __init__(self):
+        """Create the collaborator objects the outline pipeline delegates to."""
+        self.source_mapper = SourceToSectionMapper()
+        self.section_enhancer = SectionEnhancer()
+        self.outline_optimizer = OutlineOptimizer()
+        self.grounding_engine = GroundingContextEngine()
+        # Helpers for titles, UI metadata, prompt/schema building and LLM response
+        # handling; the parallel processor reuses the mapper and engine created above.
+        self.title_generator = TitleGenerator()
+        self.metadata_collector = MetadataCollector()
+        self.prompt_builder = PromptBuilder()
+        self.response_processor = ResponseProcessor()
+        self.parallel_processor = ParallelProcessor(self.source_mapper, self.grounding_engine)
+    
    async def generate(self, request: BlogOutlineRequest) -> BlogOutlineResponse:
        """
        Generate AI-powered outline using research results
@@ -34,7 +58,7 @@ class OutlineGenerator:
        custom_instructions = getattr(request, 'custom_instructions', None)
        
        # Build comprehensive outline generation prompt with rich research data
-        outline_prompt = self._build_outline_prompt(
+        outline_prompt = self.prompt_builder.build_outline_prompt(
            primary_keywords, secondary_keywords, content_angles, sources,
            search_intent, request, custom_instructions
        )
@@ -42,32 +66,63 @@ class OutlineGenerator:
        logger.info("Generating AI-powered outline using research results")
        
        # Define schema with proper property ordering (critical for Gemini API)
-        outline_schema = self._get_outline_schema()
+        outline_schema = self.prompt_builder.get_outline_schema()
        
        # Generate outline using structured JSON response with retry logic
-        outline_data = await self._generate_with_retry(outline_prompt, outline_schema)
+        outline_data = await self.response_processor.generate_with_retry(outline_prompt, outline_schema)
        
        # Convert to BlogOutlineSection objects
-        outline_sections = self._convert_to_sections(outline_data, sources)
+        outline_sections = self.response_processor.convert_to_sections(outline_data, sources)
        
-        # Extract title options
-        title_options = outline_data.get('title_options', [])
-        if not title_options:
-            title_options = self._generate_fallback_titles(primary_keywords)
+        # Run parallel processing for speed optimization
+        mapped_sections, grounding_insights = await self.parallel_processor.run_parallel_processing_async(
+            outline_sections, research
+        )
        
-        logger.info(f"Generated outline with {len(outline_sections)} sections and {len(title_options)} title options")
+        # Enhance sections with grounding insights
+        logger.info("Enhancing sections with grounding insights...")
+        grounding_enhanced_sections = self.grounding_engine.enhance_sections_with_grounding(
+            mapped_sections, research.grounding_metadata, grounding_insights
+        )
+        
+        # Optimize outline for better flow, SEO, and engagement
+        logger.info("Optimizing outline for better flow and engagement...")
+        optimized_sections = await self.outline_optimizer.optimize(grounding_enhanced_sections, "comprehensive optimization")
+        
+        # Rebalance word counts for optimal distribution
+        target_words = request.word_count or 1500
+        balanced_sections = self.outline_optimizer.rebalance_word_counts(optimized_sections, target_words)
+        
+        # Extract title options - combine AI-generated with content angles
+        ai_title_options = outline_data.get('title_options', [])
+        content_angle_titles = self.title_generator.extract_content_angle_titles(research)
+        
+        # Combine AI-generated titles with content angles
+        title_options = self.title_generator.combine_title_options(ai_title_options, content_angle_titles, primary_keywords)
+        
+        logger.info(f"Generated optimized outline with {len(balanced_sections)} sections and {len(title_options)} title options")
+        
+        # Collect metadata for enhanced UI
+        source_mapping_stats = self.metadata_collector.collect_source_mapping_stats(mapped_sections, research)
+        grounding_insights_data = self.metadata_collector.collect_grounding_insights(grounding_insights)
+        optimization_results = self.metadata_collector.collect_optimization_results(optimized_sections, "comprehensive optimization")
+        research_coverage = self.metadata_collector.collect_research_coverage(research)
        
        return BlogOutlineResponse(
            success=True,
            title_options=title_options,
-            outline=outline_sections
+            outline=balanced_sections,
+            source_mapping_stats=source_mapping_stats,
+            grounding_insights=grounding_insights_data,
+            optimization_results=optimization_results,
+            research_coverage=research_coverage
        )
    
    async def generate_with_progress(self, request: BlogOutlineRequest, task_id: str) -> BlogOutlineResponse:
        """
        Outline generation method with progress updates for real-time feedback.
        """
-        from api.blog_writer.router import _update_progress
+        from api.blog_writer.task_manager import task_manager
        
        # Extract research insights
        research = request.research
@@ -80,272 +135,168 @@ class OutlineGenerator:
        # Check for custom instructions
        custom_instructions = getattr(request, 'custom_instructions', None)
        
-        await _update_progress(task_id, "📊 Analyzing research data and building content strategy...")
+        await task_manager.update_progress(task_id, "📊 Analyzing research data and building content strategy...")
        
        # Build comprehensive outline generation prompt with rich research data
-        outline_prompt = self._build_outline_prompt(
+        outline_prompt = self.prompt_builder.build_outline_prompt(
            primary_keywords, secondary_keywords, content_angles, sources,
            search_intent, request, custom_instructions
        )
        
-        await _update_progress(task_id, "🤖 Generating AI-powered outline with research insights...")
+        await task_manager.update_progress(task_id, "🤖 Generating AI-powered outline with research insights...")
        
        # Define schema with proper property ordering (critical for Gemini API)
-        outline_schema = self._get_outline_schema()
+        outline_schema = self.prompt_builder.get_outline_schema()
        
-        await _update_progress(task_id, "🔄 Making AI request to generate structured outline...")
+        await task_manager.update_progress(task_id, "🔄 Making AI request to generate structured outline...")
        
        # Generate outline using structured JSON response with retry logic
-        outline_data = await self._generate_with_retry(outline_prompt, outline_schema, task_id)
+        outline_data = await self.response_processor.generate_with_retry(outline_prompt, outline_schema, task_id)
        
-        await _update_progress(task_id, "📝 Processing outline structure and validating sections...")
+        await task_manager.update_progress(task_id, "📝 Processing outline structure and validating sections...")
        
        # Convert to BlogOutlineSection objects
-        outline_sections = self._convert_to_sections(outline_data, sources)
+        outline_sections = self.response_processor.convert_to_sections(outline_data, sources)
        
-        # Extract title options
-        title_options = outline_data.get('title_options', [])
-        if not title_options:
-            title_options = self._generate_fallback_titles(primary_keywords)
+        # Run parallel processing for speed optimization
+        mapped_sections, grounding_insights = await self.parallel_processor.run_parallel_processing(
+            outline_sections, research, task_id
+        )
        
-        await _update_progress(task_id, "✅ Outline generation completed successfully!")
+        # Enhance sections with grounding insights (depends on both previous tasks)
+        await task_manager.update_progress(task_id, "✨ Enhancing sections with grounding insights...")
+        grounding_enhanced_sections = self.grounding_engine.enhance_sections_with_grounding(
+            mapped_sections, research.grounding_metadata, grounding_insights
+        )
+        
+        # Optimize outline for better flow, SEO, and engagement
+        await task_manager.update_progress(task_id, "🎯 Optimizing outline for better flow and engagement...")
+        optimized_sections = await self.outline_optimizer.optimize(grounding_enhanced_sections, "comprehensive optimization")
+        
+        # Rebalance word counts for optimal distribution
+        await task_manager.update_progress(task_id, "⚖️ Rebalancing word count distribution...")
+        target_words = request.word_count or 1500
+        balanced_sections = self.outline_optimizer.rebalance_word_counts(optimized_sections, target_words)
+        
+        # Extract title options - combine AI-generated with content angles
+        ai_title_options = outline_data.get('title_options', [])
+        content_angle_titles = self.title_generator.extract_content_angle_titles(research)
+        
+        # Combine AI-generated titles with content angles
+        title_options = self.title_generator.combine_title_options(ai_title_options, content_angle_titles, primary_keywords)
+        
+        await task_manager.update_progress(task_id, "✅ Outline generation and optimization completed successfully!")
+        
+        # Collect metadata for enhanced UI
+        source_mapping_stats = self.metadata_collector.collect_source_mapping_stats(mapped_sections, research)
+        grounding_insights_data = self.metadata_collector.collect_grounding_insights(grounding_insights)
+        optimization_results = self.metadata_collector.collect_optimization_results(optimized_sections, "comprehensive optimization")
+        research_coverage = self.metadata_collector.collect_research_coverage(research)
        
        return BlogOutlineResponse(
            success=True,
            title_options=title_options,
-            outline=outline_sections
+            outline=balanced_sections,
+            source_mapping_stats=source_mapping_stats,
+            grounding_insights=grounding_insights_data,
+            optimization_results=optimization_results,
+            research_coverage=research_coverage
        )
    
-    def _build_outline_prompt(self, primary_keywords: List[str], secondary_keywords: List[str], 
-                            content_angles: List[str], sources: List, search_intent: str,
-                            request: BlogOutlineRequest, custom_instructions: str = None) -> str:
-        """Build the comprehensive outline generation prompt."""
-        return f"""
-        You are a world-class content strategist and SEO expert with 15+ years of experience creating viral, high-converting blog content. Your outlines have generated millions of views and driven significant business results.
-
-        CONTENT STRATEGY BRIEF:
-        Topic: {', '.join(primary_keywords)}
-        Search Intent: {search_intent}
-        Target Word Count: {request.word_count or 1500} words
-        Industry Context: {getattr(request.persona, 'industry', 'General') if request.persona else 'General'}
-        Audience: {getattr(request.persona, 'target_audience', 'General') if request.persona else 'General'}
-        
-        {f"CUSTOM USER INSTRUCTIONS: {custom_instructions}" if custom_instructions else ""}
-
-        RESEARCH INTELLIGENCE:
-        Primary Keywords: {', '.join(primary_keywords)}
-        Secondary Keywords: {', '.join(secondary_keywords)}
-        Long-tail Opportunities: {', '.join(request.research.keyword_analysis.get('long_tail', [])[:5])}
-        Semantic Keywords: {', '.join(request.research.keyword_analysis.get('semantic_keywords', [])[:5])}
-        Trending Terms: {', '.join(request.research.keyword_analysis.get('trending_terms', [])[:3])}
-        Keyword Difficulty: {request.research.keyword_analysis.get('difficulty', 6)}/10
-        Content Gaps: {', '.join(request.research.keyword_analysis.get('content_gaps', [])[:3])}
-        
-        Content Angles Discovered:
-        {chr(10).join([f"• {angle}" for angle in content_angles[:6]])}
-        
-        Competitive Intelligence:
-        Top Competitors: {', '.join(request.research.competitor_analysis.get('top_competitors', [])[:3])}
-        Market Opportunities: {', '.join(request.research.competitor_analysis.get('opportunities', [])[:3])}
-        Competitive Advantages: {', '.join(request.research.competitor_analysis.get('competitive_advantages', [])[:3])}
-        Market Positioning: {request.research.competitor_analysis.get('market_positioning', 'Standard positioning')}
-        
-        Research Sources Available: {len(sources)} authoritative sources with current data
-        Key Statistics Available: Multiple data points, percentages, and expert quotes from credible sources
-
-        STRATEGIC OUTLINE REQUIREMENTS:
-        
-        {f"CUSTOM REQUIREMENTS: {custom_instructions}" if custom_instructions else ""}
-        
-        1. CONTENT ARCHITECTURE:
-        - Create a logical, engaging narrative arc that guides readers from problem to solution
-        - Structure content to build authority and trust progressively
-        - Include data-driven insights and expert opinions from research
-        - Ensure each section adds unique value and builds upon previous sections
-        
-        2. SEO OPTIMIZATION:
-        - Naturally integrate primary keywords in headings and content
-        - Use secondary keywords strategically throughout sections
-        - Include long-tail keywords in subheadings and key points
-        - Optimize for featured snippets and voice search
-        
-        3. READER ENGAGEMENT:
-        - Start with compelling hooks and pain points
-        - Use storytelling elements and real-world examples
-        - Include actionable insights and practical takeaways
-        - End with clear next steps and calls-to-action
-        
-        4. CONTENT DEPTH:
-        - Provide comprehensive coverage of the topic
-        - Include multiple perspectives and expert insights
-        - Address common questions and objections
-        - Offer unique angles not covered by competitors
-        
-        5. WORD COUNT DISTRIBUTION:
-        - Introduction: 12% of total word count
-        - Main content sections: 76% of total word count
-        - Conclusion: 12% of total word count
-        - Ensure balanced section lengths for optimal readability
-        
-        6. COMPETITIVE ADVANTAGE:
-        - Leverage content gaps identified in research
-        - Include unique data points and statistics
-        - Provide fresh perspectives on trending topics
-        - Address underserved audience segments
-
-        TITLE STRATEGY:
-        Create 5 compelling title options that:
-        - Include primary keywords naturally
-        - Promise clear value and outcomes
-        - Appeal to the target audience's pain points
-        - Stand out from competitor content
-        - Optimize for click-through rates
-
-        Generate a comprehensive outline with the following structure:
-        {{
-            "title_options": [
-                "Title 1 with primary keyword",
-                "Title 2 with emotional hook",
-                "Title 3 with benefit-focused approach",
-                "Title 4 with question format",
-                "Title 5 with urgency/trending angle"
-            ],
-            "outline": [
-                {{
-                    "heading": "Section heading with primary keyword",
-                    "subheadings": ["Subheading 1", "Subheading 2", "Subheading 3"],
-                    "key_points": ["Key point 1", "Key point 2", "Key point 3"],
-                    "word_count": 300,
-                    "keywords": ["primary keyword", "secondary keyword"]
-                }}
-            ]
-        }}
+    
+    
+    async def enhance_section(self, section: BlogOutlineSection, focus: str = "general improvement") -> BlogOutlineSection:
        """
-    
-    def _get_outline_schema(self) -> Dict[str, Any]:
-        """Get the structured JSON schema for outline generation."""
-        return {
-            "type": "object",
-            "properties": {
-                "title_options": {
-                    "type": "array",
-                    "items": {"type": "string"}
-                },
-                "outline": {
-                    "type": "array",
-                    "items": {
-                        "type": "object",
-                        "properties": {
-                            "heading": {"type": "string"},
-                            "subheadings": {
-                                "type": "array",
-                                "items": {"type": "string"}
-                            },
-                            "key_points": {
-                                "type": "array",
-                                "items": {"type": "string"}
-                            },
-                            "word_count": {"type": "integer"},
-                            "keywords": {
-                                "type": "array",
-                                "items": {"type": "string"}
-                            }
-                        },
-                        "required": ["heading", "subheadings", "key_points", "word_count", "keywords"]
-                    }
-                }
-            },
-            "required": ["title_options", "outline"],
-            "propertyOrdering": ["title_options", "outline"]
-        }
-    
-    async def _generate_with_retry(self, prompt: str, schema: Dict[str, Any], task_id: str = None) -> Dict[str, Any]:
-        """Generate outline with retry logic for API failures."""
-        from services.llm_providers.gemini_provider import gemini_structured_json_response
-        from api.blog_writer.router import _update_progress
+        Enhance a single section using AI with research context.
        
-        max_retries = 2  # Conservative retry for expensive API calls
-        retry_delay = 5  # 5 second delay between retries
-        
-        for attempt in range(max_retries + 1):
-            try:
-                if task_id:
-                    await _update_progress(task_id, f"🤖 Calling Gemini API for outline generation (attempt {attempt + 1}/{max_retries + 1})...")
-                
-                outline_data = gemini_structured_json_response(
-                    prompt=prompt,
-                    schema=schema,
-                    temperature=0.3,
-                    max_tokens=4000  # Increased to avoid MAX_TOKENS truncation
-                )
-                
-                # Log response for debugging
-                logger.info(f"Gemini response received: {type(outline_data)}")
-                
-                # Check for errors in the response
-                if isinstance(outline_data, dict) and 'error' in outline_data:
-                    error_msg = str(outline_data['error'])
-                    if "503" in error_msg and "overloaded" in error_msg and attempt < max_retries:
-                        if task_id:
-                            await _update_progress(task_id, f"⚠️ AI service overloaded, retrying in {retry_delay} seconds...")
-                        logger.warning(f"Gemini API overloaded, retrying in {retry_delay} seconds (attempt {attempt + 1}/{max_retries + 1})")
-                        await asyncio.sleep(retry_delay)
-                        continue
-                    else:
-                        logger.error(f"Gemini structured response error: {outline_data['error']}")
-                        raise ValueError(f"AI outline generation failed: {outline_data['error']}")
-                
-                # Validate required fields
-                if not isinstance(outline_data, dict) or 'outline' not in outline_data or not isinstance(outline_data['outline'], list):
-                    if attempt < max_retries:
-                        if task_id:
-                            await _update_progress(task_id, f"⚠️ Invalid response structure, retrying in {retry_delay} seconds...")
-                        logger.warning(f"Invalid response structure, retrying in {retry_delay} seconds (attempt {attempt + 1}/{max_retries + 1})")
-                        await asyncio.sleep(retry_delay)
-                        continue
-                    else:
-                        raise ValueError("Invalid outline structure in Gemini response")
-                
-                # If we get here, the response is valid
-                return outline_data
-                
-            except Exception as e:
-                error_str = str(e)
-                if ("503" in error_str or "overloaded" in error_str) and attempt < max_retries:
-                    if task_id:
-                        await _update_progress(task_id, f"⚠️ AI service error, retrying in {retry_delay} seconds...")
-                    logger.warning(f"Gemini API error, retrying in {retry_delay} seconds (attempt {attempt + 1}/{max_retries + 1}): {error_str}")
-                    await asyncio.sleep(retry_delay)
-                    continue
-                else:
-                    logger.error(f"Outline generation failed after {attempt + 1} attempts: {error_str}")
-                    raise ValueError(f"AI outline generation failed: {error_str}")
+        Args:
+            section: The section to enhance
+            focus: Enhancement focus area (e.g., "SEO optimization", "engagement", "comprehensiveness")
+            
+        Returns:
+            Enhanced section with improved content
+        """
+        logger.info(f"Enhancing section '{section.heading}' with focus: {focus}")
+        enhanced_section = await self.section_enhancer.enhance(section, focus)
+        logger.info(f"✅ Section enhancement completed for '{section.heading}'")
+        return enhanced_section
    
-    def _convert_to_sections(self, outline_data: Dict[str, Any], sources: List) -> List[BlogOutlineSection]:
-        """Convert outline data to BlogOutlineSection objects."""
-        outline_sections = []
-        for i, section_data in enumerate(outline_data.get('outline', [])):
-            if not isinstance(section_data, dict) or 'heading' not in section_data:
-                continue
-                
-            section = BlogOutlineSection(
-                id=f"s{i+1}",
-                heading=section_data.get('heading', f'Section {i+1}'),
-                subheadings=section_data.get('subheadings', []),
-                key_points=section_data.get('key_points', []),
-                references=sources[:3],  # Use first 3 sources as references
-                target_words=section_data.get('word_count', 200),
-                keywords=section_data.get('keywords', [])
-            )
-            outline_sections.append(section)
+    async def optimize_outline(self, outline: List[BlogOutlineSection], focus: str = "comprehensive optimization") -> List[BlogOutlineSection]:
+        """
+        Optimize an entire outline for better flow, SEO, and engagement.
         
-        return outline_sections
+        Args:
+            outline: List of sections to optimize
+            focus: Optimization focus area
+            
+        Returns:
+            Optimized outline; the unchanged input outline if OutlineOptimizer.optimize's AI call fails
+        """
+        logger.info(f"Optimizing outline with {len(outline)} sections, focus: {focus}")
+        optimized_outline = await self.outline_optimizer.optimize(outline, focus)
+        logger.info(f"✅ Outline optimization completed for {len(optimized_outline)} sections")
+        return optimized_outline
+    
+    def rebalance_outline_word_counts(self, outline: List[BlogOutlineSection], target_words: int) -> List[BlogOutlineSection]:
+        """Redistribute per-section word targets to fit an overall budget.
+
+        Args:
+            outline: Sections whose word counts should be rebalanced.
+            target_words: Total target word count for the whole outline.
+
+        Returns:
+            The outline as returned by ``OutlineOptimizer.rebalance_word_counts``.
+        """
+        section_total = len(outline)
+        logger.info(f"Rebalancing word counts for {section_total} sections, target: {target_words} words")
+        result = self.outline_optimizer.rebalance_word_counts(outline, target_words)
+        logger.info("✅ Word count rebalancing completed")
+        return result
+    
+    def get_grounding_insights(self, research_data) -> Dict[str, Any]:
+        """Run the grounding engine's contextual analysis on ``research_data``.
+
+        Args:
+            research_data: Object exposing a ``grounding_metadata`` attribute.
+
+        Returns:
+            The insights mapping produced by ``extract_contextual_insights``.
+        """
+        logger.info("Extracting grounding insights from research data...")
+        engine = self.grounding_engine
+        contextual = engine.extract_contextual_insights(research_data.grounding_metadata)
+        logger.info(f"✅ Extracted {len(contextual)} grounding insight categories")
+        return contextual
+    
+    def get_authority_sources(self, research_data) -> List[Tuple]:
+        """Rank the grounding sources of ``research_data`` by authority.
+
+        Args:
+            research_data: Object exposing a ``grounding_metadata`` attribute.
+
+        Returns:
+            List of (chunk, authority_score) tuples sorted by authority.
+        """
+        logger.info("Identifying high-authority sources from grounding metadata...")
+        engine = self.grounding_engine
+        ranked = engine.get_authority_sources(research_data.grounding_metadata)
+        logger.info(f"✅ Identified {len(ranked)} high-authority sources")
+        return ranked
+    
+    def get_high_confidence_insights(self, research_data) -> List[str]:
+        """Return the insights the grounding engine marks as high-confidence.
+
+        Args:
+            research_data: Object exposing a ``grounding_metadata`` attribute.
+
+        Returns:
+            List of high-confidence insights.
+        """
+        logger.info("Extracting high-confidence insights from grounding metadata...")
+        engine = self.grounding_engine
+        confident = engine.get_high_confidence_insights(research_data.grounding_metadata)
+        logger.info(f"✅ Extracted {len(confident)} high-confidence insights")
+        return confident
+    
+    
    
-    def _generate_fallback_titles(self, primary_keywords: List[str]) -> List[str]:
-        """Generate fallback titles when AI generation fails."""
-        primary_keyword = primary_keywords[0] if primary_keywords else "Topic"
-        return [
-            f"The Complete Guide to {primary_keyword}",
-            f"{primary_keyword}: Everything You Need to Know",
-            f"How to Master {primary_keyword} in 2024"
-        ]
--- a/backend/services/blog_writer/outline/outline_optimizer.py
+++ b/backend/services/blog_writer/outline/outline_optimizer.py
@@ -17,61 +17,64 @@ class OutlineOptimizer:
        """Optimize entire outline for better flow, SEO, and engagement."""
        outline_text = "\n".join([f"{i+1}. {s.heading}" for i, s in enumerate(outline)])
        
-        optimization_prompt = f"""
-        Optimize this blog outline for better flow, engagement, and SEO:
-        
-        Current Outline:
-        {outline_text}
-        
-        Optimization Focus: {focus}
-        
-        Optimization Goals:
-        - Improve narrative flow and logical progression
-        - Enhance SEO with better keyword distribution
-        - Increase engagement with compelling headings
-        - Ensure comprehensive coverage of the topic
-        - Optimize for featured snippets and voice search
-        
-        Respond with JSON array of optimized sections:
-        [
-            {{
-                "heading": "Optimized heading",
-                "subheadings": ["subheading 1", "subheading 2"],
-                "key_points": ["point 1", "point 2"],
-                "target_words": 300,
-                "keywords": ["keyword1", "keyword2"]
-            }}
-        ]
-        """
+        optimization_prompt = f"""Optimize this blog outline for better flow, engagement, and SEO:
+
+Current Outline:
+{outline_text}
+
+Optimization Focus: {focus}
+
+Goals: Improve narrative flow, enhance SEO, increase engagement, ensure comprehensive coverage.
+
+Return JSON format:
+{{
+    "outline": [
+        {{
+            "heading": "Optimized heading",
+            "subheadings": ["subheading 1", "subheading 2"],
+            "key_points": ["point 1", "point 2"],
+            "target_words": 300,
+            "keywords": ["keyword1", "keyword2"]
+        }}
+    ]
+}}"""
        
        try:
            from services.llm_providers.gemini_provider import gemini_structured_json_response
            
            optimization_schema = {
-                "type": "array",
-                "items": {
-                    "type": "object",
-                    "properties": {
-                        "heading": {"type": "string"},
-                        "subheadings": {"type": "array", "items": {"type": "string"}},
-                        "key_points": {"type": "array", "items": {"type": "string"}},
-                        "target_words": {"type": "integer"},
-                        "keywords": {"type": "array", "items": {"type": "string"}}
-                    },
-                    "required": ["heading", "subheadings", "key_points", "target_words", "keywords"]
-                }
+                "type": "object",
+                "properties": {
+                    "outline": {
+                        "type": "array",
+                        "items": {
+                            "type": "object",
+                            "properties": {
+                                "heading": {"type": "string"},
+                                "subheadings": {"type": "array", "items": {"type": "string"}},
+                                "key_points": {"type": "array", "items": {"type": "string"}},
+                                "target_words": {"type": "integer"},
+                                "keywords": {"type": "array", "items": {"type": "string"}}
+                            },
+                            "required": ["heading", "subheadings", "key_points", "target_words", "keywords"]
+                        }
+                    }
+                },
+                "required": ["outline"],
+                "propertyOrdering": ["outline"]
            }
            
            optimized_data = gemini_structured_json_response(
                prompt=optimization_prompt,
                schema=optimization_schema,
                temperature=0.3,
-                max_tokens=2000
+                max_tokens=6000  # Match main outline generator
            )
            
-            if isinstance(optimized_data, list):
+            # Handle the new schema format with "outline" wrapper
+            if isinstance(optimized_data, dict) and 'outline' in optimized_data:
                optimized_sections = []
-                for i, section_data in enumerate(optimized_data):
+                for i, section_data in enumerate(optimized_data['outline']):
                    section = BlogOutlineSection(
                        id=f"s{i+1}",
                        heading=section_data.get('heading', f'Section {i+1}'),
@@ -82,9 +85,14 @@ class OutlineOptimizer:
                        keywords=section_data.get('keywords', [])
                    )
                    optimized_sections.append(section)
+                logger.info(f"✅ Outline optimization completed: {len(optimized_sections)} sections optimized")
                return optimized_sections
+            else:
+                logger.warning(f"Invalid optimization response format: {type(optimized_data)}")
+                
        except Exception as e:
            logger.warning(f"AI outline optimization failed: {e}")
+            logger.info("Returning original outline without optimization")
        
        return outline
    
--- a/backend/services/blog_writer/outline/outline_service.py
+++ b/backend/services/blog_writer/outline/outline_service.py
@@ -18,6 +18,7 @@ from models.blog_models import (
 from .outline_generator import OutlineGenerator
 from .outline_optimizer import OutlineOptimizer
 from .section_enhancer import SectionEnhancer
+from services.cache.persistent_outline_cache import persistent_outline_cache


 class OutlineService:
@@ -33,13 +34,90 @@ class OutlineService:
        Stage 2: Content Planning with AI-generated outline using research results
        Uses Gemini with research data to create comprehensive, SEO-optimized outline
        """
-        return await self.outline_generator.generate(request)
+        # Extract cache parameters - use original user keywords for consistent caching
+        keywords = request.research.original_keywords or request.research.keyword_analysis.get('primary', [])
+        industry = getattr(request.persona, 'industry', 'general') if request.persona else 'general'
+        target_audience = getattr(request.persona, 'target_audience', 'general') if request.persona else 'general'
+        word_count = request.word_count or 1500
+        custom_instructions = request.custom_instructions or ""
+        persona_data = request.persona.dict() if request.persona else None
+        
+        # Check cache first
+        cached_result = persistent_outline_cache.get_cached_outline(
+            keywords=keywords,
+            industry=industry,
+            target_audience=target_audience,
+            word_count=word_count,
+            custom_instructions=custom_instructions,
+            persona_data=persona_data
+        )
+        
+        if cached_result:
+            logger.info(f"Using cached outline for keywords: {keywords}")
+            return BlogOutlineResponse(**cached_result)
+        
+        # Generate new outline if not cached
+        logger.info(f"Generating new outline for keywords: {keywords}")
+        result = await self.outline_generator.generate(request)
+        
+        # Cache the result
+        persistent_outline_cache.cache_outline(
+            keywords=keywords,
+            industry=industry,
+            target_audience=target_audience,
+            word_count=word_count,
+            custom_instructions=custom_instructions,
+            persona_data=persona_data,
+            result=result.dict()
+        )
+        
+        return result
    
    async def generate_outline_with_progress(self, request: BlogOutlineRequest, task_id: str) -> BlogOutlineResponse:
        """
        Outline generation method with progress updates for real-time feedback.
        """
-        return await self.outline_generator.generate_with_progress(request, task_id)
+        # Cache lookup inputs: the user's original keywords (else the analysed primary keywords), persona and request settings
+        keywords = request.research.original_keywords or request.research.keyword_analysis.get('primary', [])
+        industry = getattr(request.persona, 'industry', 'general') if request.persona else 'general'
+        target_audience = getattr(request.persona, 'target_audience', 'general') if request.persona else 'general'
+        word_count = request.word_count or 1500
+        custom_instructions = request.custom_instructions or ""
+        persona_data = request.persona.dict() if request.persona else None
+        
+        # Ask the persistent cache for an outline stored under these parameters
+        cached_result = persistent_outline_cache.get_cached_outline(
+            keywords=keywords,
+            industry=industry,
+            target_audience=target_audience,
+            word_count=word_count,
+            custom_instructions=custom_instructions,
+            persona_data=persona_data
+        )
+        
+        if cached_result:
+            logger.info(f"Using cached outline for keywords: {keywords} (with progress updates)")
+            # Publish a progress message for task_id saying the outline came from the cache
+            from api.blog_writer.task_manager import task_manager
+            await task_manager.update_progress(task_id, "✅ Using cached outline (saved generation time!)")
+            return BlogOutlineResponse(**cached_result)
+        
+        # Cache miss: delegate to the generator's progress-reporting method
+        logger.info(f"Generating new outline for keywords: {keywords} (with progress updates)")
+        result = await self.outline_generator.generate_with_progress(request, task_id)
+        
+        # Store the new result (as a plain dict) under the same parameters
+        persistent_outline_cache.cache_outline(
+            keywords=keywords,
+            industry=industry,
+            target_audience=target_audience,
+            word_count=word_count,
+            custom_instructions=custom_instructions,
+            persona_data=persona_data,
+            result=result.dict()
+        )
+        
+        return result
    
    async def refine_outline(self, request: BlogOutlineRefineRequest) -> BlogOutlineResponse:
        """
@@ -152,3 +230,29 @@ class OutlineService:
    def rebalance_word_counts(self, outline: List[BlogOutlineSection], target_words: int) -> List[BlogOutlineSection]:
        """Rebalance word count distribution across sections."""
        return self.outline_optimizer.rebalance_word_counts(outline, target_words)
+    
+    # Cache Management Methods
+    
+    def get_outline_cache_stats(self) -> Dict[str, Any]:
+        """Return whatever persistent_outline_cache.get_cache_stats() reports for the outline cache."""
+        return persistent_outline_cache.get_cache_stats()
+    
+    def clear_outline_cache(self) -> None:
+        """Clear the persistent outline cache via clear_cache() and log that it was cleared."""
+        persistent_outline_cache.clear_cache()
+        logger.info("Outline cache cleared")
+    
+    def invalidate_outline_cache_for_keywords(self, keywords: List[str]) -> None:
+        """
+        Invalidate the persistent outline cache entries for the given keywords and log the call.
+        Intended for use after the research data behind those keywords has been updated.
+        
+        Args:
+            keywords: Keywords passed to the cache's invalidate_cache_for_keywords()
+        """
+        persistent_outline_cache.invalidate_cache_for_keywords(keywords)
+        logger.info(f"Invalidated outline cache for keywords: {keywords}")
+    
+    def get_recent_outline_cache_entries(self, limit: int = 20) -> List[Dict[str, Any]]:
+        """Return the result of persistent_outline_cache.get_cache_entries(limit); meant for debugging."""
+        return persistent_outline_cache.get_cache_entries(limit)
--- a/backend/services/blog_writer/outline/parallel_processor.py
+++ b/backend/services/blog_writer/outline/parallel_processor.py
@@ -0,0 +1,107 @@
+"""
+Parallel Processor - Handles parallel processing of outline generation tasks.
+
+Manages concurrent execution of source mapping and grounding insights extraction.
+"""
+
+import asyncio
+from typing import Tuple, Any
+from loguru import logger
+
+
+class ParallelProcessor:
+    """
+    Handles parallel processing of outline generation tasks for speed optimization.
+    
+    The injected mapper and grounding engine are called synchronously, so each call is
+    handed to a worker thread; that is what lets the two steps overlap.
+    NOTE(review): assumes both objects are safe to call from a worker thread - confirm.
+    """
+    
+    def __init__(self, source_mapper, grounding_engine):
+        """Initialize the parallel processor with required dependencies."""
+        self.source_mapper = source_mapper
+        self.grounding_engine = grounding_engine
+    
+    @staticmethod
+    async def _in_thread(func, *args):
+        """
+        Run the blocking call func(*args) in the event loop's default executor.
+        
+        Awaited directly inside a coroutine, a synchronous call blocks the loop and the
+        gathered tasks would simply run one after the other.
+        """
+        return await asyncio.get_running_loop().run_in_executor(None, func, *args)
+    
+    async def run_parallel_processing(self, outline_sections, research, task_id: str = None) -> Tuple[Any, Any]:
+        """
+        Run source mapping and grounding insights extraction in parallel.
+        
+        Each step runs in a worker thread; an exception from either one propagates to the caller.
+        
+        Args:
+            outline_sections: List of outline sections to process
+            research: Research data object
+            task_id: Optional task ID for progress updates
+            
+        Returns:
+            Tuple of (mapped_sections, grounding_insights)
+        """
+        if task_id:
+            from api.blog_writer.task_manager import task_manager
+            await task_manager.update_progress(task_id, "⚡ Running parallel processing for maximum speed...")
+        
+        logger.info("Running parallel processing for maximum speed...")
+        
+        # gather() schedules both coroutines as tasks and returns results in argument order
+        mapped_sections, grounding_insights = await asyncio.gather(
+            self._run_source_mapping(outline_sections, research, task_id),
+            self._run_grounding_insights_extraction(research, task_id),
+        )
+        
+        return mapped_sections, grounding_insights
+    
+    async def run_parallel_processing_async(self, outline_sections, research) -> Tuple[Any, Any]:
+        """
+        Run parallel processing without progress updates (for non-progress methods).
+        
+        Args:
+            outline_sections: List of outline sections to process
+            research: Research data object
+            
+        Returns:
+            Tuple of (mapped_sections, grounding_insights)
+        """
+        logger.info("Running parallel processing for maximum speed...")
+        
+        # Same fan-out as run_parallel_processing, using the helpers that only log
+        mapped_sections, grounding_insights = await asyncio.gather(
+            self._run_source_mapping_async(outline_sections, research),
+            self._run_grounding_insights_extraction_async(research),
+        )
+        
+        return mapped_sections, grounding_insights
+    
+    async def _run_source_mapping(self, outline_sections, research, task_id):
+        """Publish progress when task_id is set, then map sources to sections in a worker thread."""
+        if task_id:
+            from api.blog_writer.task_manager import task_manager
+            await task_manager.update_progress(task_id, "🔗 Applying intelligent source-to-section mapping...")
+        return await self._in_thread(self.source_mapper.map_sources_to_sections, outline_sections, research)
+    
+    async def _run_grounding_insights_extraction(self, research, task_id):
+        """Publish progress when task_id is set, then extract grounding insights in a worker thread."""
+        if task_id:
+            from api.blog_writer.task_manager import task_manager
+            await task_manager.update_progress(task_id, "🧠 Extracting grounding metadata insights...")
+        return await self._in_thread(self.grounding_engine.extract_contextual_insights, research.grounding_metadata)
+    
+    async def _run_source_mapping_async(self, outline_sections, research):
+        """Map sources to sections in a worker thread (variant that logs instead of publishing progress)."""
+        logger.info("Applying intelligent source-to-section mapping...")
+        return await self._in_thread(self.source_mapper.map_sources_to_sections, outline_sections, research)
+    
+    async def _run_grounding_insights_extraction_async(self, research):
+        """Extract grounding insights in a worker thread (variant that logs instead of publishing progress)."""
+        logger.info("Extracting grounding metadata insights...")
+        return await self._in_thread(self.grounding_engine.extract_contextual_insights, research.grounding_metadata)
--- a/backend/services/blog_writer/outline/prompt_builder.py
+++ b/backend/services/blog_writer/outline/prompt_builder.py
@@ -0,0 +1,105 @@
+"""
+Prompt Builder - Handles building of AI prompts for outline generation.
+
+Constructs comprehensive prompts with research data, keywords, and strategic requirements.
+"""
+
+from typing import Dict, Any, List
+from loguru import logger
+
+
+class PromptBuilder:
+    """Builds the outline-generation prompt text and the matching JSON response schema."""
+    
+    def __init__(self):
+        """Nothing to set up: the builder keeps no state."""
+        pass
+    
+    def build_outline_prompt(self, primary_keywords: List[str], secondary_keywords: List[str],
+                           content_angles: List[str], sources: List, search_intent: str,
+                           request, custom_instructions: str = None) -> str:
+        """
+        Render the outline-generation prompt.
+        
+        Combines the keyword lists, content angles, search intent and number of sources with
+        the request's word count, persona and research analysis, and ends with the JSON
+        layout the model is asked to return. Only len(sources) is used from `sources`.
+        """
+        # NOTE(review): presumably already cleaned by ResearchDataFilter upstream - confirm
+        filtered = request.research
+        keyword_data = filtered.keyword_analysis
+        competitor_data = filtered.competitor_analysis
+        persona = request.persona
+        industry = getattr(persona, 'industry', 'General') if persona else 'General'
+        audience = getattr(persona, 'target_audience', 'General') if persona else 'General'
+        # Rendered as an empty line when no custom instructions are supplied
+        custom_block = f"CUSTOM INSTRUCTIONS: {custom_instructions}" if custom_instructions else ""
+        
+        return f"""Create a comprehensive blog outline for: {', '.join(primary_keywords)}
+
+CONTEXT:
+Search Intent: {search_intent}
+Target: {request.word_count or 1500} words
+Industry: {industry}
+Audience: {audience}
+
+KEYWORDS:
+Primary: {', '.join(primary_keywords)}
+Secondary: {', '.join(secondary_keywords)}
+Long-tail: {', '.join(keyword_data.get('long_tail', []))}
+Semantic: {', '.join(keyword_data.get('semantic_keywords', []))}
+Trending: {', '.join(keyword_data.get('trending_terms', []))}
+Content Gaps: {', '.join(keyword_data.get('content_gaps', []))}
+
+CONTENT ANGLES: {', '.join(content_angles)}
+
+COMPETITIVE INTELLIGENCE:
+Top Competitors: {', '.join(competitor_data.get('top_competitors', []))}
+Market Opportunities: {', '.join(competitor_data.get('opportunities', []))}
+Competitive Advantages: {', '.join(competitor_data.get('competitive_advantages', []))}
+
+RESEARCH SOURCES: {len(sources)} authoritative sources available
+
+{custom_block}
+
+STRATEGIC REQUIREMENTS:
+- Create SEO-optimized headings with natural keyword integration
+- Build logical narrative flow from problem to solution
+- Include data-driven insights from research sources
+- Address content gaps and market opportunities
+- Optimize for search intent and user questions
+- Ensure engaging, actionable content throughout
+
+Return JSON format:
+{{
+            "outline": [
+                {{
+                    "heading": "Section heading with primary keyword",
+                    "subheadings": ["Subheading 1", "Subheading 2", "Subheading 3"],
+                    "key_points": ["Key point 1", "Key point 2", "Key point 3"],
+            "target_words": 300,
+                    "keywords": ["primary keyword", "secondary keyword"]
+                }}
+            ]
+}}"""
+    
+    def get_outline_schema(self) -> Dict[str, Any]:
+        """Return the schema: an object whose "outline" array holds section objects with every field required."""
+        section_fields = {
+            "heading": {"type": "string"},
+            "subheadings": {"type": "array", "items": {"type": "string"}},
+            "key_points": {"type": "array", "items": {"type": "string"}},
+            "target_words": {"type": "integer"},
+            "keywords": {"type": "array", "items": {"type": "string"}}
+        }
+        section_schema = {
+            "type": "object",
+            "properties": section_fields,
+            "required": list(section_fields)
+        }
+        return {
+            "type": "object",
+            "properties": {"outline": {"type": "array", "items": section_schema}},
+            "required": ["outline"],
+            "propertyOrdering": ["outline"]
+        }
--- a/backend/services/blog_writer/outline/response_processor.py
+++ b/backend/services/blog_writer/outline/response_processor.py
@@ -0,0 +1,107 @@
+"""
+Response Processor - Handles AI response processing and retry logic.
+
+Processes AI responses, handles retries, and converts data to proper formats.
+"""
+
+from typing import Dict, Any, List
+import asyncio
+from loguru import logger
+
+from models.blog_models import BlogOutlineSection
+
+
+class ResponseProcessor:
+    """Handles AI response processing, retry logic, and data conversion."""
+    
+    class _TerminalFailure(ValueError):
+        """Final failure that already carries its message; must not be wrapped or retried again."""
+    
+    def __init__(self):
+        """Initialize the response processor."""
+        pass
+    
+    async def generate_with_retry(self, prompt: str, schema: Dict[str, Any], task_id: str = None) -> Dict[str, Any]:
+        """
+        Call Gemini for a structured outline, retrying transient failures.
+        
+        Retries cover 503/overloaded errors, unparseable responses and responses without an
+        'outline' list. Progress messages are published only when task_id is set.
+        
+        Returns the response dict (it contains an 'outline' list) or raises
+        ValueError("AI outline generation failed: <reason>") when no retry applies or is left.
+        """
+        from services.llm_providers.gemini_provider import gemini_structured_json_response
+        from api.blog_writer.task_manager import task_manager
+        
+        max_retries = 2  # Conservative retry for expensive API calls
+        retry_delay = 5  # 5 second delay between retries
+        
+        async def pause(progress_note: str, log_note: str, detail: str = "") -> None:
+            """Publish and log the retry notice for the current attempt, then wait retry_delay seconds."""
+            if task_id:
+                await task_manager.update_progress(task_id, f"⚠️ {progress_note}, retrying in {retry_delay} seconds...")
+            logger.warning(f"{log_note}, retrying in {retry_delay} seconds (attempt {attempt + 1}/{max_retries + 1}){detail}")
+            await asyncio.sleep(retry_delay)
+        
+        for attempt in range(max_retries + 1):
+            can_retry = attempt < max_retries
+            try:
+                if task_id:
+                    await task_manager.update_progress(task_id, f"🤖 Calling Gemini API for outline generation (attempt {attempt + 1}/{max_retries + 1})...")
+                outline_data = gemini_structured_json_response(
+                    prompt=prompt, schema=schema, temperature=0.3,
+                    max_tokens=6000,  # Increased further to avoid truncation
+                )
+                logger.info(f"Gemini response received: {type(outline_data)}")
+                
+                # An 'error' key in the response dict signals a failed call
+                if isinstance(outline_data, dict) and 'error' in outline_data:
+                    error_msg = str(outline_data['error'])
+                    if can_retry and ("503" in error_msg or "overloaded" in error_msg):
+                        await pause("AI service overloaded", "Gemini API overloaded")
+                        continue
+                    if can_retry and "No valid structured response content found" in error_msg:
+                        await pause("Invalid response format", "Gemini response parsing failed")
+                        continue
+                    logger.error(f"Gemini structured response error: {outline_data['error']}")
+                    raise self._TerminalFailure(f"AI outline generation failed: {outline_data['error']}")
+                
+                # Anything other than a dict holding an 'outline' list cannot be converted later
+                if not isinstance(outline_data, dict) or not isinstance(outline_data.get('outline'), list):
+                    if can_retry:
+                        await pause("Invalid response structure", "Invalid response structure")
+                        continue
+                    logger.error(f"Outline generation failed after {attempt + 1} attempts: Invalid outline structure in Gemini response")
+                    raise self._TerminalFailure("AI outline generation failed: Invalid outline structure in Gemini response")
+                return outline_data
+            except self._TerminalFailure:
+                # Already logged and worded above; wrapping it again would duplicate the prefix
+                raise
+            except Exception as e:
+                error_str = str(e)
+                if can_retry and ("503" in error_str or "overloaded" in error_str):
+                    await pause("AI service error", "Gemini API error", f": {error_str}")
+                    continue
+                logger.error(f"Outline generation failed after {attempt + 1} attempts: {error_str}")
+                raise ValueError(f"AI outline generation failed: {error_str}") from e
+    
+    def convert_to_sections(self, outline_data: Dict[str, Any], sources: List) -> List[BlogOutlineSection]:
+        """Convert outline data to BlogOutlineSection objects (`sources` is accepted but not used here)."""
+        outline_sections = []
+        for i, section_data in enumerate(outline_data.get('outline', [])):
+            # Entries that are not dicts or lack a heading are skipped; ids follow the original position
+            if not isinstance(section_data, dict) or 'heading' not in section_data:
+                continue
+            section = BlogOutlineSection(
+                id=f"s{i+1}",
+                heading=section_data.get('heading', f'Section {i+1}'),
+                subheadings=section_data.get('subheadings', []),
+                key_points=section_data.get('key_points', []),
+                references=[],  # Will be populated by intelligent mapping
+                target_words=section_data.get('target_words', 200),
+                keywords=section_data.get('keywords', [])
+            )
+            outline_sections.append(section)
+        
+        return outline_sections
--- a/backend/services/blog_writer/outline/source_mapper.py
+++ b/backend/services/blog_writer/outline/source_mapper.py
@@ -0,0 +1,669 @@
+"""
+Source-to-Section Mapper - Intelligent mapping of research sources to outline sections.
+
+This module provides algorithmic mapping of research sources to specific outline sections
+based on semantic similarity, keyword relevance, and contextual matching. Uses a hybrid
+approach of algorithmic scoring followed by AI validation for optimal results.
+"""
+
+from typing import Dict, Any, List, Tuple, Optional
+import re
+from collections import Counter
+from loguru import logger
+
+from models.blog_models import (
+    BlogOutlineSection,
+    ResearchSource,
+    BlogResearchResponse,
+)
+
+
+class SourceToSectionMapper:
+    """Maps research sources to outline sections using intelligent algorithms."""
+    
+    def __init__(self):
+        """Set the score thresholds, score weights and stop-word set used by the mapper."""
+        # Per-signal minimums, the combined-score cut-off and the per-section source cap
+        self.min_semantic_score = 0.3
+        self.min_keyword_score = 0.2
+        self.min_contextual_score = 0.2
+        self.min_total_score = 0.4
+        self.max_sources_per_section = 3
+        
+        # Share of each signal in the combined score
+        self.weights = dict(
+            semantic=0.4,      # Semantic similarity weight
+            keyword=0.3,       # Keyword matching weight
+            contextual=0.3,    # Contextual relevance weight
+        )
+        
+        # Common stop words for text processing, one whitespace-separated run per line
+        self.stop_words = set((
+            "the a an and or but in on at to for of with by "
+            "is are was were be been being have has had do does did "
+            "will would could should may might must can this that these those "
+            "how what when where why who which much many more most "
+            "some any all each every other another such no not only own "
+            "same so than too very just now here there up down out off "
+            "over under again further then once"
+        ).split())
+        logger.info("✅ SourceToSectionMapper initialized with intelligent mapping algorithms")
+    
+    def map_sources_to_sections(
+        self, 
+        sections: List[BlogOutlineSection], 
+        research_data: BlogResearchResponse
+    ) -> List[BlogOutlineSection]:
+        """
+        Attach research sources to outline sections.
+        
+        Runs the algorithmic scoring, then the AI validation step, then applies the
+        validated mapping to the sections.
+        
+        Args:
+            sections: Outline sections that should receive sources
+            research_data: Research response providing the sources
+            
+        Returns:
+            The sections produced by _apply_mapping_to_sections, or the input list
+            unchanged when there are no sections or no sources
+        """
+        if not (sections and research_data.sources):
+            logger.warning("No sections or sources to map")
+            return sections
+        
+        logger.info(f"Mapping {len(research_data.sources)} sources to {len(sections)} sections")
+        
+        # Pipeline: algorithmic scores -> AI validation -> write the result onto the sections
+        scored = self._algorithmic_source_mapping(sections, research_data)
+        validated = self._ai_validate_mapping(scored, research_data)
+        mapped = self._apply_mapping_to_sections(sections, validated)
+        
+        logger.info("✅ Source-to-section mapping completed successfully")
+        return mapped
+    
+    def _algorithmic_source_mapping(
+        self, 
+        sections: List[BlogOutlineSection], 
+        research_data: BlogResearchResponse
+    ) -> Dict[str, List[Tuple[ResearchSource, float]]]:
+        """
+        Score every source against every section and keep the best matches.
+        
+        A pair's total score is the weighted sum (self.weights) of its semantic, keyword and
+        contextual scores. Pairs below self.min_total_score are dropped and each section keeps
+        at most self.max_sources_per_section sources.
+        
+        Args:
+            sections: List of outline sections
+            research_data: Research data with sources
+            
+        Returns:
+            Dictionary mapping section IDs to (source, score) tuples, highest score first
+        """
+        ranked_by_section = {}
+        
+        for section in sections:
+            candidates = []
+            for source in research_data.sources:
+                # Three independent relevance signals for this (section, source) pair
+                signals = {
+                    'semantic': self._calculate_semantic_similarity(section, source),
+                    'keyword': self._calculate_keyword_relevance(section, source, research_data),
+                    'contextual': self._calculate_contextual_relevance(section, source, research_data),
+                }
+                total_score = (
+                    signals['semantic'] * self.weights['semantic'] +
+                    signals['keyword'] * self.weights['keyword'] +
+                    signals['contextual'] * self.weights['contextual']
+                )
+                
+                if total_score >= self.min_total_score:
+                    candidates.append((source, total_score))
+            
+            # Highest score first; equal scores keep the order of research_data.sources
+            ranked = sorted(candidates, key=lambda pair: pair[1], reverse=True)
+            top_matches = ranked[:self.max_sources_per_section]
+            ranked_by_section[section.id] = top_matches
+            logger.debug(f"Section '{section.heading}': {len(top_matches)} sources mapped")
+        
+        return ranked_by_section
+    
+    def _calculate_semantic_similarity(self, section: BlogOutlineSection, source: ResearchSource) -> float:
+        """
+        Score how similar a section's text is to a source's text.
+        
+        The score is the Jaccard overlap of the two meaningful-word sets plus the phrase
+        boost from _calculate_phrase_similarity, capped at 1.0.
+        
+        Args:
+            section: Outline section
+            source: Research source
+            
+        Returns:
+            Semantic similarity score; 0.0 when either side has no meaningful words
+        """
+        # The raw texts are kept because the phrase boost below is computed on them
+        section_text = self._extract_section_text(section)
+        source_text = self._extract_source_text(source)
+        
+        section_words = self._extract_meaningful_words(section_text)
+        source_words = self._extract_meaningful_words(source_text)
+        
+        # Nothing to compare when either side has no meaningful words
+        if not (section_words and source_words):
+            return 0.0
+        
+        # Jaccard similarity: shared distinct words over all distinct words
+        section_vocab = set(section_words)
+        source_vocab = set(source_words)
+        shared = section_vocab & source_vocab
+        combined = section_vocab | source_vocab
+        jaccard = len(shared) / len(combined) if combined else 0.0
+        
+        # Add the phrase-level boost, never exceeding 1.0
+        boost = self._calculate_phrase_similarity(section_text, source_text)
+        return min(1.0, jaccard + boost)
+    
+    def _calculate_keyword_relevance(
+        self, 
+        section: BlogOutlineSection, 
+        source: ResearchSource, 
+        research_data: BlogResearchResponse
+    ) -> float:
+        """
+        Calculate keyword-based relevance between section and source.
+        
+        Section and research keywords are often multi-word phrases, so every keyword list is
+        lower-cased and split with the same tokenizer as the source text before comparing.
+        
+        Args:
+            section: Outline section
+            source: Research source
+            research_data: Research data with keyword analysis
+            
+        Returns:
+            Keyword relevance score (0.0 to 1.0): 70% section-keyword coverage, 30% research-keyword coverage
+        """
+        def tokens(text: str) -> set:
+            """Lower-case text and return its set of meaningful words."""
+            return set(self._extract_meaningful_words(text.lower()))
+        
+        # Section keywords, falling back to the section's own text when none are set
+        section_keywords = tokens(' '.join(section.keywords or []))
+        if not section_keywords:
+            section_keywords = tokens(self._extract_section_text(section))
+        
+        source_keywords = tokens(f"{source.title} {source.excerpt or ''}")
+        
+        # Research keywords across the analysed keyword categories
+        research_keywords = set()
+        for category in ['primary', 'secondary', 'long_tail', 'semantic_keywords']:
+            research_keywords |= tokens(' '.join(research_data.keyword_analysis.get(category) or []))
+        
+        # Fraction of each keyword set that also appears in the source's title/excerpt
+        section_overlap = len(section_keywords & source_keywords) / len(section_keywords) if section_keywords else 0.0
+        research_overlap = len(research_keywords & source_keywords) / len(research_keywords) if research_keywords else 0.0
+        
+        return min(1.0, (section_overlap * 0.7) + (research_overlap * 0.3))
+    
+    def _calculate_contextual_relevance(
+        self, 
+        section: BlogOutlineSection, 
+        source: ResearchSource, 
+        research_data: BlogResearchResponse
+    ) -> float:
+        """
+        Score the contextual fit of a source for a section from three additive parts.
+        
+        1. Content angles: per suggested angle, 0.3 x (share of angle words found in the
+           section text + share found in the source text).
+        2. Search intent: 0.1 per intent keyword found in either text, at most 0.3.
+        3. Industry: 0.2 x share of industry words found in the source text, when set.
+        Matching is by substring on the lower-cased texts.
+        
+        Args:
+            section: Outline section
+            source: Research source
+            research_data: Research data with angles, keyword analysis and optional industry
+            
+        Returns:
+            Sum of the three parts, capped at 1.0
+        """
+        section_text = self._extract_section_text(section).lower()
+        source_text = f"{source.title} {source.excerpt or ''}".lower()
+        score = 0.0
+        # 1. Content angle matching
+        for angle in research_data.suggested_angles:
+            angle_words = self._extract_meaningful_words(angle.lower())
+            if not angle_words:
+                continue
+            in_section = sum(1 for word in angle_words if word in section_text) / len(angle_words)
+            in_source = sum(1 for word in angle_words if word in source_text) / len(angle_words)
+            score += (in_section + in_source) * 0.3
+        
+        # 2. Search intent alignment (accumulated step by step, then capped at 0.3)
+        intent = research_data.keyword_analysis.get('search_intent', 'informational')
+        intent_score = 0.0
+        for keyword in self._get_intent_keywords(intent):
+            if keyword in section_text or keyword in source_text:
+                intent_score += 0.1
+        score += min(0.3, intent_score)
+        
+        # 3. Industry/domain relevance, only when research_data carries a non-empty industry
+        industry = getattr(research_data, 'industry', None)
+        if industry:
+            industry_words = self._extract_meaningful_words(industry.lower())
+            industry_score = sum(1 for word in industry_words if word in source_text) / len(industry_words) if industry_words else 0.0
+            score += industry_score * 0.2
+        return min(1.0, score)
+    
+    def _ai_validate_mapping(
+        self,
+        mapping_results: Dict[str, List[Tuple[ResearchSource, float]]],
+        research_data: BlogResearchResponse
+    ) -> Dict[str, List[Tuple[ResearchSource, float]]]:
+        """
+        Ask the AI to review the algorithmic mapping and return the refined result.
+
+        The method builds a validation prompt, sends it to the LLM and parses
+        the reply. Any exception raised along the way is logged as a warning
+        and the untouched algorithmic mapping is returned instead.
+
+        Args:
+            mapping_results: Algorithmic mapping results
+            research_data: Research data for context
+
+        Returns:
+            AI-validated mapping, or ``mapping_results`` when validation fails
+        """
+        try:
+            logger.info("Starting AI validation of source-to-section mapping...")
+
+            # Prompt -> LLM reply -> parsed mapping, chained without temporaries
+            ai_reply = self._get_ai_validation_response(
+                self._build_validation_prompt(mapping_results, research_data)
+            )
+            refined_mapping = self._parse_validation_response(
+                ai_reply, mapping_results, research_data
+            )
+
+            logger.info("✅ AI validation completed successfully")
+            return refined_mapping
+        except Exception as exc:
+            # Best-effort step: never let AI problems break the mapping pipeline
+            logger.warning(f"AI validation failed: {exc}. Using algorithmic results as fallback.")
+            return mapping_results
+    
+    def _apply_mapping_to_sections(
+        self,
+        sections: List[BlogOutlineSection],
+        mapping_results: Dict[str, List[Tuple[ResearchSource, float]]]
+    ) -> List[BlogOutlineSection]:
+        """
+        Build new outline sections whose references are the mapped sources.
+
+        Each input section is copied into a fresh ``BlogOutlineSection``; the
+        ``references`` field is replaced by the sources mapped to the
+        section's id (scores are dropped). Sections without a mapping entry
+        get an empty reference list.
+
+        Args:
+            sections: Original outline sections
+            mapping_results: Mapping results from algorithmic/AI processing
+
+        Returns:
+            Sections with mapped sources
+        """
+        result: List[BlogOutlineSection] = []
+
+        for outline_section in sections:
+            scored_sources = mapping_results.get(outline_section.id, [])
+            # Keep only the source objects; the relevance score is not stored
+            references = [mapped_source for mapped_source, _score in scored_sources]
+
+            result.append(
+                BlogOutlineSection(
+                    id=outline_section.id,
+                    heading=outline_section.heading,
+                    subheadings=outline_section.subheadings,
+                    key_points=outline_section.key_points,
+                    references=references,
+                    target_words=outline_section.target_words,
+                    keywords=outline_section.keywords
+                )
+            )
+
+            logger.debug(f"Applied {len(references)} sources to section '{outline_section.heading}'")
+
+        return result
+    
+    # Helper methods
+    
+    def _extract_section_text(self, section: BlogOutlineSection) -> str:
+        """Return the heading, subheadings, key points and keywords joined by spaces."""
+        return " ".join(
+            [
+                section.heading,
+                *section.subheadings,
+                *section.key_points,
+                *section.keywords,
+            ]
+        )
+    
+    def _extract_source_text(self, source: ResearchSource) -> str:
+        """Return the source title, followed by its excerpt when one is present."""
+        excerpt_part = [source.excerpt] if source.excerpt else []
+        return " ".join([source.title] + excerpt_part)
+    
+    def _extract_meaningful_words(self, text: str) -> List[str]:
+        """
+        Tokenize ``text`` into lowercase alphabetic words worth comparing.
+
+        Words listed in ``self.stop_words`` and words of one or two letters
+        are dropped; order and duplicates of the remaining words are kept.
+        Empty or missing text yields an empty list.
+        """
+        if not text:
+            return []
+
+        kept: List[str] = []
+        for token in re.findall(r'\b[a-zA-Z]+\b', text.lower()):
+            # Skip noise: stop words and very short tokens
+            if token in self.stop_words or len(token) <= 2:
+                continue
+            kept.append(token)
+
+        return kept
+    
+    def _calculate_phrase_similarity(self, text1: str, text2: str) -> float:
+        """
+        Calculate a phrase similarity boost score.
+
+        Every 2-word phrase of ``text1`` found in ``text2`` adds 0.1 and every
+        3-word phrase adds 0.15; the total is capped at 0.3. Comparison is
+        case-insensitive. Whitespace in ``text2`` is collapsed to single
+        spaces first, because the phrases from ``text1`` are rebuilt with
+        single spaces and would otherwise miss matches that span a newline,
+        tab or repeated space.
+
+        Args:
+            text1: Text whose phrases are looked up
+            text2: Text that is searched
+
+        Returns:
+            Boost score between 0.0 and 0.3 (0.0 when either text is empty)
+        """
+        if not text1 or not text2:
+            return 0.0
+
+        words1 = text1.lower().split()
+        # Normalize whitespace so phrase lookup is not defeated by formatting
+        haystack = " ".join(text2.lower().split())
+
+        phrase_boost = 0.0
+
+        # 2-word phrases
+        for first, second in zip(words1, words1[1:]):
+            if f"{first} {second}" in haystack:
+                phrase_boost += 0.1
+
+        # 3-word phrases
+        for first, second, third in zip(words1, words1[1:], words1[2:]):
+            if f"{first} {second} {third}" in haystack:
+                phrase_boost += 0.15
+
+        return min(0.3, phrase_boost)  # Cap at 0.3
+    
+    def _get_intent_keywords(self, search_intent: str) -> List[str]:
+        """Return the trigger words for ``search_intent``; unknown intents give an empty list."""
+        keywords_by_intent = {
+            'informational': ['what', 'how', 'why', 'guide', 'tutorial', 'explain', 'learn', 'understand'],
+            'navigational': ['find', 'locate', 'search', 'where', 'site', 'website', 'page'],
+            'transactional': ['buy', 'purchase', 'order', 'price', 'cost', 'deal', 'offer', 'discount'],
+            'commercial': ['compare', 'review', 'best', 'top', 'vs', 'versus', 'alternative', 'option']
+        }
+
+        if search_intent in keywords_by_intent:
+            return keywords_by_intent[search_intent]
+        return []
+    
+    def get_mapping_statistics(self, mapping_results: Dict[str, List[Tuple[ResearchSource, float]]]) -> Dict[str, Any]:
+        """
+        Summarize a section-to-sources mapping.
+
+        Args:
+            mapping_results: Mapping results to analyze
+
+        Returns:
+            Dictionary with the number of sections and mappings, how many
+            sections have / lack sources, the average, maximum and minimum
+            score (0.0 when there are no scores) and the share of sections
+            that have at least one source (0.0 when there are no sections)
+        """
+        source_lists = list(mapping_results.values())
+        section_count = len(mapping_results)
+
+        # Flatten every (source, score) pair into a single list of scores
+        scores = [score for pairs in source_lists for _source, score in pairs]
+
+        if scores:
+            mean_score = sum(scores) / len(scores)
+            highest_score = max(scores)
+            lowest_score = min(scores)
+        else:
+            mean_score = highest_score = lowest_score = 0.0
+
+        covered = sum(1 for pairs in source_lists if pairs)
+        coverage = covered / section_count if section_count > 0 else 0.0
+
+        return {
+            'total_sections': section_count,
+            'total_mappings': sum(len(pairs) for pairs in source_lists),
+            'sections_with_sources': covered,
+            'sections_without_sources': section_count - covered,
+            'average_score': mean_score,
+            'max_score': highest_score,
+            'min_score': lowest_score,
+            'mapping_coverage': coverage
+        }
+    
+    def _build_validation_prompt(
+        self,
+        mapping_results: Dict[str, List[Tuple[ResearchSource, float]]],
+        research_data: BlogResearchResponse
+    ) -> str:
+        """
+        Compose the LLM prompt used to review the source-to-section mapping.
+
+        The prompt lists the research context (primary keywords, search intent,
+        content angles), the current mapping per section, every available
+        source, and the JSON format the model is asked to answer in.
+
+        Args:
+            mapping_results: Algorithmic mapping results
+            research_data: Research data for context
+
+        Returns:
+            Formatted AI validation prompt
+        """
+        def describe(source, **extra):
+            # Plain-dict view of a source; extra carries the per-mapping score
+            details = {
+                'title': source.title,
+                'url': source.url,
+                'excerpt': source.excerpt,
+                'credibility_score': source.credibility_score
+            }
+            details.update(extra)
+            return details
+
+        # Current mapping, one entry per section
+        sections_info = [
+            {
+                'id': section_id,
+                'sources': [describe(source, algorithmic_score=score) for source, score in sources]
+            }
+            for section_id, sources in mapping_results.items()
+        ]
+
+        # Research context shown to the model
+        keyword_analysis = research_data.keyword_analysis
+        primary_keywords = keyword_analysis.get('primary', [])
+        content_angles = research_data.suggested_angles
+        search_intent = keyword_analysis.get('search_intent', 'informational')
+        all_sources = [describe(source) for source in research_data.sources]
+
+        return f"""
+You are an expert content strategist and SEO specialist. Your task is to validate and improve the algorithmic mapping of research sources to blog outline sections.
+
+## CONTEXT
+Research Topic: {', '.join(primary_keywords)}
+Search Intent: {search_intent}
+Content Angles: {', '.join(content_angles)}
+
+## ALGORITHMIC MAPPING RESULTS
+The following sections have been algorithmically mapped with research sources:
+
+{self._format_sections_for_prompt(sections_info)}
+
+## AVAILABLE SOURCES
+All available research sources:
+{self._format_sources_for_prompt(all_sources)}
+
+## VALIDATION TASK
+Please analyze the algorithmic mapping and provide improvements:
+
+1. **Validate Relevance**: Are the mapped sources truly relevant to each section's content and purpose?
+2. **Identify Gaps**: Are there better sources available that weren't mapped?
+3. **Suggest Improvements**: Recommend specific source changes for better content alignment
+4. **Quality Assessment**: Rate the overall mapping quality (1-10)
+
+## RESPONSE FORMAT
+Provide your analysis in the following JSON format:
+
+```json
+{{
+    "overall_quality_score": 8,
+    "section_improvements": [
+        {{
+            "section_id": "s1",
+            "current_sources": ["source_title_1", "source_title_2"],
+            "recommended_sources": ["better_source_1", "better_source_2", "better_source_3"],
+            "reasoning": "Explanation of why these sources are better suited for this section",
+            "confidence": 0.9
+        }}
+    ],
+    "summary": "Overall assessment of the mapping quality and key improvements made"
+}}
+```
+
+## GUIDELINES
+- Prioritize sources that directly support the section's key points and subheadings
+- Consider source credibility, recency, and content depth
+- Ensure sources provide actionable insights for content creation
+- Maintain diversity in source types and perspectives
+- Focus on sources that enhance the section's value proposition
+
+Analyze the mapping and provide your recommendations.
+"""
+    
+    def _get_ai_validation_response(self, prompt: str) -> str:
+        """
+        Send the validation prompt to the Gemini text provider.
+
+        Args:
+            prompt: Validation prompt
+
+        Returns:
+            Whatever ``gemini_text_response`` returns for the prompt
+
+        Raises:
+            Exception: Any error from importing or calling the provider is
+                logged and re-raised unchanged.
+        """
+        try:
+            # Imported lazily, inside the guarded block, so import failures are logged too
+            from services.llm_providers.gemini_provider import gemini_text_response
+
+            generation_options = {
+                'temperature': 0.3,
+                'top_p': 0.9,
+                'n': 1,
+                'max_tokens': 2000,
+                'system_prompt': None,
+            }
+            return gemini_text_response(prompt=prompt, **generation_options)
+
+        except Exception as exc:
+            logger.error(f"Failed to get AI validation response: {exc}")
+            raise
+    
+    def _parse_validation_response(
+        self,
+        response: str,
+        original_mapping: Dict[str, List[Tuple[ResearchSource, float]]],
+        research_data: BlogResearchResponse
+    ) -> Dict[str, List[Tuple[ResearchSource, float]]]:
+        """
+        Parse AI validation response and apply improvements.
+
+        The JSON payload is taken from a ```json fenced block when present;
+        otherwise the first decodable JSON object found anywhere in the
+        response is used. (Decoding with ``raw_decode`` handles nested
+        objects, which a non-greedy ``{...}`` regex cannot.)
+
+        For every entry of ``section_improvements`` that refers to a section
+        present in ``original_mapping``, the recommended titles are resolved
+        against ``research_data.sources`` (duplicates and unknown titles are
+        ignored) and assigned a fixed score of 0.9. Entries that are
+        malformed, refer to unknown sections, or resolve to no source are
+        skipped individually, and the section keeps its original mapping.
+
+        Args:
+            response: AI validation response
+            original_mapping: Original algorithmic mapping
+            research_data: Research data for context
+
+        Returns:
+            Improved mapping based on AI validation, or ``original_mapping``
+            unchanged when the response cannot be parsed
+        """
+        try:
+            import json
+            import re
+
+            validation_data = None
+
+            # Preferred form: a ```json fenced block, as requested in the prompt
+            fenced = re.search(r'```json\s*(\{.*?\})\s*```', response, re.DOTALL)
+            if fenced:
+                try:
+                    validation_data = json.loads(fenced.group(1))
+                except json.JSONDecodeError:
+                    validation_data = None
+
+            # Fallback: decode the first complete JSON object in the raw text
+            if validation_data is None:
+                decoder = json.JSONDecoder()
+                start = response.find('{')
+                while start != -1:
+                    try:
+                        validation_data, _end = decoder.raw_decode(response, start)
+                        break
+                    except json.JSONDecodeError:
+                        start = response.find('{', start + 1)
+
+            if not isinstance(validation_data, dict):
+                logger.warning("Could not extract JSON from AI response")
+                return original_mapping
+
+            # Create source lookup for quick access
+            source_lookup = {source.title: source for source in research_data.sources}
+
+            improvements = validation_data.get('section_improvements')
+            if not isinstance(improvements, list):
+                improvements = []
+
+            # Apply AI improvements
+            improved_mapping = {}
+            improved_count = 0
+
+            for improvement in improvements:
+                if not isinstance(improvement, dict):
+                    continue
+
+                section_id = improvement.get('section_id')
+                # Ignore ids the model invented; only known sections are remapped
+                if not isinstance(section_id, (str, int)) or section_id not in original_mapping:
+                    continue
+
+                recommended_titles = improvement.get('recommended_sources')
+                if not isinstance(recommended_titles, list):
+                    continue
+
+                # Map recommended titles to actual sources, once each, in order
+                recommended_sources = []
+                seen_titles = set()
+                for title in recommended_titles:
+                    if not isinstance(title, str) or title in seen_titles:
+                        continue
+                    source = source_lookup.get(title)
+                    if source is not None:
+                        seen_titles.add(title)
+                        # Use high confidence score for AI-recommended sources
+                        recommended_sources.append((source, 0.9))
+
+                if recommended_sources:
+                    improved_mapping[section_id] = recommended_sources
+                    improved_count += 1
+
+            # Sections not improved by the AI keep their algorithmic mapping
+            for section_id, sources in original_mapping.items():
+                improved_mapping.setdefault(section_id, sources)
+
+            logger.info(f"AI validation applied: {improved_count} sections improved")
+            return improved_mapping
+
+        except Exception as e:
+            logger.warning(f"Failed to parse AI validation response: {e}")
+            return original_mapping
+    
+    def _format_sections_for_prompt(self, sections_info: List[Dict]) -> str:
+        """
+        Render each section's mapped sources as a small text block.
+
+        A block holds a bold section header, the number of mapped sources and
+        one bullet per source with its algorithmic score to two decimals.
+        Blocks are separated by a newline.
+        """
+        blocks = []
+        for entry in sections_info:
+            lines = [
+                f"**Section {entry['id']}:**",
+                f"Sources mapped: {len(entry['sources'])}",
+            ]
+            lines.extend(
+                f"- {mapped['title']} (Score: {mapped['algorithmic_score']:.2f})"
+                for mapped in entry['sources']
+            )
+            # Every line, including the last one, ends with a newline
+            blocks.append("".join(line + "\n" for line in lines))
+        return "\n".join(blocks)
+    
+    def _format_sources_for_prompt(self, sources: List[Dict], max_excerpt_chars: int = 200) -> str:
+        """
+        Format sources information for AI prompt.
+
+        Each source becomes a numbered entry (starting at 1) with its title,
+        URL, credibility score and, when available, its excerpt. Excerpts
+        longer than ``max_excerpt_chars`` are cut and marked with "...";
+        shorter excerpts are shown unchanged so the model is not told that
+        text is missing when it is not.
+
+        Args:
+            sources: Dicts with 'title', 'url', 'credibility_score' and
+                'excerpt' keys
+            max_excerpt_chars: Maximum number of excerpt characters to include
+
+        Returns:
+            Entries joined by a newline
+        """
+        formatted = []
+        for i, source in enumerate(sources, 1):
+            lines = [
+                f"{i}. **{source['title']}**",
+                f"   URL: {source['url']}",
+                f"   Credibility: {source['credibility_score']}",
+            ]
+            excerpt = source['excerpt']
+            if excerpt:
+                # Only signal truncation when something was actually cut off
+                ellipsis = "..." if len(excerpt) > max_excerpt_chars else ""
+                lines.append(f"   Excerpt: {excerpt[:max_excerpt_chars]}{ellipsis}")
+            formatted.append("\n".join(lines) + "\n")
+        return "\n".join(formatted)
--- a/backend/services/blog_writer/outline/title_generator.py
+++ b/backend/services/blog_writer/outline/title_generator.py
@@ -0,0 +1,123 @@
+"""
+Title Generator - Handles title generation and formatting for blog outlines.
+
+Extracts content angles from research data and combines them with AI-generated titles.
+"""
+
+from typing import List
+from loguru import logger
+
+
+class TitleGenerator:
+    """Handles title generation, formatting, and combination logic."""
+
+    def __init__(self):
+        """Initialize the title generator (it keeps no state)."""
+        pass
+
+    def extract_content_angle_titles(self, research) -> List[str]:
+        """
+        Extract content angles from research data and convert them to blog titles.
+
+        Non-string, blank and too-short angles are skipped, and duplicate
+        formatted titles are dropped while keeping first-seen order.
+
+        Args:
+            research: BlogResearchResponse object containing suggested_angles
+
+        Returns:
+            List of title-formatted content angles
+        """
+        if not research:
+            return []
+
+        content_angles = getattr(research, 'suggested_angles', None) or []
+        if not content_angles:
+            return []
+
+        # Convert content angles to title format
+        title_formatted_angles = []
+        for angle in content_angles:
+            if isinstance(angle, str) and angle.strip():
+                # Clean and format the angle as a title
+                formatted_angle = self._format_angle_as_title(angle.strip())
+                if formatted_angle and formatted_angle not in title_formatted_angles:
+                    title_formatted_angles.append(formatted_angle)
+
+        logger.info(f"Extracted {len(title_formatted_angles)} content angle titles from research data")
+        return title_formatted_angles
+
+    @staticmethod
+    def _capitalize_word(word: str) -> str:
+        """Upper-case the first letter of ``word`` and leave every other character as is."""
+        for index, char in enumerate(word):
+            if char.isalpha():
+                return word[:index] + char.upper() + word[index + 1:]
+        return word
+
+    def _format_angle_as_title(self, angle: str) -> str:
+        """
+        Format a content angle as a proper blog title.
+
+        The first letter of every word is capitalized while the rest of the
+        word is preserved. ``str.title()`` is deliberately avoided because it
+        lower-cases acronyms ("AI" -> "Ai") and capitalizes after apostrophes
+        ("doesn't" -> "Doesn'T"). The title is given closing punctuation and
+        limited to 100 characters.
+
+        Args:
+            angle: Raw content angle string
+
+        Returns:
+            Formatted title string, or "" when the angle is shorter than
+            10 characters
+        """
+        if not angle or len(angle.strip()) < 10:  # Too short to be a good title
+            return ""
+
+        # Clean up the angle
+        cleaned_angle = angle.strip()
+
+        # Capitalize each word, sentence by sentence
+        formatted_sentences = []
+        for sentence in cleaned_angle.split('. '):
+            sentence = sentence.strip()
+            if sentence:
+                # Splitting on a single space keeps the original spacing intact
+                formatted_sentences.append(
+                    ' '.join(self._capitalize_word(word) for word in sentence.split(' '))
+                )
+
+        formatted_title = '. '.join(formatted_sentences)
+
+        # Ensure it ends with proper punctuation
+        if not formatted_title.endswith(('.', '!', '?')):
+            formatted_title += '.'
+
+        # Limit length to reasonable blog title size
+        if len(formatted_title) > 100:
+            formatted_title = formatted_title[:97] + "..."
+
+        return formatted_title
+
+    def combine_title_options(self, ai_titles: List[str], content_angle_titles: List[str], primary_keywords: List[str]) -> List[str]:
+        """
+        Combine AI-generated titles with content angle titles, ensuring variety and quality.
+
+        Up to three content angle titles come first, followed by the AI
+        titles; empty entries and duplicates are skipped.
+
+        Args:
+            ai_titles: AI-generated title options
+            content_angle_titles: Titles derived from content angles
+            primary_keywords: Currently unused; kept so existing callers keep
+                working (fallback titles are no longer mixed in here)
+
+        Returns:
+            Combined list of title options (max 6 total)
+        """
+        all_titles = []
+
+        # Add content angle titles first (these are research-based and valuable)
+        for title in content_angle_titles[:3]:  # Limit to top 3 content angles
+            if title and title not in all_titles:
+                all_titles.append(title)
+
+        # Add AI-generated titles
+        for title in ai_titles:
+            if title and title not in all_titles:
+                all_titles.append(title)
+
+        # Note: Removed fallback titles as requested - only use research and AI-generated titles
+
+        # Limit to 6 titles maximum for UI usability
+        final_titles = all_titles[:6]
+
+        logger.info(f"Combined title options: {len(final_titles)} total (AI: {len(ai_titles)}, Content angles: {len(content_angle_titles)})")
+        return final_titles
+
+    def generate_fallback_titles(self, primary_keywords: List[str]) -> List[str]:
+        """
+        Generate fallback titles when AI generation fails.
+
+        Args:
+            primary_keywords: Primary keywords; the first one is used, or
+                "Topic" when the list is empty
+
+        Returns:
+            Three generic titles; the last one mentions the current year
+        """
+        # Local import: the module itself does not import datetime
+        from datetime import datetime
+
+        primary_keyword = primary_keywords[0] if primary_keywords else "Topic"
+        # Use the current year so the title does not go stale
+        current_year = datetime.now().year
+        return [
+            f"The Complete Guide to {primary_keyword}",
+            f"{primary_keyword}: Everything You Need to Know",
+            f"How to Master {primary_keyword} in {current_year}"
+        ]
--- a/backend/services/blog_writer/research/init.py
+++ b/backend/services/blog_writer/research/init.py
@@ -12,10 +12,12 @@ from .research_service import ResearchService
 from .keyword_analyzer import KeywordAnalyzer
 from .competitor_analyzer import CompetitorAnalyzer
 from .content_angle_generator import ContentAngleGenerator
+from .data_filter import ResearchDataFilter

 __all__ = [
    'ResearchService',
    'KeywordAnalyzer', 
    'CompetitorAnalyzer',
-    'ContentAngleGenerator'
+    'ContentAngleGenerator',
+    'ResearchDataFilter'
 ]
--- a/backend/services/blog_writer/research/data_filter.py
+++ b/backend/services/blog_writer/research/data_filter.py
@@ -0,0 +1,519 @@
+"""
+Research Data Filter - Filters and cleans research data for optimal AI processing.
+
+This module provides intelligent filtering and cleaning of research data to:
+1. Remove low-quality sources and irrelevant content
+2. Optimize data for AI processing (reduce tokens, improve quality)
+3. Ensure only high-value insights are sent to AI prompts
+4. Maintain data integrity while improving processing efficiency
+"""
+
+from typing import Dict, Any, List, Optional, Tuple
+from datetime import datetime, timedelta
+import re
+from loguru import logger
+
+from models.blog_models import (
+    BlogResearchResponse,
+    ResearchSource,
+    GroundingMetadata,
+    GroundingChunk,
+    GroundingSupport,
+    Citation,
+)
+
+
+class ResearchDataFilter:
+    """Filters and cleans research data for optimal AI processing."""
+    
+    def __init__(self):
+        """
+        Initialize the research data filter with default settings.
+
+        Sets the quality thresholds, size limits, stop-word set and URL
+        patterns as instance attributes and logs that the filter is ready.
+        """
+        # Be conservative but avoid over-filtering which can lead to empty UI
+        # NOTE(review): the credibility, excerpt-length and age settings are not
+        # read in the visible part of this class; presumably the
+        # _is_source_* helpers consume them - confirm there.
+        self.min_credibility_score = 0.5
+        self.min_excerpt_length = 20
+        # Upper bound on the number of sources returned by filter_sources
+        self.max_sources = 15
+        # Upper bound on kept grounding chunks (also reused as the limit for the
+        # grounding-supports fallback in filter_grounding_metadata)
+        self.max_grounding_chunks = 20
+        self.max_content_gaps = 5
+        self.max_keywords_per_category = 10
+        # Minimum confidence for grounding chunks and supports to be kept
+        self.min_grounding_confidence = 0.5
+        self.max_source_age_days = 365 * 5  # allow up to 5 years if relevant
+        
+        # Common stop words for keyword cleaning
+        self.stop_words = {
+            'the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for', 'of', 'with', 'by',
+            'is', 'are', 'was', 'were', 'be', 'been', 'being', 'have', 'has', 'had', 'do', 'does', 'did',
+            'will', 'would', 'could', 'should', 'may', 'might', 'must', 'can', 'this', 'that', 'these', 'those'
+        }
+        
+        # Irrelevant source patterns (regular expressions, kept as plain strings)
+        # NOTE(review): presumably matched against source URLs by
+        # _is_source_relevant - confirm how they are applied (search vs match,
+        # case sensitivity), since the extension patterns are anchored with `$`.
+        self.irrelevant_patterns = [
+            r'\.(pdf|doc|docx|xls|xlsx|ppt|pptx)$',  # Document files
+            r'\.(jpg|jpeg|png|gif|svg|webp)$',  # Image files
+            r'\.(mp4|avi|mov|wmv|flv|webm)$',  # Video files
+            r'\.(mp3|wav|flac|aac)$',  # Audio files
+            r'\.(zip|rar|7z|tar|gz)$',  # Archive files
+            r'^https?://(www\.)?(facebook|twitter|instagram|linkedin|youtube)\.com',  # Social media
+            r'^https?://(www\.)?(amazon|ebay|etsy)\.com',  # E-commerce
+            r'^https?://(www\.)?(wikipedia)\.org',  # Wikipedia (too generic)
+        ]
+        
+        logger.info("✅ ResearchDataFilter initialized with quality thresholds")
+    
+    def filter_research_data(self, research_data: BlogResearchResponse) -> BlogResearchResponse:
+        """
+        Main filtering method that processes all research data components.
+
+        Sources, grounding metadata, keyword analysis, competitor analysis
+        and content gaps are each filtered by their dedicated method and
+        assembled into a new ``BlogResearchResponse``; the remaining fields
+        are copied over unchanged. Missing (``None``) ``sources`` or
+        ``keyword_analysis`` are treated as empty, matching what
+        ``filter_sources`` and ``clean_keyword_analysis`` already accept.
+
+        Args:
+            research_data: Raw research data from the research service
+
+        Returns:
+            Filtered and cleaned research data optimized for AI processing
+        """
+        # Normalize optional containers once so the steps below cannot fail on None
+        sources = research_data.sources or []
+        keyword_analysis = research_data.keyword_analysis or {}
+        grounding = research_data.grounding_metadata
+
+        logger.info(f"Starting research data filtering for {len(sources)} sources")
+
+        # Track original counts for logging
+        original_counts = {
+            'sources': len(sources),
+            'grounding_chunks': len(grounding.grounding_chunks) if grounding else 0,
+            'grounding_supports': len(grounding.grounding_supports) if grounding else 0,
+            'citations': len(grounding.citations) if grounding else 0,
+        }
+
+        # Filter sources
+        filtered_sources = self.filter_sources(sources)
+
+        # Filter grounding metadata
+        filtered_grounding_metadata = self.filter_grounding_metadata(grounding)
+
+        # Clean keyword analysis
+        cleaned_keyword_analysis = self.clean_keyword_analysis(keyword_analysis)
+
+        # Clean competitor analysis
+        cleaned_competitor_analysis = self.clean_competitor_analysis(research_data.competitor_analysis)
+
+        # Filter content gaps (taken from the raw, uncleaned keyword analysis)
+        filtered_content_gaps = self.filter_content_gaps(
+            keyword_analysis.get('content_gaps', []),
+            research_data
+        )
+
+        # Update keyword analysis with filtered content gaps
+        cleaned_keyword_analysis['content_gaps'] = filtered_content_gaps
+
+        # Create filtered research response
+        filtered_research = BlogResearchResponse(
+            success=research_data.success,
+            sources=filtered_sources,
+            keyword_analysis=cleaned_keyword_analysis,
+            competitor_analysis=cleaned_competitor_analysis,
+            suggested_angles=research_data.suggested_angles,  # Keep as-is for now
+            search_widget=research_data.search_widget,
+            search_queries=research_data.search_queries,
+            grounding_metadata=filtered_grounding_metadata,
+            error_message=research_data.error_message
+        )
+
+        # Log filtering results
+        self._log_filtering_results(original_counts, filtered_research)
+
+        return filtered_research
+    
+    def filter_sources(self, sources: List[ResearchSource]) -> List[ResearchSource]:
+        """
+        Filter sources based on quality, relevance, and recency criteria.
+
+        Sources that pass all three checks are sorted by credibility score
+        (highest first) and limited to ``max_sources``. A missing
+        (``None``) score counts as 0.8; an explicit score of 0.0 is kept as
+        0.0 and therefore sorts last.
+
+        Fail-open: when every source is rejected, the most credible
+        ``max_sources`` of the original list are returned instead of an
+        empty list.
+
+        Args:
+            sources: List of research sources to filter
+
+        Returns:
+            Filtered list of high-quality sources
+        """
+        if not sources:
+            return []
+
+        def credibility(source: ResearchSource) -> float:
+            # `score or 0.8` would turn a real 0.0 into 0.8; only None gets the default
+            score = source.credibility_score
+            return 0.8 if score is None else score
+
+        # Checks run in this order and stop at the first failure
+        filtered_sources = [
+            source for source in sources
+            if self._is_source_high_quality(source)
+            and self._is_source_relevant(source)
+            and self._is_source_recent(source)
+        ]
+
+        # Sort by credibility score and limit to max_sources
+        filtered_sources = sorted(filtered_sources, key=credibility, reverse=True)[:self.max_sources]
+
+        # Fail-open: if everything was filtered out, return a trimmed set of original sources
+        if not filtered_sources:
+            logger.warning("All sources filtered out by thresholds. Falling back to top sources without strict filters.")
+            return sorted(sources, key=credibility, reverse=True)[:self.max_sources]
+
+        logger.info(f"Filtered sources: {len(sources)} → {len(filtered_sources)}")
+        return filtered_sources
+    
+    def filter_grounding_metadata(self, grounding_metadata: Optional[GroundingMetadata]) -> Optional[GroundingMetadata]:
+        """
+        Build a trimmed copy of the grounding metadata.
+
+        Chunks are kept when their confidence score reaches
+        ``min_grounding_confidence`` (at most ``max_grounding_chunks`` of
+        them); supports are kept when their best confidence score reaches the
+        same threshold; citations are kept when ``_is_citation_relevant``
+        accepts them. If all chunks or all supports would be dropped, the
+        first ``max_grounding_chunks`` original items are used instead.
+
+        Args:
+            grounding_metadata: Raw grounding metadata to filter
+
+        Returns:
+            Filtered grounding metadata, or None when no metadata was given
+        """
+        if not grounding_metadata:
+            return None
+
+        # Chunks: confidence filter, then size limit
+        all_chunks = grounding_metadata.grounding_chunks
+        kept_chunks = [
+            chunk for chunk in all_chunks
+            if chunk.confidence_score and chunk.confidence_score >= self.min_grounding_confidence
+        ][:self.max_grounding_chunks]
+
+        # Supports: judged by their highest confidence score
+        all_supports = grounding_metadata.grounding_supports
+        kept_supports = [
+            support for support in all_supports
+            if support.confidence_scores and max(support.confidence_scores) >= self.min_grounding_confidence
+        ]
+
+        # Citations: delegated relevance check
+        kept_citations = [
+            citation for citation in grounding_metadata.citations
+            if self._is_citation_relevant(citation)
+        ]
+
+        # Fail-open strategies to avoid empty UI:
+        if all_chunks and not kept_chunks:
+            logger.warning("All grounding chunks filtered out. Falling back to first N chunks without confidence filter.")
+            kept_chunks = all_chunks[: self.max_grounding_chunks]
+        if all_supports and not kept_supports:
+            logger.warning("All grounding supports filtered out. Falling back to first N supports without confidence filter.")
+            kept_supports = all_supports[: self.max_grounding_chunks]
+
+        trimmed_metadata = GroundingMetadata(
+            grounding_chunks=kept_chunks,
+            grounding_supports=kept_supports,
+            citations=kept_citations,
+            search_entry_point=grounding_metadata.search_entry_point,
+            web_search_queries=grounding_metadata.web_search_queries
+        )
+
+        logger.info(f"Filtered grounding metadata: {len(all_chunks)} chunks → {len(kept_chunks)} chunks")
+        return trimmed_metadata
+    
+    def clean_keyword_analysis(self, keyword_analysis: Dict[str, Any]) -> Dict[str, Any]:
+        """
+        Build a trimmed copy of the keyword analysis.
+
+        List-valued keyword categories are normalised through
+        ``_clean_keyword_list`` and capped at ``self.max_keywords_per_category``;
+        a few scalar fields are copied verbatim; non-empty ``content_gaps`` are
+        copied as-is (they are filtered by a separate step). Keys outside these
+        groups are dropped.
+
+        Args:
+            keyword_analysis: Raw keyword analysis data.
+
+        Returns:
+            The cleaned analysis, or an empty dict for empty/None input.
+        """
+        if not keyword_analysis:
+            return {}
+
+        list_categories = ('primary', 'secondary', 'long_tail', 'semantic_keywords', 'trending_terms')
+        passthrough_fields = ('search_intent', 'difficulty', 'analysis_insights')
+        result = {}
+
+        # Keyword lists: normalise, deduplicate, then cap.
+        for name in list_categories:
+            raw_keywords = keyword_analysis.get(name)
+            if isinstance(raw_keywords, list):
+                result[name] = self._clean_keyword_list(raw_keywords)[:self.max_keywords_per_category]
+
+        # Scalar fields are copied whenever the key exists, whatever the value.
+        result.update({name: keyword_analysis[name] for name in passthrough_fields if name in keyword_analysis})
+
+        # Only carry content gaps over when there is something to filter later.
+        gaps = keyword_analysis.get('content_gaps')
+        if gaps:
+            result['content_gaps'] = gaps
+
+        logger.info(f"Cleaned keyword analysis: {len(keyword_analysis)} categories → {len(result)} categories")
+        return result
+    
+    def clean_competitor_analysis(self, competitor_analysis: Dict[str, Any]) -> Dict[str, Any]:
+        """
+        Clean and validate competitor analysis data.
+
+        List-valued competitor fields are reduced to their non-blank string
+        entries (whitespace-stripped) and capped at ten items; a few descriptive
+        fields are copied verbatim. Keys outside these groups are dropped.
+
+        Args:
+            competitor_analysis: Raw competitor analysis data.
+
+        Returns:
+            Cleaned competitor analysis data, or an empty dict for empty/None input.
+        """
+        if not competitor_analysis:
+            return {}
+
+        cleaned_analysis = {}
+
+        # Clean competitor lists
+        competitor_lists = ['top_competitors', 'opportunities', 'competitive_advantages']
+        for field in competitor_lists:
+            values = competitor_analysis.get(field)
+            if isinstance(values, list):
+                # Non-string entries (None, numbers, dicts) are skipped instead of
+                # crashing on .strip(), mirroring _clean_keyword_list.
+                stripped = (item.strip() for item in values if isinstance(item, str))
+                cleaned_list = [item for item in stripped if item]
+                cleaned_analysis[field] = cleaned_list[:10]  # Limit to top 10
+
+        # Copy descriptive fields unchanged whenever the key exists
+        other_fields = ['market_positioning', 'competitive_landscape', 'market_share']
+        for field in other_fields:
+            if field in competitor_analysis:
+                cleaned_analysis[field] = competitor_analysis[field]
+
+        logger.info(f"Cleaned competitor analysis: {len(competitor_analysis)} fields → {len(cleaned_analysis)} fields")
+        return cleaned_analysis
+    
+    def filter_content_gaps(self, content_gaps: List[str], research_data: BlogResearchResponse) -> List[str]:
+        """
+        Keep only the content gaps that pass every gap check.
+
+        A gap survives when ``_is_gap_high_quality``, ``_is_gap_relevant_to_topic``
+        and ``_is_gap_actionable`` all accept it (evaluated in that order, stopping
+        at the first rejection). The survivors are capped at
+        ``self.max_content_gaps``, preserving input order.
+
+        Args:
+            content_gaps: List of identified content gaps.
+            research_data: Research data handed to the relevance check.
+
+        Returns:
+            Filtered list of content gaps; empty for empty/None input.
+        """
+        if not content_gaps:
+            return []
+
+        accepted = [
+            gap for gap in content_gaps
+            if self._is_gap_high_quality(gap)
+            and self._is_gap_relevant_to_topic(gap, research_data)
+            and self._is_gap_actionable(gap)
+        ]
+        accepted = accepted[:self.max_content_gaps]
+
+        logger.info(f"Filtered content gaps: {len(content_gaps)} → {len(accepted)}")
+        return accepted
+    
+    # Private helper methods
+    
+    def _is_source_high_quality(self, source: ResearchSource) -> bool:
+        """
+        Check if source meets quality criteria.
+
+        A source is rejected when it has a credibility score below
+        ``self.min_credibility_score``, a non-empty excerpt shorter than
+        ``self.min_excerpt_length``, or a missing/short title (fewer than 10
+        characters after stripping). Sources without a score or without an
+        excerpt are not penalised for the missing value.
+
+        Args:
+            source: Research source to evaluate.
+
+        Returns:
+            True when the source passes all checks, False otherwise.
+        """
+        # Credibility score check. Compare against None explicitly: a score of
+        # 0.0 is falsy and would otherwise slip past the threshold.
+        score = source.credibility_score
+        if score is not None and score < self.min_credibility_score:
+            return False
+
+        # Excerpt length check (only applied when an excerpt is present)
+        if source.excerpt and len(source.excerpt) < self.min_excerpt_length:
+            return False
+
+        # Title quality check
+        if not source.title or len(source.title.strip()) < 10:
+            return False
+
+        return True
+    
+    def _is_source_relevant(self, source: ResearchSource) -> bool:
+        """
+        Tell whether the source URL avoids every irrelevant-URL pattern.
+
+        Each entry of ``self.irrelevant_patterns`` is applied to the URL as a
+        case-insensitive regular expression search. Sources without a URL are
+        always kept.
+        """
+        url = source.url
+        if not url:
+            return True
+
+        matches_irrelevant = any(
+            re.search(pattern, url, re.IGNORECASE)
+            for pattern in self.irrelevant_patterns
+        )
+        return not matches_irrelevant
+    
+    def _is_source_recent(self, source: ResearchSource) -> bool:
+        """
+        Tell whether the source was published within ``self.max_source_age_days``.
+
+        The publication date is parsed with ``_parse_date`` and compared with
+        ``datetime.now()`` minus the allowed age. Sources with no date, an
+        unparseable date, or a date whose handling raises are kept (the latter
+        after logging a warning).
+        """
+        raw_date = source.published_at
+        if not raw_date:
+            return True  # Undated sources are kept
+
+        try:
+            parsed = self._parse_date(raw_date)
+            if not parsed:
+                return True  # Unparseable dates are kept
+            oldest_allowed = datetime.now() - timedelta(days=self.max_source_age_days)
+            return parsed >= oldest_allowed
+        except Exception as e:
+            logger.warning(f"Error parsing date '{source.published_at}': {e}")
+            return True
+    
+    def _is_citation_relevant(self, citation: Citation) -> bool:
+        """
+        Accept a citation only if its type is whitelisted and its text is substantial.
+
+        The citation type must be one of the accepted types below and the text
+        must contain at least 20 characters after stripping whitespace.
+        """
+        # NOTE(review): ResearchService._extract_grounding_metadata defaults
+        # citation_type to "inline", which this whitelist rejects - confirm that
+        # dropping such citations is intended.
+        accepted_types = ('expert_opinion', 'statistical_data', 'recent_news', 'research_study')
+        if citation.citation_type not in accepted_types:
+            return False
+
+        text = citation.text
+        return bool(text) and len(text.strip()) >= 20
+    
+    def _is_gap_high_quality(self, gap: str) -> bool:
+        """
+        Tell whether a content gap is specific enough to be worth keeping.
+
+        After stripping whitespace the gap must be at least 10 characters long,
+        must not be one of the generic one-word labels, and must contain at
+        least three whitespace-separated words.
+        """
+        text = gap.strip()
+        generic_labels = ('general', 'overview', 'introduction', 'basics', 'fundamentals')
+
+        too_short = len(text) < 10
+        is_generic = text.lower() in generic_labels
+        too_few_words = len(text.split()) < 3
+
+        return not (too_short or is_generic or too_few_words)
+    
+    def _is_gap_relevant_to_topic(self, gap: str, research_data: BlogResearchResponse) -> bool:
+        """
+        Relevance check for a content gap against the research keywords.
+
+        The gap is matched (case-insensitively, by substring) against the
+        ``primary`` keywords of ``research_data.keyword_analysis`` and then
+        against a fixed list of technology terms. Note that the check is
+        currently permissive: every path, including "no match", returns True.
+        """
+        primary_keywords = research_data.keyword_analysis.get('primary', [])
+        if not primary_keywords:
+            return True  # Nothing to compare against, keep the gap
+
+        gap_lower = gap.lower()
+
+        # Direct match on any primary keyword
+        if any(keyword.lower() in gap_lower for keyword in primary_keywords):
+            return True
+
+        # Otherwise look for common AI/technology terms
+        ai_terms = ('ai', 'artificial intelligence', 'machine learning', 'automation', 'technology', 'digital')
+        if any(term in gap_lower for term in ai_terms):
+            return True
+
+        # No clear signal either way: gaps are kept by default
+        return True
+    
+    def _is_gap_actionable(self, gap: str) -> bool:
+        """
+        Actionability check for a content gap.
+
+        The lower-cased gap is scanned for phrases that suggest a concrete
+        content format or angle. Note that the check is currently permissive:
+        gaps without any such phrase are also treated as actionable, so the
+        result is always True.
+        """
+        gap_lower = gap.lower()
+
+        actionable_indicators = (
+            'how to', 'guide', 'tutorial', 'steps', 'process', 'method',
+            'best practices', 'tips', 'strategies', 'techniques', 'approach',
+            'comparison', 'vs', 'versus', 'difference', 'pros and cons',
+            'trends', 'future', '2024', '2025', 'emerging', 'new',
+        )
+
+        if any(indicator in gap_lower for indicator in actionable_indicators):
+            return True
+
+        # No indicator found: gaps are considered actionable by default
+        return True
+    
+    def _clean_keyword_list(self, keywords: List[str]) -> List[str]:
+        """
+        Normalise and deduplicate keywords, preserving first-seen order.
+
+        Each string entry is stripped and lower-cased. Entries are dropped when
+        they are empty or not strings, shorter than two characters after
+        normalisation, listed in ``self.stop_words``, or already seen.
+        """
+        # A dict keeps insertion order, so it serves as an ordered set here.
+        unique = {}
+
+        for raw in keywords:
+            if raw and isinstance(raw, str):
+                normalized = raw.strip().lower()
+                is_usable = (
+                    len(normalized) >= 2
+                    and normalized not in self.stop_words
+                    and normalized not in unique
+                )
+                if is_usable:
+                    unique[normalized] = None
+
+        return list(unique)
+    
+    def _parse_date(self, date_str: str) -> Optional[datetime]:
+        """
+        Convert a date string into a naive datetime using a list of known formats.
+
+        Formats are tried in order and the first successful parse wins, so an
+        ambiguous slash-separated date is read as month/day/year before
+        day/month/year. Returns None for empty input or when no format matches.
+        """
+        if not date_str:
+            return None
+
+        known_formats = (
+            '%Y-%m-%d',
+            '%Y-%m-%dT%H:%M:%S',
+            '%Y-%m-%dT%H:%M:%SZ',
+            '%Y-%m-%dT%H:%M:%S.%fZ',
+            '%B %d, %Y',
+            '%b %d, %Y',
+            '%d %B %Y',
+            '%d %b %Y',
+            '%m/%d/%Y',
+            '%d/%m/%Y',
+        )
+
+        for pattern in known_formats:
+            try:
+                parsed = datetime.strptime(date_str, pattern)
+            except ValueError:
+                continue
+            else:
+                return parsed
+
+        return None
+    
+    def _log_filtering_results(self, original_counts: Dict[str, int], filtered_research: BlogResearchResponse):
+        """
+        Log before/after counts for each filtered data category.
+
+        Args:
+            original_counts: Item counts per category taken before filtering.
+                Recognised keys are 'sources', 'grounding_chunks',
+                'grounding_supports', 'citations' and 'content_gaps'.
+            filtered_research: The research response after filtering, from which
+                the "after" counts are read.
+        """
+        metadata = filtered_research.grounding_metadata
+        # keyword_analysis (or its content_gaps entry) may be missing or None.
+        keyword_analysis = filtered_research.keyword_analysis or {}
+        remaining_gaps = len(keyword_analysis.get('content_gaps') or [])
+
+        filtered_counts = {
+            'sources': len(filtered_research.sources),
+            'grounding_chunks': len(metadata.grounding_chunks) if metadata else 0,
+            'grounding_supports': len(metadata.grounding_supports) if metadata else 0,
+            'citations': len(metadata.citations) if metadata else 0,
+            'content_gaps': remaining_gaps,
+        }
+
+        logger.info("📊 Research Data Filtering Results:")
+        for key, original_count in original_counts.items():
+            filtered_count = filtered_counts.get(key)
+            if filtered_count is None:
+                # Unknown category: report it rather than failing with KeyError.
+                logger.info(f"  {key}: {original_count} → (not tracked)")
+                continue
+            reduction_percent = ((original_count - filtered_count) / original_count * 100) if original_count > 0 else 0
+            logger.info(f"  {key}: {original_count} → {filtered_count} ({reduction_percent:.1f}% reduction)")
+
+        # The pre-filter gap count is only known when the caller supplied it.
+        # Reading it from the filtered response would always log "N → N".
+        if 'content_gaps' not in original_counts:
+            logger.info(f"  content_gaps: {remaining_gaps} remaining after filtering")
+
+        logger.info("✅ Research data filtering completed successfully")
--- a/backend/services/blog_writer/research/research_service.py
+++ b/backend/services/blog_writer/research/research_service.py
@@ -11,11 +11,16 @@ from models.blog_models import (
    BlogResearchRequest,
    BlogResearchResponse,
    ResearchSource,
+    GroundingMetadata,
+    GroundingChunk,
+    GroundingSupport,
+    Citation,
 )

 from .keyword_analyzer import KeywordAnalyzer
 from .competitor_analyzer import CompetitorAnalyzer
 from .content_angle_generator import ContentAngleGenerator
+from .data_filter import ResearchDataFilter


 class ResearchService:
@@ -25,6 +30,7 @@ class ResearchService:
        self.keyword_analyzer = KeywordAnalyzer()
        self.competitor_analyzer = CompetitorAnalyzer()
        self.content_angle_generator = ContentAngleGenerator()
+        self.data_filter = ResearchDataFilter()
    
    async def research(self, request: BlogResearchRequest) -> BlogResearchResponse:
        """
@@ -85,6 +91,9 @@ class ResearchService:
            # Extract sources from grounding metadata
            sources = self._extract_sources_from_grounding(gemini_result)
            
+            # Extract grounding metadata for detailed UI display
+            grounding_metadata = self._extract_grounding_metadata(gemini_result)
+            
            # Extract search widget and queries for UI display
            search_widget = gemini_result.get("search_widget", "") or ""
            search_queries = gemini_result.get("search_queries", []) or []
@@ -107,17 +116,31 @@ class ResearchService:
                # Add search widget and queries for UI display
                search_widget=search_widget if 'search_widget' in locals() else "",
                search_queries=search_queries if 'search_queries' in locals() else [],
+                # Add grounding metadata for detailed UI display
+                grounding_metadata=grounding_metadata,
            )
            
-            # Cache the successful result for future exact keyword matches
+            # Filter and clean research data for optimal AI processing
+            filtered_response = self.data_filter.filter_research_data(response)
+            logger.info("Research data filtering completed successfully")
+            
+            # Cache the successful result for future exact keyword matches (both caches)
+            persistent_research_cache.cache_result(
+                keywords=request.keywords,
+                industry=industry,
+                target_audience=target_audience,
+                result=filtered_response.dict()
+            )
+            
+            # Also cache in memory for faster access
            research_cache.cache_result(
                keywords=request.keywords,
                industry=industry,
                target_audience=target_audience,
-                result=response.dict()
+                result=filtered_response.dict()
            )
            
-            return response
+            return filtered_response
            
        except Exception as e:
            error_message = str(e)
@@ -142,27 +165,38 @@ class ResearchService:
        try:
            from services.llm_providers.gemini_grounded_provider import GeminiGroundedProvider
            from services.cache.research_cache import research_cache
-            from api.blog_writer.router import _update_progress
+            from services.cache.persistent_research_cache import persistent_research_cache
+            from api.blog_writer.task_manager import task_manager
            
            topic = request.topic or ", ".join(request.keywords)
            industry = request.industry or (request.persona.industry if request.persona and request.persona.industry else "General")
            target_audience = getattr(request.persona, 'target_audience', 'General') if request.persona else 'General'
            
-            # Check cache first for exact keyword match
-            await _update_progress(task_id, "🔍 Checking cache for existing research...")
-            cached_result = research_cache.get_cached_result(
+            # Check cache first for exact keyword match (try both caches)
+            await task_manager.update_progress(task_id, "🔍 Checking cache for existing research...")
+            
+            # Try persistent cache first (survives restarts)
+            cached_result = persistent_research_cache.get_cached_result(
                keywords=request.keywords,
                industry=industry,
                target_audience=target_audience
            )
            
+            # Fallback to in-memory cache
+            if not cached_result:
+                cached_result = research_cache.get_cached_result(
+                    keywords=request.keywords,
+                    industry=industry,
+                    target_audience=target_audience
+                )
+            
            if cached_result:
-                await _update_progress(task_id, "✅ Found cached research results! Returning instantly...")
+                await task_manager.update_progress(task_id, "✅ Found cached research results! Returning instantly...")
                logger.info(f"Returning cached research result for keywords: {request.keywords}")
                return BlogResearchResponse(**cached_result)
            
            # Cache miss - proceed with API call
-            await _update_progress(task_id, "🌐 Cache miss - connecting to Google Search grounding...")
+            await task_manager.update_progress(task_id, "🌐 Cache miss - connecting to Google Search grounding...")
            logger.info(f"Cache miss - making API call for keywords: {request.keywords}")
            gemini = GeminiGroundedProvider()

@@ -185,7 +219,7 @@ class ResearchService:
            Structure your response with clear sections for each analysis area.
            """
            
-            await _update_progress(task_id, "🤖 Making AI request to Gemini with Google Search grounding...")
+            await task_manager.update_progress(task_id, "🤖 Making AI request to Gemini with Google Search grounding...")
            # Single Gemini call with native Google Search grounding - no fallbacks
            gemini_result = await gemini.generate_grounded_content(
                prompt=research_prompt,
@@ -193,22 +227,25 @@ class ResearchService:
                max_tokens=2000
            )
            
-            await _update_progress(task_id, "📊 Processing research results and extracting insights...")
+            await task_manager.update_progress(task_id, "📊 Processing research results and extracting insights...")
            # Extract sources from grounding metadata
            sources = self._extract_sources_from_grounding(gemini_result)
            
+            # Extract grounding metadata for detailed UI display
+            grounding_metadata = self._extract_grounding_metadata(gemini_result)
+            
            # Extract search widget and queries for UI display
            search_widget = gemini_result.get("search_widget", "") or ""
            search_queries = gemini_result.get("search_queries", []) or []
            
-            await _update_progress(task_id, "🔍 Analyzing keywords and content angles...")
+            await task_manager.update_progress(task_id, "🔍 Analyzing keywords and content angles...")
            # Parse the comprehensive response for different analysis components
            content = gemini_result.get("content", "")
            keyword_analysis = self.keyword_analyzer.analyze(content, request.keywords)
            competitor_analysis = self.competitor_analyzer.analyze(content)
            suggested_angles = self.content_angle_generator.generate(content, topic, industry)
            
-            await _update_progress(task_id, "💾 Caching results for future use...")
+            await task_manager.update_progress(task_id, "💾 Caching results for future use...")
            logger.info(f"Research completed successfully with {len(sources)} sources and {len(search_queries)} search queries")

            # Create the response
@@ -221,17 +258,34 @@ class ResearchService:
                # Add search widget and queries for UI display
                search_widget=search_widget if 'search_widget' in locals() else "",
                search_queries=search_queries if 'search_queries' in locals() else [],
+                # Add grounding metadata for detailed UI display
+                grounding_metadata=grounding_metadata,
+                # Preserve original user keywords for caching
+                original_keywords=request.keywords,
            )
            
-            # Cache the successful result for future exact keyword matches
+            # Filter and clean research data for optimal AI processing
+            await task_manager.update_progress(task_id, "🔍 Filtering and cleaning research data...")
+            filtered_response = self.data_filter.filter_research_data(response)
+            logger.info("Research data filtering completed successfully")
+            
+            # Cache the successful result for future exact keyword matches (both caches)
+            persistent_research_cache.cache_result(
+                keywords=request.keywords,
+                industry=industry,
+                target_audience=target_audience,
+                result=filtered_response.dict()
+            )
+            
+            # Also cache in memory for faster access
            research_cache.cache_result(
                keywords=request.keywords,
                industry=industry,
                target_audience=target_audience,
-                result=response.dict()
+                result=filtered_response.dict()
            )
            
-            return response
+            return filtered_response
            
        except Exception as e:
            error_message = str(e)
@@ -261,8 +315,104 @@ class ResearchService:
                url=src.get("url", ""),
                excerpt=src.get("content", "")[:500] if src.get("content") else f"Source from {src.get('title', 'web')}",
                credibility_score=float(src.get("credibility_score", 0.8)),
-                published_at=str(src.get("publication_date", "2024-01-01"))
+                published_at=str(src.get("publication_date", "2024-01-01")),
+                index=src.get("index"),
+                source_type=src.get("type", "web")
            )
            sources.append(source)
        
        return sources
+
+    def _extract_grounding_metadata(self, gemini_result: Dict[str, Any]) -> GroundingMetadata:
+        """
+        Extract detailed grounding metadata from Gemini result.
+
+        Reads ``gemini_result["grounding_metadata"]`` (either a dict or an object
+        exposing the same fields as attributes) and ``gemini_result["citations"]``
+        and converts them into the project's grounding models. Each web chunk
+        receives the average confidence of the last support that references it.
+
+        Args:
+            gemini_result: Result dict returned by the grounded Gemini provider.
+
+        Returns:
+            GroundingMetadata with chunks, supports, citations, the rendered
+            search entry point and the web search queries. Missing or None
+            sections yield empty lists / an empty string.
+        """
+        grounding_chunks = []
+        grounding_supports = []
+        citations = []
+
+        # A key that is present but None must not break the dict lookups below.
+        raw_grounding = gemini_result.get("grounding_metadata") or {}
+
+        # Handle case where grounding_metadata might be a GroundingMetadata object
+        if hasattr(raw_grounding, 'grounding_chunks'):
+            raw_chunks = raw_grounding.grounding_chunks or []
+        else:
+            raw_chunks = raw_grounding.get("grounding_chunks") or []
+
+        # Supports reference chunks by their position in the RAW list, so remember
+        # which raw position each extracted chunk came from; non-web chunks are
+        # skipped and would otherwise shift the positions.
+        # NOTE(review): chunks are read dict-style ("web" in chunk) even when the
+        # container is an object - confirm the object form holds dict chunks.
+        chunk_by_raw_index = {}
+        for raw_index, chunk in enumerate(raw_chunks):
+            if "web" in chunk:
+                web_data = chunk["web"]
+                grounding_chunk = GroundingChunk(
+                    title=web_data.get("title", "Untitled"),
+                    url=web_data.get("uri", ""),
+                    confidence_score=None  # Will be set from supports
+                )
+                grounding_chunks.append(grounding_chunk)
+                chunk_by_raw_index[raw_index] = grounding_chunk
+
+        # Extract grounding supports with confidence scores
+        if hasattr(raw_grounding, 'grounding_supports'):
+            raw_supports = raw_grounding.grounding_supports or []
+        else:
+            raw_supports = raw_grounding.get("grounding_supports") or []
+        for support in raw_supports:
+            # Handle both dictionary and GroundingSupport object formats
+            if hasattr(support, 'confidence_scores'):
+                confidence_scores = support.confidence_scores
+                chunk_indices = support.grounding_chunk_indices
+                segment_text = getattr(support, 'segment_text', '')
+                start_index = getattr(support, 'start_index', None)
+                end_index = getattr(support, 'end_index', None)
+            else:
+                confidence_scores = support.get("confidence_scores") or []
+                chunk_indices = support.get("grounding_chunk_indices") or []
+                segment = support.get("segment") or {}
+                segment_text = segment.get("text", "")
+                start_index = segment.get("start_index")
+                end_index = segment.get("end_index")
+
+            grounding_support = GroundingSupport(
+                confidence_scores=confidence_scores,
+                grounding_chunk_indices=chunk_indices,
+                segment_text=segment_text,
+                start_index=start_index,
+                end_index=end_index
+            )
+            grounding_supports.append(grounding_support)
+
+            # Update confidence scores for chunks; a later support overwrites the
+            # value written by an earlier one.
+            if confidence_scores and chunk_indices:
+                avg_confidence = sum(confidence_scores) / len(confidence_scores)
+                for idx in chunk_indices:
+                    # Unknown, negative or None indices simply find no chunk.
+                    target_chunk = chunk_by_raw_index.get(idx)
+                    if target_chunk is not None:
+                        target_chunk.confidence_score = avg_confidence
+
+        # Extract citations from the raw result
+        raw_citations = gemini_result.get("citations") or []
+        for citation in raw_citations:
+            citation_obj = Citation(
+                citation_type=citation.get("type", "inline"),
+                start_index=citation.get("start_index", 0),
+                end_index=citation.get("end_index", 0),
+                text=citation.get("text", ""),
+                source_indices=citation.get("source_indices", []),
+                reference=citation.get("reference", "")
+            )
+            citations.append(citation_obj)
+
+        # Extract search entry point and web search queries
+        if hasattr(raw_grounding, 'search_entry_point'):
+            search_entry_point = getattr(raw_grounding.search_entry_point, 'rendered_content', '') if raw_grounding.search_entry_point else ''
+        else:
+            search_entry_point = (raw_grounding.get("search_entry_point") or {}).get("rendered_content", "")
+
+        if hasattr(raw_grounding, 'web_search_queries'):
+            web_search_queries = raw_grounding.web_search_queries or []
+        else:
+            web_search_queries = raw_grounding.get("web_search_queries") or []
+
+        return GroundingMetadata(
+            grounding_chunks=grounding_chunks,
+            grounding_supports=grounding_supports,
+            citations=citations,
+            search_entry_point=search_entry_point,
+            web_search_queries=web_search_queries
+        )
--- a/backend/services/cache/persistent_outline_cache.py
+++ b/backend/services/cache/persistent_outline_cache.py
@@ -0,0 +1,332 @@
+"""
+Persistent Outline Cache Service
+
+Provides database-backed caching for outline generation results to survive server restarts
+and provide better cache management across multiple instances.
+"""
+
+import hashlib
+import json
+import sqlite3
+from typing import Dict, Any, Optional, List
+from datetime import datetime, timedelta
+from pathlib import Path
+from loguru import logger
+
+
+class PersistentOutlineCache:
+    """Database-backed cache for outline generation results with exact parameter matching.
+
+    Entries live in a single SQLite table and are addressed by an MD5 hash of the
+    normalized request parameters (keywords, industry, target audience, word count,
+    custom instructions and persona data). Every entry stores an ISO-formatted
+    expiry timestamp; expired rows are purged whenever a new entry is written or
+    statistics are requested.
+    """
+
+    def __init__(self, db_path: str = "outline_cache.db", max_cache_size: int = 500, cache_ttl_hours: int = 48):
+        """
+        Initialize the persistent outline cache.
+
+        Args:
+            db_path: Path to SQLite database file
+            max_cache_size: Maximum number of cached entries
+            cache_ttl_hours: Time-to-live for cache entries in hours
+        """
+        self.db_path = db_path
+        self.max_cache_size = max_cache_size
+        self.cache_ttl = timedelta(hours=cache_ttl_hours)
+
+        # Ensure database directory exists
+        Path(db_path).parent.mkdir(parents=True, exist_ok=True)
+
+        # Initialize database
+        self._init_database()
+
+    def _connect(self):
+        """
+        Open a connection to the cache database that is closed when the ``with`` block exits.
+
+        A bare ``with sqlite3.connect(...)`` only commits or rolls back the
+        transaction; it never closes the connection. Wrapping the connection in
+        ``contextlib.closing`` releases the handle deterministically. Callers
+        must still call ``conn.commit()`` explicitly after writing.
+
+        Returns:
+            Context manager yielding an open ``sqlite3.Connection``
+        """
+        # Local import keeps this class self-contained (no new module-level dependency).
+        from contextlib import closing
+
+        return closing(sqlite3.connect(self.db_path))
+
+    @staticmethod
+    def _normalize_keywords(keywords: Optional[List[str]]) -> List[str]:
+        """Return the keywords lower-cased, stripped and sorted; ``None`` becomes an empty list."""
+        return sorted(kw.lower().strip() for kw in (keywords or []))
+
+    def _init_database(self):
+        """Initialize the SQLite database with required tables and indexes."""
+        with self._connect() as conn:
+            conn.execute("""
+                CREATE TABLE IF NOT EXISTS outline_cache (
+                    id INTEGER PRIMARY KEY AUTOINCREMENT,
+                    cache_key TEXT UNIQUE NOT NULL,
+                    keywords TEXT NOT NULL,
+                    industry TEXT NOT NULL,
+                    target_audience TEXT NOT NULL,
+                    word_count INTEGER NOT NULL,
+                    custom_instructions TEXT,
+                    persona_data TEXT,
+                    result_data TEXT NOT NULL,
+                    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
+                    expires_at TIMESTAMP NOT NULL,
+                    access_count INTEGER DEFAULT 0,
+                    last_accessed TIMESTAMP DEFAULT CURRENT_TIMESTAMP
+                )
+            """)
+
+            # Create indexes for better performance
+            conn.execute("CREATE INDEX IF NOT EXISTS idx_outline_cache_key ON outline_cache(cache_key)")
+            conn.execute("CREATE INDEX IF NOT EXISTS idx_outline_expires_at ON outline_cache(expires_at)")
+            conn.execute("CREATE INDEX IF NOT EXISTS idx_outline_created_at ON outline_cache(created_at)")
+            conn.execute("CREATE INDEX IF NOT EXISTS idx_outline_keywords ON outline_cache(keywords)")
+
+            conn.commit()
+
+    def _generate_cache_key(self, keywords: List[str], industry: str, target_audience: str,
+                           word_count: int, custom_instructions: Optional[str] = None,
+                           persona_data: Optional[Dict] = None) -> str:
+        """
+        Generate a cache key based on exact parameter match.
+
+        Args:
+            keywords: List of research keywords
+            industry: Industry context
+            target_audience: Target audience context
+            word_count: Target word count for outline
+            custom_instructions: Custom instructions for outline generation
+            persona_data: Persona information
+
+        Returns:
+            MD5 hash of the normalized parameters
+        """
+        # Normalize and sort keywords for consistent hashing
+        normalized_keywords = self._normalize_keywords(keywords)
+        normalized_industry = industry.lower().strip() if industry else "general"
+        normalized_audience = target_audience.lower().strip() if target_audience else "general"
+        normalized_instructions = custom_instructions.lower().strip() if custom_instructions else ""
+
+        # Normalize persona data: sorted keys give a stable string for equal dicts
+        normalized_persona = ""
+        if persona_data:
+            normalized_persona = json.dumps(persona_data, sort_keys=True, default=str).lower()
+
+        # The layout of this string must stay stable: changing it would orphan
+        # every entry already stored under the old hash.
+        cache_string = f"{normalized_keywords}|{normalized_industry}|{normalized_audience}|{word_count}|{normalized_instructions}|{normalized_persona}"
+
+        # MD5 is used purely as a compact fingerprint, not for security
+        return hashlib.md5(cache_string.encode('utf-8')).hexdigest()
+
+    def _cleanup_expired_entries(self):
+        """Remove expired cache entries from database."""
+        with self._connect() as conn:
+            cursor = conn.execute(
+                "DELETE FROM outline_cache WHERE expires_at < ?",
+                (datetime.now().isoformat(),)
+            )
+            deleted_count = cursor.rowcount
+            conn.commit()
+
+        if deleted_count > 0:
+            logger.debug(f"Removed {deleted_count} expired outline cache entries")
+
+    def _evict_oldest_entries(self, num_to_evict: int):
+        """
+        Evict the oldest cache entries when cache is full.
+
+        Args:
+            num_to_evict: Number of entries to remove; values <= 0 are a no-op
+        """
+        # SQLite treats a negative LIMIT as "no limit", which would wipe the table.
+        if num_to_evict <= 0:
+            return
+
+        with self._connect() as conn:
+            # created_at only has one-second resolution, so the autoincrement id
+            # breaks ties and keeps eviction order deterministic.
+            cursor = conn.execute("""
+                DELETE FROM outline_cache
+                WHERE id IN (
+                    SELECT id FROM outline_cache
+                    ORDER BY created_at ASC, id ASC
+                    LIMIT ?
+                )
+            """, (num_to_evict,))
+            evicted_count = cursor.rowcount
+            conn.commit()
+
+        if evicted_count > 0:
+            logger.debug(f"Evicted {evicted_count} oldest outline cache entries")
+
+    def get_cached_outline(self, keywords: List[str], industry: str, target_audience: str,
+                          word_count: int, custom_instructions: Optional[str] = None,
+                          persona_data: Optional[Dict] = None) -> Optional[Dict[str, Any]]:
+        """
+        Get cached outline result for exact parameter match.
+
+        Args:
+            keywords: List of research keywords
+            industry: Industry context
+            target_audience: Target audience context
+            word_count: Target word count for outline
+            custom_instructions: Custom instructions for outline generation
+            persona_data: Persona information
+
+        Returns:
+            Cached outline result if found and valid, None otherwise
+        """
+        cache_key = self._generate_cache_key(keywords, industry, target_audience, word_count, custom_instructions, persona_data)
+
+        with self._connect() as conn:
+            row = conn.execute("""
+                SELECT result_data FROM outline_cache
+                WHERE cache_key = ? AND expires_at > ?
+            """, (cache_key, datetime.now().isoformat())).fetchone()
+
+            if row is None:
+                logger.debug(f"Outline cache miss for keywords: {keywords}, word_count: {word_count}")
+                return None
+
+            try:
+                result_data = json.loads(row[0])
+            except json.JSONDecodeError:
+                logger.error(f"Invalid JSON in outline cache for keywords: {keywords}")
+                # Remove invalid entry so it is regenerated on the next request
+                conn.execute("DELETE FROM outline_cache WHERE cache_key = ?", (cache_key,))
+                conn.commit()
+                return None
+
+            # Update access statistics
+            conn.execute("""
+                UPDATE outline_cache
+                SET access_count = access_count + 1, last_accessed = CURRENT_TIMESTAMP
+                WHERE cache_key = ?
+            """, (cache_key,))
+            conn.commit()
+
+        logger.info(f"Outline cache hit for keywords: {keywords}, word_count: {word_count} (saved expensive generation)")
+        return result_data
+
+    def cache_outline(self, keywords: List[str], industry: str, target_audience: str,
+                     word_count: int, custom_instructions: Optional[str], persona_data: Optional[Dict],
+                     result: Dict[str, Any]):
+        """
+        Cache an outline generation result.
+
+        Args:
+            keywords: List of research keywords
+            industry: Industry context
+            target_audience: Target audience context
+            word_count: Target word count for outline
+            custom_instructions: Custom instructions for outline generation
+            persona_data: Persona information
+            result: Outline result to cache
+
+        Raises:
+            TypeError: If ``keywords`` or ``result`` cannot be serialized to JSON
+        """
+        cache_key = self._generate_cache_key(keywords, industry, target_audience, word_count, custom_instructions, persona_data)
+
+        # Serialize everything before touching the database, so an unserializable
+        # payload fails without having evicted valid entries first.
+        keywords_json = json.dumps(keywords)
+        # default=str mirrors _generate_cache_key: a persona that can be hashed
+        # can also be stored.
+        persona_json = json.dumps(persona_data, default=str) if persona_data else ""
+        result_json = json.dumps(result)
+
+        # Cleanup expired entries first
+        self._cleanup_expired_entries()
+
+        # Check if cache is full and evict if necessary
+        with self._connect() as conn:
+            current_count = conn.execute("SELECT COUNT(*) FROM outline_cache").fetchone()[0]
+
+        if current_count >= self.max_cache_size:
+            # +1 makes room for the entry about to be inserted
+            self._evict_oldest_entries(current_count - self.max_cache_size + 1)
+
+        # Store the result
+        expires_at = datetime.now() + self.cache_ttl
+
+        with self._connect() as conn:
+            # INSERT OR REPLACE recreates the row, so created_at and access_count
+            # restart for a refreshed key.
+            conn.execute("""
+                INSERT OR REPLACE INTO outline_cache
+                (cache_key, keywords, industry, target_audience, word_count, custom_instructions, persona_data, result_data, expires_at)
+                VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
+            """, (
+                cache_key,
+                keywords_json,
+                industry,
+                target_audience,
+                word_count,
+                custom_instructions or "",
+                persona_json,
+                result_json,
+                expires_at.isoformat()
+            ))
+            conn.commit()
+
+        logger.info(f"Cached outline result for keywords: {keywords}, word_count: {word_count}")
+
+    def get_cache_stats(self) -> Dict[str, Any]:
+        """
+        Get cache statistics.
+
+        Expired entries are purged before counting.
+
+        Returns:
+            Dict with entry counts, configured limits, database size in MB and
+            the ten most accessed entries
+        """
+        self._cleanup_expired_entries()
+
+        with self._connect() as conn:
+            # Get basic stats
+            total_entries = conn.execute("SELECT COUNT(*) FROM outline_cache").fetchone()[0]
+
+            valid_entries = conn.execute(
+                "SELECT COUNT(*) FROM outline_cache WHERE expires_at > ?",
+                (datetime.now().isoformat(),)
+            ).fetchone()[0]
+
+            # Get most accessed entries
+            cursor = conn.execute("""
+                SELECT keywords, industry, target_audience, word_count, access_count, created_at
+                FROM outline_cache
+                ORDER BY access_count DESC
+                LIMIT 10
+            """)
+            top_entries = [
+                {
+                    'keywords': json.loads(row[0]),
+                    'industry': row[1],
+                    'target_audience': row[2],
+                    'word_count': row[3],
+                    'access_count': row[4],
+                    'created_at': row[5]
+                }
+                for row in cursor.fetchall()
+            ]
+
+            # Get database size
+            cursor = conn.execute("SELECT page_count * page_size as size FROM pragma_page_count(), pragma_page_size()")
+            db_size_bytes = cursor.fetchone()[0]
+            db_size_mb = db_size_bytes / (1024 * 1024)
+
+        return {
+            'total_entries': total_entries,
+            'valid_entries': valid_entries,
+            'expired_entries': total_entries - valid_entries,
+            'max_size': self.max_cache_size,
+            'ttl_hours': self.cache_ttl.total_seconds() / 3600,
+            'database_size_mb': round(db_size_mb, 2),
+            'top_accessed_entries': top_entries
+        }
+
+    def clear_cache(self):
+        """Clear all cached entries."""
+        with self._connect() as conn:
+            conn.execute("DELETE FROM outline_cache")
+            conn.commit()
+        logger.info("Outline cache cleared")
+
+    def get_cache_entries(self, limit: int = 50) -> List[Dict[str, Any]]:
+        """
+        Get recent cache entries for debugging.
+
+        Args:
+            limit: Maximum number of entries to return, newest first
+
+        Returns:
+            List of dicts describing each entry (the cached result itself is not included)
+        """
+        with self._connect() as conn:
+            # id breaks ties between rows created within the same second
+            cursor = conn.execute("""
+                SELECT keywords, industry, target_audience, word_count, custom_instructions, created_at, expires_at, access_count
+                FROM outline_cache
+                ORDER BY created_at DESC, id DESC
+                LIMIT ?
+            """, (limit,))
+
+            return [
+                {
+                    'keywords': json.loads(row[0]),
+                    'industry': row[1],
+                    'target_audience': row[2],
+                    'word_count': row[3],
+                    'custom_instructions': row[4],
+                    'created_at': row[5],
+                    'expires_at': row[6],
+                    'access_count': row[7]
+                }
+                for row in cursor.fetchall()
+            ]
+
+    def invalidate_cache_for_keywords(self, keywords: List[str]):
+        """
+        Invalidate all cache entries for specific keywords.
+        Useful when research data is updated.
+
+        Matching ignores case, surrounding whitespace and keyword order, the same
+        normalization used for cache keys.
+
+        Args:
+            keywords: Keywords to invalidate cache for
+        """
+        target_keywords = self._normalize_keywords(keywords)
+
+        with self._connect() as conn:
+            # The keywords column holds the caller's original list, so a direct
+            # SQL comparison against the normalized form would miss any entry
+            # whose keywords differ in case, whitespace or order. Normalize each
+            # stored list here instead.
+            stale_ids = []
+            for row_id, stored_keywords in conn.execute("SELECT id, keywords FROM outline_cache").fetchall():
+                try:
+                    stored_normalized = self._normalize_keywords(json.loads(stored_keywords))
+                except (TypeError, ValueError, AttributeError):
+                    # Unparseable or non-string keyword data cannot match; leave the row alone
+                    continue
+                if stored_normalized == target_keywords:
+                    stale_ids.append((row_id,))
+
+            if stale_ids:
+                conn.executemany("DELETE FROM outline_cache WHERE id = ?", stale_ids)
+                conn.commit()
+
+        deleted_count = len(stale_ids)
+        if deleted_count > 0:
+            logger.info(f"Invalidated {deleted_count} outline cache entries for keywords: {keywords}")
+
+
+# Global persistent cache instance, built with the constructor defaults.
+# NOTE: instantiating at import time runs the constructor's database setup, so the
+# default "outline_cache.db" file is opened (and created if missing) as soon as
+# this module is imported.
+persistent_outline_cache = PersistentOutlineCache()
--- a/backend/services/cache/persistent_research_cache.py
+++ b/backend/services/cache/persistent_research_cache.py
@@ -0,0 +1,283 @@
+"""
+Persistent Research Cache Service
+
+Provides database-backed caching for research results to survive server restarts
+and provide better cache management across multiple instances.
+"""
+
+import hashlib
+import json
+import sqlite3
+from typing import Dict, Any, Optional, List
+from datetime import datetime, timedelta
+from pathlib import Path
+from loguru import logger
+
+
+class PersistentResearchCache:
+    """Database-backed cache for research results with exact keyword matching.
+
+    Entries live in a single SQLite table and are addressed by an MD5 hash of the
+    normalized keywords, industry and target audience. Every entry stores an
+    ISO-formatted expiry timestamp; expired rows are purged whenever a new entry
+    is written or statistics are requested.
+    """
+
+    def __init__(self, db_path: str = "research_cache.db", max_cache_size: int = 1000, cache_ttl_hours: int = 24):
+        """
+        Initialize the persistent research cache.
+
+        Args:
+            db_path: Path to SQLite database file
+            max_cache_size: Maximum number of cached entries
+            cache_ttl_hours: Time-to-live for cache entries in hours
+        """
+        self.db_path = db_path
+        self.max_cache_size = max_cache_size
+        self.cache_ttl = timedelta(hours=cache_ttl_hours)
+
+        # Ensure database directory exists
+        Path(db_path).parent.mkdir(parents=True, exist_ok=True)
+
+        # Initialize database
+        self._init_database()
+
+    def _connect(self):
+        """
+        Open a connection to the cache database that is closed when the ``with`` block exits.
+
+        A bare ``with sqlite3.connect(...)`` only commits or rolls back the
+        transaction; it never closes the connection. Wrapping the connection in
+        ``contextlib.closing`` releases the handle deterministically. Callers
+        must still call ``conn.commit()`` explicitly after writing.
+
+        Returns:
+            Context manager yielding an open ``sqlite3.Connection``
+        """
+        # Local import keeps this class self-contained (no new module-level dependency).
+        from contextlib import closing
+
+        return closing(sqlite3.connect(self.db_path))
+
+    def _init_database(self):
+        """Initialize the SQLite database with required tables and indexes."""
+        with self._connect() as conn:
+            conn.execute("""
+                CREATE TABLE IF NOT EXISTS research_cache (
+                    id INTEGER PRIMARY KEY AUTOINCREMENT,
+                    cache_key TEXT UNIQUE NOT NULL,
+                    keywords TEXT NOT NULL,
+                    industry TEXT NOT NULL,
+                    target_audience TEXT NOT NULL,
+                    result_data TEXT NOT NULL,
+                    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
+                    expires_at TIMESTAMP NOT NULL,
+                    access_count INTEGER DEFAULT 0,
+                    last_accessed TIMESTAMP DEFAULT CURRENT_TIMESTAMP
+                )
+            """)
+
+            # Create indexes for better performance
+            conn.execute("CREATE INDEX IF NOT EXISTS idx_cache_key ON research_cache(cache_key)")
+            conn.execute("CREATE INDEX IF NOT EXISTS idx_expires_at ON research_cache(expires_at)")
+            conn.execute("CREATE INDEX IF NOT EXISTS idx_created_at ON research_cache(created_at)")
+
+            conn.commit()
+
+    def _generate_cache_key(self, keywords: List[str], industry: str, target_audience: str) -> str:
+        """
+        Generate a cache key based on exact keyword match.
+
+        Args:
+            keywords: List of research keywords
+            industry: Industry context
+            target_audience: Target audience context
+
+        Returns:
+            MD5 hash of the normalized parameters
+        """
+        # Normalize and sort keywords for consistent hashing; None counts as "no keywords"
+        normalized_keywords = sorted(kw.lower().strip() for kw in (keywords or []))
+        normalized_industry = industry.lower().strip() if industry else "general"
+        normalized_audience = target_audience.lower().strip() if target_audience else "general"
+
+        # The layout of this string must stay stable: changing it would orphan
+        # every entry already stored under the old hash.
+        cache_string = f"{normalized_keywords}|{normalized_industry}|{normalized_audience}"
+
+        # MD5 is used purely as a compact fingerprint, not for security
+        return hashlib.md5(cache_string.encode('utf-8')).hexdigest()
+
+    def _cleanup_expired_entries(self):
+        """Remove expired cache entries from database."""
+        with self._connect() as conn:
+            cursor = conn.execute(
+                "DELETE FROM research_cache WHERE expires_at < ?",
+                (datetime.now().isoformat(),)
+            )
+            deleted_count = cursor.rowcount
+            conn.commit()
+
+        if deleted_count > 0:
+            logger.debug(f"Removed {deleted_count} expired cache entries")
+
+    def _evict_oldest_entries(self, num_to_evict: int):
+        """
+        Evict the oldest cache entries when cache is full.
+
+        Args:
+            num_to_evict: Number of entries to remove; values <= 0 are a no-op
+        """
+        # SQLite treats a negative LIMIT as "no limit", which would wipe the table.
+        if num_to_evict <= 0:
+            return
+
+        with self._connect() as conn:
+            # created_at only has one-second resolution, so the autoincrement id
+            # breaks ties and keeps eviction order deterministic.
+            cursor = conn.execute("""
+                DELETE FROM research_cache
+                WHERE id IN (
+                    SELECT id FROM research_cache
+                    ORDER BY created_at ASC, id ASC
+                    LIMIT ?
+                )
+            """, (num_to_evict,))
+            evicted_count = cursor.rowcount
+            conn.commit()
+
+        if evicted_count > 0:
+            logger.debug(f"Evicted {evicted_count} oldest cache entries")
+
+    def get_cached_result(self, keywords: List[str], industry: str, target_audience: str) -> Optional[Dict[str, Any]]:
+        """
+        Get cached research result for exact keyword match.
+
+        Args:
+            keywords: List of research keywords
+            industry: Industry context
+            target_audience: Target audience context
+
+        Returns:
+            Cached research result if found and valid, None otherwise
+        """
+        cache_key = self._generate_cache_key(keywords, industry, target_audience)
+
+        with self._connect() as conn:
+            row = conn.execute("""
+                SELECT result_data FROM research_cache
+                WHERE cache_key = ? AND expires_at > ?
+            """, (cache_key, datetime.now().isoformat())).fetchone()
+
+            if row is None:
+                logger.debug(f"Cache miss for keywords: {keywords}")
+                return None
+
+            try:
+                result_data = json.loads(row[0])
+            except json.JSONDecodeError:
+                logger.error(f"Invalid JSON in cache for keywords: {keywords}")
+                # Remove invalid entry so it is regenerated on the next request
+                conn.execute("DELETE FROM research_cache WHERE cache_key = ?", (cache_key,))
+                conn.commit()
+                return None
+
+            # Update access statistics
+            conn.execute("""
+                UPDATE research_cache
+                SET access_count = access_count + 1, last_accessed = CURRENT_TIMESTAMP
+                WHERE cache_key = ?
+            """, (cache_key,))
+            conn.commit()
+
+        logger.info(f"Cache hit for keywords: {keywords} (saved API call)")
+        return result_data
+
+    def cache_result(self, keywords: List[str], industry: str, target_audience: str, result: Dict[str, Any]):
+        """
+        Cache a research result.
+
+        Args:
+            keywords: List of research keywords
+            industry: Industry context
+            target_audience: Target audience context
+            result: Research result to cache
+
+        Raises:
+            TypeError: If ``keywords`` or ``result`` cannot be serialized to JSON
+        """
+        cache_key = self._generate_cache_key(keywords, industry, target_audience)
+
+        # Serialize everything before touching the database, so an unserializable
+        # payload fails without having evicted valid entries first.
+        keywords_json = json.dumps(keywords)
+        result_json = json.dumps(result)
+
+        # Cleanup expired entries first
+        self._cleanup_expired_entries()
+
+        # Check if cache is full and evict if necessary
+        with self._connect() as conn:
+            current_count = conn.execute("SELECT COUNT(*) FROM research_cache").fetchone()[0]
+
+        if current_count >= self.max_cache_size:
+            # +1 makes room for the entry about to be inserted
+            self._evict_oldest_entries(current_count - self.max_cache_size + 1)
+
+        # Store the result
+        expires_at = datetime.now() + self.cache_ttl
+
+        with self._connect() as conn:
+            # INSERT OR REPLACE recreates the row, so created_at and access_count
+            # restart for a refreshed key.
+            conn.execute("""
+                INSERT OR REPLACE INTO research_cache
+                (cache_key, keywords, industry, target_audience, result_data, expires_at)
+                VALUES (?, ?, ?, ?, ?, ?)
+            """, (
+                cache_key,
+                keywords_json,
+                industry,
+                target_audience,
+                result_json,
+                expires_at.isoformat()
+            ))
+            conn.commit()
+
+        logger.info(f"Cached research result for keywords: {keywords}")
+
+    def get_cache_stats(self) -> Dict[str, Any]:
+        """
+        Get cache statistics.
+
+        Expired entries are purged before counting.
+
+        Returns:
+            Dict with entry counts, configured limits, database size in MB and
+            the ten most accessed entries
+        """
+        self._cleanup_expired_entries()
+
+        with self._connect() as conn:
+            # Get basic stats
+            total_entries = conn.execute("SELECT COUNT(*) FROM research_cache").fetchone()[0]
+
+            valid_entries = conn.execute(
+                "SELECT COUNT(*) FROM research_cache WHERE expires_at > ?",
+                (datetime.now().isoformat(),)
+            ).fetchone()[0]
+
+            # Get most accessed entries
+            cursor = conn.execute("""
+                SELECT keywords, industry, target_audience, access_count, created_at
+                FROM research_cache
+                ORDER BY access_count DESC
+                LIMIT 10
+            """)
+            top_entries = [
+                {
+                    'keywords': json.loads(row[0]),
+                    'industry': row[1],
+                    'target_audience': row[2],
+                    'access_count': row[3],
+                    'created_at': row[4]
+                }
+                for row in cursor.fetchall()
+            ]
+
+            # Get database size
+            cursor = conn.execute("SELECT page_count * page_size as size FROM pragma_page_count(), pragma_page_size()")
+            db_size_bytes = cursor.fetchone()[0]
+            db_size_mb = db_size_bytes / (1024 * 1024)
+
+        return {
+            'total_entries': total_entries,
+            'valid_entries': valid_entries,
+            'expired_entries': total_entries - valid_entries,
+            'max_size': self.max_cache_size,
+            'ttl_hours': self.cache_ttl.total_seconds() / 3600,
+            'database_size_mb': round(db_size_mb, 2),
+            'top_accessed_entries': top_entries
+        }
+
+    def clear_cache(self):
+        """Clear all cached entries."""
+        with self._connect() as conn:
+            conn.execute("DELETE FROM research_cache")
+            conn.commit()
+        logger.info("Research cache cleared")
+
+    def get_cache_entries(self, limit: int = 50) -> List[Dict[str, Any]]:
+        """
+        Get recent cache entries for debugging.
+
+        Args:
+            limit: Maximum number of entries to return, newest first
+
+        Returns:
+            List of dicts describing each entry (the cached result itself is not included)
+        """
+        with self._connect() as conn:
+            # id breaks ties between rows created within the same second
+            cursor = conn.execute("""
+                SELECT keywords, industry, target_audience, created_at, expires_at, access_count
+                FROM research_cache
+                ORDER BY created_at DESC, id DESC
+                LIMIT ?
+            """, (limit,))
+
+            return [
+                {
+                    'keywords': json.loads(row[0]),
+                    'industry': row[1],
+                    'target_audience': row[2],
+                    'created_at': row[3],
+                    'expires_at': row[4],
+                    'access_count': row[5]
+                }
+                for row in cursor.fetchall()
+            ]
+
+
+# Global persistent cache instance, built with the constructor defaults.
+# NOTE: instantiating at import time runs the constructor's database setup, so the
+# default "research_cache.db" file is opened (and created if missing) as soon as
+# this module is imported.
+persistent_research_cache = PersistentResearchCache()
--- a/backend/services/llm_providers/gemini_grounded_provider.py
+++ b/backend/services/llm_providers/gemini_grounded_provider.py
@@ -89,12 +89,13 @@ class GeminiGroundedProvider:
                    logger.warning(f"URL Context tool not available in SDK version: {tool_err}")
            
            # Apply mode presets (Draft vs Polished)
-            model_id = "gemini-2.5-flash"
+            # Use Gemini 2.0 Flash for better content generation with grounding
+            model_id = "gemini-2.0-flash"
            if mode == "draft":
-                model_id = "gemini-2.5-flash-lite"
+                model_id = "gemini-2.0-flash"
                temperature = min(1.0, max(0.0, temperature))
            else:
-                model_id = "gemini-2.5-flash"
+                model_id = "gemini-2.0-flash"

            # Configure generation settings
            config = types.GenerateContentConfig(
@@ -189,7 +190,7 @@ class GeminiGroundedProvider:
                loop.run_in_executor(
                    executor,
                    lambda: self.client.models.generate_content(
-                        model="gemini-2.5-flash",
+                        model="gemini-2.0-flash",
                        contents=grounded_prompt,
                        config=config,
                    )
@@ -199,6 +200,10 @@ class GeminiGroundedProvider:

    async def _make_api_request_with_model(self, grounded_prompt: str, config: Any, model_id: str, urls: Optional[List[str]] = None):
        """Make the API request with explicit model id and optional URL injection."""
+        logger.info(f"🔍 DEBUG: Making API request with model: {model_id}")
+        logger.info(f"🔍 DEBUG: Prompt length: {len(grounded_prompt)} characters")
+        logger.info(f"🔍 DEBUG: Prompt preview (first 300 chars): {grounded_prompt[:300]}...")
+        
        import concurrent.futures
        loop = asyncio.get_event_loop()
        with concurrent.futures.ThreadPoolExecutor() as executor:
@@ -310,23 +315,70 @@ class GeminiGroundedProvider:
            Processed content with sources and citations
        """
        try:
-            # Extract the main content
+            # Debug: Log response structure
+            logger.info(f"🔍 DEBUG: Response type: {type(response)}")
+            logger.info(f"🔍 DEBUG: Response has 'text': {hasattr(response, 'text')}")
+            logger.info(f"🔍 DEBUG: Response has 'candidates': {hasattr(response, 'candidates')}")
+            logger.info(f"🔍 DEBUG: Response has 'grounding_metadata': {hasattr(response, 'grounding_metadata')}")
+            if hasattr(response, 'grounding_metadata'):
+                logger.info(f"🔍 DEBUG: Grounding metadata: {response.grounding_metadata}")
+            if hasattr(response, 'candidates') and response.candidates:
+                logger.info(f"🔍 DEBUG: Number of candidates: {len(response.candidates)}")
+                candidate = response.candidates[0]
+                logger.info(f"🔍 DEBUG: Candidate type: {type(candidate)}")
+                logger.info(f"🔍 DEBUG: Candidate has 'content': {hasattr(candidate, 'content')}")
+                if hasattr(candidate, 'content') and candidate.content:
+                    logger.info(f"🔍 DEBUG: Content type: {type(candidate.content)}")
+                    # Check if content is a list or single object
+                    if hasattr(candidate.content, '__iter__') and not isinstance(candidate.content, str):
+                        try:
+                            content_length = len(candidate.content) if candidate.content else 0
+                            logger.info(f"🔍 DEBUG: Content is iterable, length: {content_length}")
+                        except TypeError:
+                            logger.info(f"🔍 DEBUG: Content is iterable but has no len() - treating as single object")
+                        for i, part in enumerate(candidate.content):
+                            logger.info(f"🔍 DEBUG: Part {i} type: {type(part)}")
+                            logger.info(f"🔍 DEBUG: Part {i} has 'text': {hasattr(part, 'text')}")
+                            if hasattr(part, 'text'):
+                                logger.info(f"🔍 DEBUG: Part {i} text length: {len(part.text) if part.text else 0}")
+                    else:
+                        logger.info(f"🔍 DEBUG: Content is single object, has 'text': {hasattr(candidate.content, 'text')}")
+                        if hasattr(candidate.content, 'text'):
+                            logger.info(f"🔍 DEBUG: Content text length: {len(candidate.content.text) if candidate.content.text else 0}")
+            
+            # Extract the main content - prioritize response.text as it's more reliable
            content = ""
            if hasattr(response, 'text'):
-                content = response.text
+                logger.info(f"🔍 DEBUG: response.text exists, value: '{response.text}', type: {type(response.text)}")
+                if response.text:
+                    content = response.text
+                    logger.info(f"🔍 DEBUG: Using response.text, length: {len(content)}")
+                else:
+                    logger.info(f"🔍 DEBUG: response.text is empty or None")
            elif hasattr(response, 'candidates') and response.candidates:
                candidate = response.candidates[0]
                if hasattr(candidate, 'content') and candidate.content:
-                    # Extract text from content parts
-                    text_parts = []
-                    for part in candidate.content:
-                        if hasattr(part, 'text'):
-                            text_parts.append(part.text)
-                    content = " ".join(text_parts)
+                    # Handle both single Content object and list of parts
+                    if hasattr(candidate.content, '__iter__') and not isinstance(candidate.content, str):
+                        # Content is a list of parts
+                        text_parts = []
+                        for part in candidate.content:
+                            if hasattr(part, 'text'):
+                                text_parts.append(part.text)
+                        content = " ".join(text_parts)
+                        logger.info(f"🔍 DEBUG: Using candidate.content (list), extracted {len(text_parts)} parts, total length: {len(content)}")
+                    else:
+                        # Content is a single object
+                        if hasattr(candidate.content, 'text'):
+                            content = candidate.content.text
+                            logger.info(f"🔍 DEBUG: Using candidate.content (single), text length: {len(content)}")
+                        else:
+                            logger.warning("🔍 DEBUG: candidate.content has no 'text' attribute")
            
            logger.info(f"Extracted content length: {len(content) if content else 0}")
            if not content:
-                logger.warning("No content extracted from response")
+                logger.warning("⚠️ No content extracted from Gemini response - using fallback content")
+                logger.warning("⚠️ This indicates Google Search grounding is not working properly")
                content = "Generated content about the requested topic."
            
            # Initialize result structure
--- a/backend/services/llm_providers/gemini_provider.py
+++ b/backend/services/llm_providers/gemini_provider.py
@@ -440,7 +440,8 @@ def gemini_structured_json_response(prompt, schema, temperature=0.7, top_p=0.9,
        return {"error": str(e)}


-def _repair_json_string(text: str) -> Optional[str]:
+# Retired JSON repair (renamed with a _removed_ prefix, body kept) to avoid false positives
+def _removed_repair_json_string(text: str) -> Optional[str]:
    """
    Attempt to repair common JSON issues in AI responses.
    """
@@ -489,13 +490,21 @@ def _repair_json_string(text: str) -> Optional[str]:
        fixed_lines.append(line)
    repaired = '\n'.join(fixed_lines)
    
-    # 3. Fix unescaped quotes in string values
-    # This is complex - we'll use a simple approach
+    # 3. Fix unterminated strings (common issue with AI responses)
    try:
-        # Try to balance quotes by adding missing ones
+        # Handle unterminated strings by finding the last incomplete string and closing it
        lines = repaired.split('\n')
        fixed_lines = []
-        for line in lines:
+        for i, line in enumerate(lines):
+            stripped = line.strip()
+            # Check for unterminated strings (line ends with quote but no closing quote)
+            if stripped.endswith('"') and i < len(lines) - 1:
+                next_line = lines[i + 1].strip()
+                # If next line doesn't start with quote or closing bracket, we might have an unterminated string
+                if not next_line.startswith('"') and not next_line.startswith(']') and not next_line.startswith('}'):
+                    # Check if this looks like an unterminated string value
+                    if ':' in line and not line.strip().endswith('",'):
+                        line = line + '",'
            # Count quotes in the line
            quote_count = line.count('"')
            if quote_count % 2 == 1:  # Odd number of quotes
@@ -518,7 +527,8 @@ def _repair_json_string(text: str) -> Optional[str]:
    return repaired


-def _extract_partial_json(text: str) -> Optional[Dict[str, Any]]:
+# Retired partial JSON extraction (renamed with a _removed_ prefix, body kept) to avoid false positives
+def _removed_extract_partial_json(text: str) -> Optional[Dict[str, Any]]:
    """
    Extract partial JSON from truncated responses.
    Attempts to salvage as much data as possible from incomplete JSON.
@@ -572,26 +582,77 @@ def _extract_partial_json(text: str) -> Optional[Dict[str, Any]]:
            # Try to extract individual fields as a last resort
            fields = {}
            
-            # Extract key-value pairs using regex
-            kv_pattern = r'"([^"]+)"\s*:\s*"([^"]*)"'
-            matches = re.findall(kv_pattern, json_text)
-            for key, value in matches:
-                fields[key] = value
+            # Extract key-value pairs using regex (more comprehensive patterns)
+            kv_patterns = [
+                r'"([^"]+)"\s*:\s*"([^"]*)"',  # "key": "value"
+                r'"([^"]+)"\s*:\s*(\d+)',      # "key": 123
+                r'"([^"]+)"\s*:\s*(true|false)', # "key": true/false
+                r'"([^"]+)"\s*:\s*null',       # "key": null
+            ]
            
-            # Extract array fields
+            for pattern in kv_patterns:
+                matches = re.findall(pattern, json_text)
+                for key, value in matches:
+                    if value == 'true':
+                        fields[key] = True
+                    elif value == 'false':
+                        fields[key] = False
+                    elif value == 'null':
+                        fields[key] = None
+                    elif value.isdigit():
+                        fields[key] = int(value)
+                    else:
+                        fields[key] = value
+            
+            # Extract array fields (more robust)
            array_pattern = r'"([^"]+)"\s*:\s*\[([^\]]*)\]'
            array_matches = re.findall(array_pattern, json_text)
            for key, array_content in array_matches:
-                # Parse array items
+                # Parse array items more comprehensively
                items = []
-                item_pattern = r'"([^"]*)"'
-                item_matches = re.findall(item_pattern, array_content)
-                items.extend(item_matches)
-                fields[key] = items
+                # Look for quoted strings, numbers, booleans, null
+                item_patterns = [
+                    r'"([^"]*)"',  # quoted strings
+                    r'(\d+)',      # numbers
+                    r'(true|false)', # booleans
+                    r'(null)',     # null
+                ]
+                for pattern in item_patterns:
+                    item_matches = re.findall(pattern, array_content)
+                    for match in item_matches:
+                        if match == 'true':
+                            items.append(True)
+                        elif match == 'false':
+                            items.append(False)
+                        elif match == 'null':
+                            items.append(None)
+                        elif match.isdigit():
+                            items.append(int(match))
+                        else:
+                            items.append(match)
+                if items:
+                    fields[key] = items
+            
+            # Extract nested object fields (basic)
+            object_pattern = r'"([^"]+)"\s*:\s*\{([^}]*)\}'
+            object_matches = re.findall(object_pattern, json_text)
+            for key, object_content in object_matches:
+                # Simple nested object extraction
+                nested_fields = {}
+                nested_kv_matches = re.findall(r'"([^"]+)"\s*:\s*"([^"]*)"', object_content)
+                for nested_key, nested_value in nested_kv_matches:
+                    nested_fields[nested_key] = nested_value
+                if nested_fields:
+                    fields[key] = nested_fields
            
            if fields:
-                logger.info(f"Extracted {len(fields)} fields from truncated JSON")
-                return fields
+                logger.info(f"Extracted {len(fields)} fields from truncated JSON: {list(fields.keys())}")
+                # Only return if we have a valid outline structure
+                if 'outline' in fields and isinstance(fields['outline'], list):
+                    return {'outline': fields['outline']}
+                else:
+                    logger.error("No valid 'outline' field found in partial JSON")
+                    return None
            
            return None
            
@@ -600,7 +661,8 @@ def _extract_partial_json(text: str) -> Optional[Dict[str, Any]]:
        return None


-def _extract_key_value_pairs(text: str) -> Optional[Dict[str, Any]]:
+# Retired key-value extraction (renamed with a _removed_ prefix, body kept) to avoid false positives
+def _removed_extract_key_value_pairs(text: str) -> Optional[Dict[str, Any]]:
    """
    Extract key-value pairs from malformed JSON text as a last resort.
    """