Added image generation to blog writer

2025-10-31 15:59:16 +05:30
parent 3219e6bbe4
commit cdb41aec1b
80 changed files with 7662 additions and 3951 deletions
--- a/backend/services/blog_writer/content/enhanced_content_generator.py
+++ b/backend/services/blog_writer/content/enhanced_content_generator.py
@@ -1,12 +1,14 @@
 """
-EnhancedContentGenerator - thin orchestrator combining URL selection and Gemini provider.
+EnhancedContentGenerator - thin orchestrator for section generation.

-Provides Draft vs Polished modes and optional URL Context usage.
+Provider parity:
+- Uses main_text_generation.llm_text_gen to respect GPT_PROVIDER (Gemini/HF)
+- No direct provider coupling here; Google grounding remains in research only
 """

 from typing import Any, Dict

-from services.llm_providers.gemini_grounded_provider import GeminiGroundedProvider
+from services.llm_providers.main_text_generation import llm_text_gen
 from .source_url_manager import SourceURLManager
 from .context_memory import ContextMemory
 from .transition_generator import TransitionGenerator
@@ -15,24 +17,37 @@ from .flow_analyzer import FlowAnalyzer

 class EnhancedContentGenerator:
    def __init__(self):
-        self.provider = GeminiGroundedProvider()
        self.url_manager = SourceURLManager()
        self.memory = ContextMemory(max_entries=12)
        self.transitioner = TransitionGenerator()
        self.flow = FlowAnalyzer()

    async def generate_section(self, section: Any, research: Any, mode: str = "polished") -> Dict[str, Any]:
-        urls = self.url_manager.pick_relevant_urls(section, research)
        prev_summary = self.memory.build_previous_sections_summary(limit=2)
-        prompt = self._build_prompt(section, research, prev_summary)
-        result = await self.provider.generate_grounded_content(
-            prompt=prompt,
-            content_type="linkedin_article",
-            temperature=0.6 if mode == "polished" else 0.8,
-            max_tokens=2048,
-            urls=urls,
-            mode=mode,
-        )
+        urls = self.url_manager.pick_relevant_urls(section, research)
+        prompt = self._build_prompt(section, research, prev_summary, urls)
+        # Provider-agnostic text generation (respect GPT_PROVIDER & circuit-breaker)
+        content_text: str = ""
+        try:
+            ai_resp = llm_text_gen(
+                prompt=prompt,
+                json_struct=None,
+                system_prompt=None,
+            )
+            if isinstance(ai_resp, dict) and ai_resp.get("text"):
+                content_text = ai_resp.get("text", "")
+            elif isinstance(ai_resp, str):
+                content_text = ai_resp
+            else:
+                # Fallback best-effort extraction
+                content_text = str(ai_resp or "")
+        except Exception as e:
+            content_text = ""
+
+        result = {
+            "content": content_text,
+            "sources": [{"title": u.get("title", ""), "url": u.get("url", "")} for u in urls] if urls else [],
+        }
        # Generate transition and compute intelligent flow metrics
        previous_text = prev_summary
        current_text = result.get("content", "")
@@ -56,19 +71,22 @@ class EnhancedContentGenerator:
            pass
        return result

-    def _build_prompt(self, section: Any, research: Any, prev_summary: str) -> str:
+    def _build_prompt(self, section: Any, research: Any, prev_summary: str, urls: list) -> str:
        heading = getattr(section, 'heading', 'Section')
        key_points = getattr(section, 'key_points', [])
        keywords = getattr(section, 'keywords', [])
        target_words = getattr(section, 'target_words', 300)
+        url_block = "\n".join([f"- {u.get('title','')} ({u.get('url','')})" for u in urls]) if urls else "(no specific URLs provided)"

        return (
            f"You are writing the blog section '{heading}'.\n\n"
-            f"Context summary: {prev_summary}\n"
-            f"Key points: {', '.join(key_points)}\n"
-            f"Keywords: {', '.join(keywords)}\n"
-            f"Target word count: {target_words}.\n"
-            "Use only factual info from provided sources; add short transition, then body."
+            f"Context summary (previous sections): {prev_summary}\n\n"
+            f"Authoring requirements:\n"
+            f"- Target word count: ~{target_words}\n"
+            f"- Use the following key points: {', '.join(key_points)}\n"
+            f"- Include these keywords naturally: {', '.join(keywords)}\n"
+            f"- Cite insights from these sources when relevant (do not output raw URLs):\n{url_block}\n\n"
+            "Write engaging, well-structured markdown with clear paragraphs (2-4 sentences each) separated by double line breaks."
        )


--- a/backend/services/blog_writer/content/medium_blog_generator.py
+++ b/backend/services/blog_writer/content/medium_blog_generator.py
@@ -15,7 +15,7 @@ from models.blog_models import (
    MediumGeneratedSection,
    ResearchSource,
 )
-from services.llm_providers.gemini_provider import gemini_structured_json_response
+from services.llm_providers.main_text_generation import llm_text_gen
 from services.cache.persistent_content_cache import persistent_content_cache


@@ -176,11 +176,9 @@ class MediumBlogGenerator:
            f"Sections to write:\n{json.dumps(payload, ensure_ascii=False, indent=2)}"
        )

-        ai_resp = gemini_structured_json_response(
+        ai_resp = llm_text_gen(
            prompt=prompt,
-            schema=schema,
-            temperature=0.2,
-            max_tokens=8192,
+            json_struct=schema,
            system_prompt=system,
        )

--- a/backend/services/blog_writer/core/blog_writer_service.py
+++ b/backend/services/blog_writer/core/blog_writer_service.py
@@ -275,11 +275,17 @@ class BlogWriterService:
            # Initialize metadata generator
            metadata_generator = BlogSEOMetadataGenerator()
            
-            # Generate comprehensive metadata
+            # Extract outline and seo_analysis from request
+            outline = request.outline if hasattr(request, 'outline') else None
+            seo_analysis = request.seo_analysis if hasattr(request, 'seo_analysis') else None
+            
+            # Generate comprehensive metadata with full context
            metadata_results = await metadata_generator.generate_comprehensive_metadata(
                blog_content=request.content,
                blog_title=request.title or "Untitled Blog Post",
-                research_data=request.research_data or {}
+                research_data=request.research_data or {},
+                outline=outline,
+                seo_analysis=seo_analysis
            )
            
            # Convert to BlogSEOMetadataResponse format
--- a/backend/services/blog_writer/outline/outline_optimizer.py
+++ b/backend/services/blog_writer/outline/outline_optimizer.py
@@ -40,7 +40,7 @@ Return JSON format:
 }}"""
        
        try:
-            from services.llm_providers.gemini_provider import gemini_structured_json_response
+            from services.llm_providers.main_text_generation import llm_text_gen
            
            optimization_schema = {
                "type": "object",
@@ -64,11 +64,10 @@ Return JSON format:
                "propertyOrdering": ["outline"]
            }
            
-            optimized_data = gemini_structured_json_response(
+            optimized_data = llm_text_gen(
                prompt=optimization_prompt,
-                schema=optimization_schema,
-                temperature=0.3,
-                max_tokens=6000  # Match main outline generator
+                json_struct=optimization_schema,
+                system_prompt=None
            )
            
            # Handle the new schema format with "outline" wrapper
--- a/backend/services/blog_writer/outline/response_processor.py
+++ b/backend/services/blog_writer/outline/response_processor.py
@@ -20,7 +20,7 @@ class ResponseProcessor:
    
    async def generate_with_retry(self, prompt: str, schema: Dict[str, Any], task_id: str = None) -> Dict[str, Any]:
        """Generate outline with retry logic for API failures."""
-        from services.llm_providers.gemini_provider import gemini_structured_json_response
+        from services.llm_providers.main_text_generation import llm_text_gen
        from api.blog_writer.task_manager import task_manager
        
        max_retries = 2  # Conservative retry for expensive API calls
@@ -29,17 +29,16 @@ class ResponseProcessor:
        for attempt in range(max_retries + 1):
            try:
                if task_id:
-                    await task_manager.update_progress(task_id, f"🤖 Calling Gemini API for outline generation (attempt {attempt + 1}/{max_retries + 1})...")
+                    await task_manager.update_progress(task_id, f"🤖 Calling AI API for outline generation (attempt {attempt + 1}/{max_retries + 1})...")
                
-                outline_data = gemini_structured_json_response(
+                outline_data = llm_text_gen(
                    prompt=prompt,
-                    schema=schema,
-                    temperature=0.3,
-                    max_tokens=6000  # Increased further to avoid truncation
+                    json_struct=schema,
+                    system_prompt=None
                )
                
                # Log response for debugging
-                logger.info(f"Gemini response received: {type(outline_data)}")
+                logger.info(f"AI response received: {type(outline_data)}")
                
                # Check for errors in the response
                if isinstance(outline_data, dict) and 'error' in outline_data:
@@ -47,17 +46,17 @@ class ResponseProcessor:
                    if "503" in error_msg and "overloaded" in error_msg and attempt < max_retries:
                        if task_id:
                            await task_manager.update_progress(task_id, f"⚠️ AI service overloaded, retrying in {retry_delay} seconds...")
-                        logger.warning(f"Gemini API overloaded, retrying in {retry_delay} seconds (attempt {attempt + 1}/{max_retries + 1})")
+                        logger.warning(f"AI API overloaded, retrying in {retry_delay} seconds (attempt {attempt + 1}/{max_retries + 1})")
                        await asyncio.sleep(retry_delay)
                        continue
                    elif "No valid structured response content found" in error_msg and attempt < max_retries:
                        if task_id:
                            await task_manager.update_progress(task_id, f"⚠️ Invalid response format, retrying in {retry_delay} seconds...")
-                        logger.warning(f"Gemini response parsing failed, retrying in {retry_delay} seconds (attempt {attempt + 1}/{max_retries + 1})")
+                        logger.warning(f"AI response parsing failed, retrying in {retry_delay} seconds (attempt {attempt + 1}/{max_retries + 1})")
                        await asyncio.sleep(retry_delay)
                        continue
                    else:
-                        logger.error(f"Gemini structured response error: {outline_data['error']}")
+                        logger.error(f"AI structured response error: {outline_data['error']}")
                        raise ValueError(f"AI outline generation failed: {outline_data['error']}")
                
                # Validate required fields
@@ -69,7 +68,7 @@ class ResponseProcessor:
                        await asyncio.sleep(retry_delay)
                        continue
                    else:
-                        raise ValueError("Invalid outline structure in Gemini response")
+                        raise ValueError("Invalid outline structure in AI response")
                
                # If we get here, the response is valid
                return outline_data
@@ -79,7 +78,7 @@ class ResponseProcessor:
                if ("503" in error_str or "overloaded" in error_str) and attempt < max_retries:
                    if task_id:
                        await task_manager.update_progress(task_id, f"⚠️ AI service error, retrying in {retry_delay} seconds...")
-                    logger.warning(f"Gemini API error, retrying in {retry_delay} seconds (attempt {attempt + 1}/{max_retries + 1}): {error_str}")
+                    logger.warning(f"AI API error, retrying in {retry_delay} seconds (attempt {attempt + 1}/{max_retries + 1}): {error_str}")
                    await asyncio.sleep(retry_delay)
                    continue
                else:
--- a/backend/services/blog_writer/outline/section_enhancer.py
+++ b/backend/services/blog_writer/outline/section_enhancer.py
@@ -44,7 +44,7 @@ class SectionEnhancer:
        """
        
        try:
-            from services.llm_providers.gemini_provider import gemini_structured_json_response
+            from services.llm_providers.main_text_generation import llm_text_gen
            
            enhancement_schema = {
                "type": "object",
@@ -58,11 +58,10 @@ class SectionEnhancer:
                "required": ["heading", "subheadings", "key_points", "target_words", "keywords"]
            }
            
-            enhanced_data = gemini_structured_json_response(
+            enhanced_data = llm_text_gen(
                prompt=enhancement_prompt,
-                schema=enhancement_schema,
-                temperature=0.4,
-                max_tokens=1000
+                json_struct=enhancement_schema,
+                system_prompt=None
            )
            
            if isinstance(enhanced_data, dict) and 'error' not in enhanced_data:
--- a/backend/services/blog_writer/outline/source_mapper.py
+++ b/backend/services/blog_writer/outline/source_mapper.py
@@ -559,14 +559,11 @@ Analyze the mapping and provide your recommendations.
            AI validation response
        """
        try:
-            from services.llm_providers.gemini_provider import gemini_text_response
+            from services.llm_providers.main_text_generation import llm_text_gen
            
-            response = gemini_text_response(
+            response = llm_text_gen(
                prompt=prompt,
-                temperature=0.3,
-                top_p=0.9,
-                n=1,
-                max_tokens=2000,
+                json_struct=None,
                system_prompt=None
            )
            
--- a/backend/services/blog_writer/seo/blog_content_seo_analyzer.py
+++ b/backend/services/blog_writer/seo/blog_content_seo_analyzer.py
@@ -10,13 +10,13 @@ import re
 import textstat
 from datetime import datetime
 from typing import Dict, Any, List, Optional
-from loguru import logger
+from utils.logger_utils import get_service_logger

 from services.seo_analyzer import (
    ContentAnalyzer, KeywordAnalyzer, 
    URLStructureAnalyzer, AIInsightGenerator
 )
-from services.llm_providers.gemini_provider import gemini_structured_json_response
+from services.llm_providers.main_text_generation import llm_text_gen


 class BlogContentSEOAnalyzer:
@@ -24,11 +24,13 @@ class BlogContentSEOAnalyzer:
    
    def __init__(self):
        """Initialize the blog content SEO analyzer"""
+        # Service-specific logger (no global reconfiguration)
+        global logger
+        logger = get_service_logger("blog_content_seo_analyzer")
        self.content_analyzer = ContentAnalyzer()
        self.keyword_analyzer = KeywordAnalyzer()
        self.url_analyzer = URLStructureAnalyzer()
        self.ai_insights = AIInsightGenerator()
-        self.gemini_provider = gemini_structured_json_response
        
        logger.info("BlogContentSEOAnalyzer initialized")
    
@@ -598,7 +600,7 @@ class BlogContentSEOAnalyzer:
        return recommendations
    
    async def _run_ai_analysis(self, blog_content: str, keywords_data: Dict[str, Any], non_ai_results: Dict[str, Any]) -> Dict[str, Any]:
-        """Run single AI analysis for structured insights"""
+        """Run single AI analysis for structured insights (provider-agnostic)"""
        try:
            # Prepare context for AI analysis
            context = {
@@ -610,7 +612,6 @@ class BlogContentSEOAnalyzer:
            # Create AI prompt for structured analysis
            prompt = self._create_ai_analysis_prompt(context)
            
-            # Get structured response from Gemini
            schema = {
                "type": "object",
                "properties": {
@@ -653,18 +654,17 @@ class BlogContentSEOAnalyzer:
                }
            }
            
-            ai_response = self.gemini_provider(
+            # Provider-agnostic structured response respecting GPT_PROVIDER
+            ai_response = llm_text_gen(
                prompt=prompt,
-                schema=schema,
-                temperature=0.2,
-                max_tokens=8192
+                json_struct=schema,
+                system_prompt=None
            )
            
            return ai_response
            
        except Exception as e:
            logger.error(f"AI analysis failed: {e}")
-            # Fail fast - don't return mock data
            raise e
    
    def _create_ai_analysis_prompt(self, context: Dict[str, Any]) -> str:
--- a/backend/services/blog_writer/seo/blog_seo_metadata_generator.py
+++ b/backend/services/blog_writer/seo/blog_seo_metadata_generator.py
@@ -12,7 +12,7 @@ from datetime import datetime
 from typing import Dict, Any, List, Optional
 from loguru import logger

-from services.llm_providers.gemini_provider import gemini_structured_json_response
+from services.llm_providers.main_text_generation import llm_text_gen


 class BlogSEOMetadataGenerator:
@@ -20,14 +20,15 @@ class BlogSEOMetadataGenerator:
    
    def __init__(self):
        """Initialize the metadata generator"""
-        self.gemini_provider = gemini_structured_json_response
        logger.info("BlogSEOMetadataGenerator initialized")
    
    async def generate_comprehensive_metadata(
        self, 
        blog_content: str, 
        blog_title: str,
-        research_data: Dict[str, Any]
+        research_data: Dict[str, Any],
+        outline: Optional[List[Dict[str, Any]]] = None,
+        seo_analysis: Optional[Dict[str, Any]] = None
    ) -> Dict[str, Any]:
        """
        Generate comprehensive SEO metadata using maximum 2 AI calls
@@ -36,6 +37,8 @@ class BlogSEOMetadataGenerator:
            blog_content: The blog content to analyze
            blog_title: The blog title
            research_data: Research data containing keywords and insights
+            outline: Outline structure with sections and headings
+            seo_analysis: SEO analysis results from previous phase
            
        Returns:
            Comprehensive metadata including all SEO elements
@@ -49,11 +52,15 @@ class BlogSEOMetadataGenerator:
            
            # Call 1: Generate core SEO metadata (parallel with Call 2)
            logger.info("Generating core SEO metadata")
-            core_metadata_task = self._generate_core_metadata(blog_content, blog_title, keywords_data)
+            core_metadata_task = self._generate_core_metadata(
+                blog_content, blog_title, keywords_data, outline, seo_analysis
+            )
            
            # Call 2: Generate social media and structured data (parallel with Call 1)
            logger.info("Generating social media and structured data")
-            social_metadata_task = self._generate_social_metadata(blog_content, blog_title, keywords_data)
+            social_metadata_task = self._generate_social_metadata(
+                blog_content, blog_title, keywords_data, outline, seo_analysis
+            )
            
            # Wait for both calls to complete
            core_metadata, social_metadata = await asyncio.gather(
@@ -105,12 +112,16 @@ class BlogSEOMetadataGenerator:
        self, 
        blog_content: str, 
        blog_title: str, 
-        keywords_data: Dict[str, Any]
+        keywords_data: Dict[str, Any],
+        outline: Optional[List[Dict[str, Any]]] = None,
+        seo_analysis: Optional[Dict[str, Any]] = None
    ) -> Dict[str, Any]:
        """Generate core SEO metadata (Call 1)"""
        try:
            # Create comprehensive prompt for core metadata
-            prompt = self._create_core_metadata_prompt(blog_content, blog_title, keywords_data)
+            prompt = self._create_core_metadata_prompt(
+                blog_content, blog_title, keywords_data, outline, seo_analysis
+            )
            
            # Define simplified structured schema for core metadata
            schema = {
@@ -155,17 +166,26 @@ class BlogSEOMetadataGenerator:
                "required": ["seo_title", "meta_description", "url_slug", "blog_tags", "blog_categories", "social_hashtags", "reading_time", "focus_keyword"]
            }
            
-            # Get structured response from Gemini
-            ai_response = self.gemini_provider(
-                prompt,
-                schema,
-                temperature=0.3,
-                max_tokens=2048
+            # Get structured response using provider-agnostic llm_text_gen
+            ai_response_raw = llm_text_gen(
+                prompt=prompt,
+                json_struct=schema,
+                system_prompt=None
            )
            
+            # Handle response: llm_text_gen may return dict (from structured JSON) or str (needs parsing)
+            ai_response = ai_response_raw
+            if isinstance(ai_response_raw, str):
+                try:
+                    import json
+                    ai_response = json.loads(ai_response_raw)
+                except json.JSONDecodeError:
+                    logger.error(f"Failed to parse JSON response: {ai_response_raw[:200]}...")
+                    ai_response = None
+            
            # Check if we got a valid response
            if not ai_response or not isinstance(ai_response, dict):
-                logger.error("Core metadata generation failed: Invalid response from Gemini")
+                logger.error("Core metadata generation failed: Invalid response from LLM")
                # Return fallback response
                primary_keywords = ', '.join(keywords_data.get('primary_keywords', ['content']))
                word_count = len(blog_content.split())
@@ -193,12 +213,16 @@ class BlogSEOMetadataGenerator:
        self, 
        blog_content: str, 
        blog_title: str, 
-        keywords_data: Dict[str, Any]
+        keywords_data: Dict[str, Any],
+        outline: Optional[List[Dict[str, Any]]] = None,
+        seo_analysis: Optional[Dict[str, Any]] = None
    ) -> Dict[str, Any]:
        """Generate social media and structured data (Call 2)"""
        try:
            # Create comprehensive prompt for social metadata
-            prompt = self._create_social_metadata_prompt(blog_content, blog_title, keywords_data)
+            prompt = self._create_social_metadata_prompt(
+                blog_content, blog_title, keywords_data, outline, seo_analysis
+            )
            
            # Define simplified structured schema for social metadata
            schema = {
@@ -246,17 +270,26 @@ class BlogSEOMetadataGenerator:
                "required": ["open_graph", "twitter_card", "json_ld_schema"]
            }
            
-            # Get structured response from Gemini
-            ai_response = self.gemini_provider(
-                prompt,
-                schema,
-                temperature=0.3,
-                max_tokens=2048
+            # Get structured response using provider-agnostic llm_text_gen
+            ai_response_raw = llm_text_gen(
+                prompt=prompt,
+                json_struct=schema,
+                system_prompt=None
            )
            
+            # Handle response: llm_text_gen may return dict (from structured JSON) or str (needs parsing)
+            ai_response = ai_response_raw
+            if isinstance(ai_response_raw, str):
+                try:
+                    import json
+                    ai_response = json.loads(ai_response_raw)
+                except json.JSONDecodeError:
+                    logger.error(f"Failed to parse JSON response: {ai_response_raw[:200]}...")
+                    ai_response = None
+            
            # Check if we got a valid response
            if not ai_response or not isinstance(ai_response, dict) or not ai_response.get('open_graph') or not ai_response.get('twitter_card') or not ai_response.get('json_ld_schema'):
-                logger.error("Social metadata generation failed: Invalid or empty response from Gemini")
+                logger.error("Social metadata generation failed: Invalid or empty response from LLM")
                # Return fallback response
                return {
                    'open_graph': {
@@ -301,11 +334,47 @@ class BlogSEOMetadataGenerator:
            logger.error(f"Social metadata generation failed: {e}")
            raise e
    
+    def _extract_content_highlights(self, blog_content: str, max_length: int = 2500) -> str:
+        """Extract key sections from blog content for prompt context"""
+        try:
+            lines = blog_content.split('\n')
+            
+            # Get first paragraph (introduction)
+            intro = ""
+            for line in lines[:20]:
+                if line.strip() and not line.strip().startswith('#'):
+                    intro += line.strip() + " "
+                    if len(intro) > 300:
+                        break
+            
+            # Get section headings
+            headings = [line.strip() for line in lines if line.strip().startswith('##')][:6]
+            
+            # Get conclusion if available
+            conclusion = ""
+            for line in reversed(lines[-20:]):
+                if line.strip() and not line.strip().startswith('#'):
+                    conclusion = line.strip() + " " + conclusion
+                    if len(conclusion) > 300:
+                        break
+            
+            highlights = f"INTRODUCTION: {intro[:300]}...\n\n"
+            highlights += f"SECTION HEADINGS: {' | '.join([h.replace('##', '').strip() for h in headings])}\n\n"
+            if conclusion:
+                highlights += f"CONCLUSION: {conclusion[:300]}..."
+            
+            return highlights[:max_length]
+        except Exception as e:
+            logger.warning(f"Failed to extract content highlights: {e}")
+            return blog_content[:2000] + "..."
+    
    def _create_core_metadata_prompt(
        self, 
        blog_content: str, 
        blog_title: str, 
-        keywords_data: Dict[str, Any]
+        keywords_data: Dict[str, Any],
+        outline: Optional[List[Dict[str, Any]]] = None,
+        seo_analysis: Optional[Dict[str, Any]] = None
    ) -> str:
        """Create high-quality prompt for core metadata generation"""
        
@@ -314,30 +383,106 @@ class BlogSEOMetadataGenerator:
        search_intent = keywords_data.get('search_intent', 'informational')
        target_audience = keywords_data.get('target_audience', 'general')
        industry = keywords_data.get('industry', 'general')
-        
-        # Calculate word count for reading time estimation
        word_count = len(blog_content.split())
        
+        # Extract outline structure
+        outline_context = ""
+        if outline:
+            headings = [s.get('heading', '') for s in outline if s.get('heading')]
+            outline_context = f"""
+OUTLINE STRUCTURE:
+- Total sections: {len(outline)}
+- Section headings: {', '.join(headings[:8])}
+- Content hierarchy: Well-structured with {len(outline)} main sections
+"""
+        
+        # Extract SEO analysis insights
+        seo_context = ""
+        if seo_analysis:
+            overall_score = seo_analysis.get('overall_score', seo_analysis.get('seo_score', 0))
+            category_scores = seo_analysis.get('category_scores', {})
+            applied_recs = seo_analysis.get('applied_recommendations', [])
+            
+            seo_context = f"""
+SEO ANALYSIS RESULTS:
+- Overall SEO Score: {overall_score}/100
+- Category Scores: Structure {category_scores.get('structure', category_scores.get('Structure', 0))}, Keywords {category_scores.get('keywords', category_scores.get('Keywords', 0))}, Readability {category_scores.get('readability', category_scores.get('Readability', 0))}
+- Applied Recommendations: {len(applied_recs)} SEO optimizations have been applied
+- Content Quality: Optimized for search engines with keyword focus
+"""
+        
+        # Get more content context (key sections instead of just first 1000 chars)
+        content_preview = self._extract_content_highlights(blog_content)
+        
        prompt = f"""
-Generate SEO metadata for this blog post.
+Generate comprehensive, personalized SEO metadata for this blog post.

-BLOG TITLE: {blog_title}
-BLOG CONTENT: {blog_content[:1000]}...
+=== BLOG CONTENT CONTEXT ===
+TITLE: {blog_title}
+CONTENT PREVIEW (key sections): {content_preview}
+WORD COUNT: {word_count} words
+READING TIME ESTIMATE: {max(1, word_count // 200)} minutes
+
+{outline_context}
+
+=== KEYWORD & AUDIENCE DATA ===
 PRIMARY KEYWORDS: {primary_keywords}
 SEMANTIC KEYWORDS: {semantic_keywords}
-WORD COUNT: {word_count}
+SEARCH INTENT: {search_intent}
+TARGET AUDIENCE: {target_audience}
+INDUSTRY: {industry}

-Generate:
-1. SEO TITLE (50-60 characters) - include primary keyword
-2. META DESCRIPTION (150-160 characters) - include CTA
-3. URL SLUG (lowercase, hyphens, 3-5 words)
-4. BLOG TAGS (5-8 relevant tags)
-5. BLOG CATEGORIES (2-3 categories)
-6. SOCIAL HASHTAGS (5-10 hashtags with #)
-7. READING TIME (calculate from {word_count} words)
-8. FOCUS KEYWORD (primary keyword for SEO)
+{seo_context}

-Make it compelling and SEO-optimized.
+=== METADATA GENERATION REQUIREMENTS ===
+1. SEO TITLE (50-60 characters, must include primary keyword):
+   - Front-load primary keyword
+   - Make it compelling and click-worthy
+   - Include power words if appropriate for {target_audience} audience
+   - Optimized for {search_intent} search intent
+
+2. META DESCRIPTION (150-160 characters, must include CTA):
+   - Include primary keyword naturally in first 120 chars
+   - Add compelling call-to-action (e.g., "Learn more", "Discover how", "Get started")
+   - Highlight value proposition for {target_audience} audience
+   - Use {industry} industry-specific terminology where relevant
+
+3. URL SLUG (lowercase, hyphens, 3-5 words):
+   - Include primary keyword
+   - Remove stop words
+   - Keep it concise and readable
+
+4. BLOG TAGS (5-8 relevant tags):
+   - Mix of primary, semantic, and long-tail keywords
+   - Industry-specific tags for {industry}
+   - Audience-relevant tags for {target_audience}
+
+5. BLOG CATEGORIES (2-3 categories):
+   - Based on content structure and {industry} industry standards
+   - Reflect main themes from outline sections
+
+6. SOCIAL HASHTAGS (5-10 hashtags with #):
+   - Include primary keyword as hashtag
+   - Industry-specific hashtags for {industry}
+   - Trending/relevant hashtags for {target_audience}
+
+7. READING TIME (calculate from {word_count} words):
+   - Average reading speed: 200 words/minute
+   - Round to nearest minute
+
+8. FOCUS KEYWORD (primary keyword for SEO):
+   - Select the most important primary keyword
+   - Should match the main topic and search intent
+
+=== QUALITY REQUIREMENTS ===
+- All metadata must be unique, not generic
+- Incorporate insights from SEO analysis if provided
+- Reflect the actual content structure from outline
+- Use language appropriate for {target_audience} audience
+- Optimize for {search_intent} search intent
+- Make descriptions compelling and action-oriented
+
+Generate metadata that is personalized, compelling, and SEO-optimized.
 """
        return prompt
    
@@ -345,7 +490,9 @@ Make it compelling and SEO-optimized.
        self, 
        blog_content: str, 
        blog_title: str, 
-        keywords_data: Dict[str, Any]
+        keywords_data: Dict[str, Any],
+        outline: Optional[List[Dict[str, Any]]] = None,
+        seo_analysis: Optional[Dict[str, Any]] = None
    ) -> str:
        """Create high-quality prompt for social metadata generation"""
        
@@ -353,49 +500,68 @@ Make it compelling and SEO-optimized.
        search_intent = keywords_data.get('search_intent', 'informational')
        target_audience = keywords_data.get('target_audience', 'general')
        industry = keywords_data.get('industry', 'general')
-        
        current_date = datetime.now().isoformat()
        
+        # Add outline and SEO context similar to core metadata prompt
+        outline_context = ""
+        if outline:
+            headings = [s.get('heading', '') for s in outline if s.get('heading')]
+            outline_context = f"\nOUTLINE SECTIONS: {', '.join(headings[:6])}\n"
+        
+        seo_context = ""
+        if seo_analysis:
+            overall_score = seo_analysis.get('overall_score', seo_analysis.get('seo_score', 0))
+            seo_context = f"\nSEO SCORE: {overall_score}/100 (optimized content)\n"
+        
+        content_preview = self._extract_content_highlights(blog_content, 1500)
+        
        prompt = f"""
-Generate social media metadata for this blog post.
+Generate engaging social media metadata for this blog post.

-BLOG TITLE: {blog_title}
-BLOG CONTENT: {blog_content[:800]}...
-PRIMARY KEYWORDS: {primary_keywords}
+=== CONTENT ===
+TITLE: {blog_title}
+CONTENT: {content_preview}
+{outline_context}
+{seo_context}
+KEYWORDS: {primary_keywords}
+TARGET AUDIENCE: {target_audience}
+INDUSTRY: {industry}
 CURRENT DATE: {current_date}

-Generate:
+=== GENERATION REQUIREMENTS ===

 1. OPEN GRAPH (Facebook/LinkedIn):
-   - title: 60 chars max
-   - description: 160 chars max  
-   - image: image URL
+   - title: 60 chars max, include primary keyword, compelling for {target_audience}
+   - description: 160 chars max, include CTA and value proposition
+   - image: Suggest an appropriate image URL (placeholder if none available)
   - type: "article"
-   - site_name: site name
-   - url: canonical URL
+   - site_name: Use appropriate site name for {industry} industry
+   - url: Generate canonical URL structure

 2. TWITTER CARD:
   - card: "summary_large_image"
-   - title: 70 chars max
-   - description: 200 chars max with hashtags
-   - image: image URL
-   - site: @sitename
-   - creator: @author
+   - title: 70 chars max, optimized for Twitter audience
+   - description: 200 chars max with relevant hashtags inline
+   - image: Match Open Graph image
+   - site: @yourwebsite (placeholder, user should update)
+   - creator: @author (placeholder, user should update)

-3. JSON-LD SCHEMA:
+3. JSON-LD SCHEMA (Article):
   - @context: "https://schema.org"
   - @type: "Article"
-   - headline: article title
-   - description: article description
-   - author: {{"@type": "Person", "name": "Author Name"}}
-   - publisher: {{"@type": "Organization", "name": "Site Name"}}
-   - datePublished: ISO date
-   - dateModified: ISO date
-   - mainEntityOfPage: canonical URL
-   - keywords: array of keywords
-   - wordCount: word count
+   - headline: Article title (optimized)
+   - description: Article description (150-200 chars)
+   - author: {{"@type": "Person", "name": "Author Name"}} (placeholder)
+   - publisher: {{"@type": "Organization", "name": "Site Name", "logo": {{"@type": "ImageObject", "url": "logo-url"}}}}
+   - datePublished: {current_date}
+   - dateModified: {current_date}
+   - mainEntityOfPage: {{"@type": "WebPage", "@id": "canonical-url"}}
+   - keywords: Array of primary and semantic keywords
+   - wordCount: {len(blog_content.split())}
+   - articleSection: Primary category based on content
+   - inLanguage: "en-US"

-Make it engaging and SEO-optimized.
+Make it engaging, personalized for {target_audience}, and optimized for {industry} industry.
 """
        return prompt
    
--- a/backend/services/blog_writer/seo/blog_seo_recommendation_applier.py
+++ b/backend/services/blog_writer/seo/blog_seo_recommendation_applier.py
@@ -0,0 +1,269 @@
+"""Blog SEO Recommendation Applier
+
+Applies actionable SEO recommendations to existing blog content using the
+provider-agnostic `llm_text_gen` dispatcher. Ensures GPT_PROVIDER parity.
+"""
+
+import asyncio
+from typing import Dict, Any, List
+from utils.logger_utils import get_service_logger
+
+from services.llm_providers.main_text_generation import llm_text_gen
+
+
+logger = get_service_logger("blog_seo_recommendation_applier")
+
+
+class BlogSEORecommendationApplier:
+    """Apply actionable SEO recommendations to blog content."""
+
+    def __init__(self):
+        """Create the applier; it holds no state and only emits a debug log line."""
+        logger.debug("Initialized BlogSEORecommendationApplier")
+
+    async def apply_recommendations(self, payload: Dict[str, Any]) -> Dict[str, Any]:
+        """Apply SEO recommendations to blog sections and return the updated content.
+
+        Args:
+            payload: Request data. Keys read: ``title``, ``sections``, ``outline``,
+                ``research``, ``recommendations``, ``persona``, ``tone``, ``audience``.
+
+        Returns:
+            ``{"success": False, "error": ...}`` when no sections were supplied or
+            the text generator failed; otherwise ``{"success": True, "title",
+            "sections", "applied"}`` with exactly one section per input section,
+            in input order.
+        """
+
+        title = payload.get("title", "Untitled Blog")
+        sections: List[Dict[str, Any]] = payload.get("sections", [])
+        outline = payload.get("outline", [])
+        research = payload.get("research", {})
+        recommendations = payload.get("recommendations", [])
+        persona = payload.get("persona", {})
+        tone = payload.get("tone")
+        audience = payload.get("audience")
+
+        if not sections:
+            return {"success": False, "error": "No sections provided for recommendation application"}
+
+        if not recommendations:
+            logger.warning("apply_recommendations called without recommendations")
+            return {"success": True, "title": title, "sections": sections, "applied": []}
+
+        prompt = self._build_prompt(
+            title=title,
+            sections=sections,
+            outline=outline,
+            research=research,
+            recommendations=recommendations,
+            persona=persona,
+            tone=tone,
+            audience=audience,
+        )
+
+        schema = {
+            "type": "object",
+            "properties": {
+                "title": {"type": "string"},
+                "sections": {
+                    "type": "array",
+                    "items": {
+                        "type": "object",
+                        "properties": {
+                            "id": {"type": "string"},
+                            "heading": {"type": "string"},
+                            "content": {"type": "string"},
+                            "notes": {"type": "array", "items": {"type": "string"}},
+                        },
+                        "required": ["id", "heading", "content"],
+                    },
+                },
+                "applied_recommendations": {
+                    "type": "array",
+                    "items": {
+                        "type": "object",
+                        "properties": {
+                            "category": {"type": "string"},
+                            "summary": {"type": "string"},
+                        },
+                    },
+                },
+            },
+            "required": ["sections"],
+        }
+
+        logger.info("Applying SEO recommendations via llm_text_gen")
+
+        # llm_text_gen is synchronous; run it in a worker thread so the event
+        # loop is not blocked while the provider responds.
+        result = await asyncio.to_thread(
+            llm_text_gen,
+            prompt,
+            None,
+            schema,
+        )
+
+        error_msg = None
+        if not result:
+            error_msg = "No response from text generator"
+        elif not isinstance(result, dict):
+            # A structured (dict) response is required to merge sections;
+            # anything else (e.g. raw text) cannot be mapped back safely.
+            error_msg = "Unexpected response format from text generator"
+        elif result.get("error"):
+            error_msg = result.get("error")
+
+        if error_msg is not None:
+            logger.error(f"SEO recommendation application failed: {error_msg}")
+            return {"success": False, "error": error_msg}
+
+        raw_sections = result.get("sections", []) or []
+        if not isinstance(raw_sections, list):
+            raw_sections = []
+
+        # Identifier each original section is expected to come back under.
+        original_ids = [
+            str(
+                original.get("id")
+                or original.get("section_id")
+                or f"section_{index + 1}"
+            ).strip()
+            for index, original in enumerate(sections)
+        ]
+        known_ids = set(original_ids)
+
+        # Build lookup table from updated sections using their identifiers
+        updated_map: Dict[str, Dict[str, Any]] = {}
+        for updated in raw_sections:
+            if not isinstance(updated, dict):
+                continue
+            section_id = self._response_section_id(updated)
+            if section_id:
+                updated_map[section_id] = updated
+
+        if not updated_map and raw_sections:
+            logger.warning("Updated sections missing identifiers; falling back to positional mapping")
+
+        normalized_sections: List[Dict[str, Any]] = []
+        for index, original in enumerate(sections):
+            section_id = original_ids[index]
+            candidate = updated_map.get(section_id)
+
+            if candidate is None and index < len(raw_sections):
+                positional = raw_sections[index]
+                # Trust position only when the entry is not already identified
+                # as a *different* original section; otherwise a dropped
+                # section would silently receive its neighbour's content.
+                if (
+                    isinstance(positional, dict)
+                    and self._response_section_id(positional) not in known_ids
+                ):
+                    candidate = positional
+
+            original_heading = (
+                original.get("heading")
+                or original.get("title")
+                or f"Section {index + 1}"
+            )
+            original_content = self._normalize_content(original.get("content", ""))
+
+            if candidate is None:
+                # Nothing usable came back for this section: keep it as-is.
+                normalized_sections.append(
+                    {
+                        "id": section_id,
+                        "heading": original_heading,
+                        "content": original_content,
+                        "notes": original.get("notes", []),
+                    }
+                )
+                continue
+
+            normalized_sections.append(
+                {
+                    "id": section_id,
+                    "heading": (
+                        candidate.get("heading")
+                        or candidate.get("title")
+                        or original_heading
+                    ),
+                    # Never blank out a section because the model omitted its body.
+                    "content": self._normalize_content(candidate.get("content")) or original_content,
+                    "notes": candidate.get("notes") or [],
+                }
+            )
+
+        applied = result.get("applied_recommendations") or []
+
+        logger.info("SEO recommendations applied successfully")
+
+        return {
+            "success": True,
+            "title": result.get("title") or title,
+            "sections": normalized_sections,
+            "applied": applied,
+        }
+
+    @staticmethod
+    def _response_section_id(section: Dict[str, Any]) -> str:
+        """Return the identifier of a section from the generator response ('' if none)."""
+        return str(
+            section.get("id")
+            or section.get("section_id")
+            or section.get("heading")
+            or ""
+        ).strip()
+
+    @staticmethod
+    def _normalize_content(value: Any) -> str:
+        """Coerce section content (string, list of paragraphs, or None) to stripped text."""
+        if value is None:
+            return ""
+        if isinstance(value, list):
+            return "\n\n".join(str(p).strip() for p in value if p).strip()
+        return str(value).strip()
+
+    def _build_prompt(
+        self,
+        *,
+        title: str,
+        sections: List[Dict[str, Any]],
+        outline: List[Dict[str, Any]],
+        research: Dict[str, Any],
+        recommendations: List[Dict[str, Any]],
+        persona: Dict[str, Any],
+        tone: str | None,
+        audience: str | None,
+    ) -> str:
+        """Construct the prompt that asks the model to apply the recommendations.
+
+        Each section is listed with the same identifier that
+        ``apply_recommendations`` later uses to match the response (``id``,
+        then ``section_id``, then ``section_<position>``), so sections without
+        an explicit id can still be mapped back. ``None`` is tolerated for
+        ``outline``, ``research`` and its nested keyword data.
+
+        Returns:
+            The complete prompt text.
+        """
+
+        sections_str = []
+        for index, section in enumerate(sections):
+            section_id = str(
+                section.get("id")
+                or section.get("section_id")
+                or f"section_{index + 1}"
+            ).strip()
+            sections_str.append(
+                f"ID: {section_id}, Heading: {section.get('heading', 'Untitled')}\n"
+                f"Current Content:\n{section.get('content', '')}\n"
+            )
+
+        outline_str = "\n".join(
+            f"- {item.get('heading', 'Section')} (Target words: {item.get('target_words', 'N/A')})"
+            for item in (outline or [])
+        )
+
+        # Any level of the research payload may be missing or None.
+        research_summary = (research.get("keyword_analysis") if research else None) or {}
+        primary = research_summary.get("primary") or []
+        primary_keywords = ", ".join(str(keyword) for keyword in primary[:10]) or "None"
+
+        recommendations_str = [
+            f"Category: {rec.get('category', 'General')} | Priority: {rec.get('priority', 'Medium')}\n"
+            f"Recommendation: {rec.get('recommendation', '')}\n"
+            f"Impact: {rec.get('impact', '')}\n"
+            for rec in recommendations
+        ]
+
+        persona_str = (
+            f"Persona: {persona}\n"
+            if persona
+            else "Persona: (not provided)\n"
+        )
+
+        style_guidance = []
+        if tone:
+            style_guidance.append(f"Desired tone: {tone}")
+        if audience:
+            style_guidance.append(f"Target audience: {audience}")
+        style_str = "\n".join(style_guidance) if style_guidance else "Maintain current tone and audience alignment."
+
+        prompt = f"""
+You are an expert SEO content strategist. Update the blog content to apply the actionable recommendations.
+
+Current Title: {title}
+
+Primary Keywords (for context): {primary_keywords}
+
+Outline Overview:
+{outline_str or 'No outline supplied'}
+
+Existing Sections:
+{''.join(sections_str)}
+
+Actionable Recommendations to Apply:
+{''.join(recommendations_str)}
+
+{persona_str}
+{style_str}
+
+Instructions:
+1. Carefully apply the recommendations while preserving factual accuracy and research alignment.
+2. Keep section identifiers (IDs) unchanged so the frontend can map updates correctly.
+3. Improve clarity, flow, and SEO optimization per the guidance.
+4. Return updated sections in the requested JSON format.
+5. Provide a short summary of which recommendations were addressed.
+"""
+
+        return prompt
+
+
+__all__ = ["BlogSEORecommendationApplier"]
+
+