story writer backend migration complete, Blog writer SEO and story writer backend migration complete, Blog writer SEO and story writer frontend migration complete

2025-11-13 16:14:26 +05:30
parent 7191c7e7f0
commit 3b9356e2c8
124 changed files with 20055 additions and 1208 deletions
--- a/backend/services/blog_writer/content/introduction_generator.py
+++ b/backend/services/blog_writer/content/introduction_generator.py
@@ -0,0 +1,186 @@
+"""
+Introduction Generator - Generates varied blog introductions based on content and research.
+
+Generates 3 different introduction options for the user to choose from.
+"""
+
+from typing import Dict, Any, List
+from loguru import logger
+
+from models.blog_models import BlogResearchResponse, BlogOutlineSection
+
+
+class IntroductionGenerator:
+    """Generates three blog introduction options from research and section content."""
+
+    def __init__(self):
+        """Initialize the introduction generator (no state is kept)."""
+        pass
+
+    def build_introduction_prompt(
+        self,
+        blog_title: str,
+        research: BlogResearchResponse,
+        outline: List[BlogOutlineSection],
+        sections_content: Dict[str, str],
+        primary_keywords: List[str],
+        search_intent: str
+    ) -> str:
+        """Build the LLM prompt asking for three 80-120 word introductions."""
+
+        # Only the suggested angles are read from the research payload here;
+        # a missing/None value degrades to "General insights" in the prompt.
+        content_angles = research.suggested_angles or []
+
+        # Get a summary of the first few sections for context
+        section_summaries = []
+        for i, section in enumerate(outline[:3], 1):
+            section_id = section.id
+            content = sections_content.get(section_id, '')
+            if content:
+                # Take first 200 chars as summary
+                summary = content[:200] + '...' if len(content) > 200 else content
+                section_summaries.append(f"{i}. {section.heading}: {summary}")
+
+        sections_text = '\n'.join(section_summaries) if section_summaries else "Content sections are being generated."
+
+        primary_kw_text = ', '.join(primary_keywords) if primary_keywords else "the topic"
+        content_angle_text = ', '.join(content_angles[:3]) if content_angles else "General insights"
+
+        return f"""Generate exactly 3 varied blog introductions for the following blog post.
+
+BLOG TITLE: {blog_title}
+
+PRIMARY KEYWORDS: {primary_kw_text}
+SEARCH INTENT: {search_intent}
+CONTENT ANGLES: {content_angle_text}
+
+BLOG CONTENT SUMMARY:
+{sections_text}
+
+REQUIREMENTS FOR EACH INTRODUCTION:
+- 80-120 words in length
+- Hook the reader immediately with a compelling opening
+- Clearly state the value proposition and what readers will learn
+- Include the primary keyword naturally within the first 2 sentences
+- Each introduction should have a different angle/approach:
+  1. First: Problem-focused (highlight the challenge readers face)
+  2. Second: Benefit-focused (emphasize the value and outcomes)
+  3. Third: Story/statistic-focused (use a compelling fact or narrative hook)
+- Maintain a professional yet engaging tone
+- Avoid generic phrases - be specific and benefit-driven
+
+Return ONLY a JSON array of exactly 3 introductions:
+[
+  "First introduction (80-120 words, problem-focused)",
+  "Second introduction (80-120 words, benefit-focused)",
+  "Third introduction (80-120 words, story/statistic-focused)"
+]"""
+
+    def get_introduction_schema(self) -> Dict[str, Any]:
+        """Get the JSON schema; string lengths are characters sized for 80-120 words."""
+        return {
+            "type": "array",
+            "items": {
+                "type": "string",
+                "minLength": 300,
+                "maxLength": 1200
+            },
+            "minItems": 3,
+            "maxItems": 3
+        }
+
+    async def generate_introductions(
+        self,
+        blog_title: str,
+        research: BlogResearchResponse,
+        outline: List[BlogOutlineSection],
+        sections_content: Dict[str, str],
+        primary_keywords: List[str],
+        search_intent: str,
+        user_id: str
+    ) -> List[str]:
+        """Generate 3 varied blog introductions.
+
+        Args:
+            blog_title: The blog post title
+            research: Research data with keywords and insights
+            outline: Blog outline sections
+            sections_content: Dictionary mapping section IDs to their content
+            primary_keywords: Primary keywords for the blog
+            search_intent: Search intent (informational, commercial, etc.)
+            user_id: User ID for API calls
+
+        Returns:
+            List of exactly 3 introductions (padded or static fallbacks if generation falls short)
+
+        Raises:
+            ValueError: If user_id is empty
+        """
+        from services.llm_providers.main_text_generation import llm_text_gen
+
+        if not user_id:
+            raise ValueError("user_id is required for introduction generation")
+
+        prompt = self.build_introduction_prompt(
+            blog_title=blog_title,
+            research=research,
+            outline=outline,
+            sections_content=sections_content,
+            primary_keywords=primary_keywords,
+            search_intent=search_intent
+        )
+        schema = self.get_introduction_schema()
+
+        logger.info(f"Generating blog introductions for user {user_id}")
+
+        try:
+            # Generate introductions using structured JSON response
+            result = llm_text_gen(
+                prompt=prompt,
+                json_struct=schema,
+                system_prompt="You are an expert content writer specializing in creating compelling blog introductions that hook readers and clearly communicate value.",
+                user_id=user_id
+            )
+
+            # Handle response - could be array directly or wrapped in dict
+            if isinstance(result, list):
+                introductions = result
+            elif isinstance(result, dict):
+                # Try common keys
+                introductions = result.get('introductions', result.get('options', result.get('intros', [])))
+                if not introductions and isinstance(result.get('response'), list):
+                    introductions = result['response']
+            else:
+                logger.warning(f"Unexpected introduction generation result type: {type(result)}")
+                introductions = []
+
+            # Validate and clean introductions. Lengths are measured in words,
+            # matching the 80-120 word target in the prompt (with some slack);
+            # a character-based bound would reject every intro of the requested size.
+            cleaned_introductions = [
+                intro.strip()
+                for intro in introductions
+                if isinstance(intro, str) and 50 <= len(intro.split()) <= 200
+            ]
+
+            # Ensure we have exactly 3 introductions
+            if len(cleaned_introductions) < 3:
+                logger.warning(f"Generated only {len(cleaned_introductions)} introductions, expected 3")
+                # Pad with placeholder if needed
+                while len(cleaned_introductions) < 3:
+                    cleaned_introductions.append(f"{blog_title} - A comprehensive guide covering essential insights and practical strategies.")
+
+            # Return exactly 3 introductions
+            return cleaned_introductions[:3]
+
+        except Exception as e:
+            logger.error(f"Failed to generate introductions: {e}")
+            # Fallback: generate simple introductions
+            fallback_introductions = [
+                f"In this comprehensive guide, we'll explore {primary_keywords[0] if primary_keywords else 'essential insights'} and provide actionable strategies.",
+                f"Discover everything you need to know about {primary_keywords[0] if primary_keywords else 'this topic'} and how it can transform your approach.",
+                f"Whether you're new to {primary_keywords[0] if primary_keywords else 'this topic'} or looking to deepen your understanding, this guide has you covered."
+            ]
+            return fallback_introductions
+
--- a/backend/services/blog_writer/outline/prompt_builder.py
+++ b/backend/services/blog_writer/outline/prompt_builder.py
@@ -5,7 +5,6 @@ Constructs comprehensive prompts with research data, keywords, and strategic req
 """

 from typing import Dict, Any, List
-from loguru import logger


 class PromptBuilder:
@@ -23,7 +22,18 @@ class PromptBuilder:
        # Use the filtered research data (already cleaned by ResearchDataFilter)
        research = request.research
        
-        return f"""Create a comprehensive blog outline for: {', '.join(primary_keywords)}
+        primary_kw_text = ', '.join(primary_keywords) if primary_keywords else (request.topic or ', '.join(getattr(request.research, 'original_keywords', []) or ['the target topic']))
+        secondary_kw_text = ', '.join(secondary_keywords) if secondary_keywords else "None provided"
+        long_tail_text = ', '.join(research.keyword_analysis.get('long_tail', [])) if research and research.keyword_analysis else "None discovered"
+        semantic_text = ', '.join(research.keyword_analysis.get('semantic_keywords', [])) if research and research.keyword_analysis else "None discovered"
+        trending_text = ', '.join(research.keyword_analysis.get('trending_terms', [])) if research and research.keyword_analysis else "None discovered"
+        content_gap_text = ', '.join(research.keyword_analysis.get('content_gaps', [])) if research and research.keyword_analysis else "None identified"
+        content_angle_text = ', '.join(content_angles) if content_angles else "No explicit angles provided; infer compelling angles from research insights."
+        competitor_text = ', '.join(research.competitor_analysis.get('top_competitors', [])) if research and research.competitor_analysis else "Not available"
+        opportunity_text = ', '.join(research.competitor_analysis.get('opportunities', [])) if research and research.competitor_analysis else "Not available"
+        advantages_text = ', '.join(research.competitor_analysis.get('competitive_advantages', [])) if research and research.competitor_analysis else "Not available"
+
+        return f"""Create a comprehensive blog outline for: {primary_kw_text}

 CONTEXT:
 Search Intent: {search_intent}
@@ -32,19 +42,19 @@ Industry: {getattr(request.persona, 'industry', 'General') if request.persona el
 Audience: {getattr(request.persona, 'target_audience', 'General') if request.persona else 'General'}

 KEYWORDS:
-Primary: {', '.join(primary_keywords)}
-Secondary: {', '.join(secondary_keywords)}
-Long-tail: {', '.join(research.keyword_analysis.get('long_tail', []))}
-Semantic: {', '.join(research.keyword_analysis.get('semantic_keywords', []))}
-Trending: {', '.join(research.keyword_analysis.get('trending_terms', []))}
-Content Gaps: {', '.join(research.keyword_analysis.get('content_gaps', []))}
+Primary: {primary_kw_text}
+Secondary: {secondary_kw_text}
+Long-tail: {long_tail_text}
+Semantic: {semantic_text}
+Trending: {trending_text}
+Content Gaps: {content_gap_text}

-CONTENT ANGLES: {', '.join(content_angles)}
+CONTENT ANGLES / STORYLINES: {content_angle_text}

 COMPETITIVE INTELLIGENCE:
-Top Competitors: {', '.join(research.competitor_analysis.get('top_competitors', []))}
-Market Opportunities: {', '.join(research.competitor_analysis.get('opportunities', []))}
-Competitive Advantages: {', '.join(research.competitor_analysis.get('competitive_advantages', []))}
+Top Competitors: {competitor_text}
+Market Opportunities: {opportunity_text}
+Competitive Advantages: {advantages_text}

 RESEARCH SOURCES: {len(sources)} authoritative sources available

@@ -52,6 +62,7 @@ RESEARCH SOURCES: {len(sources)} authoritative sources available

 STRATEGIC REQUIREMENTS:
 - Create SEO-optimized headings with natural keyword integration
+- Surface the strongest research-backed angles within the outline
 - Build logical narrative flow from problem to solution
 - Include data-driven insights from research sources
 - Address content gaps and market opportunities
@@ -59,23 +70,34 @@ STRATEGIC REQUIREMENTS:
 - Ensure engaging, actionable content throughout

 Return JSON format:
-{{
-            "outline": [
-                {{
-                    "heading": "Section heading with primary keyword",
-                    "subheadings": ["Subheading 1", "Subheading 2", "Subheading 3"],
-                    "key_points": ["Key point 1", "Key point 2", "Key point 3"],
+{{
+    "title_options": [
+        "Title option 1",
+        "Title option 2",
+        "Title option 3"
+    ],
+    "outline": [
+        {{
+            "heading": "Section heading with primary keyword",
+            "subheadings": ["Subheading 1", "Subheading 2", "Subheading 3"],
+            "key_points": ["Key point 1", "Key point 2", "Key point 3"],
            "target_words": 300,
+            "keywords": ["primary keyword", "secondary keyword"]
+        }}
+    ]
+}}"""
    
    def get_outline_schema(self) -> Dict[str, Any]:
        """Get the structured JSON schema for outline generation."""
        return {
            "type": "object",
            "properties": {
+                "title_options": {
+                    "type": "array",
+                    "items": {
+                        "type": "string"
+                    }
+                },
                "outline": {
                    "type": "array",
                    "items": {
@@ -100,6 +122,6 @@ Return JSON format:
                    }
                }
            },
-            "required": ["outline"],
-            "propertyOrdering": ["outline"]
+            "required": ["title_options", "outline"],
+            "propertyOrdering": ["title_options", "outline"]
        }
--- a/backend/services/blog_writer/outline/seo_title_generator.py
+++ b/backend/services/blog_writer/outline/seo_title_generator.py
@@ -0,0 +1,198 @@
+"""
+SEO Title Generator - Specialized service for generating SEO-optimized blog titles.
+
+Generates 5 premium SEO-optimized titles using research data and outline context.
+"""
+
+from typing import Dict, Any, List
+from loguru import logger
+
+from models.blog_models import BlogResearchResponse, BlogOutlineSection
+
+
+class SEOTitleGenerator:
+    """Generates five SEO-optimized blog title options from research and outline data."""
+
+    def __init__(self):
+        """Initialize the SEO title generator (no state is kept)."""
+        pass
+
+    def build_title_prompt(
+        self,
+        research: BlogResearchResponse,
+        outline: List[BlogOutlineSection],
+        primary_keywords: List[str],
+        secondary_keywords: List[str],
+        content_angles: List[str],
+        search_intent: str,
+        word_count: int = 1500
+    ) -> str:
+        """Build the SEO title prompt (``word_count`` is accepted but not used in the text)."""
+
+        # Extract key research insights; a missing, None or empty list falls back to placeholder text
+        keyword_analysis = research.keyword_analysis or {}
+        competitor_analysis = research.competitor_analysis or {}
+
+        primary_kw_text = ', '.join(primary_keywords) if primary_keywords else "the target topic"
+        secondary_kw_text = ', '.join(secondary_keywords) if secondary_keywords else "None provided"
+        long_tail_text = ', '.join(keyword_analysis.get('long_tail') or []) or "None discovered"
+        semantic_text = ', '.join(keyword_analysis.get('semantic_keywords') or []) or "None discovered"
+        trending_text = ', '.join(keyword_analysis.get('trending_terms') or []) or "None discovered"
+        content_gap_text = ', '.join(keyword_analysis.get('content_gaps') or []) or "None identified"
+        content_angle_text = ', '.join(content_angles) if content_angles else "No explicit angles provided"
+
+        # Extract outline structure summary
+        outline_summary = []
+        for i, section in enumerate(outline[:5], 1):  # Limit to first 5 sections for context
+            outline_summary.append(f"{i}. {section.heading}")
+            if section.subheadings:
+                outline_summary.append(f"   Subtopics: {', '.join(section.subheadings[:3])}")
+
+        outline_text = '\n'.join(outline_summary) if outline_summary else "No outline available"
+
+        return f"""Generate exactly 5 SEO-optimized blog titles for: {primary_kw_text}
+
+RESEARCH CONTEXT:
+Primary Keywords: {primary_kw_text}
+Secondary Keywords: {secondary_kw_text}
+Long-tail Keywords: {long_tail_text}
+Semantic Keywords: {semantic_text}
+Trending Terms: {trending_text}
+Content Gaps: {content_gap_text}
+Search Intent: {search_intent}
+Content Angles: {content_angle_text}
+
+OUTLINE STRUCTURE:
+{outline_text}
+
+COMPETITIVE INTELLIGENCE:
+Top Competitors: {', '.join(competitor_analysis.get('top_competitors') or []) or 'Not available'}
+Market Opportunities: {', '.join(competitor_analysis.get('opportunities') or []) or 'Not available'}
+
+SEO REQUIREMENTS:
+- Each title must be 50-65 characters (optimal for search engine display)
+- Include the primary keyword within the first 55 characters
+- Highlight a unique value proposition from the research angles
+- Use power words that drive clicks (e.g., "Ultimate", "Complete", "Essential", "Proven")
+- Avoid generic phrasing - be specific and benefit-focused
+- Target the search intent: {search_intent}
+- Ensure titles are compelling and click-worthy
+
+Return ONLY a JSON array of exactly 5 titles:
+[
+  "Title 1 (50-65 chars)",
+  "Title 2 (50-65 chars)",
+  "Title 3 (50-65 chars)",
+  "Title 4 (50-65 chars)",
+  "Title 5 (50-65 chars)"
+]"""
+
+    def get_title_schema(self) -> Dict[str, Any]:
+        """Get the JSON schema for title generation (5 strings of 50-65 characters)."""
+        return {
+            "type": "array",
+            "items": {
+                "type": "string",
+                "minLength": 50,
+                "maxLength": 65
+            },
+            "minItems": 5,
+            "maxItems": 5
+        }
+
+    async def generate_seo_titles(
+        self,
+        research: BlogResearchResponse,
+        outline: List[BlogOutlineSection],
+        primary_keywords: List[str],
+        secondary_keywords: List[str],
+        content_angles: List[str],
+        search_intent: str,
+        word_count: int,
+        user_id: str
+    ) -> List[str]:
+        """Generate SEO-optimized titles using research and outline data.
+
+        Args:
+            research: Research data with keywords and insights
+            outline: Blog outline sections
+            primary_keywords: Primary keywords for the blog
+            secondary_keywords: Secondary keywords
+            content_angles: Content angles from research
+            search_intent: Search intent (informational, commercial, etc.)
+            word_count: Target word count
+            user_id: User ID for API calls
+
+        Returns:
+            List of exactly 5 titles (padded or static fallbacks if generation falls short)
+
+        Raises:
+            ValueError: If user_id is empty
+        """
+        from services.llm_providers.main_text_generation import llm_text_gen
+
+        if not user_id:
+            raise ValueError("user_id is required for title generation")
+
+        prompt = self.build_title_prompt(
+            research=research,
+            outline=outline,
+            primary_keywords=primary_keywords,
+            secondary_keywords=secondary_keywords,
+            content_angles=content_angles,
+            search_intent=search_intent,
+            word_count=word_count
+        )
+        schema = self.get_title_schema()
+
+        logger.info(f"Generating SEO-optimized titles for user {user_id}")
+
+        try:
+            # Generate titles using structured JSON response
+            result = llm_text_gen(
+                prompt=prompt,
+                json_struct=schema,
+                system_prompt="You are an expert SEO content strategist specializing in creating compelling, search-optimized blog titles.",
+                user_id=user_id
+            )
+
+            # Handle response - could be array directly or wrapped in dict
+            if isinstance(result, list):
+                titles = result
+            elif isinstance(result, dict):
+                # Try common keys
+                titles = result.get('titles', result.get('title_options', result.get('options', [])))
+                if not titles and isinstance(result.get('response'), list):
+                    titles = result['response']
+            else:
+                logger.warning(f"Unexpected title generation result type: {type(result)}")
+                titles = []
+
+            # Validate and clean titles
+            cleaned_titles = []
+            for title in titles:
+                if isinstance(title, str) and len(title.strip()) >= 30:  # Minimum reasonable length
+                    cleaned = title.strip()
+                    # Ensure it's within reasonable bounds (allow slight overflow for quality)
+                    if len(cleaned) <= 70:  # Allow slight overflow for quality
+                        cleaned_titles.append(cleaned)
+
+            # Ensure we have exactly 5 titles
+            if len(cleaned_titles) < 5:
+                logger.warning(f"Generated only {len(cleaned_titles)} titles, expected 5")
+                # Pad with placeholder if needed (shouldn't happen with proper schema)
+                while len(cleaned_titles) < 5:
+                    cleaned_titles.append(f"{primary_keywords[0] if primary_keywords else 'Blog'} - Comprehensive Guide")
+
+            # Return exactly 5 titles
+            return cleaned_titles[:5]
+
+        except Exception as e:
+            logger.error(f"Failed to generate SEO titles: {e}")
+            # Fallback: generate simple titles from keywords
+            fallback_titles = []
+            primary = primary_keywords[0] if primary_keywords else "Blog Post"
+            for i in range(5):
+                fallback_titles.append(f"{primary}: Complete Guide {i+1}")
+            return fallback_titles
+
--- a/backend/services/blog_writer/research/research_service.py
+++ b/backend/services/blog_writer/research/research_service.py
@@ -74,7 +74,9 @@ class ResearchService:
            if cached_result:
                logger.info(f"Returning cached research result for keywords: {request.keywords}")
                blog_writer_logger.log_operation_end("research", 0, success=True, cache_hit=True)
-                return BlogResearchResponse(**cached_result)
+                # Normalize cached data to fix None values in confidence_scores
+                normalized_result = self._normalize_cached_research_data(cached_result)
+                return BlogResearchResponse(**normalized_result)
            
            # User ID validation (validation logic is now in Google Grounding provider)
            if not user_id:
@@ -421,7 +423,9 @@ class ResearchService:
            if cached_result:
                await task_manager.update_progress(task_id, "✅ Found cached research results! Returning instantly...")
                logger.info(f"Returning cached research result for keywords: {request.keywords}")
-                return BlogResearchResponse(**cached_result)
+                # Normalize cached data to fix None values in confidence_scores
+                normalized_result = self._normalize_cached_research_data(cached_result)
+                return BlogResearchResponse(**normalized_result)
            
            # User ID validation
            if not user_id:
@@ -759,6 +763,49 @@ class ResearchService:
        
        return sources

+    def _normalize_cached_research_data(self, cached_data: Dict[str, Any]) -> Dict[str, Any]:
+        """
+        Return a copy of a cached research payload with list fields repaired.
+
+        Cached payloads may carry ``None`` (or another non-list value) where a
+        grounding support is expected to hold a list, so every dict entry of
+        ``grounding_metadata["grounding_supports"]`` is patched on a copy:
+        ``confidence_scores`` and ``grounding_chunk_indices`` become ``[]`` when
+        they are not lists, and a missing/``None`` ``segment_text`` becomes ``""``.
+
+        Args:
+            cached_data: Raw cached value; anything that is not a dict is
+                returned untouched.
+
+        Returns:
+            A shallow copy of ``cached_data`` with normalized grounding
+            supports; the input dict and its nested dicts are not mutated.
+        """
+        if not isinstance(cached_data, dict):
+            return cached_data
+        result = cached_data.copy()
+        raw_metadata = result.get("grounding_metadata")
+        if not raw_metadata:
+            return result
+
+        # A truthy but non-dict metadata value is replaced by an empty dict.
+        metadata = raw_metadata.copy() if isinstance(raw_metadata, dict) else {}
+        supports = metadata.get("grounding_supports")
+        if isinstance(supports, list):
+            fixed_supports = []
+            for entry in supports:
+                if isinstance(entry, dict):
+                    entry = entry.copy()
+                    for list_field in ("confidence_scores", "grounding_chunk_indices"):
+                        if not isinstance(entry.get(list_field), list):
+                            entry[list_field] = []
+                    if entry.get("segment_text") is None:
+                        entry["segment_text"] = ""
+                fixed_supports.append(entry)
+            metadata["grounding_supports"] = fixed_supports
+        result["grounding_metadata"] = metadata
+        return result
+
    def _extract_grounding_metadata(self, gemini_result: Dict[str, Any]) -> GroundingMetadata:
        """Extract detailed grounding metadata from Gemini result."""
        grounding_chunks = []