chore: bulk commit of local changes across blog writer, SEO dashboard, scheduler, docs-site, and frontend

2026-06-05 12:40:04 +05:30
parent b894bc0abb
commit e54aaa7a3e
74 changed files with 5667 additions and 996 deletions
--- a/backend/services/blog_writer/outline/grounding_engine.py
+++ b/backend/services/blog_writer/outline/grounding_engine.py
@@ -40,8 +40,10 @@ class GroundingContextEngine:
        }
        
        # Temporal relevance patterns
+        cy = str(datetime.now().year)
+        ny = str(datetime.now().year + 1)
        self.temporal_patterns = {
-            'recent': ['2024', '2025', 'latest', 'new', 'recent', 'current', 'updated'],
+            'recent': [cy, ny, 'latest', 'new', 'recent', 'current', 'updated'],
            'trending': ['trend', 'emerging', 'growing', 'increasing', 'rising'],
            'evergreen': ['fundamental', 'basic', 'principles', 'foundation', 'core']
        }
--- a/backend/services/blog_writer/outline/keyword_curator.py
+++ b/backend/services/blog_writer/outline/keyword_curator.py
@@ -137,6 +137,15 @@ class KeywordCurator:
            lines.append(f"### Competitive advantage signal (must weave into narrative): {content_gap[0]}")
            lines.append("   → This is your primary differentiation hook. Surface it prominently in the unique value section.")

+        lines.append("")
+        lines.append("### SUGGESTED SECTION → KEYWORD MAPPING")
+        lines.append("Map each outline section's keyword focus according to its narrative role:")
+        lines.append("- Hook / Introduction → lead with primary and trending keywords for timeliness & relevance")
+        lines.append("- Problem / Pain Point → anchor on secondary and long-tail keywords (informational intent)")
+        lines.append("- Solution / How-To → weave in primary and secondary keywords for solution-oriented search")
+        lines.append("- Comparison / Analysis → embed semantic keywords to prevent topical drift into tangents")
+        lines.append("- Case Studies / Evidence → surface content gap keywords as differentiation proof points")
+        lines.append("- Future / Trends → leverage trending and content gap keywords for forward-looking authority")
        lines.append("")
        lines.append("GUIDELINE: Treat these as the primary keyword anchors. You may include closely related")
        lines.append("intent-matching variations where natural, but avoid inserting every raw research keyword.")
@@ -176,7 +185,11 @@ class KeywordCurator:
        slot_key: Optional[str] = None,
    ) -> List[str]:
        """
-        Pick up to N items from a keyword list.
+        Pick up to N items from a keyword list with diversity sampling.
+        
+        When the raw list is more than twice the limit, selects
+        evenly-spaced entries to capture semantic diversity rather than
+        just the first N entries; shorter lists are simply truncated to N.
        
        Args:
            data: The raw keyword_analysis dict.
@@ -184,11 +197,24 @@ class KeywordCurator:
            slot_key: The internal slot name for looking up the limit.
                      Falls back to source_key if not provided.
        Returns:
-            Sliced list of at most N strings.
+            List of at most N strings with diversity sampling.
        """
        limit_key = slot_key or source_key
        limit = self.SLOTS.get(limit_key, 5)
        raw: Any = data.get(source_key, [])
        if not isinstance(raw, list):
            return []
-        return raw[:limit]
+        if len(raw) <= limit:
+            return raw
+        if len(raw) <= limit * 2:
+            return raw[:limit]
+        indices = set()
+        if limit >= 2:
+            indices.add(0)
+            indices.add(len(raw) - 1)
+            step = (len(raw) - 1) / max(limit - 1, 1)
+            for i in range(1, limit - 1):
+                indices.add(int(round(i * step)))
+        else:
+            indices.add(0)
+        return [raw[i] for i in sorted(indices) if i < len(raw)][:limit]
--- a/backend/services/blog_writer/outline/outline_generator.py
+++ b/backend/services/blog_writer/outline/outline_generator.py
@@ -124,7 +124,8 @@ class OutlineGenerator:
        content_angle_titles = self.title_generator.extract_content_angle_titles(research)
        
        # Combine AI-generated titles with content angles (full primary keywords for title variety)
-        title_options = self.title_generator.combine_title_options(ai_title_options, content_angle_titles, primary_keywords)
+        research_topic = getattr(request, 'topic', '') or ''
+        title_options = self.title_generator.combine_title_options(ai_title_options, content_angle_titles, primary_keywords, research_topic)
        
        logger.info(f"Generated optimized outline with {len(balanced_sections)} sections and {len(title_options)} title options")
        
@@ -224,7 +225,8 @@ class OutlineGenerator:
        content_angle_titles = self.title_generator.extract_content_angle_titles(research)
        
        # Combine AI-generated titles with content angles (full primary keywords for title variety)
-        title_options = self.title_generator.combine_title_options(ai_title_options, content_angle_titles, primary_keywords)
+        research_topic = getattr(request, 'topic', '') or ''
+        title_options = self.title_generator.combine_title_options(ai_title_options, content_angle_titles, primary_keywords, research_topic)
        
        await task_manager.update_progress(task_id, "✅ Outline generation and optimization completed successfully!")
        
--- a/backend/services/blog_writer/outline/prompt_builder.py
+++ b/backend/services/blog_writer/outline/prompt_builder.py
@@ -36,12 +36,56 @@ class PromptBuilder:
        competitor_text = ', '.join(research.competitor_analysis.get('top_competitors', [])) if research and research.competitor_analysis else "Not available"
        opportunity_text = ', '.join(research.competitor_analysis.get('opportunities', [])) if research and research.competitor_analysis else "Not available"
        advantages_text = ', '.join(research.competitor_analysis.get('competitive_advantages', [])) if research and research.competitor_analysis else "Not available"
+        competitor_headings_text = ', '.join(research.competitor_analysis.get('competitor_headings', [])[:3]) if research and research.competitor_analysis and research.competitor_analysis.get('competitor_headings') else ""
        
        # Extract additional UI-mapped context fields
        analysis_insights_text = (research.keyword_analysis.get('analysis_insights', '') or '') if research and research.keyword_analysis else ''
        market_positioning_text = (research.competitor_analysis.get('market_positioning', '') or '') if research and research.competitor_analysis else ''
        difficulty_score = research.keyword_analysis.get('difficulty', None) if research and research.keyword_analysis else None

+        # Extract top 3 authoritative source excerpts as factual data points
+        source_excerpts_text = ""
+        if sources:
+            sorted_sources = sorted(
+                [s for s in sources if (s.excerpt or s.summary)],
+                key=lambda s: s.credibility_score or 0.8, reverse=True
+            )[:3]
+            excerpts = []
+            for i, src in enumerate(sorted_sources, 1):
+                excerpt = src.excerpt or src.summary or ""
+                if len(excerpt) > 300:
+                    excerpt = excerpt[:297] + "..."
+                excerpts.append(f"  {i}. \"{src.title}\" — {excerpt}")
+            if excerpts:
+                source_excerpts_text = "FACTUAL DATA POINTS FROM RESEARCH:\n" + "\n".join(excerpts)
+
+        # Extract recency: newest source publication date
+        newest_date_str = ""
+        if sources:
+            valid_dates = [s.published_at for s in sources if s.published_at]
+            if valid_dates:
+                try:
+                    parsed = [d for d in valid_dates if d[:4].isdigit()]
+                    if parsed:
+                        sorted_dates = sorted(parsed, reverse=True)
+                        newest_date_str = f"Most Recent Source: {sorted_dates[0]}"
+                except Exception:
+                    pass
+
+        # Extract the first 3 grounding evidence snippets (over 20 chars) as verified data points
+        grounding_evidence_text = ""
+        if research and research.grounding_metadata and research.grounding_metadata.grounding_supports:
+            supports = research.grounding_metadata.grounding_supports
+            top_supports = [s for s in supports if s.segment_text and len(s.segment_text) > 20][:3]
+            if top_supports:
+                evidence_parts = []
+                for i, s in enumerate(top_supports, 1):
+                    text = s.segment_text[:250]
+                    if len(s.segment_text) > 250:
+                        text += "..."
+                    evidence_parts.append(f"  {i}. {text}")
+                grounding_evidence_text = "VERIFIED EVIDENCE (high-confidence snippets):\n" + "\n".join(evidence_parts)
+
        # Build selected angle prominence section
        if selected_content_angle and selected_content_angle.strip():
            selected_angle_section = f"""
@@ -106,8 +150,14 @@ Top Competitors: {competitor_text}
 Market Opportunities: {opportunity_text}
 Competitive Advantages: {advantages_text}
 {f"Market Positioning: {market_positioning_text}" if market_positioning_text else ""}
+{f"Competitor Headings (AVOID duplicating): {competitor_headings_text}" if competitor_headings_text else ""}

 RESEARCH SOURCES: {len(sources)} authoritative sources available
+{newest_date_str}
+
+{source_excerpts_text}
+
+{grounding_evidence_text}

 {f"CUSTOM INSTRUCTIONS: {custom_instructions}" if custom_instructions else ""}

--- a/backend/services/blog_writer/outline/title_generator.py
+++ b/backend/services/blog_writer/outline/title_generator.py
@@ -54,58 +54,58 @@ class TitleGenerator:
        Returns:
            Formatted title string
        """
-        if not angle or len(angle.strip()) < 10:  # Too short to be a good title
+        if not angle or len(angle.strip()) < 10:
            return ""
        
-        # Clean up the angle
        cleaned_angle = angle.strip()
        
-        # Capitalize first letter of each sentence and proper nouns
-        sentences = cleaned_angle.split('. ')
-        formatted_sentences = []
-        for sentence in sentences:
-            if sentence.strip():
-                # Use title case for better formatting
-                formatted_sentence = sentence.strip().title()
-                formatted_sentences.append(formatted_sentence)
-        
-        formatted_title = '. '.join(formatted_sentences)
-        
-        # Ensure it ends with proper punctuation
-        if not formatted_title.endswith(('.', '!', '?')):
-            formatted_title += '.'
+        # Use sentence case: capitalize first letter, rest as-is
+        if cleaned_angle:
+            cleaned_angle = cleaned_angle[0].upper() + cleaned_angle[1:]
        
        # Limit length to reasonable blog title size
-        if len(formatted_title) > 200:
-            formatted_title = formatted_title[:197] + "..."
+        if len(cleaned_angle) > 120:
+            cleaned_angle = cleaned_angle[:117] + "..."
        
-        return formatted_title
+        return cleaned_angle
    
-    def combine_title_options(self, ai_titles: List[str], content_angle_titles: List[str], primary_keywords: List[str]) -> List[str]:
+    def combine_title_options(self, ai_titles: List[str], content_angle_titles: List[str], primary_keywords: List[str], research_topic: str = "") -> List[str]:
        """
        Combine AI-generated titles with content angle titles, ensuring variety and quality.
        
+        AI titles (proper SEO titles generated by LLM) take priority.
+        Content angle titles (long-format descriptions) are used as fallback.
+        The research topic, then the first primary keyword, are last-resort fallbacks when nothing else exists.
+        
        Args:
-            ai_titles: AI-generated title options
-            content_angle_titles: Titles derived from content angles
+            ai_titles: AI-generated title options (proper blog titles, 50-65 chars)
+            content_angle_titles: Titles derived from content angles (longer, descriptive)
            primary_keywords: Primary keywords for fallback generation
+            research_topic: Original user research topic, used as a fallback when no titles exist
            
        Returns:
            Combined list of title options (max 6 total)
        """
        all_titles = []
        
-        # Add content angle titles first (these are research-based and valuable)
-        for title in content_angle_titles[:3]:  # Limit to top 3 content angles
-            if title and title not in all_titles:
-                all_titles.append(title)
-        
-        # Add AI-generated titles
+        # 1. AI-generated titles first (proper SEO titles from LLM)
        for title in ai_titles:
            if title and title not in all_titles:
                all_titles.append(title)
        
-        # Note: Removed fallback titles as requested - only use research and AI-generated titles
+        # 2. Content angle titles as fallback (research-based, but verbose)
+        for title in content_angle_titles[:3]:
+            if title and title not in all_titles:
+                all_titles.append(title)
+        
+        # 3. Research topic as fallback when nothing was generated
+        if not all_titles and research_topic:
+            all_titles.append(research_topic)
+        
+        # 4. Primary keyword fallback as absolute last resort
+        if not all_titles and primary_keywords:
+            kw = primary_keywords[0]
+            all_titles.append(kw)
        
        # Limit to 6 titles maximum for UI usability
        final_titles = all_titles[:6]
@@ -115,9 +115,10 @@ class TitleGenerator:
    
    def generate_fallback_titles(self, primary_keywords: List[str]) -> List[str]:
        """Generate fallback titles when AI generation fails."""
+        from datetime import datetime
        primary_keyword = primary_keywords[0] if primary_keywords else "Topic"
        return [
            f"The Complete Guide to {primary_keyword}",
            f"{primary_keyword}: Everything You Need to Know",
-            f"How to Master {primary_keyword} in 2024"
+            f"How to Master {primary_keyword} in {datetime.now().year}"
        ]
--- a/backend/services/blog_writer/research/data_filter.py
+++ b/backend/services/blog_writer/research/data_filter.py
@@ -432,7 +432,7 @@ class ResearchDataFilter:
            'how to', 'guide', 'tutorial', 'steps', 'process', 'method',
            'best practices', 'tips', 'strategies', 'techniques', 'approach',
            'comparison', 'vs', 'versus', 'difference', 'pros and cons',
-            'trends', 'future', '2024', '2025', 'emerging', 'new'
+            'trends', 'future', str(datetime.now().year), str(datetime.now().year + 1), 'emerging', 'new'
        ]
        
        for indicator in actionable_indicators:
--- a/backend/services/blog_writer/research/research_service.py
+++ b/backend/services/blog_writer/research/research_service.py
@@ -720,7 +720,7 @@ class ResearchService:
                url=src.get("url", ""),
                excerpt=src.get("content", "")[:500] if src.get("content") else f"Source from {src.get('title', 'web')}",
                credibility_score=float(src.get("credibility_score", 0.8)),
-                published_at=str(src.get("publication_date", "2024-01-01")),
+                published_at=str(src.get("publication_date", f"{datetime.now().year}-01-01")),
                index=src.get("index"),
                source_type=src.get("type", "web")
            )
--- a/backend/services/blog_writer/research/research_strategies.py
+++ b/backend/services/blog_writer/research/research_strategies.py
@@ -6,6 +6,7 @@ Different strategies for executing research based on depth and focus.

 from abc import ABC, abstractmethod
 from typing import Dict, Any
+from datetime import datetime
 from loguru import logger

 from models.blog_models import BlogResearchRequest, ResearchMode, ResearchConfig
@@ -87,7 +88,7 @@ Provide analysis in this EXACT format:
 - For each: Quote/claim, source URL, published date, metric/context.

 REQUIREMENTS:
- Every claim MUST include a source URL (authoritative, recent: 2024-2025 preferred).
+- Every claim MUST include a source URL (authoritative, recent: {datetime.now().year}-{datetime.now().year + 1} preferred).
 - Use concrete numbers, dates, outcomes; avoid generic advice.
 - Keep bullets tight and scannable for spoken narration."""
        return prompt.strip()
@@ -116,7 +117,7 @@ Research Topic: "{topic}"{date_filter}{source_filter}

 Provide COMPLETE analysis in this EXACT format:

-## WHAT'S CHANGED (2024-2025)
+## WHAT'S CHANGED ({datetime.now().year}-{datetime.now().year + 1})
 [5-7 concise trend bullets with numbers + source URLs]

 ## PROOF & NUMBERS
@@ -151,7 +152,7 @@ Primary (3), Secondary (8-10), Long-tail (5-7) with intent hints.
 VERIFICATION REQUIREMENTS:
 - Minimum 2 authoritative sources per major claim.
 - Prefer industry reports > research papers > news > blogs.
- 2024-2025 data strongly preferred.
+- {datetime.now().year}-{datetime.now().year + 1} data strongly preferred.
 - All numbers must include timeframe and methodology.
 - Every bullet must be concise for spoken narration and actionable for {target_audience}."""
        return prompt.strip()
@@ -213,7 +214,7 @@ REQUIREMENTS:
 - Cite all claims with authoritative source URLs
 - Include specific numbers, dates, examples
 - Focus on actionable insights for {target_audience}
- Use 2024-2025 data when available"""
+- Use {datetime.now().year}-{datetime.now().year + 1} data when available"""
        return prompt.strip()