Merge branch 'recover-stash'

2026-05-23 13:13:18 +05:30
parent 33458c78c0 8f116ef4d1
commit d1cd28d407
40 changed files with 1870 additions and 859 deletions
--- a/backend/services/blog_writer/content/medium_blog_generator.py
+++ b/backend/services/blog_writer/content/medium_blog_generator.py
@@ -122,9 +122,6 @@ class MediumBlogGenerator:
        payload = {
            "title": req.title,
            "globalTargetWords": req.globalTargetWords or 1000,
-            "persona": req.persona.dict() if req.persona else None,
-            "tone": req.tone,
-            "audience": req.audience,
            "sections": [section_block(s) for s in req.sections],
        }

@@ -136,7 +133,6 @@ class MediumBlogGenerator:
            - Industry: {req.persona.industry or 'General'}
            - Tone: {req.persona.tone or 'Professional'}
            - Audience: {req.persona.audience or 'General readers'}
-            - Persona ID: {req.persona.persona_id or 'Default'}
            
            Write content that reflects this persona's expertise and communication style.
            Use industry-specific terminology and examples where appropriate.
@@ -154,40 +150,19 @@ class MediumBlogGenerator:
            "Return ONLY valid JSON with no markdown formatting or explanations."
        )

-        # Build persona-specific content instructions
-        persona_instructions = ""
-        if req.persona:
-            industry = req.persona.industry or 'General'
-            tone = req.persona.tone or 'Professional'
-            audience = req.persona.audience or 'General readers'
-            
-            persona_instructions = f"""
-            PERSONA-DRIVEN CONTENT REQUIREMENTS:
-            - Write as an expert in {industry} industry
-            - Use {tone} tone appropriate for {audience}
-            - Include industry-specific examples and terminology
-            - Demonstrate authority and expertise in the field
-            - Use language that resonates with {audience}
-            - Maintain consistent voice that reflects this persona's expertise
-            """
-
        prompt = (
-            f"Write blog content for the following sections. Each section should be {req.globalTargetWords or 1000} words total, distributed across all sections.\n\n"
+            f"Write blog content for the following sections. Total target: {req.globalTargetWords or 1000} words, distributed across all sections.\n\n"
            f"Blog Title: {req.title}\n\n"
            "For each section, write engaging content that:\n"
            "- Follows the key points provided\n"
            "- Uses the suggested keywords naturally\n"
            "- Meets the target word count\n"
-            "- Maintains professional tone\n"
-            "- References the provided sources when relevant\n"
            "- Breaks content into clear paragraphs (2-4 sentences each)\n"
-            "- Uses double line breaks (\\n\\n) between paragraphs for proper formatting\n"
+            "- Uses double line breaks (\\n\\n) between paragraphs\n"
            "- Starts with an engaging opening paragraph\n"
-            "- Ends with a strong concluding paragraph\n"
-            f"{persona_instructions}\n"
-            "IMPORTANT: Format the 'content' field with proper paragraph breaks using \\n\\n between paragraphs.\n\n"
-            "Return a JSON object with 'title' and 'sections' array. Each section should have 'id', 'heading', 'content', and 'wordCount'.\n\n"
-            f"Sections to write:\n{json.dumps(payload, ensure_ascii=False, indent=2)}"
+            "- Ends with a strong concluding paragraph\n\n"
+            "Return a JSON object with 'title' and 'sections' array. Each section must have 'id', 'heading', 'content', 'wordCount', and 'sources'.\n\n"
+            f"Sections:\n{json.dumps(payload, ensure_ascii=False, indent=2)}"
        )

        try:
@@ -195,7 +170,9 @@ class MediumBlogGenerator:
                prompt=prompt,
                json_struct=schema,
                system_prompt=system,
-                user_id=user_id
+                user_id=user_id,
+                max_tokens=None,
+                temperature=0.3,
            )
        except HTTPException:
            # Re-raise HTTPExceptions (e.g., 429 subscription limit) to preserve error details
--- a/backend/services/blog_writer/research/exa_provider.py
+++ b/backend/services/blog_writer/research/exa_provider.py
@@ -322,7 +322,7 @@ class ExaResearchProvider(BaseProvider):
                'text': getattr(result, 'text', ''),
                'publishedDate': getattr(result, 'publishedDate', ''),
                'author': getattr(result, 'author', ''),
-                'score': getattr(result, 'score', 0.5),
+                'score': (lambda v: v if v is not None else 0.5)(getattr(result, 'score', 0.5)),
            })
        
        # Track usage
--- a/backend/services/database.py
+++ b/backend/services/database.py
@@ -31,6 +31,7 @@ from models.product_marketing_models import Campaign, CampaignProposal, Campaign
 from models.product_asset_models import ProductAsset, ProductStyleTemplate, EcommerceExport
 # Podcast Maker models use SubscriptionBase, but import to ensure models are registered
 from models.podcast_models import PodcastProject
+
 # Research models use SubscriptionBase
 from models.research_models import ResearchProject
 # Video Studio models
--- a/backend/services/gsc_brainstorm_service.py
+++ b/backend/services/gsc_brainstorm_service.py
@@ -2,8 +2,9 @@
 GSC Brainstorm Service for ALwrity.

 Analyzes Google Search Console data to suggest blog topics the user should write about.
-Combines rule-based heuristics (high-impression/low-CTR keywords, near-page-1 positions)
-with LLM-powered strategic recommendations tailored to the user's topic intent.
+Combines rule-based heuristics with LLM-powered strategic recommendations tailored to
+the user's topic intent. Designed for non-SEO-experts: every insight includes plain-English
+explanations of WHY it matters and WHAT to do about it.
 """

 import json
@@ -21,9 +22,10 @@ class GSCBrainstormService:

    Flow:
    1. Fetch real GSC search analytics (query + page data, 30 days)
-    2. Apply rule-based filters (Content Optimization, Content Enhancement, Keyword Gap)
-    3. Generate LLM-powered strategic recommendations contextualised to the user's keywords
-    4. Return structured results
+    2. Compute derived metrics (CTR benchmarks, estimated traffic uplift, content formats)
+    3. Apply rule-based filters (Quick Wins, Optimization, Enhancement, Rising Stars, Page Issues)
+    4. Generate LLM-powered strategic recommendations contextualised to the user's keywords
+    5. Return structured results with all data exposed for rich frontend display
    """

    def __init__(self, gsc_service: GSCService = None):
@@ -39,18 +41,8 @@ class GSCBrainstormService:
        keywords: str,
        site_url: Optional[str] = None,
    ) -> Dict[str, Any]:
-        """
-        Generate blog topic suggestions from the user's GSC data.
-
-        Args:
-            user_id: Clerk user ID (must have GSC connected).
-            keywords: User's 3+ word topic intent (e.g. "content marketing strategy").
-            site_url: Optional site URL; auto-selected from user's first GSC site if omitted.
-
-        Returns:
-            Dict with content_opportunities, keyword_gaps, ai_recommendations, summary.
-        """
        self._user_id = user_id
+
        # 1. Resolve site_url
        if not site_url:
            sites = self.gsc_service.get_site_list(user_id)
@@ -59,6 +51,8 @@ class GSCBrainstormService:
                    "error": "No GSC sites found. Make sure your site is verified in Google Search Console.",
                    "content_opportunities": [],
                    "keyword_gaps": [],
+                    "quick_wins": [],
+                    "page_opportunities": [],
                    "ai_recommendations": {},
                    "summary": {},
                }
@@ -80,6 +74,8 @@ class GSCBrainstormService:
                "error": analytics.get("error", "Failed to fetch GSC data"),
                "content_opportunities": [],
                "keyword_gaps": [],
+                "quick_wins": [],
+                "page_opportunities": [],
                "ai_recommendations": {},
                "summary": {},
            }
@@ -93,9 +89,11 @@ class GSCBrainstormService:

        if not keywords_data:
            return {
-                "error": "No keyword data available for the selected period.",
+                "error": "No keyword data available for the selected period. This usually means your site is new to GSC or hasn't received search traffic yet.",
                "content_opportunities": [],
                "keyword_gaps": [],
+                "quick_wins": [],
+                "page_opportunities": [],
                "ai_recommendations": {},
                "summary": {
                    "site_url": site_url,
@@ -107,18 +105,23 @@ class GSCBrainstormService:
        # 4. Rule-based analysis
        content_opportunities = self._identify_content_opportunities(keywords_data)
        keyword_gaps = self._identify_keyword_gaps(keywords_data)
+        quick_wins = self._identify_quick_wins(keywords_data)
+        page_opportunities = self._identify_page_opportunities(pages_data)

        # 5. Summary metrics
        summary = self._compute_summary(keywords_data, pages_data, site_url, start_date, end_date)

-        # 6. AI recommendations (best-effort; don't fail the whole request on LLM error)
+        # 6. AI recommendations
        ai_recommendations = self._generate_ai_recommendations(
-            keywords_data, pages_data, summary, keywords
+            keywords_data, pages_data, summary, keywords,
+            content_opportunities, quick_wins, keyword_gaps,
        )

        return {
            "content_opportunities": content_opportunities,
            "keyword_gaps": keyword_gaps,
+            "quick_wins": quick_wins,
+            "page_opportunities": page_opportunities,
            "ai_recommendations": ai_recommendations,
            "summary": summary,
        }
@@ -168,39 +171,53 @@ class GSCBrainstormService:
        opportunities: List[Dict[str, Any]] = []

        # Rule 1: Content Optimization — high impressions, low CTR
+        # Meaning: Google is SHOWING your page for this query but people aren't clicking.
+        #          The content probably ranks but title/meta/snippet isn't compelling enough.
        for kw in keywords_data:
            if kw["impressions"] > 500 and kw["ctr"] < 3:
+                estimated_gain = int(kw["impressions"] * 0.05) - kw["clicks"]
                opportunities.append({
                    "type": "Content Optimization",
                    "keyword": kw["keyword"],
                    "opportunity": (
-                        f"Optimize existing content for '{kw['keyword']}' "
-                        f"to improve CTR from {kw['ctr']:.1f}% "
-                        f"(position {kw['position']:.1f})"
+                        f"Your site appears for '{kw['keyword']}' ({kw['impressions']:,} times/month) "
+                        f"but only {kw['ctr']:.1f}% click. Improving your title and meta description "
+                        f"could bring ~{max(estimated_gain, 5)} more clicks/month."
                    ),
-                    "potential_impact": "High",
+                    "potential_impact": "High" if kw["impressions"] > 1000 else "Medium",
                    "current_position": kw["position"],
+                    "current_ctr": kw["ctr"],
                    "impressions": kw["impressions"],
+                    "clicks": kw["clicks"],
+                    "estimated_traffic_gain": max(estimated_gain, 5),
                    "priority": "High" if kw["impressions"] > 1000 else "Medium",
+                    "suggested_format": GSCBrainstormService._suggest_format(kw["keyword"]),
                })

        # Rule 2: Content Enhancement — positions 11-20 with decent impressions
+        # Meaning: You're on page 2 of Google. A small content boost could push you to page 1,
+        #          where CTR increases dramatically (page 1 gets ~95% of all clicks).
        for kw in keywords_data:
            if 10 < kw["position"] <= 20 and kw["impressions"] > 100:
+                estimated_gain = int(kw["impressions"] * 0.08)
                opportunities.append({
                    "type": "Content Enhancement",
                    "keyword": kw["keyword"],
                    "opportunity": (
-                        f"Enhance content for '{kw['keyword']}' to move from "
-                        f"position {kw['position']:.1f} to the first page"
+                        f"'{kw['keyword']}' ranks #{kw['position']:.0f} (page 2). "
+                        f"Moving to page 1 could capture ~{estimated_gain} more clicks/month "
+                        f"from {kw['impressions']:,} impressions."
                    ),
-                    "potential_impact": "Medium",
+                    "potential_impact": "High" if kw["impressions"] > 500 else "Medium",
                    "current_position": kw["position"],
+                    "current_ctr": kw["ctr"],
                    "impressions": kw["impressions"],
-                    "priority": "Medium",
+                    "clicks": kw["clicks"],
+                    "estimated_traffic_gain": estimated_gain,
+                    "priority": "High" if kw["impressions"] > 500 else "Medium",
+                    "suggested_format": GSCBrainstormService._suggest_format(kw["keyword"]),
                })

-        # Sort by impressions descending, keep top 10
        opportunities.sort(key=lambda x: x["impressions"], reverse=True)
        return opportunities[:10]

@@ -212,15 +229,111 @@ class GSCBrainstormService:

        for kw in keywords_data:
            if 4 <= kw["position"] <= 20 and kw["impressions"] >= 50:
+                # Estimate the extra clicks if this keyword reached position 1.
+                # Position 1 averages ~31% CTR (position 3 is ~11%); the gain is measured against the keyword's current CTR.
+                position_1_ctr = 31.0
+                current_ctr = kw["ctr"]
+                estimated_gain = max(int(kw["impressions"] * (position_1_ctr - current_ctr) / 100), 1)
+
                gaps.append({
                    "keyword": kw["keyword"],
                    "position": kw["position"],
                    "impressions": kw["impressions"],
+                    "current_ctr": kw["ctr"],
+                    "clicks": kw["clicks"],
+                    "estimated_traffic_if_page1": estimated_gain,
+                    "gap_from_page1": round(kw["position"] - 3, 1),
                })

        gaps.sort(key=lambda x: x["impressions"], reverse=True)
        return gaps[:10]

+    @staticmethod
+    def _identify_quick_wins(
+        keywords_data: List[Dict[str, Any]],
+    ) -> List[Dict[str, Any]]:
+        """Keywords already on page 1 (positions 4-10) that could reach top 3
+        with minor improvements — the highest-ROI opportunities."""
+        quick_wins: List[Dict[str, Any]] = []
+
+        for kw in keywords_data:
+            if 4 <= kw["position"] <= 10 and kw["impressions"] >= 100:
+                # Position 3 CTR ≈ 11%, position 5 CTR ≈ 6%
+                # Small improvements can yield big traffic gains
+                target_ctr = 11.0  # approximate CTR for position 3
+                estimated_gain = max(int(kw["impressions"] * (target_ctr - kw["ctr"]) / 100), 1)
+
+                quick_wins.append({
+                    "keyword": kw["keyword"],
+                    "position": kw["position"],
+                    "impressions": kw["impressions"],
+                    "current_ctr": kw["ctr"],
+                    "clicks": kw["clicks"],
+                    "estimated_traffic_gain": estimated_gain,
+                    "reason": (
+                        f"Already on page 1 at position #{kw['position']:.0f}. "
+                        f"Optimizing this page could increase CTR from {kw['ctr']:.1f}% "
+                        f"to ~{target_ctr:.0f}%, gaining ~{estimated_gain} clicks/month."
+                    ),
+                })
+
+        quick_wins.sort(key=lambda x: x["estimated_traffic_gain"], reverse=True)
+        return quick_wins[:5]
+
+    @staticmethod
+    def _identify_page_opportunities(
+        pages_data: List[Dict[str, Any]],
+    ) -> List[Dict[str, Any]]:
+        """Pages with high impressions (>300) but low CTR (<2%) — the title or meta description likely needs work."""
+        opportunities: List[Dict[str, Any]] = []
+
+        for pg in pages_data:
+            if pg["impressions"] > 300 and pg["ctr"] < 2.0:
+                short_page = pg["page"].rstrip("/").rsplit("/", 1)[-1].replace("-", " ").title()
+                if len(short_page) > 60:
+                    short_page = short_page[:57] + "..."
+                opportunities.append({
+                    "page": pg["page"],
+                    "page_title": short_page,
+                    "impressions": pg["impressions"],
+                    "clicks": pg["clicks"],
+                    "current_ctr": pg["ctr"],
+                    "current_position": pg["position"],
+                    "reason": (
+                        f"This page gets {pg['impressions']:,} impressions but only {pg['ctr']:.1f}% CTR. "
+                        f"Reviewing the title and meta description could significantly boost clicks."
+                    ),
+                })
+
+        opportunities.sort(key=lambda x: x["impressions"], reverse=True)
+        return opportunities[:5]
+
+    # ------------------------------------------------------------------ #
+    #  Content format suggestion
+    # ------------------------------------------------------------------ #
+
+    @staticmethod
+    def _suggest_format(keyword: str) -> str:
+        """Suggest a content format based on keyword patterns."""
+        kw = keyword.lower()
+        if any(w in kw for w in ["how to", "how do", "guide", "tutorial", "steps"]):
+            return "How-To Guide"
+        if any(w in kw for w in ["vs", "versus", "compare", "comparison", "difference"]):
+            return "Comparison"
+        if any(w in kw for w in ["best", "top", "recommended", "review", "reviews"]):
+            return "Top Picks / Review"
+        if any(w in kw for w in ["what is", "definition", "meaning", "explained"]):
+            return "Explainer"
+        if any(w in kw for w in ["list", "examples", "ideas", "tips", "ways"]):
+            return "Listicle"
+        if any(w in kw for w in ["free", "cheap", "alternative", "budget"]):
+            return "Budget / Alternative"
+        if any(w in kw for w in ["template", "calculator", "tool", "checker"]):
+            return "Tool / Template"
+        if any(w in kw for w in ["2024", "2025", "2026", "trends", "prediction", "future"]):
+            return "Trend Report"
+        return "In-Depth Article"
+
    # ------------------------------------------------------------------ #
    #  Summary metrics
    # ------------------------------------------------------------------ #
@@ -248,6 +361,16 @@ class GSCBrainstormService:
        top_keywords = sorted(keywords_data, key=lambda x: x["impressions"], reverse=True)[:5]
        top_pages = sorted(pages_data, key=lambda x: x["clicks"], reverse=True)[:3]

+        # Health score (0-100, capped): top-3 share weighted 3x plus page-1 share (positions 1-10) weighted 0.7
+        total_kw = len(keywords_data) or 1
+        page1_pct = (pos_1_3 + pos_4_10) / total_kw * 100
+        top3_pct = pos_1_3 / total_kw * 100
+        health_score = round(min(top3_pct * 3 + page1_pct * 0.7, 100), 0)
+
+        # CTR benchmark: industry average is ~3.1% for position 1-10
+        ctr_benchmark = 3.1
+        ctr_vs_benchmark = round(avg_ctr - ctr_benchmark, 2)
+
        return {
            "site_url": site_url,
            "date_range": {"start": start_date, "end": end_date},
@@ -256,6 +379,8 @@ class GSCBrainstormService:
            "total_clicks": total_clicks,
            "avg_ctr": avg_ctr,
            "avg_position": avg_position,
+            "ctr_vs_benchmark": ctr_vs_benchmark,
+            "health_score": health_score,
            "keyword_distribution": {
                "positions_1_3": pos_1_3,
                "positions_4_10": pos_4_10,
@@ -263,11 +388,22 @@ class GSCBrainstormService:
                "positions_21_plus": pos_21_plus,
            },
            "top_keywords": [
-                {"keyword": kw["keyword"], "impressions": kw["impressions"], "position": kw["position"]}
+                {
+                    "keyword": kw["keyword"],
+                    "impressions": kw["impressions"],
+                    "clicks": kw["clicks"],
+                    "position": kw["position"],
+                    "ctr": kw["ctr"],
+                }
                for kw in top_keywords
            ],
            "top_pages": [
-                {"page": pg["page"], "clicks": pg["clicks"], "impressions": pg["impressions"]}
+                {
+                    "page": pg["page"],
+                    "clicks": pg["clicks"],
+                    "impressions": pg["impressions"],
+                    "ctr": pg["ctr"],
+                }
                for pg in top_pages
            ],
        }
@@ -282,60 +418,110 @@ class GSCBrainstormService:
        pages_data: List[Dict],
        summary: Dict,
        user_keywords: str,
+        content_opportunities: List[Dict],
+        quick_wins: List[Dict],
+        keyword_gaps: List[Dict],
    ) -> Dict[str, Any]:
        try:
-            top_kw = ", ".join(kw["keyword"] for kw in summary.get("top_keywords", []))
+            top_kw_list = summary.get("top_keywords", [])
+            top_kw_str = "\n".join(
+                f"  • {kw['keyword']}: {kw['impressions']:,} impressions, position {kw['position']}, {kw['ctr']:.1f}% CTR"
+                for kw in top_kw_list[:10]
+            )
            dist = summary.get("keyword_distribution", {})

-            prompt = f"""Analyze this Google Search Console data and suggest blog topics the user should write about.
+            opp_str = ""
+            if content_opportunities:
+                opp_str = "\nCONTENT OPPORTUNITIES (rule-based findings):\n" + "\n".join(
+                    f"  • {o['keyword']}: {o['opportunity']}"
+                    for o in content_opportunities[:5]
+                )
+            else:
+                opp_str = "\nNo major content opportunities detected from rule-based analysis."

-USER'S TOPIC INTENT: "{user_keywords}"
+            qw_str = ""
+            if quick_wins:
+                qw_str = "\nQUICK WINS (already on page 1, easy to optimize):\n" + "\n".join(
+                    f"  • {q['keyword']}: position #{q['position']:.0f}, {q['current_ctr']:.1f}% CTR, est. +{q['estimated_traffic_gain']} clicks/month"
+                    for q in quick_wins[:3]
+                )

-SEARCH PERFORMANCE SUMMARY:
- Total Keywords Tracked: {summary.get('total_keywords_analyzed', 0)}
+            prompt = f"""You are an expert SEO content strategist analyzing real Google Search Console data for a blog writer.
+
+The user wants to write about: "{user_keywords}"
+
+Here is their GSC data for the last 30 days:
+
+PERFORMANCE OVERVIEW:
+- Total Keywords: {summary.get('total_keywords_analyzed', 0)}
 - Total Impressions: {summary.get('total_impressions', 0):,}
 - Total Clicks: {summary.get('total_clicks', 0):,}
- Average CTR: {summary.get('avg_ctr', 0):.2f}%
+- Average CTR: {summary.get('avg_ctr', 0):.2f}% (industry avg for positions 1-10 is ~3.1%)
 - Average Position: {summary.get('avg_position', 0):.1f}
+- SEO Health Score: {summary.get('health_score', 0)}/100

-TOP PERFORMING KEYWORDS:
-{top_kw}
+TOP KEYWORDS BY IMPRESSIONS:
+{top_kw_str}

 KEYWORD POSITION DISTRIBUTION:
- Positions 1-3: {dist.get('positions_1_3', 0)}
- Positions 4-10: {dist.get('positions_4_10', 0)}
- Positions 11-20: {dist.get('positions_11_20', 0)}
- Positions 21+: {dist.get('positions_21_plus', 0)}
+- Position 1-3 (top results): {dist.get('positions_1_3', 0)} keywords
+- Position 4-10 (page 1): {dist.get('positions_4_10', 0)} keywords
+- Position 11-20 (page 2): {dist.get('positions_11_20', 0)} keywords
+- Position 21+ (page 3+): {dist.get('positions_21_plus', 0)} keywords
+{opp_str}
+{qw_str}

-Based on this data, provide:
+Based on this data, provide EXACT blog post suggestions the user should write.

-1. IMMEDIATE TOPIC OPPORTUNITIES (0-30 days):
-   - Specific blog post titles the user should write
-   - Each tied to a keyword opportunity from the data
-   - 3-5 suggestions
+For each suggestion include:
+1. A specific, compelling blog post TITLE (not vague topic)
+2. The keyword it targets and why (based on the data above)
+3. The recommended content format (how-to, listicle, comparison, etc.)
+4. Estimated impact (how many more clicks/month they could gain)

-2. CONTENT STRATEGY TOPICS (1-3 months):
-   - New topic clusters to build authority
-   - Content pillar ideas
-   - 3-5 suggestions
-
-3. LONG-TERM CONTENT VISION (3-12 months):
-   - Market expansion topics
-   - Authority-building content ideas
-   - 3-5 suggestions
-
-IMPORTANT: Relate every topic suggestion to the user's interest in "{user_keywords}".
-Return your response in this exact JSON format:
+Return your response in this EXACT JSON format (no markdown, no code fences):
 {{
-  "immediate_opportunities": ["topic 1", "topic 2", "topic 3"],
-  "content_strategy": ["strategy 1", "strategy 2", "strategy 3"],
-  "long_term_strategy": ["vision 1", "vision 2", "vision 3"]
-}}"""
+  "immediate_opportunities": [
+    {{
+      "title": "Specific Blog Post Title Here",
+      "keyword": "target keyword",
+      "reason": "Why this will work based on the data",
+      "format": "How-To Guide | Listicle | Comparison | Explainer | etc.",
+      "estimated_impact": "Estimated X more clicks/month"
+    }}
+  ],
+  "content_strategy": [
+    {{
+      "title": "Pillar Content Title",
+      "keyword": "target keyword",
+      "reason": "Strategic reasoning",
+      "format": "Content format",
+      "estimated_impact": "Expected impact"
+    }}
+  ],
+  "long_term_strategy": [
+    {{
+      "title": "Authority Building Title",
+      "keyword": "target keyword",
+      "reason": "Long-term reasoning",
+      "format": "Content format",
+      "estimated_impact": "Expected long-term impact"
+    }}
+  ]
+}}
+
+IMPORTANT:
+- Provide 3-5 items in each category
+- Every suggestion MUST relate to the user's interest in "{user_keywords}"
+- Titles should be specific and compelling, like real blog post headlines
+- Use the data above to justify each recommendation
+- Prioritize keywords with high impressions but low CTR or low position"""

            system_prompt = (
-                "You are an enterprise SEO content strategist. Provide specific, data-driven "
-                "blog topic suggestions that will improve the user's search performance. "
-                "Always respond with valid JSON matching the requested format."
+                "You are an expert SEO content strategist. You analyze Google Search Console data "
+                "and provide specific, actionable blog post recommendations that will drive real traffic. "
+                "You always respond with valid JSON matching the requested format. "
+                "Every recommendation must be backed by the data provided."
            )

            result = llm_text_gen(
@@ -350,27 +536,58 @@ Return your response in this exact JSON format:
                if parsed:
                    return parsed

-            return self._fallback_ai_recommendations(keywords_data)
+            return self._fallback_ai_recommendations(keywords_data, content_opportunities, quick_wins)

        except Exception as e:
            logger.warning(f"GSC brainstorm AI recommendations failed: {e}")
-            return self._fallback_ai_recommendations(keywords_data)
+            return self._fallback_ai_recommendations(keywords_data, content_opportunities, quick_wins)

-    @staticmethod
-    def _parse_ai_response(raw: str) -> Optional[Dict[str, List[str]]]:
+    def _parse_ai_response(self, raw: str) -> Optional[Dict[str, Any]]:
        try:
-            json_start = raw.find("{")
-            json_end = raw.rfind("}") + 1
+            # Strip markdown code fences if present
+            cleaned = raw.strip()
+            if cleaned.startswith("```"):
+                first_newline = cleaned.find("\n")
+                if first_newline != -1:
+                    cleaned = cleaned[first_newline + 1:]
+                if cleaned.endswith("```"):
+                    cleaned = cleaned[:-3].strip()
+
+            json_start = cleaned.find("{")
+            json_end = cleaned.rfind("}") + 1
            if json_start == -1 or json_end == 0:
                return None

-            chunk = raw[json_start:json_end]
+            chunk = cleaned[json_start:json_end]
            parsed = json.loads(chunk)

+            def normalize_section(section: Any) -> List[Dict[str, str]]:
+                if not isinstance(section, list):
+                    return []
+                result = []
+                for item in section:
+                    if isinstance(item, str):
+                        result.append({
+                            "title": item.split(":")[0].strip() if ":" in item else item[:60],
+                            "keyword": "",
+                            "reason": item,
+                            "format": "",
+                            "estimated_impact": "",
+                        })
+                    elif isinstance(item, dict):
+                        result.append({
+                            "title": str(item.get("title", "")),
+                            "keyword": str(item.get("keyword", "")),
+                            "reason": str(item.get("reason", "")),
+                            "format": str(item.get("format", "")),
+                            "estimated_impact": str(item.get("estimated_impact", "")),
+                        })
+                return result
+
            return {
-                "immediate_opportunities": parsed.get("immediate_opportunities", [])[:5],
-                "content_strategy": parsed.get("content_strategy", [])[:5],
-                "long_term_strategy": parsed.get("long_term_strategy", [])[:5],
+                "immediate_opportunities": normalize_section(parsed.get("immediate_opportunities", []))[:5],
+                "content_strategy": normalize_section(parsed.get("content_strategy", []))[:5],
+                "long_term_strategy": normalize_section(parsed.get("long_term_strategy", []))[:5],
            }
        except (json.JSONDecodeError, ValueError) as e:
            logger.warning(f"Failed to parse AI brainstorm response as JSON: {e}")
@@ -379,26 +596,53 @@ Return your response in this exact JSON format:
    @staticmethod
    def _fallback_ai_recommendations(
        keywords_data: List[Dict],
+        content_opportunities: List[Dict],  # each dict must have 'keyword' and 'impressions'; 'opportunity', 'suggested_format', 'estimated_traffic_gain' are optional
+        quick_wins: List[Dict],  # each dict must have 'keyword' and 'position'; 'reason', 'estimated_traffic_gain' are optional
    ) -> Dict[str, Any]:
        top_kw = keywords_data[:3] if keywords_data else []
        immediate = []
-        for kw in top_kw:
-            immediate.append(
-                f"Write a comprehensive guide on '{kw['keyword']}' "
-                f"(currently at position {kw['position']:.1f} with "
-                f"{kw['impressions']} impressions)"
-            )
+
+        # Quick wins go first (highest ROI); at most two are used
+        for qw in quick_wins[:2]:
+            immediate.append({
+                "title": f"How to Rank #{int(qw['position'])} for '{qw['keyword']}' — Optimization Guide",
+                "keyword": qw["keyword"],
+                "reason": qw.get("reason", f"Already on page 1 at position {qw['position']:.0f}"),
+                "format": "How-To Guide",
+                "estimated_impact": f"+{qw.get('estimated_traffic_gain', 10)} clicks/month",
+            })
+
+        # Then at most two content opportunities
+        for opp in content_opportunities[:2]:
+            immediate.append({
+                "title": f"Complete Guide to {opp['keyword'].title()}",
+                "keyword": opp["keyword"],
+                "reason": opp.get("opportunity", f"{opp['impressions']:,} impressions with room to improve"),  # the default f-string is built eagerly, so 'impressions' must exist even when 'opportunity' is set
+                "format": opp.get("suggested_format", "In-Depth Article"),
+                "estimated_impact": f"+{opp.get('estimated_traffic_gain', 10)} clicks/month",
+            })
+
+        # Top keywords (at most three) fill whatever is left of the five slots
+        remaining = 5 - len(immediate)
+        for kw in top_kw[:remaining]:
+            immediate.append({
+                "title": f"The Ultimate Guide to {kw['keyword'].title()}",
+                "keyword": kw["keyword"],
+                "reason": f"Top keyword with {kw['impressions']:,} impressions (position {kw['position']:.1f})",
+                "format": "In-Depth Article",
+                "estimated_impact": f"+{max(int(kw['impressions'] * 0.03), 5)} clicks/month",  # 3% of impressions, floored at 5
+            })

        return {
-            "immediate_opportunities": immediate or ["No keyword data available for recommendations"],
+            "immediate_opportunities": immediate or [{"title": "No keyword data available", "keyword": "", "reason": "Connect GSC to get personalized suggestions", "format": "", "estimated_impact": ""}],
            "content_strategy": [
-                "Develop topic clusters around your top-performing keywords",
-                "Create comparison and vs-style content for competitive terms",
-                "Build FAQ sections targeting question-based queries",
+                {"title": "Topic Cluster: Build Authority Around Your Core Topics", "keyword": "", "reason": "Clustered content ranks higher and captures more long-tail queries", "format": "Pillar Page + Spokes", "estimated_impact": "+50-200 clicks/month over 3 months"},
+                {"title": "Comparison Guide: Your Product vs. Alternatives", "keyword": "", "reason": "Comparison content captures high-intent searchers ready to decide", "format": "Comparison", "estimated_impact": "+20-80 clicks/month"},
+                {"title": "FAQ: Answer What Your Audience Is Asking", "keyword": "", "reason": "FAQs capture featured snippets and voice search queries", "format": "FAQ / Listicle", "estimated_impact": "+30-100 clicks/month"},
            ],
            "long_term_strategy": [
-                "Build domain authority through pillar content",
-                "Expand into adjacent topic areas",
-                "Develop thought leadership content series",
+                {"title": "Pillar Content: The Definitive Resource in Your Niche", "keyword": "", "reason": "Comprehensive guides become authoritative references that attract backlinks", "format": "Long-Form Guide", "estimated_impact": "+100-500 clicks/month over 6-12 months"},
+                {"title": "Trend Report: What's Next in Your Industry", "keyword": "", "reason": "Forward-looking content captures emerging search demand early", "format": "Trend Report", "estimated_impact": "+50-200 clicks/month"},
+                {"title": "Thought Leadership: Expert Roundup and Insights", "keyword": "", "reason": "Expert content builds E-E-A-T signals that improve overall domain authority", "format": "Expert Roundup", "estimated_impact": "+30-100 clicks/month per piece"},
            ],
        }
--- a/backend/services/gsc_service.py
+++ b/backend/services/gsc_service.py
@@ -250,10 +250,10 @@ class GSCService:
            flow = Flow.from_client_config(
                self.client_config,
                scopes=self.scopes,
-                redirect_uri=redirect_uri
+                redirect_uri=redirect_uri,
+                autogenerate_code_verifier=False,
            )
-            
-            # Use a custom state that includes user_id for routing the callback to the correct DB
+
            random_state = secrets.token_urlsafe(32)
            state = f"{user_id}:{random_state}"
            
@@ -300,7 +300,7 @@ class GSCService:
                logger.error(f"User database not found for user {user_id}")
                return False

-            # Verify state in user's DB
+            # Verify state in user's DB (but don't delete yet — delete after successful token exchange)
            with sqlite3.connect(db_path) as conn:
                cursor = conn.cursor()
                cursor.execute('SELECT user_id FROM gsc_oauth_states WHERE state = ?', (state,))
@@ -309,10 +309,6 @@ class GSCService:
                if not result:
                    logger.error(f"Invalid or expired GSC OAuth state for user {user_id}")
                    return False
-                
-                # Clean up state
-                cursor.execute('DELETE FROM gsc_oauth_states WHERE state = ?', (state,))
-                conn.commit()
            
            # Exchange code for credentials
            if not self.client_config:
@@ -322,12 +318,22 @@ class GSCService:
            flow = Flow.from_client_config(
                self.client_config,
                scopes=self.scopes,
-                redirect_uri=os.getenv('GSC_REDIRECT_URI', 'http://localhost:8000/gsc/callback')
+                redirect_uri=os.getenv('GSC_REDIRECT_URI', 'http://localhost:8000/gsc/callback'),
+                autogenerate_code_verifier=False,
            )
            
            flow.fetch_token(code=authorization_code)
            credentials = flow.credentials
            
+            # State consumed successfully — clean up
+            try:
+                with sqlite3.connect(db_path) as conn:
+                    cursor = conn.cursor()
+                    cursor.execute('DELETE FROM gsc_oauth_states WHERE state = ?', (state,))
+                    conn.commit()
+            except Exception as cleanup_err:
+                logger.warning(f"Failed to clean up OAuth state: {cleanup_err}")
+            
            # Save credentials
            return self.save_user_credentials(user_id, credentials)
            
--- a/backend/services/hallucination_detector.py
+++ b/backend/services/hallucination_detector.py
@@ -343,18 +343,28 @@ class HallucinationDetector:
            logger.error(f"Error in batch evidence search: {str(e)}")
            return []

+    def _map_source_refs_from_reasoning(self, reasoning: str, sources: List[Dict[str, Any]]) -> List[int]:
+        """Return sorted, in-range 0-based indices for 'Source [N]' / 'Source N' refs in reasoning."""
+        import re
+        indices = set()
+        for match in re.finditer(r'Source\s*\[?(\d+)\]?', reasoning):  # brackets optional: prompts label sources "Source [i]"
+            ref = int(match.group(1))
+            if 0 <= ref < len(sources):  # prompt labels are already 0-based; ignore out-of-range refs
+                indices.add(ref)
+        return sorted(indices)
+
    async def _assess_claims_batch(self, claims: List[str], sources: List[Dict[str, Any]], user_id: str = None) -> List[Claim]:
        """Assess multiple claims against sources in one LLM call."""
        try:
            claims_to_assess = claims[:3]

            combined_sources = "\n\n".join([
-                f"Source {i+1}: {src.get('url','')}\nText: {src.get('text','')[:1000]}"
+                f"Source [{i}]: {src.get('url','')}\nText: {src.get('text','')[:1000]}"
                for i, src in enumerate(sources)
            ])

            claims_text = "\n".join([
-                f"Claim {i+1}: {claim}"
+                f"Claim {i}: {claim}"
                for i, claim in enumerate(claims_to_assess)
            ])

@@ -367,12 +377,14 @@ class HallucinationDetector:
                '      "claim_index": 0,\n'
                '      "assessment": "supported" or "refuted" or "insufficient_information",\n'
                '      "confidence": number between 0.0 and 1.0,\n'
-                '      "supporting_sources": [array of source indices that support the claim],\n'
-                '      "refuting_sources": [array of source indices that refute the claim],\n'
+                '      "supporting_sources": [array of 0-based source indices, e.g. [0, 2] for Source [0] and Source [2]],\n'
+                '      "refuting_sources": [array of 0-based source indices, e.g. [1] for Source [1]],\n'
                '      "reasoning": "brief explanation of your assessment"\n'
                '    }\n'
                '  ]\n'
                "}\n\n"
+                "IMPORTANT: Source indices are 0-based. Source [0] is the first source, Source [1] is the second, etc.\n"
+                "For every 'supported' or 'refuted' claim you MUST include the relevant source indices.\n\n"
                f"Claims to verify:\n{claims_text}\n\n"
                f"Sources:\n{combined_sources}\n\n"
                "Return only the JSON object:"
@@ -407,6 +419,15 @@ class HallucinationDetector:
                            if isinstance(idx, int) and 0 <= idx < len(sources):
                                refuting_sources.append(sources[idx])

+                    # Fallback: parse "Source N" from reasoning text when LLM omits indices
+                    if not supporting_sources and not refuting_sources and sources and assessment.get('reasoning'):
+                        ref_indices = self._map_source_refs_from_reasoning(assessment.get('reasoning', ''), sources)
+                        if ref_indices:
+                            if assessment.get('assessment') == 'supported':
+                                supporting_sources = [sources[i] for i in ref_indices]
+                            elif assessment.get('assessment') == 'refuted':
+                                refuting_sources = [sources[i] for i in ref_indices]
+
                    verified_claims.append(Claim(
                        text=claim,
                        confidence=float(assessment.get('confidence', 0.5)),
@@ -464,7 +485,7 @@ class HallucinationDetector:
        """Assess whether sources support or refute the claim using LLM."""
        try:
            combined_sources = "\n\n".join([
-                f"Source {i+1}: {src.get('url','')}\nText: {src.get('text','')[:2000]}"
+                f"Source [{i}]: {src.get('url','')}\nText: {src.get('text','')[:2000]}"
                for i, src in enumerate(sources)
            ])

@@ -474,10 +495,12 @@ class HallucinationDetector:
                "{\n"
                '  "assessment": "supported" or "refuted" or "insufficient_information",\n'
                '  "confidence": number between 0.0 and 1.0,\n'
-                '  "supporting_sources": [array of source indices that support the claim],\n'
-                '  "refuting_sources": [array of source indices that refute the claim],\n'
+                '  "supporting_sources": [array of 0-based source indices, e.g. [0, 2] for Source [0] and Source [2]],\n'
+                '  "refuting_sources": [array of 0-based source indices, e.g. [1] for Source [1]],\n'
                '  "reasoning": "brief explanation of your assessment"\n'
                "}\n\n"
+                "IMPORTANT: Source indices are 0-based. Source [0] is the first source, Source [1] is the second, etc.\n"
+                "For 'supported' or 'refuted' you MUST include the relevant source indices.\n\n"
                f"Claim to verify: {claim}\n\n"
                f"Sources:\n{combined_sources}\n\n"
                "Return only the JSON object:"
@@ -508,6 +531,15 @@ class HallucinationDetector:
                    if isinstance(idx, int) and 0 <= idx < len(sources):
                        refuting_sources.append(sources[idx])

+            # Fallback: parse "Source N" from reasoning text when LLM omits indices
+            if not supporting_sources and not refuting_sources and sources and result.get('reasoning'):
+                ref_indices = self._map_source_refs_from_reasoning(result.get('reasoning', ''), sources)
+                if ref_indices:
+                    if result.get('assessment') == 'supported':
+                        supporting_sources = [sources[i] for i in ref_indices]
+                    elif result.get('assessment') == 'refuted':
+                        refuting_sources = [sources[i] for i in ref_indices]
+
            # Validate assessment value
            valid_assessments = ['supported', 'refuted', 'insufficient_information']
            if result['assessment'] not in valid_assessments:
--- a/backend/services/llm_providers/main_text_generation.py
+++ b/backend/services/llm_providers/main_text_generation.py
@@ -46,6 +46,7 @@ def llm_text_gen(
    preferred_provider: Optional[str] = None,
    flow_type: Optional[str] = None,
    max_tokens: Optional[int] = None,
+    temperature: Optional[float] = None,
 ) -> str:
    """
    Generate text using Language Model (LLM) based on the provided prompt.
@@ -58,6 +59,8 @@ def llm_text_gen(
        preferred_hf_models (list, optional): Preferred HuggingFace models.
        preferred_provider (str, optional): Preferred provider (google, huggingface).
        flow_type (str, optional): Flow type for logging (e.g., 'sif_agent', 'premium_tool').
+        max_tokens (int, optional): Max tokens for response. If None, provider default is used.
+        temperature (float, optional): Temperature for generation (0.0-1.0). If None, defaults to 0.7.
        
    Returns:
        str: Generated text based on the prompt.
@@ -75,9 +78,8 @@ def llm_text_gen(
        # Set default values for LLM parameters
        gpt_provider = "google"  # Default to Google Gemini
        model = "gemini-2.0-flash-001"
-        temperature = 0.7
-        if max_tokens is None:
-            max_tokens = 4000
+        if temperature is None:
+            temperature = 0.7
        top_p = 0.9
        n = 1
        fp = 16
--- a/backend/services/writing_assistant.py
+++ b/backend/services/writing_assistant.py
@@ -1,6 +1,7 @@
 import os
+import re
 import asyncio
-from typing import Any, Dict, List
+from typing import Any, Dict, List, Optional
 from dataclasses import dataclass
 from loguru import logger
 import random
@@ -17,42 +18,33 @@ class WritingSuggestion:

 class WritingAssistantService:
    """
-    Minimal writing assistant that combines Exa search with Gemini continuation.
-    - Exa provides relevant sources with content snippets
-    - Gemini generates a short, cited continuation based on current text and sources
+    Writing assistant that combines Exa search with LLM continuation.
+    - Searches relevant sources using the content near the cursor position
+    - Generates a short continuation grounded in sources
+    - Confidence derived from source availability and quality
    """

    def __init__(self) -> None:
-        # COST CONTROL: Daily usage limits
        self.daily_api_calls = 0
-        self.daily_limit = 50  # Max 50 API calls per day (~$2.50 max cost)
+        self.daily_limit = 50  # max API calls per calendar day; enforced by _check_daily_limit
        self.last_reset_date = None

    def _get_cached_suggestion(self, text: str) -> WritingSuggestion | None:
-        """No cached suggestions - always use real API calls for authentic results."""
        return None

    def _check_daily_limit(self) -> bool:
-        """Check if we're within daily API usage limits."""
        import datetime
-        
        today = datetime.date.today()
-        
-        # Reset counter if it's a new day
        if self.last_reset_date != today:
            self.daily_api_calls = 0
            self.last_reset_date = today
-        
-        # Check if we've exceeded the limit
        if self.daily_api_calls >= self.daily_limit:
            return False
-        
-        # Increment counter for this API call
        self.daily_api_calls += 1
        logger.info(f"Writing assistant API call #{self.daily_api_calls}/{self.daily_limit} today")
        return True

-    async def suggest(self, text: str, user_id: str | None = None) -> List[WritingSuggestion]:
+    async def suggest(self, text: str, user_id: str | None = None, cursor_position: Optional[int] = None) -> List[WritingSuggestion]:
        if not text or len(text.strip()) < 6:
            return []

@@ -67,26 +59,41 @@ class WritingAssistantService:
        if len(text.strip()) < 50:
            return []

-        # 1) Find relevant sources via Exa
-        sources = await self._search_sources(text, user_id=user_id)
+        # Use text before cursor for context (where the user is actively writing)
+        if cursor_position is not None and 0 < cursor_position <= len(text):
+            context_text = text[:cursor_position]
+        else:
+            context_text = text

-        # 2) Generate continuation suggestion via LLM grounded in sources
-        suggestion_text, confidence = await self._generate_continuation(text, sources, user_id=user_id)
+        # 1) Find relevant sources via Exa (non-fatal)
+        sources = []
+        try:
+            sources = await self._search_sources(context_text, user_id=user_id)
+        except Exception as e:
+            logger.warning(f"WritingAssistant Exa search failed, proceeding without sources: {e}")
+
+        # 2) Generate continuation suggestion via LLM
+        suggestion_text, confidence = await self._generate_continuation(context_text, sources, user_id=user_id)

        if not suggestion_text:
            return []

        return [WritingSuggestion(text=suggestion_text.strip(), confidence=confidence, sources=sources)]

-    async def _search_sources(self, text: str, user_id: str = None) -> List[Dict[str, Any]]:
-        """Search for relevant sources using ExaResearchProvider with subscription checks."""
+    async def _search_sources(self, context_text: str, user_id: str = None) -> List[Dict[str, Any]]:
+        """Search Exa using the last sentence before cursor for a focused query."""
        try:
            from services.blog_writer.research.exa_provider import ExaResearchProvider

-            exa_query = (
-                (text[-1000:] if len(text) > 1000 else text)
-                + "\n\nIf you found the above interesting, here's another useful resource to read:"
-            )
+            # Extract the last sentence from context to use as a focused search query
+            sentences = re.split(r'(?<=[.!?])\s+', context_text.strip())
+            last_sentence = sentences[-1].strip().strip('"').strip("'") if sentences else context_text
+
+            # If very short, fall back to the last two sentences for more signal
+            if len(last_sentence) < 20 and len(sentences) >= 2:
+                last_sentence = ' '.join(sentences[-2:]).strip().strip('"').strip("'")
+
+            exa_query = last_sentence[:500]  # cap query length; slicing is a no-op for shorter text

            provider = ExaResearchProvider()
            sources = await provider.simple_search(
@@ -95,7 +102,6 @@ class WritingAssistantService:
                user_id=user_id,
            )

-            # Normalize keys to match expected format
            normalized = []
            for s in sources:
                normalized.append({
@@ -104,7 +110,7 @@ class WritingAssistantService:
                    "text": s.get("text", ""),
                    "author": s.get("author", ""),
                    "published_date": s.get("publishedDate", ""),
-                    "score": float(s.get("score", 0.5)),
+                    "score": float(s.get("score") if s.get("score") is not None else 0.5),
                })

            if not normalized:
@@ -151,8 +157,21 @@ class WritingAssistantService:
                suggestion = (str(ai_resp or "")).strip()
            if not suggestion:
                raise Exception("Assistive writer returned empty suggestion")
-            confidence = 0.7
-            return suggestion, confidence
+
+            # Dynamic confidence based on source quality and response signals
+            confidence = 0.5
+            if sources:
+                # More sources and higher scores = more confident
+                avg_score = sum(s.get("score", 0.5) for s in sources) / len(sources)
+                confidence = 0.5 + (len(sources) / 6.0) * 0.3 + avg_score * 0.2
+            if suggestion.endswith(('.', '!', '?')):
+                confidence += 0.05
+            # Check if citation hint was included
+            if '[http' in suggestion or '((' in suggestion:
+                confidence += 0.05
+            confidence = min(confidence, 1.0)
+
+            return suggestion, round(confidence, 2)
        except Exception as e:
            logger.error(f"WritingAssistant _generate_continuation error: {e}")
            raise