Merge branch 'recover-stash'

2026-05-23 13:13:18 +05:30
parent 33458c78c0 8f116ef4d1
commit d1cd28d407
40 changed files with 1870 additions and 859 deletions
--- a/backend/alwrity_utils/router_manager.py
+++ b/backend/alwrity_utils/router_manager.py
@@ -44,7 +44,7 @@ CORE_ROUTER_REGISTRY = [
 OPTIONAL_ROUTER_REGISTRY = [
    {"name": "blog_writer", "module": "api.blog_writer.router", "attr": "router", "features": {"all", "blog_writer"}},
    {"name": "story_writer", "module": "api.story_writer.router", "attr": "router", "features": {"all", "story_writer"}},
-{"name": "wix", "module": "api.wix_routes", "attr": "router", "features": {"all"}},
+    {"name": "wix", "module": "api.wix_routes", "attr": "router", "features": {"all", "blog_writer"}},
    {"name": "wix_test", "module": "api.wix_routes", "attr": "qa_router", "features": {"all"}},
    {"name": "blog_seo_analysis", "module": "api.blog_writer.seo_analysis", "attr": "router", "features": {"all", "blog_writer"}},
    {"name": "persona", "module": "api.persona_routes", "attr": "router", "features": {"all", "persona"}},
--- a/backend/api/blog_writer/router.py
+++ b/backend/api/blog_writer/router.py
@@ -9,10 +9,12 @@ from fastapi import APIRouter, HTTPException, Depends
 from typing import Any, Dict, List, Optional
 from pydantic import BaseModel, Field
 from loguru import logger
+from datetime import datetime
 from middleware.auth_middleware import get_current_user
 from sqlalchemy.orm import Session
 from services.database import get_db as get_db_dependency
 from utils.text_asset_tracker import save_and_track_text_content
+from models.content_asset_models import AssetType, AssetSource

 from models.blog_models import (
    BlogResearchRequest,
@@ -36,6 +38,7 @@ from models.blog_models import (
 from services.blog_writer.blog_service import BlogWriterService
 from services.blog_writer.seo.blog_seo_recommendation_applier import BlogSEORecommendationApplier
 from services.llm_providers.main_text_generation import llm_text_gen
+from services.content_asset_service import ContentAssetService
 from .task_manager import task_manager
 from .cache_manager import cache_manager
 from models.blog_models import MediumBlogGenerateRequest
@@ -1260,3 +1263,233 @@ async def save_complete_blog_asset(
    except Exception as e:
        logger.error(f"Failed to save complete blog asset: {e}")
        raise HTTPException(status_code=500, detail=str(e))
+
+
+# ---------------------------------------
+# Blog Asset API (phase-by-phase saving via ContentAsset)
+# ---------------------------------------
+
+
+class BlogAssetCreateRequest(BaseModel):
+    research_keywords: str = Field(..., max_length=2000, description="Research keywords / topic")
+    topic: Optional[str] = Field(default=None, max_length=500)
+    word_count_target: Optional[int] = Field(default=None, ge=100, le=20000)
+
+
+class BlogAssetUpdateRequest(BaseModel):
+    phase: Optional[str] = Field(default=None, pattern=r"^(research|outline|content|seo|publish)$")
+    topic: Optional[str] = Field(default=None, max_length=500)
+    selected_title: Optional[str] = Field(default=None, max_length=500)
+    word_count_target: Optional[int] = Field(default=None, ge=100, le=20000)
+    research_data: Optional[Dict[str, Any]] = None
+    outline_data: Optional[Dict[str, Any]] = None
+    content_data: Optional[Dict[str, Any]] = None
+    seo_data: Optional[Dict[str, Any]] = None
+    publish_data: Optional[Dict[str, Any]] = None
+
+
+def _normalize_keywords(kw: str) -> str:
+    """Normalize keywords for duplicate comparison."""
+    return " ".join(sorted(kw.lower().split()))
+
+
+@router.post("/asset", response_model=Dict[str, Any])
+async def create_blog_asset(
+    request: BlogAssetCreateRequest,
+    current_user: Dict[str, Any] = Depends(get_current_user),
+    db: Session = Depends(get_db),
+):
+    """
+    Create a blog ContentAsset on research start.
+    Returns existing asset if duplicate keywords found (unique topics only).
+    """
+    try:
+        if not current_user:
+            raise HTTPException(status_code=401, detail="Authentication required")
+        user_id = str(current_user.get("id", ""))
+        if not user_id:
+            raise HTTPException(status_code=401, detail="Invalid user ID")
+
+        svc = ContentAssetService(db)
+        normalized_kw = _normalize_keywords(request.research_keywords)
+
+        # Duplicate check — search existing blog assets for matching keywords
+        existing_assets, _ = svc.get_user_assets(
+            user_id=user_id,
+            source_module=AssetSource.BLOG_WRITER,
+            asset_type=AssetType.TEXT,
+            limit=100,
+        )
+        for asset in existing_assets:
+            meta = asset.asset_metadata or {}
+            if meta.get("normalized_keywords") == normalized_kw:
+                logger.info(f"Duplicate blog asset found: {asset.id}, returning existing")
+                return {
+                    "success": True,
+                    "asset": _asset_to_response(asset),
+                    "existing": True,
+                }
+
+        # Create new ContentAsset for this blog
+        title = request.topic or request.research_keywords[:200]
+        asset_metadata = {
+            "phase": "research",
+            "research_keywords": request.research_keywords,
+            "normalized_keywords": normalized_kw,
+            "word_count_target": request.word_count_target,
+            "topic": request.topic,
+            "research_data": None,
+            "outline_data": None,
+            "content_data": None,
+            "seo_data": None,
+            "publish_data": None,
+        }
+        asset = svc.create_asset(
+            user_id=user_id,
+            asset_type=AssetType.TEXT,
+            source_module=AssetSource.BLOG_WRITER,
+            filename=f"blog_{int(datetime.utcnow().timestamp())}.md",
+            file_url=f"/api/blog/content/pending",
+            title=title,
+            description=f"Blog: {title}",
+            tags=["blog", "research"],
+            asset_metadata=asset_metadata,
+        )
+        logger.info(f"✅ Created blog asset: {asset.id}")
+        return {
+            "success": True,
+            "asset": _asset_to_response(asset),
+            "existing": False,
+        }
+    except HTTPException:
+        raise
+    except Exception as e:
+        logger.error(f"Failed to create blog asset: {e}")
+        raise HTTPException(status_code=500, detail=str(e))
+
+
+@router.put("/asset/{asset_id}", response_model=Dict[str, Any])
+async def update_blog_asset(
+    asset_id: int,
+    request: BlogAssetUpdateRequest,
+    current_user: Dict[str, Any] = Depends(get_current_user),
+    db: Session = Depends(get_db),
+):
+    """Update a blog asset's phase, metadata, and tags."""
+    try:
+        if not current_user:
+            raise HTTPException(status_code=401, detail="Authentication required")
+        user_id = str(current_user.get("id", ""))
+        if not user_id:
+            raise HTTPException(status_code=401, detail="Invalid user ID")
+
+        svc = ContentAssetService(db)
+        asset = svc.get_asset_by_id(asset_id, user_id)
+        if not asset:
+            raise HTTPException(status_code=404, detail="Blog asset not found")
+
+        meta = dict(asset.asset_metadata or {})
+        tags = list(asset.tags or [])
+
+        if request.phase is not None:
+            meta["phase"] = request.phase
+            # Update tags to reflect phase
+            new_tags = [t for t in tags if t not in ("research", "outline", "content", "seo", "publish")]
+            new_tags.append(request.phase)
+            if "blog" not in new_tags:
+                new_tags.append("blog")
+            tags = new_tags
+
+        if request.topic is not None:
+            meta["topic"] = request.topic
+        if request.selected_title is not None:
+            meta["selected_title"] = request.selected_title
+        if request.word_count_target is not None:
+            meta["word_count_target"] = request.word_count_target
+
+        for field in ("research_data", "outline_data", "content_data", "seo_data", "publish_data"):
+            val = getattr(request, field, None)
+            if val is not None:
+                meta[field] = val
+
+        if meta.get("selected_title"):
+            new_title = meta["selected_title"]
+        elif meta.get("topic"):
+            new_title = meta["topic"]
+        else:
+            new_title = asset.title or "Blog Post"
+
+        updated = svc.update_asset(
+            asset_id=asset_id,
+            user_id=user_id,
+            title=new_title[:500],
+            tags=tags,
+            asset_metadata=meta,
+        )
+        if not updated:
+            raise HTTPException(status_code=500, detail="Failed to update asset")
+
+        logger.info(f"✅ Updated blog asset {asset_id}: phase={meta.get('phase')}")
+        return {"success": True, "asset": _asset_to_response(updated)}
+    except HTTPException:
+        raise
+    except Exception as e:
+        logger.error(f"Failed to update blog asset {asset_id}: {e}")
+        raise HTTPException(status_code=500, detail=str(e))
+
+
+@router.get("/asset/{asset_id}", response_model=Dict[str, Any])
+async def get_blog_asset(
+    asset_id: int,
+    current_user: Dict[str, Any] = Depends(get_current_user),
+    db: Session = Depends(get_db),
+):
+    """Get a blog asset with all phase data."""
+    try:
+        if not current_user:
+            raise HTTPException(status_code=401, detail="Authentication required")
+        user_id = str(current_user.get("id", ""))
+        if not user_id:
+            raise HTTPException(status_code=401, detail="Invalid user ID")
+
+        svc = ContentAssetService(db)
+        asset = svc.get_asset_by_id(asset_id, user_id)
+        if not asset:
+            raise HTTPException(status_code=404, detail="Blog asset not found")
+
+        return {"success": True, "asset": _asset_to_response(asset, full=True)}
+    except HTTPException:
+        raise
+    except Exception as e:
+        logger.error(f"Failed to get blog asset {asset_id}: {e}")
+        raise HTTPException(status_code=500, detail=str(e))
+
+
+def _asset_to_response(asset: Any, full: bool = False) -> Dict[str, Any]:
+    """Convert a ContentAsset to a blog asset response dict."""
+    meta = asset.asset_metadata or {}
+    resp: Dict[str, Any] = {
+        "id": asset.id,
+        "title": asset.title,
+        "description": asset.description,
+        "tags": asset.tags or [],
+        "phase": meta.get("phase", "research"),
+        "research_keywords": meta.get("research_keywords"),
+        "topic": meta.get("topic"),
+        "selected_title": meta.get("selected_title"),
+        "word_count_target": meta.get("word_count_target"),
+        "has_research": meta.get("research_data") is not None,
+        "has_outline": meta.get("outline_data") is not None,
+        "has_content": meta.get("content_data") is not None,
+        "has_seo": meta.get("seo_data") is not None,
+        "has_publish": meta.get("publish_data") is not None,
+        "created_at": asset.created_at.isoformat() if asset.created_at else None,
+        "updated_at": asset.updated_at.isoformat() if asset.updated_at else None,
+    }
+    if full:
+        resp["research_data"] = meta.get("research_data")
+        resp["outline_data"] = meta.get("outline_data")
+        resp["content_data"] = meta.get("content_data")
+        resp["seo_data"] = meta.get("seo_data")
+        resp["publish_data"] = meta.get("publish_data")
+    return resp
--- a/backend/api/blog_writer/task_manager.py
+++ b/backend/api/blog_writer/task_manager.py
@@ -256,7 +256,8 @@ class TaskManager:
            self.task_storage[task_id]["status"] = "running"
            self.task_storage[task_id]["progress_messages"] = []

-            await self.update_progress(task_id, "📦 Packaging outline and metadata...")
+            await self.update_progress(task_id, "📝 Alwrity is preparing your blog content — this usually takes 20–40 seconds.")
+            await self.update_progress(task_id, "📦 Packaging your outline sections and research data...")

            # Basic guard: respect global target words
            total_target = int(request.globalTargetWords or 1000)
@@ -281,16 +282,22 @@ class TaskManager:
            # Check if result came from cache
            cache_hit = getattr(result, 'cache_hit', False)
            if cache_hit:
-                await self.update_progress(task_id, "⚡ Found cached content - loading instantly!")
+                await self.update_progress(task_id, "⚡ Found existing content in cache — no need to regenerate!")
            else:
-                await self.update_progress(task_id, "🤖 Generated fresh content with AI...")
-                await self.update_progress(task_id, "✨ Post-processing and assembling sections...")
+                await self.update_progress(task_id, "🧠 AI is writing each section with research-backed insights and natural flow...")
+                await self.update_progress(task_id, "✨ Polishing content — improving structure, readability, and transitions...")

            # Mark completed
            self.task_storage[task_id]["status"] = "completed"
            self.task_storage[task_id]["result"] = result.dict()
-            await self.update_progress(task_id, f"✅ Generated {len(result.sections)} sections successfully.")
-            
+            section_count = len(result.sections)
+            total_words = sum(getattr(s, 'wordCount', 0) or 0 for s in result.sections)
+            await self.update_progress(
+                task_id,
+                f"✅ Content generation complete! {section_count} sections written ({total_words} words). "
+                "Next up: SEO Analysis to optimize your blog for search engines."
+            )
+
            # Note: Blog content tracking is handled in the status endpoint
            # to ensure we have proper database session and user context

--- a/backend/api/hallucination_detector.py
+++ b/backend/api/hallucination_detector.py
@@ -71,7 +71,7 @@ async def detect_hallucinations(request: HallucinationDetectionRequest, current_
                    text=source.get('text', ''),
                    published_date=source.get('publishedDate'),
                    author=source.get('author'),
-                    score=source.get('score', 0.5)
+                    score=source.get('score') if source.get('score') is not None else 0.5
                )
                for source in claim.supporting_sources
            ]
@@ -83,7 +83,7 @@ async def detect_hallucinations(request: HallucinationDetectionRequest, current_
                    text=source.get('text', ''),
                    published_date=source.get('publishedDate'),
                    author=source.get('author'),
-                    score=source.get('score', 0.5)
+                    score=source.get('score') if source.get('score') is not None else 0.5
                )
                for source in claim.refuting_sources
            ]
@@ -214,7 +214,7 @@ async def verify_claim(request: ClaimVerificationRequest, current_user: Dict[str
                    text=source.get('text', ''),
                    published_date=source.get('publishedDate'),
                    author=source.get('author'),
-                    score=source.get('score', 0.5)
+                    score=source.get('score') if source.get('score') is not None else 0.5
                )
                for source in claim_result.supporting_sources
            ]
@@ -226,7 +226,7 @@ async def verify_claim(request: ClaimVerificationRequest, current_user: Dict[str
                    text=source.get('text', ''),
                    published_date=source.get('publishedDate'),
                    author=source.get('author'),
-                    score=source.get('score', 0.5)
+                    score=source.get('score') if source.get('score') is not None else 0.5
                )
                for source in claim_result.refuting_sources
            ]
--- a/backend/api/writing_assistant.py
+++ b/backend/api/writing_assistant.py
@@ -12,6 +12,7 @@ router = APIRouter(prefix="/api/writing-assistant", tags=["writing-assistant"])

 class SuggestRequest(BaseModel):
    text: str
+    cursor_position: int | None = None


 class SourceModel(BaseModel):
@@ -32,6 +33,7 @@ class SuggestionModel(BaseModel):
 class SuggestResponse(BaseModel):
    success: bool
    suggestions: List[SuggestionModel]
+    message: str = ""


 assistant_service = WritingAssistantService()
@@ -41,9 +43,9 @@ assistant_service = WritingAssistantService()
 async def suggest_endpoint(req: SuggestRequest, current_user: Dict[str, Any] = Depends(get_current_user)) -> SuggestResponse:
    try:
        user_id = current_user.get("id")
-        suggestions = await assistant_service.suggest(req.text, user_id=user_id)
+        suggestions = await assistant_service.suggest(req.text, user_id=user_id, cursor_position=req.cursor_position)
        return SuggestResponse(
-            success=True,
+            success=len(suggestions) > 0,
            suggestions=[
                SuggestionModel(
                    text=s.text,
--- a/backend/app.py
+++ b/backend/app.py
@@ -679,9 +679,6 @@ if _is_full_mode():
    if campaign_creator_router:
        app.include_router(campaign_creator_router)

-    # Include content assets router
-    from api.content_assets.router import router as content_assets_router
-    app.include_router(content_assets_router)
    router_group_status["platform_extensions"] = {
        "mounted": True,
        "reason": "Full mode",
@@ -692,6 +689,10 @@ else:
        "reason": "Skipped in feature-only mode",
    }

+# Include content assets router (always — core utility, not feature-specific)
+from api.content_assets.router import router as content_assets_router
+app.include_router(content_assets_router)
+
 # Include Podcast Maker router (only when podcast feature is enabled)
 if _is_feature_enabled("podcast") and "all" not in get_enabled_features():
    from api.podcast.router import router as podcast_router
--- a/backend/routers/gsc_auth.py
+++ b/backend/routers/gsc_auth.py
@@ -76,12 +76,22 @@ async def handle_gsc_callback(
        
        success = gsc_service.handle_oauth_callback(code, state)
        
+        # If state verification failed, check if user is already connected
+        # (handles duplicate callbacks where state was consumed by a prior request)
+        if not success:
+            user_id_from_state = state.split(':')[0] if ':' in state else None
+            if user_id_from_state:
+                existing_creds = gsc_service.load_user_credentials(user_id_from_state)
+                if existing_creds:
+                    logger.info(f"GSC OAuth state already consumed, but user {user_id_from_state} has valid credentials — treating as success")
+                    success = True
+        
        if success:
            logger.info("GSC OAuth callback handled successfully")
            
            # Create GSC insights task immediately after successful connection
            try:
-                from services.database import SessionLocal
+                from services.database import get_session_for_user
                from services.platform_insights_monitoring_service import create_platform_insights_task
                
                # Get user_id from state (stored during OAuth flow)
@@ -89,23 +99,24 @@ async def handle_gsc_callback(
                user_id = state.split(':')[0] if ':' in state else None
                
                if user_id:
-                    db = SessionLocal()
-                    try:
-                        # Create insights task without site_url to avoid API calls
-                        # The executor will fetch it when the task runs (weekly)
-                        task_result = create_platform_insights_task(
-                            user_id=user_id,
-                            platform='gsc',
-                            site_url=None,  # Will be fetched by executor when task runs
-                            db=db
-                        )
-                        
-                        if task_result.get('success'):
-                            logger.info(f"Created GSC insights task for user {user_id}")
-                        else:
-                            logger.warning(f"Failed to create GSC insights task: {task_result.get('error')}")
-                    finally:
-                        db.close()
+                    db = get_session_for_user(user_id)
+                    if db:
+                        try:
+                            task_result = create_platform_insights_task(
+                                user_id=user_id,
+                                platform='gsc',
+                                site_url=None,
+                                db=db
+                            )
+                            
+                            if task_result.get('success'):
+                                logger.info(f"Created GSC insights task for user {user_id}")
+                            else:
+                                logger.warning(f"Failed to create GSC insights task: {task_result.get('error')}")
+                        finally:
+                            db.close()
+                    else:
+                        logger.warning(f"Could not create DB session for user {user_id}")
                else:
                    logger.warning(f"Could not extract user_id from state: {state}")
            except Exception as e:
@@ -125,7 +136,10 @@ async def handle_gsc_callback(
  </body>
  </html>
 """
-            return HTMLResponse(content=html)
+            return HTMLResponse(
+                content=html,
+                headers={"Cross-Origin-Opener-Policy": "unsafe-none"},
+            )
        else:
            logger.error("Failed to handle GSC OAuth callback")
            html = """
@@ -140,7 +154,11 @@ async def handle_gsc_callback(
  </body>
  </html>
 """
-            return HTMLResponse(status_code=400, content=html)
+            return HTMLResponse(
+                status_code=400,
+                content=html,
+                headers={"Cross-Origin-Opener-Policy": "unsafe-none"},
+            )
            
    except Exception as e:
        logger.error(f"Error handling GSC OAuth callback: {e}")
@@ -157,7 +175,11 @@ async def handle_gsc_callback(
  </body>
  </html>
 """
-        return HTMLResponse(status_code=500, content=html)
+        return HTMLResponse(
+            status_code=500,
+            content=html,
+            headers={"Cross-Origin-Opener-Policy": "unsafe-none"},
+        )

@router.get("/sites")
 async def get_gsc_sites(user: dict = Depends(get_current_user)):
--- a/backend/services/blog_writer/content/medium_blog_generator.py
+++ b/backend/services/blog_writer/content/medium_blog_generator.py
@@ -122,9 +122,6 @@ class MediumBlogGenerator:
        payload = {
            "title": req.title,
            "globalTargetWords": req.globalTargetWords or 1000,
-            "persona": req.persona.dict() if req.persona else None,
-            "tone": req.tone,
-            "audience": req.audience,
            "sections": [section_block(s) for s in req.sections],
        }

@@ -136,7 +133,6 @@ class MediumBlogGenerator:
            - Industry: {req.persona.industry or 'General'}
            - Tone: {req.persona.tone or 'Professional'}
            - Audience: {req.persona.audience or 'General readers'}
-            - Persona ID: {req.persona.persona_id or 'Default'}
            
            Write content that reflects this persona's expertise and communication style.
            Use industry-specific terminology and examples where appropriate.
@@ -154,40 +150,19 @@ class MediumBlogGenerator:
            "Return ONLY valid JSON with no markdown formatting or explanations."
        )

-        # Build persona-specific content instructions
-        persona_instructions = ""
-        if req.persona:
-            industry = req.persona.industry or 'General'
-            tone = req.persona.tone or 'Professional'
-            audience = req.persona.audience or 'General readers'
-            
-            persona_instructions = f"""
-            PERSONA-DRIVEN CONTENT REQUIREMENTS:
-            - Write as an expert in {industry} industry
-            - Use {tone} tone appropriate for {audience}
-            - Include industry-specific examples and terminology
-            - Demonstrate authority and expertise in the field
-            - Use language that resonates with {audience}
-            - Maintain consistent voice that reflects this persona's expertise
-            """
-
        prompt = (
-            f"Write blog content for the following sections. Each section should be {req.globalTargetWords or 1000} words total, distributed across all sections.\n\n"
+            f"Write blog content for the following sections. Total target: {req.globalTargetWords or 1000} words, distributed across all sections.\n\n"
            f"Blog Title: {req.title}\n\n"
            "For each section, write engaging content that:\n"
            "- Follows the key points provided\n"
            "- Uses the suggested keywords naturally\n"
            "- Meets the target word count\n"
-            "- Maintains professional tone\n"
-            "- References the provided sources when relevant\n"
            "- Breaks content into clear paragraphs (2-4 sentences each)\n"
-            "- Uses double line breaks (\\n\\n) between paragraphs for proper formatting\n"
+            "- Uses double line breaks (\\n\\n) between paragraphs\n"
            "- Starts with an engaging opening paragraph\n"
-            "- Ends with a strong concluding paragraph\n"
-            f"{persona_instructions}\n"
-            "IMPORTANT: Format the 'content' field with proper paragraph breaks using \\n\\n between paragraphs.\n\n"
-            "Return a JSON object with 'title' and 'sections' array. Each section should have 'id', 'heading', 'content', and 'wordCount'.\n\n"
-            f"Sections to write:\n{json.dumps(payload, ensure_ascii=False, indent=2)}"
+            "- Ends with a strong concluding paragraph\n\n"
+            "Return a JSON object with 'title' and 'sections' array. Each section must have 'id', 'heading', 'content', 'wordCount', and 'sources'.\n\n"
+            f"Sections:\n{json.dumps(payload, ensure_ascii=False, indent=2)}"
        )

        try:
@@ -195,7 +170,9 @@ class MediumBlogGenerator:
                prompt=prompt,
                json_struct=schema,
                system_prompt=system,
-                user_id=user_id
+                user_id=user_id,
+                max_tokens=None,
+                temperature=0.3,
            )
        except HTTPException:
            # Re-raise HTTPExceptions (e.g., 429 subscription limit) to preserve error details
--- a/backend/services/blog_writer/research/exa_provider.py
+++ b/backend/services/blog_writer/research/exa_provider.py
@@ -322,7 +322,7 @@ class ExaResearchProvider(BaseProvider):
                'text': getattr(result, 'text', ''),
                'publishedDate': getattr(result, 'publishedDate', ''),
                'author': getattr(result, 'author', ''),
-                'score': getattr(result, 'score', 0.5),
+                'score': (lambda v: v if v is not None else 0.5)(getattr(result, 'score', 0.5)),
            })
        
        # Track usage
--- a/backend/services/database.py
+++ b/backend/services/database.py
@@ -31,6 +31,7 @@ from models.product_marketing_models import Campaign, CampaignProposal, Campaign
 from models.product_asset_models import ProductAsset, ProductStyleTemplate, EcommerceExport
 # Podcast Maker models use SubscriptionBase, but import to ensure models are registered
 from models.podcast_models import PodcastProject
+
 # Research models use SubscriptionBase
 from models.research_models import ResearchProject
 # Video Studio models
--- a/backend/services/gsc_brainstorm_service.py
+++ b/backend/services/gsc_brainstorm_service.py
@@ -2,8 +2,9 @@
 GSC Brainstorm Service for ALwrity.

 Analyzes Google Search Console data to suggest blog topics the user should write about.
-Combines rule-based heuristics (high-impression/low-CTR keywords, near-page-1 positions)
-with LLM-powered strategic recommendations tailored to the user's topic intent.
+Combines rule-based heuristics with LLM-powered strategic recommendations tailored to
+the user's topic intent. Designed for non-SEO-experts: every insight includes plain-English
+explanations of WHY it matters and WHAT to do about it.
 """

 import json
@@ -21,9 +22,10 @@ class GSCBrainstormService:

    Flow:
    1. Fetch real GSC search analytics (query + page data, 30 days)
-    2. Apply rule-based filters (Content Optimization, Content Enhancement, Keyword Gap)
-    3. Generate LLM-powered strategic recommendations contextualised to the user's keywords
-    4. Return structured results
+    2. Compute derived metrics (CTR benchmarks, estimated traffic uplift, content formats)
+    3. Apply rule-based filters (Quick Wins, Optimization, Enhancement, Rising Stars, Page Issues)
+    4. Generate LLM-powered strategic recommendations contextualised to the user's keywords
+    5. Return structured results with all data exposed for rich frontend display
    """

    def __init__(self, gsc_service: GSCService = None):
@@ -39,18 +41,8 @@ class GSCBrainstormService:
        keywords: str,
        site_url: Optional[str] = None,
    ) -> Dict[str, Any]:
-        """
-        Generate blog topic suggestions from the user's GSC data.
-
-        Args:
-            user_id: Clerk user ID (must have GSC connected).
-            keywords: User's 3+ word topic intent (e.g. "content marketing strategy").
-            site_url: Optional site URL; auto-selected from user's first GSC site if omitted.
-
-        Returns:
-            Dict with content_opportunities, keyword_gaps, ai_recommendations, summary.
-        """
        self._user_id = user_id
+
        # 1. Resolve site_url
        if not site_url:
            sites = self.gsc_service.get_site_list(user_id)
@@ -59,6 +51,8 @@ class GSCBrainstormService:
                    "error": "No GSC sites found. Make sure your site is verified in Google Search Console.",
                    "content_opportunities": [],
                    "keyword_gaps": [],
+                    "quick_wins": [],
+                    "page_opportunities": [],
                    "ai_recommendations": {},
                    "summary": {},
                }
@@ -80,6 +74,8 @@ class GSCBrainstormService:
                "error": analytics.get("error", "Failed to fetch GSC data"),
                "content_opportunities": [],
                "keyword_gaps": [],
+                "quick_wins": [],
+                "page_opportunities": [],
                "ai_recommendations": {},
                "summary": {},
            }
@@ -93,9 +89,11 @@ class GSCBrainstormService:

        if not keywords_data:
            return {
-                "error": "No keyword data available for the selected period.",
+                "error": "No keyword data available for the selected period. This usually means your site is new to GSC or hasn't received search traffic yet.",
                "content_opportunities": [],
                "keyword_gaps": [],
+                "quick_wins": [],
+                "page_opportunities": [],
                "ai_recommendations": {},
                "summary": {
                    "site_url": site_url,
@@ -107,18 +105,23 @@ class GSCBrainstormService:
        # 4. Rule-based analysis
        content_opportunities = self._identify_content_opportunities(keywords_data)
        keyword_gaps = self._identify_keyword_gaps(keywords_data)
+        quick_wins = self._identify_quick_wins(keywords_data)
+        page_opportunities = self._identify_page_opportunities(pages_data)

        # 5. Summary metrics
        summary = self._compute_summary(keywords_data, pages_data, site_url, start_date, end_date)

-        # 6. AI recommendations (best-effort; don't fail the whole request on LLM error)
+        # 6. AI recommendations
        ai_recommendations = self._generate_ai_recommendations(
-            keywords_data, pages_data, summary, keywords
+            keywords_data, pages_data, summary, keywords,
+            content_opportunities, quick_wins, keyword_gaps,
        )

        return {
            "content_opportunities": content_opportunities,
            "keyword_gaps": keyword_gaps,
+            "quick_wins": quick_wins,
+            "page_opportunities": page_opportunities,
            "ai_recommendations": ai_recommendations,
            "summary": summary,
        }
@@ -168,39 +171,53 @@ class GSCBrainstormService:
        opportunities: List[Dict[str, Any]] = []

        # Rule 1: Content Optimization — high impressions, low CTR
+        # Meaning: Google is SHOWING your page for this query but people aren't clicking.
+        #          The content probably ranks but title/meta/snippet isn't compelling enough.
        for kw in keywords_data:
            if kw["impressions"] > 500 and kw["ctr"] < 3:
+                estimated_gain = int(kw["impressions"] * 0.05) - kw["clicks"]
                opportunities.append({
                    "type": "Content Optimization",
                    "keyword": kw["keyword"],
                    "opportunity": (
-                        f"Optimize existing content for '{kw['keyword']}' "
-                        f"to improve CTR from {kw['ctr']:.1f}% "
-                        f"(position {kw['position']:.1f})"
+                        f"Your site appears for '{kw['keyword']}' ({kw['impressions']:,} times/month) "
+                        f"but only {kw['ctr']:.1f}% click. Improving your title and meta description "
+                        f"could bring ~{max(estimated_gain, 5)} more clicks/month."
                    ),
-                    "potential_impact": "High",
+                    "potential_impact": "High" if kw["impressions"] > 1000 else "Medium",
                    "current_position": kw["position"],
+                    "current_ctr": kw["ctr"],
                    "impressions": kw["impressions"],
+                    "clicks": kw["clicks"],
+                    "estimated_traffic_gain": max(estimated_gain, 5),
                    "priority": "High" if kw["impressions"] > 1000 else "Medium",
+                    "suggested_format": GSCBrainstormService._suggest_format(kw["keyword"]),
                })

        # Rule 2: Content Enhancement — positions 11-20 with decent impressions
+        # Meaning: You're on page 2 of Google. A small content boost could push you to page 1,
+        #          where CTR increases dramatically (page 1 gets ~95% of all clicks).
        for kw in keywords_data:
            if 10 < kw["position"] <= 20 and kw["impressions"] > 100:
+                estimated_gain = int(kw["impressions"] * 0.08)
                opportunities.append({
                    "type": "Content Enhancement",
                    "keyword": kw["keyword"],
                    "opportunity": (
-                        f"Enhance content for '{kw['keyword']}' to move from "
-                        f"position {kw['position']:.1f} to the first page"
+                        f"'{kw['keyword']}' ranks #{kw['position']:.0f} (page 2). "
+                        f"Moving to page 1 could capture ~{estimated_gain} more clicks/month "
+                        f"from {kw['impressions']:,} impressions."
                    ),
-                    "potential_impact": "Medium",
+                    "potential_impact": "High" if kw["impressions"] > 500 else "Medium",
                    "current_position": kw["position"],
+                    "current_ctr": kw["ctr"],
                    "impressions": kw["impressions"],
-                    "priority": "Medium",
+                    "clicks": kw["clicks"],
+                    "estimated_traffic_gain": estimated_gain,
+                    "priority": "High" if kw["impressions"] > 500 else "Medium",
+                    "suggested_format": GSCBrainstormService._suggest_format(kw["keyword"]),
                })

-        # Sort by impressions descending, keep top 10
        opportunities.sort(key=lambda x: x["impressions"], reverse=True)
        return opportunities[:10]

@@ -212,15 +229,111 @@ class GSCBrainstormService:

        for kw in keywords_data:
            if 4 <= kw["position"] <= 20 and kw["impressions"] >= 50:
+                # Estimate traffic gain if this keyword moved to position 1-3
+                # Position 1 avg CTR ~31%, position 3 ~11%, current position CTR estimate
+                position_1_ctr = 31.0
+                current_ctr = kw["ctr"]
+                estimated_gain = max(int(kw["impressions"] * (position_1_ctr - current_ctr) / 100), 1)
+
                gaps.append({
                    "keyword": kw["keyword"],
                    "position": kw["position"],
                    "impressions": kw["impressions"],
+                    "current_ctr": kw["ctr"],
+                    "clicks": kw["clicks"],
+                    "estimated_traffic_if_page1": estimated_gain,
+                    "gap_from_page1": round(kw["position"] - 3, 1),
                })

        gaps.sort(key=lambda x: x["impressions"], reverse=True)
        return gaps[:10]

+    @staticmethod
+    def _identify_quick_wins(
+        keywords_data: List[Dict[str, Any]],
+    ) -> List[Dict[str, Any]]:
+        """Keywords already on page 1 (positions 4-10) that could reach top 3
+        with minor improvements — the highest-ROI opportunities."""
+        quick_wins: List[Dict[str, Any]] = []
+
+        for kw in keywords_data:
+            if 4 <= kw["position"] <= 10 and kw["impressions"] >= 100:
+                # Position 3 CTR ≈ 11%, position 5 CTR ≈ 6%
+                # Small improvements can yield big traffic gains
+                target_ctr = 11.0  # approximate CTR for position 3
+                estimated_gain = max(int(kw["impressions"] * (target_ctr - kw["ctr"]) / 100), 1)
+
+                quick_wins.append({
+                    "keyword": kw["keyword"],
+                    "position": kw["position"],
+                    "impressions": kw["impressions"],
+                    "current_ctr": kw["ctr"],
+                    "clicks": kw["clicks"],
+                    "estimated_traffic_gain": estimated_gain,
+                    "reason": (
+                        f"Already on page 1 at position #{kw['position']:.0f}. "
+                        f"Optimizing this page could increase CTR from {kw['ctr']:.1f}% "
+                        f"to ~{target_ctr:.0f}%, gaining ~{estimated_gain} clicks/month."
+                    ),
+                })
+
+        quick_wins.sort(key=lambda x: x["estimated_traffic_gain"], reverse=True)
+        return quick_wins[:5]
+
+    @staticmethod
+    def _identify_page_opportunities(
+        pages_data: List[Dict[str, Any]],
+    ) -> List[Dict[str, Any]]:
+        """Pages with high impressions but low CTR — the content or meta needs work."""
+        opportunities: List[Dict[str, Any]] = []
+
+        for pg in pages_data:
+            if pg["impressions"] > 300 and pg["ctr"] < 2.0:
+                short_page = pg["page"].rstrip("/").rsplit("/", 1)[-1].replace("-", " ").title()
+                if len(short_page) > 60:
+                    short_page = short_page[:57] + "..."
+                opportunities.append({
+                    "page": pg["page"],
+                    "page_title": short_page,
+                    "impressions": pg["impressions"],
+                    "clicks": pg["clicks"],
+                    "current_ctr": pg["ctr"],
+                    "current_position": pg["position"],
+                    "reason": (
+                        f"This page gets {pg['impressions']:,} impressions but only {pg['ctr']:.1f}% CTR. "
+                        f"Reviewing the title and meta description could significantly boost clicks."
+                    ),
+                })
+
+        opportunities.sort(key=lambda x: x["impressions"], reverse=True)
+        return opportunities[:5]
+
+    # ------------------------------------------------------------------ #
+    #  Content format suggestion
+    # ------------------------------------------------------------------ #
+
+    @staticmethod
+    def _suggest_format(keyword: str) -> str:
+        """Suggest a content format based on keyword patterns."""
+        kw = keyword.lower()
+        if any(w in kw for w in ["how to", "how do", "guide", "tutorial", "steps"]):
+            return "How-To Guide"
+        if any(w in kw for w in ["vs", "versus", "compare", "comparison", "difference"]):
+            return "Comparison"
+        if any(w in kw for w in ["best", "top", "recommended", "review", "reviews"]):
+            return "Top Picks / Review"
+        if any(w in kw for w in ["what is", "definition", "meaning", "explained"]):
+            return "Explainer"
+        if any(w in kw for w in ["list", "examples", "ideas", "tips", "ways"]):
+            return "Listicle"
+        if any(w in kw for w in ["free", "cheap", "alternative", "budget"]):
+            return "Budget / Alternative"
+        if any(w in kw for w in ["template", "calculator", "tool", "checker"]):
+            return "Tool / Template"
+        if any(w in kw for w in ["2024", "2025", "2026", "trends", "prediction", "future"]):
+            return "Trend Report"
+        return "In-Depth Article"
+
    # ------------------------------------------------------------------ #
    #  Summary metrics
    # ------------------------------------------------------------------ #
@@ -248,6 +361,16 @@ class GSCBrainstormService:
        top_keywords = sorted(keywords_data, key=lambda x: x["impressions"], reverse=True)[:5]
        top_pages = sorted(pages_data, key=lambda x: x["clicks"], reverse=True)[:3]

+        # Health score: 0-100 based on how many keywords are on page 1
+        total_kw = len(keywords_data) or 1
+        page1_pct = (pos_1_3 + pos_4_10) / total_kw * 100
+        top3_pct = pos_1_3 / total_kw * 100
+        health_score = round(min(top3_pct * 3 + page1_pct * 0.7, 100), 0)
+
+        # CTR benchmark: industry average is ~3.1% for position 1-10
+        ctr_benchmark = 3.1
+        ctr_vs_benchmark = round(avg_ctr - ctr_benchmark, 2)
+
        return {
            "site_url": site_url,
            "date_range": {"start": start_date, "end": end_date},
@@ -256,6 +379,8 @@ class GSCBrainstormService:
            "total_clicks": total_clicks,
            "avg_ctr": avg_ctr,
            "avg_position": avg_position,
+            "ctr_vs_benchmark": ctr_vs_benchmark,
+            "health_score": health_score,
            "keyword_distribution": {
                "positions_1_3": pos_1_3,
                "positions_4_10": pos_4_10,
@@ -263,11 +388,22 @@ class GSCBrainstormService:
                "positions_21_plus": pos_21_plus,
            },
            "top_keywords": [
-                {"keyword": kw["keyword"], "impressions": kw["impressions"], "position": kw["position"]}
+                {
+                    "keyword": kw["keyword"],
+                    "impressions": kw["impressions"],
+                    "clicks": kw["clicks"],
+                    "position": kw["position"],
+                    "ctr": kw["ctr"],
+                }
                for kw in top_keywords
            ],
            "top_pages": [
-                {"page": pg["page"], "clicks": pg["clicks"], "impressions": pg["impressions"]}
+                {
+                    "page": pg["page"],
+                    "clicks": pg["clicks"],
+                    "impressions": pg["impressions"],
+                    "ctr": pg["ctr"],
+                }
                for pg in top_pages
            ],
        }
@@ -282,60 +418,110 @@ class GSCBrainstormService:
        pages_data: List[Dict],
        summary: Dict,
        user_keywords: str,
+        content_opportunities: List[Dict],
+        quick_wins: List[Dict],
+        keyword_gaps: List[Dict],
    ) -> Dict[str, Any]:
        try:
-            top_kw = ", ".join(kw["keyword"] for kw in summary.get("top_keywords", []))
+            top_kw_list = summary.get("top_keywords", [])
+            top_kw_str = "\n".join(
+                f"  • {kw['keyword']}: {kw['impressions']:,} impressions, position {kw['position']}, {kw['ctr']:.1f}% CTR"
+                for kw in top_kw_list[:10]
+            )
            dist = summary.get("keyword_distribution", {})

-            prompt = f"""Analyze this Google Search Console data and suggest blog topics the user should write about.
+            opp_str = ""
+            if content_opportunities:
+                opp_str = "\nCONTENT OPPORTUNITIES (rule-based findings):\n" + "\n".join(
+                    f"  • {o['keyword']}: {o['opportunity']}"
+                    for o in content_opportunities[:5]
+                )
+            else:
+                opp_str = "\nNo major content opportunities detected from rule-based analysis."

-USER'S TOPIC INTENT: "{user_keywords}"
+            qw_str = ""
+            if quick_wins:
+                qw_str = "\nQUICK WINS (already on page 1, easy to optimize):\n" + "\n".join(
+                    f"  • {q['keyword']}: position #{q['position']:.0f}, {q['current_ctr']:.1f}% CTR, est. +{q['estimated_traffic_gain']} clicks/month"
+                    for q in quick_wins[:3]
+                )

-SEARCH PERFORMANCE SUMMARY:
- Total Keywords Tracked: {summary.get('total_keywords_analyzed', 0)}
+            prompt = f"""You are an expert SEO content strategist analyzing real Google Search Console data for a blog writer.
+
+The user wants to write about: "{user_keywords}"
+
+Here is their GSC data for the last 30 days:
+
+PERFORMANCE OVERVIEW:
+- Total Keywords: {summary.get('total_keywords_analyzed', 0)}
 - Total Impressions: {summary.get('total_impressions', 0):,}
 - Total Clicks: {summary.get('total_clicks', 0):,}
- Average CTR: {summary.get('avg_ctr', 0):.2f}%
+- Average CTR: {summary.get('avg_ctr', 0):.2f}% (industry avg for positions 1-10 is ~3.1%)
 - Average Position: {summary.get('avg_position', 0):.1f}
+- SEO Health Score: {summary.get('health_score', 0)}/100

-TOP PERFORMING KEYWORDS:
-{top_kw}
+TOP KEYWORDS BY IMPRESSIONS:
+{top_kw_str}

 KEYWORD POSITION DISTRIBUTION:
- Positions 1-3: {dist.get('positions_1_3', 0)}
- Positions 4-10: {dist.get('positions_4_10', 0)}
- Positions 11-20: {dist.get('positions_11_20', 0)}
- Positions 21+: {dist.get('positions_21_plus', 0)}
+- Position 1-3 (top results): {dist.get('positions_1_3', 0)} keywords
+- Position 4-10 (page 1): {dist.get('positions_4_10', 0)} keywords
+- Position 11-20 (page 2): {dist.get('positions_11_20', 0)} keywords
+- Position 21+ (page 3+): {dist.get('positions_21_plus', 0)} keywords
+{opp_str}
+{qw_str}

-Based on this data, provide:
+Based on this data, provide EXACT blog post suggestions the user should write.

-1. IMMEDIATE TOPIC OPPORTUNITIES (0-30 days):
-   - Specific blog post titles the user should write
-   - Each tied to a keyword opportunity from the data
-   - 3-5 suggestions
+For each suggestion include:
+1. A specific, compelling blog post TITLE (not vague topic)
+2. The keyword it targets and why (based on the data above)
+3. The recommended content format (how-to, listicle, comparison, etc.)
+4. Estimated impact (how many more clicks/month they could gain)

-2. CONTENT STRATEGY TOPICS (1-3 months):
-   - New topic clusters to build authority
-   - Content pillar ideas
-   - 3-5 suggestions
-
-3. LONG-TERM CONTENT VISION (3-12 months):
-   - Market expansion topics
-   - Authority-building content ideas
-   - 3-5 suggestions
-
-IMPORTANT: Relate every topic suggestion to the user's interest in "{user_keywords}".
-Return your response in this exact JSON format:
+Return your response in this EXACT JSON format (no markdown, no code fences):
 {{
-  "immediate_opportunities": ["topic 1", "topic 2", "topic 3"],
-  "content_strategy": ["strategy 1", "strategy 2", "strategy 3"],
-  "long_term_strategy": ["vision 1", "vision 2", "vision 3"]
-}}"""
+  "immediate_opportunities": [
+    {{
+      "title": "Specific Blog Post Title Here",
+      "keyword": "target keyword",
+      "reason": "Why this will work based on the data",
+      "format": "How-To Guide | Listicle | Comparison | Explainer | etc.",
+      "estimated_impact": "Estimated X more clicks/month"
+    }}
+  ],
+  "content_strategy": [
+    {{
+      "title": "Pillar Content Title",
+      "keyword": "target keyword",
+      "reason": "Strategic reasoning",
+      "format": "Content format",
+      "estimated_impact": "Expected impact"
+    }}
+  ],
+  "long_term_strategy": [
+    {{
+      "title": "Authority Building Title",
+      "keyword": "target keyword",
+      "reason": "Long-term reasoning",
+      "format": "Content format",
+      "estimated_impact": "Expected long-term impact"
+    }}
+  ]
+}}
+
+IMPORTANT:
+- Provide 3-5 items in each category
+- Every suggestion MUST relate to the user's interest in "{user_keywords}"
+- Titles should be specific and compelling, like real blog post headlines
+- Use the data above to justify each recommendation
+- Prioritize keywords with high impressions but low CTR or low position"""

            system_prompt = (
-                "You are an enterprise SEO content strategist. Provide specific, data-driven "
-                "blog topic suggestions that will improve the user's search performance. "
-                "Always respond with valid JSON matching the requested format."
+                "You are an expert SEO content strategist. You analyze Google Search Console data "
+                "and provide specific, actionable blog post recommendations that will drive real traffic. "
+                "You always respond with valid JSON matching the requested format. "
+                "Every recommendation must be backed by the data provided."
            )

            result = llm_text_gen(
@@ -350,27 +536,58 @@ Return your response in this exact JSON format:
                if parsed:
                    return parsed

-            return self._fallback_ai_recommendations(keywords_data)
+            return self._fallback_ai_recommendations(keywords_data, content_opportunities, quick_wins)

        except Exception as e:
            logger.warning(f"GSC brainstorm AI recommendations failed: {e}")
-            return self._fallback_ai_recommendations(keywords_data)
+            return self._fallback_ai_recommendations(keywords_data, content_opportunities, quick_wins)

-    @staticmethod
-    def _parse_ai_response(raw: str) -> Optional[Dict[str, List[str]]]:
+    def _parse_ai_response(self, raw: str) -> Optional[Dict[str, Any]]:
        try:
-            json_start = raw.find("{")
-            json_end = raw.rfind("}") + 1
+            # Strip markdown code fences if present
+            cleaned = raw.strip()
+            if cleaned.startswith("```"):
+                first_newline = cleaned.find("\n")
+                if first_newline != -1:
+                    cleaned = cleaned[first_newline + 1:]
+                if cleaned.endswith("```"):
+                    cleaned = cleaned[:-3].strip()
+
+            json_start = cleaned.find("{")
+            json_end = cleaned.rfind("}") + 1
            if json_start == -1 or json_end == 0:
                return None

-            chunk = raw[json_start:json_end]
+            chunk = cleaned[json_start:json_end]
            parsed = json.loads(chunk)

+            def normalize_section(section: Any) -> List[Dict[str, str]]:
+                if not isinstance(section, list):
+                    return []
+                result = []
+                for item in section:
+                    if isinstance(item, str):
+                        result.append({
+                            "title": item.split(":")[0].strip() if ":" in item else item[:60],
+                            "keyword": "",
+                            "reason": item,
+                            "format": "",
+                            "estimated_impact": "",
+                        })
+                    elif isinstance(item, dict):
+                        result.append({
+                            "title": str(item.get("title", "")),
+                            "keyword": str(item.get("keyword", "")),
+                            "reason": str(item.get("reason", "")),
+                            "format": str(item.get("format", "")),
+                            "estimated_impact": str(item.get("estimated_impact", "")),
+                        })
+                return result
+
            return {
-                "immediate_opportunities": parsed.get("immediate_opportunities", [])[:5],
-                "content_strategy": parsed.get("content_strategy", [])[:5],
-                "long_term_strategy": parsed.get("long_term_strategy", [])[:5],
+                "immediate_opportunities": normalize_section(parsed.get("immediate_opportunities", []))[:5],
+                "content_strategy": normalize_section(parsed.get("content_strategy", []))[:5],
+                "long_term_strategy": normalize_section(parsed.get("long_term_strategy", []))[:5],
            }
        except (json.JSONDecodeError, ValueError) as e:
            logger.warning(f"Failed to parse AI brainstorm response as JSON: {e}")
@@ -379,26 +596,53 @@ Return your response in this exact JSON format:
    @staticmethod
    def _fallback_ai_recommendations(
        keywords_data: List[Dict],
+        content_opportunities: List[Dict],
+        quick_wins: List[Dict],
    ) -> Dict[str, Any]:
        top_kw = keywords_data[:3] if keywords_data else []
        immediate = []
-        for kw in top_kw:
-            immediate.append(
-                f"Write a comprehensive guide on '{kw['keyword']}' "
-                f"(currently at position {kw['position']:.1f} with "
-                f"{kw['impressions']} impressions)"
-            )
+
+        # Build from quick wins first (highest ROI)
+        for qw in quick_wins[:2]:
+            immediate.append({
+                "title": f"How to Rank #{int(qw['position'])} for '{qw['keyword']}' — Optimization Guide",
+                "keyword": qw["keyword"],
+                "reason": qw.get("reason", f"Already on page 1 at position {qw['position']:.0f}"),
+                "format": "How-To Guide",
+                "estimated_impact": f"+{qw.get('estimated_traffic_gain', 10)} clicks/month",
+            })
+
+        # Then from content opportunities
+        for opp in content_opportunities[:2]:
+            immediate.append({
+                "title": f"Complete Guide to {opp['keyword'].title()}",
+                "keyword": opp["keyword"],
+                "reason": opp.get("opportunity", f"{opp['impressions']:,} impressions with room to improve"),
+                "format": opp.get("suggested_format", "In-Depth Article"),
+                "estimated_impact": f"+{opp.get('estimated_traffic_gain', 10)} clicks/month",
+            })
+
+        # Fill remaining with top keywords
+        remaining = 5 - len(immediate)
+        for kw in top_kw[:remaining]:
+            immediate.append({
+                "title": f"The Ultimate Guide to {kw['keyword'].title()}",
+                "keyword": kw["keyword"],
+                "reason": f"Top keyword with {kw['impressions']:,} impressions (position {kw['position']:.1f})",
+                "format": "In-Depth Article",
+                "estimated_impact": f"+{max(int(kw['impressions'] * 0.03), 5)} clicks/month",
+            })

        return {
-            "immediate_opportunities": immediate or ["No keyword data available for recommendations"],
+            "immediate_opportunities": immediate or [{"title": "No keyword data available", "keyword": "", "reason": "Connect GSC to get personalized suggestions", "format": "", "estimated_impact": ""}],
            "content_strategy": [
-                "Develop topic clusters around your top-performing keywords",
-                "Create comparison and vs-style content for competitive terms",
-                "Build FAQ sections targeting question-based queries",
+                {"title": "Topic Cluster: Build Authority Around Your Core Topics", "keyword": "", "reason": "Clustered content ranks higher and captures more long-tail queries", "format": "Pillar Page + Spokes", "estimated_impact": "+50-200 clicks/month over 3 months"},
+                {"title": "Comparison Guide: Your Product vs. Alternatives", "keyword": "", "reason": "Comparison content captures high-intent searchers ready to decide", "format": "Comparison", "estimated_impact": "+20-80 clicks/month"},
+                {"title": "FAQ: Answer What Your Audience Is Asking", "keyword": "", "reason": "FAQs capture featured snippets and voice search queries", "format": "FAQ / Listicle", "estimated_impact": "+30-100 clicks/month"},
            ],
            "long_term_strategy": [
-                "Build domain authority through pillar content",
-                "Expand into adjacent topic areas",
-                "Develop thought leadership content series",
+                {"title": "Pillar Content: The Definitive Resource in Your Niche", "keyword": "", "reason": "Comprehensive guides become authoritative references that attract backlinks", "format": "Long-Form Guide", "estimated_impact": "+100-500 clicks/month over 6-12 months"},
+                {"title": "Trend Report: What's Next in Your Industry", "keyword": "", "reason": "Forward-looking content captures emerging search demand early", "format": "Trend Report", "estimated_impact": "+50-200 clicks/month"},
+                {"title": "Thought Leadership: Expert Roundup and Insights", "keyword": "", "reason": "Expert content builds E-E-A-T signals that improve overall domain authority", "format": "Expert Roundup", "estimated_impact": "+30-100 clicks/month per piece"},
            ],
        }
--- a/backend/services/gsc_service.py
+++ b/backend/services/gsc_service.py
@@ -250,10 +250,10 @@ class GSCService:
            flow = Flow.from_client_config(
                self.client_config,
                scopes=self.scopes,
-                redirect_uri=redirect_uri
+                redirect_uri=redirect_uri,
+                autogenerate_code_verifier=False,
            )
-            
-            # Use a custom state that includes user_id for routing the callback to the correct DB
+
            random_state = secrets.token_urlsafe(32)
            state = f"{user_id}:{random_state}"
            
@@ -300,7 +300,7 @@ class GSCService:
                logger.error(f"User database not found for user {user_id}")
                return False

-            # Verify state in user's DB
+            # Verify state in user's DB (but don't delete yet — delete after successful token exchange)
            with sqlite3.connect(db_path) as conn:
                cursor = conn.cursor()
                cursor.execute('SELECT user_id FROM gsc_oauth_states WHERE state = ?', (state,))
@@ -309,10 +309,6 @@ class GSCService:
                if not result:
                    logger.error(f"Invalid or expired GSC OAuth state for user {user_id}")
                    return False
-                
-                # Clean up state
-                cursor.execute('DELETE FROM gsc_oauth_states WHERE state = ?', (state,))
-                conn.commit()
            
            # Exchange code for credentials
            if not self.client_config:
@@ -322,12 +318,22 @@ class GSCService:
            flow = Flow.from_client_config(
                self.client_config,
                scopes=self.scopes,
-                redirect_uri=os.getenv('GSC_REDIRECT_URI', 'http://localhost:8000/gsc/callback')
+                redirect_uri=os.getenv('GSC_REDIRECT_URI', 'http://localhost:8000/gsc/callback'),
+                autogenerate_code_verifier=False,
            )
            
            flow.fetch_token(code=authorization_code)
            credentials = flow.credentials
            
+            # State consumed successfully — clean up
+            try:
+                with sqlite3.connect(db_path) as conn:
+                    cursor = conn.cursor()
+                    cursor.execute('DELETE FROM gsc_oauth_states WHERE state = ?', (state,))
+                    conn.commit()
+            except Exception as cleanup_err:
+                logger.warning(f"Failed to clean up OAuth state: {cleanup_err}")
+            
            # Save credentials
            return self.save_user_credentials(user_id, credentials)
            
--- a/backend/services/hallucination_detector.py
+++ b/backend/services/hallucination_detector.py
@@ -343,18 +343,28 @@ class HallucinationDetector:
            logger.error(f"Error in batch evidence search: {str(e)}")
            return []

+    def _map_source_refs_from_reasoning(self, reasoning: str, sources: List[Dict[str, Any]]) -> List[int]:
+        """Parse 'Source N' references from reasoning text and return 0-based indices."""
+        import re
+        indices = set()
+        for match in re.finditer(r'Source\s+(\d+)', reasoning):
+            ref = int(match.group(1))
+            if 1 <= ref <= len(sources):
+                indices.add(ref - 1)  # convert 1-based → 0-based
+        return sorted(indices)
+
    async def _assess_claims_batch(self, claims: List[str], sources: List[Dict[str, Any]], user_id: str = None) -> List[Claim]:
        """Assess multiple claims against sources in one LLM call."""
        try:
            claims_to_assess = claims[:3]

            combined_sources = "\n\n".join([
-                f"Source {i+1}: {src.get('url','')}\nText: {src.get('text','')[:1000]}"
+                f"Source [{i}]: {src.get('url','')}\nText: {src.get('text','')[:1000]}"
                for i, src in enumerate(sources)
            ])

            claims_text = "\n".join([
-                f"Claim {i+1}: {claim}"
+                f"Claim {i}: {claim}"
                for i, claim in enumerate(claims_to_assess)
            ])

@@ -367,12 +377,14 @@ class HallucinationDetector:
                '      "claim_index": 0,\n'
                '      "assessment": "supported" or "refuted" or "insufficient_information",\n'
                '      "confidence": number between 0.0 and 1.0,\n'
-                '      "supporting_sources": [array of source indices that support the claim],\n'
-                '      "refuting_sources": [array of source indices that refute the claim],\n'
+                '      "supporting_sources": [array of 0-based source indices, e.g. [0, 2] for Source [0] and Source [2]],\n'
+                '      "refuting_sources": [array of 0-based source indices, e.g. [1] for Source [1]],\n'
                '      "reasoning": "brief explanation of your assessment"\n'
                '    }\n'
                '  ]\n'
                "}\n\n"
+                "IMPORTANT: Source indices are 0-based. Source [0] is the first source, Source [1] is the second, etc.\n"
+                "For every 'supported' or 'refuted' claim you MUST include the relevant source indices.\n\n"
                f"Claims to verify:\n{claims_text}\n\n"
                f"Sources:\n{combined_sources}\n\n"
                "Return only the JSON object:"
@@ -407,6 +419,15 @@ class HallucinationDetector:
                            if isinstance(idx, int) and 0 <= idx < len(sources):
                                refuting_sources.append(sources[idx])

+                    # Fallback: parse "Source N" from reasoning text when LLM omits indices
+                    if not supporting_sources and not refuting_sources and sources and assessment.get('reasoning'):
+                        ref_indices = self._map_source_refs_from_reasoning(assessment.get('reasoning', ''), sources)
+                        if ref_indices:
+                            if assessment.get('assessment') == 'supported':
+                                supporting_sources = [sources[i] for i in ref_indices]
+                            elif assessment.get('assessment') == 'refuted':
+                                refuting_sources = [sources[i] for i in ref_indices]
+
                    verified_claims.append(Claim(
                        text=claim,
                        confidence=float(assessment.get('confidence', 0.5)),
@@ -464,7 +485,7 @@ class HallucinationDetector:
        """Assess whether sources support or refute the claim using LLM."""
        try:
            combined_sources = "\n\n".join([
-                f"Source {i+1}: {src.get('url','')}\nText: {src.get('text','')[:2000]}"
+                f"Source [{i}]: {src.get('url','')}\nText: {src.get('text','')[:2000]}"
                for i, src in enumerate(sources)
            ])

@@ -474,10 +495,12 @@ class HallucinationDetector:
                "{\n"
                '  "assessment": "supported" or "refuted" or "insufficient_information",\n'
                '  "confidence": number between 0.0 and 1.0,\n'
-                '  "supporting_sources": [array of source indices that support the claim],\n'
-                '  "refuting_sources": [array of source indices that refute the claim],\n'
+                '  "supporting_sources": [array of 0-based source indices, e.g. [0, 2] for Source [0] and Source [2]],\n'
+                '  "refuting_sources": [array of 0-based source indices, e.g. [1] for Source [1]],\n'
                '  "reasoning": "brief explanation of your assessment"\n'
                "}\n\n"
+                "IMPORTANT: Source indices are 0-based. Source [0] is the first source, Source [1] is the second, etc.\n"
+                "For 'supported' or 'refuted' you MUST include the relevant source indices.\n\n"
                f"Claim to verify: {claim}\n\n"
                f"Sources:\n{combined_sources}\n\n"
                "Return only the JSON object:"
@@ -508,6 +531,15 @@ class HallucinationDetector:
                    if isinstance(idx, int) and 0 <= idx < len(sources):
                        refuting_sources.append(sources[idx])

+            # Fallback: parse "Source N" from reasoning text when LLM omits indices
+            if not supporting_sources and not refuting_sources and sources and result.get('reasoning'):
+                ref_indices = self._map_source_refs_from_reasoning(result.get('reasoning', ''), sources)
+                if ref_indices:
+                    if result.get('assessment') == 'supported':
+                        supporting_sources = [sources[i] for i in ref_indices]
+                    elif result.get('assessment') == 'refuted':
+                        refuting_sources = [sources[i] for i in ref_indices]
+
            # Validate assessment value
            valid_assessments = ['supported', 'refuted', 'insufficient_information']
            if result['assessment'] not in valid_assessments:
--- a/backend/services/llm_providers/main_text_generation.py
+++ b/backend/services/llm_providers/main_text_generation.py
@@ -46,6 +46,7 @@ def llm_text_gen(
    preferred_provider: Optional[str] = None,
    flow_type: Optional[str] = None,
    max_tokens: Optional[int] = None,
+    temperature: Optional[float] = None,
 ) -> str:
    """
    Generate text using Language Model (LLM) based on the provided prompt.
@@ -58,6 +59,8 @@ def llm_text_gen(
        preferred_hf_models (list, optional): Preferred HuggingFace models.
        preferred_provider (str, optional): Preferred provider (google, huggingface).
        flow_type (str, optional): Flow type for logging (e.g., 'sif_agent', 'premium_tool').
+        max_tokens (int, optional): Max tokens for response. If None, provider default is used.
+        temperature (float, optional): Temperature for generation (0.0-1.0). If None, defaults to 0.7.
        
    Returns:
        str: Generated text based on the prompt.
@@ -75,9 +78,8 @@ def llm_text_gen(
        # Set default values for LLM parameters
        gpt_provider = "google"  # Default to Google Gemini
        model = "gemini-2.0-flash-001"
-        temperature = 0.7
-        if max_tokens is None:
-            max_tokens = 4000
+        if temperature is None:
+            temperature = 0.7
        top_p = 0.9
        n = 1
        fp = 16
--- a/backend/services/writing_assistant.py
+++ b/backend/services/writing_assistant.py
@@ -1,6 +1,7 @@
 import os
+import re
 import asyncio
-from typing import Any, Dict, List
+from typing import Any, Dict, List, Optional
 from dataclasses import dataclass
 from loguru import logger
 import random
@@ -17,42 +18,33 @@ class WritingSuggestion:

 class WritingAssistantService:
    """
-    Minimal writing assistant that combines Exa search with Gemini continuation.
-    - Exa provides relevant sources with content snippets
-    - Gemini generates a short, cited continuation based on current text and sources
+    Writing assistant that combines Exa search with LLM continuation.
+    - Searches relevant sources using the content near the cursor position
+    - Generates a short continuation grounded in sources
+    - Confidence derived from source availability and quality
    """

    def __init__(self) -> None:
-        # COST CONTROL: Daily usage limits
        self.daily_api_calls = 0
-        self.daily_limit = 50  # Max 50 API calls per day (~$2.50 max cost)
+        self.daily_limit = 50
        self.last_reset_date = None

    def _get_cached_suggestion(self, text: str) -> WritingSuggestion | None:
-        """No cached suggestions - always use real API calls for authentic results."""
        return None

    def _check_daily_limit(self) -> bool:
-        """Check if we're within daily API usage limits."""
        import datetime
-        
        today = datetime.date.today()
-        
-        # Reset counter if it's a new day
        if self.last_reset_date != today:
            self.daily_api_calls = 0
            self.last_reset_date = today
-        
-        # Check if we've exceeded the limit
        if self.daily_api_calls >= self.daily_limit:
            return False
-        
-        # Increment counter for this API call
        self.daily_api_calls += 1
        logger.info(f"Writing assistant API call #{self.daily_api_calls}/{self.daily_limit} today")
        return True

-    async def suggest(self, text: str, user_id: str | None = None) -> List[WritingSuggestion]:
+    async def suggest(self, text: str, user_id: str | None = None, cursor_position: Optional[int] = None) -> List[WritingSuggestion]:
        if not text or len(text.strip()) < 6:
            return []

@@ -67,26 +59,41 @@ class WritingAssistantService:
        if len(text.strip()) < 50:
            return []

-        # 1) Find relevant sources via Exa
-        sources = await self._search_sources(text, user_id=user_id)
+        # Use text before cursor for context (where the user is actively writing)
+        if cursor_position is not None and 0 < cursor_position <= len(text):
+            context_text = text[:cursor_position]
+        else:
+            context_text = text

-        # 2) Generate continuation suggestion via LLM grounded in sources
-        suggestion_text, confidence = await self._generate_continuation(text, sources, user_id=user_id)
+        # 1) Find relevant sources via Exa (non-fatal)
+        sources = []
+        try:
+            sources = await self._search_sources(context_text, user_id=user_id)
+        except Exception as e:
+            logger.warning(f"WritingAssistant Exa search failed, proceeding without sources: {e}")
+
+        # 2) Generate continuation suggestion via LLM
+        suggestion_text, confidence = await self._generate_continuation(context_text, sources, user_id=user_id)

        if not suggestion_text:
            return []

        return [WritingSuggestion(text=suggestion_text.strip(), confidence=confidence, sources=sources)]

-    async def _search_sources(self, text: str, user_id: str = None) -> List[Dict[str, Any]]:
-        """Search for relevant sources using ExaResearchProvider with subscription checks."""
+    async def _search_sources(self, context_text: str, user_id: str = None) -> List[Dict[str, Any]]:
+        """Search Exa using the last sentence before cursor for a focused query."""
        try:
            from services.blog_writer.research.exa_provider import ExaResearchProvider

-            exa_query = (
-                (text[-1000:] if len(text) > 1000 else text)
-                + "\n\nIf you found the above interesting, here's another useful resource to read:"
-            )
+            # Extract the last sentence from context to use as a focused search query
+            sentences = re.split(r'(?<=[.!?])\s+', context_text.strip())
+            last_sentence = sentences[-1].strip().strip('"').strip("'") if sentences else context_text
+
+            # If very short, use last two sentences
+            if len(last_sentence) < 20 and len(sentences) >= 2:
+                last_sentence = ' '.join(s[-2:]).strip().strip('"').strip("'")
+
+            exa_query = last_sentence[:500] if len(last_sentence) > 500 else last_sentence

            provider = ExaResearchProvider()
            sources = await provider.simple_search(
@@ -95,7 +102,6 @@ class WritingAssistantService:
                user_id=user_id,
            )

-            # Normalize keys to match expected format
            normalized = []
            for s in sources:
                normalized.append({
@@ -104,7 +110,7 @@ class WritingAssistantService:
                    "text": s.get("text", ""),
                    "author": s.get("author", ""),
                    "published_date": s.get("publishedDate", ""),
-                    "score": float(s.get("score", 0.5)),
+                    "score": float(s.get("score") if s.get("score") is not None else 0.5),
                })

            if not normalized:
@@ -151,8 +157,21 @@ class WritingAssistantService:
                suggestion = (str(ai_resp or "")).strip()
            if not suggestion:
                raise Exception("Assistive writer returned empty suggestion")
-            confidence = 0.7
-            return suggestion, confidence
+
+            # Dynamic confidence based on source quality and response signals
+            confidence = 0.5
+            if sources:
+                # More sources and higher scores = more confident
+                avg_score = sum(s.get("score", 0.5) for s in sources) / len(sources)
+                confidence = 0.5 + (len(sources) / 6.0) * 0.3 + avg_score * 0.2
+            if suggestion.endswith(('.', '!', '?')):
+                confidence += 0.05
+            # Check if citation hint was included
+            if '[http' in suggestion or '((' in suggestion:
+                confidence += 0.05
+            confidence = min(confidence, 1.0)
+
+            return suggestion, round(confidence, 2)
        except Exception as e:
            logger.error(f"WritingAssistant _generate_continuation error: {e}")
            raise