Files
ALwrity/backend/services/gsc_brainstorm_service.py
ajaysi 644e72d289 feat: Brainstorm Topics with GSC + Issue #518 fixes + Blog Editor enhancements
Issue #518 - Subscription not updating after checkout:
- Fix stale closure in SubscriptionContext checkout polling (use subscriptionRef)
- Move checkout success polling from InitialRouteHandler into SubscriptionContext
- Remove redundant polling code from InitialRouteHandler
- Fix plan label: 'Free' instead of 'No Plan', proper capitalization
- Add plan refresh button in UserBadge
- Add 'View Costing Details' to UserBadge dropdown
- Rename 'ALwrity Podcast Maker' to 'Podcast Creator' across UI
- Clean subscription=success URL param after verification

Blog Writer WYSIWYG Editor enhancements:
- Per-section preview toggle (view/edit icons)
- Enhanced hover-based toolbar
- Circular SVG progress stats bar with detailed tooltip
- Research tool chips in stats bar footer
- Per-section TTS with useTextToSpeech hook (browser native)
- Full blog preview modal with print/PDF support
- PlayAllTTSButton: sequential playback with progress bar
- OnThisPageNav: floating sidebar with scroll tracking
- Section data attributes for scroll anchoring

GSC Brainstorm Topics feature:
- Backend: gsc_brainstorm_service.py (rule-based + LLM recommendations)
- Backend: POST /gsc/brainstorm endpoint with 3-word minimum validation
- Frontend: gscBrainstorm.ts API client
- Frontend: useGSCBrainstormConnection hook (popup OAuth, no /onboarding redirect)
- Frontend: useGSCBrainstorm hook (connect check + brainstorm call)
- Frontend: GSCBrainstormModal (3-tab results: Opportunities, Gaps, AI Recs)
- Frontend: BrainstormButton (visible at 3+ words, GSC connect overlay)
- Wire BrainstormButton into ManualResearchForm and ResearchAction
- Add blog_writer to gsc_auth router features for ALWRITY_ENABLED_FEATURES
2026-05-20 22:44:15 +05:30

404 lines
15 KiB
Python

"""
GSC Brainstorm Service for ALwrity.
Analyzes Google Search Console data to suggest blog topics the user should write about.
Combines rule-based heuristics (high-impression/low-CTR keywords, near-page-1 positions)
with LLM-powered strategic recommendations tailored to the user's topic intent.
"""
import json
from datetime import datetime, timedelta
from typing import Dict, List, Any, Optional
from loguru import logger
from services.gsc_service import GSCService
from services.llm_providers.main_text_generation import llm_text_gen
class GSCBrainstormService:
    """
    Suggests blog topics based on the user's live GSC data.

    Flow:
    1. Fetch real GSC search analytics (query + page data, 30 days)
    2. Apply rule-based filters (Content Optimization, Content Enhancement, Keyword Gap)
    3. Generate LLM-powered strategic recommendations contextualised to the user's keywords
    4. Return structured results
    """

    def __init__(self, gsc_service: "Optional[GSCService]" = None):
        """
        Args:
            gsc_service: Service used to list sites and fetch search analytics.
                A default ``GSCService()`` is created when omitted.
        """
        self.gsc_service = gsc_service if gsc_service is not None else GSCService()

    # ------------------------------------------------------------------ #
    # Public entry point
    # ------------------------------------------------------------------ #
    def brainstorm_topics(
        self,
        user_id: str,
        keywords: str,
        site_url: Optional[str] = None,
    ) -> Dict[str, Any]:
        """
        Generate blog topic suggestions from the user's GSC data.

        Args:
            user_id: Clerk user ID (must have GSC connected).
            keywords: User's 3+ word topic intent (e.g. "content marketing strategy").
            site_url: Optional site URL; auto-selected from user's first GSC site if omitted.

        Returns:
            Dict with content_opportunities, keyword_gaps, ai_recommendations, summary.
            On failure the same keys are present (empty) plus an "error" message.
        """
        # Kept only as a fallback for callers that invoke
        # _generate_ai_recommendations() directly without a user_id; the
        # request path below passes user_id explicitly so a shared service
        # instance never mixes up users between concurrent requests.
        self._user_id = user_id

        # 1. Resolve site_url
        if not site_url:
            sites = self.gsc_service.get_site_list(user_id)
            site_url = sites[0].get("siteUrl", "") if sites else ""
            if not site_url:
                return self._error_result(
                    "No GSC sites found. Make sure your site is verified in Google Search Console."
                )

        # 2. Fetch GSC analytics (30 days). Sample the clock once so both
        #    ends of the window are derived from the same instant.
        now = datetime.now()
        end_date = now.strftime("%Y-%m-%d")
        start_date = (now - timedelta(days=30)).strftime("%Y-%m-%d")
        analytics = self.gsc_service.get_search_analytics(
            user_id=user_id,
            site_url=site_url,
            start_date=start_date,
            end_date=end_date,
        )
        if not isinstance(analytics, dict):
            return self._error_result("Failed to fetch GSC data")
        if "error" in analytics:
            # `or` (not a .get default) so an explicit None/"" still yields a message.
            return self._error_result(analytics.get("error") or "Failed to fetch GSC data")

        # 3. Parse GSC rows into structured data (tolerate missing/None sections)
        query_rows = (analytics.get("query_data") or {}).get("rows") or []
        page_rows = (analytics.get("page_data") or {}).get("rows") or []
        keywords_data = self._parse_query_rows(query_rows)
        pages_data = self._parse_page_rows(page_rows)
        if not keywords_data:
            return self._error_result(
                "No keyword data available for the selected period.",
                summary={
                    "site_url": site_url,
                    "date_range": {"start": start_date, "end": end_date},
                    "total_keywords_analyzed": 0,
                },
            )

        # 4. Rule-based analysis
        content_opportunities = self._identify_content_opportunities(keywords_data)
        keyword_gaps = self._identify_keyword_gaps(keywords_data)

        # 5. Summary metrics
        summary = self._compute_summary(keywords_data, pages_data, site_url, start_date, end_date)

        # 6. AI recommendations (best-effort; don't fail the whole request on LLM error)
        ai_recommendations = self._generate_ai_recommendations(
            keywords_data, pages_data, summary, keywords, user_id=user_id
        )
        return {
            "content_opportunities": content_opportunities,
            "keyword_gaps": keyword_gaps,
            "ai_recommendations": ai_recommendations,
            "summary": summary,
        }

    @staticmethod
    def _error_result(message: str, summary: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
        """Build the error payload: the normal result keys (empty) plus "error"."""
        return {
            "error": message,
            "content_opportunities": [],
            "keyword_gaps": [],
            "ai_recommendations": {},
            "summary": summary if summary is not None else {},
        }

    # ------------------------------------------------------------------ #
    # Data parsing helpers
    # ------------------------------------------------------------------ #
    @staticmethod
    def _parse_rows(rows: Optional[List[Dict]], label: str) -> List[Dict[str, Any]]:
        """
        Flatten analytics rows into dicts keyed by ``label`` plus metrics.

        The first entry of each row's "keys" becomes the ``label`` value
        ("(not set)" when absent). "ctr" is multiplied by 100 and rounded to
        2 decimals; "position" is rounded to 1 decimal. Missing or None
        metrics are treated as 0.
        """
        parsed = []
        for row in rows or []:
            keys = row.get("keys") or []
            parsed.append({
                label: keys[0] if keys else "(not set)",
                "clicks": row.get("clicks") or 0,
                "impressions": row.get("impressions") or 0,
                "ctr": round((row.get("ctr") or 0) * 100, 2),
                "position": round(row.get("position") or 0, 1),
            })
        return parsed

    @staticmethod
    def _parse_query_rows(rows: List[Dict]) -> List[Dict[str, Any]]:
        """Parse query-dimension rows into dicts with a "keyword" field."""
        return GSCBrainstormService._parse_rows(rows, "keyword")

    @staticmethod
    def _parse_page_rows(rows: List[Dict]) -> List[Dict[str, Any]]:
        """Parse page-dimension rows into dicts with a "page" field."""
        return GSCBrainstormService._parse_rows(rows, "page")

    # ------------------------------------------------------------------ #
    # Rule-based opportunity identification
    # ------------------------------------------------------------------ #
    @staticmethod
    def _identify_content_opportunities(
        keywords_data: List[Dict[str, Any]],
        limit: int = 10,
    ) -> List[Dict[str, Any]]:
        """
        Apply the two rule-based filters and return the top opportunities.

        Args:
            keywords_data: Parsed keyword rows ("ctr" already in percent).
            limit: Maximum number of opportunities returned (default 10).

        Returns:
            Opportunities sorted by impressions, highest first. A keyword that
            satisfies both rules appears once per rule.
        """
        opportunities: List[Dict[str, Any]] = []

        # Rule 1: Content Optimization — high impressions, low CTR
        for kw in keywords_data:
            if kw["impressions"] > 500 and kw["ctr"] < 3:
                opportunities.append({
                    "type": "Content Optimization",
                    "keyword": kw["keyword"],
                    "opportunity": (
                        f"Optimize existing content for '{kw['keyword']}' "
                        f"to improve CTR from {kw['ctr']:.1f}% "
                        f"(position {kw['position']:.1f})"
                    ),
                    "potential_impact": "High",
                    "current_position": kw["position"],
                    "impressions": kw["impressions"],
                    "priority": "High" if kw["impressions"] > 1000 else "Medium",
                })

        # Rule 2: Content Enhancement — positions 11-20 with decent impressions
        for kw in keywords_data:
            if 10 < kw["position"] <= 20 and kw["impressions"] > 100:
                opportunities.append({
                    "type": "Content Enhancement",
                    "keyword": kw["keyword"],
                    "opportunity": (
                        f"Enhance content for '{kw['keyword']}' to move from "
                        f"position {kw['position']:.1f} to the first page"
                    ),
                    "potential_impact": "Medium",
                    "current_position": kw["position"],
                    "impressions": kw["impressions"],
                    "priority": "Medium",
                })

        # Sort by impressions descending (stable, so rule order breaks ties)
        opportunities.sort(key=lambda x: x["impressions"], reverse=True)
        return opportunities[:limit]

    @staticmethod
    def _identify_keyword_gaps(
        keywords_data: List[Dict[str, Any]],
        limit: int = 10,
    ) -> List[Dict[str, Any]]:
        """
        Return keywords at positions 4-20 with at least 50 impressions.

        Args:
            keywords_data: Parsed keyword rows.
            limit: Maximum number of gaps returned (default 10).

        Returns:
            Dicts with keyword/position/impressions, highest impressions first.
        """
        gaps = [
            {
                "keyword": kw["keyword"],
                "position": kw["position"],
                "impressions": kw["impressions"],
            }
            for kw in keywords_data
            if 4 <= kw["position"] <= 20 and kw["impressions"] >= 50
        ]
        gaps.sort(key=lambda x: x["impressions"], reverse=True)
        return gaps[:limit]

    # ------------------------------------------------------------------ #
    # Summary metrics
    # ------------------------------------------------------------------ #
    @staticmethod
    def _compute_summary(
        keywords_data: List[Dict],
        pages_data: List[Dict],
        site_url: str,
        start_date: str,
        end_date: str,
    ) -> Dict[str, Any]:
        """
        Aggregate totals, averages, position buckets and top keywords/pages.

        avg_ctr is total clicks over total impressions (in percent), not the
        mean of per-keyword CTRs; avg_position is the unweighted mean of the
        per-keyword positions. Top keywords are the 5 with most impressions;
        top pages are the 3 with most clicks.
        """
        total_impressions = sum(kw["impressions"] for kw in keywords_data)
        total_clicks = sum(kw["clicks"] for kw in keywords_data)
        avg_ctr = round((total_clicks / total_impressions * 100) if total_impressions else 0, 2)
        avg_position = round(
            sum(kw["position"] for kw in keywords_data) / len(keywords_data), 1
        ) if keywords_data else 0

        pos_1_3 = sum(1 for kw in keywords_data if kw["position"] <= 3)
        pos_4_10 = sum(1 for kw in keywords_data if 3 < kw["position"] <= 10)
        pos_11_20 = sum(1 for kw in keywords_data if 10 < kw["position"] <= 20)
        pos_21_plus = sum(1 for kw in keywords_data if kw["position"] > 20)

        top_keywords = sorted(keywords_data, key=lambda x: x["impressions"], reverse=True)[:5]
        top_pages = sorted(pages_data, key=lambda x: x["clicks"], reverse=True)[:3]

        return {
            "site_url": site_url,
            "date_range": {"start": start_date, "end": end_date},
            "total_keywords_analyzed": len(keywords_data),
            "total_impressions": total_impressions,
            "total_clicks": total_clicks,
            "avg_ctr": avg_ctr,
            "avg_position": avg_position,
            "keyword_distribution": {
                "positions_1_3": pos_1_3,
                "positions_4_10": pos_4_10,
                "positions_11_20": pos_11_20,
                "positions_21_plus": pos_21_plus,
            },
            "top_keywords": [
                {"keyword": kw["keyword"], "impressions": kw["impressions"], "position": kw["position"]}
                for kw in top_keywords
            ],
            "top_pages": [
                {"page": pg["page"], "clicks": pg["clicks"], "impressions": pg["impressions"]}
                for pg in top_pages
            ],
        }

    # ------------------------------------------------------------------ #
    # AI-powered strategic recommendations
    # ------------------------------------------------------------------ #
    def _generate_ai_recommendations(
        self,
        keywords_data: List[Dict],
        pages_data: List[Dict],
        summary: Dict,
        user_keywords: str,
        user_id: Optional[str] = None,
    ) -> Dict[str, Any]:
        """
        Ask the LLM for topic suggestions; fall back to canned advice on failure.

        Args:
            keywords_data: Parsed keyword rows (used only by the fallback).
            pages_data: Parsed page rows (currently not used in the prompt).
            summary: Output of ``_compute_summary``; feeds the prompt.
            user_keywords: The user's topic intent, quoted verbatim in the prompt.
            user_id: Forwarded to ``llm_text_gen``. When omitted, the value
                last stored by ``brainstorm_topics`` is used, if any.

        Returns:
            Dict with immediate_opportunities, content_strategy and
            long_term_strategy lists. Never raises: any exception is logged
            and the rule-based fallback is returned instead.
        """
        try:
            effective_user_id = (
                user_id if user_id is not None else getattr(self, '_user_id', None)
            )
            top_kw = ", ".join(kw["keyword"] for kw in summary.get("top_keywords", []))
            dist = summary.get("keyword_distribution", {})
            prompt = f"""Analyze this Google Search Console data and suggest blog topics the user should write about.
USER'S TOPIC INTENT: "{user_keywords}"
SEARCH PERFORMANCE SUMMARY:
- Total Keywords Tracked: {summary.get('total_keywords_analyzed', 0)}
- Total Impressions: {summary.get('total_impressions', 0):,}
- Total Clicks: {summary.get('total_clicks', 0):,}
- Average CTR: {summary.get('avg_ctr', 0):.2f}%
- Average Position: {summary.get('avg_position', 0):.1f}
TOP PERFORMING KEYWORDS:
{top_kw}
KEYWORD POSITION DISTRIBUTION:
- Positions 1-3: {dist.get('positions_1_3', 0)}
- Positions 4-10: {dist.get('positions_4_10', 0)}
- Positions 11-20: {dist.get('positions_11_20', 0)}
- Positions 21+: {dist.get('positions_21_plus', 0)}
Based on this data, provide:
1. IMMEDIATE TOPIC OPPORTUNITIES (0-30 days):
- Specific blog post titles the user should write
- Each tied to a keyword opportunity from the data
- 3-5 suggestions
2. CONTENT STRATEGY TOPICS (1-3 months):
- New topic clusters to build authority
- Content pillar ideas
- 3-5 suggestions
3. LONG-TERM CONTENT VISION (3-12 months):
- Market expansion topics
- Authority-building content ideas
- 3-5 suggestions
IMPORTANT: Relate every topic suggestion to the user's interest in "{user_keywords}".
Return your response in this exact JSON format:
{{
"immediate_opportunities": ["topic 1", "topic 2", "topic 3"],
"content_strategy": ["strategy 1", "strategy 2", "strategy 3"],
"long_term_strategy": ["vision 1", "vision 2", "vision 3"]
}}"""
            system_prompt = (
                "You are an enterprise SEO content strategist. Provide specific, data-driven "
                "blog topic suggestions that will improve the user's search performance. "
                "Always respond with valid JSON matching the requested format."
            )
            result = llm_text_gen(
                prompt=prompt,
                system_prompt=system_prompt,
                user_id=effective_user_id,
                flow_type="gsc_brainstorm",
            )
            if result:
                parsed = self._parse_ai_response(result)
                if parsed:
                    return parsed
            return self._fallback_ai_recommendations(keywords_data)
        except Exception as e:
            # Deliberate best-effort boundary: the rule-based results are
            # still useful, so an LLM failure must not fail the request.
            logger.warning(f"GSC brainstorm AI recommendations failed: {e}")
            return self._fallback_ai_recommendations(keywords_data)

    @staticmethod
    def _parse_ai_response(raw: str) -> Optional[Dict[str, List[str]]]:
        """
        Extract the recommendation lists from the LLM's text response.

        The outermost ``{...}`` span is parsed as JSON, so surrounding prose
        or code fences are ignored. Each of the three expected keys is kept
        only if it is a list; within it only non-empty strings are kept,
        capped at 5 per list.

        Returns:
            The three-list dict, or None when the text has no JSON object,
            the JSON is invalid, or no usable suggestion was found (so the
            caller can use its fallback instead of returning empty lists).
        """
        if not isinstance(raw, str):
            return None
        json_start = raw.find("{")
        json_end = raw.rfind("}") + 1
        if json_start == -1 or json_end == 0:
            return None
        try:
            parsed = json.loads(raw[json_start:json_end])
        except (json.JSONDecodeError, ValueError) as e:
            logger.warning(f"Failed to parse AI brainstorm response as JSON: {e}")
            return None

        def _clean(value: Any) -> List[str]:
            # Guard against null / string / object values where a list was requested.
            if not isinstance(value, list):
                return []
            return [item for item in value if isinstance(item, str) and item.strip()][:5]

        recommendations = {
            key: _clean(parsed.get(key))
            for key in ("immediate_opportunities", "content_strategy", "long_term_strategy")
        }
        # A dict of three empty lists is truthy; return None instead so the
        # caller's `if parsed:` check falls through to the fallback.
        if not any(recommendations.values()):
            return None
        return recommendations

    @staticmethod
    def _fallback_ai_recommendations(
        keywords_data: List[Dict],
    ) -> Dict[str, Any]:
        """
        Build canned recommendations without the LLM.

        Immediate opportunities are derived from the first three entries of
        ``keywords_data`` (in the order given); the strategy lists are fixed.
        """
        immediate = [
            f"Write a comprehensive guide on '{kw['keyword']}' "
            f"(currently at position {kw['position']:.1f} with "
            f"{kw['impressions']} impressions)"
            for kw in (keywords_data[:3] if keywords_data else [])
        ]
        return {
            "immediate_opportunities": immediate or ["No keyword data available for recommendations"],
            "content_strategy": [
                "Develop topic clusters around your top-performing keywords",
                "Create comparison and vs-style content for competitive terms",
                "Build FAQ sections targeting question-based queries",
            ],
            "long_term_strategy": [
                "Build domain authority through pillar content",
                "Expand into adjacent topic areas",
                "Develop thought leadership content series",
            ],
        }