Files
ALwrity/backend/services/gsc_brainstorm_service.py
ajaysi aaf94049da feat: validate podcast cost estimation accuracy, document per-token costs, and fix subscription/plan enforcement
Issue #543 — Validate Estimated Cost Accuracy (UI vs Backend)

Backend:
- cost_estimator.py uses pricing catalog (APIProviderPricing) as single source of truth
- All 7 cost components: analysis, research (search+LLM), script, TTS, voice clone, avatar, video
- initialize_default_pricing() runs on every app startup for auto-sync

Frontend cost estimation fixes:
- Added missing analysisCost, scriptCost, voiceCloneCost to PodcastEstimate type
- toPodcastEstimate() now extracts all 7 backend fields (was dropping 3)
- headerCostEst maps analysisCost->Analyze, scriptCost->Write, voiceCloneCost->Produce
- EstimateCard shows 5 chips: Analysis, Research, Script, Voice(TTS+clone), Visuals(avatar+video)
- Chip sum now equals backend total for all configurations

Subscription & plan fixes:
- Removed Stripe re-verification from checkSubscription() (downgrade regression fix #539)
- Added verifyCheckoutRef pattern for reliable mount-time checkout polling
- One-time Stripe sync effect with pending_subscription_change flag for Customer Portal returns
- Free plan limits: stability_calls 3->10, audio_calls 5->10 (supports 2 podcasts)
- Image enforcement uses actual provider (GPT_PROVIDER), not hardcoded Stability
- Billing/pricing pages bypass onboarding check in ProtectedRoute
- Gradient buttons + loading spinner on plan chip in UserBadge
- Added metadata-based Stripe lookup fallback (Issue #538)

Documentation:
- TESTING_GUIDE.md: comprehensive testing instructions for non-technical testers
  - Free plan limits, usage tracking, cost estimation formulas
  - 10 test cases for UI verification
  - Troubleshooting guide
  - Quick-reference cost formulas with all default rates

Cleanup: removed legacy ToBeMigrated directory (70+ files, ~22K LOC)
GSC Brainstorm: service, hook, modal, and UI components for blog topic brainstorming
2026-05-27 08:46:38 +05:30

851 lines
37 KiB
Python

"""
GSC Brainstorm Service for ALwrity.
Analyzes Google Search Console data to suggest blog topics the user should write about.
Combines rule-based heuristics with LLM-powered strategic recommendations tailored to
the user's topic intent. Designed for non-SEO-experts: every insight includes plain-English
explanations of WHY it matters and WHAT to do about it.
"""
import json
import re
from datetime import datetime, timedelta
from typing import Dict, List, Any, Optional

from loguru import logger

from services.gsc_service import GSCService
from services.llm_providers.main_text_generation import llm_text_gen


class GSCBrainstormService:
    """Suggest blog topics based on the user's live GSC data.

    Flow:
    1. Fetch real GSC search analytics (query + page data, 30 days)
    2. Compute derived metrics (CTR benchmarks, estimated traffic uplift, content formats)
    3. Apply rule-based filters (Quick Wins, Optimization, Enhancement, Rising Stars, Page Issues)
    4. Generate LLM-powered strategic recommendations contextualised to the user's keywords
    5. Return structured results with all data exposed for rich frontend display
    """

    # Class-level cache so the sentence-transformers model is loaded at most once.
    _semantic_model = None

    # Ordered (label, pattern) rules used by _suggest_format; the first match wins.
    # Patterns are anchored on word boundaries so that e.g. "canvas" does not
    # count as "vs" and "laptop" does not count as "top".
    _FORMAT_RULES = (
        ("How-To Guide", re.compile(r"\b(?:how to|how do|guides?|tutorials?|steps)\b")),
        ("Comparison", re.compile(r"\b(?:vs|versus|compares?|comparisons?|differences?)\b")),
        ("Top Picks / Review", re.compile(r"\b(?:best|top|recommended|reviews?)\b")),
        ("Explainer", re.compile(r"\b(?:what is|definitions?|meanings?|explained)\b")),
        ("Listicle", re.compile(r"\b(?:lists?|examples|ideas|tips|ways)\b")),
        ("Budget / Alternative", re.compile(r"\b(?:free|cheap|alternatives?|budget)\b")),
        ("Tool / Template", re.compile(r"\b(?:templates?|calculators?|tools?|checkers?)\b")),
        ("Trend Report", re.compile(r"\b(?:20\d{2}|trends?|predictions?|future)\b")),
    )

    def __init__(self, gsc_service: Optional["GSCService"] = None):
        """Create the service, building a default ``GSCService`` when none is injected."""
        self.gsc_service = gsc_service or GSCService()

    # ------------------------------------------------------------------ #
    # Public entry point
    # ------------------------------------------------------------------ #
    def brainstorm_topics(
        self,
        user_id: str,
        keywords: str,
        site_url: Optional[str] = None,
    ) -> Dict[str, Any]:
        """Run the full brainstorm pipeline for one user and topic.

        Args:
            user_id: Identifier passed through to the GSC service and the LLM call.
            keywords: The topic the user wants to write about (free text).
            site_url: GSC property to analyse; when omitted the first site
                returned by ``get_site_list`` is used.

        Returns:
            A dict with ``content_opportunities``, ``keyword_gaps``, ``quick_wins``,
            ``page_opportunities``, ``ai_recommendations`` and ``summary``. On
            failure the same keys are present (empty) plus an ``error`` message.
        """
        # Kept for backward compatibility with code that reads the attribute;
        # the id is also passed explicitly to the AI step below.
        self._user_id = user_id

        # 1. Resolve site_url
        if not site_url:
            sites = self.gsc_service.get_site_list(user_id)
            if not sites:
                return self._empty_result(
                    "No GSC sites found. Make sure your site is verified in Google Search Console."
                )
            site_url = sites[0].get("siteUrl", "")

        # 2. Fetch GSC analytics (30 days). "now" is read once so both dates
        # derive from the same instant.
        now = datetime.now()
        end_date = now.strftime("%Y-%m-%d")
        start_date = (now - timedelta(days=30)).strftime("%Y-%m-%d")
        analytics = self.gsc_service.get_search_analytics(
            user_id=user_id,
            site_url=site_url,
            start_date=start_date,
            end_date=end_date,
        )
        if "error" in analytics:
            return self._empty_result(analytics.get("error", "Failed to fetch GSC data"))

        # 3. Parse GSC rows into structured data (tolerating missing/None sections)
        query_rows = (analytics.get("query_data") or {}).get("rows") or []
        page_rows = (analytics.get("page_data") or {}).get("rows") or []
        keywords_data = self._parse_query_rows(query_rows)
        pages_data = self._parse_page_rows(page_rows)

        def zero_summary() -> Dict[str, Any]:
            # Minimal summary attached to "no data" responses.
            return {
                "site_url": site_url,
                "date_range": {"start": start_date, "end": end_date},
                "total_keywords_analyzed": 0,
            }

        if not keywords_data:
            return self._empty_result(
                "No keyword data available for the selected period. This usually means your site is new to GSC or hasn't received search traffic yet.",
                zero_summary(),
            )

        # 4. Score keywords for topic relevance and filter to topic-related subset
        logger.info(f"Filtering {len(keywords_data)} GSC keywords for topic relevance to: '{keywords}'")
        keywords_data, pages_data = self._filter_by_topic_relevance(
            keywords_data, pages_data, keywords
        )
        logger.info(f"After topic filter: {len(keywords_data)} keywords, {len(pages_data)} pages")
        # Defensive: the filter keeps a top-by-impressions fallback, so this is
        # not expected to trigger when keywords were available above.
        if not keywords_data:
            return self._empty_result(
                "No GSC keywords matched your topic. Try a broader research topic or check your GSC data.",
                zero_summary(),
            )

        # 5. Compute threshold multiplier based on available topic keywords.
        # When topic filtering yields fewer keywords, lower impression thresholds
        # to surface more topic-relevant opportunities.
        filtered_count = len(keywords_data)
        threshold_multiplier = max(0.1, filtered_count / 200.0)
        logger.info(f"Threshold multiplier: {threshold_multiplier:.2f} ({filtered_count} topic keywords)")

        # 6. Rule-based analysis with adjusted thresholds
        content_opportunities = self._identify_content_opportunities(keywords_data, threshold_multiplier)
        keyword_gaps = self._identify_keyword_gaps(keywords_data, threshold_multiplier)
        quick_wins = self._identify_quick_wins(keywords_data, threshold_multiplier)
        page_opportunities = self._identify_page_opportunities(pages_data, threshold_multiplier)

        # 7. Summary metrics
        summary = self._compute_summary(keywords_data, pages_data, site_url, start_date, end_date)

        # 8. AI recommendations
        ai_recommendations = self._generate_ai_recommendations(
            keywords_data, pages_data, summary, keywords,
            content_opportunities, quick_wins, keyword_gaps,
            user_id=user_id,
        )
        return {
            "content_opportunities": content_opportunities,
            "keyword_gaps": keyword_gaps,
            "quick_wins": quick_wins,
            "page_opportunities": page_opportunities,
            "ai_recommendations": ai_recommendations,
            "summary": summary,
        }

    @staticmethod
    def _empty_result(error: str, summary: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
        """Build the standard "nothing to show" payload carrying an error message."""
        return {
            "error": error,
            "content_opportunities": [],
            "keyword_gaps": [],
            "quick_wins": [],
            "page_opportunities": [],
            "ai_recommendations": {},
            "summary": summary if summary is not None else {},
        }

    # ------------------------------------------------------------------ #
    # Data parsing helpers
    # ------------------------------------------------------------------ #
    @staticmethod
    def _parse_rows(rows: Optional[List[Dict]], label_key: str) -> List[Dict[str, Any]]:
        """Flatten GSC rows into dicts keyed by ``label_key`` plus the four metrics.

        CTR is converted from a fraction to a percentage (2 decimals) and the
        position is rounded to 1 decimal. Rows without keys get "(not set)".
        """
        parsed = []
        for row in rows or []:
            keys = row.get("keys") or []
            parsed.append({
                label_key: keys[0] if keys else "(not set)",
                "clicks": row.get("clicks", 0),
                "impressions": row.get("impressions", 0),
                "ctr": round(row.get("ctr", 0) * 100, 2),
                "position": round(row.get("position", 0), 1),
            })
        return parsed

    @staticmethod
    def _parse_query_rows(rows: List[Dict]) -> List[Dict[str, Any]]:
        """Parse query-dimension rows; each result has a ``keyword`` field."""
        return GSCBrainstormService._parse_rows(rows, "keyword")

    @staticmethod
    def _parse_page_rows(rows: List[Dict]) -> List[Dict[str, Any]]:
        """Parse page-dimension rows; each result has a ``page`` field."""
        return GSCBrainstormService._parse_rows(rows, "page")

    # ------------------------------------------------------------------ #
    # Topic relevance scoring and filtering
    # ------------------------------------------------------------------ #
    @staticmethod
    def _compute_semantic_scores(
        keywords_data: List[Dict[str, Any]],
        user_keywords: str,
    ) -> Dict[int, float]:
        """Compute cosine similarity between each GSC keyword and the user topic.

        Uses sentence-transformers (all-MiniLM-L6-v2) for lightweight semantic matching.
        Returns a dict mapping keyword index to a similarity score clamped to 0-1,
        or an empty dict when the model/dependencies are unavailable or fail.
        """
        try:
            import numpy as np
            from sentence_transformers import SentenceTransformer

            model = GSCBrainstormService._semantic_model
            if model is None:
                logger.info("Loading semantic embedding model (all-MiniLM-L6-v2)...")
                model = SentenceTransformer("all-MiniLM-L6-v2", device="cpu")
                GSCBrainstormService._semantic_model = model

            # Only non-blank keywords are embedded; remember their original index.
            texts, indices = [], []
            for i, kw in enumerate(keywords_data):
                text = kw.get("keyword", "")
                if text.strip():
                    texts.append(text)
                    indices.append(i)
            if not texts:
                return {}

            embeddings = model.encode(
                [user_keywords] + texts, show_progress_bar=False, convert_to_numpy=True
            )
            user_emb = embeddings[0]
            kw_embs = embeddings[1:]
            norms = np.linalg.norm(kw_embs, axis=1)
            user_norm = np.linalg.norm(user_emb)
            # The epsilon guards against division by zero for degenerate embeddings.
            similarities = np.dot(kw_embs, user_emb) / (norms * user_norm + 1e-8)
            # Cosine similarity can be negative; keep the documented 0-1 range.
            similarities = np.clip(similarities, 0.0, 1.0)
            return {idx: float(sim) for idx, sim in zip(indices, similarities)}
        except Exception as e:
            # Best-effort feature: any failure degrades to term-only scoring.
            logger.warning(f"Semantic similarity scoring unavailable, falling back to term-only: {e}")
            return {}

    @staticmethod
    def _tokenize(text: str) -> set:
        """Lowercase and split into alphanumeric tokens of at least 3 characters.

        Unicode letters and digits are kept so non-English topics still tokenize.
        """
        return {tok for tok in re.findall(r"[^\W_]+", text.lower()) if len(tok) >= 3}

    @staticmethod
    def _score_keyword_relevance(gsc_keyword: str, user_tokens: set, user_phrase: str) -> float:
        """Score a single GSC keyword (0.0-1.0) for relevance to the user's topic.

        An exact phrase hit scores 1.0; otherwise each shared token adds 0.5 and
        each remaining user token found as a substring adds 0.2, capped at 1.0.
        """
        kw_lower = gsc_keyword.lower()
        # Exact phrase match -> highest score. The phrase is stripped, and an
        # empty phrase is ignored because "" is a substring of everything.
        phrase = user_phrase.strip().lower()
        if phrase and phrase in kw_lower:
            return 1.0

        kw_tokens = GSCBrainstormService._tokenize(gsc_keyword)
        if not kw_tokens:
            return 0.0

        matches = user_tokens & kw_tokens
        score = len(matches) * 0.5
        # Partial/substring matches for the user tokens that did not match exactly
        for token in user_tokens:
            if token not in matches and token in kw_lower:
                score += 0.2
        return min(score, 1.0)

    def _filter_by_topic_relevance(
        self,
        keywords_data: List[Dict[str, Any]],
        pages_data: List[Dict[str, Any]],
        user_keywords: str,
    ) -> tuple:
        """Score GSC keywords for topic overlap and keep the most relevant subset.

        Returns ``(filtered_keywords, filtered_pages)`` where filtered_keywords
        holds up to 150 topic-relevant keywords plus the top 50 by impressions
        (deduplicated), and filtered_pages holds pages whose URL mentions a kept
        keyword plus the top 20 pages by impressions. The input dicts are not
        modified. With a blank topic both inputs are returned unchanged.
        """
        if not user_keywords or not user_keywords.strip():
            return keywords_data, pages_data
        user_tokens = self._tokenize(user_keywords)
        if not user_tokens:
            return keywords_data, pages_data

        # Semantic similarity catches synonyms (e.g. "plant-based protein" for "vegan")
        semantic_scores = GSCBrainstormService._compute_semantic_scores(keywords_data, user_keywords)
        semantic_available = bool(semantic_scores)

        # Blend term overlap (50%) + semantic similarity (50%); term-only when
        # semantic scoring is unavailable. Scores live next to the keyword dict
        # instead of being written into it.
        scored = []
        for i, kw in enumerate(keywords_data):
            term_score = self._score_keyword_relevance(
                kw.get("keyword", ""), user_tokens, user_keywords
            )
            if semantic_available:
                relevance = 0.5 * term_score + 0.5 * semantic_scores.get(i, 0.0)
            else:
                relevance = term_score
            scored.append((relevance, kw))

        # Sort by blended relevance desc, then impressions desc
        scored.sort(key=lambda pair: (-pair[0], -pair[1].get("impressions", 0)))
        top_relevant = [kw for relevance, kw in scored if relevance > 0][:150]
        # Also keep top 50 by impressions as fallback (ensures general site context)
        by_impressions = sorted(
            (kw for _, kw in scored), key=lambda kw: -kw.get("impressions", 0)
        )[:50]

        # Merge and deduplicate by keyword text, relevance-ranked entries first
        seen = set()
        merged = []
        for kw in top_relevant + by_impressions:
            key = kw.get("keyword", "")
            if key not in seen:
                seen.add(key)
                merged.append(kw)

        logger.info(
            f"Topic relevance: {len(scored)} scored, "
            f"{len(top_relevant)} topic-relevant, "
            f"{len(merged)} after merge with top-by-impressions"
        )

        # Filter pages: keep pages whose URL contains any kept keyword. URLs
        # normally separate words with "-" or "_", so slug forms are matched too.
        needles = set()
        for kw in merged:
            text = kw.get("keyword", "").lower().strip()
            if not text:
                continue
            words = text.split()
            needles.update((text, "-".join(words), "_".join(words)))
        filtered_pages = [
            pg for pg in pages_data
            if any(needle in pg.get("page", "").lower() for needle in needles)
        ]

        # Always keep at least top 20 pages by impressions for context
        pages_by_imp = sorted(pages_data, key=lambda pg: -pg.get("impressions", 0))[:20]
        seen_page_urls = {pg.get("page", "") for pg in filtered_pages}
        for pg in pages_by_imp:
            if pg.get("page", "") not in seen_page_urls:
                filtered_pages.append(pg)
        return merged, filtered_pages

    # ------------------------------------------------------------------ #
    # Rule-based opportunity identification
    # ------------------------------------------------------------------ #
    @staticmethod
    def _identify_content_opportunities(
        keywords_data: List[Dict[str, Any]],
        threshold_multiplier: float = 1.0,
    ) -> List[Dict[str, Any]]:
        """Return up to 10 content opportunities, largest impression counts first.

        Rule 1 (Content Optimization): high impressions with CTR below 3%.
        Rule 2 (Content Enhancement): positions 11-20 with decent impressions.
        A keyword may appear once per rule. Impression thresholds scale with
        ``threshold_multiplier``.
        """
        imp_optimize = int(500 * threshold_multiplier)
        imp_optimize_high = int(1000 * threshold_multiplier)
        imp_enhance = int(100 * threshold_multiplier)
        imp_enhance_high = int(500 * threshold_multiplier)
        opportunities: List[Dict[str, Any]] = []

        # Rule 1: Content Optimization — high impressions, low CTR
        for kw in keywords_data:
            if not (kw["impressions"] > imp_optimize and kw["ctr"] < 3):
                continue
            # Assume a 5% CTR is reachable; never advertise fewer than 5 clicks.
            gain = max(int(kw["impressions"] * 0.05) - kw["clicks"], 5)
            level = "High" if kw["impressions"] > imp_optimize_high else "Medium"
            opportunities.append({
                "type": "Content Optimization",
                "keyword": kw["keyword"],
                "opportunity": (
                    f"Your site appears for '{kw['keyword']}' ({kw['impressions']:,} times/month) "
                    f"but only {kw['ctr']:.1f}% click. Improving your title and meta description "
                    f"could bring ~{gain} more clicks/month."
                ),
                "potential_impact": level,
                "current_position": kw["position"],
                "current_ctr": kw["ctr"],
                "impressions": kw["impressions"],
                "clicks": kw["clicks"],
                "estimated_traffic_gain": gain,
                "priority": level,
                "suggested_format": GSCBrainstormService._suggest_format(kw["keyword"]),
            })

        # Rule 2: Content Enhancement — positions 11-20 with decent impressions
        for kw in keywords_data:
            if not (10 < kw["position"] <= 20 and kw["impressions"] > imp_enhance):
                continue
            gain = int(kw["impressions"] * 0.08)
            level = "High" if kw["impressions"] > imp_enhance_high else "Medium"
            opportunities.append({
                "type": "Content Enhancement",
                "keyword": kw["keyword"],
                "opportunity": (
                    f"'{kw['keyword']}' ranks #{kw['position']:.0f} (page 2). "
                    f"Moving to page 1 could capture ~{gain} more clicks/month "
                    f"from {kw['impressions']:,} impressions."
                ),
                "potential_impact": level,
                "current_position": kw["position"],
                "current_ctr": kw["ctr"],
                "impressions": kw["impressions"],
                "clicks": kw["clicks"],
                "estimated_traffic_gain": gain,
                "priority": level,
                "suggested_format": GSCBrainstormService._suggest_format(kw["keyword"]),
            })

        opportunities.sort(key=lambda item: item["impressions"], reverse=True)
        return opportunities[:10]

    @staticmethod
    def _identify_keyword_gaps(
        keywords_data: List[Dict[str, Any]],
        threshold_multiplier: float = 1.0,
    ) -> List[Dict[str, Any]]:
        """Return up to 10 keywords ranking at positions 4-20, by impressions desc.

        The traffic estimate compares the current CTR with an assumed ~31% CTR
        for position 1 and is never reported below 1.
        """
        min_impressions = int(50 * threshold_multiplier)
        position_1_ctr = 31.0  # assumed average CTR (%) at position 1
        gaps: List[Dict[str, Any]] = []
        for kw in keywords_data:
            if 4 <= kw["position"] <= 20 and kw["impressions"] >= min_impressions:
                gain = max(int(kw["impressions"] * (position_1_ctr - kw["ctr"]) / 100), 1)
                gaps.append({
                    "keyword": kw["keyword"],
                    "position": kw["position"],
                    "impressions": kw["impressions"],
                    "current_ctr": kw["ctr"],
                    "clicks": kw["clicks"],
                    "estimated_traffic_if_page1": gain,
                    "gap_from_page1": round(kw["position"] - 3, 1),
                })
        gaps.sort(key=lambda item: item["impressions"], reverse=True)
        return gaps[:10]

    @staticmethod
    def _identify_quick_wins(
        keywords_data: List[Dict[str, Any]],
        threshold_multiplier: float = 1.0,
    ) -> List[Dict[str, Any]]:
        """Return up to 5 page-1 keywords (positions 4-10) with the largest upside.

        Upside is estimated against an assumed ~11% CTR for position 3. Keywords
        whose CTR already meets that target are skipped, since promising an
        "increase" to a lower CTR would be misleading.
        """
        min_impressions = int(100 * threshold_multiplier)
        target_ctr = 11.0  # approximate CTR (%) for position 3
        quick_wins: List[Dict[str, Any]] = []
        for kw in keywords_data:
            if not (4 <= kw["position"] <= 10 and kw["impressions"] >= min_impressions):
                continue
            if kw["ctr"] >= target_ctr:
                continue
            gain = max(int(kw["impressions"] * (target_ctr - kw["ctr"]) / 100), 1)
            quick_wins.append({
                "keyword": kw["keyword"],
                "position": kw["position"],
                "impressions": kw["impressions"],
                "current_ctr": kw["ctr"],
                "clicks": kw["clicks"],
                "estimated_traffic_gain": gain,
                "reason": (
                    f"Already on page 1 at position #{kw['position']:.0f}. "
                    f"Optimizing this page could increase CTR from {kw['ctr']:.1f}% "
                    f"to ~{target_ctr:.0f}%, gaining ~{gain} clicks/month."
                ),
            })
        quick_wins.sort(key=lambda item: item["estimated_traffic_gain"], reverse=True)
        return quick_wins[:5]

    @staticmethod
    def _identify_page_opportunities(
        pages_data: List[Dict[str, Any]],
        threshold_multiplier: float = 1.0,
    ) -> List[Dict[str, Any]]:
        """Return up to 5 pages with many impressions but CTR below 2%.

        A readable title is derived from the last URL path segment.
        """
        min_impressions = int(300 * threshold_multiplier)
        opportunities: List[Dict[str, Any]] = []
        for pg in pages_data:
            if not (pg["impressions"] > min_impressions and pg["ctr"] < 2.0):
                continue
            slug = pg["page"].rstrip("/").rsplit("/", 1)[-1]
            title = slug.replace("-", " ").title()
            if len(title) > 60:
                title = title[:57] + "..."
            opportunities.append({
                "page": pg["page"],
                "page_title": title,
                "impressions": pg["impressions"],
                "clicks": pg["clicks"],
                "current_ctr": pg["ctr"],
                "current_position": pg["position"],
                "reason": (
                    f"This page gets {pg['impressions']:,} impressions but only {pg['ctr']:.1f}% CTR. "
                    f"Reviewing the title and meta description could significantly boost clicks."
                ),
            })
        opportunities.sort(key=lambda item: item["impressions"], reverse=True)
        return opportunities[:5]

    # ------------------------------------------------------------------ #
    # Content format suggestion
    # ------------------------------------------------------------------ #
    @staticmethod
    def _suggest_format(keyword: str) -> str:
        """Suggest a content format based on whole-word patterns in the keyword.

        Rules are checked in order and the first match wins; keywords matching
        no rule get "In-Depth Article".
        """
        lowered = keyword.lower()
        for label, pattern in GSCBrainstormService._FORMAT_RULES:
            if pattern.search(lowered):
                return label
        return "In-Depth Article"

    # ------------------------------------------------------------------ #
    # Summary metrics
    # ------------------------------------------------------------------ #
    @staticmethod
    def _compute_summary(
        keywords_data: List[Dict],
        pages_data: List[Dict],
        site_url: str,
        start_date: str,
        end_date: str,
    ) -> Dict[str, Any]:
        """Aggregate totals, position distribution, health score and top items."""
        keyword_count = len(keywords_data)
        total_impressions = sum(kw["impressions"] for kw in keywords_data)
        total_clicks = sum(kw["clicks"] for kw in keywords_data)
        avg_ctr = round((total_clicks / total_impressions * 100) if total_impressions else 0, 2)
        if keywords_data:
            avg_position = round(sum(kw["position"] for kw in keywords_data) / keyword_count, 1)
        else:
            avg_position = 0

        positions = [kw["position"] for kw in keywords_data]
        pos_1_3 = sum(1 for p in positions if p <= 3)
        pos_4_10 = sum(1 for p in positions if 3 < p <= 10)
        pos_11_20 = sum(1 for p in positions if 10 < p <= 20)
        pos_21_plus = sum(1 for p in positions if p > 20)

        top_keywords = sorted(keywords_data, key=lambda kw: kw["impressions"], reverse=True)[:5]
        top_pages = sorted(pages_data, key=lambda pg: pg["clicks"], reverse=True)[:3]

        # Health score: 0-100 based on how many keywords are on page 1,
        # with top-3 rankings weighted more heavily.
        divisor = keyword_count or 1
        page1_pct = (pos_1_3 + pos_4_10) / divisor * 100
        top3_pct = pos_1_3 / divisor * 100
        health_score = round(min(top3_pct * 3 + page1_pct * 0.7, 100), 0)

        # CTR benchmark: industry average is ~3.1% for position 1-10
        ctr_benchmark = 3.1
        return {
            "site_url": site_url,
            "date_range": {"start": start_date, "end": end_date},
            "total_keywords_analyzed": keyword_count,
            "total_impressions": total_impressions,
            "total_clicks": total_clicks,
            "avg_ctr": avg_ctr,
            "avg_position": avg_position,
            "ctr_vs_benchmark": round(avg_ctr - ctr_benchmark, 2),
            "health_score": health_score,
            "keyword_distribution": {
                "positions_1_3": pos_1_3,
                "positions_4_10": pos_4_10,
                "positions_11_20": pos_11_20,
                "positions_21_plus": pos_21_plus,
            },
            "top_keywords": [
                {
                    "keyword": kw["keyword"],
                    "impressions": kw["impressions"],
                    "clicks": kw["clicks"],
                    "position": kw["position"],
                    "ctr": kw["ctr"],
                }
                for kw in top_keywords
            ],
            "top_pages": [
                {
                    "page": pg["page"],
                    "clicks": pg["clicks"],
                    "impressions": pg["impressions"],
                    "ctr": pg["ctr"],
                }
                for pg in top_pages
            ],
        }

    # ------------------------------------------------------------------ #
    # AI-powered strategic recommendations
    # ------------------------------------------------------------------ #
    def _generate_ai_recommendations(
        self,
        keywords_data: List[Dict],
        pages_data: List[Dict],
        summary: Dict,
        user_keywords: str,
        content_opportunities: List[Dict],
        quick_wins: List[Dict],
        keyword_gaps: List[Dict],
        user_id: Optional[str] = None,
    ) -> Dict[str, Any]:
        """Ask the LLM for blog-post suggestions, falling back to rule-based ones.

        Args:
            keywords_data: Topic-filtered keyword rows.
            pages_data: Accepted for interface compatibility; not used here.
            summary: Output of ``_compute_summary``.
            user_keywords: The user's topic, embedded in the prompt.
            content_opportunities: Rule-based findings (top 5 are quoted).
            quick_wins: Rule-based quick wins (top 3 are quoted).
            keyword_gaps: Accepted for interface compatibility; not used here.
            user_id: Id forwarded to the LLM call; defaults to the id stored by
                the last ``brainstorm_topics`` call on this instance.

        Returns:
            Dict with ``immediate_opportunities``, ``content_strategy`` and
            ``long_term_strategy`` lists. Never raises: any failure yields the
            deterministic fallback.
        """
        try:
            if user_id is None:
                user_id = getattr(self, "_user_id", None)

            # Rank by impressions weighted towards better positions (weight 10
            # at position 1 down to 1 at position 10 and beyond); keep the top 25.
            topic_keywords = sorted(
                keywords_data,
                key=lambda x: (x.get("impressions", 0) * max(1, 11 - min(x.get("position", 10), 10))),
                reverse=True,
            )[:25]
            topic_kw_str = "\n".join(
                f"{kw['keyword']}: {kw['impressions']:,} impressions, position {kw['position']}, {kw['ctr']:.1f}% CTR"
                for kw in topic_keywords
            )
            dist = summary.get("keyword_distribution", {})

            if content_opportunities:
                opp_str = "\nCONTENT OPPORTUNITIES (rule-based findings):\n" + "\n".join(
                    f"{o['keyword']}: {o['opportunity']}"
                    for o in content_opportunities[:5]
                )
            else:
                opp_str = "\nNo major content opportunities detected from rule-based analysis."

            qw_str = ""
            if quick_wins:
                qw_str = "\nQUICK WINS (already on page 1, easy to optimize):\n" + "\n".join(
                    f"{q['keyword']}: position #{q['position']:.0f}, {q['current_ctr']:.1f}% CTR, est. +{q['estimated_traffic_gain']} clicks/month"
                    for q in quick_wins[:3]
                )

            # The prompt body is kept flush-left so no indentation leaks into it.
            prompt = f"""You are an expert SEO content strategist analyzing real Google Search Console data for a blog writer.
The user wants to write about: "{user_keywords}"
Here is their GSC data for the last 30 days, already filtered to keywords related to their topic:
PERFORMANCE OVERVIEW:
- Total Topic-Relevant Keywords: {summary.get('total_keywords_analyzed', 0)}
- Total Impressions (topic): {summary.get('total_impressions', 0):,}
- Total Clicks (topic): {summary.get('total_clicks', 0):,}
- Average CTR: {summary.get('avg_ctr', 0):.2f}% (industry avg for positions 1-10 is ~3.1%)
- Average Position: {summary.get('avg_position', 0):.1f}
- SEO Health Score: {summary.get('health_score', 0)}/100
TOPIC-RELEVANT KEYWORDS (sorted by potential impact):
{topic_kw_str}
KEYWORD POSITION DISTRIBUTION:
- Position 1-3 (top results): {dist.get('positions_1_3', 0)} keywords
- Position 4-10 (page 1): {dist.get('positions_4_10', 0)} keywords
- Position 11-20 (page 2): {dist.get('positions_11_20', 0)} keywords
- Position 21+ (page 3+): {dist.get('positions_21_plus', 0)} keywords
{opp_str}
{qw_str}
Based on this data, provide EXACT blog post suggestions the user should write.
For each suggestion include:
1. A specific, compelling blog post TITLE (not vague topic)
2. The keyword it targets and why (based on the data above)
3. The recommended content format (how-to, listicle, comparison, etc.)
4. Estimated impact (how many more clicks/month they could gain)
Return your response in this EXACT JSON format (no markdown, no code fences):
{{
"immediate_opportunities": [
{{
"title": "Specific Blog Post Title Here",
"keyword": "target keyword",
"reason": "Why this will work based on the data",
"format": "How-To Guide | Listicle | Comparison | Explainer | etc.",
"estimated_impact": "Estimated X more clicks/month"
}}
],
"content_strategy": [
{{
"title": "Pillar Content Title",
"keyword": "target keyword",
"reason": "Strategic reasoning",
"format": "Content format",
"estimated_impact": "Expected impact"
}}
],
"long_term_strategy": [
{{
"title": "Authority Building Title",
"keyword": "target keyword",
"reason": "Long-term reasoning",
"format": "Content format",
"estimated_impact": "Expected long-term impact"
}}
]
}}
IMPORTANT:
- Provide 3-5 items in each category
- Every suggestion MUST relate to the user's interest in "{user_keywords}"
- Titles should be specific and compelling, like real blog post headlines
- Use the KEYWORD DATA above to justify each recommendation — reference specific keywords, their impressions, positions, and CTR
- Prioritize keywords with high impressions but low CTR or low position"""
            system_prompt = (
                "You are an expert SEO content strategist. You analyze Google Search Console data "
                "and provide specific, actionable blog post recommendations that will drive real traffic. "
                "You always respond with valid JSON matching the requested format. "
                "Every recommendation must be backed by the data provided."
            )
            result = llm_text_gen(
                prompt=prompt,
                system_prompt=system_prompt,
                user_id=user_id,
                flow_type="gsc_brainstorm",
            )
            if result:
                parsed = self._parse_ai_response(result)
                if parsed:
                    return parsed
            return self._fallback_ai_recommendations(keywords_data, content_opportunities, quick_wins)
        except Exception as e:
            # Top-level boundary for the AI step: log and degrade gracefully.
            logger.warning(f"GSC brainstorm AI recommendations failed: {e}")
            return self._fallback_ai_recommendations(keywords_data, content_opportunities, quick_wins)

    @staticmethod
    def _normalize_section(section: Any) -> List[Dict[str, str]]:
        """Coerce one response section into a list of uniform suggestion dicts.

        Non-list sections yield ``[]``. String items become a suggestion whose
        title is the text before the first colon (or the first 60 characters);
        dict items have their five fields stringified; other items are dropped.
        """
        if not isinstance(section, list):
            return []
        normalized = []
        for item in section:
            if isinstance(item, str):
                normalized.append({
                    "title": item.split(":")[0].strip() if ":" in item else item[:60],
                    "keyword": "",
                    "reason": item,
                    "format": "",
                    "estimated_impact": "",
                })
            elif isinstance(item, dict):
                normalized.append({
                    field: str(item.get(field, ""))
                    for field in ("title", "keyword", "reason", "format", "estimated_impact")
                })
        return normalized

    def _parse_ai_response(self, raw: str) -> Optional[Dict[str, Any]]:
        """Extract the recommendation JSON from a raw LLM reply.

        Returns the three normalised sections (at most 5 items each), or ``None``
        when no JSON object is found, it cannot be decoded, or every section is
        empty — so the caller can fall back to rule-based suggestions.
        """
        try:
            # Strip markdown code fences if present
            cleaned = raw.strip()
            if cleaned.startswith("```"):
                first_newline = cleaned.find("\n")
                if first_newline != -1:
                    cleaned = cleaned[first_newline + 1:]
            if cleaned.endswith("```"):
                cleaned = cleaned[:-3].strip()

            # Take the outermost {...} span, ignoring any chatter around it.
            json_start = cleaned.find("{")
            json_end = cleaned.rfind("}") + 1
            if json_start == -1 or json_end == 0:
                return None
            parsed = json.loads(cleaned[json_start:json_end])
            if not isinstance(parsed, dict):
                return None

            sections = {
                name: GSCBrainstormService._normalize_section(parsed.get(name, []))[:5]
                for name in ("immediate_opportunities", "content_strategy", "long_term_strategy")
            }
            # A syntactically valid but empty answer is not a usable result.
            if not any(sections.values()):
                return None
            return sections
        except (json.JSONDecodeError, ValueError) as e:
            logger.warning(f"Failed to parse AI brainstorm response as JSON: {e}")
            return None

    @staticmethod
    def _suggestion(title: str, keyword: str, reason: str, fmt: str, impact: str) -> Dict[str, str]:
        """Build one suggestion dict in the shape the frontend expects."""
        return {
            "title": title,
            "keyword": keyword,
            "reason": reason,
            "format": fmt,
            "estimated_impact": impact,
        }

    @staticmethod
    def _fallback_ai_recommendations(
        keywords_data: List[Dict],
        content_opportunities: List[Dict],
        quick_wins: List[Dict],
    ) -> Dict[str, Any]:
        """Build deterministic recommendations when the LLM is unavailable.

        Immediate suggestions come from the first two quick wins, then the first
        two content opportunities, then the first three keywords, capped at five
        and never repeating a keyword. The strategy sections are generic advice.
        """
        make = GSCBrainstormService._suggestion
        immediate: List[Dict[str, str]] = []
        used_keywords = set()

        # Build from quick wins first (highest ROI)
        for qw in quick_wins[:2]:
            if qw["keyword"] in used_keywords:
                continue
            used_keywords.add(qw["keyword"])
            immediate.append(make(
                f"How to Rank #{int(qw['position'])} for '{qw['keyword']}' — Optimization Guide",
                qw["keyword"],
                qw.get("reason", f"Already on page 1 at position {qw['position']:.0f}"),
                "How-To Guide",
                f"+{qw.get('estimated_traffic_gain', 10)} clicks/month",
            ))

        # Then from content opportunities
        for opp in content_opportunities[:2]:
            if opp["keyword"] in used_keywords:
                continue
            used_keywords.add(opp["keyword"])
            immediate.append(make(
                f"Complete Guide to {opp['keyword'].title()}",
                opp["keyword"],
                opp.get("opportunity", f"{opp['impressions']:,} impressions with room to improve"),
                opp.get("suggested_format", "In-Depth Article"),
                f"+{opp.get('estimated_traffic_gain', 10)} clicks/month",
            ))

        # Fill remaining slots (up to 5 in total) with the leading keywords
        for kw in (keywords_data or [])[:3]:
            if len(immediate) >= 5:
                break
            if kw["keyword"] in used_keywords:
                continue
            used_keywords.add(kw["keyword"])
            immediate.append(make(
                f"The Ultimate Guide to {kw['keyword'].title()}",
                kw["keyword"],
                f"Top keyword with {kw['impressions']:,} impressions (position {kw['position']:.1f})",
                "In-Depth Article",
                f"+{max(int(kw['impressions'] * 0.03), 5)} clicks/month",
            ))

        if not immediate:
            immediate = [make(
                "No keyword data available", "",
                "Connect GSC to get personalized suggestions", "", "",
            )]

        return {
            "immediate_opportunities": immediate,
            "content_strategy": [
                make(
                    "Topic Cluster: Build Authority Around Your Core Topics", "",
                    "Clustered content ranks higher and captures more long-tail queries",
                    "Pillar Page + Spokes", "+50-200 clicks/month over 3 months",
                ),
                make(
                    "Comparison Guide: Your Product vs. Alternatives", "",
                    "Comparison content captures high-intent searchers ready to decide",
                    "Comparison", "+20-80 clicks/month",
                ),
                make(
                    "FAQ: Answer What Your Audience Is Asking", "",
                    "FAQs capture featured snippets and voice search queries",
                    "FAQ / Listicle", "+30-100 clicks/month",
                ),
            ],
            "long_term_strategy": [
                make(
                    "Pillar Content: The Definitive Resource in Your Niche", "",
                    "Comprehensive guides become authoritative references that attract backlinks",
                    "Long-Form Guide", "+100-500 clicks/month over 6-12 months",
                ),
                make(
                    "Trend Report: What's Next in Your Industry", "",
                    "Forward-looking content captures emerging search demand early",
                    "Trend Report", "+50-200 clicks/month",
                ),
                make(
                    "Thought Leadership: Expert Roundup and Insights", "",
                    "Expert content builds E-E-A-T signals that improve overall domain authority",
                    "Expert Roundup", "+30-100 clicks/month per piece",
                ),
            ],
        }