feat: image generation overhaul (model-aware text, dim clamping, $0.30 pricing), event-driven dashboard cache invalidation, SEO insights (AI visibility, GSC, keyword gap), YouTube OAuth/publish, blog writer & content planning improvements, scheduler monitoring updates

This commit is contained in:
ajaysi
2026-05-30 07:58:22 +05:30
parent aaf94049da
commit 64f1f88cdd
129 changed files with 8796 additions and 8755 deletions

View File

@@ -0,0 +1,297 @@
"""
AI Visibility Insights Service
Detects Google AI Overview impact signals from GSC search analytics data.
Core heuristic:
- AIO Impacted keywords: high impressions + top ranking (average position <= 4.0 by default) + very low CTR
→ content likely being shown/cited in Google AI Overviews without clicks
- AIO Opportunity keywords: strong CTR + moderate position
→ content already performing well, potential for AIO citation with optimization
All thresholds are configurable for flexibility.
"""
from typing import Dict, List, Any, Optional
from dataclasses import dataclass, field
from datetime import datetime, timedelta
from loguru import logger
from services.gsc_service import GSCService
@dataclass
class AIOThresholds:
    """Cut-off values that drive AI Overview (AIO) keyword classification.

    Position fields are compared against the GSC average position of a query
    and CTR fields against the click-through rate expressed in percent
    (``2.0`` means 2%). Every field has a default, so callers only override
    the limits they want to tune.
    """

    # "AIO impacted" rule: many impressions, ranks at/above the position cap,
    # yet CTR stays at or below the CTR cap.
    impacted_min_impressions: int = 500
    impacted_max_position: float = 4.0
    impacted_max_ctr: float = 2.0

    # "AIO opportunity" rule: enough impressions, position inside the
    # [min, max] window, and CTR at or above the floor.
    opportunity_min_impressions: int = 300
    opportunity_min_position: float = 4.0
    opportunity_max_position: float = 10.0
    opportunity_min_ctr: float = 5.0
@dataclass
class AIOVisibilityResult:
    """Container for the outcome of one AI Overview analysis run.

    All collection fields start empty (each instance gets its own objects);
    ``error`` stays ``None`` unless the analysis could not be completed.
    """

    # Aggregate numbers for the analysed period.
    summary: Dict[str, Any] = field(default_factory=dict)
    # Keyword entries flagged as impacted by AI Overviews.
    impacted_keywords: List[Dict[str, Any]] = field(default_factory=list)
    # Keyword entries flagged as AI Overview optimization candidates.
    opportunity_keywords: List[Dict[str, Any]] = field(default_factory=list)
    # Human-readable advice derived from the analysis.
    recommendations: List[str] = field(default_factory=list)
    # Failure description, if any.
    error: Optional[str] = None
class AIVisibilityInsightsService:
    """Analyze GSC search analytics for Google AI Overview (AIO) signals.

    Query rows fetched through the injected GSC service are classified with
    ``AIOThresholds`` into "impacted" keywords (ranking well but barely
    clicked) and "opportunity" keywords (good CTR in a mid-page position).
    """

    def __init__(self, gsc_service: GSCService):
        """Keep a reference to the GSC client used to fetch search analytics."""
        self.gsc_service = gsc_service

    def analyze(
        self,
        user_id: str,
        site_url: str,
        start_date: Optional[str] = None,
        end_date: Optional[str] = None,
        thresholds: Optional[AIOThresholds] = None,
    ) -> AIOVisibilityResult:
        """
        Analyze GSC data for AI Overview insights.

        Args:
            user_id: Clerk user ID
            site_url: Verified GSC site URL (e.g., "https://example.com/")
            start_date: ISO date string; defaults to 30 days ago
            end_date: ISO date string; defaults to today
            thresholds: Custom thresholds; uses defaults if omitted

        Returns:
            AIOVisibilityResult with summary, keyword lists (top 20 each), and
            recommendations. Failures never raise: they are reported through
            ``result.error``.
        """
        t = thresholds or AIOThresholds()
        result = AIOVisibilityResult()
        try:
            # Set date defaults
            now = datetime.now()
            if not end_date:
                end_date = now.strftime("%Y-%m-%d")
            if not start_date:
                start_date = (now - timedelta(days=30)).strftime("%Y-%m-%d")

            logger.info(
                f"AIVisibility: analyzing {site_url} for user {user_id} "
                f"({start_date} to {end_date})"
            )

            # Fetch GSC search analytics
            analytics = self.gsc_service.get_search_analytics(
                user_id=user_id,
                site_url=site_url,
                start_date=start_date,
                end_date=end_date,
            )

            # Validate response
            error = analytics.get("error")
            if error:
                result.error = error
                return result

            rows = analytics.get("query_data", {}).get("rows", [])
            if not rows:
                result.error = "No query data returned from GSC"
                return result

            # Parse and classify each keyword
            total_impressions = 0
            total_clicks = 0
            position_sum = 0
            aio_impressions = 0
            aio_estimated_clicks = 0
            impacted_list = []
            opportunity_list = []

            for row in rows:
                keys = row.get("keys", [])
                keyword = keys[0] if keys else "(not set)"
                # ``or 0`` also covers metrics that are present but None, so a
                # single sparse row cannot abort the whole analysis.
                impressions = row.get("impressions") or 0
                clicks = row.get("clicks") or 0
                # GSC reports CTR as a fraction; thresholds are in percent.
                ctr_pct = round((row.get("ctr") or 0) * 100, 2)
                raw_position = row.get("position") or 0
                position = round(raw_position, 1)

                total_impressions += impressions
                total_clicks += clicks
                position_sum += raw_position

                entry = {
                    "keyword": keyword,
                    "impressions": impressions,
                    "clicks": clicks,
                    "ctr": ctr_pct,
                    "position": position,
                }

                # AIO Impacted: high impressions, top position, very low CTR
                if (
                    impressions >= t.impacted_min_impressions
                    and position <= t.impacted_max_position
                    and ctr_pct <= t.impacted_max_ctr
                ):
                    # Estimate what clicks WOULD be at a healthy top-of-page CTR (~8%)
                    target_ctr = 8.0
                    expected_clicks = int(impressions * target_ctr / 100)
                    traffic_loss = max(0, expected_clicks - clicks)
                    entry["estimated_traffic_loss"] = traffic_loss
                    entry["target_ctr"] = target_ctr
                    entry["aio_impacted"] = True
                    impacted_list.append(entry)
                    aio_impressions += impressions
                    aio_estimated_clicks += traffic_loss

                # AIO Opportunity: good CTR in a mid-page position — strong
                # enough to target AIO citation
                if (
                    impressions >= t.opportunity_min_impressions
                    and t.opportunity_min_position <= position <= t.opportunity_max_position
                    and ctr_pct >= t.opportunity_min_ctr
                ):
                    entry["aio_opportunity"] = True
                    entry["recommendation"] = self._suggest_aio_format(keyword, position, ctr_pct)
                    opportunity_list.append(entry)

            impact_count = len(impacted_list)
            opportunity_count = len(opportunity_list)

            # Sort by impact/opportunity
            impacted_list.sort(key=lambda x: x.get("estimated_traffic_loss", 0), reverse=True)
            opportunity_list.sort(key=lambda x: x["impressions"], reverse=True)

            # Compute summary (rows is non-empty here, so len(rows) > 0)
            avg_ctr = round((total_clicks / total_impressions * 100) if total_impressions else 0, 2)
            avg_position = round(position_sum / len(rows), 1)

            result.summary = {
                "total_keywords_analyzed": len(rows),
                "total_impressions": total_impressions,
                "total_clicks": total_clicks,
                "average_ctr": avg_ctr,
                "average_position": avg_position,
                "aio_impacted_keywords": impact_count,
                "aio_opportunity_keywords": opportunity_count,
                "aio_zero_click_impressions": aio_impressions,
                "aio_estimated_traffic_loss": aio_estimated_clicks,
                "date_range": {"start": start_date, "end": end_date},
                "thresholds_used": {
                    "impacted": {
                        "min_impressions": t.impacted_min_impressions,
                        "max_position": t.impacted_max_position,
                        "max_ctr": t.impacted_max_ctr,
                    },
                    "opportunity": {
                        "min_impressions": t.opportunity_min_impressions,
                        "min_position": t.opportunity_min_position,
                        "max_position": t.opportunity_max_position,
                        "min_ctr": t.opportunity_min_ctr,
                    },
                },
            }

            # Build recommendations
            result.recommendations = self._build_recommendations(
                impacted_list, opportunity_list, result.summary
            )
            # Only the 20 strongest entries of each kind are returned; the
            # summary counts above still cover every classified keyword.
            result.impacted_keywords = impacted_list[:20]
            result.opportunity_keywords = opportunity_list[:20]

            logger.info(
                f"AIVisibility: analysis complete for {site_url}: "
                f"{impact_count} impacted, {opportunity_count} opportunities"
            )
        except Exception as e:
            logger.error(f"AIVisibility: analysis error for {user_id}: {e}")
            result.error = str(e)
        return result

    @staticmethod
    def _suggest_aio_format(keyword: str, position: float, ctr: float) -> str:
        """Suggest a content format for AIO optimization based on keyword pattern.

        Trigger terms are matched as whole words (an optional plural "s" is
        accepted), so short terms such as "or", "vs", "how" or "top" no longer
        fire on unrelated words like "organic", "show" or "laptop". The first
        matching rule wins; if none match, the advice falls back to the
        keyword's position and CTR.
        """
        # Local import: ``re`` is not part of this module's import block.
        import re

        kw_lower = keyword.lower()

        def mentions(terms) -> bool:
            return any(
                re.search(rf"\b{re.escape(term)}s?\b", kw_lower) for term in terms
            )

        # Ordered (trigger terms, suggestion) pairs — order defines precedence.
        keyword_rules = (
            (
                ("how", "steps", "guide", "tutorial", "way to"),
                "Create a step-by-step guide with clear numbered lists for AIO citation",
            ),
            (
                ("what", "define", "meaning", "explain", "overview"),
                "Add a concise definition/summary block at the top of the article",
            ),
            (
                ("vs", "versus", "difference", "comparison", "or"),
                "Use a structured comparison table — AI crawlers favor tabular data",
            ),
            (
                ("best", "top", "recommended", "review"),
                "Format as a ranked list with bullet-point pros/cons for AI snippet extraction",
            ),
            (
                ("why", "reason", "cause", "benefit"),
                "Include a bullet-point summary of key reasons/benefits for AIO extraction",
            ),
            (
                ("price", "cost", "pricing", "cheap", "affordable"),
                "Add a pricing/comparison table — highly structured data for AI citation",
            ),
            (
                ("example", "sample", "template", "checklist"),
                "Provide actionable examples or a downloadable template checklist",
            ),
        )
        for terms, suggestion in keyword_rules:
            if mentions(terms):
                return suggestion

        if position <= 3 and ctr < 3:
            return "Optimize content with FAQ schema and concise summary paragraphs to reclaim AIO clicks"
        if position <= 5:
            return "Add structured data markup (FAQ, HowTo) and a TL;DR box for AI Overview targeting"
        return "Improve content depth with data-backed insights and structured formatting for AI snippet eligibility"

    @staticmethod
    def _build_recommendations(
        impacted: List[Dict[str, Any]],
        opportunities: List[Dict[str, Any]],
        summary: Dict[str, Any],
    ) -> List[str]:
        """Generate AI Overview optimization recommendations.

        Only the counts stored in ``summary`` are used; ``impacted`` and
        ``opportunities`` are accepted for interface stability. The general
        best-practices entry is always appended last.
        """
        recs = []
        impacted_count = summary.get("aio_impacted_keywords", 0)
        opportunity_count = summary.get("aio_opportunity_keywords", 0)
        traffic_loss = summary.get("aio_estimated_traffic_loss", 0)

        if impacted_count > 0:
            recs.append(
                f"⚠️ {impacted_count} keyword(s) show AI Overview impact signals "
                f"(estimated {traffic_loss} lost clicks). "
                "Add concise, structured summary blocks early in your content to reclaim visibility."
            )
        if opportunity_count > 0:
            recs.append(
                f"{opportunity_count} keyword(s) are strong AIO optimization candidates. "
                "Apply FAQ schema, HowTo schema, and clear bullet-point summaries."
            )
        if impacted_count == 0 and opportunity_count == 0:
            recs.append(
                "No clear AI Overview signals detected. "
                "Consider expanding your keyword coverage in conversational/intent-based queries."
            )
        recs.append(
            "General AIO best practices: "
            "1) Use FAQ schema for question-based queries, "
            "2) Add <table> elements for comparative data, "
            "3) Keep key takeaways in the first 100 words, "
            "4) Use descriptive headings (H2/H3) that mirror natural language queries."
        )
        return recs

View File

@@ -0,0 +1,508 @@
"""
GSC Strategy Insights Service for SEO Dashboard
Transforms Google Search Console data into strategic insights optimized for
SEO Dashboard (not blog topic suggestions). Focuses on:
- Trend analysis and performance monitoring
- ROI-weighted opportunity prioritization
- Competitive positioning insights
- Impact forecasting and recommendations
This service builds upon GSCBrainstormService but focuses on dashboard needs:
- Broader SEO strategy context
- Historical trend analysis
- Competitive benchmarking
- Multi-metric ranking and scoring
"""
from typing import Dict, Any, List, Optional, Tuple
from datetime import datetime, timedelta
import asyncio
from dataclasses import dataclass
from enum import Enum
from loguru import logger
import json
from services.gsc_service import GSCService
from services.gsc_brainstorm_service import GSCBrainstormService
from services.llm_providers.main_text_generation import llm_text_gen
# Enums for strategy types
class StrategyType(str, Enum):
    """Category label attached to each strategic insight.

    Members are also plain strings, so they compare equal to (and serialize
    as) their lowercase value.
    """

    QUICK_WIN = "quick_win"
    KEYWORD_GAP = "keyword_gap"
    CONTENT_OPPORTUNITY = "content_opportunity"
    PAGE_OPTIMIZATION = "page_optimization"
    COMPETITIVE_GAP = "competitive_gap"
    MARKET_INSIGHT = "market_insight"
    TREND_ALERT = "trend_alert"
    SEASONAL_PATTERN = "seasonal_pattern"
class OpportunitySeverity(str, Enum):
    """Urgency bucket for an opportunity, keyed to its ROI score band."""

    CRITICAL = "critical"  # ROI score 80-100
    HIGH = "high"  # ROI score 60-79
    MEDIUM = "medium"  # ROI score 40-59
    LOW = "low"  # ROI score 20-39
    WATCH = "watch"  # ROI score below 20
# Data classes for structured responses
@dataclass
class StrategyOpportunity:
    """One actionable SEO opportunity together with its scoring data."""

    type: StrategyType
    keyword: str
    description: str
    roi_score: float  # scale: 0-100
    priority: int  # scale: 1-10
    effort_hours: float
    timeline_weeks: int
    current_position: float
    impressions: int
    current_ctr: float
    estimated_impact: float  # clicks gained per month
    severity: OpportunitySeverity
    recommendations: List[str]
    related_keywords: List[str]
    timestamp: datetime
@dataclass
class TrendMetric:
    """Snapshot of how a single metric for one keyword moves over time."""

    keyword: str
    metric: str  # one of 'position', 'impressions', 'clicks', 'ctr'
    current_value: float
    value_30d_ago: float
    value_90d_ago: float
    trend: str  # one of 'up', 'down', 'stable'
    trend_percentage: float  # range: -100 to +100
    momentum: float  # how fast the trend is accelerating
    seasonal: bool
    anomaly: bool
@dataclass
class HealthMetrics:
    """Aggregate SEO health figures shown on the dashboard."""

    health_score: int  # scale: 0-100
    score_trend: str  # one of 'up', 'down', 'stable'
    score_change: float  # change expressed in percent
    total_keywords: int
    page_1_keywords: int
    avg_position: float
    avg_ctr: float
    total_impressions: int
    total_clicks: int
    opportunities_count: int
    quick_wins_count: int
    keyword_gaps_count: int
    competitive_gaps_count: int
    timestamp: datetime
    period: str  # one of 'daily', 'weekly', 'monthly'
class GSCStrategyInsightsService:
    """
    Service for generating strategic SEO dashboard insights from GSC data.

    Key differences from GSCBrainstormService:
    1. Dashboard-focused context (not blog-specific)
    2. Trend analysis with historical data
    3. ROI-weighted scoring
    4. Competitive positioning
    5. Impact forecasting
    6. Multi-metric health scoring
    """

    def __init__(self, gsc_service: Optional[GSCService] = None):
        """
        Initialize the strategy insights service.

        Args:
            gsc_service: Optional GSCService instance (uses default if not provided)
        """
        self.service_name = "gsc_strategy_insights"
        self.gsc_service = gsc_service or GSCService()
        # Hand the *resolved* client to the brainstorm service so both layers
        # share one GSCService even when the caller passed None.
        self.brainstorm_service = GSCBrainstormService(self.gsc_service)
        logger.info(f"Initialized {self.service_name}")

    async def get_dashboard_strategy(
        self,
        user_id: str,
        site_url: str,
        include_trends: bool = True,
        include_competitive: bool = True,
        top_n: int = 20
    ) -> Dict[str, Any]:
        """
        Get comprehensive strategy insights for dashboard display.

        The brainstorm analysis is requested once for ``user_id`` and shared by
        the opportunities, health and summary sections. A failing section is
        reported inside ``data`` without failing the whole response.

        Args:
            user_id: User the GSC data is requested for
            site_url: Website URL
            include_trends: Include trend analysis
            include_competitive: Include competitive analysis
            top_n: Number of top opportunities to return

        Returns:
            Dict with ``status``, per-section ``data``, ``generated_at``,
            ``execution_time_seconds`` and ``site_url`` (or ``status='error'``
            with an ``error`` message).
        """
        try:
            logger.info(f"Generating dashboard strategy for {site_url}")
            start_time = datetime.utcnow()

            # One shared fetch: every section awaits the same task, so the
            # brainstorm analysis runs once instead of once per section.
            shared = asyncio.ensure_future(self._load_brainstorm(site_url, user_id))

            # Execute parallel analysis tasks
            tasks = {
                'opportunities': self._get_ranked_opportunities(
                    site_url, top_n, user_id=user_id, brainstorm=shared
                ),
                'health_metrics': self._calculate_health_metrics(
                    site_url, user_id=user_id, brainstorm=shared
                ),
                'quick_summary': self._generate_quick_summary(
                    site_url, user_id=user_id, brainstorm=shared
                ),
            }

            # Conditional tasks
            if include_trends:
                tasks['trends'] = self._analyze_performance_trends(site_url)
            if include_competitive:
                tasks['competitive'] = self._analyze_competitive_positioning(site_url)

            # Execute all tasks concurrently
            results = await asyncio.gather(*tasks.values(), return_exceptions=True)

            # Aggregate results
            strategy_data = {}
            for task_name, result in zip(tasks.keys(), results):
                if isinstance(result, Exception):
                    logger.error(f"Strategy task {task_name} failed: {str(result)}")
                    strategy_data[task_name] = {'status': 'failed', 'error': str(result)}
                else:
                    strategy_data[task_name] = result

            execution_time = (datetime.utcnow() - start_time).total_seconds()
            return {
                'status': 'success',
                'data': strategy_data,
                'generated_at': datetime.utcnow().isoformat(),
                'execution_time_seconds': execution_time,
                'site_url': site_url,
            }
        except Exception as e:
            logger.error(f"Error generating dashboard strategy: {str(e)}")
            return {
                'status': 'error',
                'error': str(e),
                'generated_at': datetime.utcnow().isoformat(),
            }

    async def _load_brainstorm(
        self,
        site_url: str,
        user_id: str = "dashboard",
        shared: Optional[asyncio.Future] = None,
    ) -> Dict[str, Any]:
        """Return the brainstorm analysis, reusing ``shared`` when provided.

        Args:
            site_url: Website URL
            user_id: User the analysis is requested for
            shared: Already-scheduled fetch to await instead of starting a new one

        Returns:
            Whatever ``brainstorm_topics`` returns; its exceptions propagate.
        """
        if shared is not None:
            return await shared
        return await self.brainstorm_service.brainstorm_topics(
            user_id=user_id,
            keywords="all",  # Special case: all keywords
            site_url=site_url,
        )

    @staticmethod
    def _has_brainstorm_data(brainstorm_result: Any) -> bool:
        """True when the brainstorm result is non-empty and carries no 'error' key."""
        return bool(brainstorm_result) and 'error' not in brainstorm_result

    async def _get_ranked_opportunities(
        self,
        site_url: str,
        top_n: int = 20,
        user_id: str = "dashboard",
        brainstorm: Optional[asyncio.Future] = None,
    ) -> Dict[str, Any]:
        """
        Get ROI-weighted ranked opportunities.

        Scoring formula (0-100):
            ROI = 0.40 × (traffic_impact) +
                  0.30 × (ease_of_implementation) +
                  0.20 × (competitive_advantage) +
                  0.10 × (momentum_score)

        Args:
            site_url: Website URL
            top_n: Number of top opportunities
            user_id: User the brainstorm data is requested for
            brainstorm: Optional shared brainstorm fetch to reuse

        Returns:
            Ranked opportunities with ROI scores, or a ``no_data``/``error``
            status dict.
        """
        try:
            # Get brainstorm opportunities (reuse existing analysis)
            brainstorm_result = await self._load_brainstorm(site_url, user_id, brainstorm)
            if not self._has_brainstorm_data(brainstorm_result):
                return {'status': 'no_data', 'error': 'Could not fetch brainstorm data'}

            # Extract all opportunities
            all_opportunities = []

            # Quick wins (positions 4-10)
            for win in brainstorm_result.get('quick_wins', []):
                roi = self._calculate_roi_score(
                    traffic_impact=min(100, (win['impressions'] / 1000) * 10),
                    ease=80,  # Positions 4-10 are relatively easy
                    competitive=50,
                    momentum=60
                )
                all_opportunities.append(StrategyOpportunity(
                    type=StrategyType.QUICK_WIN,
                    keyword=win['keyword'],
                    description=f"Position {win['position']} → page 1 ranking",
                    roi_score=roi,
                    priority=1,
                    effort_hours=2,
                    timeline_weeks=1,
                    current_position=win['position'],
                    impressions=win['impressions'],
                    current_ctr=win['current_ctr'],
                    estimated_impact=win.get('estimated_traffic_gain', 0),
                    severity=self._get_severity(roi),
                    recommendations=[
                        "Update title and meta description",
                        "Improve content quality and depth",
                        "Add internal links from authority pages"
                    ],
                    related_keywords=self._find_related_keywords(win['keyword']),
                    timestamp=datetime.utcnow()
                ))

            # Content opportunities (high volume, low CTR)
            for opp in brainstorm_result.get('content_opportunities', []):
                roi = self._calculate_roi_score(
                    traffic_impact=min(100, (opp['impressions'] / 2000) * 10),
                    ease=70,  # Meta updates are easy
                    competitive=40,
                    momentum=50
                )
                all_opportunities.append(StrategyOpportunity(
                    type=StrategyType.CONTENT_OPPORTUNITY,
                    keyword=opp['keyword'],
                    description=f"{opp['impressions']} impressions at position {opp['current_position']}",
                    roi_score=roi,
                    priority=2,
                    effort_hours=3,
                    timeline_weeks=1,
                    current_position=opp['current_position'],
                    impressions=opp['impressions'],
                    current_ctr=opp['current_ctr'],
                    estimated_impact=opp.get('estimated_traffic_gain', 0),
                    severity=self._get_severity(roi),
                    recommendations=[
                        f"Improve CTR from {opp['current_ctr']}% to 5%+",
                        "A/B test meta descriptions",
                        "Review SERP position and update title angle"
                    ],
                    related_keywords=self._find_related_keywords(opp['keyword']),
                    timestamp=datetime.utcnow()
                ))

            # Keyword gaps (positions 11-20)
            for gap in brainstorm_result.get('keyword_gaps', []):
                roi = self._calculate_roi_score(
                    traffic_impact=min(100, (gap['estimated_traffic_if_page1'] / 500) * 10),
                    ease=50,  # Requires content improvements
                    competitive=70,
                    momentum=60
                )
                all_opportunities.append(StrategyOpportunity(
                    type=StrategyType.KEYWORD_GAP,
                    keyword=gap['keyword'],
                    description=f"Position {gap['position']} → large traffic opportunity",
                    roi_score=roi,
                    priority=2,
                    effort_hours=8,
                    timeline_weeks=4,
                    current_position=gap['position'],
                    impressions=gap['impressions'],
                    current_ctr=gap['current_ctr'],
                    estimated_impact=gap.get('estimated_traffic_if_page1', 0),
                    severity=self._get_severity(roi),
                    recommendations=[
                        "Create comprehensive guide on this topic",
                        "Increase content depth and topical coverage",
                        "Build topical authority in this space"
                    ],
                    related_keywords=self._find_related_keywords(gap['keyword']),
                    timestamp=datetime.utcnow()
                ))

            # Sort by ROI score descending
            ranked = sorted(all_opportunities, key=lambda x: x.roi_score, reverse=True)

            # Convert to dictionaries and return top N
            return {
                'status': 'success',
                'opportunities': [
                    {
                        'type': opp.type.value,
                        'keyword': opp.keyword,
                        'roi_score': round(opp.roi_score, 1),
                        'priority': opp.priority,
                        'effort_hours': opp.effort_hours,
                        'timeline_weeks': opp.timeline_weeks,
                        'current_position': opp.current_position,
                        'impressions': opp.impressions,
                        'estimated_impact': round(opp.estimated_impact, 1),
                        'severity': opp.severity.value,
                        'recommendations': opp.recommendations,
                        'related_keywords': opp.related_keywords,
                    }
                    for opp in ranked[:top_n]
                ],
                'total_opportunities': len(ranked),
            }
        except Exception as e:
            logger.error(f"Error ranking opportunities: {str(e)}")
            return {'status': 'error', 'error': str(e)}

    async def _calculate_health_metrics(
        self,
        site_url: str,
        user_id: str = "dashboard",
        brainstorm: Optional[asyncio.Future] = None,
    ) -> Dict[str, Any]:
        """
        Calculate comprehensive health metrics for dashboard.

        All figures are read from the brainstorm result's ``summary``; a
        missing or errored brainstorm result yields ``status='no_data'``
        instead of a misleading all-zero success payload.

        Args:
            site_url: Website URL
            user_id: User the brainstorm data is requested for
            brainstorm: Optional shared brainstorm fetch to reuse
        """
        try:
            # Get brainstorm summary (has health score)
            brainstorm_result = await self._load_brainstorm(site_url, user_id, brainstorm)
            if not self._has_brainstorm_data(brainstorm_result):
                return {'status': 'no_data', 'error': 'Could not fetch brainstorm data'}

            summary = brainstorm_result.get('summary', {})
            return {
                'status': 'success',
                'health_score': summary.get('health_score', 0),
                'health_trend': 'stable',  # TODO: Compare with historical
                'total_keywords': summary.get('total_keywords_analyzed', 0),
                # NOTE(review): reads only the 'positions_1_3' bucket although the
                # field is named page_1_keywords — confirm against the summary schema.
                'page_1_keywords': summary.get('keyword_distribution', {}).get('positions_1_3', 0),
                'avg_position': summary.get('avg_position', 0),
                'avg_ctr': summary.get('avg_ctr', 0),
                'ctr_vs_benchmark': summary.get('ctr_vs_benchmark', 0),
                'total_impressions': summary.get('total_impressions', 0),
                'total_clicks': summary.get('total_clicks', 0),
                'timestamp': datetime.utcnow().isoformat(),
            }
        except Exception as e:
            logger.error(f"Error calculating health metrics: {str(e)}")
            return {'status': 'error', 'error': str(e)}

    async def _generate_quick_summary(
        self,
        site_url: str,
        user_id: str = "dashboard",
        brainstorm: Optional[asyncio.Future] = None,
    ) -> Dict[str, Any]:
        """Generate a quick text summary of key insights.

        Args:
            site_url: Website URL
            user_id: User the brainstorm data is requested for
            brainstorm: Optional shared brainstorm fetch to reuse
        """
        try:
            brainstorm_result = await self._load_brainstorm(site_url, user_id, brainstorm)
            if not self._has_brainstorm_data(brainstorm_result):
                return {'status': 'no_data', 'error': 'Could not fetch brainstorm data'}

            summary = brainstorm_result.get('summary', {})
            quick_wins_count = len(brainstorm_result.get('quick_wins', []))
            opportunities_count = len(brainstorm_result.get('content_opportunities', []))
            gaps_count = len(brainstorm_result.get('keyword_gaps', []))

            # Generate summary text
            summary_text = (
                f"Found {quick_wins_count} quick wins (positions 4-10), "
                f"{opportunities_count} content optimization opportunities (high volume, low CTR), "
                f"and {gaps_count} keyword gaps on page 2+ that could boost traffic. "
                f"Overall SEO health: {summary.get('health_score', 0)}/100. "
            )
            return {
                'status': 'success',
                'summary': summary_text,
                'key_metrics': {
                    'quick_wins': quick_wins_count,
                    'opportunities': opportunities_count,
                    'gaps': gaps_count,
                    'health_score': summary.get('health_score', 0),
                }
            }
        except Exception as e:
            logger.error(f"Error generating quick summary: {str(e)}")
            return {'status': 'error', 'error': str(e)}

    async def _analyze_performance_trends(self, site_url: str) -> Dict[str, Any]:
        """Analyze performance trends over time (placeholder, returns 'pending')."""
        # TODO: Implement historical trend analysis
        # This would require storing historical GSC snapshots
        return {
            'status': 'pending',
            'message': 'Trend analysis requires historical data collection',
            'note': 'To be implemented in Phase 2'
        }

    async def _analyze_competitive_positioning(self, site_url: str) -> Dict[str, Any]:
        """Analyze competitive positioning (placeholder, returns 'pending')."""
        # TODO: Implement competitive analysis
        # This would require competitor keyword data
        return {
            'status': 'pending',
            'message': 'Competitive analysis requires competitor data integration',
            'note': 'To be implemented in Phase 2'
        }

    def _calculate_roi_score(
        self,
        traffic_impact: float,
        ease: float,
        competitive: float,
        momentum: float
    ) -> float:
        """
        Calculate ROI score (0-100).

        Formula:
            ROI = 0.40 × traffic_impact +
                  0.30 × ease +
                  0.20 × competitive +
                  0.10 × momentum

        Each component is capped at 100 before weighting and the weighted sum
        is clamped to the 0-100 range.
        """
        roi = (
            0.40 * min(100, traffic_impact) +
            0.30 * min(100, ease) +
            0.20 * min(100, competitive) +
            0.10 * min(100, momentum)
        )
        return min(100, max(0, roi))

    def _get_severity(self, roi_score: float) -> OpportunitySeverity:
        """Get severity level based on ROI score (bands at 80 / 60 / 40 / 20)."""
        if roi_score >= 80:
            return OpportunitySeverity.CRITICAL
        elif roi_score >= 60:
            return OpportunitySeverity.HIGH
        elif roi_score >= 40:
            return OpportunitySeverity.MEDIUM
        elif roi_score >= 20:
            return OpportunitySeverity.LOW
        else:
            return OpportunitySeverity.WATCH

    def _find_related_keywords(self, keyword: str) -> List[str]:
        """Find related keywords (placeholder)."""
        # TODO: Implement semantic similarity search
        # For now, return empty list
        return []
# Public names re-exported for the router layer
__all__ = [
    "GSCStrategyInsightsService",
    "StrategyOpportunity",
    "StrategyType",
    "OpportunitySeverity",
    "HealthMetrics",
    "TrendMetric",
]