298 lines
12 KiB
Python
298 lines
12 KiB
Python
"""
|
|
AI Visibility Insights Service
|
|
|
|
Detects Google AI Overview impact signals from GSC search analytics data.
|
|
|
|
Core heuristic:
|
|
- AIO Impacted keywords: high impressions + high position (top 3) + very low CTR
|
|
→ content likely being shown/cited in Google AI Overviews without clicks
|
|
- AIO Opportunity keywords: strong CTR + moderate position
|
|
→ content already performing well, potential for AIO citation with optimization
|
|
|
|
All thresholds are configurable for flexibility.
|
|
"""
|
|
|
|
import re
from dataclasses import dataclass, field
from datetime import datetime, timedelta
from typing import Any, Dict, List, Optional, Tuple

from loguru import logger

from services.gsc_service import GSCService
|
|
|
|
|
|
@dataclass
class AIOThresholds:
    """Tunable cut-offs for classifying keywords in the AI Overview analysis.

    CTR values are percentages (``2.0`` means 2%), matching the percent CTR
    the analysis computes for each keyword. Position values are compared
    against the keyword's average search position.
    """

    # --- "AIO impacted": a keyword qualifies when ALL three conditions hold ---
    # impressions >= this value
    impacted_min_impressions: int = 500
    # average position <= this value
    impacted_max_position: float = 4.0
    # CTR (percent) <= this value
    impacted_max_ctr: float = 2.0

    # --- "AIO opportunity": a keyword qualifies when ALL conditions hold ---
    # impressions >= this value
    opportunity_min_impressions: int = 300
    # average position within [min_position, max_position], inclusive
    opportunity_min_position: float = 4.0
    opportunity_max_position: float = 10.0
    # CTR (percent) >= this value
    opportunity_min_ctr: float = 5.0
|
|
|
|
|
|
@dataclass
class AIOVisibilityResult:
    """Container for the outcome of one AI Overview analysis run.

    Every collection field gets its own fresh default, so instances never
    share state. ``error`` stays ``None`` unless the analysis failed or had
    no data to work with.
    """

    # Aggregate metrics and the thresholds that were applied.
    summary: Dict[str, Any] = field(default_factory=dict)
    # Keyword entries flagged as AI Overview impacted.
    impacted_keywords: List[Dict[str, Any]] = field(default_factory=list)
    # Keyword entries flagged as AI Overview optimization candidates.
    opportunity_keywords: List[Dict[str, Any]] = field(default_factory=list)
    # Human-readable optimization advice.
    recommendations: List[str] = field(default_factory=list)
    # Failure description, if any.
    error: Optional[str] = None
|
|
|
|
|
|
class AIVisibilityInsightsService:
    """Analyze GSC data for AI Overview impact signals.

    Fetches search analytics through the injected GSC service and classifies
    every returned query row as "AIO impacted", an "AIO opportunity", both,
    or neither, according to the supplied thresholds.
    """

    # Format of the date strings sent to the GSC service.
    _DATE_FORMAT = "%Y-%m-%d"
    # Length of the default analysis window when no start date is given.
    _DEFAULT_LOOKBACK_DAYS = 30
    # Baseline CTR (percent) assumed for a healthy top-ranking result; used
    # to estimate how many clicks an impacted keyword is missing.
    _TARGET_CTR_PCT = 8.0
    # Maximum number of keywords returned per list in the result.
    _MAX_KEYWORDS_RETURNED = 20

    # Ordered (trigger terms, advice) rules; the first matching rule wins.
    _FORMAT_RULE_TERMS = (
        (
            ("how", "steps", "guide", "tutorial", "way to"),
            "Create a step-by-step guide with clear numbered lists for AIO citation",
        ),
        (
            ("what", "define", "meaning", "explain", "overview"),
            "Add a concise definition/summary block at the top of the article",
        ),
        (
            ("vs", "versus", "difference", "comparison", "or"),
            "Use a structured comparison table — AI crawlers favor tabular data",
        ),
        (
            ("best", "top", "recommended", "review"),
            "Format as a ranked list with bullet-point pros/cons for AI snippet extraction",
        ),
        (
            ("why", "reason", "cause", "benefit"),
            "Include a bullet-point summary of key reasons/benefits for AIO extraction",
        ),
        (
            ("price", "cost", "pricing", "cheap", "affordable"),
            "Add a pricing/comparison table — highly structured data for AI citation",
        ),
        (
            ("example", "sample", "template", "checklist"),
            "Provide actionable examples or a downloadable template checklist",
        ),
    )

    # Terms are matched as whole words (optionally with a common inflection
    # suffix such as "reviews" or "cheapest") rather than raw substrings, so
    # "or" no longer fires on "for" and "top" no longer fires on "laptop".
    _FORMAT_RULES = tuple(
        (
            re.compile(
                r"\b(?:"
                + "|".join(re.escape(term) for term in terms)
                + r")(?:s|es|ed|ing|er|est)?\b"
            ),
            advice,
        )
        for terms, advice in _FORMAT_RULE_TERMS
    )

    def __init__(self, gsc_service: GSCService):
        """Store the GSC service used to fetch search analytics."""
        self.gsc_service = gsc_service

    def analyze(
        self,
        user_id: str,
        site_url: str,
        start_date: Optional[str] = None,
        end_date: Optional[str] = None,
        thresholds: Optional[AIOThresholds] = None,
    ) -> AIOVisibilityResult:
        """
        Analyze GSC data for AI Overview insights.

        Args:
            user_id: Clerk user ID
            site_url: Verified GSC site URL (e.g., "https://example.com/")
            start_date: ISO date string; defaults to 30 days before end_date
            end_date: ISO date string; defaults to today
            thresholds: Custom thresholds; uses defaults if omitted

        Returns:
            AIOVisibilityResult with summary, keyword lists, and recommendations.
            Failures never raise: they are logged and reported through
            ``result.error``.
        """
        t = thresholds or AIOThresholds()
        result = AIOVisibilityResult()

        try:
            start_date, end_date = self._resolve_date_range(start_date, end_date)

            logger.info(
                f"AIVisibility: analyzing {site_url} for user {user_id} "
                f"({start_date} to {end_date})"
            )

            # Fetch GSC search analytics; a None response is treated as empty.
            analytics = self.gsc_service.get_search_analytics(
                user_id=user_id,
                site_url=site_url,
                start_date=start_date,
                end_date=end_date,
            ) or {}

            error = analytics.get("error")
            if error:
                result.error = error
                return result

            # Tolerate "query_data"/"rows" being present but None.
            rows = (analytics.get("query_data") or {}).get("rows") or []
            if not rows:
                result.error = "No query data returned from GSC"
                return result

            impacted_list, opportunity_list, stats = self._classify_rows(rows, t)

            # Biggest estimated loss / biggest audience first.
            impacted_list.sort(
                key=lambda x: x.get("estimated_traffic_loss", 0), reverse=True
            )
            opportunity_list.sort(key=lambda x: x["impressions"], reverse=True)

            # Counts cover every match, not just the truncated lists below.
            impact_count = len(impacted_list)
            opportunity_count = len(opportunity_list)

            result.summary = {
                "total_keywords_analyzed": stats["total_keywords"],
                "total_impressions": stats["total_impressions"],
                "total_clicks": stats["total_clicks"],
                "average_ctr": stats["average_ctr"],
                "average_position": stats["average_position"],
                "aio_impacted_keywords": impact_count,
                "aio_opportunity_keywords": opportunity_count,
                "aio_zero_click_impressions": stats["aio_impressions"],
                "aio_estimated_traffic_loss": stats["aio_traffic_loss"],
                "date_range": {"start": start_date, "end": end_date},
                "thresholds_used": {
                    "impacted": {
                        "min_impressions": t.impacted_min_impressions,
                        "max_position": t.impacted_max_position,
                        "max_ctr": t.impacted_max_ctr,
                    },
                    "opportunity": {
                        "min_impressions": t.opportunity_min_impressions,
                        "min_position": t.opportunity_min_position,
                        "max_position": t.opportunity_max_position,
                        "min_ctr": t.opportunity_min_ctr,
                    },
                },
            }

            result.recommendations = self._build_recommendations(
                impacted_list, opportunity_list, result.summary
            )

            result.impacted_keywords = impacted_list[: self._MAX_KEYWORDS_RETURNED]
            result.opportunity_keywords = opportunity_list[: self._MAX_KEYWORDS_RETURNED]

            logger.info(
                f"AIVisibility: analysis complete for {site_url} — "
                f"{impact_count} impacted, {opportunity_count} opportunities"
            )

        except Exception as e:
            # Service boundary: report the failure on the result, never raise.
            logger.error(f"AIVisibility: analysis error for {user_id}: {e}")
            result.error = str(e)

        return result

    @classmethod
    def _resolve_date_range(
        cls, start_date: Optional[str], end_date: Optional[str]
    ) -> Tuple[str, str]:
        """Fill in missing dates and return ``(start_date, end_date)``.

        A missing end date becomes today. A missing start date becomes 30 days
        before the end date, so a caller-supplied historical end date can
        never produce a start date that lies after it.
        """
        if not end_date:
            end_date = datetime.now().strftime(cls._DATE_FORMAT)
        if not start_date:
            try:
                anchor = datetime.strptime(end_date, cls._DATE_FORMAT)
            except (TypeError, ValueError):
                # End date is not in YYYY-MM-DD form: keep it untouched for
                # the GSC call and fall back to a window measured from now.
                anchor = datetime.now()
            lookback = timedelta(days=cls._DEFAULT_LOOKBACK_DAYS)
            start_date = (anchor - lookback).strftime(cls._DATE_FORMAT)
        return start_date, end_date

    @staticmethod
    def _metric(row: Dict[str, Any], key: str) -> Any:
        """Return ``row[key]``, treating a missing or ``None`` value as 0."""
        value = row.get(key)
        return 0 if value is None else value

    @classmethod
    def _classify_rows(
        cls, rows: List[Dict[str, Any]], t: Any
    ) -> Tuple[List[Dict[str, Any]], List[Dict[str, Any]], Dict[str, Any]]:
        """Classify query rows and accumulate aggregate statistics.

        Args:
            rows: Query rows; each is read for "keys", "impressions",
                "clicks", "ctr" (a fraction) and "position".
            t: Thresholds object exposing the ``impacted_*`` and
                ``opportunity_*`` attributes.

        Returns:
            ``(impacted, opportunities, stats)`` where both lists are unsorted
            keyword entries and ``stats`` holds totals and averages.
        """
        impacted: List[Dict[str, Any]] = []
        opportunities: List[Dict[str, Any]] = []
        raw_positions = []
        total_impressions = 0
        total_clicks = 0
        aio_impressions = 0
        aio_traffic_loss = 0

        for row in rows:
            keys = row.get("keys") or []
            keyword = keys[0] if keys else "(not set)"
            impressions = cls._metric(row, "impressions")
            clicks = cls._metric(row, "clicks")
            # The row's CTR is a fraction; thresholds are expressed in percent.
            ctr_pct = round(cls._metric(row, "ctr") * 100, 2)
            raw_position = cls._metric(row, "position")
            position = round(raw_position, 1)

            raw_positions.append(raw_position)
            total_impressions += impressions
            total_clicks += clicks

            entry = {
                "keyword": keyword,
                "impressions": impressions,
                "clicks": clicks,
                "ctr": ctr_pct,
                "position": position,
            }

            # AIO Impacted: high impressions, top position, very low CTR
            if (
                impressions >= t.impacted_min_impressions
                and position <= t.impacted_max_position
                and ctr_pct <= t.impacted_max_ctr
            ):
                # Clicks the keyword WOULD get at the baseline CTR.
                expected_clicks = int(impressions * cls._TARGET_CTR_PCT / 100)
                traffic_loss = max(0, expected_clicks - clicks)

                entry["estimated_traffic_loss"] = traffic_loss
                entry["target_ctr"] = cls._TARGET_CTR_PCT
                entry["aio_impacted"] = True
                impacted.append(entry)
                aio_impressions += impressions
                aio_traffic_loss += traffic_loss

            # AIO Opportunity: good CTR at a moderate position. Checked
            # independently, so custom thresholds may flag a keyword as both.
            if (
                impressions >= t.opportunity_min_impressions
                and t.opportunity_min_position <= position <= t.opportunity_max_position
                and ctr_pct >= t.opportunity_min_ctr
            ):
                entry["aio_opportunity"] = True
                entry["recommendation"] = cls._suggest_aio_format(
                    keyword, position, ctr_pct
                )
                opportunities.append(entry)

        average_ctr = round(
            (total_clicks / total_impressions * 100) if total_impressions else 0, 2
        )
        average_position = (
            round(sum(raw_positions) / len(raw_positions), 1) if raw_positions else 0
        )

        stats = {
            "total_keywords": len(raw_positions),
            "total_impressions": total_impressions,
            "total_clicks": total_clicks,
            "average_ctr": average_ctr,
            "average_position": average_position,
            "aio_impressions": aio_impressions,
            "aio_traffic_loss": aio_traffic_loss,
        }
        return impacted, opportunities, stats

    @classmethod
    def _suggest_aio_format(cls, keyword: str, position: float, ctr: float) -> str:
        """Suggest content format for AIO optimization based on keyword pattern.

        The keyword is checked against the ordered intent rules (how-to,
        definition, comparison, ranking, reasons, pricing, examples) using
        whole-word matching; the first rule that matches wins. When no rule
        matches, the advice falls back to one based on position and CTR.

        Args:
            keyword: Search query text.
            position: Average search position of the keyword.
            ctr: Click-through rate in percent.

        Returns:
            A single recommendation sentence.
        """
        kw_lower = keyword.lower()

        for pattern, advice in cls._FORMAT_RULES:
            if pattern.search(kw_lower):
                return advice

        if position <= 3 and ctr < 3:
            return "Optimize content with FAQ schema and concise summary paragraphs to reclaim AIO clicks"
        if position <= 5:
            return "Add structured data markup (FAQ, HowTo) and a TL;DR box for AI Overview targeting"
        return "Improve content depth with data-backed insights and structured formatting for AI snippet eligibility"

    @staticmethod
    def _build_recommendations(
        impacted: List[Dict[str, Any]],
        opportunities: List[Dict[str, Any]],
        summary: Dict[str, Any],
    ) -> List[str]:
        """Generate AI Overview optimization recommendations.

        Only ``summary`` is consulted (its "aio_impacted_keywords",
        "aio_opportunity_keywords" and "aio_estimated_traffic_loss" entries);
        the keyword lists are accepted for interface compatibility.

        Returns:
            Situation-specific messages followed by one general
            best-practices message, which is always present.
        """
        recs = []
        impacted_count = summary.get("aio_impacted_keywords", 0)
        opportunity_count = summary.get("aio_opportunity_keywords", 0)
        traffic_loss = summary.get("aio_estimated_traffic_loss", 0)

        if impacted_count > 0:
            recs.append(
                f"⚠️ {impacted_count} keyword(s) show AI Overview impact signals "
                f"(estimated {traffic_loss} lost clicks). "
                "Add concise, structured summary blocks early in your content to reclaim visibility."
            )
        if opportunity_count > 0:
            recs.append(
                f"✅ {opportunity_count} keyword(s) are strong AIO optimization candidates. "
                "Apply FAQ schema, HowTo schema, and clear bullet-point summaries."
            )
        if impacted_count == 0 and opportunity_count == 0:
            recs.append(
                "No clear AI Overview signals detected. "
                "Consider expanding your keyword coverage in conversational/intent-based queries."
            )

        recs.append(
            "General AIO best practices: "
            "1) Use FAQ schema for question-based queries, "
            "2) Add <table> elements for comparative data, "
            "3) Keep key takeaways in the first 100 words, "
            "4) Use descriptive headings (H2/H3) that mirror natural language queries."
        )

        return recs
|