Add new analytics modules from PR #436

- backend/services/analytics/opportunity_scorer.py: Functions for scoring and ranking
  opportunities from search queries (high_impression_low_ctr_queries,
  rising_queries, declining_pages, score_and_rank_opportunities,
  categorize_opportunities)
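- backend/services/gsc_service.py: GSC (Google Search Console) service now
  computes a previous-period date window (_calculate_previous_period) and
  returns previous-period query and page rows from get_search_analytics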
This commit is contained in:
ajaysi
2026-03-22 11:36:38 +05:30
parent b28dc4b5f6
commit 51bc76345f
2 changed files with 187 additions and 0 deletions

View File

@@ -0,0 +1,128 @@
"""Opportunity scoring helpers for search analytics data."""
from typing import Any, Dict, List
# Output record emitted by the helpers in this module. The dicts built here
# carry the keys "id", "query", "page_url", "reason", "score",
# "current_metrics" and "previous_metrics".
Opportunity = Dict[str, Any]
# Input record consumed by the helpers in this module. They read the optional
# keys "id", "query", "page_url", "current_metrics" and "previous_metrics";
# the metrics values are themselves dicts read via "impressions", "clicks"
# and "ctr".
MetricRow = Dict[str, Any]
def high_impression_low_ctr_queries(
    query_rows: List[MetricRow],
    min_impressions: float = 100.0,
    max_ctr: float = 2.5,
) -> List[Opportunity]:
    """Return queries with strong impressions but weak CTR.

    Args:
        query_rows: Rows with optional ``id``, ``query``, ``page_url``,
            ``current_metrics`` and ``previous_metrics`` entries.
        min_impressions: Rows whose current impressions fall below this
            value are skipped (a row exactly at the threshold is kept).
        max_ctr: Rows whose current CTR exceeds this value are skipped
            (a row exactly at the threshold is kept). The value is compared
            directly against the row's ``ctr``.
            NOTE(review): the 2.5 default looks like a percentage, while
            the Search Console API reports CTR as a 0-1 fraction; confirm
            that callers convert before passing rows in.

    Returns:
        One opportunity dict per kept row, in input order, with ``score``
        preset to ``0.0`` and ``reason`` set to
        ``"high_impression_low_ctr_query"``.
    """
    opportunities: List[Opportunity] = []
    for row in query_rows:
        # ``.get(key, {})`` only defaults when the key is absent; ``or {}``
        # also covers rows that carry an explicit ``None`` for the metrics.
        current = row.get("current_metrics") or {}
        previous = row.get("previous_metrics") or {}

        impressions = float(current.get("impressions", 0) or 0)
        ctr = float(current.get("ctr", 0) or 0)
        if impressions < min_impressions or ctr > max_ctr:
            continue

        opportunities.append({
            "id": row.get("id") or f"q:{row.get('query', 'unknown')}",
            "query": row.get("query"),
            "page_url": row.get("page_url"),
            "reason": "high_impression_low_ctr_query",
            "score": 0.0,
            "current_metrics": current,
            "previous_metrics": previous,
        })
    return opportunities
def rising_queries(
    query_rows: List[MetricRow],
    min_impression_delta: float = 50.0,
    min_click_delta: float = 5.0,
) -> List[Opportunity]:
    """Return query opportunities with positive window-over-window growth.

    A row is kept when EITHER its impression gain reaches
    ``min_impression_delta`` OR its click gain reaches ``min_click_delta``;
    it is skipped only when both gains are below their thresholds.

    Args:
        query_rows: Rows with optional ``id``, ``query``, ``page_url``,
            ``current_metrics`` and ``previous_metrics`` entries.
        min_impression_delta: Minimum current-minus-previous impressions.
        min_click_delta: Minimum current-minus-previous clicks.

    Returns:
        One opportunity dict per kept row, in input order, with ``score``
        preset to ``0.0`` and ``reason`` set to ``"rising_query"``.
    """
    opportunities: List[Opportunity] = []
    for row in query_rows:
        # ``or {}`` covers both a missing key and an explicit ``None`` value;
        # a missing previous window therefore counts as zero traffic.
        current = row.get("current_metrics") or {}
        previous = row.get("previous_metrics") or {}

        delta_impressions = (
            float(current.get("impressions", 0) or 0)
            - float(previous.get("impressions", 0) or 0)
        )
        delta_clicks = (
            float(current.get("clicks", 0) or 0)
            - float(previous.get("clicks", 0) or 0)
        )
        if delta_impressions < min_impression_delta and delta_clicks < min_click_delta:
            continue

        opportunities.append({
            "id": row.get("id") or f"q:{row.get('query', 'unknown')}",
            "query": row.get("query"),
            "page_url": row.get("page_url"),
            "reason": "rising_query",
            "score": 0.0,
            "current_metrics": current,
            "previous_metrics": previous,
        })
    return opportunities
def declining_pages(
    page_rows: List[MetricRow],
    min_impression_drop: float = 50.0,
    min_click_drop: float = 5.0,
) -> List[Opportunity]:
    """Return page opportunities with negative window-over-window change.

    A row is kept when EITHER its impression loss reaches
    ``min_impression_drop`` OR its click loss reaches ``min_click_drop``;
    it is skipped only when both losses are below their thresholds.

    Args:
        page_rows: Rows with optional ``id``, ``query``, ``page_url``,
            ``current_metrics`` and ``previous_metrics`` entries.
        min_impression_drop: Minimum previous-minus-current impressions.
        min_click_drop: Minimum previous-minus-current clicks.

    Returns:
        One opportunity dict per kept row, in input order, with ``score``
        preset to ``0.0`` and ``reason`` set to ``"declining_page"``.
    """
    opportunities: List[Opportunity] = []
    for row in page_rows:
        # ``or {}`` covers both a missing key and an explicit ``None`` value;
        # a missing current window therefore counts as zero traffic.
        current = row.get("current_metrics") or {}
        previous = row.get("previous_metrics") or {}

        impression_drop = (
            float(previous.get("impressions", 0) or 0)
            - float(current.get("impressions", 0) or 0)
        )
        click_drop = (
            float(previous.get("clicks", 0) or 0)
            - float(current.get("clicks", 0) or 0)
        )
        if impression_drop < min_impression_drop and click_drop < min_click_drop:
            continue

        opportunities.append({
            "id": row.get("id") or f"p:{row.get('page_url', 'unknown')}",
            "query": row.get("query"),
            "page_url": row.get("page_url"),
            "reason": "declining_page",
            "score": 0.0,
            "current_metrics": current,
            "previous_metrics": previous,
        })
    return opportunities
def score_and_rank_opportunities(opportunities: List[Opportunity]) -> List[Opportunity]:
    """Assign simple priority score and return opportunities ordered by score.

    The score combines current impressions, current clicks, CTR headroom
    (``100 - ctr``) and momentum (the absolute window-over-window change in
    impressions plus ten times the absolute change in clicks). It is then
    scaled by a per-reason multiplier and rounded to two decimals.

    Args:
        opportunities: Opportunity dicts with optional ``reason``,
            ``current_metrics`` and ``previous_metrics`` entries.

    Returns:
        Shallow copies of the inputs with ``score`` filled in, sorted by
        score descending; ties are broken by ``str(id)`` descending. The
        input dicts are not modified.
    """
    # Loop-invariant weighting table; unknown reasons fall back to 1.0.
    reason_multipliers = {
        "high_impression_low_ctr_query": 1.2,
        "rising_query": 1.0,
        "declining_page": 1.3,
    }

    scored: List[Opportunity] = []
    for item in opportunities:
        # ``or {}`` covers both a missing key and an explicit ``None`` value.
        current = item.get("current_metrics") or {}
        previous = item.get("previous_metrics") or {}

        impressions = float(current.get("impressions", 0) or 0)
        clicks = float(current.get("clicks", 0) or 0)
        ctr = float(current.get("ctr", 0) or 0)
        previous_impressions = float(previous.get("impressions", 0) or 0)
        previous_clicks = float(previous.get("clicks", 0) or 0)

        # Magnitude of change in either direction; clicks weigh 10x.
        momentum = abs(impressions - previous_impressions) + (abs(clicks - previous_clicks) * 10)
        opportunity_multiplier = reason_multipliers.get(item.get("reason"), 1.0)
        score = (impressions * 0.05) + (clicks * 0.8) + ((100.0 - ctr) * 0.1) + (momentum * 0.15)

        updated = dict(item)
        updated["score"] = round(score * opportunity_multiplier, 2)
        scored.append(updated)

    return sorted(scored, key=lambda x: (x.get("score", 0), str(x.get("id", ""))), reverse=True)
def categorize_opportunities(
    query_rows: List[MetricRow],
    page_rows: List[MetricRow],
) -> List[Opportunity]:
    """Build all opportunity categories and return a stable, ranked schema.

    Runs the low-CTR and rising detectors over ``query_rows`` and the
    declining detector over ``page_rows``, concatenates their results in
    that order, and hands the combined list to
    ``score_and_rank_opportunities``. The two query detectors filter
    independently, so one query row can yield two opportunities.
    """
    candidates: List[Opportunity] = [
        *high_impression_low_ctr_queries(query_rows),
        *rising_queries(query_rows),
        *declining_pages(page_rows),
    ]
    return score_and_rank_opportunities(candidates)

View File

@@ -393,6 +393,18 @@ class GSCService:
# Return empty list instead of raising to prevent frontend 500s
return []
def _calculate_previous_period(self, start_date: str, end_date: str):
"""Calculate previous period date window matching current range length."""
try:
start_dt = datetime.strptime(start_date, "%Y-%m-%d")
end_dt = datetime.strptime(end_date, "%Y-%m-%d")
window_days = max((end_dt - start_dt).days + 1, 1)
prev_end = start_dt - timedelta(days=1)
prev_start = prev_end - timedelta(days=window_days - 1)
return prev_start.strftime("%Y-%m-%d"), prev_end.strftime("%Y-%m-%d")
except Exception:
return None, None
def get_search_analytics(self, user_id: str, site_url: str,
start_date: str = None, end_date: str = None) -> Dict[str, Any]:
"""Get search analytics data from GSC."""
@@ -537,6 +549,41 @@ class GSCService:
qp_rows = []
qp_row_count = 0
# Optional previous-period windows for opportunity trend detection
prev_query_rows = []
prev_page_rows = []
prev_start_date, prev_end_date = self._calculate_previous_period(start_date, end_date)
if prev_start_date and prev_end_date:
try:
prev_query_request = {
'startDate': prev_start_date,
'endDate': prev_end_date,
'dimensions': ['query'],
'rowLimit': 1000
}
prev_query_response = service.searchanalytics().query(
siteUrl=site_url,
body=prev_query_request
).execute()
prev_query_rows = prev_query_response.get('rows', [])
except Exception as prev_query_error:
logger.warning(f"GSC previous query request failed for user {user_id}: {prev_query_error}")
try:
prev_page_request = {
'startDate': prev_start_date,
'endDate': prev_end_date,
'dimensions': ['page'],
'rowLimit': 1000
}
prev_page_response = service.searchanalytics().query(
siteUrl=site_url,
body=prev_page_request
).execute()
prev_page_rows = prev_page_response.get('rows', [])
except Exception as prev_page_error:
logger.warning(f"GSC previous page request failed for user {user_id}: {prev_page_error}")
# Combine overall, query, page and query+page data
analytics_data = {
'overall_metrics': {
@@ -555,6 +602,12 @@ class GSCService:
'rows': qp_rows,
'rowCount': qp_row_count
},
'previous_period': {
'startDate': prev_start_date,
'endDate': prev_end_date,
'query_data': {'rows': prev_query_rows, 'rowCount': len(prev_query_rows)},
'page_data': {'rows': prev_page_rows, 'rowCount': len(prev_page_rows)}
},
'verification_data': {
'rows': verification_rows,
'rowCount': len(verification_rows)
@@ -580,6 +633,12 @@ class GSCService:
'query_data': {'rows': [], 'rowCount': 0},
'page_data': {'rows': [], 'rowCount': 0},
'query_page_data': {'rows': [], 'rowCount': 0},
'previous_period': {
'startDate': None,
'endDate': None,
'query_data': {'rows': [], 'rowCount': 0},
'page_data': {'rows': [], 'rowCount': 0}
},
'verification_data': {
'rows': verification_rows,
'rowCount': len(verification_rows)