Add new analytics modules from PR #436
- backend/services/analytics/opportunity_scorer.py: Functions for scoring and ranking opportunities from search query and page metrics (high_impression_low_ctr_queries, rising_queries, declining_pages, score_and_rank_opportunities, categorize_opportunities)
- backend/services/gsc_service.py: GSC (Google Search Console) service; adds previous-period query and page data to the search analytics response
This commit is contained in:
128
backend/services/analytics/opportunity_scorer.py
Normal file
128
backend/services/analytics/opportunity_scorer.py
Normal file
@@ -0,0 +1,128 @@
|
||||
"""Opportunity scoring helpers for search analytics data."""
|
||||
|
||||
from typing import Any, Dict, List
|
||||
|
||||
|
||||
Opportunity = Dict[str, Any]
|
||||
MetricRow = Dict[str, Any]
|
||||
|
||||
|
||||
def high_impression_low_ctr_queries(
    query_rows: List[MetricRow],
    min_impressions: float = 100.0,
    max_ctr: float = 2.5,
) -> List[Opportunity]:
    """Return queries with strong impressions but weak CTR.

    Args:
        query_rows: Rows that may carry ``id``, ``query``, ``page_url``,
            ``current_metrics`` and ``previous_metrics`` keys. Missing or
            ``None`` metric mappings are treated as empty.
        min_impressions: Inclusive lower bound on current impressions.
        max_ctr: Inclusive upper bound on current CTR.
            NOTE(review): the 2.5 default suggests CTR is expressed in
            percent; confirm upstream rows are not raw 0-1 fractions.

    Returns:
        One opportunity dict per qualifying row, in input order, with
        ``reason`` set to ``"high_impression_low_ctr_query"`` and a
        placeholder ``score`` of ``0.0``.
    """
    opportunities: List[Opportunity] = []
    for row in query_rows:
        # ``or {}`` covers both a missing key and an explicit ``None`` value,
        # which ``row.get(key, {})`` alone would pass through and crash on.
        current = row.get("current_metrics") or {}
        impressions = float(current.get("impressions", 0) or 0)
        ctr = float(current.get("ctr", 0) or 0)
        if impressions < min_impressions or ctr > max_ctr:
            continue

        opportunities.append({
            "id": row.get("id") or f"q:{row.get('query', 'unknown')}",
            "query": row.get("query"),
            "page_url": row.get("page_url"),
            "reason": "high_impression_low_ctr_query",
            "score": 0.0,
            "current_metrics": current,
            # Normalised to a dict so consumers can call ``.get`` on it.
            "previous_metrics": row.get("previous_metrics") or {},
        })
    return opportunities
|
||||
|
||||
|
||||
def rising_queries(
    query_rows: List[MetricRow],
    min_impression_delta: float = 50.0,
    min_click_delta: float = 5.0,
) -> List[Opportunity]:
    """Return query opportunities with positive window-over-window growth.

    A row qualifies when EITHER its impression gain reaches
    ``min_impression_delta`` OR its click gain reaches ``min_click_delta``;
    it is skipped only when both gains fall short.

    Args:
        query_rows: Rows that may carry ``id``, ``query``, ``page_url``,
            ``current_metrics`` and ``previous_metrics`` keys. Missing or
            ``None`` metric mappings are treated as empty, so their
            impressions and clicks count as 0.
        min_impression_delta: Inclusive impression gain (current minus
            previous) that qualifies a row.
        min_click_delta: Inclusive click gain (current minus previous) that
            qualifies a row.

    Returns:
        One opportunity dict per qualifying row, in input order, with
        ``reason`` set to ``"rising_query"`` and a placeholder ``score`` of
        ``0.0``.
    """
    opportunities: List[Opportunity] = []
    for row in query_rows:
        # ``or {}`` covers both a missing key and an explicit ``None`` value.
        current = row.get("current_metrics") or {}
        previous = row.get("previous_metrics") or {}
        delta_impressions = (
            float(current.get("impressions", 0) or 0)
            - float(previous.get("impressions", 0) or 0)
        )
        delta_clicks = (
            float(current.get("clicks", 0) or 0)
            - float(previous.get("clicks", 0) or 0)
        )
        if delta_impressions < min_impression_delta and delta_clicks < min_click_delta:
            continue

        opportunities.append({
            "id": row.get("id") or f"q:{row.get('query', 'unknown')}",
            "query": row.get("query"),
            "page_url": row.get("page_url"),
            "reason": "rising_query",
            "score": 0.0,
            "current_metrics": current,
            "previous_metrics": previous,
        })
    return opportunities
|
||||
|
||||
|
||||
def declining_pages(
    page_rows: List[MetricRow],
    min_impression_drop: float = 50.0,
    min_click_drop: float = 5.0,
) -> List[Opportunity]:
    """Return page opportunities with negative window-over-window change.

    A row qualifies when EITHER its impression drop reaches
    ``min_impression_drop`` OR its click drop reaches ``min_click_drop``;
    it is skipped only when both drops fall short.

    Args:
        page_rows: Rows that may carry ``id``, ``query``, ``page_url``,
            ``current_metrics`` and ``previous_metrics`` keys. Missing or
            ``None`` metric mappings are treated as empty, so their
            impressions and clicks count as 0.
        min_impression_drop: Inclusive impression loss (previous minus
            current) that qualifies a row.
        min_click_drop: Inclusive click loss (previous minus current) that
            qualifies a row.

    Returns:
        One opportunity dict per qualifying row, in input order, with
        ``reason`` set to ``"declining_page"`` and a placeholder ``score`` of
        ``0.0``.
    """
    opportunities: List[Opportunity] = []
    for row in page_rows:
        # ``or {}`` covers both a missing key and an explicit ``None`` value.
        current = row.get("current_metrics") or {}
        previous = row.get("previous_metrics") or {}
        impression_drop = (
            float(previous.get("impressions", 0) or 0)
            - float(current.get("impressions", 0) or 0)
        )
        click_drop = (
            float(previous.get("clicks", 0) or 0)
            - float(current.get("clicks", 0) or 0)
        )
        if impression_drop < min_impression_drop and click_drop < min_click_drop:
            continue

        opportunities.append({
            "id": row.get("id") or f"p:{row.get('page_url', 'unknown')}",
            "query": row.get("query"),
            "page_url": row.get("page_url"),
            "reason": "declining_page",
            "score": 0.0,
            "current_metrics": current,
            "previous_metrics": previous,
        })
    return opportunities
|
||||
|
||||
|
||||
def score_and_rank_opportunities(opportunities: List[Opportunity]) -> List[Opportunity]:
    """Assign simple priority score and return opportunities ordered by score.

    The score combines current volume, remaining CTR headroom and
    window-over-window momentum, then applies a per-reason multiplier::

        momentum = |d_impressions| + 10 * |d_clicks|
        base     = 0.05 * impressions + 0.8 * clicks
                   + 0.1 * (100 - ctr) + 0.15 * momentum
        score    = round(base * multiplier, 2)

    NOTE(review): ``100 - ctr`` suggests CTR is expressed in percent; confirm
    against the data producer.

    Args:
        opportunities: Opportunity dicts. Missing or ``None``
            ``current_metrics`` / ``previous_metrics`` are treated as empty.
            The input dicts are not mutated.

    Returns:
        Shallow copies of the inputs with ``score`` filled in, sorted by
        score descending; ties are ordered by ``str(id)`` descending.
    """
    # Loop-invariant, so built once rather than per item. Reasons not listed
    # here fall back to a neutral multiplier of 1.0.
    reason_multipliers = {
        "high_impression_low_ctr_query": 1.2,
        "rising_query": 1.0,
        "declining_page": 1.3,
    }

    scored: List[Opportunity] = []
    for item in opportunities:
        # ``or {}`` covers both a missing key and an explicit ``None`` value.
        current = item.get("current_metrics") or {}
        previous = item.get("previous_metrics") or {}
        impressions = float(current.get("impressions", 0) or 0)
        clicks = float(current.get("clicks", 0) or 0)
        ctr = float(current.get("ctr", 0) or 0)

        previous_impressions = float(previous.get("impressions", 0) or 0)
        previous_clicks = float(previous.get("clicks", 0) or 0)
        # Absolute deltas: both growth and decline raise priority.
        momentum = abs(impressions - previous_impressions) + (abs(clicks - previous_clicks) * 10)

        opportunity_multiplier = reason_multipliers.get(item.get("reason"), 1.0)

        score = (impressions * 0.05) + (clicks * 0.8) + ((100.0 - ctr) * 0.1) + (momentum * 0.15)
        updated = dict(item)
        updated["score"] = round(score * opportunity_multiplier, 2)
        scored.append(updated)

    return sorted(scored, key=lambda x: (x.get("score", 0), str(x.get("id", ""))), reverse=True)
|
||||
|
||||
|
||||
def categorize_opportunities(
    query_rows: List[MetricRow],
    page_rows: List[MetricRow],
) -> List[Opportunity]:
    """Build all opportunity categories and return a stable, ranked schema.

    Candidates are collected in a fixed order (high-impression/low-CTR
    queries, rising queries, declining pages) and then scored and ranked
    together. No de-duplication is performed here, so a query row that
    satisfies both query detectors appears once per reason.

    Args:
        query_rows: Query-level rows passed to both query detectors.
        page_rows: Page-level rows passed to the declining-page detector.

    Returns:
        The combined opportunities as returned by
        ``score_and_rank_opportunities``.
    """
    candidates: List[Opportunity] = [
        *high_impression_low_ctr_queries(query_rows),
        *rising_queries(query_rows),
        *declining_pages(page_rows),
    ]
    return score_and_rank_opportunities(candidates)
|
||||
|
||||
@@ -393,6 +393,18 @@ class GSCService:
|
||||
# Return empty list instead of raising to prevent frontend 500s
|
||||
return []
|
||||
|
||||
def _calculate_previous_period(self, start_date: str, end_date: str):
|
||||
"""Calculate previous period date window matching current range length."""
|
||||
try:
|
||||
start_dt = datetime.strptime(start_date, "%Y-%m-%d")
|
||||
end_dt = datetime.strptime(end_date, "%Y-%m-%d")
|
||||
window_days = max((end_dt - start_dt).days + 1, 1)
|
||||
prev_end = start_dt - timedelta(days=1)
|
||||
prev_start = prev_end - timedelta(days=window_days - 1)
|
||||
return prev_start.strftime("%Y-%m-%d"), prev_end.strftime("%Y-%m-%d")
|
||||
except Exception:
|
||||
return None, None
|
||||
|
||||
def get_search_analytics(self, user_id: str, site_url: str,
|
||||
start_date: str = None, end_date: str = None) -> Dict[str, Any]:
|
||||
"""Get search analytics data from GSC."""
|
||||
@@ -537,6 +549,41 @@ class GSCService:
|
||||
qp_rows = []
|
||||
qp_row_count = 0
|
||||
|
||||
# Optional previous-period windows for opportunity trend detection
|
||||
prev_query_rows = []
|
||||
prev_page_rows = []
|
||||
prev_start_date, prev_end_date = self._calculate_previous_period(start_date, end_date)
|
||||
if prev_start_date and prev_end_date:
|
||||
try:
|
||||
prev_query_request = {
|
||||
'startDate': prev_start_date,
|
||||
'endDate': prev_end_date,
|
||||
'dimensions': ['query'],
|
||||
'rowLimit': 1000
|
||||
}
|
||||
prev_query_response = service.searchanalytics().query(
|
||||
siteUrl=site_url,
|
||||
body=prev_query_request
|
||||
).execute()
|
||||
prev_query_rows = prev_query_response.get('rows', [])
|
||||
except Exception as prev_query_error:
|
||||
logger.warning(f"GSC previous query request failed for user {user_id}: {prev_query_error}")
|
||||
|
||||
try:
|
||||
prev_page_request = {
|
||||
'startDate': prev_start_date,
|
||||
'endDate': prev_end_date,
|
||||
'dimensions': ['page'],
|
||||
'rowLimit': 1000
|
||||
}
|
||||
prev_page_response = service.searchanalytics().query(
|
||||
siteUrl=site_url,
|
||||
body=prev_page_request
|
||||
).execute()
|
||||
prev_page_rows = prev_page_response.get('rows', [])
|
||||
except Exception as prev_page_error:
|
||||
logger.warning(f"GSC previous page request failed for user {user_id}: {prev_page_error}")
|
||||
|
||||
# Combine overall, query, page and query+page data
|
||||
analytics_data = {
|
||||
'overall_metrics': {
|
||||
@@ -555,6 +602,12 @@ class GSCService:
|
||||
'rows': qp_rows,
|
||||
'rowCount': qp_row_count
|
||||
},
|
||||
'previous_period': {
|
||||
'startDate': prev_start_date,
|
||||
'endDate': prev_end_date,
|
||||
'query_data': {'rows': prev_query_rows, 'rowCount': len(prev_query_rows)},
|
||||
'page_data': {'rows': prev_page_rows, 'rowCount': len(prev_page_rows)}
|
||||
},
|
||||
'verification_data': {
|
||||
'rows': verification_rows,
|
||||
'rowCount': len(verification_rows)
|
||||
@@ -580,6 +633,12 @@ class GSCService:
|
||||
'query_data': {'rows': [], 'rowCount': 0},
|
||||
'page_data': {'rows': [], 'rowCount': 0},
|
||||
'query_page_data': {'rows': [], 'rowCount': 0},
|
||||
'previous_period': {
|
||||
'startDate': None,
|
||||
'endDate': None,
|
||||
'query_data': {'rows': [], 'rowCount': 0},
|
||||
'page_data': {'rows': [], 'rowCount': 0}
|
||||
},
|
||||
'verification_data': {
|
||||
'rows': verification_rows,
|
||||
'rowCount': len(verification_rows)
|
||||
|
||||
Reference in New Issue
Block a user