From 51bc76345f5ae111ed1e23bb58e8246eecbac845 Mon Sep 17 00:00:00 2001
From: ajaysi
Date: Sun, 22 Mar 2026 11:36:38 +0530
Subject: [PATCH] Add new analytics modules from PR #436

- backend/services/analytics/opportunity_scorer.py: Functions for scoring and
  ranking opportunities from search queries (high_impression_low_ctr_queries,
  rising_queries, declining_pages, score_and_rank_opportunities,
  categorize_opportunities)
- backend/services/gsc_service.py: GSC (Google Search Console) service
---
Review notes (this section is ignored by git am):
- Line breaks and indentation were reconstructed from a whitespace-collapsed
  copy of this patch. The indentation of the unchanged context lines in
  gsc_service.py is a best guess; verify it against the target file or apply
  with `git apply --ignore-whitespace`.
- Hunk headers and the diffstat were recomputed for the revised additions; the
  post-image blob ids on the `index` lines still refer to the original commit.

 .../services/analytics/opportunity_scorer.py | 170 ++++++++++++++++++
 backend/services/gsc_service.py              |  67 +++++++
 2 files changed, 237 insertions(+)
 create mode 100644 backend/services/analytics/opportunity_scorer.py

diff --git a/backend/services/analytics/opportunity_scorer.py b/backend/services/analytics/opportunity_scorer.py
new file mode 100644
index 00000000..ff1f275f
--- /dev/null
+++ b/backend/services/analytics/opportunity_scorer.py
@@ -0,0 +1,170 @@
+"""Opportunity scoring helpers for search analytics data.
+
+Rows are plain dicts whose ``current_metrics`` / ``previous_metrics`` entries
+are dicts read for ``impressions``, ``clicks`` and ``ctr``.
+
+NOTE(review): the default ``max_ctr`` of 2.5 and the ``100.0 - ctr`` scoring
+term look like they expect CTR as a percentage (0-100) -- confirm that callers
+do not pass fractional (0-1) CTR values.
+"""
+
+from typing import Any, Dict, List
+
+
+Opportunity = Dict[str, Any]
+MetricRow = Dict[str, Any]
+
+# Weight applied to the base score per opportunity reason; other reasons use 1.0.
+_REASON_MULTIPLIERS: Dict[str, float] = {
+    "high_impression_low_ctr_query": 1.2,
+    "rising_query": 1.0,
+    "declining_page": 1.3,
+}
+
+
+def _metrics(row: MetricRow, key: str) -> Dict[str, Any]:
+    """Return the metrics dict stored under ``key``; missing or None becomes ``{}``."""
+    return row.get(key) or {}
+
+
+def _number(metrics: Dict[str, Any], name: str) -> float:
+    """Return ``metrics[name]`` as a float; missing, None or other falsy values count as 0."""
+    return float(metrics.get(name, 0) or 0)
+
+
+def _opportunity(
+    row: MetricRow,
+    reason: str,
+    id_prefix: str,
+    id_field: str,
+    current: Dict[str, Any],
+    previous: Dict[str, Any],
+) -> Opportunity:
+    """Build the shared opportunity record; ``score`` is a 0.0 placeholder until ranking."""
+    return {
+        # Prefer an explicit row id, otherwise derive "q:..." / "p:..." from id_field.
+        "id": row.get("id") or f"{id_prefix}:{row.get(id_field, 'unknown')}",
+        "query": row.get("query"),
+        "page_url": row.get("page_url"),
+        "reason": reason,
+        "score": 0.0,
+        "current_metrics": current,
+        "previous_metrics": previous,
+    }
+
+
+def high_impression_low_ctr_queries(
+    query_rows: List[MetricRow],
+    min_impressions: float = 100.0,
+    max_ctr: float = 2.5,
+) -> List[Opportunity]:
+    """Return queries with strong impressions but weak CTR.
+
+    A row is kept when its current impressions are at least ``min_impressions``
+    and its current CTR is at most ``max_ctr``.
+    """
+    opportunities: List[Opportunity] = []
+    for row in query_rows:
+        current = _metrics(row, "current_metrics")
+        impressions = _number(current, "impressions")
+        ctr = _number(current, "ctr")
+        if impressions < min_impressions or ctr > max_ctr:
+            continue
+
+        previous = _metrics(row, "previous_metrics")
+        opportunities.append(
+            _opportunity(row, "high_impression_low_ctr_query", "q", "query", current, previous)
+        )
+    return opportunities
+
+
+def rising_queries(
+    query_rows: List[MetricRow],
+    min_impression_delta: float = 50.0,
+    min_click_delta: float = 5.0,
+) -> List[Opportunity]:
+    """Return query opportunities with positive window-over-window growth.
+
+    A row is kept when impressions grew by at least ``min_impression_delta``
+    or clicks grew by at least ``min_click_delta``.
+    """
+    opportunities: List[Opportunity] = []
+    for row in query_rows:
+        current = _metrics(row, "current_metrics")
+        previous = _metrics(row, "previous_metrics")
+        delta_impressions = _number(current, "impressions") - _number(previous, "impressions")
+        delta_clicks = _number(current, "clicks") - _number(previous, "clicks")
+        # Skip only when neither metric reached its growth threshold.
+        if delta_impressions < min_impression_delta and delta_clicks < min_click_delta:
+            continue
+
+        opportunities.append(_opportunity(row, "rising_query", "q", "query", current, previous))
+    return opportunities
+
+
+def declining_pages(
+    page_rows: List[MetricRow],
+    min_impression_drop: float = 50.0,
+    min_click_drop: float = 5.0,
+) -> List[Opportunity]:
+    """Return page opportunities with negative window-over-window change.
+
+    A row is kept when impressions fell by at least ``min_impression_drop``
+    or clicks fell by at least ``min_click_drop``.
+    """
+    opportunities: List[Opportunity] = []
+    for row in page_rows:
+        current = _metrics(row, "current_metrics")
+        previous = _metrics(row, "previous_metrics")
+        impression_drop = _number(previous, "impressions") - _number(current, "impressions")
+        click_drop = _number(previous, "clicks") - _number(current, "clicks")
+        # Skip only when neither metric dropped by its threshold.
+        if impression_drop < min_impression_drop and click_drop < min_click_drop:
+            continue
+
+        opportunities.append(_opportunity(row, "declining_page", "p", "page_url", current, previous))
+    return opportunities
+
+
+def score_and_rank_opportunities(opportunities: List[Opportunity]) -> List[Opportunity]:
+    """Assign a simple priority score and return opportunities ordered by score.
+
+    Input items are not mutated: each result is a shallow copy with ``score``
+    set. Results are sorted by descending score, ties by descending ``id``.
+    """
+    scored: List[Opportunity] = []
+    for item in opportunities:
+        current = _metrics(item, "current_metrics")
+        previous = _metrics(item, "previous_metrics")
+        impressions = _number(current, "impressions")
+        clicks = _number(current, "clicks")
+        ctr = _number(current, "ctr")
+
+        # Movement in either direction adds priority; a click counts 10x an impression.
+        impression_shift = abs(impressions - _number(previous, "impressions"))
+        click_shift = abs(clicks - _number(previous, "clicks"))
+        momentum = impression_shift + (click_shift * 10)
+
+        multiplier = _REASON_MULTIPLIERS.get(item.get("reason"), 1.0)
+
+        score = (impressions * 0.05) + (clicks * 0.8) + ((100.0 - ctr) * 0.1) + (momentum * 0.15)
+        updated = dict(item)
+        updated["score"] = round(score * multiplier, 2)
+        scored.append(updated)
+
+    return sorted(scored, key=lambda x: (x.get("score", 0), str(x.get("id", ""))), reverse=True)
+
+
+def categorize_opportunities(
+    query_rows: List[MetricRow],
+    page_rows: List[MetricRow],
+) -> List[Opportunity]:
+    """Build all opportunity categories and return a stable, ranked schema.
+
+    A row that matches several categories appears once per category.
+    """
+    opportunities: List[Opportunity] = []
+    opportunities.extend(high_impression_low_ctr_queries(query_rows))
+    opportunities.extend(rising_queries(query_rows))
+    opportunities.extend(declining_pages(page_rows))
+    return score_and_rank_opportunities(opportunities)
diff --git a/backend/services/gsc_service.py b/backend/services/gsc_service.py
index 95dfabde..e6ea9a4a 100644
--- a/backend/services/gsc_service.py
+++ b/backend/services/gsc_service.py
@@ -393,6 +393,26 @@ class GSCService:
             # Return empty list instead of raising to prevent frontend 500s
             return []
 
+    def _calculate_previous_period(self, start_date: str, end_date: str):
+        """Calculate previous period date window matching current range length.
+
+        Both arguments are 'YYYY-MM-DD' strings. Returns a (start, end) pair
+        of 'YYYY-MM-DD' strings for the window that ends the day before
+        ``start_date`` and spans the same number of days (inclusive), or
+        ``(None, None)`` when the inputs are missing or cannot be parsed.
+        """
+        try:
+            start_dt = datetime.strptime(start_date, "%Y-%m-%d")
+            end_dt = datetime.strptime(end_date, "%Y-%m-%d")
+            # Inclusive day count, clamped to 1 so a reversed range still yields a window.
+            window_days = max((end_dt - start_dt).days + 1, 1)
+            prev_end = start_dt - timedelta(days=1)
+            prev_start = prev_end - timedelta(days=window_days - 1)
+            return prev_start.strftime("%Y-%m-%d"), prev_end.strftime("%Y-%m-%d")
+        except (TypeError, ValueError, OverflowError):
+            # Missing (None), malformed, or out-of-range dates: signal "no previous window".
+            return None, None
+
     def get_search_analytics(self, user_id: str, site_url: str, start_date: str = None,
                              end_date: str = None) -> Dict[str, Any]:
         """Get search analytics data from GSC."""
@@ -537,6 +557,41 @@ class GSCService:
                 qp_rows = []
                 qp_row_count = 0
 
+            # Optional previous-period windows for opportunity trend detection
+            prev_query_rows = []
+            prev_page_rows = []
+            prev_start_date, prev_end_date = self._calculate_previous_period(start_date, end_date)
+            if prev_start_date and prev_end_date:
+                try:
+                    prev_query_request = {
+                        'startDate': prev_start_date,
+                        'endDate': prev_end_date,
+                        'dimensions': ['query'],
+                        'rowLimit': 1000
+                    }
+                    prev_query_response = service.searchanalytics().query(
+                        siteUrl=site_url,
+                        body=prev_query_request
+                    ).execute()
+                    prev_query_rows = prev_query_response.get('rows', [])
+                except Exception as prev_query_error:
+                    logger.warning(f"GSC previous query request failed for user {user_id}: {prev_query_error}")
+
+                try:
+                    prev_page_request = {
+                        'startDate': prev_start_date,
+                        'endDate': prev_end_date,
+                        'dimensions': ['page'],
+                        'rowLimit': 1000
+                    }
+                    prev_page_response = service.searchanalytics().query(
+                        siteUrl=site_url,
+                        body=prev_page_request
+                    ).execute()
+                    prev_page_rows = prev_page_response.get('rows', [])
+                except Exception as prev_page_error:
+                    logger.warning(f"GSC previous page request failed for user {user_id}: {prev_page_error}")
+
             # Combine overall, query, page and query+page data
             analytics_data = {
                 'overall_metrics': {
@@ -555,6 +610,12 @@ class GSCService:
                     'rows': qp_rows,
                     'rowCount': qp_row_count
                 },
+                'previous_period': {
+                    'startDate': prev_start_date,
+                    'endDate': prev_end_date,
+                    'query_data': {'rows': prev_query_rows, 'rowCount': len(prev_query_rows)},
+                    'page_data': {'rows': prev_page_rows, 'rowCount': len(prev_page_rows)}
+                },
                 'verification_data': {
                     'rows': verification_rows,
                     'rowCount': len(verification_rows)
@@ -580,6 +641,12 @@ class GSCService:
                     'query_data': {'rows': [], 'rowCount': 0},
                     'page_data': {'rows': [], 'rowCount': 0},
                     'query_page_data': {'rows': [], 'rowCount': 0},
+                    'previous_period': {
+                        'startDate': None,
+                        'endDate': None,
+                        'query_data': {'rows': [], 'rowCount': 0},
+                        'page_data': {'rows': [], 'rowCount': 0}
+                    },
                     'verification_data': {
                         'rows': verification_rows,
                         'rowCount': len(verification_rows)