Files
ALwrity/backend/services/analytics/handlers/gsc_handler.py
ajaysi 1f087aad4c Bing Analytics and Insights added, background jobs added, database setup updated, environment setup updated, frontend updated, backend updated.
Onboarding Manager and Router Manager refactored, analytics and background jobs added, database setup updated, environment setup updated, frontend updated, backend updated.
Critical onboarding database migration implemented.
2025-10-18 10:28:15 +05:30

256 lines
11 KiB
Python

"""
Google Search Console Analytics Handler
Handles GSC analytics data retrieval and processing.
"""
from typing import Dict, Any
from datetime import datetime, timedelta
from loguru import logger
from services.gsc_service import GSCService
from ...analytics_cache_service import analytics_cache
from ..models.analytics_data import AnalyticsData
from ..models.platform_types import PlatformType
from .base_handler import BaseAnalyticsHandler
class GSCAnalyticsHandler(BaseAnalyticsHandler):
"""Handler for Google Search Console analytics"""
def __init__(self):
super().__init__(PlatformType.GSC)
self.gsc_service = GSCService()
async def get_analytics(self, user_id: str) -> AnalyticsData:
"""
Get Google Search Console analytics data with caching
Returns comprehensive SEO metrics including clicks, impressions, CTR, and position data.
"""
self.log_analytics_request(user_id, "get_analytics")
# Check cache first - GSC API calls can be expensive
cached_data = analytics_cache.get('gsc_analytics', user_id)
if cached_data:
logger.info("Using cached GSC analytics for user {user_id}", user_id=user_id)
return AnalyticsData(**cached_data)
logger.info("Fetching fresh GSC analytics for user {user_id}", user_id=user_id)
try:
# Get user's sites
sites = self.gsc_service.get_site_list(user_id)
logger.info(f"GSC Sites found for user {user_id}: {sites}")
if not sites:
logger.warning(f"No GSC sites found for user {user_id}")
return self.create_error_response('No GSC sites found')
# Get analytics for the first site (or combine all sites)
site_url = sites[0]['siteUrl']
logger.info(f"Using GSC site URL: {site_url}")
# Get search analytics for last 30 days
end_date = datetime.now().strftime('%Y-%m-%d')
start_date = (datetime.now() - timedelta(days=30)).strftime('%Y-%m-%d')
logger.info(f"GSC Date range: {start_date} to {end_date}")
search_analytics = self.gsc_service.get_search_analytics(
user_id=user_id,
site_url=site_url,
start_date=start_date,
end_date=end_date
)
logger.info(f"GSC Search analytics retrieved for user {user_id}")
# Process GSC data into standardized format
processed_metrics = self._process_gsc_metrics(search_analytics)
result = self.create_success_response(
metrics=processed_metrics,
date_range={'start': start_date, 'end': end_date}
)
# Cache the result to avoid expensive API calls
analytics_cache.set('gsc_analytics', user_id, result.__dict__)
logger.info("Cached GSC analytics data for user {user_id}", user_id=user_id)
return result
except Exception as e:
self.log_analytics_error(user_id, "get_analytics", e)
error_result = self.create_error_response(str(e))
# Cache error result for shorter time to retry sooner
analytics_cache.set('gsc_analytics', user_id, error_result.__dict__, ttl_override=300) # 5 minutes
return error_result
def get_connection_status(self, user_id: str) -> Dict[str, Any]:
"""Get GSC connection status"""
self.log_analytics_request(user_id, "get_connection_status")
try:
sites = self.gsc_service.get_site_list(user_id)
return {
'connected': len(sites) > 0,
'sites_count': len(sites),
'sites': sites[:3] if sites else [], # Show first 3 sites
'error': None
}
except Exception as e:
self.log_analytics_error(user_id, "get_connection_status", e)
return {
'connected': False,
'sites_count': 0,
'sites': [],
'error': str(e)
}
def _process_gsc_metrics(self, search_analytics: Dict[str, Any]) -> Dict[str, Any]:
"""Process GSC raw data into standardized metrics"""
try:
# Debug: Log the raw search analytics data structure
logger.info(f"GSC Raw search analytics structure: {search_analytics}")
logger.info(f"GSC Raw search analytics keys: {list(search_analytics.keys())}")
# Handle new data structure with overall_metrics and query_data
if 'overall_metrics' in search_analytics:
# New structure from updated GSC service
overall_rows = search_analytics.get('overall_metrics', {}).get('rows', [])
query_rows = search_analytics.get('query_data', {}).get('rows', [])
verification_rows = search_analytics.get('verification_data', {}).get('rows', [])
logger.info(f"GSC Overall metrics rows: {len(overall_rows)}")
logger.info(f"GSC Query data rows: {len(query_rows)}")
logger.info(f"GSC Verification rows: {len(verification_rows)}")
if overall_rows:
logger.info(f"GSC Overall first row: {overall_rows[0]}")
if query_rows:
logger.info(f"GSC Query first row: {query_rows[0]}")
# Use query_rows for detailed insights, overall_rows for summary
rows = query_rows if query_rows else overall_rows
else:
# Legacy structure
rows = search_analytics.get('rows', [])
logger.info(f"GSC Legacy rows count: {len(rows)}")
if rows:
logger.info(f"GSC Legacy first row structure: {rows[0]}")
logger.info(f"GSC Legacy first row keys: {list(rows[0].keys()) if rows[0] else 'No rows'}")
# Calculate summary metrics - handle different response formats
total_clicks = 0
total_impressions = 0
total_position = 0
valid_rows = 0
for row in rows:
# Handle different possible response formats
clicks = row.get('clicks', 0)
impressions = row.get('impressions', 0)
position = row.get('position', 0)
# If position is 0 or None, skip it from average calculation
if position and position > 0:
total_position += position
valid_rows += 1
total_clicks += clicks
total_impressions += impressions
avg_ctr = (total_clicks / total_impressions * 100) if total_impressions > 0 else 0
avg_position = total_position / valid_rows if valid_rows > 0 else 0
logger.info(f"GSC Calculated metrics - clicks: {total_clicks}, impressions: {total_impressions}, ctr: {avg_ctr}, position: {avg_position}, valid_rows: {valid_rows}")
# Get top performing queries - handle different data structures
if rows and 'keys' in rows[0]:
# New GSC API format with keys array
top_queries = sorted(rows, key=lambda x: x.get('clicks', 0), reverse=True)[:10]
# Get top performing pages (if we have page data)
page_data = {}
for row in rows:
# Handle different key structures
keys = row.get('keys', [])
if len(keys) > 1 and keys[1]: # Page data available
page = keys[1].get('keys', ['Unknown'])[0] if isinstance(keys[1], dict) else str(keys[1])
else:
page = 'Unknown'
if page not in page_data:
page_data[page] = {'clicks': 0, 'impressions': 0, 'ctr': 0, 'position': 0}
page_data[page]['clicks'] += row.get('clicks', 0)
page_data[page]['impressions'] += row.get('impressions', 0)
else:
# Legacy format or no keys structure
top_queries = sorted(rows, key=lambda x: x.get('clicks', 0), reverse=True)[:10]
page_data = {}
# Calculate page metrics
for page in page_data:
if page_data[page]['impressions'] > 0:
page_data[page]['ctr'] = page_data[page]['clicks'] / page_data[page]['impressions'] * 100
top_pages = sorted(page_data.items(), key=lambda x: x[1]['clicks'], reverse=True)[:10]
return {
'connection_status': 'connected',
'connected_sites': 1, # GSC typically has one site per user
'total_clicks': total_clicks,
'total_impressions': total_impressions,
'avg_ctr': round(avg_ctr, 2),
'avg_position': round(avg_position, 2),
'total_queries': len(rows),
'top_queries': [
{
'query': self._extract_query_from_row(row),
'clicks': row.get('clicks', 0),
'impressions': row.get('impressions', 0),
'ctr': round(row.get('ctr', 0) * 100, 2),
'position': round(row.get('position', 0), 2)
}
for row in top_queries
],
'top_pages': [
{
'page': page,
'clicks': data['clicks'],
'impressions': data['impressions'],
'ctr': round(data['ctr'], 2)
}
for page, data in top_pages
],
'note': 'Google Search Console provides search performance data, keyword rankings, and SEO insights'
}
except Exception as e:
logger.error(f"Error processing GSC metrics: {e}")
return {
'connection_status': 'error',
'connected_sites': 0,
'total_clicks': 0,
'total_impressions': 0,
'avg_ctr': 0,
'avg_position': 0,
'total_queries': 0,
'top_queries': [],
'top_pages': [],
'error': str(e)
}
def _extract_query_from_row(self, row: Dict[str, Any]) -> str:
"""Extract query text from GSC API row data"""
try:
keys = row.get('keys', [])
if keys and len(keys) > 0:
first_key = keys[0]
if isinstance(first_key, dict):
return first_key.get('keys', ['Unknown'])[0]
else:
return str(first_key)
return 'Unknown'
except Exception as e:
logger.error(f"Error extracting query from row: {e}")
return 'Unknown'