Save local changes (GSC/Bing integrations) before merging PR #354

This commit is contained in:
ajaysi
2026-02-13 13:11:27 +05:30
parent 43e66835ac
commit 08a1f4a1d8
144 changed files with 8310 additions and 2748 deletions

View File

@@ -20,12 +20,13 @@ class BaseAnalyticsHandler(ABC):
self.platform_name = platform_type.value
@abstractmethod
async def get_analytics(self, user_id: str) -> AnalyticsData:
async def get_analytics(self, user_id: str, **kwargs) -> AnalyticsData:
"""
Get analytics data for the platform
Args:
user_id: User ID to get analytics for
**kwargs: Additional arguments for specific handlers
Returns:
AnalyticsData object with platform metrics

View File

@@ -42,7 +42,7 @@ class BingAnalyticsHandler(BaseAnalyticsHandler):
db_url = f'sqlite:///{db_path}'
return BingInsightsService(db_url)
async def get_analytics(self, user_id: str) -> AnalyticsData:
async def get_analytics(self, user_id: str, target_url: str = None, **kwargs) -> AnalyticsData:
"""
Get Bing Webmaster analytics data using Bing Webmaster API
"""
@@ -83,9 +83,32 @@ class BingAnalyticsHandler(BaseAnalyticsHandler):
if not access_token:
return self.create_error_response('Bing Webmaster access token not available')
# Select site: Prefer target_url match, otherwise first site
selected_site = sites[0] if sites else None
if not selected_site:
return self.create_error_response('No Bing sites found')
if target_url and sites:
logger.info(f"Attempting to match target URL: {target_url}")
# Normalize target URL (remove protocol, trailing slash)
normalized_target = target_url.replace('https://', '').replace('http://', '').rstrip('/')
for site in sites:
# Bing uses 'Url' key
site_url = site.get('Url', '')
normalized_site = site_url.replace('https://', '').replace('http://', '').rstrip('/')
if normalized_target in normalized_site or normalized_site in normalized_target:
selected_site = site
logger.info(f"Found matching Bing site: {site_url}")
break
site_url_for_storage = selected_site.get('Url', '') if selected_site else ''
logger.info(f"Using Bing site URL: {site_url_for_storage}")
query_stats = {}
try:
site_url_for_storage = sites[0].get('Url', '') if (sites and isinstance(sites[0], dict)) else None
stored = storage_service.get_analytics_summary(user_id, site_url_for_storage, days=30)
if stored and isinstance(stored, dict):
query_stats = {
@@ -99,7 +122,7 @@ class BingAnalyticsHandler(BaseAnalyticsHandler):
logger.warning(f"Bing analytics: Failed to read stored analytics summary: {e}")
# Get enhanced insights
insights = self._get_enhanced_insights_with_service(insights_service, user_id, sites[0].get('Url', '') if sites else '')
insights = self._get_enhanced_insights_with_service(insights_service, user_id, site_url_for_storage)
metrics = {
'connection_status': 'connected',

View File

@@ -22,16 +22,22 @@ class GSCAnalyticsHandler(BaseAnalyticsHandler):
super().__init__(PlatformType.GSC)
self.gsc_service = GSCService()
async def get_analytics(self, user_id: str) -> AnalyticsData:
async def get_analytics(self, user_id: str, target_url: str = None, **kwargs) -> AnalyticsData:
"""
Get Google Search Console analytics data with caching
Args:
user_id: User ID to get analytics for
target_url: Optional URL to prefer when selecting GSC site
Returns comprehensive SEO metrics including clicks, impressions, CTR, and position data.
"""
self.log_analytics_request(user_id, "get_analytics")
# Check cache first - GSC API calls can be expensive
cached_data = analytics_cache.get('gsc_analytics', user_id)
# Include target_url in cache key if provided
cache_key = f"{user_id}_{target_url}" if target_url else user_id
cached_data = analytics_cache.get('gsc_analytics', cache_key)
if cached_data:
logger.info("Using cached GSC analytics for user {user_id}", user_id=user_id)
return AnalyticsData(**cached_data)
@@ -45,8 +51,23 @@ class GSCAnalyticsHandler(BaseAnalyticsHandler):
logger.warning(f"No GSC sites found for user {user_id}")
return self.create_error_response('No GSC sites found')
# Get analytics for the first site (or combine all sites)
site_url = sites[0]['siteUrl']
# Select site: Prefer target_url match, otherwise first site
selected_site = sites[0]
if target_url:
logger.info(f"Attempting to match target URL: {target_url}")
# Normalize target URL (remove protocol, trailing slash)
normalized_target = target_url.replace('https://', '').replace('http://', '').rstrip('/')
for site in sites:
site_url = site['siteUrl']
normalized_site = site_url.replace('https://', '').replace('http://', '').rstrip('/')
if normalized_target in normalized_site or normalized_site in normalized_target:
selected_site = site
logger.info(f"Found matching GSC site: {site_url}")
break
site_url = selected_site['siteUrl']
logger.info(f"Using GSC site URL: {site_url}")
# Get search analytics for last 30 days
@@ -71,7 +92,7 @@ class GSCAnalyticsHandler(BaseAnalyticsHandler):
)
# Cache the result to avoid expensive API calls
analytics_cache.set('gsc_analytics', user_id, result.__dict__)
analytics_cache.set('gsc_analytics', cache_key, result.__dict__)
logger.info("Cached GSC analytics data for user {user_id}", user_id=user_id)
return result
@@ -81,7 +102,7 @@ class GSCAnalyticsHandler(BaseAnalyticsHandler):
error_result = self.create_error_response(str(e))
# Cache error result for shorter time to retry sooner
analytics_cache.set('gsc_analytics', user_id, error_result.__dict__, ttl_override=300) # 5 minutes
analytics_cache.set('gsc_analytics', cache_key, error_result.__dict__, ttl_override=300) # 5 minutes
return error_result
def get_connection_status(self, user_id: str) -> Dict[str, Any]:
@@ -117,111 +138,93 @@ class GSCAnalyticsHandler(BaseAnalyticsHandler):
# New structure from updated GSC service
overall_rows = search_analytics.get('overall_metrics', {}).get('rows', [])
query_rows = search_analytics.get('query_data', {}).get('rows', [])
verification_rows = search_analytics.get('verification_data', {}).get('rows', [])
logger.info(f"GSC Overall metrics rows: {len(overall_rows)}")
logger.info(f"GSC Query data rows: {len(query_rows)}")
logger.info(f"GSC Verification rows: {len(verification_rows)}")
# Calculate totals from overall_rows (most accurate as it includes anonymized queries)
total_clicks = 0
total_impressions = 0
total_position = 0
valid_position_rows = 0
if overall_rows:
logger.info(f"GSC Overall first row: {overall_rows[0]}")
if query_rows:
logger.info(f"GSC Query first row: {query_rows[0]}")
# Use overall_rows for totals if available, otherwise fallback to query_rows
calc_rows = overall_rows if overall_rows else query_rows
for row in calc_rows:
clicks = row.get('clicks', 0)
impressions = row.get('impressions', 0)
position = row.get('position', 0)
total_clicks += clicks
total_impressions += impressions
if position and position > 0:
total_position += position * impressions # Weighted average
# Calculate weighted average position
avg_position = total_position / total_impressions if total_impressions > 0 else 0
avg_ctr = (total_clicks / total_impressions * 100) if total_impressions > 0 else 0
# Use query_rows for top queries list
top_queries_source = query_rows
# Use query_rows for detailed insights, overall_rows for summary
rows = query_rows if query_rows else overall_rows
else:
# Legacy structure
rows = search_analytics.get('rows', [])
logger.info(f"GSC Legacy rows count: {len(rows)}")
if rows:
logger.info(f"GSC Legacy first row structure: {rows[0]}")
logger.info(f"GSC Legacy first row keys: {list(rows[0].keys()) if rows[0] else 'No rows'}")
# Calculate summary metrics - handle different response formats
total_clicks = 0
total_impressions = 0
total_position = 0
valid_rows = 0
for row in rows:
# Handle different possible response formats
clicks = row.get('clicks', 0)
impressions = row.get('impressions', 0)
position = row.get('position', 0)
# ... existing legacy logic ...
calc_rows = rows
top_queries_source = rows
# If position is 0 or None, skip it from average calculation
if position and position > 0:
total_position += position
valid_rows += 1
total_clicks = 0
total_impressions = 0
total_position = 0
valid_position_rows = 0
total_clicks += clicks
total_impressions += impressions
avg_ctr = (total_clicks / total_impressions * 100) if total_impressions > 0 else 0
avg_position = total_position / valid_rows if valid_rows > 0 else 0
logger.info(f"GSC Calculated metrics - clicks: {total_clicks}, impressions: {total_impressions}, ctr: {avg_ctr}, position: {avg_position}, valid_rows: {valid_rows}")
# Get top performing queries - handle different data structures
if rows and 'keys' in rows[0]:
# New GSC API format with keys array
top_queries = sorted(rows, key=lambda x: x.get('clicks', 0), reverse=True)[:10]
# Get top performing pages (if we have page data)
page_data = {}
for row in rows:
# Handle different key structures
keys = row.get('keys', [])
if len(keys) > 1 and keys[1]: # Page data available
page = keys[1].get('keys', ['Unknown'])[0] if isinstance(keys[1], dict) else str(keys[1])
else:
page = 'Unknown'
for row in calc_rows:
clicks = row.get('clicks', 0)
impressions = row.get('impressions', 0)
position = row.get('position', 0)
if page not in page_data:
page_data[page] = {'clicks': 0, 'impressions': 0, 'ctr': 0, 'position': 0}
page_data[page]['clicks'] += row.get('clicks', 0)
page_data[page]['impressions'] += row.get('impressions', 0)
else:
# Legacy format or no keys structure
top_queries = sorted(rows, key=lambda x: x.get('clicks', 0), reverse=True)[:10]
page_data = {}
total_clicks += clicks
total_impressions += impressions
if position and position > 0:
# Simple average for legacy/unknown structure if we can't do weighted
total_position += position
valid_position_rows += 1
avg_ctr = (total_clicks / total_impressions * 100) if total_impressions > 0 else 0
avg_position = total_position / valid_position_rows if valid_position_rows > 0 else 0
# Calculate page metrics
for page in page_data:
if page_data[page]['impressions'] > 0:
page_data[page]['ctr'] = page_data[page]['clicks'] / page_data[page]['impressions'] * 100
top_pages = sorted(page_data.items(), key=lambda x: x[1]['clicks'], reverse=True)[:10]
return {
'connection_status': 'connected',
'connected_sites': 1, # GSC typically has one site per user
'total_clicks': total_clicks,
'total_impressions': total_impressions,
'avg_ctr': round(avg_ctr, 2),
'avg_position': round(avg_position, 2),
'total_queries': len(rows),
'top_queries': [
{
# Get top performing queries
top_queries = []
if top_queries_source:
# Sort by clicks
sorted_queries = sorted(top_queries_source, key=lambda x: x.get('clicks', 0), reverse=True)[:10]
for row in sorted_queries:
top_queries.append({
'query': self._extract_query_from_row(row),
'clicks': row.get('clicks', 0),
'impressions': row.get('impressions', 0),
'ctr': round(row.get('ctr', 0) * 100, 2),
'position': round(row.get('position', 0), 2)
}
for row in top_queries
],
'top_pages': [
{
'page': page,
'clicks': data['clicks'],
'impressions': data['impressions'],
'ctr': round(data['ctr'], 2)
}
for page, data in top_pages
],
'note': 'Google Search Console provides search performance data, keyword rankings, and SEO insights'
})
# Top pages need the 'page' dimension, but gsc_service step 3 only requests the 'query' dimension.
# Populating them would take an additional API call that requests the 'page' dimension.
# Until then top_pages stays empty: page-level data cannot be inferred from query-level rows.
top_pages = []
return {
'connection_status': 'connected',
'connected_sites': 1,
'total_clicks': total_clicks,
'total_impressions': total_impressions,
'avg_ctr': round(avg_ctr, 2),
'avg_position': round(avg_position, 2),
'total_queries': len(top_queries_source) if top_queries_source else 0,
'top_queries': top_queries,
'top_pages': top_pages
}
except Exception as e:

View File

@@ -59,6 +59,32 @@ class PlatformAnalyticsService:
logger.info(f"Getting comprehensive analytics for user {user_id}, platforms: {platforms}")
analytics_data = {}
# Determine the target URL from the connected Wix/WordPress site; it is passed to the GSC and Bing handlers for site selection
target_url = None
try:
status = await self.get_platform_connection_status(user_id)
# Check Wix
if status.get('wix', {}).get('connected'):
sites = status['wix'].get('sites', [])
if sites:
# Assuming site object has 'blog_url' or 'url'
site = sites[0]
target_url = site.get('blog_url') or site.get('url')
# Check WordPress if no Wix
if not target_url and status.get('wordpress', {}).get('connected'):
sites = status['wordpress'].get('sites', [])
if sites:
site = sites[0]
target_url = site.get('blog_url') or site.get('url')
if target_url:
logger.info(f"Identified primary site URL for GSC matching: {target_url}")
except Exception as e:
logger.warning(f"Failed to determine target URL for GSC: {e}")
for platform_name in platforms:
try:
# Convert string to PlatformType enum
@@ -66,7 +92,10 @@ class PlatformAnalyticsService:
handler = self.handlers.get(platform_type)
if handler:
analytics_data[platform_name] = await handler.get_analytics(user_id)
if platform_type == PlatformType.GSC or platform_type == PlatformType.BING:
analytics_data[platform_name] = await handler.get_analytics(user_id, target_url=target_url)
else:
analytics_data[platform_name] = await handler.get_analytics(user_id)
else:
logger.warning(f"Unknown platform: {platform_name}")
analytics_data[platform_name] = self._create_error_response(platform_name, f"Unknown platform: {platform_name}")