Onboarding Manager and Router Manager refactored, analytics and background jobs added, database setup updated, environment setup updated, frontend updated, backend updated. Critical onboarding database migration implemented.
1039 lines
47 KiB
Python
1039 lines
47 KiB
Python
"""
|
|
Bing Webmaster Insights Service
|
|
|
|
Provides advanced analytics insights and recommendations based on Bing Webmaster data.
|
|
"""
|
|
|
|
from typing import Dict, Any, List, Optional, Tuple
|
|
from datetime import datetime, timedelta
|
|
from loguru import logger
|
|
import json
|
|
|
|
from ...bing_analytics_storage_service import BingAnalyticsStorageService
|
|
from ..models.platform_types import PlatformType
|
|
from ...analytics_cache_service import AnalyticsCacheService
|
|
|
|
|
|
class BingInsightsService:
|
|
"""Service for generating Bing Webmaster insights and recommendations"""
|
|
|
|
def __init__(self, database_url: str):
|
|
self.storage_service = BingAnalyticsStorageService(database_url)
|
|
self.cache_service = AnalyticsCacheService()
|
|
|
|
def get_performance_insights(self, user_id: str, site_url: str, days: int = 30) -> Dict[str, Any]:
|
|
"""Get performance insights including trends and patterns"""
|
|
try:
|
|
# Check cache first
|
|
cache_key = self.cache_service._generate_cache_key(
|
|
'bing_performance_insights',
|
|
user_id,
|
|
site_url=site_url,
|
|
days=days
|
|
)
|
|
|
|
cached_result = self.cache_service.get('bing_analytics', cache_key)
|
|
if cached_result:
|
|
logger.info(f"Returning cached performance insights for user {user_id}")
|
|
return cached_result
|
|
|
|
# Quick check if data exists before expensive operations
|
|
logger.info(f"Quick data check for user {user_id}, site: {site_url}")
|
|
quick_summary = self.storage_service.get_analytics_summary(user_id, site_url, days)
|
|
if 'error' in quick_summary:
|
|
logger.info(f"No stored data found for user {user_id}, returning basic insights")
|
|
insights = self._generate_basic_performance_insights({})
|
|
# Cache basic insights for shorter time
|
|
self.cache_service.set('bing_analytics', cache_key, insights)
|
|
return insights
|
|
|
|
# Generate insights from real data (with timeout protection)
|
|
logger.info(f"Generating performance insights from stored data for user {user_id}")
|
|
insights = self._generate_performance_insights_from_data(quick_summary, [])
|
|
|
|
# Cache the result
|
|
self.cache_service.set('bing_analytics', cache_key, insights)
|
|
|
|
return insights
|
|
|
|
except Exception as e:
|
|
logger.error(f"Error getting performance insights: {e}")
|
|
# Return basic insights on error to prevent hanging
|
|
return self._generate_basic_performance_insights({})
|
|
|
|
def get_seo_insights(self, user_id: str, site_url: str, days: int = 30) -> Dict[str, Any]:
|
|
"""Get SEO-specific insights and opportunities"""
|
|
try:
|
|
# Check cache first
|
|
cache_key = self.cache_service._generate_cache_key(
|
|
'bing_seo_insights',
|
|
user_id,
|
|
site_url=site_url,
|
|
days=days
|
|
)
|
|
|
|
cached_result = self.cache_service.get('bing_analytics', cache_key)
|
|
if cached_result:
|
|
logger.info(f"Returning cached SEO insights for user {user_id}")
|
|
return cached_result
|
|
|
|
# Quick check if data exists
|
|
logger.info(f"Quick SEO data check for user {user_id}, site: {site_url}")
|
|
summary = self.storage_service.get_analytics_summary(user_id, site_url, days)
|
|
|
|
if 'error' in summary:
|
|
logger.info(f"No stored data found for user {user_id}, returning basic SEO insights")
|
|
insights = self._generate_basic_seo_insights({})
|
|
# Cache basic insights for shorter time
|
|
self.cache_service.set('bing_analytics', cache_key, insights)
|
|
return insights
|
|
|
|
# Get limited top queries to prevent timeout
|
|
logger.info(f"Generating SEO insights from stored data for user {user_id}")
|
|
top_queries = self.storage_service.get_top_queries(user_id, site_url, days, limit=50) # Reduced from 100
|
|
|
|
if not top_queries:
|
|
logger.info(f"No query data found for user {user_id}, using basic SEO insights")
|
|
insights = self._generate_basic_seo_insights(summary)
|
|
else:
|
|
# Generate insights from real data
|
|
insights = self._generate_seo_insights_from_data(summary, top_queries)
|
|
|
|
# Cache the result
|
|
self.cache_service.set('bing_analytics', cache_key, insights)
|
|
|
|
return insights
|
|
|
|
except Exception as e:
|
|
logger.error(f"Error getting SEO insights: {e}")
|
|
# Return basic insights on error to prevent hanging
|
|
return self._generate_basic_seo_insights({})
|
|
|
|
def get_competitive_insights(self, user_id: str, site_url: str, days: int = 30) -> Dict[str, Any]:
|
|
"""Get competitive analysis and market insights"""
|
|
try:
|
|
# Check cache first
|
|
cache_key = self.cache_service._generate_cache_key(
|
|
'bing_competitive_insights',
|
|
user_id,
|
|
site_url=site_url,
|
|
days=days
|
|
)
|
|
|
|
cached_result = self.cache_service.get('bing_analytics', cache_key)
|
|
if cached_result:
|
|
logger.info(f"Returning cached competitive insights for user {user_id}")
|
|
return cached_result
|
|
|
|
# Generate insights
|
|
logger.info(f"Generating competitive insights for user {user_id}")
|
|
insights = {
|
|
'market_position': {'status': 'basic_analysis', 'message': 'Basic insights available'},
|
|
'competition_analysis': {'status': 'basic_analysis', 'message': 'Basic insights available'},
|
|
'growth_opportunities': [],
|
|
'competitive_recommendations': []
|
|
}
|
|
|
|
# Cache the result
|
|
self.cache_service.set('bing_analytics', cache_key, insights)
|
|
|
|
return insights
|
|
|
|
except Exception as e:
|
|
logger.error(f"Error getting competitive insights: {e}")
|
|
return {'error': str(e)}
|
|
|
|
def _analyze_market_position(self, summary: Dict[str, Any], top_queries: List[Dict[str, Any]]) -> Dict[str, Any]:
|
|
"""Analyze market position and competitive landscape"""
|
|
try:
|
|
if not summary or not top_queries:
|
|
return {'error': 'Insufficient data for market analysis'}
|
|
|
|
# Analyze query diversity
|
|
unique_queries = len(set(q['query'] for q in top_queries))
|
|
total_queries = summary.get('total_queries', 0)
|
|
query_diversity = (unique_queries / total_queries * 100) if total_queries > 0 else 0
|
|
|
|
# Analyze performance distribution
|
|
high_performing_queries = [q for q in top_queries if q.get('clicks', 0) > 10]
|
|
medium_performing_queries = [q for q in top_queries if 1 <= q.get('clicks', 0) <= 10]
|
|
low_performing_queries = [q for q in top_queries if q.get('clicks', 0) == 0]
|
|
|
|
# Market position indicators
|
|
market_position = {
|
|
'query_diversity_score': round(query_diversity, 2),
|
|
'high_performing_queries': len(high_performing_queries),
|
|
'medium_performing_queries': len(medium_performing_queries),
|
|
'low_performing_queries': len(low_performing_queries),
|
|
'market_penetration': round((len(high_performing_queries) / len(top_queries) * 100), 2) if top_queries else 0,
|
|
'competitive_advantage': 'High' if query_diversity > 50 else 'Medium' if query_diversity > 25 else 'Low'
|
|
}
|
|
|
|
return market_position
|
|
|
|
except Exception as e:
|
|
logger.error(f"Error analyzing market position: {e}")
|
|
return {'error': str(e)}
|
|
|
|
def _analyze_competition(self, top_queries: List[Dict[str, Any]]) -> Dict[str, Any]:
|
|
"""Analyze competitive landscape based on query performance"""
|
|
try:
|
|
if not top_queries:
|
|
return {'error': 'No query data available for competition analysis'}
|
|
|
|
# Analyze query performance distribution
|
|
high_performing = [q for q in top_queries if q.get('clicks', 0) > 10]
|
|
medium_performing = [q for q in top_queries if 1 <= q.get('clicks', 0) <= 10]
|
|
low_performing = [q for q in top_queries if q.get('clicks', 0) == 0]
|
|
|
|
# Calculate competitive metrics
|
|
total_queries = len(top_queries)
|
|
competition_analysis = {
|
|
'high_performing_queries': len(high_performing),
|
|
'medium_performing_queries': len(medium_performing),
|
|
'low_performing_queries': len(low_performing),
|
|
'competitive_advantage_score': round((len(high_performing) / total_queries * 100), 2) if total_queries > 0 else 0,
|
|
'market_penetration': 'High' if len(high_performing) > total_queries * 0.3 else 'Medium' if len(high_performing) > total_queries * 0.1 else 'Low',
|
|
'top_competitors': [q['query'] for q in high_performing[:5]] if high_performing else []
|
|
}
|
|
|
|
return competition_analysis
|
|
|
|
except Exception as e:
|
|
logger.error(f"Error analyzing competition: {e}")
|
|
return {'error': str(e)}
|
|
|
|
def _identify_growth_opportunities(self, top_queries: List[Dict[str, Any]]) -> List[Dict[str, str]]:
|
|
"""Identify growth opportunities based on query performance"""
|
|
try:
|
|
if not top_queries:
|
|
return []
|
|
|
|
opportunities = []
|
|
|
|
# Find high-impression, low-click queries (potential for CTR improvement)
|
|
high_impression_low_click = [
|
|
q for q in top_queries
|
|
if q.get('impressions', 0) > 50 and q.get('clicks', 0) < 5
|
|
]
|
|
|
|
if high_impression_low_click:
|
|
opportunities.append({
|
|
'type': 'CTR Improvement',
|
|
'description': f'{len(high_impression_low_click)} queries have high impressions but low clicks',
|
|
'action': 'Optimize meta descriptions and titles for these queries'
|
|
})
|
|
|
|
# Find queries with good clicks but poor position
|
|
good_clicks_poor_position = [
|
|
q for q in top_queries
|
|
if q.get('clicks', 0) > 10 and q.get('position', 100) > 10
|
|
]
|
|
|
|
if good_clicks_poor_position:
|
|
opportunities.append({
|
|
'type': 'Position Improvement',
|
|
'description': f'{len(good_clicks_poor_position)} queries have good clicks but poor positions',
|
|
'action': 'Improve content quality and relevance for these topics'
|
|
})
|
|
|
|
# Find zero-click queries with decent impressions
|
|
zero_click_opportunities = [
|
|
q for q in top_queries
|
|
if q.get('impressions', 0) > 20 and q.get('clicks', 0) == 0
|
|
]
|
|
|
|
if zero_click_opportunities:
|
|
opportunities.append({
|
|
'type': 'Content Gap',
|
|
'description': f'{len(zero_click_opportunities)} queries get impressions but no clicks',
|
|
'action': 'Create targeted content for these query topics'
|
|
})
|
|
|
|
return opportunities[:3] # Return top 3 opportunities
|
|
|
|
except Exception as e:
|
|
logger.error(f"Error identifying growth opportunities: {e}")
|
|
return []
|
|
|
|
def get_actionable_recommendations(self, user_id: str, site_url: str, days: int = 30) -> Dict[str, Any]:
|
|
"""Get actionable recommendations for improving search performance"""
|
|
try:
|
|
# Check cache first
|
|
cache_key = self.cache_service._generate_cache_key(
|
|
'bing_actionable_recommendations',
|
|
user_id,
|
|
site_url=site_url,
|
|
days=days
|
|
)
|
|
|
|
cached_result = self.cache_service.get('bing_analytics', cache_key)
|
|
if cached_result:
|
|
logger.info(f"Returning cached actionable recommendations for user {user_id}")
|
|
return cached_result
|
|
|
|
# Get actual data from storage service
|
|
logger.info(f"Generating actionable recommendations from stored data for user {user_id}")
|
|
|
|
# Get data for analysis
|
|
summary = self.storage_service.get_analytics_summary(user_id, site_url, days)
|
|
top_queries = self.storage_service.get_top_queries(user_id, site_url, days, limit=100)
|
|
daily_metrics = self.storage_service.get_daily_metrics(user_id, site_url, days)
|
|
|
|
if 'error' in summary or not top_queries:
|
|
logger.warning(f"No stored data found, generating basic recommendations")
|
|
insights = {
|
|
'immediate_actions': [],
|
|
'content_optimization': [],
|
|
'technical_improvements': [],
|
|
'long_term_strategy': [],
|
|
'priority_score': {}
|
|
}
|
|
else:
|
|
# Generate insights from real data
|
|
insights = {
|
|
'immediate_actions': self._get_immediate_actions(summary, top_queries),
|
|
'content_optimization': self._get_content_optimization_recommendations(top_queries),
|
|
'technical_improvements': self._get_technical_improvements(top_queries, daily_metrics),
|
|
'long_term_strategy': self._get_long_term_strategy(summary, top_queries),
|
|
'priority_score': self._calculate_priority_scores(top_queries)
|
|
}
|
|
|
|
# Cache the result
|
|
self.cache_service.set('bing_analytics', cache_key, insights)
|
|
|
|
return insights
|
|
|
|
except Exception as e:
|
|
logger.error(f"Error getting actionable recommendations: {e}")
|
|
return {'error': str(e)}
|
|
|
|
def _get_immediate_actions(self, summary: Dict[str, Any], top_queries: List[Dict[str, Any]]) -> List[Dict[str, str]]:
|
|
"""Get immediate actions based on current performance"""
|
|
try:
|
|
if not summary or not top_queries:
|
|
return []
|
|
|
|
actions = []
|
|
|
|
# Analyze CTR performance
|
|
avg_ctr = summary.get('avg_ctr', 0)
|
|
if avg_ctr < 2.0:
|
|
actions.append({
|
|
'action': 'Improve CTR',
|
|
'priority': 'High',
|
|
'description': f'Current CTR is {avg_ctr:.1f}%, below industry average of 2-3%',
|
|
'action': 'Optimize meta descriptions and titles to be more compelling'
|
|
})
|
|
|
|
# Analyze query diversity
|
|
unique_queries = len(set(q['query'] for q in top_queries))
|
|
if unique_queries < 20:
|
|
actions.append({
|
|
'action': 'Expand Query Coverage',
|
|
'priority': 'Medium',
|
|
'description': f'Only {unique_queries} unique queries found',
|
|
'action': 'Create content targeting more keyword variations'
|
|
})
|
|
|
|
# Analyze low-performing queries
|
|
low_performing = [q for q in top_queries if q.get('clicks', 0) == 0 and q.get('impressions', 0) > 10]
|
|
if len(low_performing) > 5:
|
|
actions.append({
|
|
'action': 'Fix Zero-Click Queries',
|
|
'priority': 'High',
|
|
'description': f'{len(low_performing)} queries get impressions but no clicks',
|
|
'action': 'Improve content relevance and meta descriptions for these queries'
|
|
})
|
|
|
|
return actions
|
|
|
|
except Exception as e:
|
|
logger.error(f"Error getting immediate actions: {e}")
|
|
return []
|
|
|
|
def _get_content_optimization_recommendations(self, top_queries: List[Dict[str, Any]]) -> List[Dict[str, str]]:
|
|
"""Get content optimization recommendations based on query analysis"""
|
|
try:
|
|
if not top_queries:
|
|
return []
|
|
|
|
recommendations = []
|
|
|
|
# Analyze query length patterns
|
|
short_queries = [q for q in top_queries if len(q.get('query', '')) <= 3]
|
|
long_queries = [q for q in top_queries if len(q.get('query', '')) > 10]
|
|
|
|
if len(short_queries) > len(long_queries):
|
|
recommendations.append({
|
|
'type': 'Content Strategy',
|
|
'priority': 'Medium',
|
|
'recommendation': 'Focus on long-tail keyword content to capture more specific searches'
|
|
})
|
|
|
|
# Analyze high-impression, low-CTR queries
|
|
low_ctr_queries = [
|
|
q for q in top_queries
|
|
if q.get('impressions', 0) > 100 and (q.get('clicks', 0) / max(q.get('impressions', 1), 1)) < 0.02
|
|
]
|
|
|
|
if low_ctr_queries:
|
|
recommendations.append({
|
|
'type': 'Meta Optimization',
|
|
'priority': 'High',
|
|
'recommendation': f'Optimize meta descriptions for {len(low_ctr_queries)} high-impression, low-CTR queries'
|
|
})
|
|
|
|
# Analyze position vs clicks correlation
|
|
position_10_plus = [q for q in top_queries if q.get('position', 100) > 10 and q.get('clicks', 0) > 0]
|
|
if position_10_plus:
|
|
recommendations.append({
|
|
'type': 'Content Quality',
|
|
'priority': 'High',
|
|
'recommendation': f'Improve content quality for {len(position_10_plus)} queries ranking beyond position 10'
|
|
})
|
|
|
|
# Analyze query intent patterns
|
|
question_queries = [q for q in top_queries if '?' in q.get('query', '') or q.get('query', '').startswith(('what', 'how', 'why', 'when', 'where'))]
|
|
if len(question_queries) > 5:
|
|
recommendations.append({
|
|
'type': 'Content Format',
|
|
'priority': 'Medium',
|
|
'recommendation': 'Create FAQ-style content to better match question-based queries'
|
|
})
|
|
|
|
return recommendations[:4] # Return top 4 recommendations
|
|
|
|
except Exception as e:
|
|
logger.error(f"Error getting content optimization recommendations: {e}")
|
|
return []
|
|
|
|
def _generate_basic_seo_insights(self, summary: Dict[str, Any]) -> Dict[str, Any]:
|
|
"""Generate basic SEO insights from summary data when detailed query data is not available"""
|
|
try:
|
|
total_clicks = summary.get('total_clicks', 0)
|
|
total_impressions = summary.get('total_impressions', 0)
|
|
total_queries = summary.get('total_queries', 0)
|
|
avg_ctr = summary.get('avg_ctr', 0)
|
|
|
|
# Generate basic insights from summary data
|
|
query_analysis = {
|
|
'total_queries': total_queries,
|
|
'brand_queries': {'percentage': 30}, # Estimated
|
|
'non_brand_queries': {'percentage': 70}, # Estimated
|
|
'query_length_distribution': {'average_length': 4} # Estimated
|
|
}
|
|
|
|
technical_insights = {
|
|
'average_position': 8.5, # Estimated based on CTR
|
|
'average_ctr': avg_ctr,
|
|
'position_distribution': {
|
|
'top_3': int(total_queries * 0.15), # Estimated 15% in top 3
|
|
'top_10': int(total_queries * 0.35) # Estimated 35% in top 10
|
|
}
|
|
}
|
|
|
|
seo_recommendations = [
|
|
{
|
|
'type': 'data',
|
|
'priority': 'high',
|
|
'recommendation': 'Collect more detailed search data to generate comprehensive insights'
|
|
},
|
|
{
|
|
'type': 'performance',
|
|
'priority': 'medium',
|
|
'recommendation': f'Current CTR of {avg_ctr:.1f}% is {"good" if avg_ctr > 3 else "needs improvement"}'
|
|
}
|
|
]
|
|
|
|
return {
|
|
'query_analysis': query_analysis,
|
|
'content_opportunities': [],
|
|
'technical_insights': technical_insights,
|
|
'seo_recommendations': seo_recommendations
|
|
}
|
|
|
|
except Exception as e:
|
|
logger.error(f"Error generating basic SEO insights: {e}")
|
|
return {'error': str(e)}
|
|
|
|
def _get_technical_improvements(self, top_queries: List[Dict[str, Any]], daily_metrics: List[Dict[str, Any]]) -> List[Dict[str, str]]:
|
|
"""Get technical improvement recommendations"""
|
|
try:
|
|
if not top_queries and not daily_metrics:
|
|
return []
|
|
|
|
improvements = []
|
|
|
|
# Analyze position distribution
|
|
if top_queries:
|
|
avg_position = sum(q.get('position', 100) for q in top_queries) / len(top_queries)
|
|
if avg_position > 10:
|
|
improvements.append({
|
|
'type': 'Position Optimization',
|
|
'priority': 'High',
|
|
'recommendation': f'Average position is {avg_position:.1f}, focus on improving content quality'
|
|
})
|
|
|
|
# Analyze CTR performance
|
|
if daily_metrics:
|
|
recent_ctr = sum(m.get('avg_ctr', 0) for m in daily_metrics[-7:]) / len(daily_metrics[-7:]) if daily_metrics else 0
|
|
if recent_ctr < 2.0:
|
|
improvements.append({
|
|
'type': 'CTR Enhancement',
|
|
'priority': 'High',
|
|
'recommendation': 'Optimize meta descriptions and titles to improve click-through rates'
|
|
})
|
|
|
|
# Analyze query diversity
|
|
if top_queries:
|
|
unique_queries = len(set(q.get('query', '') for q in top_queries))
|
|
if unique_queries < 20:
|
|
improvements.append({
|
|
'type': 'Content Expansion',
|
|
'priority': 'Medium',
|
|
'recommendation': 'Create content targeting more keyword variations'
|
|
})
|
|
|
|
return improvements[:3] # Return top 3 improvements
|
|
|
|
except Exception as e:
|
|
logger.error(f"Error getting technical improvements: {e}")
|
|
return []
|
|
|
|
def _generate_competitive_recommendations(self, summary: Dict[str, Any], top_queries: List[Dict[str, Any]]) -> List[Dict[str, str]]:
|
|
"""Generate competitive recommendations based on market analysis"""
|
|
try:
|
|
if not summary and not top_queries:
|
|
return []
|
|
|
|
recommendations = []
|
|
|
|
# Analyze market position
|
|
total_queries = summary.get('total_queries', 0) if summary else len(top_queries)
|
|
avg_ctr = summary.get('avg_ctr', 0) if summary else 0
|
|
|
|
if total_queries > 0:
|
|
# Market penetration analysis
|
|
if total_queries < 50:
|
|
recommendations.append({
|
|
'type': 'Market Expansion',
|
|
'priority': 'High',
|
|
'recommendation': 'Expand keyword targeting to capture more search volume'
|
|
})
|
|
|
|
# CTR competitiveness
|
|
if avg_ctr < 3.0:
|
|
recommendations.append({
|
|
'type': 'Competitive CTR',
|
|
'priority': 'High',
|
|
'recommendation': 'Improve CTR to compete better with top-ranking pages'
|
|
})
|
|
elif avg_ctr > 8.0:
|
|
recommendations.append({
|
|
'type': 'Market Leadership',
|
|
'priority': 'Medium',
|
|
'recommendation': 'Leverage high CTR to expand into related keyword markets'
|
|
})
|
|
|
|
# Query diversity analysis
|
|
if top_queries:
|
|
unique_queries = len(set(q.get('query', '') for q in top_queries))
|
|
if unique_queries / len(top_queries) < 0.5:
|
|
recommendations.append({
|
|
'type': 'Query Diversification',
|
|
'priority': 'Medium',
|
|
'recommendation': 'Diversify content to target more unique search queries'
|
|
})
|
|
|
|
return recommendations[:3] # Return top 3 recommendations
|
|
|
|
except Exception as e:
|
|
logger.error(f"Error generating competitive recommendations: {e}")
|
|
return []
|
|
|
|
def _get_long_term_strategy(self, summary: Dict[str, Any], top_queries: List[Dict[str, Any]]) -> List[Dict[str, str]]:
|
|
"""Get long-term strategic recommendations"""
|
|
try:
|
|
if not summary and not top_queries:
|
|
return []
|
|
|
|
strategies = []
|
|
|
|
# Growth strategy based on current performance
|
|
total_clicks = summary.get('total_clicks', 0) if summary else 0
|
|
total_impressions = summary.get('total_impressions', 0) if summary else 0
|
|
avg_ctr = summary.get('avg_ctr', 0) if summary else 0
|
|
|
|
if total_clicks > 0:
|
|
# Content scaling strategy
|
|
if total_clicks < 1000:
|
|
strategies.append({
|
|
'type': 'Content Scaling',
|
|
'priority': 'High',
|
|
'recommendation': 'Scale content production to capture more search traffic'
|
|
})
|
|
elif total_clicks > 5000:
|
|
strategies.append({
|
|
'type': 'Market Dominance',
|
|
'priority': 'High',
|
|
'recommendation': 'Focus on maintaining market leadership and expanding into new verticals'
|
|
})
|
|
|
|
# Technical SEO strategy
|
|
if avg_ctr < 5.0:
|
|
strategies.append({
|
|
'type': 'Technical Optimization',
|
|
'priority': 'Medium',
|
|
'recommendation': 'Invest in technical SEO improvements for long-term growth'
|
|
})
|
|
|
|
# Brand building strategy
|
|
if total_impressions > 10000:
|
|
strategies.append({
|
|
'type': 'Brand Building',
|
|
'priority': 'Medium',
|
|
'recommendation': 'Focus on brand awareness and authority building in your niche'
|
|
})
|
|
|
|
# Query analysis for strategy
|
|
if top_queries:
|
|
brand_queries = [q for q in top_queries if 'alwrity' in q.get('query', '').lower()]
|
|
if len(brand_queries) / len(top_queries) < 0.3:
|
|
strategies.append({
|
|
'type': 'Brand Recognition',
|
|
'priority': 'Medium',
|
|
'recommendation': 'Increase brand-related content to improve brand recognition in search'
|
|
})
|
|
|
|
return strategies[:3] # Return top 3 strategies
|
|
|
|
except Exception as e:
|
|
logger.error(f"Error getting long-term strategy: {e}")
|
|
return []
|
|
|
|
def _calculate_priority_scores(self, top_queries: List[Dict[str, Any]]) -> Dict[str, int]:
|
|
"""Calculate priority scores for different optimization areas"""
|
|
try:
|
|
if not top_queries:
|
|
return {}
|
|
|
|
scores = {
|
|
'ctr_optimization': 0,
|
|
'position_improvement': 0,
|
|
'content_expansion': 0,
|
|
'technical_seo': 0
|
|
}
|
|
|
|
# Analyze CTR optimization priority
|
|
low_ctr_queries = [q for q in top_queries if q.get('ctr', 0) < 2.0]
|
|
if len(low_ctr_queries) > len(top_queries) * 0.3:
|
|
scores['ctr_optimization'] = 8
|
|
|
|
# Analyze position improvement priority
|
|
poor_position_queries = [q for q in top_queries if q.get('position', 100) > 10]
|
|
if len(poor_position_queries) > len(top_queries) * 0.4:
|
|
scores['position_improvement'] = 7
|
|
|
|
# Analyze content expansion priority
|
|
unique_queries = len(set(q.get('query', '') for q in top_queries))
|
|
if unique_queries < 20:
|
|
scores['content_expansion'] = 6
|
|
|
|
# Analyze technical SEO priority
|
|
high_impression_low_click = [q for q in top_queries if q.get('impressions', 0) > 50 and q.get('clicks', 0) < 5]
|
|
if len(high_impression_low_click) > 5:
|
|
scores['technical_seo'] = 9
|
|
|
|
return scores
|
|
|
|
except Exception as e:
|
|
logger.error(f"Error calculating priority scores: {e}")
|
|
return {}
|
|
|
|
def _generate_performance_insights_from_data(self, summary: Dict[str, Any], daily_metrics: List[Dict[str, Any]]) -> Dict[str, Any]:
|
|
"""Generate performance insights from actual stored data"""
|
|
try:
|
|
# Extract performance summary from stored data
|
|
performance_summary = {
|
|
'total_clicks': summary.get('total_clicks', 0),
|
|
'total_impressions': summary.get('total_impressions', 0),
|
|
'avg_ctr': summary.get('avg_ctr', 0),
|
|
'total_queries': summary.get('total_queries', 0)
|
|
}
|
|
|
|
# Analyze trends from daily metrics
|
|
trends = self._analyze_trends(daily_metrics)
|
|
|
|
# Get performance indicators
|
|
performance_indicators = self._get_performance_indicators(summary, daily_metrics)
|
|
|
|
# Generate insights based on real data
|
|
insights = self._generate_performance_insights(summary, daily_metrics)
|
|
|
|
return {
|
|
'performance_summary': performance_summary,
|
|
'trends': trends,
|
|
'performance_indicators': performance_indicators,
|
|
'insights': insights
|
|
}
|
|
|
|
except Exception as e:
|
|
logger.error(f"Error generating performance insights from data: {e}")
|
|
return {'error': str(e)}
|
|
|
|
def _generate_seo_insights_from_data(self, summary: Dict[str, Any], top_queries: List[Dict[str, Any]]) -> Dict[str, Any]:
|
|
"""Generate SEO insights from actual stored data"""
|
|
try:
|
|
# Analyze query patterns from real data
|
|
query_analysis = self._analyze_query_patterns(top_queries)
|
|
|
|
# Get technical insights
|
|
technical_insights = self._get_technical_insights(top_queries)
|
|
|
|
# Identify content opportunities
|
|
content_opportunities = self._identify_content_opportunities(top_queries)
|
|
|
|
# Generate SEO recommendations
|
|
seo_recommendations = self._generate_seo_recommendations(top_queries)
|
|
|
|
return {
|
|
'query_analysis': query_analysis,
|
|
'content_opportunities': content_opportunities,
|
|
'technical_insights': technical_insights,
|
|
'seo_recommendations': seo_recommendations
|
|
}
|
|
|
|
except Exception as e:
|
|
logger.error(f"Error generating SEO insights from data: {e}")
|
|
return {'error': str(e)}
|
|
|
|
def _generate_basic_performance_insights(self, summary: Dict[str, Any]) -> Dict[str, Any]:
|
|
"""Generate basic performance insights when detailed data is not available"""
|
|
try:
|
|
# Generate basic insights with estimated data
|
|
performance_summary = {
|
|
'total_clicks': 0,
|
|
'total_impressions': 0,
|
|
'avg_ctr': 0,
|
|
'total_queries': 0
|
|
}
|
|
|
|
trends = {
|
|
'status': 'insufficient_data',
|
|
'message': 'Detailed analytics data not available for trend analysis'
|
|
}
|
|
|
|
performance_indicators = {
|
|
'performance_level': 'Unknown',
|
|
'traffic_quality': 'Unknown',
|
|
'growth_potential': 'Unknown'
|
|
}
|
|
|
|
insights = [
|
|
'Detailed analytics data is not available in the database',
|
|
'Connect Bing Webmaster Tools to collect comprehensive search data',
|
|
'Basic metrics are available but detailed insights require data collection'
|
|
]
|
|
|
|
return {
|
|
'performance_summary': performance_summary,
|
|
'trends': trends,
|
|
'performance_indicators': performance_indicators,
|
|
'insights': insights
|
|
}
|
|
|
|
except Exception as e:
|
|
logger.error(f"Error generating basic performance insights: {e}")
|
|
return {'error': str(e)}
|
|
|
|
def _analyze_trends(self, daily_metrics: List[Dict[str, Any]]) -> Dict[str, Any]:
|
|
"""Analyze performance trends over time"""
|
|
if not daily_metrics or len(daily_metrics) < 7:
|
|
return {'status': 'insufficient_data', 'message': 'Need at least 7 days of data for trend analysis'}
|
|
|
|
# Calculate week-over-week trends
|
|
recent_week = daily_metrics[-7:] if len(daily_metrics) >= 7 else daily_metrics
|
|
previous_week = daily_metrics[-14:-7] if len(daily_metrics) >= 14 else daily_metrics[:-7]
|
|
|
|
recent_avg_ctr = sum(m.get('avg_ctr', 0) for m in recent_week) / len(recent_week)
|
|
previous_avg_ctr = sum(m.get('avg_ctr', 0) for m in previous_week) / len(previous_week) if previous_week else recent_avg_ctr
|
|
|
|
recent_clicks = sum(m.get('total_clicks', 0) for m in recent_week)
|
|
previous_clicks = sum(m.get('total_clicks', 0) for m in previous_week) if previous_week else recent_clicks
|
|
|
|
ctr_change = self._calculate_percentage_change(recent_avg_ctr, previous_avg_ctr)
|
|
clicks_change = self._calculate_percentage_change(recent_clicks, previous_clicks)
|
|
|
|
return {
|
|
'ctr_trend': {
|
|
'current': recent_avg_ctr,
|
|
'previous': previous_avg_ctr,
|
|
'change_percent': ctr_change,
|
|
'direction': 'up' if ctr_change > 0 else 'down' if ctr_change < 0 else 'stable'
|
|
},
|
|
'clicks_trend': {
|
|
'current': recent_clicks,
|
|
'previous': previous_clicks,
|
|
'change_percent': clicks_change,
|
|
'direction': 'up' if clicks_change > 0 else 'down' if clicks_change < 0 else 'stable'
|
|
},
|
|
'trend_strength': self._calculate_trend_strength(daily_metrics)
|
|
}
|
|
|
|
def _get_performance_indicators(self, summary: Dict[str, Any], daily_metrics: List[Dict[str, Any]]) -> Dict[str, Any]:
|
|
"""Get key performance indicators"""
|
|
total_clicks = summary.get('total_clicks', 0)
|
|
total_impressions = summary.get('total_impressions', 0)
|
|
avg_ctr = summary.get('avg_ctr', 0)
|
|
|
|
# Calculate performance scores
|
|
ctr_score = min(100, (avg_ctr / 5) * 100) # Assuming 5% CTR is excellent
|
|
volume_score = min(100, (total_clicks / 1000) * 100) # Assuming 1000 clicks is good
|
|
consistency_score = self._calculate_consistency_score(daily_metrics)
|
|
|
|
return {
|
|
'ctr_score': round(ctr_score, 1),
|
|
'volume_score': round(volume_score, 1),
|
|
'consistency_score': round(consistency_score, 1),
|
|
'overall_score': round((ctr_score + volume_score + consistency_score) / 3, 1),
|
|
'performance_level': self._get_performance_level(ctr_score, volume_score, consistency_score)
|
|
}
|
|
|
|
def _generate_performance_insights(self, summary: Dict[str, Any], daily_metrics: List[Dict[str, Any]]) -> List[str]:
|
|
"""Generate performance insights"""
|
|
insights = []
|
|
|
|
total_clicks = summary.get('total_clicks', 0)
|
|
avg_ctr = summary.get('avg_ctr', 0)
|
|
|
|
# CTR insights
|
|
if avg_ctr < 2:
|
|
insights.append("Your CTR is below 2%. Consider optimizing titles and descriptions for better click-through rates.")
|
|
elif avg_ctr > 5:
|
|
insights.append("Excellent CTR performance! Your content is highly engaging.")
|
|
else:
|
|
insights.append("Good CTR performance. There's room for improvement with better title optimization.")
|
|
|
|
# Volume insights
|
|
if total_clicks < 100:
|
|
insights.append("Low click volume suggests limited visibility. Focus on increasing impressions through content expansion.")
|
|
elif total_clicks > 1000:
|
|
insights.append("Strong click volume indicates good search visibility. Maintain content quality and consistency.")
|
|
|
|
# Trend insights
|
|
if daily_metrics and len(daily_metrics) >= 7:
|
|
recent_avg = sum(m.get('total_clicks', 0) for m in daily_metrics[-7:]) / 7
|
|
older_avg = sum(m.get('total_clicks', 0) for m in daily_metrics[:-7]) / max(1, len(daily_metrics) - 7)
|
|
|
|
if recent_avg > older_avg * 1.2:
|
|
insights.append("Positive trend: Recent performance shows 20%+ improvement in clicks.")
|
|
elif recent_avg < older_avg * 0.8:
|
|
insights.append("Declining trend: Recent performance shows 20%+ decrease in clicks. Investigate potential issues.")
|
|
|
|
return insights
|
|
|
|
def _analyze_query_patterns(self, top_queries: List[Dict[str, Any]]) -> Dict[str, Any]:
|
|
"""Analyze query patterns and characteristics"""
|
|
if not top_queries:
|
|
return {'error': 'No query data available'}
|
|
|
|
# Analyze query characteristics
|
|
brand_queries = [q for q in top_queries if q.get('is_brand', False)]
|
|
non_brand_queries = [q for q in top_queries if not q.get('is_brand', False)]
|
|
|
|
# Calculate metrics
|
|
total_clicks = sum(q.get('clicks', 0) for q in top_queries)
|
|
brand_clicks = sum(q.get('clicks', 0) for q in brand_queries)
|
|
non_brand_clicks = sum(q.get('clicks', 0) for q in non_brand_queries)
|
|
|
|
# Query length analysis
|
|
short_queries = [q for q in top_queries if len(q.get('query', '')) <= 3]
|
|
long_queries = [q for q in top_queries if len(q.get('query', '')) > 10]
|
|
|
|
return {
|
|
'total_queries': len(top_queries),
|
|
'brand_queries': {
|
|
'count': len(brand_queries),
|
|
'clicks': brand_clicks,
|
|
'percentage': round((brand_clicks / total_clicks * 100), 1) if total_clicks > 0 else 0
|
|
},
|
|
'non_brand_queries': {
|
|
'count': len(non_brand_queries),
|
|
'clicks': non_brand_clicks,
|
|
'percentage': round((non_brand_clicks / total_clicks * 100), 1) if total_clicks > 0 else 0
|
|
},
|
|
'query_length_distribution': {
|
|
'short_queries': len(short_queries),
|
|
'long_queries': len(long_queries),
|
|
'average_length': round(sum(len(q.get('query', '')) for q in top_queries) / len(top_queries), 1)
|
|
},
|
|
'top_categories': self._get_query_categories(top_queries)
|
|
}
|
|
|
|
def _identify_content_opportunities(self, top_queries: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
|
|
"""Identify content optimization opportunities"""
|
|
opportunities = []
|
|
|
|
# High impression, low CTR queries
|
|
high_impression_low_ctr = [
|
|
q for q in top_queries
|
|
if q.get('impressions', 0) > 100 and q.get('ctr', 0) < 2
|
|
]
|
|
|
|
for query in high_impression_low_ctr[:5]: # Top 5 opportunities
|
|
opportunities.append({
|
|
'query': query.get('query', ''),
|
|
'impressions': query.get('impressions', 0),
|
|
'ctr': query.get('ctr', 0),
|
|
'opportunity': 'High impressions but low CTR - optimize title and description',
|
|
'priority': 'high'
|
|
})
|
|
|
|
# Queries with declining performance
|
|
declining_queries = [
|
|
q for q in top_queries
|
|
if q.get('clicks', 0) > 0 and q.get('avg_position', 0) > 10
|
|
]
|
|
|
|
for query in declining_queries[:3]: # Top 3 declining
|
|
opportunities.append({
|
|
'query': query.get('query', ''),
|
|
'position': query.get('avg_position', 0),
|
|
'clicks': query.get('clicks', 0),
|
|
'opportunity': 'Declining position - improve content relevance and authority',
|
|
'priority': 'medium'
|
|
})
|
|
|
|
return opportunities
|
|
|
|
def _get_technical_insights(self, top_queries: List[Dict[str, Any]]) -> Dict[str, Any]:
|
|
"""Get technical SEO insights"""
|
|
if not top_queries:
|
|
return {'error': 'No query data available'}
|
|
|
|
# Position analysis
|
|
positions = [q.get('avg_position', 0) for q in top_queries if q.get('avg_position', 0) > 0]
|
|
avg_position = sum(positions) / len(positions) if positions else 0
|
|
|
|
# CTR analysis
|
|
ctrs = [q.get('ctr', 0) for q in top_queries if q.get('ctr', 0) > 0]
|
|
avg_ctr = sum(ctrs) / len(ctrs) if ctrs else 0
|
|
|
|
return {
|
|
'average_position': round(avg_position, 1),
|
|
'average_ctr': round(avg_ctr, 2),
|
|
'position_distribution': {
|
|
'top_3': len([p for p in positions if p <= 3]),
|
|
'top_10': len([p for p in positions if p <= 10]),
|
|
'page_2_plus': len([p for p in positions if p > 10])
|
|
},
|
|
'ctr_distribution': {
|
|
'excellent': len([c for c in ctrs if c >= 5]),
|
|
'good': len([c for c in ctrs if 2 <= c < 5]),
|
|
'poor': len([c for c in ctrs if c < 2])
|
|
}
|
|
}
|
|
|
|
def _generate_seo_recommendations(self, top_queries: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
|
|
"""Generate SEO recommendations based on query analysis"""
|
|
recommendations = []
|
|
|
|
if not top_queries:
|
|
return [{'type': 'data', 'priority': 'high', 'recommendation': 'Collect more search data to generate insights'}]
|
|
|
|
# Analyze performance patterns
|
|
high_performing = [q for q in top_queries if q.get('ctr', 0) > 5 and q.get('clicks', 0) > 10]
|
|
underperforming = [q for q in top_queries if q.get('ctr', 0) < 2 and q.get('impressions', 0) > 50]
|
|
|
|
if high_performing:
|
|
recommendations.append({
|
|
'type': 'content',
|
|
'priority': 'high',
|
|
'recommendation': f'Replicate success patterns from {len(high_performing)} high-performing queries',
|
|
'action': 'Analyze top-performing content and apply similar optimization strategies'
|
|
})
|
|
|
|
if underperforming:
|
|
recommendations.append({
|
|
'type': 'optimization',
|
|
'priority': 'medium',
|
|
'recommendation': f'Optimize {len(underperforming)} underperforming queries with high impressions',
|
|
'action': 'Improve title tags, meta descriptions, and content relevance'
|
|
})
|
|
|
|
# Brand vs non-brand analysis
|
|
brand_queries = [q for q in top_queries if q.get('is_brand', False)]
|
|
if len(brand_queries) / len(top_queries) > 0.7:
|
|
recommendations.append({
|
|
'type': 'strategy',
|
|
'priority': 'medium',
|
|
'recommendation': 'High brand query dependency - diversify with non-brand content',
|
|
'action': 'Create content targeting informational and commercial non-brand queries'
|
|
})
|
|
|
|
return recommendations
|
|
|
|
def _calculate_percentage_change(self, current: float, previous: float) -> float:
|
|
"""Calculate percentage change between two values"""
|
|
if previous == 0:
|
|
return 100 if current > 0 else 0
|
|
return round(((current - previous) / previous) * 100, 1)
|
|
|
|
def _calculate_trend_strength(self, daily_metrics: List[Dict[str, Any]]) -> str:
|
|
"""Calculate the strength of trends"""
|
|
if len(daily_metrics) < 7:
|
|
return 'insufficient_data'
|
|
|
|
# Simple trend analysis
|
|
recent_week = daily_metrics[-7:]
|
|
clicks_trend = [m.get('total_clicks', 0) for m in recent_week]
|
|
|
|
# Check if trend is consistently up, down, or stable
|
|
increasing = sum(1 for i in range(1, len(clicks_trend)) if clicks_trend[i] > clicks_trend[i-1])
|
|
decreasing = sum(1 for i in range(1, len(clicks_trend)) if clicks_trend[i] < clicks_trend[i-1])
|
|
|
|
if increasing > decreasing + 2:
|
|
return 'strong_upward'
|
|
elif decreasing > increasing + 2:
|
|
return 'strong_downward'
|
|
else:
|
|
return 'stable'
|
|
|
|
def _calculate_consistency_score(self, daily_metrics: List[Dict[str, Any]]) -> float:
|
|
"""Calculate consistency score based on daily performance"""
|
|
if len(daily_metrics) < 7:
|
|
return 0
|
|
|
|
clicks = [m.get('total_clicks', 0) for m in daily_metrics]
|
|
avg_clicks = sum(clicks) / len(clicks)
|
|
|
|
# Calculate coefficient of variation (lower is more consistent)
|
|
variance = sum((c - avg_clicks) ** 2 for c in clicks) / len(clicks)
|
|
std_dev = variance ** 0.5
|
|
cv = (std_dev / avg_clicks) * 100 if avg_clicks > 0 else 100
|
|
|
|
# Convert to 0-100 score (lower CV = higher consistency)
|
|
return max(0, 100 - cv)
|
|
|
|
def _get_performance_level(self, ctr_score: float, volume_score: float, consistency_score: float) -> str:
|
|
"""Determine overall performance level"""
|
|
overall = (ctr_score + volume_score + consistency_score) / 3
|
|
|
|
if overall >= 80:
|
|
return 'excellent'
|
|
elif overall >= 60:
|
|
return 'good'
|
|
elif overall >= 40:
|
|
return 'fair'
|
|
else:
|
|
return 'needs_improvement'
|
|
|
|
def _get_query_categories(self, top_queries: List[Dict[str, Any]]) -> Dict[str, int]:
|
|
"""Get distribution of query categories"""
|
|
categories = {}
|
|
for query in top_queries:
|
|
category = query.get('category', 'general')
|
|
categories[category] = categories.get(category, 0) + 1
|
|
return categories
|