Base code
22 backend/services/seo/__init__.py Normal file
@@ -0,0 +1,22 @@
"""
SEO Dashboard Services Package

This package provides comprehensive SEO analytics and dashboard functionality,
leveraging existing OAuth connections from onboarding step 5 and competitive
analysis from step 3.

Services:
- SEODashboardService: Main orchestration service for dashboard data
- AnalyticsAggregator: Combines and normalizes data from multiple platforms
- CompetitiveAnalyzer: Leverages onboarding research data for competitive insights
"""

from .dashboard_service import SEODashboardService
from .analytics_aggregator import AnalyticsAggregator
from .competitive_analyzer import CompetitiveAnalyzer

__all__ = [
    "SEODashboardService",
    "AnalyticsAggregator",
    "CompetitiveAnalyzer",
]
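Reviewer note: a minimal sketch of how a caller might consume these package exports. The helper name and the idea of passing in a SQLAlchemy session are assumptions for illustration, not part of this commit.

    # Sketch only: wiring the SEO package into calling code.
    from sqlalchemy.orm import Session
    from services.seo import SEODashboardService

    def build_dashboard_service(db: Session) -> SEODashboardService:
        # SEODashboardService constructs its own aggregator and analyzer internally.
        return SEODashboardService(db)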
447 backend/services/seo/analytics_aggregator.py Normal file
@@ -0,0 +1,447 @@
"""
Analytics Aggregator Service

Combines and normalizes data from multiple platforms (GSC, Bing, etc.)
for the SEO dashboard. Provides unified metrics and timeseries data.
"""

from typing import Dict, Any, List, Optional, Tuple
from datetime import datetime, timedelta
from collections import defaultdict
from loguru import logger

from utils.logger_utils import get_service_logger

logger = get_service_logger("analytics_aggregator")

class AnalyticsAggregator:
    """Aggregates analytics data from multiple platforms."""

    def __init__(self):
        """Initialize the analytics aggregator."""
        pass

    def combine_metrics(self, gsc_data: Dict[str, Any], bing_data: Dict[str, Any]) -> Dict[str, Any]:
        """
        Combine metrics from GSC and Bing data.

        Args:
            gsc_data: GSC analytics data
            bing_data: Bing analytics data

        Returns:
            Combined metrics dictionary
        """
        try:
            # Extract metrics from each platform
            gsc_metrics = self._extract_gsc_metrics(gsc_data)
            bing_metrics = self._extract_bing_metrics(bing_data)

            # Combine the metrics
            combined = {
                "clicks": gsc_metrics.get("clicks", 0) + bing_metrics.get("clicks", 0),
                "impressions": gsc_metrics.get("impressions", 0) + bing_metrics.get("impressions", 0),
                "ctr": self._calculate_combined_ctr(gsc_metrics, bing_metrics),
                "position": self._calculate_combined_position(gsc_metrics, bing_metrics),
                "queries": gsc_metrics.get("queries", 0) + bing_metrics.get("queries", 0),
                "pages": gsc_metrics.get("pages", 0) + bing_metrics.get("pages", 0),
                "countries": self._combine_countries(gsc_metrics.get("countries", []), bing_metrics.get("countries", [])),
                "devices": self._combine_devices(gsc_metrics.get("devices", []), bing_metrics.get("devices", [])),
                "sources": {
                    "gsc": gsc_metrics,
                    "bing": bing_metrics
                }
            }

            logger.debug(f"Combined metrics: {combined}")
            return combined

        except Exception as e:
            logger.error(f"Error combining metrics: {e}")
            return {
                "clicks": 0,
                "impressions": 0,
                "ctr": 0.0,
                "position": 0.0,
                "queries": 0,
                "pages": 0,
                "countries": [],
                "devices": [],
                "sources": {"gsc": {}, "bing": {}}
            }

    def normalize_timeseries(self, gsc_daily: List[Dict[str, Any]], bing_daily: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """
        Normalize timeseries data from GSC and Bing to aligned date series.

        Args:
            gsc_daily: GSC daily data
            bing_daily: Bing daily data

        Returns:
            Normalized timeseries data
        """
        try:
            # Convert to date-indexed dictionaries
            gsc_by_date = {item["date"]: item for item in gsc_daily}
            bing_by_date = {item["date"]: item for item in bing_daily}

            # Get all unique dates
            all_dates = set(gsc_by_date.keys()) | set(bing_by_date.keys())
            sorted_dates = sorted(all_dates)

            # Create normalized timeseries
            timeseries = []
            for date in sorted_dates:
                gsc_item = gsc_by_date.get(date, {})
                bing_item = bing_by_date.get(date, {})

                normalized_item = {
                    "date": date,
                    "clicks": gsc_item.get("clicks", 0) + bing_item.get("clicks", 0),
                    "impressions": gsc_item.get("impressions", 0) + bing_item.get("impressions", 0),
                    "ctr": self._calculate_daily_ctr(gsc_item, bing_item),
                    "position": self._calculate_daily_position(gsc_item, bing_item),
                    "gsc_clicks": gsc_item.get("clicks", 0),
                    "gsc_impressions": gsc_item.get("impressions", 0),
                    "bing_clicks": bing_item.get("clicks", 0),
                    "bing_impressions": bing_item.get("impressions", 0)
                }

                timeseries.append(normalized_item)

            logger.debug(f"Normalized timeseries with {len(timeseries)} data points")
            return timeseries

        except Exception as e:
            logger.error(f"Error normalizing timeseries: {e}")
            return []

    def top_queries_combined(self, gsc_data: Dict[str, Any], bing_data: Dict[str, Any], limit: int = 20) -> List[Dict[str, Any]]:
        """
        Get top queries combined from GSC and Bing data.

        Args:
            gsc_data: GSC data
            bing_data: Bing data
            limit: Maximum number of queries to return

        Returns:
            List of top queries with source tags
        """
        try:
            # Extract queries from each platform
            gsc_queries = self._extract_gsc_queries(gsc_data)
            bing_queries = self._extract_bing_queries(bing_data)

            # Combine and deduplicate queries
            query_map = {}

            # Add GSC queries
            for query in gsc_queries:
                query_text = query.get("query", "").lower()
                if query_text in query_map:
                    # Merge data from both sources
                    existing = query_map[query_text]
                    existing["gsc_clicks"] = query.get("clicks", 0)
                    existing["gsc_impressions"] = query.get("impressions", 0)
                    existing["gsc_ctr"] = query.get("ctr", 0)
                    existing["gsc_position"] = query.get("position", 0)
                    existing["total_clicks"] = existing.get("total_clicks", 0) + query.get("clicks", 0)
                    existing["total_impressions"] = existing.get("total_impressions", 0) + query.get("impressions", 0)
                    existing["sources"].append("gsc")
                else:
                    query_map[query_text] = {
                        "query": query.get("query", ""),
                        "gsc_clicks": query.get("clicks", 0),
                        "gsc_impressions": query.get("impressions", 0),
                        "gsc_ctr": query.get("ctr", 0),
                        "gsc_position": query.get("position", 0),
                        "bing_clicks": 0,
                        "bing_impressions": 0,
                        "bing_ctr": 0,
                        "bing_position": 0,
                        "total_clicks": query.get("clicks", 0),
                        "total_impressions": query.get("impressions", 0),
                        "sources": ["gsc"]
                    }

            # Add Bing queries
            for query in bing_queries:
                query_text = query.get("query", "").lower()
                if query_text in query_map:
                    # Merge data from both sources
                    existing = query_map[query_text]
                    existing["bing_clicks"] = query.get("clicks", 0)
                    existing["bing_impressions"] = query.get("impressions", 0)
                    existing["bing_ctr"] = query.get("ctr", 0)
                    existing["bing_position"] = query.get("position", 0)
                    existing["total_clicks"] = existing.get("total_clicks", 0) + query.get("clicks", 0)
                    existing["total_impressions"] = existing.get("total_impressions", 0) + query.get("impressions", 0)
                    existing["sources"].append("bing")
                else:
                    query_map[query_text] = {
                        "query": query.get("query", ""),
                        "gsc_clicks": 0,
                        "gsc_impressions": 0,
                        "gsc_ctr": 0,
                        "gsc_position": 0,
                        "bing_clicks": query.get("clicks", 0),
                        "bing_impressions": query.get("impressions", 0),
                        "bing_ctr": query.get("ctr", 0),
                        "bing_position": query.get("position", 0),
                        "total_clicks": query.get("clicks", 0),
                        "total_impressions": query.get("impressions", 0),
                        "sources": ["bing"]
                    }

            # Sort by total clicks and return top N
            sorted_queries = sorted(
                query_map.values(),
                key=lambda x: x["total_clicks"],
                reverse=True
            )

            logger.debug(f"Combined {len(sorted_queries)} unique queries, returning top {limit}")
            return sorted_queries[:limit]

        except Exception as e:
            logger.error(f"Error combining top queries: {e}")
            return []

    def _extract_gsc_metrics(self, gsc_data: Dict[str, Any]) -> Dict[str, Any]:
        """Extract metrics from GSC data."""
        try:
            if "error" in gsc_data:
                return {}

            data = gsc_data.get("data", {})
            return {
                "clicks": data.get("clicks", 0),
                "impressions": data.get("impressions", 0),
                "ctr": data.get("ctr", 0.0),
                "position": data.get("position", 0.0),
                "queries": len(data.get("queries", [])),
                "pages": len(data.get("pages", [])),
                "countries": data.get("countries", []),
                "devices": data.get("devices", [])
            }
        except Exception as e:
            logger.error(f"Error extracting GSC metrics: {e}")
            return {}

    def _extract_bing_metrics(self, bing_data: Dict[str, Any]) -> Dict[str, Any]:
        """Extract metrics from Bing data."""
        try:
            if "error" in bing_data:
                return {}

            data = bing_data.get("data", {})
            return {
                "clicks": data.get("clicks", 0),
                "impressions": data.get("impressions", 0),
                "ctr": data.get("ctr", 0.0),
                "position": data.get("position", 0.0),
                "queries": len(data.get("queries", [])),
                "pages": len(data.get("pages", [])),
                "countries": data.get("countries", []),
                "devices": data.get("devices", [])
            }
        except Exception as e:
            logger.error(f"Error extracting Bing metrics: {e}")
            return {}

    def _extract_gsc_queries(self, gsc_data: Dict[str, Any]) -> List[Dict[str, Any]]:
        """Extract queries from GSC data."""
        try:
            if "error" in gsc_data:
                return []

            data = gsc_data.get("data", {})
            return data.get("queries", [])
        except Exception as e:
            logger.error(f"Error extracting GSC queries: {e}")
            return []

    def _extract_bing_queries(self, bing_data: Dict[str, Any]) -> List[Dict[str, Any]]:
        """Extract queries from Bing data."""
        try:
            if "error" in bing_data:
                return []

            data = bing_data.get("data", {})
            return data.get("queries", [])
        except Exception as e:
            logger.error(f"Error extracting Bing queries: {e}")
            return []

    def _calculate_combined_ctr(self, gsc_metrics: Dict[str, Any], bing_metrics: Dict[str, Any]) -> float:
        """Calculate combined CTR from GSC and Bing metrics."""
        try:
            total_clicks = gsc_metrics.get("clicks", 0) + bing_metrics.get("clicks", 0)
            total_impressions = gsc_metrics.get("impressions", 0) + bing_metrics.get("impressions", 0)

            if total_impressions > 0:
                return total_clicks / total_impressions
            return 0.0
        except Exception as e:
            logger.error(f"Error calculating combined CTR: {e}")
            return 0.0

    def _calculate_combined_position(self, gsc_metrics: Dict[str, Any], bing_metrics: Dict[str, Any]) -> float:
        """Calculate combined average position from GSC and Bing metrics."""
        try:
            gsc_position = gsc_metrics.get("position", 0)
            bing_position = bing_metrics.get("position", 0)

            # Weight by impressions if available
            gsc_impressions = gsc_metrics.get("impressions", 0)
            bing_impressions = bing_metrics.get("impressions", 0)
            total_impressions = gsc_impressions + bing_impressions

            if total_impressions > 0:
                return (gsc_position * gsc_impressions + bing_position * bing_impressions) / total_impressions
            elif gsc_position > 0 and bing_position > 0:
                return (gsc_position + bing_position) / 2
            elif gsc_position > 0:
                return gsc_position
            elif bing_position > 0:
                return bing_position
            return 0.0
        except Exception as e:
            logger.error(f"Error calculating combined position: {e}")
            return 0.0

    def _calculate_daily_ctr(self, gsc_item: Dict[str, Any], bing_item: Dict[str, Any]) -> float:
        """Calculate CTR for a single day."""
        try:
            total_clicks = gsc_item.get("clicks", 0) + bing_item.get("clicks", 0)
            total_impressions = gsc_item.get("impressions", 0) + bing_item.get("impressions", 0)

            if total_impressions > 0:
                return total_clicks / total_impressions
            return 0.0
        except Exception as e:
            logger.error(f"Error calculating daily CTR: {e}")
            return 0.0

    def _calculate_daily_position(self, gsc_item: Dict[str, Any], bing_item: Dict[str, Any]) -> float:
        """Calculate average position for a single day."""
        try:
            gsc_position = gsc_item.get("position", 0)
            bing_position = bing_item.get("position", 0)

            if gsc_position > 0 and bing_position > 0:
                return (gsc_position + bing_position) / 2
            elif gsc_position > 0:
                return gsc_position
            elif bing_position > 0:
                return bing_position
            return 0.0
        except Exception as e:
            logger.error(f"Error calculating daily position: {e}")
            return 0.0

    def _combine_countries(self, gsc_countries: List[Dict[str, Any]], bing_countries: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """Combine country data from GSC and Bing."""
        try:
            country_map = {}

            # Add GSC countries
            for country in gsc_countries:
                country_code = country.get("country", "")
                if country_code in country_map:
                    existing = country_map[country_code]
                    existing["gsc_clicks"] = country.get("clicks", 0)
                    existing["gsc_impressions"] = country.get("impressions", 0)
                    existing["total_clicks"] = existing.get("total_clicks", 0) + country.get("clicks", 0)
                    existing["total_impressions"] = existing.get("total_impressions", 0) + country.get("impressions", 0)
                else:
                    country_map[country_code] = {
                        "country": country_code,
                        "gsc_clicks": country.get("clicks", 0),
                        "gsc_impressions": country.get("impressions", 0),
                        "bing_clicks": 0,
                        "bing_impressions": 0,
                        "total_clicks": country.get("clicks", 0),
                        "total_impressions": country.get("impressions", 0)
                    }

            # Add Bing countries
            for country in bing_countries:
                country_code = country.get("country", "")
                if country_code in country_map:
                    existing = country_map[country_code]
                    existing["bing_clicks"] = country.get("clicks", 0)
                    existing["bing_impressions"] = country.get("impressions", 0)
                    existing["total_clicks"] = existing.get("total_clicks", 0) + country.get("clicks", 0)
                    existing["total_impressions"] = existing.get("total_impressions", 0) + country.get("impressions", 0)
                else:
                    country_map[country_code] = {
                        "country": country_code,
                        "gsc_clicks": 0,
                        "gsc_impressions": 0,
                        "bing_clicks": country.get("clicks", 0),
                        "bing_impressions": country.get("impressions", 0),
                        "total_clicks": country.get("clicks", 0),
                        "total_impressions": country.get("impressions", 0)
                    }

            # Sort by total clicks
            return sorted(country_map.values(), key=lambda x: x["total_clicks"], reverse=True)

        except Exception as e:
            logger.error(f"Error combining countries: {e}")
            return []

    def _combine_devices(self, gsc_devices: List[Dict[str, Any]], bing_devices: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """Combine device data from GSC and Bing."""
        try:
            device_map = {}

            # Add GSC devices
            for device in gsc_devices:
                device_type = device.get("device", "")
                if device_type in device_map:
                    existing = device_map[device_type]
                    existing["gsc_clicks"] = device.get("clicks", 0)
                    existing["gsc_impressions"] = device.get("impressions", 0)
                    existing["total_clicks"] = existing.get("total_clicks", 0) + device.get("clicks", 0)
                    existing["total_impressions"] = existing.get("total_impressions", 0) + device.get("impressions", 0)
                else:
                    device_map[device_type] = {
                        "device": device_type,
                        "gsc_clicks": device.get("clicks", 0),
                        "gsc_impressions": device.get("impressions", 0),
                        "bing_clicks": 0,
                        "bing_impressions": 0,
                        "total_clicks": device.get("clicks", 0),
                        "total_impressions": device.get("impressions", 0)
                    }

            # Add Bing devices
            for device in bing_devices:
                device_type = device.get("device", "")
                if device_type in device_map:
                    existing = device_map[device_type]
                    existing["bing_clicks"] = device.get("clicks", 0)
                    existing["bing_impressions"] = device.get("impressions", 0)
                    existing["total_clicks"] = existing.get("total_clicks", 0) + device.get("clicks", 0)
                    existing["total_impressions"] = existing.get("total_impressions", 0) + device.get("impressions", 0)
                else:
                    device_map[device_type] = {
                        "device": device_type,
                        "gsc_clicks": 0,
                        "gsc_impressions": 0,
                        "bing_clicks": device.get("clicks", 0),
                        "bing_impressions": device.get("impressions", 0),
                        "total_clicks": device.get("clicks", 0),
                        "total_impressions": device.get("impressions", 0)
                    }

            # Sort by total clicks
            return sorted(device_map.values(), key=lambda x: x["total_clicks"], reverse=True)

        except Exception as e:
            logger.error(f"Error combining devices: {e}")
            return []
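Reviewer note: a minimal usage sketch for the aggregator, assuming GSC/Bing payloads follow the {"data": {...}} shape read by _extract_gsc_metrics and _extract_bing_metrics. The numbers are illustrative only and not taken from this commit.

    from services.seo.analytics_aggregator import AnalyticsAggregator

    aggregator = AnalyticsAggregator()

    # Toy payloads shaped like the dicts the private extractors expect.
    gsc_payload = {"data": {"clicks": 120, "impressions": 4000, "ctr": 0.03, "position": 8.2,
                            "queries": [{"query": "seo dashboard", "clicks": 40, "impressions": 900}],
                            "pages": [], "countries": [], "devices": []}}
    bing_payload = {"data": {"clicks": 30, "impressions": 1500, "ctr": 0.02, "position": 11.0,
                             "queries": [], "pages": [], "countries": [], "devices": []}}

    summary = aggregator.combine_metrics(gsc_payload, bing_payload)
    # Clicks and impressions are summed; CTR is recomputed as total clicks / total impressions,
    # and position is the impression-weighted average of the two platforms.
    assert summary["clicks"] == 150
    assert summary["impressions"] == 5500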
402 backend/services/seo/competitive_analyzer.py Normal file
@@ -0,0 +1,402 @@
"""
Competitive Analyzer Service

Leverages onboarding step 3 research data and combines it with GSC/Bing
query data to provide competitive insights. Superior to SEMrush/Ahrefs
because it uses actual user data and personalized content strategy.
"""

from typing import Dict, Any, List, Optional, Set, Tuple
from datetime import datetime, timedelta
from sqlalchemy.orm import Session
from loguru import logger

from utils.logger_utils import get_service_logger
from services.onboarding.data_service import OnboardingDataService
from services.calendar_generation_datasource_framework.data_processing.comprehensive_user_data import ComprehensiveUserDataProcessor

logger = get_service_logger("competitive_analyzer")

class CompetitiveAnalyzer:
    """Analyzes competitive landscape using onboarding research data and analytics."""

    def __init__(self, db: Session):
        """Initialize the competitive analyzer."""
        self.db = db
        self.user_data_service = OnboardingDataService(db)
        self.comprehensive_processor = ComprehensiveUserDataProcessor(db)

    async def get_competitive_insights(self, user_id: str) -> Dict[str, Any]:
        """
        Get comprehensive competitive insights for a user.

        Args:
            user_id: User ID

        Returns:
            Dictionary containing competitive insights
        """
        try:
            # Get user's research preferences and competitor data
            research_prefs = self.user_data_service.get_user_research_preferences(user_id)
            competitors = research_prefs.get('competitors', []) if research_prefs else []

            if not competitors:
                logger.info(f"No competitors found for user {user_id}")
                return {
                    "competitor_keywords": [],
                    "content_gaps": [],
                    "opportunity_score": 0,
                    "competitors_analyzed": 0,
                    "last_updated": datetime.now().isoformat()
                }

            # Get comprehensive user data including competitor analysis
            comprehensive_data = self.comprehensive_processor.get_comprehensive_user_data(user_id)
            competitor_analysis = comprehensive_data.get('competitor_analysis', {})

            # Extract competitor keywords and content topics
            competitor_keywords = self._extract_competitor_keywords(competitor_analysis, competitors)

            # Get user's current keywords from GSC/Bing (would be passed in real implementation)
            user_keywords = self._get_user_keywords(user_id)

            # Find content gaps
            content_gaps = self._find_content_gaps(user_keywords, competitor_keywords)

            # Calculate opportunity score
            opportunity_score = self._calculate_opportunity_score(content_gaps, competitor_keywords)

            # Generate actionable insights
            insights = self._generate_insights(content_gaps, competitor_keywords, opportunity_score)

            return {
                "competitor_keywords": competitor_keywords,
                "content_gaps": content_gaps,
                "opportunity_score": opportunity_score,
                "competitors_analyzed": len(competitors),
                "insights": insights,
                "last_updated": datetime.now().isoformat()
            }

        except Exception as e:
            logger.error(f"Error getting competitive insights for user {user_id}: {e}")
            return {
                "competitor_keywords": [],
                "content_gaps": [],
                "opportunity_score": 0,
                "competitors_analyzed": 0,
                "insights": [],
                "last_updated": datetime.now().isoformat()
            }

    def _extract_competitor_keywords(self, competitor_analysis: Dict[str, Any], competitors: List[str]) -> List[Dict[str, Any]]:
        """Extract keywords from competitor analysis."""
        try:
            keywords = []

            # Extract from competitor analysis data
            for competitor_url in competitors:
                competitor_data = competitor_analysis.get(competitor_url, {})

                # Extract keywords from various sources
                competitor_keywords = competitor_data.get('keywords', [])
                content_topics = competitor_data.get('content_topics', [])
                meta_keywords = competitor_data.get('meta_keywords', [])

                # Combine all keyword sources
                all_keywords = set()
                all_keywords.update(competitor_keywords)
                all_keywords.update(content_topics)
                all_keywords.update(meta_keywords)

                # Add to keywords list with competitor attribution
                for keyword in all_keywords:
                    if keyword and len(keyword.strip()) > 0:
                        keywords.append({
                            "keyword": keyword.strip(),
                            "competitor": competitor_url,
                            "source": "analysis",
                            "volume_estimate": competitor_data.get('keyword_volume', {}).get(keyword, 0),
                            "difficulty_estimate": competitor_data.get('keyword_difficulty', {}).get(keyword, 0),
                            "relevance_score": self._calculate_relevance_score(keyword, competitor_data)
                        })

            # Remove duplicates and sort by relevance
            unique_keywords = self._deduplicate_keywords(keywords)
            sorted_keywords = sorted(unique_keywords, key=lambda x: x['relevance_score'], reverse=True)

            logger.debug(f"Extracted {len(sorted_keywords)} unique competitor keywords")
            return sorted_keywords[:100]  # Limit to top 100

        except Exception as e:
            logger.error(f"Error extracting competitor keywords: {e}")
            return []

    def _get_user_keywords(self, user_id: str) -> Set[str]:
        """Get user's current keywords from GSC/Bing data."""
        try:
            # In a real implementation, this would fetch from GSC/Bing APIs
            # For now, return empty set as placeholder
            # This would be called from the dashboard service with actual query data
            return set()
        except Exception as e:
            logger.error(f"Error getting user keywords: {e}")
            return set()

    def _find_content_gaps(self, user_keywords: Set[str], competitor_keywords: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """Find content gaps between user and competitors."""
        try:
            content_gaps = []
            user_keywords_lower = {kw.lower() for kw in user_keywords}

            for comp_keyword in competitor_keywords:
                keyword = comp_keyword['keyword'].lower()

                # Check if user doesn't have this keyword
                if keyword not in user_keywords_lower:
                    # Check for partial matches (related keywords)
                    is_related = any(
                        self._are_keywords_related(keyword, user_kw)
                        for user_kw in user_keywords_lower
                    )

                    if not is_related:
                        content_gaps.append({
                            "keyword": comp_keyword['keyword'],
                            "competitor": comp_keyword['competitor'],
                            "volume_estimate": comp_keyword.get('volume_estimate', 0),
                            "difficulty_estimate": comp_keyword.get('difficulty_estimate', 0),
                            "relevance_score": comp_keyword['relevance_score'],
                            "opportunity_type": self._classify_opportunity_type(comp_keyword),
                            "content_suggestion": self._generate_content_suggestion(comp_keyword)
                        })

            # Sort by opportunity score (volume * relevance / difficulty)
            sorted_gaps = sorted(
                content_gaps,
                key=lambda x: (x['volume_estimate'] * x['relevance_score']) / max(x['difficulty_estimate'], 1),
                reverse=True
            )

            logger.debug(f"Found {len(sorted_gaps)} content gaps")
            return sorted_gaps[:50]  # Limit to top 50

        except Exception as e:
            logger.error(f"Error finding content gaps: {e}")
            return []

    def _calculate_opportunity_score(self, content_gaps: List[Dict[str, Any]], competitor_keywords: List[Dict[str, Any]]) -> int:
        """Calculate overall opportunity score (0-100)."""
        try:
            if not content_gaps:
                return 0

            # Calculate average opportunity metrics
            avg_volume = sum(gap['volume_estimate'] for gap in content_gaps) / len(content_gaps)
            avg_relevance = sum(gap['relevance_score'] for gap in content_gaps) / len(content_gaps)
            avg_difficulty = sum(gap['difficulty_estimate'] for gap in content_gaps) / len(content_gaps)

            # Calculate opportunity score
            # Higher volume and relevance = higher score
            # Lower difficulty = higher score
            volume_score = min(avg_volume / 1000, 1.0) * 40  # Max 40 points for volume
            relevance_score = avg_relevance * 30  # Max 30 points for relevance
            difficulty_score = max(0, (10 - avg_difficulty) / 10) * 30  # Max 30 points for low difficulty

            total_score = volume_score + relevance_score + difficulty_score
            opportunity_score = min(int(total_score), 100)

            logger.debug(f"Calculated opportunity score: {opportunity_score}")
            return opportunity_score

        except Exception as e:
            logger.error(f"Error calculating opportunity score: {e}")
            return 0

    def _generate_insights(self, content_gaps: List[Dict[str, Any]], competitor_keywords: List[Dict[str, Any]], opportunity_score: int) -> List[Dict[str, Any]]:
        """Generate actionable insights from competitive analysis."""
        try:
            insights = []

            # High opportunity score insight
            if opportunity_score > 70:
                insights.append({
                    "type": "opportunity",
                    "priority": "high",
                    "title": "High Competitive Opportunity",
                    "description": f"Your opportunity score is {opportunity_score}% - competitors are ranking for many keywords you're not targeting.",
                    "action": "Create content for the identified keyword gaps to capture more organic traffic."
                })
            elif opportunity_score > 40:
                insights.append({
                    "type": "opportunity",
                    "priority": "medium",
                    "title": "Moderate Competitive Opportunity",
                    "description": f"Your opportunity score is {opportunity_score}% - there are some keyword gaps you could target.",
                    "action": "Review the content gaps and prioritize high-volume, low-difficulty keywords."
                })

            # Content gap insights
            if content_gaps:
                high_volume_gaps = [gap for gap in content_gaps if gap['volume_estimate'] > 500]
                if high_volume_gaps:
                    insights.append({
                        "type": "content",
                        "priority": "high",
                        "title": "High-Volume Content Gaps",
                        "description": f"Found {len(high_volume_gaps)} high-volume keywords that competitors rank for but you don't.",
                        "action": "Create comprehensive content targeting these high-volume keywords."
                    })

                low_difficulty_gaps = [gap for gap in content_gaps if gap['difficulty_estimate'] < 3]
                if low_difficulty_gaps:
                    insights.append({
                        "type": "content",
                        "priority": "medium",
                        "title": "Low-Difficulty Content Gaps",
                        "description": f"Found {len(low_difficulty_gaps)} low-difficulty keywords that would be easy to rank for.",
                        "action": "Quick wins: Create content for these low-difficulty keywords first."
                    })

            # Competitor analysis insights
            if competitor_keywords:
                top_competitors = {}
                for kw in competitor_keywords:
                    competitor = kw['competitor']
                    if competitor not in top_competitors:
                        top_competitors[competitor] = 0
                    top_competitors[competitor] += 1

                top_competitor = max(top_competitors.items(), key=lambda x: x[1]) if top_competitors else None
                if top_competitor:
                    insights.append({
                        "type": "competitive",
                        "priority": "medium",
                        "title": "Top Competitor Analysis",
                        "description": f"{top_competitor[0]} has the most keyword overlap with your content strategy.",
                        "action": f"Analyze {top_competitor[0]}'s content strategy for additional keyword opportunities."
                    })

            return insights

        except Exception as e:
            logger.error(f"Error generating insights: {e}")
            return []

    def _deduplicate_keywords(self, keywords: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """Remove duplicate keywords and merge data."""
        try:
            keyword_map = {}

            for kw in keywords:
                keyword = kw['keyword'].lower()
                if keyword in keyword_map:
                    # Merge data from multiple competitors
                    existing = keyword_map[keyword]
                    existing['competitors'].append(kw['competitor'])
                    existing['volume_estimate'] = max(existing['volume_estimate'], kw['volume_estimate'])
                    existing['relevance_score'] = max(existing['relevance_score'], kw['relevance_score'])
                else:
                    keyword_map[keyword] = {
                        'keyword': kw['keyword'],
                        'competitors': [kw['competitor']],
                        'source': kw['source'],
                        'volume_estimate': kw['volume_estimate'],
                        'difficulty_estimate': kw['difficulty_estimate'],
                        'relevance_score': kw['relevance_score']
                    }

            return list(keyword_map.values())

        except Exception as e:
            logger.error(f"Error deduplicating keywords: {e}")
            return []

    def _calculate_relevance_score(self, keyword: str, competitor_data: Dict[str, Any]) -> float:
        """Calculate relevance score for a keyword based on competitor data."""
        try:
            # Base relevance score
            relevance = 0.5

            # Increase relevance based on keyword frequency in competitor content
            content_frequency = competitor_data.get('content_frequency', {})
            if keyword in content_frequency:
                relevance += min(content_frequency[keyword] / 10, 0.3)

            # Increase relevance based on meta keyword presence
            meta_keywords = competitor_data.get('meta_keywords', [])
            if keyword in meta_keywords:
                relevance += 0.2

            # Increase relevance based on title presence
            titles = competitor_data.get('titles', [])
            if any(keyword.lower() in title.lower() for title in titles):
                relevance += 0.2

            # Normalize to 0-1 range
            return min(relevance, 1.0)

        except Exception as e:
            logger.error(f"Error calculating relevance score: {e}")
            return 0.5

    def _are_keywords_related(self, keyword1: str, keyword2: str) -> bool:
        """Check if two keywords are related."""
        try:
            # Simple similarity check - can be enhanced with NLP
            words1 = set(keyword1.lower().split())
            words2 = set(keyword2.lower().split())

            # Check for word overlap
            overlap = len(words1.intersection(words2))
            total_words = len(words1.union(words2))

            if total_words == 0:
                return False

            similarity = overlap / total_words
            return similarity > 0.3  # 30% word overlap threshold

        except Exception as e:
            logger.error(f"Error checking keyword relatedness: {e}")
            return False

    def _classify_opportunity_type(self, keyword_data: Dict[str, Any]) -> str:
        """Classify the type of opportunity for a keyword."""
        try:
            volume = keyword_data.get('volume_estimate', 0)
            difficulty = keyword_data.get('difficulty_estimate', 0)
            relevance = keyword_data.get('relevance_score', 0)

            if volume > 1000 and difficulty < 5 and relevance > 0.7:
                return "high_priority"
            elif volume > 500 and difficulty < 7 and relevance > 0.5:
                return "medium_priority"
            elif volume > 100 and difficulty < 8:
                return "low_priority"
            else:
                return "long_term"

        except Exception as e:
            logger.error(f"Error classifying opportunity type: {e}")
            return "unknown"

    def _generate_content_suggestion(self, keyword_data: Dict[str, Any]) -> str:
        """Generate content suggestion for a keyword."""
        try:
            keyword = keyword_data['keyword']
            opportunity_type = self._classify_opportunity_type(keyword_data)

            suggestions = {
                "high_priority": f"Create comprehensive, in-depth content targeting '{keyword}' - high volume, low difficulty opportunity.",
                "medium_priority": f"Consider creating content around '{keyword}' - good volume with moderate competition.",
                "low_priority": f"'{keyword}' could be a good long-tail keyword to target in future content.",
                "long_term": f"'{keyword}' might be worth monitoring for future content opportunities."
            }

            return suggestions.get(opportunity_type, f"Consider creating content around '{keyword}'.")

        except Exception as e:
            logger.error(f"Error generating content suggestion: {e}")
            return f"Consider creating content around '{keyword_data.get('keyword', 'this keyword')}'."
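Reviewer note: a worked example of the 0-100 opportunity score in _calculate_opportunity_score, using the 40/30/30 weighting from the code above. The input averages are illustrative, not data from this commit.

    # Illustrative gap averages: volume 800, relevance 0.6, difficulty 4.
    avg_volume, avg_relevance, avg_difficulty = 800, 0.6, 4

    volume_score = min(avg_volume / 1000, 1.0) * 40             # 0.8 * 40 = 32
    relevance_score = avg_relevance * 30                        # 0.6 * 30 = 18
    difficulty_score = max(0, (10 - avg_difficulty) / 10) * 30  # 0.6 * 30 = 18

    opportunity_score = min(int(volume_score + relevance_score + difficulty_score), 100)
    print(opportunity_score)  # 68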
397 backend/services/seo/dashboard_service.py Normal file
@@ -0,0 +1,397 @@
"""
SEO Dashboard Service

Main orchestration service that coordinates data fetching from GSC, Bing,
and other analytics sources for the SEO dashboard. Leverages existing
OAuth connections from onboarding step 5.
"""

from typing import Dict, Any, Optional, List
from datetime import datetime, timedelta
from sqlalchemy.orm import Session
from loguru import logger

from utils.logger_utils import get_service_logger
from services.gsc_service import GSCService
from services.integrations.bing_oauth import BingOAuthService
from services.bing_analytics_storage_service import BingAnalyticsStorageService
from services.analytics_cache_service import AnalyticsCacheService
from services.onboarding.data_service import OnboardingDataService
from .analytics_aggregator import AnalyticsAggregator
from .competitive_analyzer import CompetitiveAnalyzer

logger = get_service_logger("seo_dashboard")

class SEODashboardService:
    """Main service for SEO dashboard data orchestration."""

    def __init__(self, db: Session):
        """Initialize the SEO dashboard service."""
        self.db = db
        self.gsc_service = GSCService()
        self.bing_oauth = BingOAuthService()
        self.bing_storage = BingAnalyticsStorageService("sqlite:///alwrity.db")
        self.analytics_cache = AnalyticsCacheService()
        self.user_data_service = OnboardingDataService(db)
        self.analytics_aggregator = AnalyticsAggregator()
        self.competitive_analyzer = CompetitiveAnalyzer(db)

    async def get_platform_status(self, user_id: str) -> Dict[str, Any]:
        """Get connection status for GSC and Bing platforms."""
        try:
            # Check GSC connection
            gsc_credentials = self.gsc_service.load_user_credentials(user_id)
            gsc_connected = gsc_credentials is not None

            # Check Bing connection with detailed status
            bing_token_status = self.bing_oauth.get_user_token_status(user_id)
            bing_connected = bing_token_status.get('has_active_tokens', False)

            # Get cached data for last sync info
            gsc_data = self.analytics_cache.get('gsc_analytics', user_id)
            bing_data = self.analytics_cache.get('bing_analytics', user_id)

            return {
                "gsc": {
                    "connected": gsc_connected,
                    "sites": self._get_gsc_sites(user_id) if gsc_connected else [],
                    "last_sync": gsc_data.get('last_updated') if gsc_data else None,
                    "status": "connected" if gsc_connected else "disconnected"
                },
                "bing": {
                    "connected": bing_connected,
                    "sites": self._get_bing_sites(user_id) if bing_connected else [],
                    "last_sync": bing_data.get('last_updated') if bing_data else None,
                    "status": "connected" if bing_connected else ("expired" if bing_token_status.get('has_expired_tokens') else "disconnected"),
                    "has_expired_tokens": bing_token_status.get('has_expired_tokens', False),
                    "last_token_date": bing_token_status.get('last_token_date'),
                    "total_tokens": bing_token_status.get('total_tokens', 0)
                }
            }

        except Exception as e:
            logger.error(f"Error getting platform status for user {user_id}: {e}")
            return {
                "gsc": {"connected": False, "sites": [], "last_sync": None, "status": "error"},
                "bing": {"connected": False, "sites": [], "last_sync": None, "status": "error"}
            }

    async def get_dashboard_overview(self, user_id: str, site_url: Optional[str] = None) -> Dict[str, Any]:
        """Get comprehensive dashboard overview with real GSC/Bing data."""
        try:
            # Get user's website URL if not provided
            if not site_url:
                # Try to get from website analysis first
                website_analysis = self.user_data_service.get_user_website_analysis(int(user_id))
                if website_analysis and website_analysis.get('website_url'):
                    site_url = website_analysis['website_url']
                else:
                    # Fallback: try to get from Bing sites
                    bing_sites = self._get_bing_sites(user_id)
                    if bing_sites:
                        site_url = bing_sites[0]  # Use first Bing site
                    else:
                        site_url = 'https://alwrity.com'  # Default fallback

            # Get platform status
            platform_status = await self.get_platform_status(user_id)

            # Get analytics data
            gsc_data = await self.get_gsc_data(user_id, site_url)
            bing_data = await self.get_bing_data(user_id, site_url)

            # Aggregate metrics
            summary = self.analytics_aggregator.combine_metrics(gsc_data, bing_data)
            timeseries = self.analytics_aggregator.normalize_timeseries(
                gsc_data.get("timeseries", []),
                bing_data.get("timeseries", [])
            )

            # Get competitive insights
            competitor_insights = await self.competitive_analyzer.get_competitive_insights(user_id)

            # Calculate health score
            health_score = self._calculate_health_score(summary, platform_status)

            # Generate AI insights
            ai_insights = await self._generate_ai_insights(summary, timeseries, competitor_insights)

            return {
                "website_url": site_url,
                "platforms": platform_status,
                "summary": summary,
                "timeseries": timeseries,
                "competitor_insights": competitor_insights,
                "health_score": health_score,
                "ai_insights": ai_insights,
                "last_updated": datetime.now().isoformat()
            }

        except Exception as e:
            logger.error(f"Error getting dashboard overview for user {user_id}: {e}")
            raise

    async def get_gsc_data(self, user_id: str, site_url: Optional[str] = None) -> Dict[str, Any]:
        """Get GSC data for the specified site."""
        try:
            # Check if user has GSC credentials
            credentials = self.gsc_service.load_user_credentials(user_id)
            if not credentials:
                return {"error": "GSC not connected", "data": [], "status": "disconnected"}

            # Try to get from cache first
            cache_key = f"gsc_analytics:{user_id}:{site_url or 'default'}"
            cached_data = self.analytics_cache.get('gsc_analytics', user_id, site_url=site_url or 'default')
            if cached_data:
                return cached_data

            # Fetch fresh data from GSC API
            if site_url:
                gsc_data = self.gsc_service.get_search_analytics(user_id, site_url)
            else:
                # Get all sites for user
                sites = self._get_gsc_sites(user_id)
                if sites:
                    gsc_data = self.gsc_service.get_search_analytics(user_id, sites[0])
                else:
                    return {"error": "No GSC sites found", "data": [], "status": "disconnected"}

            # Cache the data
            self.analytics_cache.set('gsc_analytics', user_id, gsc_data, ttl_override=3600, site_url=site_url or 'default')  # 1 hour cache

            return gsc_data

        except Exception as e:
            logger.error(f"Error getting GSC data for user {user_id}: {e}")
            return {"error": str(e), "data": [], "status": "error"}

    async def get_bing_data(self, user_id: str, site_url: Optional[str] = None) -> Dict[str, Any]:
        """Get Bing Webmaster Tools data for the specified site."""
        try:
            # Check if user has Bing tokens
            tokens = self.bing_oauth.get_user_tokens(user_id)
            if not tokens:
                return {"error": "Bing not connected", "data": [], "status": "disconnected"}

            # Try to get from cache first
            cache_key = f"bing_analytics:{user_id}:{site_url or 'default'}"
            cached_data = self.analytics_cache.get('bing_analytics', user_id, site_url=site_url or 'default')
            if cached_data:
                return cached_data

            # Get data from Bing storage service
            if site_url:
                bing_data = self.bing_storage.get_analytics_summary(user_id, site_url, days=30)
            else:
                # Get all sites for user
                sites = self._get_bing_sites(user_id)
                if sites:
                    logger.info(f"Using first Bing site for analysis: {sites[0]}")
                    bing_data = self.bing_storage.get_analytics_summary(user_id, sites[0], days=30)
                else:
                    logger.warning(f"No Bing sites found for user {user_id}")
                    return {"error": "No Bing sites found", "data": [], "status": "disconnected"}

            # Cache the data
            self.analytics_cache.set('bing_analytics', user_id, bing_data, ttl_override=3600, site_url=site_url or 'default')  # 1 hour cache

            return bing_data

        except Exception as e:
            logger.error(f"Error getting Bing data for user {user_id}: {e}")
            return {"error": str(e), "data": [], "status": "error"}

    async def get_competitive_insights(self, user_id: str) -> Dict[str, Any]:
        """Get competitive insights from onboarding step 3 data."""
        try:
            return await self.competitive_analyzer.get_competitive_insights(user_id)
        except Exception as e:
            logger.error(f"Error getting competitive insights for user {user_id}: {e}")
            return {
                "competitor_keywords": [],
                "content_gaps": [],
                "opportunity_score": 0
            }

    async def refresh_analytics_data(self, user_id: str, site_url: Optional[str] = None) -> Dict[str, Any]:
        """Refresh analytics data by invalidating cache and fetching fresh data."""
        try:
            # Invalidate cache
            cache_keys = [
                f"gsc_analytics:{user_id}",
                f"bing_analytics:{user_id}",
                f"gsc_analytics:{user_id}:{site_url or 'default'}",
                f"bing_analytics:{user_id}:{site_url or 'default'}"
            ]

            for key in cache_keys:
                self.analytics_cache.delete(key)

            # Fetch fresh data
            gsc_result = await self.get_gsc_data(user_id, site_url)
            bing_result = await self.get_bing_data(user_id, site_url)

            return {
                "status": "success",
                "message": "Analytics data refreshed successfully",
                "last_updated": datetime.now().isoformat(),
                "platforms": {
                    "gsc": {"status": "success" if "error" not in gsc_result else "error"},
                    "bing": {"status": "success" if "error" not in bing_result else "error"}
                }
            }

        except Exception as e:
            logger.error(f"Error refreshing analytics data for user {user_id}: {e}")
            return {
                "status": "error",
                "message": f"Failed to refresh analytics data: {str(e)}",
                "last_updated": datetime.now().isoformat()
            }

    def _get_gsc_sites(self, user_id: str) -> List[str]:
        """Get GSC sites for user."""
        try:
            credentials = self.gsc_service.load_user_credentials(user_id)
            if not credentials:
                return []

            # This would need to be implemented in GSCService
            # For now, return empty list
            return []
        except Exception as e:
            logger.error(f"Error getting GSC sites for user {user_id}: {e}")
            return []

    def _get_bing_sites(self, user_id: str) -> List[str]:
        """Get Bing sites for user."""
        try:
            # Use the existing get_user_sites method from BingOAuthService
            sites = self.bing_oauth.get_user_sites(user_id)
            if not sites:
                logger.warning(f"No Bing sites found for user {user_id}")
                return []

            # Extract site URLs from the sites data
            site_urls = []
            for site in sites:
                if isinstance(site, dict) and site.get('url'):
                    site_urls.append(site['url'])
                elif isinstance(site, str):
                    site_urls.append(site)

            logger.info(f"Found {len(site_urls)} Bing sites for user {user_id}: {site_urls}")
            return site_urls

        except Exception as e:
            logger.error(f"Error getting Bing sites for user {user_id}: {e}")
            return []

    def _calculate_health_score(self, summary: Dict[str, Any], platform_status: Dict[str, Any]) -> Dict[str, Any]:
        """Calculate overall SEO health score."""
        try:
            score = 0
            max_score = 100

            # Base score for connected platforms
            if platform_status.get("gsc", {}).get("connected"):
                score += 30
            if platform_status.get("bing", {}).get("connected"):
                score += 20

            # Traffic score (0-30)
            clicks = summary.get("clicks", 0)
            if clicks > 1000:
                score += 30
            elif clicks > 500:
                score += 20
            elif clicks > 100:
                score += 10

            # CTR score (0-20)
            ctr = summary.get("ctr", 0)
            if ctr > 0.05:  # 5%
                score += 20
            elif ctr > 0.03:  # 3%
                score += 15
            elif ctr > 0.01:  # 1%
                score += 10

            # Determine trend and color
            if score >= 80:
                trend = "up"
                label = "EXCELLENT"
                color = "#4CAF50"
            elif score >= 60:
                trend = "stable"
                label = "GOOD"
                color = "#2196F3"
            elif score >= 40:
                trend = "down"
                label = "NEEDS IMPROVEMENT"
                color = "#FF9800"
            else:
                trend = "down"
                label = "POOR"
                color = "#F44336"

            return {
                "score": score,
                "change": 0,  # Would need historical data to calculate
                "trend": trend,
                "label": label,
                "color": color
            }

        except Exception as e:
            logger.error(f"Error calculating health score: {e}")
            return {
                "score": 0,
                "change": 0,
                "trend": "unknown",
                "label": "UNKNOWN",
                "color": "#9E9E9E"
            }

    async def _generate_ai_insights(self, summary: Dict[str, Any], timeseries: List[Dict[str, Any]], competitor_insights: Dict[str, Any]) -> List[Dict[str, Any]]:
        """Generate AI insights from analytics data."""
        try:
            insights = []

            # Traffic insights
            clicks = summary.get("clicks", 0)
            ctr = summary.get("ctr", 0)

            if clicks > 0 and ctr < 0.02:  # Low CTR
                insights.append({
                    "type": "opportunity",
                    "priority": "high",
                    "text": f"Your CTR is {ctr:.1%}, which is below average. Consider optimizing your meta descriptions and titles.",
                    "category": "performance"
                })

            # Competitive insights
            opportunity_score = competitor_insights.get("opportunity_score", 0)
            if opportunity_score > 70:
                insights.append({
                    "type": "opportunity",
                    "priority": "high",
                    "text": f"High opportunity score of {opportunity_score}% - competitors are ranking for keywords you're not targeting.",
                    "category": "competitive"
                })

            # Content gaps
            content_gaps = competitor_insights.get("content_gaps", [])
            if content_gaps:
                insights.append({
                    "type": "action",
                    "priority": "medium",
                    "text": f"Found {len(content_gaps)} content gaps. Consider creating content for these topics.",
                    "category": "content"
                })

            return insights

        except Exception as e:
            logger.error(f"Error generating AI insights: {e}")
            return []
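Reviewer note: a hedged sketch of how the orchestration service could be exercised end to end. The session-factory import path and the user id are placeholders outside this commit, shown only to illustrate the async call flow.

    import asyncio
    from db.session import SessionLocal  # assumption: project-specific session factory
    from services.seo import SEODashboardService

    async def main() -> None:
        db = SessionLocal()
        try:
            service = SEODashboardService(db)
            status = await service.get_platform_status("1")
            overview = await service.get_dashboard_overview("1")
            print(status["gsc"]["connected"], overview["health_score"]["label"])
        finally:
            db.close()

    asyncio.run(main())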