Base code

Kunthawat Greethong
2026-01-08 22:39:53 +07:00
parent 697115c61a
commit c35fa52117
2169 changed files with 626670 additions and 0 deletions


@@ -0,0 +1,22 @@
"""
SEO Dashboard Services Package
This package provides comprehensive SEO analytics and dashboard functionality,
leveraging existing OAuth connections from onboarding step 5 and competitive
analysis from step 3.
Services:
- SEODashboardService: Main orchestration service for dashboard data
- AnalyticsAggregator: Combines and normalizes data from multiple platforms
- CompetitiveAnalyzer: Leverages onboarding research data for competitive insights
"""
from .dashboard_service import SEODashboardService
from .analytics_aggregator import AnalyticsAggregator
from .competitive_analyzer import CompetitiveAnalyzer
__all__ = [
"SEODashboardService",
"AnalyticsAggregator",
"CompetitiveAnalyzer",
]
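For orientation, a minimal wiring sketch, not part of this commit; the `services.seo_dashboard` import path is an assumption, and the constructors expect a SQLAlchemy `Session`:

from sqlalchemy.orm import Session
from services.seo_dashboard import SEODashboardService  # import path is an assumption

def build_dashboard_service(db: Session) -> SEODashboardService:
    # SEODashboardService instantiates AnalyticsAggregator and
    # CompetitiveAnalyzer internally; they are exported for callers
    # that need the pieces individually.
    return SEODashboardService(db)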


@@ -0,0 +1,447 @@
"""
Analytics Aggregator Service
Combines and normalizes data from multiple platforms (GSC, Bing, etc.)
for the SEO dashboard. Provides unified metrics and timeseries data.
"""
from typing import Dict, Any, List
from utils.logger_utils import get_service_logger
logger = get_service_logger("analytics_aggregator")
class AnalyticsAggregator:
"""Aggregates analytics data from multiple platforms."""
def __init__(self):
"""Initialize the analytics aggregator (stateless; no configuration required)."""
def combine_metrics(self, gsc_data: Dict[str, Any], bing_data: Dict[str, Any]) -> Dict[str, Any]:
"""
Combine metrics from GSC and Bing data.
Args:
gsc_data: GSC analytics data
bing_data: Bing analytics data
Returns:
Combined metrics dictionary
"""
try:
# Extract metrics from each platform
gsc_metrics = self._extract_gsc_metrics(gsc_data)
bing_metrics = self._extract_bing_metrics(bing_data)
# Combine the metrics
combined = {
"clicks": gsc_metrics.get("clicks", 0) + bing_metrics.get("clicks", 0),
"impressions": gsc_metrics.get("impressions", 0) + bing_metrics.get("impressions", 0),
"ctr": self._calculate_combined_ctr(gsc_metrics, bing_metrics),
"position": self._calculate_combined_position(gsc_metrics, bing_metrics),
"queries": gsc_metrics.get("queries", 0) + bing_metrics.get("queries", 0),
"pages": gsc_metrics.get("pages", 0) + bing_metrics.get("pages", 0),
"countries": self._combine_countries(gsc_metrics.get("countries", []), bing_metrics.get("countries", [])),
"devices": self._combine_devices(gsc_metrics.get("devices", []), bing_metrics.get("devices", [])),
"sources": {
"gsc": gsc_metrics,
"bing": bing_metrics
}
}
logger.debug(f"Combined metrics: {combined}")
return combined
except Exception as e:
logger.error(f"Error combining metrics: {e}")
return {
"clicks": 0,
"impressions": 0,
"ctr": 0.0,
"position": 0.0,
"queries": 0,
"pages": 0,
"countries": [],
"devices": [],
"sources": {"gsc": {}, "bing": {}}
}
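# Worked example (illustrative values, not part of the service):
#   agg = AnalyticsAggregator()
#   gsc = {"data": {"clicks": 90, "impressions": 3000, "position": 8.2}}
#   bing = {"data": {"clicks": 10, "impressions": 1000, "position": 12.0}}
#   combined = agg.combine_metrics(gsc, bing)
# yields clicks=100, impressions=4000, ctr = 100 / 4000 = 0.025, and an
# impression-weighted position of (8.2 * 3000 + 12.0 * 1000) / 4000 = 9.15.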
def normalize_timeseries(self, gsc_daily: List[Dict[str, Any]], bing_daily: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
"""
Normalize timeseries data from GSC and Bing to aligned date series.
Args:
gsc_daily: GSC daily data
bing_daily: Bing daily data
Returns:
Normalized timeseries data
"""
try:
# Convert to date-indexed dictionaries
gsc_by_date = {item["date"]: item for item in gsc_daily}
bing_by_date = {item["date"]: item for item in bing_daily}
# Get all unique dates
all_dates = set(gsc_by_date.keys()) | set(bing_by_date.keys())
sorted_dates = sorted(all_dates)
# Create normalized timeseries
timeseries = []
for date in sorted_dates:
gsc_item = gsc_by_date.get(date, {})
bing_item = bing_by_date.get(date, {})
normalized_item = {
"date": date,
"clicks": gsc_item.get("clicks", 0) + bing_item.get("clicks", 0),
"impressions": gsc_item.get("impressions", 0) + bing_item.get("impressions", 0),
"ctr": self._calculate_daily_ctr(gsc_item, bing_item),
"position": self._calculate_daily_position(gsc_item, bing_item),
"gsc_clicks": gsc_item.get("clicks", 0),
"gsc_impressions": gsc_item.get("impressions", 0),
"bing_clicks": bing_item.get("clicks", 0),
"bing_impressions": bing_item.get("impressions", 0)
}
timeseries.append(normalized_item)
logger.debug(f"Normalized timeseries with {len(timeseries)} data points")
return timeseries
except Exception as e:
logger.error(f"Error normalizing timeseries: {e}")
return []
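# Worked example: dates present in only one source are kept, with the
# missing platform contributing zeros.
#   gsc_daily = [{"date": "2026-01-01", "clicks": 5, "impressions": 100, "position": 9.0}]
#   bing_daily = [{"date": "2026-01-01", "clicks": 1, "impressions": 50, "position": 15.0},
#                 {"date": "2026-01-02", "clicks": 2, "impressions": 40, "position": 14.0}]
# yields two rows: 2026-01-01 with clicks=6, impressions=150, ctr=0.04,
# position=(9.0 + 15.0) / 2 = 12.0; and 2026-01-02 with clicks=2,
# impressions=40, ctr=0.05, position=14.0 (the only non-zero source).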
def top_queries_combined(self, gsc_data: Dict[str, Any], bing_data: Dict[str, Any], limit: int = 20) -> List[Dict[str, Any]]:
"""
Get top queries combined from GSC and Bing data.
Args:
gsc_data: GSC data
bing_data: Bing data
limit: Maximum number of queries to return
Returns:
List of top queries with source tags
"""
try:
# Extract queries from each platform
gsc_queries = self._extract_gsc_queries(gsc_data)
bing_queries = self._extract_bing_queries(bing_data)
# Combine and deduplicate queries
query_map = {}
# Add GSC queries
for query in gsc_queries:
query_text = query.get("query", "").lower()
if query_text in query_map:
# Case-insensitive duplicate within GSC; accumulate rather than overwrite
existing = query_map[query_text]
existing["gsc_clicks"] += query.get("clicks", 0)
existing["gsc_impressions"] += query.get("impressions", 0)
existing["gsc_ctr"] = query.get("ctr", 0)
existing["gsc_position"] = query.get("position", 0)
existing["total_clicks"] = existing.get("total_clicks", 0) + query.get("clicks", 0)
existing["total_impressions"] = existing.get("total_impressions", 0) + query.get("impressions", 0)
else:
query_map[query_text] = {
"query": query.get("query", ""),
"gsc_clicks": query.get("clicks", 0),
"gsc_impressions": query.get("impressions", 0),
"gsc_ctr": query.get("ctr", 0),
"gsc_position": query.get("position", 0),
"bing_clicks": 0,
"bing_impressions": 0,
"bing_ctr": 0,
"bing_position": 0,
"total_clicks": query.get("clicks", 0),
"total_impressions": query.get("impressions", 0),
"sources": ["gsc"]
}
# Add Bing queries
for query in bing_queries:
query_text = query.get("query", "").lower()
if query_text in query_map:
# Merge into the existing entry (GSC overlap or a duplicate within Bing)
existing = query_map[query_text]
existing["bing_clicks"] += query.get("clicks", 0)
existing["bing_impressions"] += query.get("impressions", 0)
existing["bing_ctr"] = query.get("ctr", 0)
existing["bing_position"] = query.get("position", 0)
existing["total_clicks"] = existing.get("total_clicks", 0) + query.get("clicks", 0)
existing["total_impressions"] = existing.get("total_impressions", 0) + query.get("impressions", 0)
if "bing" not in existing["sources"]:
existing["sources"].append("bing")
else:
query_map[query_text] = {
"query": query.get("query", ""),
"gsc_clicks": 0,
"gsc_impressions": 0,
"gsc_ctr": 0,
"gsc_position": 0,
"bing_clicks": query.get("clicks", 0),
"bing_impressions": query.get("impressions", 0),
"bing_ctr": query.get("ctr", 0),
"bing_position": query.get("position", 0),
"total_clicks": query.get("clicks", 0),
"total_impressions": query.get("impressions", 0),
"sources": ["bing"]
}
# Sort by total clicks and return top N
sorted_queries = sorted(
query_map.values(),
key=lambda x: x["total_clicks"],
reverse=True
)
logger.debug(f"Combined {len(sorted_queries)} unique queries, returning top {limit}")
return sorted_queries[:limit]
except Exception as e:
logger.error(f"Error combining top queries: {e}")
return []
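# Worked example: queries are merged case-insensitively across sources.
#   gsc_data = {"data": {"queries": [{"query": "seo audit", "clicks": 30, "impressions": 600}]}}
#   bing_data = {"data": {"queries": [{"query": "SEO Audit", "clicks": 5, "impressions": 200}]}}
# yields a single entry with total_clicks=35, total_impressions=800, and
# sources=["gsc", "bing"]; the original GSC casing of the query is kept.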
def _extract_gsc_metrics(self, gsc_data: Dict[str, Any]) -> Dict[str, Any]:
"""Extract metrics from GSC data."""
try:
if "error" in gsc_data:
return {}
data = gsc_data.get("data", {})
return {
"clicks": data.get("clicks", 0),
"impressions": data.get("impressions", 0),
"ctr": data.get("ctr", 0.0),
"position": data.get("position", 0.0),
"queries": len(data.get("queries", [])),
"pages": len(data.get("pages", [])),
"countries": data.get("countries", []),
"devices": data.get("devices", [])
}
except Exception as e:
logger.error(f"Error extracting GSC metrics: {e}")
return {}
def _extract_bing_metrics(self, bing_data: Dict[str, Any]) -> Dict[str, Any]:
"""Extract metrics from Bing data."""
try:
if "error" in bing_data:
return {}
data = bing_data.get("data", {})
return {
"clicks": data.get("clicks", 0),
"impressions": data.get("impressions", 0),
"ctr": data.get("ctr", 0.0),
"position": data.get("position", 0.0),
"queries": len(data.get("queries", [])),
"pages": len(data.get("pages", [])),
"countries": data.get("countries", []),
"devices": data.get("devices", [])
}
except Exception as e:
logger.error(f"Error extracting Bing metrics: {e}")
return {}
def _extract_gsc_queries(self, gsc_data: Dict[str, Any]) -> List[Dict[str, Any]]:
"""Extract queries from GSC data."""
try:
if "error" in gsc_data:
return []
data = gsc_data.get("data", {})
return data.get("queries", [])
except Exception as e:
logger.error(f"Error extracting GSC queries: {e}")
return []
def _extract_bing_queries(self, bing_data: Dict[str, Any]) -> List[Dict[str, Any]]:
"""Extract queries from Bing data."""
try:
if "error" in bing_data:
return []
data = bing_data.get("data", {})
return data.get("queries", [])
except Exception as e:
logger.error(f"Error extracting Bing queries: {e}")
return []
def _calculate_combined_ctr(self, gsc_metrics: Dict[str, Any], bing_metrics: Dict[str, Any]) -> float:
"""Calculate combined CTR from GSC and Bing metrics."""
try:
total_clicks = gsc_metrics.get("clicks", 0) + bing_metrics.get("clicks", 0)
total_impressions = gsc_metrics.get("impressions", 0) + bing_metrics.get("impressions", 0)
if total_impressions > 0:
return total_clicks / total_impressions
return 0.0
except Exception as e:
logger.error(f"Error calculating combined CTR: {e}")
return 0.0
def _calculate_combined_position(self, gsc_metrics: Dict[str, Any], bing_metrics: Dict[str, Any]) -> float:
"""Calculate combined average position from GSC and Bing metrics."""
try:
gsc_position = gsc_metrics.get("position", 0)
bing_position = bing_metrics.get("position", 0)
# Weight by impressions if available
gsc_impressions = gsc_metrics.get("impressions", 0)
bing_impressions = bing_metrics.get("impressions", 0)
total_impressions = gsc_impressions + bing_impressions
if total_impressions > 0:
return (gsc_position * gsc_impressions + bing_position * bing_impressions) / total_impressions
elif gsc_position > 0 and bing_position > 0:
return (gsc_position + bing_position) / 2
elif gsc_position > 0:
return gsc_position
elif bing_position > 0:
return bing_position
return 0.0
except Exception as e:
logger.error(f"Error calculating combined position: {e}")
return 0.0
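# Note: when impressions are reported, the blend is a weighted mean, e.g.
# GSC position 8.0 on 900 impressions and Bing position 14.0 on 100
# impressions give (8.0 * 900 + 14.0 * 100) / 1000 = 8.6; without
# impressions the two positions are averaged, and a single non-zero
# position is passed through unchanged.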
def _calculate_daily_ctr(self, gsc_item: Dict[str, Any], bing_item: Dict[str, Any]) -> float:
"""Calculate CTR for a single day."""
try:
total_clicks = gsc_item.get("clicks", 0) + bing_item.get("clicks", 0)
total_impressions = gsc_item.get("impressions", 0) + bing_item.get("impressions", 0)
if total_impressions > 0:
return total_clicks / total_impressions
return 0.0
except Exception as e:
logger.error(f"Error calculating daily CTR: {e}")
return 0.0
def _calculate_daily_position(self, gsc_item: Dict[str, Any], bing_item: Dict[str, Any]) -> float:
"""Calculate average position for a single day."""
try:
gsc_position = gsc_item.get("position", 0)
bing_position = bing_item.get("position", 0)
if gsc_position > 0 and bing_position > 0:
return (gsc_position + bing_position) / 2
elif gsc_position > 0:
return gsc_position
elif bing_position > 0:
return bing_position
return 0.0
except Exception as e:
logger.error(f"Error calculating daily position: {e}")
return 0.0
def _combine_countries(self, gsc_countries: List[Dict[str, Any]], bing_countries: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
"""Combine country data from GSC and Bing."""
try:
country_map = {}
# Add GSC countries
for country in gsc_countries:
country_code = country.get("country", "")
if country_code in country_map:
existing = country_map[country_code]
existing["gsc_clicks"] = country.get("clicks", 0)
existing["gsc_impressions"] = country.get("impressions", 0)
existing["total_clicks"] = existing.get("total_clicks", 0) + country.get("clicks", 0)
existing["total_impressions"] = existing.get("total_impressions", 0) + country.get("impressions", 0)
else:
country_map[country_code] = {
"country": country_code,
"gsc_clicks": country.get("clicks", 0),
"gsc_impressions": country.get("impressions", 0),
"bing_clicks": 0,
"bing_impressions": 0,
"total_clicks": country.get("clicks", 0),
"total_impressions": country.get("impressions", 0)
}
# Add Bing countries
for country in bing_countries:
country_code = country.get("country", "")
if country_code in country_map:
existing = country_map[country_code]
existing["bing_clicks"] = country.get("clicks", 0)
existing["bing_impressions"] = country.get("impressions", 0)
existing["total_clicks"] = existing.get("total_clicks", 0) + country.get("clicks", 0)
existing["total_impressions"] = existing.get("total_impressions", 0) + country.get("impressions", 0)
else:
country_map[country_code] = {
"country": country_code,
"gsc_clicks": 0,
"gsc_impressions": 0,
"bing_clicks": country.get("clicks", 0),
"bing_impressions": country.get("impressions", 0),
"total_clicks": country.get("clicks", 0),
"total_impressions": country.get("impressions", 0)
}
# Sort by total clicks
return sorted(country_map.values(), key=lambda x: x["total_clicks"], reverse=True)
except Exception as e:
logger.error(f"Error combining countries: {e}")
return []
def _combine_devices(self, gsc_devices: List[Dict[str, Any]], bing_devices: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
"""Combine device data from GSC and Bing."""
try:
device_map = {}
# Add GSC devices
for device in gsc_devices:
device_type = device.get("device", "")
if device_type in device_map:
existing = device_map[device_type]
existing["gsc_clicks"] = device.get("clicks", 0)
existing["gsc_impressions"] = device.get("impressions", 0)
existing["total_clicks"] = existing.get("total_clicks", 0) + device.get("clicks", 0)
existing["total_impressions"] = existing.get("total_impressions", 0) + device.get("impressions", 0)
else:
device_map[device_type] = {
"device": device_type,
"gsc_clicks": device.get("clicks", 0),
"gsc_impressions": device.get("impressions", 0),
"bing_clicks": 0,
"bing_impressions": 0,
"total_clicks": device.get("clicks", 0),
"total_impressions": device.get("impressions", 0)
}
# Add Bing devices
for device in bing_devices:
device_type = device.get("device", "")
if device_type in device_map:
existing = device_map[device_type]
existing["bing_clicks"] = device.get("clicks", 0)
existing["bing_impressions"] = device.get("impressions", 0)
existing["total_clicks"] = existing.get("total_clicks", 0) + device.get("clicks", 0)
existing["total_impressions"] = existing.get("total_impressions", 0) + device.get("impressions", 0)
else:
device_map[device_type] = {
"device": device_type,
"gsc_clicks": 0,
"gsc_impressions": 0,
"bing_clicks": device.get("clicks", 0),
"bing_impressions": device.get("impressions", 0),
"total_clicks": device.get("clicks", 0),
"total_impressions": device.get("impressions", 0)
}
# Sort by total clicks
return sorted(device_map.values(), key=lambda x: x["total_clicks"], reverse=True)
except Exception as e:
logger.error(f"Error combining devices: {e}")
return []


@@ -0,0 +1,402 @@
"""
Competitive Analyzer Service
Leverages onboarding step 3 research data and combines it with GSC/Bing
query data to provide competitive insights. Unlike generic third-party
tools such as SEMrush or Ahrefs, it draws on the user's own analytics
and personalized content strategy.
"""
from typing import Dict, Any, List, Set
from datetime import datetime
from sqlalchemy.orm import Session
from utils.logger_utils import get_service_logger
from services.onboarding.data_service import OnboardingDataService
from services.calendar_generation_datasource_framework.data_processing.comprehensive_user_data import ComprehensiveUserDataProcessor
logger = get_service_logger("competitive_analyzer")
class CompetitiveAnalyzer:
"""Analyzes competitive landscape using onboarding research data and analytics."""
def __init__(self, db: Session):
"""Initialize the competitive analyzer."""
self.db = db
self.user_data_service = OnboardingDataService(db)
self.comprehensive_processor = ComprehensiveUserDataProcessor(db)
async def get_competitive_insights(self, user_id: str) -> Dict[str, Any]:
"""
Get comprehensive competitive insights for a user.
Args:
user_id: User ID
Returns:
Dictionary containing competitive insights
"""
try:
# Get user's research preferences and competitor data
research_prefs = self.user_data_service.get_user_research_preferences(user_id)
competitors = research_prefs.get('competitors', []) if research_prefs else []
if not competitors:
logger.info(f"No competitors found for user {user_id}")
return {
"competitor_keywords": [],
"content_gaps": [],
"opportunity_score": 0,
"competitors_analyzed": 0,
"last_updated": datetime.now().isoformat()
}
# Get comprehensive user data including competitor analysis
comprehensive_data = self.comprehensive_processor.get_comprehensive_user_data(user_id)
competitor_analysis = comprehensive_data.get('competitor_analysis', {})
# Extract competitor keywords and content topics
competitor_keywords = self._extract_competitor_keywords(competitor_analysis, competitors)
# Get user's current keywords from GSC/Bing (would be passed in real implementation)
user_keywords = self._get_user_keywords(user_id)
# Find content gaps
content_gaps = self._find_content_gaps(user_keywords, competitor_keywords)
# Calculate opportunity score
opportunity_score = self._calculate_opportunity_score(content_gaps, competitor_keywords)
# Generate actionable insights
insights = self._generate_insights(content_gaps, competitor_keywords, opportunity_score)
return {
"competitor_keywords": competitor_keywords,
"content_gaps": content_gaps,
"opportunity_score": opportunity_score,
"competitors_analyzed": len(competitors),
"insights": insights,
"last_updated": datetime.now().isoformat()
}
except Exception as e:
logger.error(f"Error getting competitive insights for user {user_id}: {e}")
return {
"competitor_keywords": [],
"content_gaps": [],
"opportunity_score": 0,
"competitors_analyzed": 0,
"insights": [],
"last_updated": datetime.now().isoformat()
}
def _extract_competitor_keywords(self, competitor_analysis: Dict[str, Any], competitors: List[str]) -> List[Dict[str, Any]]:
"""Extract keywords from competitor analysis."""
try:
keywords = []
# Extract from competitor analysis data
for competitor_url in competitors:
competitor_data = competitor_analysis.get(competitor_url, {})
# Extract keywords from various sources
competitor_keywords = competitor_data.get('keywords', [])
content_topics = competitor_data.get('content_topics', [])
meta_keywords = competitor_data.get('meta_keywords', [])
# Combine all keyword sources
all_keywords = set()
all_keywords.update(competitor_keywords)
all_keywords.update(content_topics)
all_keywords.update(meta_keywords)
# Add to keywords list with competitor attribution
for keyword in all_keywords:
if keyword and len(keyword.strip()) > 0:
keywords.append({
"keyword": keyword.strip(),
"competitor": competitor_url,
"source": "analysis",
"volume_estimate": competitor_data.get('keyword_volume', {}).get(keyword, 0),
"difficulty_estimate": competitor_data.get('keyword_difficulty', {}).get(keyword, 0),
"relevance_score": self._calculate_relevance_score(keyword, competitor_data)
})
# Remove duplicates and sort by relevance
unique_keywords = self._deduplicate_keywords(keywords)
sorted_keywords = sorted(unique_keywords, key=lambda x: x['relevance_score'], reverse=True)
logger.debug(f"Extracted {len(sorted_keywords)} unique competitor keywords")
return sorted_keywords[:100] # Limit to top 100
except Exception as e:
logger.error(f"Error extracting competitor keywords: {e}")
return []
def _get_user_keywords(self, user_id: str) -> Set[str]:
"""Get user's current keywords from GSC/Bing data."""
try:
# In a real implementation, this would fetch from GSC/Bing APIs
# For now, return empty set as placeholder
# This would be called from the dashboard service with actual query data
return set()
except Exception as e:
logger.error(f"Error getting user keywords: {e}")
return set()
def _find_content_gaps(self, user_keywords: Set[str], competitor_keywords: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
"""Find content gaps between user and competitors."""
try:
content_gaps = []
user_keywords_lower = {kw.lower() for kw in user_keywords}
for comp_keyword in competitor_keywords:
keyword = comp_keyword['keyword'].lower()
# Check if user doesn't have this keyword
if keyword not in user_keywords_lower:
# Check for partial matches (related keywords)
is_related = any(
self._are_keywords_related(keyword, user_kw)
for user_kw in user_keywords_lower
)
if not is_related:
content_gaps.append({
"keyword": comp_keyword['keyword'],
"competitor": comp_keyword['competitor'],
"volume_estimate": comp_keyword.get('volume_estimate', 0),
"difficulty_estimate": comp_keyword.get('difficulty_estimate', 0),
"relevance_score": comp_keyword['relevance_score'],
"opportunity_type": self._classify_opportunity_type(comp_keyword),
"content_suggestion": self._generate_content_suggestion(comp_keyword)
})
# Sort by opportunity score (volume * relevance / difficulty)
sorted_gaps = sorted(
content_gaps,
key=lambda x: (x['volume_estimate'] * x['relevance_score']) / max(x['difficulty_estimate'], 1),
reverse=True
)
logger.debug(f"Found {len(sorted_gaps)} content gaps")
return sorted_gaps[:50] # Limit to top 50
except Exception as e:
logger.error(f"Error finding content gaps: {e}")
return []
def _calculate_opportunity_score(self, content_gaps: List[Dict[str, Any]], competitor_keywords: List[Dict[str, Any]]) -> int:
"""Calculate overall opportunity score (0-100)."""
try:
if not content_gaps:
return 0
# Calculate average opportunity metrics
avg_volume = sum(gap['volume_estimate'] for gap in content_gaps) / len(content_gaps)
avg_relevance = sum(gap['relevance_score'] for gap in content_gaps) / len(content_gaps)
avg_difficulty = sum(gap['difficulty_estimate'] for gap in content_gaps) / len(content_gaps)
# Calculate opportunity score
# Higher volume and relevance = higher score
# Lower difficulty = higher score
volume_score = min(avg_volume / 1000, 1.0) * 40 # Max 40 points for volume
relevance_score = avg_relevance * 30 # Max 30 points for relevance
difficulty_score = max(0, (10 - avg_difficulty) / 10) * 30 # Max 30 points for low difficulty
total_score = volume_score + relevance_score + difficulty_score
opportunity_score = min(int(total_score), 100)
logger.debug(f"Calculated opportunity score: {opportunity_score}")
return opportunity_score
except Exception as e:
logger.error(f"Error calculating opportunity score: {e}")
return 0
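# Worked example: gaps averaging volume=800, relevance=0.6, difficulty=4
# score min(800 / 1000, 1.0) * 40 = 32 for volume, 0.6 * 30 = 18 for
# relevance, and (10 - 4) / 10 * 30 = 18 for low difficulty, giving an
# overall opportunity score of int(68) = 68.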
def _generate_insights(self, content_gaps: List[Dict[str, Any]], competitor_keywords: List[Dict[str, Any]], opportunity_score: int) -> List[Dict[str, Any]]:
"""Generate actionable insights from competitive analysis."""
try:
insights = []
# High opportunity score insight
if opportunity_score > 70:
insights.append({
"type": "opportunity",
"priority": "high",
"title": "High Competitive Opportunity",
"description": f"Your opportunity score is {opportunity_score}% - competitors are ranking for many keywords you're not targeting.",
"action": "Create content for the identified keyword gaps to capture more organic traffic."
})
elif opportunity_score > 40:
insights.append({
"type": "opportunity",
"priority": "medium",
"title": "Moderate Competitive Opportunity",
"description": f"Your opportunity score is {opportunity_score}% - there are some keyword gaps you could target.",
"action": "Review the content gaps and prioritize high-volume, low-difficulty keywords."
})
# Content gap insights
if content_gaps:
high_volume_gaps = [gap for gap in content_gaps if gap['volume_estimate'] > 500]
if high_volume_gaps:
insights.append({
"type": "content",
"priority": "high",
"title": "High-Volume Content Gaps",
"description": f"Found {len(high_volume_gaps)} high-volume keywords that competitors rank for but you don't.",
"action": "Create comprehensive content targeting these high-volume keywords."
})
low_difficulty_gaps = [gap for gap in content_gaps if gap['difficulty_estimate'] < 3]
if low_difficulty_gaps:
insights.append({
"type": "content",
"priority": "medium",
"title": "Low-Difficulty Content Gaps",
"description": f"Found {len(low_difficulty_gaps)} low-difficulty keywords that would be easy to rank for.",
"action": "Quick wins: Create content for these low-difficulty keywords first."
})
# Competitor analysis insights
if competitor_keywords:
top_competitors = {}
for kw in competitor_keywords:
competitor = kw['competitor']
if competitor not in top_competitors:
top_competitors[competitor] = 0
top_competitors[competitor] += 1
top_competitor = max(top_competitors.items(), key=lambda x: x[1]) if top_competitors else None
if top_competitor:
insights.append({
"type": "competitive",
"priority": "medium",
"title": "Top Competitor Analysis",
"description": f"{top_competitor[0]} has the most keyword overlap with your content strategy.",
"action": f"Analyze {top_competitor[0]}'s content strategy for additional keyword opportunities."
})
return insights
except Exception as e:
logger.error(f"Error generating insights: {e}")
return []
def _deduplicate_keywords(self, keywords: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
"""Remove duplicate keywords and merge data."""
try:
keyword_map = {}
for kw in keywords:
keyword = kw['keyword'].lower()
if keyword in keyword_map:
# Merge data from multiple competitors
existing = keyword_map[keyword]
existing['competitors'].append(kw['competitor'])
existing['volume_estimate'] = max(existing['volume_estimate'], kw['volume_estimate'])
existing['relevance_score'] = max(existing['relevance_score'], kw['relevance_score'])
else:
keyword_map[keyword] = {
'keyword': kw['keyword'],
'competitor': kw['competitor'],  # kept: _find_content_gaps and _generate_insights read this key
'competitors': [kw['competitor']],
'source': kw['source'],
'volume_estimate': kw['volume_estimate'],
'difficulty_estimate': kw['difficulty_estimate'],
'relevance_score': kw['relevance_score']
}
return list(keyword_map.values())
except Exception as e:
logger.error(f"Error deduplicating keywords: {e}")
return []
def _calculate_relevance_score(self, keyword: str, competitor_data: Dict[str, Any]) -> float:
"""Calculate relevance score for a keyword based on competitor data."""
try:
# Base relevance score
relevance = 0.5
# Increase relevance based on keyword frequency in competitor content
content_frequency = competitor_data.get('content_frequency', {})
if keyword in content_frequency:
relevance += min(content_frequency[keyword] / 10, 0.3)
# Increase relevance based on meta keyword presence
meta_keywords = competitor_data.get('meta_keywords', [])
if keyword in meta_keywords:
relevance += 0.2
# Increase relevance based on title presence
titles = competitor_data.get('titles', [])
if any(keyword.lower() in title.lower() for title in titles):
relevance += 0.2
# Normalize to 0-1 range
return min(relevance, 1.0)
except Exception as e:
logger.error(f"Error calculating relevance score: {e}")
return 0.5
def _are_keywords_related(self, keyword1: str, keyword2: str) -> bool:
"""Check if two keywords are related."""
try:
# Simple similarity check - can be enhanced with NLP
words1 = set(keyword1.lower().split())
words2 = set(keyword2.lower().split())
# Check for word overlap
overlap = len(words1.intersection(words2))
total_words = len(words1.union(words2))
if total_words == 0:
return False
similarity = overlap / total_words
return similarity > 0.3 # 30% word overlap threshold
except Exception as e:
logger.error(f"Error checking keyword relatedness: {e}")
return False
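# Worked example: "seo audit checklist" vs "technical seo audit" share
# {"seo", "audit"} out of a 4-word union, so similarity = 2 / 4 = 0.5,
# which exceeds the 0.3 threshold and counts as related.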
def _classify_opportunity_type(self, keyword_data: Dict[str, Any]) -> str:
"""Classify the type of opportunity for a keyword."""
try:
volume = keyword_data.get('volume_estimate', 0)
difficulty = keyword_data.get('difficulty_estimate', 0)
relevance = keyword_data.get('relevance_score', 0)
if volume > 1000 and difficulty < 5 and relevance > 0.7:
return "high_priority"
elif volume > 500 and difficulty < 7 and relevance > 0.5:
return "medium_priority"
elif volume > 100 and difficulty < 8:
return "low_priority"
else:
return "long_term"
except Exception as e:
logger.error(f"Error classifying opportunity type: {e}")
return "unknown"
def _generate_content_suggestion(self, keyword_data: Dict[str, Any]) -> str:
"""Generate content suggestion for a keyword."""
try:
keyword = keyword_data['keyword']
opportunity_type = self._classify_opportunity_type(keyword_data)
suggestions = {
"high_priority": f"Create comprehensive, in-depth content targeting '{keyword}' - high volume, low difficulty opportunity.",
"medium_priority": f"Consider creating content around '{keyword}' - good volume with moderate competition.",
"low_priority": f"'{keyword}' could be a good long-tail keyword to target in future content.",
"long_term": f"'{keyword}' might be worth monitoring for future content opportunities."
}
return suggestions.get(opportunity_type, f"Consider creating content around '{keyword}'.")
except Exception as e:
logger.error(f"Error generating content suggestion: {e}")
return f"Consider creating content around '{keyword_data.get('keyword', 'this keyword')}'."
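A minimal driver sketch, not part of this commit; the import path and `database.SessionLocal` session factory are assumptions standing in for however the project builds its SQLAlchemy sessions:

import asyncio

from database import SessionLocal  # hypothetical session factory
from services.seo_dashboard.competitive_analyzer import CompetitiveAnalyzer  # path is an assumption

async def main() -> None:
    db = SessionLocal()
    try:
        analyzer = CompetitiveAnalyzer(db)
        insights = await analyzer.get_competitive_insights("42")
        # opportunity_score is 0-100; content_gaps is capped at the top 50
        print(insights["opportunity_score"], len(insights["content_gaps"]))
    finally:
        db.close()

if __name__ == "__main__":
    asyncio.run(main())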


@@ -0,0 +1,397 @@
"""
SEO Dashboard Service
Main orchestration service that coordinates data fetching from GSC, Bing,
and other analytics sources for the SEO dashboard. Leverages existing
OAuth connections from onboarding step 5.
"""
from typing import Dict, Any, Optional, List
from datetime import datetime
from sqlalchemy.orm import Session
from utils.logger_utils import get_service_logger
from services.gsc_service import GSCService
from services.integrations.bing_oauth import BingOAuthService
from services.bing_analytics_storage_service import BingAnalyticsStorageService
from services.analytics_cache_service import AnalyticsCacheService
from services.onboarding.data_service import OnboardingDataService
from .analytics_aggregator import AnalyticsAggregator
from .competitive_analyzer import CompetitiveAnalyzer
logger = get_service_logger("seo_dashboard")
class SEODashboardService:
"""Main service for SEO dashboard data orchestration."""
def __init__(self, db: Session):
"""Initialize the SEO dashboard service."""
self.db = db
self.gsc_service = GSCService()
self.bing_oauth = BingOAuthService()
self.bing_storage = BingAnalyticsStorageService("sqlite:///alwrity.db")
self.analytics_cache = AnalyticsCacheService()
self.user_data_service = OnboardingDataService(db)
self.analytics_aggregator = AnalyticsAggregator()
self.competitive_analyzer = CompetitiveAnalyzer(db)
async def get_platform_status(self, user_id: str) -> Dict[str, Any]:
"""Get connection status for GSC and Bing platforms."""
try:
# Check GSC connection
gsc_credentials = self.gsc_service.load_user_credentials(user_id)
gsc_connected = gsc_credentials is not None
# Check Bing connection with detailed status
bing_token_status = self.bing_oauth.get_user_token_status(user_id)
bing_connected = bing_token_status.get('has_active_tokens', False)
# Get cached data for last sync info
gsc_data = self.analytics_cache.get('gsc_analytics', user_id)
bing_data = self.analytics_cache.get('bing_analytics', user_id)
return {
"gsc": {
"connected": gsc_connected,
"sites": self._get_gsc_sites(user_id) if gsc_connected else [],
"last_sync": gsc_data.get('last_updated') if gsc_data else None,
"status": "connected" if gsc_connected else "disconnected"
},
"bing": {
"connected": bing_connected,
"sites": self._get_bing_sites(user_id) if bing_connected else [],
"last_sync": bing_data.get('last_updated') if bing_data else None,
"status": "connected" if bing_connected else ("expired" if bing_token_status.get('has_expired_tokens') else "disconnected"),
"has_expired_tokens": bing_token_status.get('has_expired_tokens', False),
"last_token_date": bing_token_status.get('last_token_date'),
"total_tokens": bing_token_status.get('total_tokens', 0)
}
}
except Exception as e:
logger.error(f"Error getting platform status for user {user_id}: {e}")
return {
"gsc": {"connected": False, "sites": [], "last_sync": None, "status": "error"},
"bing": {"connected": False, "sites": [], "last_sync": None, "status": "error"}
}
async def get_dashboard_overview(self, user_id: str, site_url: Optional[str] = None) -> Dict[str, Any]:
"""Get comprehensive dashboard overview with real GSC/Bing data."""
try:
# Get user's website URL if not provided
if not site_url:
# Try to get from website analysis first
website_analysis = self.user_data_service.get_user_website_analysis(int(user_id))
if website_analysis and website_analysis.get('website_url'):
site_url = website_analysis['website_url']
else:
# Fallback: try to get from Bing sites
bing_sites = self._get_bing_sites(user_id)
if bing_sites:
site_url = bing_sites[0] # Use first Bing site
else:
site_url = 'https://alwrity.com' # Default fallback
# Get platform status
platform_status = await self.get_platform_status(user_id)
# Get analytics data
gsc_data = await self.get_gsc_data(user_id, site_url)
bing_data = await self.get_bing_data(user_id, site_url)
# Aggregate metrics
summary = self.analytics_aggregator.combine_metrics(gsc_data, bing_data)
timeseries = self.analytics_aggregator.normalize_timeseries(
gsc_data.get("timeseries", []),
bing_data.get("timeseries", [])
)
# Get competitive insights
competitor_insights = await self.competitive_analyzer.get_competitive_insights(user_id)
# Calculate health score
health_score = self._calculate_health_score(summary, platform_status)
# Generate AI insights
ai_insights = await self._generate_ai_insights(summary, timeseries, competitor_insights)
return {
"website_url": site_url,
"platforms": platform_status,
"summary": summary,
"timeseries": timeseries,
"competitor_insights": competitor_insights,
"health_score": health_score,
"ai_insights": ai_insights,
"last_updated": datetime.now().isoformat()
}
except Exception as e:
logger.error(f"Error getting dashboard overview for user {user_id}: {e}")
raise
async def get_gsc_data(self, user_id: str, site_url: Optional[str] = None) -> Dict[str, Any]:
"""Get GSC data for the specified site."""
try:
# Check if user has GSC credentials
credentials = self.gsc_service.load_user_credentials(user_id)
if not credentials:
return {"error": "GSC not connected", "data": [], "status": "disconnected"}
# Try to get from cache first (entries are scoped by platform, user, and site)
cached_data = self.analytics_cache.get('gsc_analytics', user_id, site_url=site_url or 'default')
if cached_data:
return cached_data
# Fetch fresh data from GSC API
if site_url:
gsc_data = self.gsc_service.get_search_analytics(user_id, site_url)
else:
# Get all sites for user
sites = self._get_gsc_sites(user_id)
if sites:
gsc_data = self.gsc_service.get_search_analytics(user_id, sites[0])
else:
return {"error": "No GSC sites found", "data": [], "status": "disconnected"}
# Cache the data
self.analytics_cache.set('gsc_analytics', user_id, gsc_data, ttl_override=3600, site_url=site_url or 'default') # 1 hour cache
return gsc_data
except Exception as e:
logger.error(f"Error getting GSC data for user {user_id}: {e}")
return {"error": str(e), "data": [], "status": "error"}
async def get_bing_data(self, user_id: str, site_url: Optional[str] = None) -> Dict[str, Any]:
"""Get Bing Webmaster Tools data for the specified site."""
try:
# Check if user has Bing tokens
tokens = self.bing_oauth.get_user_tokens(user_id)
if not tokens:
return {"error": "Bing not connected", "data": [], "status": "disconnected"}
# Try to get from cache first (entries are scoped by platform, user, and site)
cached_data = self.analytics_cache.get('bing_analytics', user_id, site_url=site_url or 'default')
if cached_data:
return cached_data
# Get data from Bing storage service
if site_url:
bing_data = self.bing_storage.get_analytics_summary(user_id, site_url, days=30)
else:
# Get all sites for user
sites = self._get_bing_sites(user_id)
if sites:
logger.info(f"Using first Bing site for analysis: {sites[0]}")
bing_data = self.bing_storage.get_analytics_summary(user_id, sites[0], days=30)
else:
logger.warning(f"No Bing sites found for user {user_id}")
return {"error": "No Bing sites found", "data": [], "status": "disconnected"}
# Cache the data
self.analytics_cache.set('bing_analytics', user_id, bing_data, ttl_override=3600, site_url=site_url or 'default') # 1 hour cache
return bing_data
except Exception as e:
logger.error(f"Error getting Bing data for user {user_id}: {e}")
return {"error": str(e), "data": [], "status": "error"}
async def get_competitive_insights(self, user_id: str) -> Dict[str, Any]:
"""Get competitive insights from onboarding step 3 data."""
try:
return await self.competitive_analyzer.get_competitive_insights(user_id)
except Exception as e:
logger.error(f"Error getting competitive insights for user {user_id}: {e}")
return {
"competitor_keywords": [],
"content_gaps": [],
"opportunity_score": 0
}
async def refresh_analytics_data(self, user_id: str, site_url: Optional[str] = None) -> Dict[str, Any]:
"""Refresh analytics data by invalidating cache and fetching fresh data."""
try:
# Invalidate cache
cache_keys = [
f"gsc_analytics:{user_id}",
f"bing_analytics:{user_id}",
f"gsc_analytics:{user_id}:{site_url or 'default'}",
f"bing_analytics:{user_id}:{site_url or 'default'}"
]
for key in cache_keys:
self.analytics_cache.delete(key)
# Fetch fresh data
gsc_result = await self.get_gsc_data(user_id, site_url)
bing_result = await self.get_bing_data(user_id, site_url)
return {
"status": "success",
"message": "Analytics data refreshed successfully",
"last_updated": datetime.now().isoformat(),
"platforms": {
"gsc": {"status": "success" if "error" not in gsc_result else "error"},
"bing": {"status": "success" if "error" not in bing_result else "error"}
}
}
except Exception as e:
logger.error(f"Error refreshing analytics data for user {user_id}: {e}")
return {
"status": "error",
"message": f"Failed to refresh analytics data: {str(e)}",
"last_updated": datetime.now().isoformat()
}
def _get_gsc_sites(self, user_id: str) -> List[str]:
"""Get GSC sites for user."""
try:
credentials = self.gsc_service.load_user_credentials(user_id)
if not credentials:
return []
# This would need to be implemented in GSCService
# For now, return empty list
return []
except Exception as e:
logger.error(f"Error getting GSC sites for user {user_id}: {e}")
return []
def _get_bing_sites(self, user_id: str) -> List[str]:
"""Get Bing sites for user."""
try:
# Use the existing get_user_sites method from BingOAuthService
sites = self.bing_oauth.get_user_sites(user_id)
if not sites:
logger.warning(f"No Bing sites found for user {user_id}")
return []
# Extract site URLs from the sites data
site_urls = []
for site in sites:
if isinstance(site, dict) and site.get('url'):
site_urls.append(site['url'])
elif isinstance(site, str):
site_urls.append(site)
logger.info(f"Found {len(site_urls)} Bing sites for user {user_id}: {site_urls}")
return site_urls
except Exception as e:
logger.error(f"Error getting Bing sites for user {user_id}: {e}")
return []
def _calculate_health_score(self, summary: Dict[str, Any], platform_status: Dict[str, Any]) -> Dict[str, Any]:
"""Calculate overall SEO health score."""
try:
# Scoring rubric totals 100: connected platforms (up to 50), traffic (up to 30), CTR (up to 20)
score = 0
# Base score for connected platforms
if platform_status.get("gsc", {}).get("connected"):
score += 30
if platform_status.get("bing", {}).get("connected"):
score += 20
# Traffic score (0-30)
clicks = summary.get("clicks", 0)
if clicks > 1000:
score += 30
elif clicks > 500:
score += 20
elif clicks > 100:
score += 10
# CTR score (0-20)
ctr = summary.get("ctr", 0)
if ctr > 0.05: # 5%
score += 20
elif ctr > 0.03: # 3%
score += 15
elif ctr > 0.01: # 1%
score += 10
# Determine trend and color
if score >= 80:
trend = "up"
label = "EXCELLENT"
color = "#4CAF50"
elif score >= 60:
trend = "stable"
label = "GOOD"
color = "#2196F3"
elif score >= 40:
trend = "down"
label = "NEEDS IMPROVEMENT"
color = "#FF9800"
else:
trend = "down"
label = "POOR"
color = "#F44336"
return {
"score": score,
"change": 0, # Would need historical data to calculate
"trend": trend,
"label": label,
"color": color
}
except Exception as e:
logger.error(f"Error calculating health score: {e}")
return {
"score": 0,
"change": 0,
"trend": "unknown",
"label": "UNKNOWN",
"color": "#9E9E9E"
}
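# Worked example: GSC connected (+30), Bing connected (+20), 600 clicks
# (+20), and CTR 0.04 (+15) give score=85, which maps to trend="up",
# label="EXCELLENT", color="#4CAF50".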
async def _generate_ai_insights(self, summary: Dict[str, Any], timeseries: List[Dict[str, Any]], competitor_insights: Dict[str, Any]) -> List[Dict[str, Any]]:
"""Generate AI insights from analytics data."""
try:
insights = []
# Traffic insights
clicks = summary.get("clicks", 0)
ctr = summary.get("ctr", 0)
if clicks > 0 and ctr < 0.02: # Low CTR
insights.append({
"type": "opportunity",
"priority": "high",
"text": f"Your CTR is {ctr:.1%}, which is below average. Consider optimizing your meta descriptions and titles.",
"category": "performance"
})
# Competitive insights
opportunity_score = competitor_insights.get("opportunity_score", 0)
if opportunity_score > 70:
insights.append({
"type": "opportunity",
"priority": "high",
"text": f"High opportunity score of {opportunity_score}% - competitors are ranking for keywords you're not targeting.",
"category": "competitive"
})
# Content gaps
content_gaps = competitor_insights.get("content_gaps", [])
if content_gaps:
insights.append({
"type": "action",
"priority": "medium",
"text": f"Found {len(content_gaps)} content gaps. Consider creating content for these topics.",
"category": "content"
})
return insights
except Exception as e:
logger.error(f"Error generating AI insights: {e}")
return []
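A minimal end-to-end sketch, not part of this commit, showing how a caller might check platform connections and force a cache refresh; as above, the import path and `database.SessionLocal` are assumptions:

import asyncio

from database import SessionLocal  # hypothetical session factory
from services.seo_dashboard import SEODashboardService  # path is an assumption

async def main() -> None:
    db = SessionLocal()
    try:
        service = SEODashboardService(db)
        status = await service.get_platform_status("42")
        if status["gsc"]["connected"] or status["bing"]["connected"]:
            result = await service.refresh_analytics_data("42")
            print(result["status"], result["message"])
        else:
            print("No connected platforms; connect GSC or Bing in onboarding step 5.")
    finally:
        db.close()

if __name__ == "__main__":
    asyncio.run(main())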