Files
ALwrity/backend/services/seo_tools/gsc_strategy_insights_service.py

509 lines
19 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
GSC Strategy Insights Service for SEO Dashboard
Transforms Google Search Console data into strategic insights optimized for
SEO Dashboard (not blog topic suggestions). Focuses on:
- Trend analysis and performance monitoring
- ROI-weighted opportunity prioritization
- Competitive positioning insights
- Impact forecasting and recommendations
This service builds upon GSCBrainstormService but focuses on dashboard needs:
- Broader SEO strategy context
- Historical trend analysis
- Competitive benchmarking
- Multi-metric ranking and scoring
"""
from typing import Dict, Any, List, Optional, Tuple
from datetime import datetime, timedelta
import asyncio
from dataclasses import dataclass
from enum import Enum
from loguru import logger
import json
from services.gsc_service import GSCService
from services.gsc_brainstorm_service import GSCBrainstormService
from services.llm_providers.main_text_generation import llm_text_gen
# Enums for strategy types
class StrategyType(str, Enum):
    """Types of strategic insights.

    Mixes in ``str`` so every member is also a plain string equal to its
    value; callers in this module serialize members through ``.value``.
    """
    # Keyword close to the top of page 1 (positions 4-10 per the ranking code).
    QUICK_WIN = "quick_win"
    # Keyword ranking on page 2 (positions 11-20 per the ranking code).
    KEYWORD_GAP = "keyword_gap"
    # High-impression keyword with a low click-through rate.
    CONTENT_OPPORTUNITY = "content_opportunity"
    # The remaining members are declared for the dashboard but are not
    # produced by any code visible in this module.
    PAGE_OPTIMIZATION = "page_optimization"
    COMPETITIVE_GAP = "competitive_gap"
    MARKET_INSIGHT = "market_insight"
    TREND_ALERT = "trend_alert"
    SEASONAL_PATTERN = "seasonal_pattern"
class OpportunitySeverity(str, Enum):
    """Severity levels for opportunities.

    Each level corresponds to a band of the 0-100 ROI score; the mapping is
    applied by ``GSCStrategyInsightsService._get_severity``. Lower bounds are
    inclusive.
    """
    CRITICAL = "critical"  # 80-100 ROI score
    HIGH = "high"  # 60-79 ROI score
    MEDIUM = "medium"  # 40-59 ROI score
    LOW = "low"  # 20-39 ROI score
    WATCH = "watch"  # <20 ROI score
# Data classes for structured responses
@dataclass
class StrategyOpportunity:
    """Represents a single strategic opportunity.

    Instances are created by
    ``GSCStrategyInsightsService._get_ranked_opportunities`` and sorted by
    ``roi_score`` before being serialized for the dashboard. All fields are
    required; field order is the positional constructor order.
    """
    type: StrategyType  # Category of the opportunity
    keyword: str  # Search query the opportunity refers to
    description: str  # Short human-readable summary
    roi_score: float  # 0-100
    priority: int  # 1-10
    effort_hours: float  # Estimated implementation effort, in hours
    timeline_weeks: int  # Estimated time to complete, in weeks
    current_position: float  # Current ranking position for the keyword
    impressions: int  # Impression count for the keyword
    current_ctr: float  # NOTE(review): rendered with a "%" suffix elsewhere, so presumably a percentage - confirm
    estimated_impact: float  # Monthly clicks gained
    severity: OpportunitySeverity  # Band derived from roi_score
    recommendations: List[str]  # Suggested actions, in display order
    related_keywords: List[str]  # Related queries (currently always empty in this module)
    timestamp: datetime  # When the opportunity was generated
@dataclass
class TrendMetric:
    """Represents a performance trend for one keyword and one metric.

    Declared for the planned trend analysis; nothing in the visible part of
    this module instantiates it yet.
    """
    keyword: str  # Search query being tracked
    metric: str  # 'position', 'impressions', 'clicks', 'ctr'
    current_value: float  # Latest value of the metric
    value_30d_ago: float  # Value of the metric 30 days ago
    value_90d_ago: float  # Value of the metric 90 days ago
    trend: str  # 'up', 'down', 'stable'
    trend_percentage: float  # -100 to +100
    momentum: float  # Acceleration of trend
    seasonal: bool  # True when the trend is flagged as seasonal
    anomaly: bool  # True when the value is flagged as an anomaly
@dataclass
class HealthMetrics:
    """Overall dashboard health metrics.

    Structured counterpart of the health payload; the service in this module
    currently returns plain dictionaries and does not instantiate this class
    in the code visible here.
    """
    health_score: int  # 0-100
    score_trend: str  # 'up', 'down', 'stable'
    score_change: float  # Percentage change
    total_keywords: int  # Number of keywords analyzed
    page_1_keywords: int  # Number of keywords ranking on page 1
    avg_position: float  # Average ranking position
    avg_ctr: float  # Average click-through rate
    total_impressions: int  # Sum of impressions
    total_clicks: int  # Sum of clicks
    opportunities_count: int  # Total opportunities found
    quick_wins_count: int  # Opportunities of type quick win
    keyword_gaps_count: int  # Opportunities of type keyword gap
    competitive_gaps_count: int  # Opportunities of type competitive gap
    timestamp: datetime  # When the metrics were computed
    period: str  # 'daily', 'weekly', 'monthly'
class GSCStrategyInsightsService:
    """
    Service for generating strategic SEO dashboard insights from GSC data.

    Key differences from GSCBrainstormService:
    1. Dashboard-focused context (not blog-specific)
    2. Trend analysis with historical data
    3. ROI-weighted scoring
    4. Competitive positioning
    5. Impact forecasting
    6. Multi-metric health scoring

    Trend analysis and competitive positioning are placeholders for now and
    return a ``'pending'`` status.

    NOTE(review): the opportunities, health-metrics and quick-summary helpers
    each run the same brainstorm analysis, so one dashboard request triggers
    it three times. Sharing a single result would remove the duplication but
    changes the failure semantics of the individual sections.
    """

    # Fallback user id, used only when a caller does not supply a real one.
    _DEFAULT_USER_ID = "dashboard"
    # Keyword selector understood by the brainstorm service: analyze everything.
    _ALL_KEYWORDS = "all"

    def __init__(self, gsc_service: Optional["GSCService"] = None):
        """
        Initialize the strategy insights service.

        Args:
            gsc_service: Optional GSCService instance (uses default if not provided)
        """
        self.service_name = "gsc_strategy_insights"
        self.gsc_service = gsc_service or GSCService()
        # Hand the brainstorm service the *resolved* client so both services
        # share one GSCService instead of the raw (possibly None) argument.
        self.brainstorm_service = GSCBrainstormService(self.gsc_service)
        logger.info(f"Initialized {self.service_name}")

    async def get_dashboard_strategy(
        self,
        user_id: str,
        site_url: str,
        include_trends: bool = True,
        include_competitive: bool = True,
        top_n: int = 20
    ) -> Dict[str, Any]:
        """
        Get comprehensive strategy insights for dashboard display.

        The individual analyses run concurrently. A failure in one analysis is
        reported inside its own section of ``data`` and does not fail the
        whole request.

        Args:
            user_id: User ID the GSC data is fetched for
            site_url: Website URL
            include_trends: Include trend analysis
            include_competitive: Include competitive analysis
            top_n: Number of top opportunities to return

        Returns:
            On success: ``status='success'``, ``data`` (one entry per
            analysis), ``generated_at``, ``execution_time_seconds`` and
            ``site_url``. On an unexpected failure: ``status='error'``,
            ``error`` and ``generated_at``.
        """
        try:
            logger.info(f"Generating dashboard strategy for {site_url}")
            # NOTE(review): datetime.utcnow() is deprecated from Python 3.12;
            # kept because switching to an aware datetime changes the
            # 'generated_at' string format.
            start_time = datetime.utcnow()

            # Always-on analyses; user_id is forwarded so data is fetched for
            # the requesting user rather than a placeholder account.
            tasks = {
                'opportunities': self._get_ranked_opportunities(
                    site_url, top_n, user_id=user_id
                ),
                'health_metrics': self._calculate_health_metrics(
                    site_url, user_id=user_id
                ),
                'quick_summary': self._generate_quick_summary(
                    site_url, user_id=user_id
                ),
            }
            # Conditional analyses
            if include_trends:
                tasks['trends'] = self._analyze_performance_trends(site_url)
            if include_competitive:
                tasks['competitive'] = self._analyze_competitive_positioning(site_url)

            # Execute all tasks concurrently; exceptions come back as results.
            results = await asyncio.gather(*tasks.values(), return_exceptions=True)

            # Aggregate results. BaseException (not just Exception) so that a
            # returned CancelledError is reported as a failure instead of
            # being placed in the payload as a raw object.
            strategy_data = {}
            for task_name, result in zip(tasks, results):
                if isinstance(result, BaseException):
                    logger.error(f"Strategy task {task_name} failed: {str(result)}")
                    strategy_data[task_name] = {'status': 'failed', 'error': str(result)}
                else:
                    strategy_data[task_name] = result

            finished_at = datetime.utcnow()
            return {
                'status': 'success',
                'data': strategy_data,
                'generated_at': finished_at.isoformat(),
                'execution_time_seconds': (finished_at - start_time).total_seconds(),
                'site_url': site_url,
            }
        except Exception as e:
            logger.error(f"Error generating dashboard strategy: {str(e)}")
            return {
                'status': 'error',
                'error': str(e),
                'generated_at': datetime.utcnow().isoformat(),
            }

    async def _fetch_brainstorm(
        self,
        site_url: str,
        user_id: str = _DEFAULT_USER_ID
    ) -> Any:
        """Run the brainstorm analysis over all keywords of ``site_url``."""
        return await self.brainstorm_service.brainstorm_topics(
            user_id=user_id or self._DEFAULT_USER_ID,
            keywords=self._ALL_KEYWORDS,  # Special case: all keywords
            site_url=site_url
        )

    @staticmethod
    def _has_brainstorm_data(brainstorm_result: Any) -> bool:
        """Return True when the brainstorm result is non-empty and error-free."""
        return bool(brainstorm_result) and 'error' not in brainstorm_result

    @staticmethod
    def _no_data_response() -> Dict[str, Any]:
        """Build the payload returned when brainstorm data is unavailable."""
        return {'status': 'no_data', 'error': 'Could not fetch brainstorm data'}

    async def _get_ranked_opportunities(
        self,
        site_url: str,
        top_n: int = 20,
        user_id: str = _DEFAULT_USER_ID
    ) -> Dict[str, Any]:
        """
        Get ROI-weighted ranked opportunities.

        Scoring formula (0-100):
            ROI = 0.40 × (traffic_impact) +
                  0.30 × (ease_of_implementation) +
                  0.20 × (competitive_advantage) +
                  0.10 × (momentum_score)

        Args:
            site_url: Website URL
            top_n: Number of top opportunities (values below 0 are treated as 0)
            user_id: User ID the brainstorm data is fetched for

        Returns:
            ``status='success'`` with the top ``opportunities`` (highest ROI
            first) and ``total_opportunities``; ``status='no_data'`` when the
            brainstorm result is empty or reports an error; ``status='error'``
            when anything raises.
        """
        try:
            # Get brainstorm opportunities (reuse existing analysis)
            brainstorm_result = await self._fetch_brainstorm(site_url, user_id)
            if not self._has_brainstorm_data(brainstorm_result):
                return self._no_data_response()

            all_opportunities = [
                self._quick_win_opportunity(win)
                for win in brainstorm_result.get('quick_wins', [])
            ]
            all_opportunities.extend(
                self._content_opportunity(opp)
                for opp in brainstorm_result.get('content_opportunities', [])
            )
            all_opportunities.extend(
                self._keyword_gap_opportunity(gap)
                for gap in brainstorm_result.get('keyword_gaps', [])
            )

            # Sort by ROI score descending (stable, so ties keep input order)
            ranked = sorted(all_opportunities, key=lambda x: x.roi_score, reverse=True)

            # A negative top_n would otherwise slice from the end of the list.
            limit = max(0, top_n)
            return {
                'status': 'success',
                'opportunities': [
                    self._opportunity_to_dict(opp) for opp in ranked[:limit]
                ],
                'total_opportunities': len(ranked),
            }
        except Exception as e:
            logger.error(f"Error ranking opportunities: {str(e)}")
            return {'status': 'error', 'error': str(e)}

    def _quick_win_opportunity(self, win: Dict[str, Any]) -> "StrategyOpportunity":
        """Build the opportunity for a quick win (positions 4-10)."""
        roi = self._calculate_roi_score(
            traffic_impact=min(100, (win['impressions'] / 1000) * 10),
            ease=80,  # Positions 4-10 are relatively easy
            competitive=50,
            momentum=60
        )
        return StrategyOpportunity(
            type=StrategyType.QUICK_WIN,
            keyword=win['keyword'],
            description=f"Position {win['position']} → page 1 ranking",
            roi_score=roi,
            priority=1,
            effort_hours=2,
            timeline_weeks=1,
            current_position=win['position'],
            impressions=win['impressions'],
            current_ctr=win['current_ctr'],
            estimated_impact=win.get('estimated_traffic_gain', 0),
            severity=self._get_severity(roi),
            recommendations=[
                "Update title and meta description",
                "Improve content quality and depth",
                "Add internal links from authority pages"
            ],
            related_keywords=self._find_related_keywords(win['keyword']),
            timestamp=datetime.utcnow()
        )

    def _content_opportunity(self, opp: Dict[str, Any]) -> "StrategyOpportunity":
        """Build the opportunity for a high-volume, low-CTR keyword."""
        roi = self._calculate_roi_score(
            traffic_impact=min(100, (opp['impressions'] / 2000) * 10),
            ease=70,  # Meta updates are easy
            competitive=40,
            momentum=50
        )
        return StrategyOpportunity(
            type=StrategyType.CONTENT_OPPORTUNITY,
            keyword=opp['keyword'],
            description=f"{opp['impressions']} impressions at position {opp['current_position']}",
            roi_score=roi,
            priority=2,
            effort_hours=3,
            timeline_weeks=1,
            current_position=opp['current_position'],
            impressions=opp['impressions'],
            current_ctr=opp['current_ctr'],
            estimated_impact=opp.get('estimated_traffic_gain', 0),
            severity=self._get_severity(roi),
            recommendations=[
                f"Improve CTR from {opp['current_ctr']}% to 5%+",
                "A/B test meta descriptions",
                "Review SERP position and update title angle"
            ],
            related_keywords=self._find_related_keywords(opp['keyword']),
            timestamp=datetime.utcnow()
        )

    def _keyword_gap_opportunity(self, gap: Dict[str, Any]) -> "StrategyOpportunity":
        """Build the opportunity for a keyword gap (positions 11-20)."""
        roi = self._calculate_roi_score(
            traffic_impact=min(100, (gap['estimated_traffic_if_page1'] / 500) * 10),
            ease=50,  # Requires content improvements
            competitive=70,
            momentum=60
        )
        return StrategyOpportunity(
            type=StrategyType.KEYWORD_GAP,
            keyword=gap['keyword'],
            description=f"Position {gap['position']} → large traffic opportunity",
            roi_score=roi,
            priority=2,
            effort_hours=8,
            timeline_weeks=4,
            current_position=gap['position'],
            impressions=gap['impressions'],
            current_ctr=gap['current_ctr'],
            estimated_impact=gap.get('estimated_traffic_if_page1', 0),
            severity=self._get_severity(roi),
            recommendations=[
                "Create comprehensive guide on this topic",
                "Increase content depth and topical coverage",
                "Build topical authority in this space"
            ],
            related_keywords=self._find_related_keywords(gap['keyword']),
            timestamp=datetime.utcnow()
        )

    @staticmethod
    def _opportunity_to_dict(opp: "StrategyOpportunity") -> Dict[str, Any]:
        """Serialize an opportunity into the dashboard payload format."""
        return {
            'type': opp.type.value,
            'keyword': opp.keyword,
            # The description and CTR are computed for display, so they are
            # part of the payload instead of being dropped.
            'description': opp.description,
            'roi_score': round(opp.roi_score, 1),
            'priority': opp.priority,
            'effort_hours': opp.effort_hours,
            'timeline_weeks': opp.timeline_weeks,
            'current_position': opp.current_position,
            'impressions': opp.impressions,
            'current_ctr': opp.current_ctr,
            'estimated_impact': round(opp.estimated_impact, 1),
            'severity': opp.severity.value,
            'recommendations': opp.recommendations,
            'related_keywords': opp.related_keywords,
        }

    async def _calculate_health_metrics(
        self,
        site_url: str,
        user_id: str = _DEFAULT_USER_ID
    ) -> Dict[str, Any]:
        """
        Calculate comprehensive health metrics for dashboard.

        The values are read from the ``summary`` section of the brainstorm
        result; missing values default to 0.

        Args:
            site_url: Website URL
            user_id: User ID the brainstorm data is fetched for

        Returns:
            ``status='success'`` with the health metrics; ``status='no_data'``
            when the brainstorm result is empty or reports an error (instead
            of a misleading all-zero success); ``status='error'`` when
            anything raises.
        """
        try:
            # Get brainstorm summary (has health score)
            brainstorm_result = await self._fetch_brainstorm(site_url, user_id)
            if not self._has_brainstorm_data(brainstorm_result):
                return self._no_data_response()

            summary = brainstorm_result.get('summary', {})
            return {
                'status': 'success',
                'health_score': summary.get('health_score', 0),
                'health_trend': 'stable',  # TODO: Compare with historical
                'total_keywords': summary.get('total_keywords_analyzed', 0),
                # NOTE(review): only the 'positions_1_3' bucket is counted,
                # although page 1 usually means positions 1-10 - confirm
                # against the keyword_distribution schema.
                'page_1_keywords': summary.get('keyword_distribution', {}).get('positions_1_3', 0),
                'avg_position': summary.get('avg_position', 0),
                'avg_ctr': summary.get('avg_ctr', 0),
                'ctr_vs_benchmark': summary.get('ctr_vs_benchmark', 0),
                'total_impressions': summary.get('total_impressions', 0),
                'total_clicks': summary.get('total_clicks', 0),
                'timestamp': datetime.utcnow().isoformat(),
            }
        except Exception as e:
            logger.error(f"Error calculating health metrics: {str(e)}")
            return {'status': 'error', 'error': str(e)}

    async def _generate_quick_summary(
        self,
        site_url: str,
        user_id: str = _DEFAULT_USER_ID
    ) -> Dict[str, Any]:
        """
        Generate a quick text summary of key insights.

        Args:
            site_url: Website URL
            user_id: User ID the brainstorm data is fetched for

        Returns:
            ``status='success'`` with the ``summary`` text and ``key_metrics``
            counts; ``status='no_data'`` when the brainstorm result is empty
            or reports an error; ``status='error'`` when anything raises.
        """
        try:
            brainstorm_result = await self._fetch_brainstorm(site_url, user_id)
            if not self._has_brainstorm_data(brainstorm_result):
                return self._no_data_response()

            health_score = brainstorm_result.get('summary', {}).get('health_score', 0)
            quick_wins_count = len(brainstorm_result.get('quick_wins', []))
            opportunities_count = len(brainstorm_result.get('content_opportunities', []))
            gaps_count = len(brainstorm_result.get('keyword_gaps', []))

            # Generate summary text
            summary_text = (
                f"Found {quick_wins_count} quick wins (positions 4-10), "
                f"{opportunities_count} content optimization opportunities (high volume, low CTR), "
                f"and {gaps_count} keyword gaps on page 2+ that could boost traffic. "
                f"Overall SEO health: {health_score}/100. "
            )
            return {
                'status': 'success',
                'summary': summary_text,
                'key_metrics': {
                    'quick_wins': quick_wins_count,
                    'opportunities': opportunities_count,
                    'gaps': gaps_count,
                    'health_score': health_score,
                }
            }
        except Exception as e:
            logger.error(f"Error generating quick summary: {str(e)}")
            return {'status': 'error', 'error': str(e)}

    async def _analyze_performance_trends(self, site_url: str) -> Dict[str, Any]:
        """Analyze performance trends over time (placeholder, returns 'pending')."""
        # TODO: Implement historical trend analysis
        # This would require storing historical GSC snapshots
        return {
            'status': 'pending',
            'message': 'Trend analysis requires historical data collection',
            'note': 'To be implemented in Phase 2'
        }

    async def _analyze_competitive_positioning(self, site_url: str) -> Dict[str, Any]:
        """Analyze competitive positioning (placeholder, returns 'pending')."""
        # TODO: Implement competitive analysis
        # This would require competitor keyword data
        return {
            'status': 'pending',
            'message': 'Competitive analysis requires competitor data integration',
            'note': 'To be implemented in Phase 2'
        }

    def _calculate_roi_score(
        self,
        traffic_impact: float,
        ease: float,
        competitive: float,
        momentum: float
    ) -> float:
        """
        Calculate ROI score (0-100).

        Formula:
            ROI = 0.40 × traffic_impact +
                  0.30 × ease +
                  0.20 × competitive +
                  0.10 × momentum

        Each component is clamped to 0-100 before weighting, so an
        out-of-range component (including a negative one) cannot distort the
        contribution of the others.

        Args:
            traffic_impact: Traffic impact component
            ease: Ease-of-implementation component
            competitive: Competitive-advantage component
            momentum: Momentum component

        Returns:
            Weighted score between 0 and 100.
        """
        def clamp(value: float) -> float:
            return max(0.0, min(100.0, value))

        roi = (
            0.40 * clamp(traffic_impact) +
            0.30 * clamp(ease) +
            0.20 * clamp(competitive) +
            0.10 * clamp(momentum)
        )
        # Guard against floating-point drift just outside the range.
        return min(100, max(0, roi))

    def _get_severity(self, roi_score: float) -> "OpportunitySeverity":
        """Get severity level based on ROI score (lower bounds inclusive)."""
        if roi_score >= 80:
            return OpportunitySeverity.CRITICAL
        if roi_score >= 60:
            return OpportunitySeverity.HIGH
        if roi_score >= 40:
            return OpportunitySeverity.MEDIUM
        if roi_score >= 20:
            return OpportunitySeverity.LOW
        return OpportunitySeverity.WATCH

    def _find_related_keywords(self, keyword: str) -> List[str]:
        """Find related keywords (placeholder, always returns an empty list)."""
        # TODO: Implement semantic similarity search
        # For now, return empty list
        return []
# Public API of this module, exported for router usage. These are the only
# names bound by `from <module> import *`.
__all__ = [
    'GSCStrategyInsightsService',
    'StrategyOpportunity',
    'StrategyType',
    'OpportunitySeverity',
    'HealthMetrics',
    'TrendMetric',
]