# ALwrity/backend/services/seo_tools/gsc_strategy_insights_service.py

"""
GSC Strategy Insights Service for SEO Dashboard

Transforms Google Search Console data into strategic insights optimized for
SEO Dashboard (not blog topic suggestions). Focuses on:
- Trend analysis and performance monitoring
- ROI-weighted opportunity prioritization
- Competitive positioning insights
- Impact forecasting and recommendations

This service builds upon GSCBrainstormService but focuses on dashboard needs:
- Broader SEO strategy context
- Historical trend analysis
- Competitive benchmarking
- Multi-metric ranking and scoring
"""

from typing import Dict, Any, List, Optional, Tuple
from datetime import datetime, timedelta
import asyncio
from dataclasses import dataclass
from enum import Enum
from loguru import logger
import json

from services.gsc_service import GSCService
from services.gsc_brainstorm_service import GSCBrainstormService
from services.llm_providers.main_text_generation import llm_text_gen


# Enums for strategy types
class StrategyType(str, Enum):
    """
    Types of strategic insights.

    Mixing in ``str`` makes every member compare equal to its plain string
    value (e.g. ``StrategyType.QUICK_WIN == "quick_win"``); the service emits
    ``member.value`` in its response dictionaries.

    Within this module only QUICK_WIN, CONTENT_OPPORTUNITY and KEYWORD_GAP are
    assigned to opportunities; the remaining members are declared but not
    referenced by the code in this file.
    """
    # Assigned to entries of the brainstorm result's 'quick_wins' list.
    QUICK_WIN = "quick_win"
    # Assigned to entries of the brainstorm result's 'keyword_gaps' list.
    KEYWORD_GAP = "keyword_gap"
    # Assigned to entries of the brainstorm result's 'content_opportunities' list.
    CONTENT_OPPORTUNITY = "content_opportunity"
    # The members below are not produced anywhere in this module.
    PAGE_OPTIMIZATION = "page_optimization"
    COMPETITIVE_GAP = "competitive_gap"
    MARKET_INSIGHT = "market_insight"
    TREND_ALERT = "trend_alert"
    SEASONAL_PATTERN = "seasonal_pattern"


class OpportunitySeverity(str, Enum):
    """
    Severity levels for opportunities.

    Each level corresponds to a band of the 0-100 ROI score; the mapping from
    score to level is implemented by
    ``GSCStrategyInsightsService._get_severity`` using inclusive lower bounds.
    """
    CRITICAL = "critical"      # ROI score >= 80
    HIGH = "high"              # ROI score >= 60 and < 80
    MEDIUM = "medium"          # ROI score >= 40 and < 60
    LOW = "low"                # ROI score >= 20 and < 40
    WATCH = "watch"            # ROI score < 20


# Data classes for structured responses
@dataclass
class StrategyOpportunity:
    """
    Represents a single strategic opportunity.

    Instances are built by ``GSCStrategyInsightsService._get_ranked_opportunities``
    from brainstorm entries, sorted by ``roi_score`` and then flattened into
    plain dictionaries for the response. All fields are required (no defaults).
    """
    # Category of the opportunity.
    type: StrategyType
    # Search query the opportunity refers to.
    keyword: str
    # Short human-readable summary built by the service.
    description: str
    # Weighted ROI score produced by _calculate_roi_score.
    roi_score: float  # 0-100
    # The service in this module only assigns 1 or 2.
    priority: int  # 1-10
    # Fixed per opportunity type in this module (2, 3 or 8).
    effort_hours: float
    # Fixed per opportunity type in this module (1 or 4).
    timeline_weeks: int
    # Copied from the brainstorm entry's 'position' / 'current_position' field.
    current_position: float
    # Copied from the brainstorm entry's 'impressions' field.
    impressions: int
    # Copied from the brainstorm entry's 'current_ctr' field.
    # NOTE(review): formatted with a trailing '%' in recommendations, so this is
    # presumably a percentage rather than a 0-1 ratio — confirm against the
    # brainstorm service.
    current_ctr: float
    estimated_impact: float  # Monthly clicks gained
    # Derived from roi_score via _get_severity.
    severity: OpportunitySeverity
    # Suggested actions, as display strings.
    recommendations: List[str]
    # Currently always empty: _find_related_keywords is a placeholder.
    related_keywords: List[str]
    # Set from datetime.utcnow() (naive UTC) when the service creates the object.
    timestamp: datetime


@dataclass
class TrendMetric:
    """
    Represents a performance trend for one keyword and one metric.

    Not instantiated by the code in this module: trend analysis
    (``_analyze_performance_trends``) is currently a placeholder, so this class
    only defines the intended shape of a trend record.
    """
    keyword: str
    metric: str  # 'position', 'impressions', 'clicks', 'ctr'
    current_value: float
    # Historical reference points for the same metric.
    value_30d_ago: float
    value_90d_ago: float
    trend: str  # 'up', 'down', 'stable'
    trend_percentage: float  # -100 to +100
    momentum: float  # Acceleration of trend
    # Flags; how they are meant to be computed is not defined in this module.
    seasonal: bool
    anomaly: bool


@dataclass
class HealthMetrics:
    """
    Overall dashboard health metrics.

    Not instantiated by the code in this module:
    ``GSCStrategyInsightsService._calculate_health_metrics`` returns a plain
    dictionary with a similar, but not identical, set of keys.
    """
    health_score: int  # 0-100
    score_trend: str  # 'up', 'down', 'stable'
    score_change: float  # Percentage change
    # Keyword counts.
    total_keywords: int
    page_1_keywords: int
    # Site-wide averages and totals.
    avg_position: float
    avg_ctr: float
    total_impressions: int
    total_clicks: int
    # Opportunity counts, overall and by category.
    opportunities_count: int
    quick_wins_count: int
    keyword_gaps_count: int
    competitive_gaps_count: int
    timestamp: datetime
    period: str  # 'daily', 'weekly', 'monthly'


class GSCStrategyInsightsService:
    """
    Service for generating strategic SEO dashboard insights from GSC data.

    Key differences from GSCBrainstormService:
    1. Dashboard-focused context (not blog-specific)
    2. Trend analysis with historical data
    3. ROI-weighted scoring
    4. Competitive positioning
    5. Impact forecasting
    6. Multi-metric health scoring
    """

    def __init__(self, gsc_service: Optional[GSCService] = None):
        """
        Initialize the strategy insights service.

        Args:
            gsc_service: Optional GSCService instance (uses default if not provided)
        """
        self.service_name = "gsc_strategy_insights"
        self.gsc_service = gsc_service or GSCService()
        self.brainstorm_service = GSCBrainstormService(gsc_service)
        logger.info(f"Initialized {self.service_name}")

    async def get_dashboard_strategy(
        self,
        user_id: str,
        site_url: str,
        include_trends: bool = True,
        include_competitive: bool = True,
        top_n: int = 20
    ) -> Dict[str, Any]:
        """
        Get comprehensive strategy insights for dashboard display.

        Args:
            user_id: User ID for context
            site_url: Website URL
            include_trends: Include trend analysis
            include_competitive: Include competitive analysis
            top_n: Number of top opportunities to return

        Returns:
            Comprehensive strategy insights
        """
        try:
            logger.info(f"Generating dashboard strategy for {site_url}")
            start_time = datetime.utcnow()

            # Execute parallel analysis tasks
            tasks = {
                'opportunities': self._get_ranked_opportunities(site_url, top_n),
                'health_metrics': self._calculate_health_metrics(site_url),
                'quick_summary': self._generate_quick_summary(site_url),
            }

            # Conditional tasks
            if include_trends:
                tasks['trends'] = self._analyze_performance_trends(site_url)
            if include_competitive:
                tasks['competitive'] = self._analyze_competitive_positioning(site_url)

            # Execute all tasks concurrently
            results = await asyncio.gather(*tasks.values(), return_exceptions=True)

            # Aggregate results
            strategy_data = {}
            for task_name, result in zip(tasks.keys(), results):
                if isinstance(result, Exception):
                    logger.error(f"Strategy task {task_name} failed: {str(result)}")
                    strategy_data[task_name] = {'status': 'failed', 'error': str(result)}
                else:
                    strategy_data[task_name] = result

            execution_time = (datetime.utcnow() - start_time).total_seconds()

            return {
                'status': 'success',
                'data': strategy_data,
                'generated_at': datetime.utcnow().isoformat(),
                'execution_time_seconds': execution_time,
                'site_url': site_url,
            }

        except Exception as e:
            logger.error(f"Error generating dashboard strategy: {str(e)}")
            return {
                'status': 'error',
                'error': str(e),
                'generated_at': datetime.utcnow().isoformat(),
            }

    async def _get_ranked_opportunities(
        self,
        site_url: str,
        top_n: int = 20
    ) -> Dict[str, Any]:
        """
        Get ROI-weighted ranked opportunities.

        Scoring formula (0-100):
        ROI = 0.40 × (traffic_impact) +
              0.30 × (ease_of_implementation) +
              0.20 × (competitive_advantage) +
              0.10 × (momentum_score)

        Args:
            site_url: Website URL
            top_n: Number of top opportunities

        Returns:
            Ranked opportunities with ROI scores
        """
        try:
            # Get brainstorm opportunities (reuse existing analysis)
            brainstorm_result = await self.brainstorm_service.brainstorm_topics(
                user_id="dashboard",
                keywords="all",  # Special case: all keywords
                site_url=site_url
            )

            if not brainstorm_result or 'error' in brainstorm_result:
                return {'status': 'no_data', 'error': 'Could not fetch brainstorm data'}

            # Extract all opportunities
            all_opportunities = []

            # Quick wins (positions 4-10)
            for win in brainstorm_result.get('quick_wins', []):
                roi = self._calculate_roi_score(
                    traffic_impact=min(100, (win['impressions'] / 1000) * 10),
                    ease=80,  # Positions 4-10 are relatively easy
                    competitive=50,
                    momentum=60
                )
                opportunity = StrategyOpportunity(
                    type=StrategyType.QUICK_WIN,
                    keyword=win['keyword'],
                    description=f"Position {win['position']} → page 1 ranking",
                    roi_score=roi,
                    priority=1,
                    effort_hours=2,
                    timeline_weeks=1,
                    current_position=win['position'],
                    impressions=win['impressions'],
                    current_ctr=win['current_ctr'],
                    estimated_impact=win.get('estimated_traffic_gain', 0),
                    severity=self._get_severity(roi),
                    recommendations=[
                        "Update title and meta description",
                        "Improve content quality and depth",
                        "Add internal links from authority pages"
                    ],
                    related_keywords=self._find_related_keywords(win['keyword']),
                    timestamp=datetime.utcnow()
                )
                all_opportunities.append(opportunity)

            # Content opportunities (high volume, low CTR)
            for opp in brainstorm_result.get('content_opportunities', []):
                roi = self._calculate_roi_score(
                    traffic_impact=min(100, (opp['impressions'] / 2000) * 10),
                    ease=70,  # Meta updates are easy
                    competitive=40,
                    momentum=50
                )
                opportunity = StrategyOpportunity(
                    type=StrategyType.CONTENT_OPPORTUNITY,
                    keyword=opp['keyword'],
                    description=f"{opp['impressions']} impressions at position {opp['current_position']}",
                    roi_score=roi,
                    priority=2,
                    effort_hours=3,
                    timeline_weeks=1,
                    current_position=opp['current_position'],
                    impressions=opp['impressions'],
                    current_ctr=opp['current_ctr'],
                    estimated_impact=opp.get('estimated_traffic_gain', 0),
                    severity=self._get_severity(roi),
                    recommendations=[
                        f"Improve CTR from {opp['current_ctr']}% to 5%+",
                        "A/B test meta descriptions",
                        "Review SERP position and update title angle"
                    ],
                    related_keywords=self._find_related_keywords(opp['keyword']),
                    timestamp=datetime.utcnow()
                )
                all_opportunities.append(opportunity)

            # Keyword gaps (positions 11-20)
            for gap in brainstorm_result.get('keyword_gaps', []):
                roi = self._calculate_roi_score(
                    traffic_impact=min(100, (gap['estimated_traffic_if_page1'] / 500) * 10),
                    ease=50,  # Requires content improvements
                    competitive=70,
                    momentum=60
                )
                opportunity = StrategyOpportunity(
                    type=StrategyType.KEYWORD_GAP,
                    keyword=gap['keyword'],
                    description=f"Position {gap['position']} → large traffic opportunity",
                    roi_score=roi,
                    priority=2,
                    effort_hours=8,
                    timeline_weeks=4,
                    current_position=gap['position'],
                    impressions=gap['impressions'],
                    current_ctr=gap['current_ctr'],
                    estimated_impact=gap.get('estimated_traffic_if_page1', 0),
                    severity=self._get_severity(roi),
                    recommendations=[
                        "Create comprehensive guide on this topic",
                        "Increase content depth and topical coverage",
                        "Build topical authority in this space"
                    ],
                    related_keywords=self._find_related_keywords(gap['keyword']),
                    timestamp=datetime.utcnow()
                )
                all_opportunities.append(opportunity)

            # Sort by ROI score descending
            ranked = sorted(all_opportunities, key=lambda x: x.roi_score, reverse=True)

            # Convert to dictionaries and return top N
            return {
                'status': 'success',
                'opportunities': [
                    {
                        'type': opp.type.value,
                        'keyword': opp.keyword,
                        'roi_score': round(opp.roi_score, 1),
                        'priority': opp.priority,
                        'effort_hours': opp.effort_hours,
                        'timeline_weeks': opp.timeline_weeks,
                        'current_position': opp.current_position,
                        'impressions': opp.impressions,
                        'estimated_impact': round(opp.estimated_impact, 1),
                        'severity': opp.severity.value,
                        'recommendations': opp.recommendations,
                        'related_keywords': opp.related_keywords,
                    }
                    for opp in ranked[:top_n]
                ],
                'total_opportunities': len(ranked),
            }

        except Exception as e:
            logger.error(f"Error ranking opportunities: {str(e)}")
            return {'status': 'error', 'error': str(e)}

    async def _calculate_health_metrics(self, site_url: str) -> Dict[str, Any]:
        """
        Calculate comprehensive health metrics for dashboard.

        Metrics include:
        - Health score (0-100)
        - Keyword position distribution
        - Average CTR vs benchmark
        - Growth trends
        - Overall assessment
        """
        try:
            # Get brainstorm summary (has health score)
            brainstorm_result = await self.brainstorm_service.brainstorm_topics(
                user_id="dashboard",
                keywords="all",
                site_url=site_url
            )

            summary = brainstorm_result.get('summary', {})

            return {
                'status': 'success',
                'health_score': summary.get('health_score', 0),
                'health_trend': 'stable',  # TODO: Compare with historical
                'total_keywords': summary.get('total_keywords_analyzed', 0),
                'page_1_keywords': summary.get('keyword_distribution', {}).get('positions_1_3', 0),
                'avg_position': summary.get('avg_position', 0),
                'avg_ctr': summary.get('avg_ctr', 0),
                'ctr_vs_benchmark': summary.get('ctr_vs_benchmark', 0),
                'total_impressions': summary.get('total_impressions', 0),
                'total_clicks': summary.get('total_clicks', 0),
                'timestamp': datetime.utcnow().isoformat(),
            }

        except Exception as e:
            logger.error(f"Error calculating health metrics: {str(e)}")
            return {'status': 'error', 'error': str(e)}

    async def _generate_quick_summary(self, site_url: str) -> Dict[str, Any]:
        """Generate a quick text summary of key insights."""
        try:
            brainstorm_result = await self.brainstorm_service.brainstorm_topics(
                user_id="dashboard",
                keywords="all",
                site_url=site_url
            )

            summary = brainstorm_result.get('summary', {})
            quick_wins_count = len(brainstorm_result.get('quick_wins', []))
            opportunities_count = len(brainstorm_result.get('content_opportunities', []))
            gaps_count = len(brainstorm_result.get('keyword_gaps', []))

            # Generate summary text
            summary_text = (
                f"Found {quick_wins_count} quick wins (positions 4-10), "
                f"{opportunities_count} content optimization opportunities (high volume, low CTR), "
                f"and {gaps_count} keyword gaps on page 2+ that could boost traffic. "
                f"Overall SEO health: {summary.get('health_score', 0)}/100. "
            )

            return {
                'status': 'success',
                'summary': summary_text,
                'key_metrics': {
                    'quick_wins': quick_wins_count,
                    'opportunities': opportunities_count,
                    'gaps': gaps_count,
                    'health_score': summary.get('health_score', 0),
                }
            }

        except Exception as e:
            logger.error(f"Error generating quick summary: {str(e)}")
            return {'status': 'error', 'error': str(e)}

    async def _analyze_performance_trends(self, site_url: str) -> Dict[str, Any]:
        """Analyze performance trends over time."""
        # TODO: Implement historical trend analysis
        # This would require storing historical GSC snapshots
        return {
            'status': 'pending',
            'message': 'Trend analysis requires historical data collection',
            'note': 'To be implemented in Phase 2'
        }

    async def _analyze_competitive_positioning(self, site_url: str) -> Dict[str, Any]:
        """Analyze competitive positioning."""
        # TODO: Implement competitive analysis
        # This would require competitor keyword data
        return {
            'status': 'pending',
            'message': 'Competitive analysis requires competitor data integration',
            'note': 'To be implemented in Phase 2'
        }

    def _calculate_roi_score(
        self,
        traffic_impact: float,
        ease: float,
        competitive: float,
        momentum: float
    ) -> float:
        """
        Calculate ROI score (0-100).

        Formula:
        ROI = 0.40 × traffic_impact +
              0.30 × ease +
              0.20 × competitive +
              0.10 × momentum
        """
        roi = (
            0.40 * min(100, traffic_impact) +
            0.30 * min(100, ease) +
            0.20 * min(100, competitive) +
            0.10 * min(100, momentum)
        )
        return min(100, max(0, roi))

    def _get_severity(self, roi_score: float) -> OpportunitySeverity:
        """Get severity level based on ROI score."""
        if roi_score >= 80:
            return OpportunitySeverity.CRITICAL
        elif roi_score >= 60:
            return OpportunitySeverity.HIGH
        elif roi_score >= 40:
            return OpportunitySeverity.MEDIUM
        elif roi_score >= 20:
            return OpportunitySeverity.LOW
        else:
            return OpportunitySeverity.WATCH

    def _find_related_keywords(self, keyword: str) -> List[str]:
        """Find related keywords (placeholder)."""
        # TODO: Implement semantic similarity search
        # For now, return empty list
        return []


# Public API of this module (the names exported by a star-import);
# intended for router usage.
__all__ = [
    'GSCStrategyInsightsService',
    'StrategyOpportunity',
    'StrategyType',
    'OpportunitySeverity',
    'HealthMetrics',
    'TrendMetric',
]