# ALwrity/backend/services/content_gap_analyzer/competitor_analyzer.py

"""
Competitor Analyzer Service
Converted from competitor_analyzer.py for FastAPI integration.
"""

from typing import Dict, Any, List, Optional
from sqlalchemy.orm import Session
from loguru import logger
from datetime import datetime
import asyncio
import json
from collections import Counter, defaultdict

# Import AI providers
from services.llm_providers.main_text_generation import llm_text_gen
from services.llm_providers.gemini_provider import gemini_structured_json_response

# Import existing modules (will be updated to use FastAPI services)
from services.database import get_db_session
from .ai_engine_service import AIEngineService
from .website_analyzer import WebsiteAnalyzer

class CompetitorAnalyzer:
    """Analyzes competitor content and market position."""

    def __init__(self) -> None:
        """Create the analyzer together with its collaborating services.

        A ``WebsiteAnalyzer`` and an ``AIEngineService`` are constructed and
        stored on the instance, after which the construction is logged.
        """
        # Collaborators are built eagerly, website analyzer first.
        self.website_analyzer = WebsiteAnalyzer()
        self.ai_engine = AIEngineService()
        logger.info("CompetitorAnalyzer initialized")

    async def analyze_competitors(self, competitor_urls: List[str], industry: str) -> Dict[str, Any]:
        """
        Analyze competitor websites.

        Args:
            competitor_urls: List of competitor URLs to analyze
            industry: Industry category

        Returns:
            Dictionary containing competitor analysis results
        """
        try:
            logger.info(f"Starting competitor analysis for {len(competitor_urls)} competitors in {industry} industry")

            results = {
                'competitors': [],
                'market_position': {},
                'content_gaps': [],
                'advantages': [],
                'analysis_timestamp': datetime.utcnow().isoformat(),
                'industry': industry
            }

            # Analyze each competitor
            for url in competitor_urls:
                competitor_analysis = await self._analyze_single_competitor(url, industry)
                if competitor_analysis:
                    results['competitors'].append({
                        'url': url,
                        'analysis': competitor_analysis
                    })

            # Generate market position analysis using AI
            if results['competitors']:
                market_position = await self._evaluate_market_position(results['competitors'], industry)
                results['market_position'] = market_position

                # Identify content gaps
                content_gaps = await self._identify_content_gaps(results['competitors'])
                results['content_gaps'] = content_gaps

                # Generate competitive insights
                competitive_insights = await self._generate_competitive_insights(results)
                results['advantages'] = competitive_insights

            logger.info(f"Competitor analysis completed for {len(competitor_urls)} competitors")
            return results

        except Exception as e:
            logger.error(f"Error in competitor analysis: {str(e)}")
            return {}

    async def _analyze_single_competitor(self, url: str, industry: str) -> Optional[Dict[str, Any]]:
        """
        Analyze a single competitor website.

        Args:
            url: Competitor URL
            industry: Industry category

        Returns:
            Competitor analysis results
        """
        try:
            logger.info(f"Analyzing competitor: {url}")

            # TODO: Integrate with actual website analysis service
            # This will use the website analyzer service

            # Simulate competitor analysis
            analysis = {
                'content_count': 150,
                'avg_quality_score': 8.5,
                'top_keywords': ['AI', 'ML', 'Data Science'],
                'content_types': ['blog', 'case_study', 'whitepaper'],
                'publishing_frequency': 'weekly',
                'engagement_metrics': {
                    'avg_time_on_page': 180,
                    'bounce_rate': 0.35,
                    'social_shares': 45
                },
                'seo_metrics': {
                    'domain_authority': 75,
                    'page_speed': 85,
                    'mobile_friendly': True
                }
            }

            return analysis

        except Exception as e:
            logger.error(f"Error analyzing competitor {url}: {str(e)}")
            return None

    async def _evaluate_market_position(self, competitors: List[Dict[str, Any]], industry: str) -> Dict[str, Any]:
        """
        Evaluate market position using AI.

        Args:
            competitors: List of competitor analysis results
            industry: Industry category

        Returns:
            Market position analysis
        """
        try:
            logger.info("🤖 Evaluating market position using AI")

            # Create comprehensive prompt for market position analysis
            prompt = f"""
            Analyze the market position of competitors in the {industry} industry:

            Competitor Analyses:
            {json.dumps(competitors, indent=2)}

            Provide comprehensive market position analysis including:
            1. Market leader identification
            2. Content leader analysis
            3. Quality leader assessment
            4. Market gaps identification
            5. Opportunities analysis
            6. Competitive advantages
            7. Strategic positioning recommendations

            Format as structured JSON with detailed analysis.
            """

            # Use structured JSON response for better parsing
            response = gemini_structured_json_response(
                prompt=prompt,
                schema={
                    "type": "object",
                    "properties": {
                        "market_leader": {"type": "string"},
                        "content_leader": {"type": "string"},
                        "quality_leader": {"type": "string"},
                        "market_gaps": {
                            "type": "array",
                            "items": {"type": "string"}
                        },
                        "opportunities": {
                            "type": "array",
                            "items": {"type": "string"}
                        },
                        "competitive_advantages": {
                            "type": "array",
                            "items": {"type": "string"}
                        },
                        "strategic_recommendations": {
                            "type": "array",
                            "items": {
                                "type": "object",
                                "properties": {
                                    "type": {"type": "string"},
                                    "recommendation": {"type": "string"},
                                    "priority": {"type": "string"},
                                    "estimated_impact": {"type": "string"}
                                }
                            }
                        }
                    }
                }
            )

            # Handle response - gemini_structured_json_response returns dict directly
            if isinstance(response, dict):
                market_position = response
            elif isinstance(response, str):
                # If it's a string, try to parse as JSON
                try:
                    market_position = json.loads(response)
                except json.JSONDecodeError as e:
                    logger.error(f"Failed to parse AI response as JSON: {e}")
                    raise Exception(f"Invalid AI response format: {str(e)}")
            else:
                logger.error(f"Unexpected response type from AI service: {type(response)}")
                raise Exception(f"Unexpected response type from AI service: {type(response)}")
            logger.info("✅ AI market position analysis completed")
            return market_position

        except Exception as e:
            logger.error(f"Error evaluating market position: {str(e)}")
            # Return fallback response if AI fails
            return {
                'market_leader': 'competitor1.com',
                'content_leader': 'competitor2.com',
                'quality_leader': 'competitor3.com',
                'market_gaps': [
                    'Video content',
                    'Interactive content',
                    'User-generated content',
                    'Expert interviews',
                    'Industry reports'
                ],
                'opportunities': [
                    'Niche content development',
                    'Expert interviews',
                    'Industry reports',
                    'Case studies',
                    'Tutorial series'
                ],
                'competitive_advantages': [
                    'Technical expertise',
                    'Comprehensive guides',
                    'Industry insights',
                    'Expert opinions'
                ],
                'strategic_recommendations': [
                    {
                        'type': 'differentiation',
                        'recommendation': 'Focus on unique content angles',
                        'priority': 'high',
                        'estimated_impact': 'Brand differentiation'
                    },
                    {
                        'type': 'quality',
                        'recommendation': 'Improve content quality and depth',
                        'priority': 'high',
                        'estimated_impact': 'Authority building'
                    },
                    {
                        'type': 'innovation',
                        'recommendation': 'Develop innovative content formats',
                        'priority': 'medium',
                        'estimated_impact': 'Engagement improvement'
                    }
                ]
            }

    async def _identify_content_gaps(self, competitors: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """
        Identify content gaps using AI.

        Args:
            competitors: List of competitor analysis results

        Returns:
            List of content gaps
        """
        try:
            logger.info("🤖 Identifying content gaps using AI")

            # Create comprehensive prompt for content gap identification
            prompt = f"""
            Identify content gaps based on the following competitor analysis:

            Competitor Analysis: {json.dumps(competitors, indent=2)}

            Provide comprehensive content gap analysis including:
            1. Missing content topics
            2. Content depth gaps
            3. Content format gaps
            4. Content quality gaps
            5. SEO opportunity gaps
            6. Implementation priorities

            Format as structured JSON with detailed gaps.
            """

            # Use structured JSON response for better parsing
            response = gemini_structured_json_response(
                prompt=prompt,
                schema={
                    "type": "object",
                    "properties": {
                        "content_gaps": {
                            "type": "array",
                            "items": {
                                "type": "object",
                                "properties": {
                                    "gap_type": {"type": "string"},
                                    "description": {"type": "string"},
                                    "opportunity_level": {"type": "string"},
                                    "estimated_impact": {"type": "string"},
                                    "content_suggestions": {
                                        "type": "array",
                                        "items": {"type": "string"}
                                    },
                                    "priority": {"type": "string"},
                                    "implementation_time": {"type": "string"}
                                }
                            }
                        }
                    }
                }
            )

            # Handle response - gemini_structured_json_response returns dict directly
            if isinstance(response, dict):
                result = response
            elif isinstance(response, str):
                # If it's a string, try to parse as JSON
                try:
                    result = json.loads(response)
                except json.JSONDecodeError as e:
                    logger.error(f"Failed to parse AI response as JSON: {e}")
                    raise Exception(f"Invalid AI response format: {str(e)}")
            else:
                logger.error(f"Unexpected response type from AI service: {type(response)}")
                raise Exception(f"Unexpected response type from AI service: {type(response)}")

            content_gaps = result.get('content_gaps', [])
            logger.info(f"✅ AI content gap identification completed: {len(content_gaps)} gaps found")
            return content_gaps

        except Exception as e:
            logger.error(f"Error identifying content gaps: {str(e)}")
            # Return fallback response if AI fails
            return [
                {
                    'gap_type': 'video_content',
                    'description': 'Limited video tutorials and demonstrations',
                    'opportunity_level': 'high',
                    'estimated_impact': 'High engagement potential',
                    'content_suggestions': ['Video tutorials', 'Product demos', 'Expert interviews'],
                    'priority': 'high',
                    'implementation_time': '3-6 months'
                },
                {
                    'gap_type': 'interactive_content',
                    'description': 'No interactive tools or calculators',
                    'opportunity_level': 'medium',
                    'estimated_impact': 'Lead generation and engagement',
                    'content_suggestions': ['Interactive calculators', 'Assessment tools', 'Quizzes'],
                    'priority': 'medium',
                    'implementation_time': '2-4 months'
                },
                {
                    'gap_type': 'expert_insights',
                    'description': 'Limited expert interviews and insights',
                    'opportunity_level': 'high',
                    'estimated_impact': 'Authority building',
                    'content_suggestions': ['Expert interviews', 'Industry insights', 'Thought leadership'],
                    'priority': 'high',
                    'implementation_time': '1-3 months'
                }
            ]

    async def _generate_competitive_insights(self, analysis_results: Dict[str, Any]) -> List[Dict[str, Any]]:
        """
        Generate competitive insights using AI.

        Args:
            analysis_results: Complete competitor analysis results

        Returns:
            List of competitive insights
        """
        try:
            logger.info("🤖 Generating competitive insights using AI")

            # Create comprehensive prompt for competitive insight generation
            prompt = f"""
            Generate competitive insights based on the following analysis results:

            Analysis Results: {json.dumps(analysis_results, indent=2)}

            Provide comprehensive competitive insights including:
            1. Competitive advantages identification
            2. Market positioning opportunities
            3. Content strategy recommendations
            4. Differentiation strategies
            5. Implementation priorities
            6. Risk assessment and mitigation

            Format as structured JSON with detailed insights.
            """

            # Use structured JSON response for better parsing
            response = gemini_structured_json_response(
                prompt=prompt,
                schema={
                    "type": "object",
                    "properties": {
                        "competitive_insights": {
                            "type": "array",
                            "items": {
                                "type": "object",
                                "properties": {
                                    "insight_type": {"type": "string"},
                                    "insight": {"type": "string"},
                                    "opportunity": {"type": "string"},
                                    "priority": {"type": "string"},
                                    "estimated_impact": {"type": "string"},
                                    "implementation_suggestion": {"type": "string"}
                                }
                            }
                        }
                    }
                }
            )

            # Handle response - gemini_structured_json_response returns dict directly
            if isinstance(response, dict):
                result = response
            elif isinstance(response, str):
                # If it's a string, try to parse as JSON
                try:
                    result = json.loads(response)
                except json.JSONDecodeError as e:
                    logger.error(f"Failed to parse AI response as JSON: {e}")
                    raise Exception(f"Invalid AI response format: {str(e)}")
            else:
                logger.error(f"Unexpected response type from AI service: {type(response)}")
                raise Exception(f"Unexpected response type from AI service: {type(response)}")

            competitive_insights = result.get('competitive_insights', [])
            logger.info(f"✅ AI competitive insights generated: {len(competitive_insights)} insights")
            return competitive_insights

        except Exception as e:
            logger.error(f"Error generating competitive insights: {str(e)}")
            # Return fallback response if AI fails
            return [
                {
                    'insight_type': 'content_gap',
                    'insight': 'Competitors lack comprehensive video content',
                    'opportunity': 'Develop video tutorial series',
                    'priority': 'high',
                    'estimated_impact': 'High engagement and differentiation',
                    'implementation_suggestion': 'Start with basic tutorials, then advanced content'
                },
                {
                    'insight_type': 'quality_advantage',
                    'insight': 'Focus on depth over breadth in content',
                    'opportunity': 'Create comprehensive, authoritative content',
                    'priority': 'high',
                    'estimated_impact': 'Authority building and trust',
                    'implementation_suggestion': 'Develop pillar content with detailed sub-topics'
                },
                {
                    'insight_type': 'format_innovation',
                    'insight': 'Interactive content is missing from market',
                    'opportunity': 'Create interactive tools and calculators',
                    'priority': 'medium',
                    'estimated_impact': 'Lead generation and engagement',
                    'implementation_suggestion': 'Start with simple calculators, then complex tools'
                }
            ]

    async def analyze_content_structure(self, competitor_urls: List[str]) -> Dict[str, Any]:
        """
        Analyze content structure across competitors.

        Args:
            competitor_urls: List of competitor URLs

        Returns:
            Content structure analysis
        """
        try:
            logger.info("Analyzing content structure across competitors")

            structure_analysis = {
                'title_patterns': {},
                'meta_description_patterns': {},
                'content_hierarchy': {},
                'internal_linking': {},
                'external_linking': {}
            }

            # TODO: Implement actual content structure analysis
            # This will analyze title patterns, meta descriptions, content hierarchy, etc.

            for url in competitor_urls:
                # Simulate structure analysis
                structure_analysis['title_patterns'][url] = {
                    'avg_length': 55,
                    'keyword_density': 0.15,
                    'brand_mention': True
                }

                structure_analysis['meta_description_patterns'][url] = {
                    'avg_length': 155,
                    'call_to_action': True,
                    'keyword_inclusion': 0.8
                }

                structure_analysis['content_hierarchy'][url] = {
                    'h1_usage': 95,
                    'h2_usage': 85,
                    'h3_usage': 70,
                    'proper_hierarchy': True
                }

            logger.info("Content structure analysis completed")
            return structure_analysis

        except Exception as e:
            logger.error(f"Error in content structure analysis: {str(e)}")
            return {}

    async def analyze_content_performance(self, competitor_urls: List[str]) -> Dict[str, Any]:
        """
        Analyze content performance metrics for competitors.

        Args:
            competitor_urls: List of competitor URLs to analyze

        Returns:
            Content performance analysis
        """
        try:
            logger.info(f"Analyzing content performance for {len(competitor_urls)} competitors")

            # TODO: Implement actual content performance analysis
            # This would analyze engagement metrics, content quality, etc.

            performance_analysis = {
                'competitors_analyzed': len(competitor_urls),
                'performance_metrics': {
                    'average_engagement_rate': 0.045,
                    'content_frequency': '2.3 posts/week',
                    'top_performing_content_types': ['How-to guides', 'Case studies', 'Industry insights'],
                    'content_quality_score': 8.2
                },
                'recommendations': [
                    'Focus on educational content',
                    'Increase video content production',
                    'Optimize for mobile engagement'
                ],
                'timestamp': datetime.utcnow().isoformat()
            }

            return performance_analysis

        except Exception as e:
            logger.error(f"Error analyzing content performance: {str(e)}")
            raise

    async def health_check(self) -> Dict[str, Any]:
        """
        Health check for the competitor analyzer service.

        Returns:
            Health status information
        """
        try:
            logger.info("Performing health check for CompetitorAnalyzer")

            health_status = {
                'service': 'CompetitorAnalyzer',
                'status': 'healthy',
                'dependencies': {
                    'ai_engine': 'operational',
                    'website_analyzer': 'operational'
                },
                'timestamp': datetime.utcnow().isoformat()
            }

            logger.info("CompetitorAnalyzer health check passed")
            return health_status

        except Exception as e:
            logger.error(f"CompetitorAnalyzer health check failed: {str(e)}")
            return {
                'service': 'CompetitorAnalyzer',
                'status': 'unhealthy',
                'error': str(e),
                'timestamp': datetime.utcnow().isoformat()
            }

    async def get_competitor_summary(self, analysis_id: str) -> Dict[str, Any]:
        """
        Get summary of competitor analysis.

        Args:
            analysis_id: Analysis identifier

        Returns:
            Competitor analysis summary
        """
        try:
            logger.info(f"Getting competitor analysis summary for {analysis_id}")

            # TODO: Retrieve analysis from database
            # This will be implemented when database integration is complete

            summary = {
                'analysis_id': analysis_id,
                'status': 'completed',
                'timestamp': datetime.utcnow().isoformat(),
                'summary': {
                    'competitors_analyzed': 5,
                    'content_gaps_identified': 8,
                    'competitive_insights': 6,
                    'market_position': 'Competitive',
                    'estimated_impact': 'High'
                }
            }

            return summary

        except Exception as e:
            logger.error(f"Error getting competitor summary: {str(e)}")
            return {}

    # Advanced Features Implementation

    async def _run_seo_analysis(self, url: str) -> Dict[str, Any]:
        """
        Run comprehensive SEO analysis on competitor website.

        Args:
            url: The URL to analyze

        Returns:
            SEO analysis results
        """
        try:
            logger.info(f"Running SEO analysis for {url}")

            # TODO: Integrate with actual website analyzer service
            # For now, simulate SEO analysis

            seo_analysis = {
                'onpage_seo': {
                    'meta_tags': {
                        'title': {'status': 'good', 'length': 55, 'keyword_density': 0.02},
                        'description': {'status': 'good', 'length': 145, 'keyword_density': 0.015},
                        'keywords': {'status': 'missing', 'recommendation': 'Add meta keywords'}
                    },
                    'content': {
                        'readability_score': 75,
                        'content_quality_score': 82,
                        'keyword_density': 0.025,
                        'heading_structure': 'good'
                    },
                    'recommendations': [
                        'Optimize meta descriptions',
                        'Improve heading structure',
                        'Add more internal links',
                        'Enhance content readability'
                    ]
                },
                'url_seo': {
                    'title': 'Competitor Page Title',
                    'meta_description': 'Competitor meta description with keywords',
                    'has_robots_txt': True,
                    'has_sitemap': True,
                    'url_structure': 'clean',
                    'canonical_url': 'properly_set'
                },
                'technical_seo': {
                    'page_speed': 85,
                    'mobile_friendly': True,
                    'ssl_certificate': True,
                    'structured_data': 'implemented',
                    'internal_linking': 'good',
                    'external_linking': 'moderate'
                }
            }

            return seo_analysis

        except Exception as e:
            logger.error(f"Error running SEO analysis: {str(e)}")
            return {}

    async def _analyze_title_patterns(self, url: str) -> Dict[str, Any]:
        """
        Analyze title patterns using AI.

        Args:
            url: The URL to analyze

        Returns:
            Title pattern analysis results
        """
        try:
            logger.info(f"Analyzing title patterns for {url}")

            # TODO: Integrate with actual title pattern analyzer
            # For now, simulate analysis

            title_analysis = {
                'patterns': {
                    'question_format': 0.3,
                    'how_to_format': 0.25,
                    'list_format': 0.2,
                    'comparison_format': 0.15,
                    'other_format': 0.1
                },
                'suggestions': [
                    'Use question-based titles for engagement',
                    'Include numbers for better CTR',
                    'Add emotional triggers',
                    'Keep titles under 60 characters',
                    'Include target keywords naturally'
                ],
                'best_practices': [
                    'Start with power words',
                    'Include target keyword',
                    'Add urgency or scarcity',
                    'Use brackets for additional info',
                    'Test different formats'
                ],
                'examples': [
                    'How to [Topic] in 2024: Complete Guide',
                    '10 Best [Topic] Strategies That Work',
                    '[Topic] vs [Alternative]: Which is Better?',
                    'The Ultimate Guide to [Topic]',
                    'Why [Topic] Matters for Your Business'
                ]
            }

            return title_analysis

        except Exception as e:
            logger.error(f"Error analyzing title patterns: {str(e)}")
            return {}

    async def _compare_competitors(self, results: Dict[str, Any]) -> Dict[str, Any]:
        """
        Compare results across all competitors.

        Args:
            results: Analysis results for all competitors

        Returns:
            Comparative analysis results
        """
        try:
            logger.info("Comparing competitors across all metrics")

            comparison = {
                'content_comparison': await self._compare_content(results),
                'seo_comparison': await self._compare_seo(results),
                'title_comparison': await self._compare_titles(results),
                'performance_metrics': await self._compare_performance(results),
                'content_gaps': await self._find_missing_topics(results),
                'opportunities': await self._identify_opportunities(results),
                'format_gaps': await self._analyze_format_gaps(results),
                'quality_gaps': await self._analyze_quality_gaps(results),
                'seo_gaps': await self._analyze_seo_gaps(results)
            }

            # Add AI-enhanced insights
            comparison['ai_insights'] = await self.ai_engine.analyze_competitor_comparison(comparison)

            return comparison

        except Exception as e:
            logger.error(f"Error comparing competitors: {str(e)}")
            return {}

    async def _compare_content(self, results: Dict[str, Any]) -> Dict[str, Any]:
        """Compare content structure across competitors."""
        try:
            content_comparison = {
                'topic_distribution': await self._analyze_topic_distribution(results),
                'content_depth': await self._analyze_content_depth(results),
                'content_formats': await self._analyze_content_formats(results),
                'content_quality': await self._analyze_content_quality(results)
            }

            return content_comparison

        except Exception as e:
            logger.error(f"Error comparing content: {str(e)}")
            return {}

    async def _analyze_topic_distribution(self, results: Dict[str, Any]) -> Dict[str, Any]:
        """Analyze topic distribution across competitors."""
        try:
            all_topics = []
            topic_frequency = Counter()

            for url, data in results.items():
                topics = data.get('content_structure', {}).get('topics', [])
                all_topics.extend([t['topic'] for t in topics])
                topic_frequency.update([t['topic'] for t in topics])

            return {
                'common_topics': [topic for topic, count in topic_frequency.most_common(10)],
                'unique_topics': list(set(all_topics)),
                'topic_frequency': dict(topic_frequency.most_common()),
                'topic_coverage': len(set(all_topics)) / len(all_topics) if all_topics else 0
            }

        except Exception as e:
            logger.error(f"Error analyzing topic distribution: {str(e)}")
            return {}

    async def _analyze_content_depth(self, results: Dict[str, Any]) -> Dict[str, Any]:
        """Analyze content depth across competitors."""
        try:
            depth_metrics = {
                'word_counts': {},
                'section_counts': {},
                'heading_distribution': defaultdict(list),
                'content_hierarchy': {}
            }

            for url, data in results.items():
                content_structure = data.get('content_structure', {})

                # Word count analysis
                depth_metrics['word_counts'][url] = content_structure.get('text_statistics', {}).get('word_count', 0)

                # Section analysis
                depth_metrics['section_counts'][url] = len(content_structure.get('sections', []))

                # Heading distribution
                for level, count in content_structure.get('hierarchy', {}).get('heading_distribution', {}).items():
                    depth_metrics['heading_distribution'][level].append(count)

                # Content hierarchy
                depth_metrics['content_hierarchy'][url] = content_structure.get('hierarchy', {})

            return depth_metrics

        except Exception as e:
            logger.error(f"Error analyzing content depth: {str(e)}")
            return {}

    async def _analyze_content_formats(self, results: Dict[str, Any]) -> Dict[str, Any]:
        """Analyze content formats across competitors."""
        try:
            format_analysis = {
                'format_types': defaultdict(int),
                'format_distribution': defaultdict(list),
                'format_effectiveness': {}
            }

            for url, data in results.items():
                sections = data.get('content_structure', {}).get('sections', [])

                for section in sections:
                    format_type = section.get('type', 'unknown')
                    format_analysis['format_types'][format_type] += 1
                    format_analysis['format_distribution'][format_type].append({
                        'url': url,
                        'heading': section.get('heading', ''),
                        'word_count': section.get('word_count', 0)
                    })

            return format_analysis

        except Exception as e:
            logger.error(f"Error analyzing content formats: {str(e)}")
            return {}

    async def _analyze_content_quality(self, results: Dict[str, Any]) -> Dict[str, Any]:
        """Analyze content quality across competitors."""
        try:
            quality_metrics = {
                'readability_scores': {},
                'content_structure_scores': {},
                'engagement_metrics': {},
                'overall_quality': {}
            }

            for url, data in results.items():
                content_structure = data.get('content_structure', {})

                # Readability analysis
                readability = content_structure.get('readability', {})
                quality_metrics['readability_scores'][url] = {
                    'flesch_score': readability.get('flesch_score', 0),
                    'avg_sentence_length': readability.get('avg_sentence_length', 0),
                    'avg_word_length': readability.get('avg_word_length', 0)
                }

                # Structure analysis
                hierarchy = content_structure.get('hierarchy', {})
                quality_metrics['content_structure_scores'][url] = {
                    'has_proper_hierarchy': hierarchy.get('has_proper_hierarchy', False),
                    'heading_distribution': hierarchy.get('heading_distribution', {}),
                    'max_depth': hierarchy.get('max_depth', 0)
                }

            return quality_metrics

        except Exception as e:
            logger.error(f"Error analyzing content quality: {str(e)}")
            return {}

    async def _compare_seo(self, results: Dict[str, Any]) -> Dict[str, Any]:
        """Compare SEO metrics across competitors."""
        try:
            seo_comparison = {
                'onpage_metrics': defaultdict(list),
                'technical_metrics': defaultdict(list),
                'content_metrics': defaultdict(list),
                'overall_seo_score': {}
            }

            for url, data in results.items():
                seo_info = data.get('website_analysis', {}).get('analysis', {}).get('seo_info', {})

                # On-page SEO metrics
                meta_tags = seo_info.get('meta_tags', {})
                seo_comparison['onpage_metrics']['title_score'].append(
                    100 if meta_tags.get('title', {}).get('status') == 'good' else 50
                )
                seo_comparison['onpage_metrics']['description_score'].append(
                    100 if meta_tags.get('description', {}).get('status') == 'good' else 50
                )
                seo_comparison['onpage_metrics']['keywords_score'].append(
                    100 if meta_tags.get('keywords', {}).get('status') == 'good' else 50
                )

                # Technical SEO metrics
                technical = data.get('website_analysis', {}).get('analysis', {}).get('basic_info', {})
                seo_comparison['technical_metrics']['has_robots_txt'].append(
                    100 if technical.get('robots_txt') else 0
                )
                seo_comparison['technical_metrics']['has_sitemap'].append(
                    100 if technical.get('sitemap') else 0
                )

                # Content SEO metrics
                content = seo_info.get('content', {})
                seo_comparison['content_metrics']['readability_score'].append(
                    content.get('readability_score', 0)
                )
                seo_comparison['content_metrics']['content_quality_score'].append(
                    content.get('content_quality_score', 0)
                )

                # Overall SEO score
                seo_comparison['overall_seo_score'][url] = seo_info.get('overall_score', 0)

            return seo_comparison

        except Exception as e:
            logger.error(f"Error comparing SEO: {str(e)}")
            return {}

    async def _compare_titles(self, results: Dict[str, Any]) -> Dict[str, Any]:
        """Compare title patterns across competitors."""
        try:
            title_comparison = {
                'pattern_distribution': defaultdict(int),
                'length_distribution': defaultdict(list),
                'keyword_usage': defaultdict(int),
                'format_preferences': defaultdict(int)
            }

            for url, data in results.items():
                title_patterns = data.get('title_patterns', {})

                # Pattern analysis
                for pattern in title_patterns.get('patterns', {}):
                    title_comparison['pattern_distribution'][pattern] += 1

                # Length analysis
                for suggestion in title_patterns.get('suggestions', []):
                    title_comparison['length_distribution'][len(suggestion)].append(suggestion)

                # Keyword analysis
                for suggestion in title_patterns.get('suggestions', []):
                    words = suggestion.lower().split()
                    for word in words:
                        if len(word) > 3:  # Filter out short words
                            title_comparison['keyword_usage'][word] += 1

            return title_comparison

        except Exception as e:
            logger.error(f"Error comparing titles: {str(e)}")
            return {}

    async def _compare_performance(self, results: Dict[str, Any]) -> Dict[str, Any]:
        """Compare performance metrics across competitors."""
        try:
            performance_metrics = {
                'content_effectiveness': {},
                'engagement_metrics': {},
                'technical_performance': {},
                'overall_performance': {}
            }

            for url, data in results.items():
                # Content effectiveness
                content_structure = data.get('content_structure', {})
                performance_metrics['content_effectiveness'][url] = {
                    'content_depth': content_structure.get('text_statistics', {}).get('word_count', 0),
                    'content_quality': content_structure.get('readability', {}).get('flesch_score', 0),
                    'content_structure': content_structure.get('hierarchy', {}).get('has_proper_hierarchy', False)
                }

                # Technical performance
                seo_analysis = data.get('seo_analysis', {})
                performance_metrics['technical_performance'][url] = {
                    'onpage_score': sum(1 for v in seo_analysis.get('onpage_seo', {}).values() if v),
                    'technical_score': sum(1 for v in seo_analysis.get('url_seo', {}).values() if v)
                }

            return performance_metrics

        except Exception as e:
            logger.error(f"Error comparing performance: {str(e)}")
            return {}

    async def _find_missing_topics(self, results: Dict[str, Any]) -> List[Dict[str, Any]]:
        """Find topics that are missing or underrepresented."""
        try:
            all_topics = set()
            topic_coverage = defaultdict(int)

            # Collect all topics and their coverage
            for url, data in results.items():
                topics = data.get('content_structure', {}).get('topics', [])
                for topic in topics:
                    all_topics.add(topic['topic'])
                    topic_coverage[topic['topic']] += 1

            # Identify missing or underrepresented topics
            missing_topics = []
            total_competitors = len(results)

            for topic in all_topics:
                coverage = topic_coverage[topic] / total_competitors
                if coverage < 0.5:  # Topic covered by less than 50% of competitors
                    missing_topics.append({
                        'topic': topic,
                        'coverage': coverage,
                        'opportunity_score': 1 - coverage
                    })

            return sorted(missing_topics, key=lambda x: x['opportunity_score'], reverse=True)

        except Exception as e:
            logger.error(f"Error finding missing topics: {str(e)}")
            return []

    async def _identify_opportunities(self, results: Dict[str, Any]) -> List[Dict[str, Any]]:
        """Identify content opportunities based on analysis."""
        try:
            opportunities = []

            # Analyze content depth opportunities
            depth_metrics = await self._analyze_content_depth(results)
            avg_word_count = sum(depth_metrics['word_counts'].values()) / len(depth_metrics['word_counts'])

            for url, word_count in depth_metrics['word_counts'].items():
                if word_count < avg_word_count * 0.7:  # Content depth significantly below average
                    opportunities.append({
                        'type': 'content_depth',
                        'url': url,
                        'current_value': word_count,
                        'target_value': avg_word_count,
                        'opportunity_score': (avg_word_count - word_count) / avg_word_count
                    })

            # Analyze format opportunities
            format_analysis = await self._analyze_content_formats(results)
            for format_type, distribution in format_analysis['format_distribution'].items():
                if len(distribution) < len(results) * 0.3:  # Format used by less than 30% of competitors
                    opportunities.append({
                        'type': 'content_format',
                        'format': format_type,
                        'current_coverage': len(distribution) / len(results),
                        'opportunity_score': 1 - (len(distribution) / len(results))
                    })

            return sorted(opportunities, key=lambda x: x['opportunity_score'], reverse=True)

        except Exception as e:
            logger.error(f"Error identifying opportunities: {str(e)}")
            return []

    async def _analyze_format_gaps(self, results: Dict[str, Any]) -> List[Dict[str, Any]]:
        """Analyze gaps in content formats."""
        try:
            format_gaps = []
            format_analysis = await self._analyze_content_formats(results)

            # Identify underutilized formats
            for format_type, count in format_analysis['format_types'].items():
                if count < len(results) * 0.3:  # Format used by less than 30% of competitors
                    format_gaps.append({
                        'format': format_type,
                        'current_usage': count,
                        'potential_impact': 'high' if count < len(results) * 0.2 else 'medium',
                        'suggested_implementation': await self._generate_format_suggestions(format_type)
                    })

            return format_gaps

        except Exception as e:
            logger.error(f"Error analyzing format gaps: {str(e)}")
            return []

    async def _analyze_quality_gaps(self, results: Dict[str, Any]) -> List[Dict[str, Any]]:
        """Analyze gaps in content quality."""
        try:
            quality_gaps = []
            quality_metrics = await self._analyze_content_quality(results)

            # Analyze readability gaps
            readability_scores = quality_metrics['readability_scores']
            avg_flesch = sum(score['flesch_score'] for score in readability_scores.values()) / len(readability_scores)

            for url, scores in readability_scores.items():
                if scores['flesch_score'] < avg_flesch * 0.8:  # Readability significantly below average
                    quality_gaps.append({
                        'type': 'readability',
                        'url': url,
                        'current_score': scores['flesch_score'],
                        'target_score': avg_flesch,
                        'improvement_needed': avg_flesch - scores['flesch_score']
                    })

            return quality_gaps

        except Exception as e:
            logger.error(f"Error analyzing quality gaps: {str(e)}")
            return []

    async def _analyze_seo_gaps(self, results: Dict[str, Any]) -> List[Dict[str, Any]]:
        """Analyze gaps in SEO implementation."""
        try:
            seo_gaps = []
            seo_comparison = await self._compare_seo(results)

            # Analyze on-page SEO gaps
            for metric, values in seo_comparison['onpage_metrics'].items():
                avg_value = sum(values) / len(values)
                for url, value in zip(results.keys(), values):
                    if value < avg_value * 0.7:  # Significantly below average
                        seo_gaps.append({
                            'type': 'onpage_seo',
                            'metric': metric,
                            'url': url,
                            'current_value': value,
                            'target_value': avg_value,
                            'improvement_needed': avg_value - value
                        })

            # Analyze technical SEO gaps
            for metric, values in seo_comparison['technical_metrics'].items():
                avg_value = sum(values) / len(values)
                for url, value in zip(results.keys(), values):
                    if value < avg_value * 0.7:  # Significantly below average
                        seo_gaps.append({
                            'type': 'technical_seo',
                            'metric': metric,
                            'url': url,
                            'current_value': value,
                            'target_value': avg_value,
                            'improvement_needed': avg_value - value
                        })

            # Analyze content SEO gaps
            for metric, values in seo_comparison['content_metrics'].items():
                avg_value = sum(values) / len(values)
                for url, value in zip(results.keys(), values):
                    if value < avg_value * 0.7:  # Significantly below average
                        seo_gaps.append({
                            'type': 'content_seo',
                            'metric': metric,
                            'url': url,
                            'current_value': value,
                            'target_value': avg_value,
                            'improvement_needed': avg_value - value
                        })

            return seo_gaps

        except Exception as e:
            logger.error(f"Error analyzing SEO gaps: {str(e)}")
            return []

    async def _generate_format_suggestions(self, format_type: str) -> List[str]:
        """Generate suggestions for implementing specific content formats."""
        try:
            format_suggestions = {
                'article': [
                    'Create in-depth articles with comprehensive coverage',
                    'Include expert quotes and statistics',
                    'Add visual elements and infographics'
                ],
                'blog_post': [
                    'Write engaging blog posts with personal insights',
                    'Include call-to-actions',
                    'Add social sharing buttons'
                ],
                'how-to': [
                    'Create step-by-step guides',
                    'Include screenshots or videos',
                    'Add troubleshooting sections'
                ],
                'case_study': [
                    'Present real-world examples',
                    'Include metrics and results',
                    'Add client testimonials'
                ],
                'video': [
                    'Create engaging video content',
                    'Include transcripts and captions',
                    'Optimize for different platforms'
                ],
                'infographic': [
                    'Design visually appealing graphics',
                    'Include key statistics and data',
                    'Make it shareable on social media'
                ]
            }

            return format_suggestions.get(format_type, [
                'Research successful examples',
                'Analyze competitor implementation',
                'Create unique value proposition'
            ])

        except Exception as e:
            logger.error(f"Error generating format suggestions: {str(e)}")
            return []