ALwrity/lib/ai_seo_tools/content_gap_analysis/main.py

"""
Main module for content gap analysis.
"""

from typing import Dict, Any, List, Optional
import streamlit as st
from loguru import logger
from lib.utils.website_analyzer.analyzer import WebsiteAnalyzer
from .competitor_analyzer import CompetitorAnalyzer
from .keyword_researcher import KeywordResearcher
from .recommendation_engine import RecommendationEngine
from .utils.ai_processor import AIProcessor, ProgressTracker
from .utils.storage import ContentGapAnalysisStorage
from datetime import datetime
import asyncio
import sys
import os
from lib.gpt_providers.text_generation.main_text_generation import llm_text_gen
from .utils.content_parser import ContentParser

# Configure logger
logger.remove()  # Remove default handler
logger.add(
    "logs/content_gap_analysis.log",
    rotation="50 MB",
    retention="10 days",
    level="DEBUG",
    format="{time:YYYY-MM-DD HH:mm:ss} | {level} | {message}"
)
logger.add(
    sys.stdout,
    level="INFO",
    format="<green>{time:YYYY-MM-DD HH:mm:ss}</green> | <level>{level: <8}</level> | <cyan>{message}</cyan>"
)

# Ensure logs directory exists
os.makedirs("logs", exist_ok=True)

class ContentGapAnalysis:
    """Main class for content gap analysis."""

    def __init__(self, db_session=None):
        """Initialize the content gap analysis components."""
        self.website_analyzer = WebsiteAnalyzer()
        self.competitor_analyzer = CompetitorAnalyzer()
        self.keyword_researcher = KeywordResearcher()
        self.recommendation_engine = RecommendationEngine()
        self.ai_processor = AIProcessor()
        self.progress = ProgressTracker()
        self.storage = ContentGapAnalysisStorage(db_session) if db_session else None

        # Define analysis phases
        self.phases = {
            'website_analysis': {
                'name': 'Website Analysis',
                'steps': [
                    'Initializing website analysis',
                    'Analyzing website content',
                    'Evaluating SEO elements',
                    'Generating website insights'
                ]
            },
            'competitor_analysis': {
                'name': 'Competitor Analysis',
                'steps': [
                    'Initializing competitor analysis',
                    'Analyzing competitor content',
                    'Comparing market position',
                    'Generating competitive insights'
                ]
            },
            'keyword_analysis': {
                'name': 'Keyword Analysis',
                'steps': [
                    'Initializing keyword research',
                    'Analyzing keyword trends',
                    'Evaluating search intent',
                    'Generating keyword insights'
                ]
            },
            'recommendation_generation': {
                'name': 'Recommendation Generation',
                'steps': [
                    'Initializing recommendation engine',
                    'Analyzing content gaps',
                    'Generating recommendations',
                    'Creating implementation plan'
                ]
            }
        }

        logger.info("ContentGapAnalysis initialized")

    def analyze(self, url: str, industry: str, competitor_urls: Optional[List[str]] = None, user_id: Optional[int] = None) -> Dict[str, Any]:
        """
        Run the complete content gap analysis workflow.

        Args:
            url: Target website URL
            industry: Industry category
            competitor_urls: Optional list of competitor URLs
            user_id: Optional user ID for storing results

        Returns:
            Dictionary containing analysis results
        """
        try:
            results = {}
            start_time = datetime.utcnow()

            # Phase 1: Website Analysis
            self.progress.start_stage('website_analysis')
            self.progress.next_step()

            website_analysis = self.website_analyzer.analyze(url)
            results['website'] = website_analysis

            self.progress.next_step()
            self.progress.complete_stage()

            # Phase 2: Competitor Analysis
            if competitor_urls:
                self.progress.start_stage('competitor_analysis')
                self.progress.next_step()

                competitor_analysis = self.competitor_analyzer.analyze(competitor_urls, industry)
                results['competitors'] = competitor_analysis

                self.progress.next_step()
                self.progress.complete_stage()

            # Phase 3: Keyword Analysis
            self.progress.start_stage('keyword_analysis')
            self.progress.next_step()

            keyword_analysis = self.keyword_researcher.analyze(industry, url)
            results['keywords'] = keyword_analysis

            self.progress.next_step()
            self.progress.complete_stage()

            # Phase 4: Recommendation Generation
            self.progress.start_stage('recommendation_generation')
            self.progress.next_step()

            recommendations = self.recommendation_engine.generate_recommendations(
                website_analysis,
                competitor_analysis if competitor_urls else None,
                keyword_analysis
            )
            results['recommendations'] = recommendations

            self.progress.next_step()
            self.progress.complete_stage()

            # Calculate analysis duration
            end_time = datetime.utcnow()
            results['duration'] = (end_time - start_time).total_seconds()

            # Store results if user_id is provided and storage is available
            if user_id and self.storage:
                analysis_id = self.storage.save_analysis(user_id, url, industry, results)
                if analysis_id:
                    results['analysis_id'] = analysis_id

            return results

        except Exception as e:
            if self.progress.current_stage:
                self.progress.update_progress(0, f"Error in {self.progress.stages[self.progress.current_stage]['name']}: {str(e)}")
            st.error(f"Error in content gap analysis: {str(e)}")
            return {
                'error': str(e),
                'website': {},
                'competitors': [],
                'keywords': {},
                'recommendations': []
            }

    def get_analysis(self, analysis_id: int) -> Optional[Dict[str, Any]]:
        """
        Retrieve stored analysis results.

        Args:
            analysis_id: Analysis ID

        Returns:
            Dictionary containing analysis results if found, None otherwise
        """
        if not self.storage:
            st.error("Storage not initialized")
            return None
        return self.storage.get_analysis(analysis_id)

    def get_user_analyses(self, user_id: int) -> List[Dict[str, Any]]:
        """
        Get all analyses for a user.

        Args:
            user_id: User ID

        Returns:
            List of analysis summaries
        """
        if not self.storage:
            st.error("Storage not initialized")
            return []
        return self.storage.get_user_analyses(user_id)

    def update_recommendation_status(self, recommendation_id: int, status: str) -> bool:
        """
        Update the status of a recommendation.

        Args:
            recommendation_id: Recommendation ID
            status: New status

        Returns:
            True if successful, False otherwise
        """
        if not self.storage:
            st.error("Storage not initialized")
            return False
        return self.storage.update_recommendation_status(recommendation_id, status)

    def delete_analysis(self, analysis_id: int) -> bool:
        """
        Delete an analysis and all related data.

        Args:
            analysis_id: Analysis ID

        Returns:
            True if successful, False otherwise
        """
        if not self.storage:
            st.error("Storage not initialized")
            return False
        return self.storage.delete_analysis(analysis_id)

    def get_analysis_summary(self, results: Dict[str, Any]) -> Dict[str, Any]:
        """
        Generate a summary of the analysis results.

        Args:
            results: Dictionary containing analysis results

        Returns:
            Dictionary containing summary metrics and insights
        """
        try:
            self.progress.start_stage('summary_generation')
            self.progress.next_step()

            summary = {
                'website_metrics': self._summarize_website_metrics(results.get('website', {})),
                'competitor_insights': self._summarize_competitor_insights(results.get('competitors', {})),
                'keyword_opportunities': self._summarize_keyword_opportunities(results.get('keywords', {})),
                'recommendation_highlights': self._summarize_recommendations(results.get('recommendations', {})),
                'ai_insights': results.get('ai_insights', {})
            }

            self.progress.complete_stage()
            return summary

        except Exception as e:
            if self.progress.current_stage:
                self.progress.update_progress(0, f"Error generating summary: {str(e)}")
            st.error(f"Error generating analysis summary: {str(e)}")
            return {
                'error': str(e),
                'website_metrics': {},
                'competitor_insights': {},
                'keyword_opportunities': {},
                'recommendation_highlights': {},
                'ai_insights': {}
            }

    def export_results(self, results: Dict[str, Any], format: str = 'json') -> str:
        """
        Export analysis results in the specified format.

        Args:
            results: Dictionary containing analysis results
            format: Export format ('json' or 'csv')

        Returns:
            String containing exported results
        """
        try:
            self.progress.start_stage('export')
            self.progress.next_step()

            if format.lower() == 'json':
                import json
                exported = json.dumps(results, indent=2)
            elif format.lower() == 'csv':
                import pandas as pd
                # Convert results to DataFrame and then to CSV
                df = pd.DataFrame(results)
                exported = df.to_csv(index=False)
            else:
                raise ValueError(f"Unsupported export format: {format}")

            self.progress.complete_stage()
            return exported

        except Exception as e:
            if self.progress.current_stage:
                self.progress.update_progress(0, f"Error exporting results: {str(e)}")
            st.error(f"Error exporting results: {str(e)}")
            return str(e)

    def _summarize_website_metrics(self, website_data: Dict[str, Any]) -> Dict[str, Any]:
        """Generate summary of website metrics."""
        try:
            return {
                'content_score': website_data.get('content_score', 0),
                'seo_score': website_data.get('seo_score', 0),
                'structure_score': website_data.get('structure_score', 0),
                'key_insights': website_data.get('insights', [])[:5]  # Top 5 insights
            }
        except Exception as e:
            st.error(f"Error summarizing website metrics: {str(e)}")
            return {}

    def _summarize_competitor_insights(self, competitor_data: Dict[str, Any]) -> Dict[str, Any]:
        """Generate summary of competitor insights."""
        try:
            return {
                'market_position': competitor_data.get('market_position', {}),
                'content_gaps': competitor_data.get('content_gaps', [])[:5],  # Top 5 gaps
                'competitive_advantages': competitor_data.get('advantages', [])[:5]  # Top 5 advantages
            }
        except Exception as e:
            st.error(f"Error summarizing competitor insights: {str(e)}")
            return {}

    def _summarize_keyword_opportunities(self, keyword_data: Dict[str, Any]) -> Dict[str, Any]:
        """Generate summary of keyword opportunities."""
        try:
            return {
                'top_keywords': keyword_data.get('top_keywords', [])[:10],  # Top 10 keywords
                'search_intent': keyword_data.get('search_intent', {}),
                'opportunities': keyword_data.get('opportunities', [])[:5]  # Top 5 opportunities
            }
        except Exception as e:
            st.error(f"Error summarizing keyword opportunities: {str(e)}")
            return {}

    def _summarize_recommendations(self, recommendation_data: Dict[str, Any]) -> Dict[str, Any]:
        """Generate summary of recommendations."""
        try:
            return {
                'priority_recommendations': recommendation_data.get('priority_recommendations', [])[:5],  # Top 5 recommendations
                'implementation_timeline': recommendation_data.get('timeline', {}),
                'expected_impact': recommendation_data.get('impact', {})
            }
        except Exception as e:
            st.error(f"Error summarizing recommendations: {str(e)}")
            return {}