# ALwrity/ToBeMigrated/database/twitter_init.py

"""
Twitter Database Initialization and Migration Script
===================================================

This module provides utilities for initializing the Twitter database,
handling schema migrations, and managing database setup.

Features:
- Database initialization and table creation
- Schema migration utilities
- Data seeding for development/testing
- Database health checks and maintenance
"""

import os
import logging
from typing import Dict, Any, List, Optional
from datetime import datetime
import json
from pathlib import Path

from sqlalchemy import create_engine, text, inspect
from sqlalchemy.orm import sessionmaker
from sqlalchemy.exc import SQLAlchemyError

from .twitter_models import (
    Base, TwitterUser, Tweet, ScheduledTweet, TwitterAnalytics,
    TweetAnalytics, EngagementData, AudienceInsight, HashtagPerformance,
    ContentTemplate, TwitterSettings, TwitterAccountType, TweetType,
    TweetStatus, EngagementType, AnalyticsTimeframe, ContentCategory
)

# Configure logging.
# NOTE: basicConfig() runs at import time and targets the root logger, so
# importing this module sets INFO-level logging for the whole process unless
# the root logger already has handlers (in which case basicConfig is a no-op).
logging.basicConfig(level=logging.INFO)
# Module-scoped logger used by every function and method in this file.
logger = logging.getLogger(__name__)

class TwitterDatabaseInitializer:
    """
    Handles Twitter database initialization and management.
    """

    def __init__(self, db_url: str = "sqlite:///twitter_data.db"):
        """
        Build the engine and session factory for ``db_url``.

        Args:
            db_url: SQLAlchemy database URL. For ``sqlite:///`` URLs the
                directory that will hold the database file is created if it
                does not exist yet.
        """
        sqlite_prefix = 'sqlite:///'

        self.db_url = db_url
        self.engine = create_engine(db_url, echo=False)
        self.SessionLocal = sessionmaker(
            bind=self.engine,
            autoflush=False,
            autocommit=False,
        )

        # Only file-based SQLite URLs need a directory on disk.
        if not db_url.startswith(sqlite_prefix):
            return

        db_path = db_url.replace(sqlite_prefix, '')
        parent_dir = os.path.dirname(os.path.abspath(db_path))
        os.makedirs(parent_dir, exist_ok=True)

    def initialize_database(self, force_recreate: bool = False) -> bool:
        """
        Create every table declared on ``Base`` and finish database setup.

        After creating the tables, the method verifies that all expected
        tables are present, then creates the indexes and runs the seeding
        step. Any exception is logged and reported as a ``False`` result.

        Args:
            force_recreate: When True, all tables are dropped before they
                are created again.

        Returns:
            bool: True when setup completed, False when a table is missing
            or an error occurred.
        """
        required_tables = (
            'twitter_users', 'tweets', 'scheduled_tweets', 'twitter_analytics',
            'tweet_analytics', 'engagement_data', 'audience_insights',
            'hashtag_performance', 'content_templates', 'twitter_settings',
        )

        try:
            if force_recreate:
                logger.info("Dropping existing tables...")
                Base.metadata.drop_all(bind=self.engine)

            logger.info("Creating Twitter database tables...")
            Base.metadata.create_all(bind=self.engine)

            # Ask the database which tables actually exist now.
            tables = inspect(self.engine).get_table_names()
            missing_tables = [name for name in required_tables if name not in tables]

            if not missing_tables:
                logger.info(f"Successfully created {len(tables)} tables")

                # Both helpers log their own failures and do not raise.
                self._create_indexes()
                self._seed_initial_data()

                logger.info("Twitter database initialization completed successfully")
                return True

            logger.error(f"Missing tables: {missing_tables}")
            return False

        except Exception as exc:
            logger.error(f"Error initializing database: {exc}")
            return False

    def _create_indexes(self):
        """
        Create the secondary indexes used by the Twitter tables.

        All statements use ``CREATE INDEX IF NOT EXISTS`` and are executed on
        one connection, followed by a single commit. Errors are logged and
        not re-raised.
        """
        index_statements = (
            # User indexes
            "CREATE INDEX IF NOT EXISTS idx_twitter_users_user_id ON twitter_users(user_id)",
            "CREATE INDEX IF NOT EXISTS idx_twitter_users_twitter_user_id ON twitter_users(twitter_user_id)",
            "CREATE INDEX IF NOT EXISTS idx_twitter_users_username ON twitter_users(username)",
            # Tweet indexes
            "CREATE INDEX IF NOT EXISTS idx_tweets_user_id ON tweets(user_id)",
            "CREATE INDEX IF NOT EXISTS idx_tweets_status ON tweets(status)",
            "CREATE INDEX IF NOT EXISTS idx_tweets_posted_at ON tweets(posted_at)",
            "CREATE INDEX IF NOT EXISTS idx_tweets_tweet_id ON tweets(tweet_id)",
            # Scheduled tweet indexes
            "CREATE INDEX IF NOT EXISTS idx_scheduled_tweets_user_id ON scheduled_tweets(user_id)",
            "CREATE INDEX IF NOT EXISTS idx_scheduled_tweets_status ON scheduled_tweets(status)",
            "CREATE INDEX IF NOT EXISTS idx_scheduled_tweets_scheduled_time ON scheduled_tweets(scheduled_time)",
            # Analytics indexes
            "CREATE INDEX IF NOT EXISTS idx_twitter_analytics_user_id ON twitter_analytics(user_id)",
            "CREATE INDEX IF NOT EXISTS idx_twitter_analytics_date ON twitter_analytics(date)",
            "CREATE INDEX IF NOT EXISTS idx_twitter_analytics_timeframe ON twitter_analytics(timeframe)",
            # Tweet analytics indexes
            "CREATE INDEX IF NOT EXISTS idx_tweet_analytics_tweet_id ON tweet_analytics(tweet_id)",
            "CREATE INDEX IF NOT EXISTS idx_tweet_analytics_recorded_at ON tweet_analytics(recorded_at)",
            # Engagement data indexes
            "CREATE INDEX IF NOT EXISTS idx_engagement_data_tweet_id ON engagement_data(tweet_id)",
            "CREATE INDEX IF NOT EXISTS idx_engagement_data_occurred_at ON engagement_data(occurred_at)",
            "CREATE INDEX IF NOT EXISTS idx_engagement_data_type ON engagement_data(engagement_type)",
            # Hashtag performance indexes
            "CREATE INDEX IF NOT EXISTS idx_hashtag_performance_user_id ON hashtag_performance(user_id)",
            "CREATE INDEX IF NOT EXISTS idx_hashtag_performance_hashtag ON hashtag_performance(hashtag)",
            # Content template indexes
            "CREATE INDEX IF NOT EXISTS idx_content_templates_user_id ON content_templates(user_id)",
            "CREATE INDEX IF NOT EXISTS idx_content_templates_category ON content_templates(category)",
            "CREATE INDEX IF NOT EXISTS idx_content_templates_is_active ON content_templates(is_active)",
        )

        try:
            with self.engine.connect() as conn:
                for statement in index_statements:
                    conn.execute(text(statement))

                conn.commit()
                logger.info("Database indexes created successfully")

        except Exception as exc:
            logger.error(f"Error creating indexes: {exc}")
    def _seed_initial_data(self):
        """
        Seed the database with initial data for development/testing.

        Seeding is skipped when at least one ``TwitterUser`` row exists.
        At present nothing is written: the sample templates below cannot be
        stored without an owning user, so they are only defined here for
        reference. Errors are logged and not re-raised.
        """
        try:
            session = self.SessionLocal()
            try:
                # Check if we already have data
                has_users = session.query(TwitterUser).count() > 0
            finally:
                # Always release the connection, even if the query failed.
                session.close()

            if has_users:
                logger.info("Database already contains data, skipping seeding")
                return

            # Sample content templates. Intentionally not persisted yet (see
            # the note below); kept so the intended seed data stays documented.
            sample_templates = [  # noqa: F841
                {
                    'name': 'Daily Motivation',
                    'description': 'Motivational quotes and thoughts',
                    'template_text': 'Start your day with this thought: {quote} #motivation #success',
                    'category': ContentCategory.PERSONAL,
                    'variables': ['quote'],
                    'default_hashtags': ['#motivation', '#success', '#mindset'],
                    'ai_prompt': 'Generate an inspiring motivational quote',
                    'ai_tone': 'inspirational',
                    'ai_target_audience': 'professionals and entrepreneurs'
                },
                {
                    'name': 'Tech News Share',
                    'description': 'Template for sharing tech news',
                    'template_text': 'Interesting development in {topic}: {summary} {link} #tech #innovation',
                    'category': ContentCategory.EDUCATIONAL,
                    'variables': ['topic', 'summary', 'link'],
                    'default_hashtags': ['#tech', '#innovation', '#technology'],
                    'ai_prompt': 'Summarize this tech news in an engaging way',
                    'ai_tone': 'informative',
                    'ai_target_audience': 'tech enthusiasts and professionals'
                },
                {
                    'name': 'Question Engagement',
                    'description': 'Template for asking engaging questions',
                    'template_text': 'Quick question for my followers: {question} What do you think? #community #discussion',
                    'category': ContentCategory.QUESTION,
                    'variables': ['question'],
                    'default_hashtags': ['#community', '#discussion', '#question'],
                    'ai_prompt': 'Generate an engaging question for social media',
                    'ai_tone': 'conversational',
                    'ai_target_audience': 'general audience'
                },
                {
                    'name': 'Product Update',
                    'description': 'Template for product announcements',
                    'template_text': 'Excited to share: {update} {details} #product #update #announcement',
                    'category': ContentCategory.PROMOTIONAL,
                    'variables': ['update', 'details'],
                    'default_hashtags': ['#product', '#update', '#announcement'],
                    'ai_prompt': 'Write an exciting product update announcement',
                    'ai_tone': 'enthusiastic',
                    'ai_target_audience': 'customers and prospects'
                }
            ]

            # Note: We can't create templates without a user, so we'll skip this for now
            # In a real scenario, templates would be created when users are added

            logger.info("Initial data seeding completed")

        except Exception as e:
            logger.error(f"Error seeding initial data: {e}")

    def check_database_health(self) -> Dict[str, Any]:
        """
        Check the health and status of the Twitter database.

        For every expected table the report records whether it exists and,
        if so, its row count. For every table actually present it records
        the number of indexes. Problems are collected in ``issues`` instead
        of being raised.

        Returns:
            Dict with the keys ``status`` ('healthy', 'issues_found' or
            'error'), ``timestamp`` (naive UTC, ISO 8601), ``tables``,
            ``indexes`` and ``issues``; ``error`` is added when the check
            itself fails.
        """
        # Local import: only ``datetime`` is imported at file level.
        from datetime import timezone

        health_status = {
            'status': 'healthy',
            # Same naive-UTC value utcnow() produced, without the deprecated call.
            'timestamp': datetime.now(timezone.utc).replace(tzinfo=None).isoformat(),
            'tables': {},
            'indexes': {},
            'issues': []
        }

        try:
            inspector = inspect(self.engine)

            expected_tables = [
                'twitter_users', 'tweets', 'scheduled_tweets', 'twitter_analytics',
                'tweet_analytics', 'engagement_data', 'audience_insights',
                'hashtag_performance', 'content_templates', 'twitter_settings'
            ]

            # Look the table list up once rather than once per expected table.
            existing_tables = inspector.get_table_names()

            session = self.SessionLocal()
            try:
                # Check table existence and row counts
                for table_name in expected_tables:
                    if table_name not in existing_tables:
                        health_status['tables'][table_name] = {'exists': False}
                        health_status['issues'].append(f"Missing table: {table_name}")
                        continue

                    try:
                        # table_name comes from the fixed list above, never
                        # from caller input, so interpolating it is safe.
                        result = session.execute(text(f"SELECT COUNT(*) FROM {table_name}"))
                        health_status['tables'][table_name] = {
                            'exists': True,
                            'row_count': result.scalar()
                        }
                    except Exception as e:
                        # Reset the failed transaction so the remaining
                        # tables can still be counted.
                        session.rollback()
                        health_status['tables'][table_name] = {
                            'exists': True,
                            'row_count': 'error',
                            'error': str(e)
                        }
                        health_status['issues'].append(f"Error counting rows in {table_name}: {e}")

                # Check indexes
                for table_name in existing_tables:
                    health_status['indexes'][table_name] = len(inspector.get_indexes(table_name))
            finally:
                # Release the connection on every path, including errors.
                session.close()

            # Set overall status
            if health_status['issues']:
                health_status['status'] = 'issues_found'

            return health_status

        except Exception as e:
            health_status['status'] = 'error'
            health_status['error'] = str(e)
            logger.error(f"Error checking database health: {e}")
            return health_status

    def backup_database(self, backup_path: str) -> bool:
        """
        Create a backup of the database by copying the SQLite file.

        Only ``sqlite:///`` URLs are supported. Missing parent directories of
        ``backup_path`` are created; a bare filename (no directory part) is
        written relative to the current working directory.

        Args:
            backup_path: Path where to save the backup

        Returns:
            bool: True if successful, False otherwise (the reason is logged)
        """
        sqlite_prefix = 'sqlite:///'

        try:
            if not self.db_url.startswith(sqlite_prefix):
                logger.error("Backup currently only supported for SQLite databases")
                return False

            # Strip only the leading scheme to get the database file path.
            db_file = self.db_url[len(sqlite_prefix):]

            if not os.path.exists(db_file):
                logger.error(f"Database file not found: {db_file}")
                return False

            # Create the backup directory if needed. dirname() is '' for a
            # bare filename and os.makedirs('') raises, so skip it then.
            backup_dir = os.path.dirname(backup_path)
            if backup_dir:
                os.makedirs(backup_dir, exist_ok=True)

            # Copy the database file (copy2 also preserves file metadata).
            import shutil
            shutil.copy2(db_file, backup_path)

            logger.info(f"Database backed up to: {backup_path}")
            return True

        except Exception as e:
            logger.error(f"Error backing up database: {e}")
            return False

    def restore_database(self, backup_path: str) -> bool:
        """
        Overwrite the SQLite database file with the contents of a backup.

        Args:
            backup_path: Location of the backup file to restore from.

        Returns:
            bool: True when the file was copied; False for non-SQLite URLs,
            a missing backup file, or any error during the copy.
        """
        try:
            import shutil

            if not self.db_url.startswith('sqlite:///'):
                logger.error("Restore currently only supported for SQLite databases")
                restored = False
            elif not os.path.exists(backup_path):
                logger.error(f"Backup file not found: {backup_path}")
                restored = False
            else:
                # Target is the file path embedded in the database URL.
                target_file = self.db_url.replace('sqlite:///', '')
                shutil.copy2(backup_path, target_file)
                logger.info(f"Database restored from: {backup_path}")
                restored = True

            return restored

        except Exception as err:
            logger.error(f"Error restoring database: {err}")
            return False

    def migrate_schema(self, migration_scripts: List[str]) -> bool:
        """
        Apply schema migration scripts that have not been applied yet.

        Each entry is a path to a SQL file. The path string itself is the
        migration name recorded in the ``schema_migrations`` table, and
        entries already recorded there are skipped. Every migration is
        committed together with its bookkeeping row, so a failure later in
        the list does not discard the record of migrations that already ran.

        NOTE(review): the tracking table uses ``AUTOINCREMENT``, which looks
        SQLite-specific, and each file is passed to a single ``execute``
        call. Confirm multi-statement files work with the driver in use.

        Args:
            migration_scripts: List of paths to SQL migration scripts, in the
                order they should be applied

        Returns:
            bool: True if every script was applied or already recorded,
            False if a script is missing or an error occurred
        """
        try:
            with self.engine.connect() as conn:
                # Create migration tracking table if it doesn't exist
                conn.execute(text("""
                    CREATE TABLE IF NOT EXISTS schema_migrations (
                        id INTEGER PRIMARY KEY AUTOINCREMENT,
                        migration_name TEXT NOT NULL UNIQUE,
                        applied_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
                    )
                """))
                conn.commit()

                for script in migration_scripts:
                    # Check if migration was already applied
                    result = conn.execute(text(
                        "SELECT COUNT(*) FROM schema_migrations WHERE migration_name = :name"
                    ), {"name": script})

                    if result.scalar() != 0:
                        logger.info(f"Migration already applied: {script}")
                        continue

                    logger.info(f"Applying migration: {script}")

                    script_path = Path(script)
                    if not script_path.exists():
                        logger.error(f"Migration script not found: {script}")
                        return False

                    # Read with an explicit encoding so the result does not
                    # depend on the platform's default locale.
                    migration_sql = script_path.read_text(encoding="utf-8")
                    conn.execute(text(migration_sql))

                    # Record migration as applied
                    conn.execute(text(
                        "INSERT INTO schema_migrations (migration_name) VALUES (:name)"
                    ), {"name": script})

                    # Commit per migration: keeps the tracking table in step
                    # with schema changes that have already taken effect.
                    conn.commit()

                logger.info("Schema migration completed successfully")
                return True

        except Exception as e:
            logger.error(f"Error applying schema migration: {e}")
            return False

    def cleanup_old_data(self, days: int = 90) -> Dict[str, int]:
        """
        Clean up old data to maintain database performance.

        Deletes ``TweetAnalytics`` rows whose ``recorded_at`` and
        ``EngagementData`` rows whose ``occurred_at`` fall before the cutoff.
        The cutoff is midnight (UTC) ``days`` days before today.

        Args:
            days: Number of days to keep data for; must not be negative

        Returns:
            Dict with cleanup statistics: ``tweet_analytics_deleted`` and
            ``engagement_data_deleted`` (row counts) plus ``cutoff_date``
            (ISO 8601 string). On failure the dict contains only ``error``
            with the exception message.
        """
        try:
            # Local import: only ``datetime`` is imported at file level.
            from datetime import timedelta, timezone

            if days < 0:
                raise ValueError(f"days must be non-negative, got {days}")

            # timedelta arithmetic crosses month and year boundaries
            # correctly; rewriting the day-of-month field raised ValueError
            # whenever ``days`` was not smaller than today's day number.
            midnight = datetime.now(timezone.utc).replace(
                hour=0, minute=0, second=0, microsecond=0, tzinfo=None
            )
            cutoff_date = midnight - timedelta(days=days)

            session = self.SessionLocal()
            try:
                # Query.delete() returns the number of rows matched, so the
                # reported counts are exactly what was removed.
                old_tweet_analytics = session.query(TweetAnalytics).filter(
                    TweetAnalytics.recorded_at < cutoff_date
                ).delete(synchronize_session=False)

                old_engagement_data = session.query(EngagementData).filter(
                    EngagementData.occurred_at < cutoff_date
                ).delete(synchronize_session=False)

                session.commit()
            except Exception:
                # Undo a partial delete before reporting the failure.
                session.rollback()
                raise
            finally:
                session.close()

            cleanup_stats = {
                'tweet_analytics_deleted': old_tweet_analytics,
                'engagement_data_deleted': old_engagement_data,
                'cutoff_date': cutoff_date.isoformat()
            }

            logger.info(f"Cleanup completed: {cleanup_stats}")
            return cleanup_stats

        except Exception as e:
            logger.error(f"Error during cleanup: {e}")
            return {'error': str(e)}

def initialize_twitter_database(db_url: str = "sqlite:///twitter_data.db", force_recreate: bool = False) -> bool:
    """
    Initialize the Twitter database in a single call.

    Builds a ``TwitterDatabaseInitializer`` for ``db_url`` and runs its
    ``initialize_database`` method.

    Args:
        db_url: Database URL
        force_recreate: Whether to drop and recreate existing tables

    Returns:
        bool: True if initialization succeeded, False otherwise
    """
    return TwitterDatabaseInitializer(db_url).initialize_database(force_recreate)

def check_twitter_database_health(db_url: str = "sqlite:///twitter_data.db") -> Dict[str, Any]:
    """
    Run the Twitter database health check in a single call.

    Builds a ``TwitterDatabaseInitializer`` for ``db_url`` and returns the
    report produced by its ``check_database_health`` method.

    Args:
        db_url: Database URL

    Returns:
        Dict with health check results
    """
    return TwitterDatabaseInitializer(db_url).check_database_health()

if __name__ == "__main__":
    # Command line interface for database management.
    # The selected actions run in a fixed order: init, health, backup,
    # restore, cleanup. Several may be combined in one invocation.
    import argparse

    parser = argparse.ArgumentParser(description="Twitter Database Management")
    parser.add_argument("--db-url", default="sqlite:///twitter_data.db", help="Database URL")
    parser.add_argument("--init", action="store_true", help="Initialize database")
    parser.add_argument("--force", action="store_true", help="Force recreate tables")
    parser.add_argument("--health", action="store_true", help="Check database health")
    parser.add_argument("--backup", help="Create database backup")
    parser.add_argument("--restore", help="Restore from backup")
    parser.add_argument("--cleanup", type=int, help="Cleanup data older than N days")

    args = parser.parse_args()

    initializer = TwitterDatabaseInitializer(args.db_url)

    if args.init:
        # --force only has an effect together with --init.
        success = initializer.initialize_database(args.force)
        print(f"Database initialization: {'SUCCESS' if success else 'FAILED'}")

    if args.health:
        health = initializer.check_database_health()
        print(json.dumps(health, indent=2))

    if args.backup:
        success = initializer.backup_database(args.backup)
        print(f"Database backup: {'SUCCESS' if success else 'FAILED'}")

    if args.restore:
        success = initializer.restore_database(args.restore)
        print(f"Database restore: {'SUCCESS' if success else 'FAILED'}")

    # Compare against None so an explicit "--cleanup 0" is honoured instead
    # of being treated as "option not given".
    if args.cleanup is not None:
        stats = initializer.cleanup_old_data(args.cleanup)
        print(f"Cleanup completed: {stats}")