Added new features to the project

2025-06-30 07:49:48 +05:30
parent bbe56a364d
commit b21cbb68da
48 changed files with 19774 additions and 1889 deletions
--- a/lib/database/twitter_init.py
+++ b/lib/database/twitter_init.py
@@ -0,0 +1,524 @@
+"""
+Twitter Database Initialization and Migration Script
+===================================================
+
+This module provides utilities for initializing the Twitter database,
+handling schema migrations, and managing database setup.
+
+Features:
+- Database initialization and table creation
+- Schema migration utilities
+- Data seeding for development/testing
+- Database health checks and maintenance
+"""
+
+import os
+import logging
+from typing import Dict, Any, List, Optional
+from datetime import datetime
+import json
+from pathlib import Path
+
+from sqlalchemy import create_engine, text, inspect
+from sqlalchemy.orm import sessionmaker
+from sqlalchemy.exc import SQLAlchemyError
+
+from .twitter_models import (
+    Base, TwitterUser, Tweet, ScheduledTweet, TwitterAnalytics,
+    TweetAnalytics, EngagementData, AudienceInsight, HashtagPerformance,
+    ContentTemplate, TwitterSettings, TwitterAccountType, TweetType,
+    TweetStatus, EngagementType, AnalyticsTimeframe, ContentCategory
+)
+
+# Configure logging
+# NOTE(review): basicConfig() runs as a side effect of importing this module;
+# it installs a root handler at INFO level only when the root logger has no
+# handlers yet, so an application that configures logging first is unaffected.
+logging.basicConfig(level=logging.INFO)
+# Module-level logger used by every function and method in this file.
+logger = logging.getLogger(__name__)
+
+class TwitterDatabaseInitializer:
+    """
+    Handles Twitter database initialization and management.
+
+    Wraps a SQLAlchemy engine and session factory for ``db_url`` and provides
+    table creation, index creation, health checks, file-level backup/restore
+    (SQLite only), SQL-script migrations and pruning of old analytics rows.
+
+    Most public methods report failure through their return value (``False``
+    or a dict carrying an ``'error'`` key) and log the cause instead of
+    raising.
+    """
+
+    # URL prefix that identifies a file-based SQLite database.
+    _SQLITE_PREFIX = 'sqlite:///'
+
+    # Tables that must exist after initialization; shared by
+    # initialize_database() and check_database_health().
+    EXPECTED_TABLES = (
+        'twitter_users', 'tweets', 'scheduled_tweets', 'twitter_analytics',
+        'tweet_analytics', 'engagement_data', 'audience_insights',
+        'hashtag_performance', 'content_templates', 'twitter_settings',
+    )
+
+    # (index name, table, column) for every single-column index that
+    # _create_indexes() creates.
+    _INDEXES = (
+        # User indexes
+        ('idx_twitter_users_user_id', 'twitter_users', 'user_id'),
+        ('idx_twitter_users_twitter_user_id', 'twitter_users', 'twitter_user_id'),
+        ('idx_twitter_users_username', 'twitter_users', 'username'),
+        # Tweet indexes
+        ('idx_tweets_user_id', 'tweets', 'user_id'),
+        ('idx_tweets_status', 'tweets', 'status'),
+        ('idx_tweets_posted_at', 'tweets', 'posted_at'),
+        ('idx_tweets_tweet_id', 'tweets', 'tweet_id'),
+        # Scheduled tweet indexes
+        ('idx_scheduled_tweets_user_id', 'scheduled_tweets', 'user_id'),
+        ('idx_scheduled_tweets_status', 'scheduled_tweets', 'status'),
+        ('idx_scheduled_tweets_scheduled_time', 'scheduled_tweets', 'scheduled_time'),
+        # Analytics indexes
+        ('idx_twitter_analytics_user_id', 'twitter_analytics', 'user_id'),
+        ('idx_twitter_analytics_date', 'twitter_analytics', 'date'),
+        ('idx_twitter_analytics_timeframe', 'twitter_analytics', 'timeframe'),
+        # Tweet analytics indexes
+        ('idx_tweet_analytics_tweet_id', 'tweet_analytics', 'tweet_id'),
+        ('idx_tweet_analytics_recorded_at', 'tweet_analytics', 'recorded_at'),
+        # Engagement data indexes
+        ('idx_engagement_data_tweet_id', 'engagement_data', 'tweet_id'),
+        ('idx_engagement_data_occurred_at', 'engagement_data', 'occurred_at'),
+        ('idx_engagement_data_type', 'engagement_data', 'engagement_type'),
+        # Hashtag performance indexes
+        ('idx_hashtag_performance_user_id', 'hashtag_performance', 'user_id'),
+        ('idx_hashtag_performance_hashtag', 'hashtag_performance', 'hashtag'),
+        # Content template indexes
+        ('idx_content_templates_user_id', 'content_templates', 'user_id'),
+        ('idx_content_templates_category', 'content_templates', 'category'),
+        ('idx_content_templates_is_active', 'content_templates', 'is_active'),
+    )
+
+    def __init__(self, db_url: str = "sqlite:///twitter_data.db"):
+        """Initialize the database initializer.
+
+        Args:
+            db_url: SQLAlchemy database URL. For ``sqlite:///`` URLs the
+                directory that will hold the database file is created if it
+                does not exist yet.
+        """
+        self.db_url = db_url
+        self.engine = create_engine(db_url, echo=False)
+        self.SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=self.engine)
+
+        # Create database directory if using SQLite
+        db_path = self._sqlite_file_path()
+        if db_path is not None:
+            os.makedirs(os.path.dirname(os.path.abspath(db_path)), exist_ok=True)
+
+    def _sqlite_file_path(self) -> Optional[str]:
+        """Return the file path encoded in a ``sqlite:///`` URL, else None."""
+        if not self.db_url.startswith(self._SQLITE_PREFIX):
+            return None
+        return self.db_url[len(self._SQLITE_PREFIX):]
+
+    def initialize_database(self, force_recreate: bool = False) -> bool:
+        """
+        Initialize the Twitter database with all required tables.
+
+        Creates every table registered on ``Base.metadata``, verifies that
+        all ``EXPECTED_TABLES`` exist, then creates indexes and runs seeding.
+        Index creation and seeding are best-effort: they log their own errors
+        and do not change the return value.
+
+        Args:
+            force_recreate: If True, drop existing tables and recreate
+
+        Returns:
+            bool: True if successful, False otherwise
+        """
+        try:
+            if force_recreate:
+                logger.info("Dropping existing tables...")
+                Base.metadata.drop_all(bind=self.engine)
+
+            logger.info("Creating Twitter database tables...")
+            Base.metadata.create_all(bind=self.engine)
+
+            # Verify tables were created
+            tables = inspect(self.engine).get_table_names()
+            missing_tables = [table for table in self.EXPECTED_TABLES if table not in tables]
+            if missing_tables:
+                logger.error(f"Missing tables: {missing_tables}")
+                return False
+
+            logger.info(f"Successfully created {len(tables)} tables")
+
+            # Create indexes for better performance
+            self._create_indexes()
+
+            # Seed initial data if needed
+            self._seed_initial_data()
+
+            logger.info("Twitter database initialization completed successfully")
+            return True
+
+        except Exception as e:
+            logger.error(f"Error initializing database: {e}")
+            return False
+
+    def _create_indexes(self):
+        """Create database indexes for better query performance.
+
+        Issues one ``CREATE INDEX IF NOT EXISTS`` per entry in ``_INDEXES``
+        on a single connection and commits once at the end. Any error is
+        logged and swallowed, so a failure leaves the commit unexecuted.
+        """
+        try:
+            with self.engine.connect() as conn:
+                for index_name, table, column in self._INDEXES:
+                    conn.execute(text(
+                        f"CREATE INDEX IF NOT EXISTS {index_name} ON {table}({column})"
+                    ))
+
+                conn.commit()
+                logger.info("Database indexes created successfully")
+
+        except Exception as e:
+            logger.error(f"Error creating indexes: {e}")
+
+    @staticmethod
+    def _sample_templates() -> List[Dict[str, Any]]:
+        """Return sample content-template definitions for development/testing.
+
+        These are not inserted by _seed_initial_data() because a template
+        needs an owning user; they are kept here for use once users exist.
+        """
+        return [
+            {
+                'name': 'Daily Motivation',
+                'description': 'Motivational quotes and thoughts',
+                'template_text': 'Start your day with this thought: {quote} #motivation #success',
+                'category': ContentCategory.PERSONAL,
+                'variables': ['quote'],
+                'default_hashtags': ['#motivation', '#success', '#mindset'],
+                'ai_prompt': 'Generate an inspiring motivational quote',
+                'ai_tone': 'inspirational',
+                'ai_target_audience': 'professionals and entrepreneurs'
+            },
+            {
+                'name': 'Tech News Share',
+                'description': 'Template for sharing tech news',
+                'template_text': 'Interesting development in {topic}: {summary} {link} #tech #innovation',
+                'category': ContentCategory.EDUCATIONAL,
+                'variables': ['topic', 'summary', 'link'],
+                'default_hashtags': ['#tech', '#innovation', '#technology'],
+                'ai_prompt': 'Summarize this tech news in an engaging way',
+                'ai_tone': 'informative',
+                'ai_target_audience': 'tech enthusiasts and professionals'
+            },
+            {
+                'name': 'Question Engagement',
+                'description': 'Template for asking engaging questions',
+                'template_text': 'Quick question for my followers: {question} What do you think? #community #discussion',
+                'category': ContentCategory.QUESTION,
+                'variables': ['question'],
+                'default_hashtags': ['#community', '#discussion', '#question'],
+                'ai_prompt': 'Generate an engaging question for social media',
+                'ai_tone': 'conversational',
+                'ai_target_audience': 'general audience'
+            },
+            {
+                'name': 'Product Update',
+                'description': 'Template for product announcements',
+                'template_text': 'Excited to share: {update} {details} #product #update #announcement',
+                'category': ContentCategory.PROMOTIONAL,
+                'variables': ['update', 'details'],
+                'default_hashtags': ['#product', '#update', '#announcement'],
+                'ai_prompt': 'Write an exciting product update announcement',
+                'ai_tone': 'enthusiastic',
+                'ai_target_audience': 'customers and prospects'
+            }
+        ]
+
+    def _seed_initial_data(self):
+        """Seed the database with initial data for development/testing.
+
+        Currently this only checks whether any TwitterUser rows exist and
+        logs the outcome; nothing is written. Errors are logged and swallowed.
+        """
+        try:
+            session = self.SessionLocal()
+            try:
+                # Check if we already have data
+                if session.query(TwitterUser).count() > 0:
+                    logger.info("Database already contains data, skipping seeding")
+                    return
+
+                # Note: We can't create templates without a user, so we'll skip this for now
+                # In a real scenario, templates would be created when users are added
+                # (see _sample_templates() for the prepared definitions).
+            finally:
+                # Always release the session, including on query failure.
+                session.close()
+
+            logger.info("Initial data seeding completed")
+
+        except Exception as e:
+            logger.error(f"Error seeding initial data: {e}")
+
+    def check_database_health(self) -> Dict[str, Any]:
+        """
+        Check the health and status of the Twitter database.
+
+        Returns:
+            Dict containing health check results with the keys ``status``
+            ('healthy', 'issues_found' or 'error'), ``timestamp`` (naive UTC,
+            ISO format), ``tables`` (existence and row count per expected
+            table), ``indexes`` (index count per existing table) and
+            ``issues`` (human-readable problems). An ``error`` key is added
+            when the check itself fails.
+        """
+        health_status = {
+            'status': 'healthy',
+            'timestamp': datetime.utcnow().isoformat(),
+            'tables': {},
+            'indexes': {},
+            'issues': []
+        }
+
+        try:
+            inspector = inspect(self.engine)
+            # Query the table list once instead of once per expected table.
+            existing_tables = inspector.get_table_names()
+
+            session = self.SessionLocal()
+            try:
+                # Check table existence and row counts
+                for table_name in self.EXPECTED_TABLES:
+                    if table_name not in existing_tables:
+                        health_status['tables'][table_name] = {'exists': False}
+                        health_status['issues'].append(f"Missing table: {table_name}")
+                        continue
+
+                    try:
+                        # table_name comes from the fixed EXPECTED_TABLES
+                        # tuple, never from user input.
+                        count = session.execute(text(f"SELECT COUNT(*) FROM {table_name}")).scalar()
+                        health_status['tables'][table_name] = {
+                            'exists': True,
+                            'row_count': count
+                        }
+                    except Exception as e:
+                        # Reset the failed transaction so later counts can run.
+                        session.rollback()
+                        health_status['tables'][table_name] = {
+                            'exists': True,
+                            'row_count': 'error',
+                            'error': str(e)
+                        }
+                        health_status['issues'].append(f"Error counting rows in {table_name}: {e}")
+
+                # Check indexes
+                for table_name in existing_tables:
+                    health_status['indexes'][table_name] = len(inspector.get_indexes(table_name))
+            finally:
+                session.close()
+
+            # Set overall status
+            if health_status['issues']:
+                health_status['status'] = 'issues_found'
+
+            return health_status
+
+        except Exception as e:
+            health_status['status'] = 'error'
+            health_status['error'] = str(e)
+            logger.error(f"Error checking database health: {e}")
+            return health_status
+
+    def backup_database(self, backup_path: str) -> bool:
+        """
+        Create a backup of the database.
+
+        Copies the SQLite database file to ``backup_path`` with
+        ``shutil.copy2``. The copy is a plain file copy and is not
+        coordinated with open connections.
+
+        Args:
+            backup_path: Path where to save the backup. May be a bare file
+                name, in which case the backup lands in the current directory.
+
+        Returns:
+            bool: True if successful, False otherwise
+        """
+        try:
+            db_file = self._sqlite_file_path()
+            if db_file is None:
+                logger.error("Backup currently only supported for SQLite databases")
+                return False
+
+            if not os.path.exists(db_file):
+                logger.error(f"Database file not found: {db_file}")
+                return False
+
+            # Create backup directory if it doesn't exist. abspath() keeps
+            # dirname() from returning '' for a bare file name, which would
+            # make makedirs() raise.
+            os.makedirs(os.path.dirname(os.path.abspath(backup_path)), exist_ok=True)
+
+            # Copy the database file
+            import shutil
+            shutil.copy2(db_file, backup_path)
+
+            logger.info(f"Database backed up to: {backup_path}")
+            return True
+
+        except Exception as e:
+            logger.error(f"Error backing up database: {e}")
+            return False
+
+    def restore_database(self, backup_path: str) -> bool:
+        """
+        Restore database from a backup.
+
+        Overwrites the SQLite database file with the file at ``backup_path``.
+
+        Args:
+            backup_path: Path to the backup file
+
+        Returns:
+            bool: True if successful, False otherwise
+        """
+        try:
+            db_file = self._sqlite_file_path()
+            if db_file is None:
+                logger.error("Restore currently only supported for SQLite databases")
+                return False
+
+            if not os.path.exists(backup_path):
+                logger.error(f"Backup file not found: {backup_path}")
+                return False
+
+            # Close pooled connections so none keeps using the file that is
+            # about to be overwritten; the engine reconnects on next use.
+            self.engine.dispose()
+
+            # Copy the backup file to the database location
+            import shutil
+            shutil.copy2(backup_path, db_file)
+
+            logger.info(f"Database restored from: {backup_path}")
+            return True
+
+        except Exception as e:
+            logger.error(f"Error restoring database: {e}")
+            return False
+
+    def migrate_schema(self, migration_scripts: List[str]) -> bool:
+        """
+        Apply schema migration scripts.
+
+        Each entry is a path to a SQL file. The path string itself is the
+        key recorded in the ``schema_migrations`` table, so a script is
+        skipped when the same path has been applied before. All work happens
+        on one connection with a single commit at the end; if a script file
+        is missing the method returns False without committing.
+
+        NOTE(review): the tracking table uses ``AUTOINCREMENT``, which is
+        SQLite syntax, and each file is executed as a single ``text()``
+        statement - confirm multi-statement scripts work with the driver in
+        use.
+
+        Args:
+            migration_scripts: List of paths to SQL migration scripts
+
+        Returns:
+            bool: True if successful, False otherwise
+        """
+        try:
+            with self.engine.connect() as conn:
+                # Create migration tracking table if it doesn't exist
+                conn.execute(text("""
+                    CREATE TABLE IF NOT EXISTS schema_migrations (
+                        id INTEGER PRIMARY KEY AUTOINCREMENT,
+                        migration_name TEXT NOT NULL UNIQUE,
+                        applied_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
+                    )
+                """))
+
+                for script in migration_scripts:
+                    # Check if migration was already applied
+                    applied_count = conn.execute(text(
+                        "SELECT COUNT(*) FROM schema_migrations WHERE migration_name = :name"
+                    ), {"name": script}).scalar()
+
+                    if applied_count:
+                        logger.info(f"Migration already applied: {script}")
+                        continue
+
+                    logger.info(f"Applying migration: {script}")
+
+                    script_path = Path(script)
+                    if not script_path.exists():
+                        logger.error(f"Migration script not found: {script}")
+                        return False
+
+                    # Read and execute migration script; decode explicitly
+                    # so the result does not depend on the platform locale.
+                    migration_sql = script_path.read_text(encoding="utf-8")
+                    conn.execute(text(migration_sql))
+
+                    # Record migration as applied
+                    conn.execute(text(
+                        "INSERT INTO schema_migrations (migration_name) VALUES (:name)"
+                    ), {"name": script})
+
+                conn.commit()
+                logger.info("Schema migration completed successfully")
+                return True
+
+        except Exception as e:
+            logger.error(f"Error applying schema migration: {e}")
+            return False
+
+    @staticmethod
+    def _retention_cutoff(days: int) -> datetime:
+        """Return naive-UTC midnight of the day ``days`` days before today."""
+        from datetime import timedelta
+
+        midnight = datetime.utcnow().replace(hour=0, minute=0, second=0, microsecond=0)
+        # timedelta arithmetic crosses month and year boundaries correctly;
+        # replace(day=day - days) raises ValueError once the result is < 1.
+        return midnight - timedelta(days=days)
+
+    def cleanup_old_data(self, days: int = 90) -> Dict[str, Any]:
+        """
+        Clean up old data to maintain database performance.
+
+        Deletes TweetAnalytics rows recorded before, and EngagementData rows
+        that occurred before, midnight (naive UTC) ``days`` days ago. Both
+        deletes share one transaction, which is rolled back on failure.
+
+        Args:
+            days: Number of days to keep data for (must be >= 0)
+
+        Returns:
+            Dict with cleanup statistics: ``tweet_analytics_deleted``,
+            ``engagement_data_deleted`` and ``cutoff_date`` (ISO string);
+            on failure a dict with a single ``error`` key.
+        """
+        if days < 0:
+            # A negative retention would put the cutoff in the future and
+            # wipe every row.
+            message = f"days must be non-negative, got {days}"
+            logger.error(f"Error during cleanup: {message}")
+            return {'error': message}
+
+        try:
+            cutoff_date = self._retention_cutoff(days)
+
+            session = self.SessionLocal()
+            try:
+                # Query.delete() returns the number of rows matched, so no
+                # separate COUNT query is needed.
+                old_tweet_analytics = session.query(TweetAnalytics).filter(
+                    TweetAnalytics.recorded_at < cutoff_date
+                ).delete(synchronize_session=False)
+
+                old_engagement_data = session.query(EngagementData).filter(
+                    EngagementData.occurred_at < cutoff_date
+                ).delete(synchronize_session=False)
+
+                session.commit()
+            except Exception:
+                session.rollback()
+                raise
+            finally:
+                session.close()
+
+            cleanup_stats = {
+                'tweet_analytics_deleted': old_tweet_analytics,
+                'engagement_data_deleted': old_engagement_data,
+                'cutoff_date': cutoff_date.isoformat()
+            }
+
+            logger.info(f"Cleanup completed: {cleanup_stats}")
+            return cleanup_stats
+
+        except Exception as e:
+            logger.error(f"Error during cleanup: {e}")
+            return {'error': str(e)}
+
+def initialize_twitter_database(db_url: str = "sqlite:///twitter_data.db", force_recreate: bool = False) -> bool:
+    """
+    Initialize the Twitter database in one call.
+
+    Builds a TwitterDatabaseInitializer for ``db_url`` and delegates to its
+    ``initialize_database`` method.
+
+    Args:
+        db_url: Database URL
+        force_recreate: Whether to drop and recreate existing tables
+
+    Returns:
+        bool: True if successful, False otherwise
+    """
+    return TwitterDatabaseInitializer(db_url).initialize_database(force_recreate)
+
+def check_twitter_database_health(db_url: str = "sqlite:///twitter_data.db") -> Dict[str, Any]:
+    """
+    Run a Twitter database health check in one call.
+
+    Builds a TwitterDatabaseInitializer for ``db_url`` and delegates to its
+    ``check_database_health`` method.
+
+    Args:
+        db_url: Database URL
+
+    Returns:
+        Dict with health check results
+    """
+    return TwitterDatabaseInitializer(db_url).check_database_health()
+
+if __name__ == "__main__":
+    # Command line interface for database management.
+    # Every requested action runs, in the fixed order:
+    # init -> health -> backup -> restore -> cleanup.
+    import argparse
+
+    parser = argparse.ArgumentParser(description="Twitter Database Management")
+    parser.add_argument("--db-url", default="sqlite:///twitter_data.db", help="Database URL")
+    parser.add_argument("--init", action="store_true", help="Initialize database")
+    parser.add_argument("--force", action="store_true", help="Force recreate tables")
+    parser.add_argument("--health", action="store_true", help="Check database health")
+    parser.add_argument("--backup", help="Create database backup")
+    parser.add_argument("--restore", help="Restore from backup")
+    parser.add_argument("--cleanup", type=int, help="Cleanup data older than N days")
+
+    args = parser.parse_args()
+
+    initializer = TwitterDatabaseInitializer(args.db_url)
+
+    if args.init:
+        # --force only has an effect together with --init.
+        success = initializer.initialize_database(args.force)
+        print(f"Database initialization: {'SUCCESS' if success else 'FAILED'}")
+
+    if args.health:
+        health = initializer.check_database_health()
+        print(json.dumps(health, indent=2))
+
+    if args.backup:
+        success = initializer.backup_database(args.backup)
+        print(f"Database backup: {'SUCCESS' if success else 'FAILED'}")
+
+    if args.restore:
+        success = initializer.restore_database(args.restore)
+        print(f"Database restore: {'SUCCESS' if success else 'FAILED'}")
+
+    # Compare against None so that an explicit "--cleanup 0" is honoured
+    # instead of being treated as "option not given".
+    if args.cleanup is not None:
+        stats = initializer.cleanup_old_data(args.cleanup)
+        print(f"Cleanup completed: {stats}")