""" Twitter Database Initialization and Migration Script =================================================== This module provides utilities for initializing the Twitter database, handling schema migrations, and managing database setup. Features: - Database initialization and table creation - Schema migration utilities - Data seeding for development/testing - Database health checks and maintenance """ import os import logging from typing import Dict, Any, List, Optional from datetime import datetime import json from pathlib import Path from sqlalchemy import create_engine, text, inspect from sqlalchemy.orm import sessionmaker from sqlalchemy.exc import SQLAlchemyError from .twitter_models import ( Base, TwitterUser, Tweet, ScheduledTweet, TwitterAnalytics, TweetAnalytics, EngagementData, AudienceInsight, HashtagPerformance, ContentTemplate, TwitterSettings, TwitterAccountType, TweetType, TweetStatus, EngagementType, AnalyticsTimeframe, ContentCategory ) # Configure logging logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) class TwitterDatabaseInitializer: """ Handles Twitter database initialization and management. """ def __init__(self, db_url: str = "sqlite:///twitter_data.db"): """Initialize the database initializer.""" self.db_url = db_url self.engine = create_engine(db_url, echo=False) self.SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=self.engine) # Create database directory if using SQLite if db_url.startswith('sqlite:///'): db_path = db_url.replace('sqlite:///', '') os.makedirs(os.path.dirname(os.path.abspath(db_path)), exist_ok=True) def initialize_database(self, force_recreate: bool = False) -> bool: """ Initialize the Twitter database with all required tables. Args: force_recreate: If True, drop existing tables and recreate Returns: bool: True if successful, False otherwise """ try: if force_recreate: logger.info("Dropping existing tables...") Base.metadata.drop_all(bind=self.engine) logger.info("Creating Twitter database tables...") Base.metadata.create_all(bind=self.engine) # Verify tables were created inspector = inspect(self.engine) tables = inspector.get_table_names() expected_tables = [ 'twitter_users', 'tweets', 'scheduled_tweets', 'twitter_analytics', 'tweet_analytics', 'engagement_data', 'audience_insights', 'hashtag_performance', 'content_templates', 'twitter_settings' ] missing_tables = [table for table in expected_tables if table not in tables] if missing_tables: logger.error(f"Missing tables: {missing_tables}") return False logger.info(f"Successfully created {len(tables)} tables") # Create indexes for better performance self._create_indexes() # Seed initial data if needed self._seed_initial_data() logger.info("Twitter database initialization completed successfully") return True except Exception as e: logger.error(f"Error initializing database: {e}") return False def _create_indexes(self): """Create database indexes for better query performance.""" try: with self.engine.connect() as conn: # User indexes conn.execute(text("CREATE INDEX IF NOT EXISTS idx_twitter_users_user_id ON twitter_users(user_id)")) conn.execute(text("CREATE INDEX IF NOT EXISTS idx_twitter_users_twitter_user_id ON twitter_users(twitter_user_id)")) conn.execute(text("CREATE INDEX IF NOT EXISTS idx_twitter_users_username ON twitter_users(username)")) # Tweet indexes conn.execute(text("CREATE INDEX IF NOT EXISTS idx_tweets_user_id ON tweets(user_id)")) conn.execute(text("CREATE INDEX IF NOT EXISTS idx_tweets_status ON tweets(status)")) conn.execute(text("CREATE INDEX IF NOT EXISTS idx_tweets_posted_at ON tweets(posted_at)")) conn.execute(text("CREATE INDEX IF NOT EXISTS idx_tweets_tweet_id ON tweets(tweet_id)")) # Scheduled tweet indexes conn.execute(text("CREATE INDEX IF NOT EXISTS idx_scheduled_tweets_user_id ON scheduled_tweets(user_id)")) conn.execute(text("CREATE INDEX IF NOT EXISTS idx_scheduled_tweets_status ON scheduled_tweets(status)")) conn.execute(text("CREATE INDEX IF NOT EXISTS idx_scheduled_tweets_scheduled_time ON scheduled_tweets(scheduled_time)")) # Analytics indexes conn.execute(text("CREATE INDEX IF NOT EXISTS idx_twitter_analytics_user_id ON twitter_analytics(user_id)")) conn.execute(text("CREATE INDEX IF NOT EXISTS idx_twitter_analytics_date ON twitter_analytics(date)")) conn.execute(text("CREATE INDEX IF NOT EXISTS idx_twitter_analytics_timeframe ON twitter_analytics(timeframe)")) # Tweet analytics indexes conn.execute(text("CREATE INDEX IF NOT EXISTS idx_tweet_analytics_tweet_id ON tweet_analytics(tweet_id)")) conn.execute(text("CREATE INDEX IF NOT EXISTS idx_tweet_analytics_recorded_at ON tweet_analytics(recorded_at)")) # Engagement data indexes conn.execute(text("CREATE INDEX IF NOT EXISTS idx_engagement_data_tweet_id ON engagement_data(tweet_id)")) conn.execute(text("CREATE INDEX IF NOT EXISTS idx_engagement_data_occurred_at ON engagement_data(occurred_at)")) conn.execute(text("CREATE INDEX IF NOT EXISTS idx_engagement_data_type ON engagement_data(engagement_type)")) # Hashtag performance indexes conn.execute(text("CREATE INDEX IF NOT EXISTS idx_hashtag_performance_user_id ON hashtag_performance(user_id)")) conn.execute(text("CREATE INDEX IF NOT EXISTS idx_hashtag_performance_hashtag ON hashtag_performance(hashtag)")) # Content template indexes conn.execute(text("CREATE INDEX IF NOT EXISTS idx_content_templates_user_id ON content_templates(user_id)")) conn.execute(text("CREATE INDEX IF NOT EXISTS idx_content_templates_category ON content_templates(category)")) conn.execute(text("CREATE INDEX IF NOT EXISTS idx_content_templates_is_active ON content_templates(is_active)")) conn.commit() logger.info("Database indexes created successfully") except Exception as e: logger.error(f"Error creating indexes: {e}") def _seed_initial_data(self): """Seed the database with initial data for development/testing.""" try: session = self.SessionLocal() # Check if we already have data if session.query(TwitterUser).count() > 0: logger.info("Database already contains data, skipping seeding") session.close() return # Create sample content templates sample_templates = [ { 'name': 'Daily Motivation', 'description': 'Motivational quotes and thoughts', 'template_text': 'Start your day with this thought: {quote} #motivation #success', 'category': ContentCategory.PERSONAL, 'variables': ['quote'], 'default_hashtags': ['#motivation', '#success', '#mindset'], 'ai_prompt': 'Generate an inspiring motivational quote', 'ai_tone': 'inspirational', 'ai_target_audience': 'professionals and entrepreneurs' }, { 'name': 'Tech News Share', 'description': 'Template for sharing tech news', 'template_text': 'Interesting development in {topic}: {summary} {link} #tech #innovation', 'category': ContentCategory.EDUCATIONAL, 'variables': ['topic', 'summary', 'link'], 'default_hashtags': ['#tech', '#innovation', '#technology'], 'ai_prompt': 'Summarize this tech news in an engaging way', 'ai_tone': 'informative', 'ai_target_audience': 'tech enthusiasts and professionals' }, { 'name': 'Question Engagement', 'description': 'Template for asking engaging questions', 'template_text': 'Quick question for my followers: {question} What do you think? #community #discussion', 'category': ContentCategory.QUESTION, 'variables': ['question'], 'default_hashtags': ['#community', '#discussion', '#question'], 'ai_prompt': 'Generate an engaging question for social media', 'ai_tone': 'conversational', 'ai_target_audience': 'general audience' }, { 'name': 'Product Update', 'description': 'Template for product announcements', 'template_text': 'Excited to share: {update} {details} #product #update #announcement', 'category': ContentCategory.PROMOTIONAL, 'variables': ['update', 'details'], 'default_hashtags': ['#product', '#update', '#announcement'], 'ai_prompt': 'Write an exciting product update announcement', 'ai_tone': 'enthusiastic', 'ai_target_audience': 'customers and prospects' } ] # Note: We can't create templates without a user, so we'll skip this for now # In a real scenario, templates would be created when users are added session.close() logger.info("Initial data seeding completed") except Exception as e: logger.error(f"Error seeding initial data: {e}") def check_database_health(self) -> Dict[str, Any]: """ Check the health and status of the Twitter database. Returns: Dict containing health check results """ health_status = { 'status': 'healthy', 'timestamp': datetime.utcnow().isoformat(), 'tables': {}, 'indexes': {}, 'issues': [] } try: inspector = inspect(self.engine) # Check table existence and row counts expected_tables = [ 'twitter_users', 'tweets', 'scheduled_tweets', 'twitter_analytics', 'tweet_analytics', 'engagement_data', 'audience_insights', 'hashtag_performance', 'content_templates', 'twitter_settings' ] session = self.SessionLocal() for table_name in expected_tables: if table_name in inspector.get_table_names(): # Get row count try: result = session.execute(text(f"SELECT COUNT(*) FROM {table_name}")) count = result.scalar() health_status['tables'][table_name] = { 'exists': True, 'row_count': count } except Exception as e: health_status['tables'][table_name] = { 'exists': True, 'row_count': 'error', 'error': str(e) } health_status['issues'].append(f"Error counting rows in {table_name}: {e}") else: health_status['tables'][table_name] = {'exists': False} health_status['issues'].append(f"Missing table: {table_name}") # Check indexes for table_name in inspector.get_table_names(): indexes = inspector.get_indexes(table_name) health_status['indexes'][table_name] = len(indexes) session.close() # Set overall status if health_status['issues']: health_status['status'] = 'issues_found' return health_status except Exception as e: health_status['status'] = 'error' health_status['error'] = str(e) logger.error(f"Error checking database health: {e}") return health_status def backup_database(self, backup_path: str) -> bool: """ Create a backup of the database. Args: backup_path: Path where to save the backup Returns: bool: True if successful, False otherwise """ try: if not self.db_url.startswith('sqlite:///'): logger.error("Backup currently only supported for SQLite databases") return False # Get the database file path db_file = self.db_url.replace('sqlite:///', '') if not os.path.exists(db_file): logger.error(f"Database file not found: {db_file}") return False # Create backup directory if it doesn't exist os.makedirs(os.path.dirname(backup_path), exist_ok=True) # Copy the database file import shutil shutil.copy2(db_file, backup_path) logger.info(f"Database backed up to: {backup_path}") return True except Exception as e: logger.error(f"Error backing up database: {e}") return False def restore_database(self, backup_path: str) -> bool: """ Restore database from a backup. Args: backup_path: Path to the backup file Returns: bool: True if successful, False otherwise """ try: if not self.db_url.startswith('sqlite:///'): logger.error("Restore currently only supported for SQLite databases") return False if not os.path.exists(backup_path): logger.error(f"Backup file not found: {backup_path}") return False # Get the database file path db_file = self.db_url.replace('sqlite:///', '') # Copy the backup file to the database location import shutil shutil.copy2(backup_path, db_file) logger.info(f"Database restored from: {backup_path}") return True except Exception as e: logger.error(f"Error restoring database: {e}") return False def migrate_schema(self, migration_scripts: List[str]) -> bool: """ Apply schema migration scripts. Args: migration_scripts: List of SQL migration scripts Returns: bool: True if successful, False otherwise """ try: with self.engine.connect() as conn: # Create migration tracking table if it doesn't exist conn.execute(text(""" CREATE TABLE IF NOT EXISTS schema_migrations ( id INTEGER PRIMARY KEY AUTOINCREMENT, migration_name TEXT NOT NULL UNIQUE, applied_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ) """)) for script in migration_scripts: # Check if migration was already applied result = conn.execute(text( "SELECT COUNT(*) FROM schema_migrations WHERE migration_name = :name" ), {"name": script}) if result.scalar() == 0: # Apply migration logger.info(f"Applying migration: {script}") # Read and execute migration script script_path = Path(script) if script_path.exists(): with open(script_path, 'r') as f: migration_sql = f.read() conn.execute(text(migration_sql)) # Record migration as applied conn.execute(text( "INSERT INTO schema_migrations (migration_name) VALUES (:name)" ), {"name": script}) else: logger.error(f"Migration script not found: {script}") return False else: logger.info(f"Migration already applied: {script}") conn.commit() logger.info("Schema migration completed successfully") return True except Exception as e: logger.error(f"Error applying schema migration: {e}") return False def cleanup_old_data(self, days: int = 90) -> Dict[str, int]: """ Clean up old data to maintain database performance. Args: days: Number of days to keep data for Returns: Dict with cleanup statistics """ try: cutoff_date = datetime.utcnow().replace(hour=0, minute=0, second=0, microsecond=0) cutoff_date = cutoff_date.replace(day=cutoff_date.day - days) session = self.SessionLocal() # Count records to be deleted old_tweet_analytics = session.query(TweetAnalytics).filter( TweetAnalytics.recorded_at < cutoff_date ).count() old_engagement_data = session.query(EngagementData).filter( EngagementData.occurred_at < cutoff_date ).count() # Delete old records session.query(TweetAnalytics).filter( TweetAnalytics.recorded_at < cutoff_date ).delete() session.query(EngagementData).filter( EngagementData.occurred_at < cutoff_date ).delete() session.commit() session.close() cleanup_stats = { 'tweet_analytics_deleted': old_tweet_analytics, 'engagement_data_deleted': old_engagement_data, 'cutoff_date': cutoff_date.isoformat() } logger.info(f"Cleanup completed: {cleanup_stats}") return cleanup_stats except Exception as e: logger.error(f"Error during cleanup: {e}") return {'error': str(e)} def initialize_twitter_database(db_url: str = "sqlite:///twitter_data.db", force_recreate: bool = False) -> bool: """ Convenience function to initialize the Twitter database. Args: db_url: Database URL force_recreate: Whether to recreate existing tables Returns: bool: True if successful, False otherwise """ initializer = TwitterDatabaseInitializer(db_url) return initializer.initialize_database(force_recreate) def check_twitter_database_health(db_url: str = "sqlite:///twitter_data.db") -> Dict[str, Any]: """ Convenience function to check Twitter database health. Args: db_url: Database URL Returns: Dict with health check results """ initializer = TwitterDatabaseInitializer(db_url) return initializer.check_database_health() if __name__ == "__main__": # Command line interface for database management import argparse parser = argparse.ArgumentParser(description="Twitter Database Management") parser.add_argument("--db-url", default="sqlite:///twitter_data.db", help="Database URL") parser.add_argument("--init", action="store_true", help="Initialize database") parser.add_argument("--force", action="store_true", help="Force recreate tables") parser.add_argument("--health", action="store_true", help="Check database health") parser.add_argument("--backup", help="Create database backup") parser.add_argument("--restore", help="Restore from backup") parser.add_argument("--cleanup", type=int, help="Cleanup data older than N days") args = parser.parse_args() initializer = TwitterDatabaseInitializer(args.db_url) if args.init: success = initializer.initialize_database(args.force) print(f"Database initialization: {'SUCCESS' if success else 'FAILED'}") if args.health: health = initializer.check_database_health() print(json.dumps(health, indent=2)) if args.backup: success = initializer.backup_database(args.backup) print(f"Database backup: {'SUCCESS' if success else 'FAILED'}") if args.restore: success = initializer.restore_database(args.restore) print(f"Database restore: {'SUCCESS' if success else 'FAILED'}") if args.cleanup: stats = initializer.cleanup_old_data(args.cleanup) print(f"Cleanup completed: {stats}")