524 lines
22 KiB
Python
524 lines
22 KiB
Python
"""
|
|
Twitter Database Initialization and Migration Script
|
|
===================================================
|
|
|
|
This module provides utilities for initializing the Twitter database,
|
|
handling schema migrations, and managing database setup.
|
|
|
|
Features:
|
|
- Database initialization and table creation
|
|
- Schema migration utilities
|
|
- Data seeding for development/testing
|
|
- Database health checks and maintenance
|
|
"""
|
|
|
|
import os
|
|
import logging
|
|
from typing import Dict, Any, List, Optional
|
|
from datetime import datetime
|
|
import json
|
|
from pathlib import Path
|
|
|
|
from sqlalchemy import create_engine, text, inspect
|
|
from sqlalchemy.orm import sessionmaker
|
|
from sqlalchemy.exc import SQLAlchemyError
|
|
|
|
from .twitter_models import (
|
|
Base, TwitterUser, Tweet, ScheduledTweet, TwitterAnalytics,
|
|
TweetAnalytics, EngagementData, AudienceInsight, HashtagPerformance,
|
|
ContentTemplate, TwitterSettings, TwitterAccountType, TweetType,
|
|
TweetStatus, EngagementType, AnalyticsTimeframe, ContentCategory
|
|
)
|
|
|
|
# Module-wide logger, named after this module. Root logging is configured at
# INFO level as an import-time side effect so the messages emitted below are
# visible when the module is run as a script.
logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO)
|
|
|
|
class TwitterDatabaseInitializer:
    """
    Handles Twitter database initialization and management.

    Wraps a SQLAlchemy engine and session factory for a single database URL
    and provides table creation, index creation, health checks, SQLite file
    backup/restore, SQL-script migrations and cleanup of old analytics rows.

    The public methods are best-effort: failures are logged through the
    module logger and reported through the return value rather than raised.
    """

    # URL prefix that identifies a file-based SQLite database.
    _SQLITE_PREFIX = 'sqlite:///'

    # Tables that must exist after initialization; shared by
    # initialize_database() and check_database_health().
    EXPECTED_TABLES = (
        'twitter_users', 'tweets', 'scheduled_tweets', 'twitter_analytics',
        'tweet_analytics', 'engagement_data', 'audience_insights',
        'hashtag_performance', 'content_templates', 'twitter_settings',
    )

    # (index name, table, column) for every single-column index created by
    # _create_indexes(). Names are spelled out because they do not all follow
    # one pattern (see idx_engagement_data_type).
    _INDEXES = (
        # User indexes
        ('idx_twitter_users_user_id', 'twitter_users', 'user_id'),
        ('idx_twitter_users_twitter_user_id', 'twitter_users', 'twitter_user_id'),
        ('idx_twitter_users_username', 'twitter_users', 'username'),
        # Tweet indexes
        ('idx_tweets_user_id', 'tweets', 'user_id'),
        ('idx_tweets_status', 'tweets', 'status'),
        ('idx_tweets_posted_at', 'tweets', 'posted_at'),
        ('idx_tweets_tweet_id', 'tweets', 'tweet_id'),
        # Scheduled tweet indexes
        ('idx_scheduled_tweets_user_id', 'scheduled_tweets', 'user_id'),
        ('idx_scheduled_tweets_status', 'scheduled_tweets', 'status'),
        ('idx_scheduled_tweets_scheduled_time', 'scheduled_tweets', 'scheduled_time'),
        # Analytics indexes
        ('idx_twitter_analytics_user_id', 'twitter_analytics', 'user_id'),
        ('idx_twitter_analytics_date', 'twitter_analytics', 'date'),
        ('idx_twitter_analytics_timeframe', 'twitter_analytics', 'timeframe'),
        # Tweet analytics indexes
        ('idx_tweet_analytics_tweet_id', 'tweet_analytics', 'tweet_id'),
        ('idx_tweet_analytics_recorded_at', 'tweet_analytics', 'recorded_at'),
        # Engagement data indexes
        ('idx_engagement_data_tweet_id', 'engagement_data', 'tweet_id'),
        ('idx_engagement_data_occurred_at', 'engagement_data', 'occurred_at'),
        ('idx_engagement_data_type', 'engagement_data', 'engagement_type'),
        # Hashtag performance indexes
        ('idx_hashtag_performance_user_id', 'hashtag_performance', 'user_id'),
        ('idx_hashtag_performance_hashtag', 'hashtag_performance', 'hashtag'),
        # Content template indexes
        ('idx_content_templates_user_id', 'content_templates', 'user_id'),
        ('idx_content_templates_category', 'content_templates', 'category'),
        ('idx_content_templates_is_active', 'content_templates', 'is_active'),
    )

    def __init__(self, db_url: str = "sqlite:///twitter_data.db"):
        """
        Initialize the database initializer.

        Args:
            db_url: SQLAlchemy database URL. For file-based SQLite URLs the
                parent directory of the database file is created if missing.
        """
        self.db_url = db_url
        self.engine = create_engine(db_url, echo=False)
        self.SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=self.engine)

        # Create database directory if using a file-based SQLite database.
        # An empty path or ':memory:' has no directory that needs creating.
        db_path = self._sqlite_file_path()
        if db_path and db_path != ':memory:':
            os.makedirs(os.path.dirname(os.path.abspath(db_path)), exist_ok=True)

    def _sqlite_file_path(self) -> Optional[str]:
        """Return the path part of a ``sqlite:///`` URL, or None for any other URL."""
        if not self.db_url.startswith(self._SQLITE_PREFIX):
            return None
        # Strip only the leading prefix; str.replace would also remove any
        # later occurrence of the same text inside the path.
        return self.db_url[len(self._SQLITE_PREFIX):]

    @staticmethod
    def _cutoff_date(days: int, now: Optional[datetime] = None) -> datetime:
        """
        Return midnight of the day that lies ``days`` days before ``now``.

        Args:
            days: Number of days to go back.
            now: Reference moment; defaults to the current naive UTC time.
        """
        from datetime import timedelta

        reference = datetime.utcnow() if now is None else now
        midnight = reference.replace(hour=0, minute=0, second=0, microsecond=0)
        # timedelta arithmetic crosses month and year boundaries correctly,
        # unlike replacing the day-of-month field.
        return midnight - timedelta(days=days)

    def initialize_database(self, force_recreate: bool = False) -> bool:
        """
        Initialize the Twitter database with all required tables.

        Creates the tables described by ``Base.metadata``, verifies that every
        table in EXPECTED_TABLES exists, then creates indexes and runs the
        seeding step. Index creation and seeding log their own errors and do
        not affect the return value.

        Args:
            force_recreate: If True, drop existing tables and recreate

        Returns:
            bool: True if successful, False otherwise
        """
        try:
            if force_recreate:
                logger.info("Dropping existing tables...")
                Base.metadata.drop_all(bind=self.engine)

            logger.info("Creating Twitter database tables...")
            Base.metadata.create_all(bind=self.engine)

            # Verify tables were created
            tables = inspect(self.engine).get_table_names()
            missing_tables = [table for table in self.EXPECTED_TABLES if table not in tables]
            if missing_tables:
                logger.error(f"Missing tables: {missing_tables}")
                return False

            logger.info(f"Successfully created {len(tables)} tables")

            # Create indexes for better performance
            self._create_indexes()

            # Seed initial data if needed
            self._seed_initial_data()

            logger.info("Twitter database initialization completed successfully")
            return True

        except Exception as e:
            logger.error(f"Error initializing database: {e}")
            return False

    def _create_indexes(self):
        """
        Create database indexes for better query performance.

        Issues one ``CREATE INDEX IF NOT EXISTS`` per entry in _INDEXES on a
        single connection and commits once at the end. Errors are logged and
        swallowed so that a failed index does not abort initialization.
        """
        try:
            with self.engine.connect() as conn:
                for index_name, table_name, column_name in self._INDEXES:
                    conn.execute(text(
                        f"CREATE INDEX IF NOT EXISTS {index_name} ON {table_name}({column_name})"
                    ))
                conn.commit()
                logger.info("Database indexes created successfully")

        except Exception as e:
            logger.error(f"Error creating indexes: {e}")

    @staticmethod
    def _sample_content_templates() -> List[Dict[str, Any]]:
        """
        Return the sample content-template definitions kept for seeding.

        Nothing inserts these yet: templates cannot be created without a
        user, so they are only built when a caller asks for them.
        """
        return [
            {
                'name': 'Daily Motivation',
                'description': 'Motivational quotes and thoughts',
                'template_text': 'Start your day with this thought: {quote} #motivation #success',
                'category': ContentCategory.PERSONAL,
                'variables': ['quote'],
                'default_hashtags': ['#motivation', '#success', '#mindset'],
                'ai_prompt': 'Generate an inspiring motivational quote',
                'ai_tone': 'inspirational',
                'ai_target_audience': 'professionals and entrepreneurs'
            },
            {
                'name': 'Tech News Share',
                'description': 'Template for sharing tech news',
                'template_text': 'Interesting development in {topic}: {summary} {link} #tech #innovation',
                'category': ContentCategory.EDUCATIONAL,
                'variables': ['topic', 'summary', 'link'],
                'default_hashtags': ['#tech', '#innovation', '#technology'],
                'ai_prompt': 'Summarize this tech news in an engaging way',
                'ai_tone': 'informative',
                'ai_target_audience': 'tech enthusiasts and professionals'
            },
            {
                'name': 'Question Engagement',
                'description': 'Template for asking engaging questions',
                'template_text': 'Quick question for my followers: {question} What do you think? #community #discussion',
                'category': ContentCategory.QUESTION,
                'variables': ['question'],
                'default_hashtags': ['#community', '#discussion', '#question'],
                'ai_prompt': 'Generate an engaging question for social media',
                'ai_tone': 'conversational',
                'ai_target_audience': 'general audience'
            },
            {
                'name': 'Product Update',
                'description': 'Template for product announcements',
                'template_text': 'Excited to share: {update} {details} #product #update #announcement',
                'category': ContentCategory.PROMOTIONAL,
                'variables': ['update', 'details'],
                'default_hashtags': ['#product', '#update', '#announcement'],
                'ai_prompt': 'Write an exciting product update announcement',
                'ai_tone': 'enthusiastic',
                'ai_target_audience': 'customers and prospects'
            }
        ]

    def _seed_initial_data(self):
        """
        Seed the database with initial data for development/testing.

        Currently this only checks whether any TwitterUser rows exist and
        logs the outcome; no rows are inserted. Errors are logged and
        swallowed.
        """
        try:
            session = self.SessionLocal()
            try:
                # Check if we already have data
                if session.query(TwitterUser).count() > 0:
                    logger.info("Database already contains data, skipping seeding")
                    return

                # Note: We can't create templates without a user, so we'll skip this for now
                # In a real scenario, templates would be created when users are added
                # (see _sample_content_templates for the prepared definitions).
            finally:
                # Close the session on every path, including a failed query.
                session.close()

            logger.info("Initial data seeding completed")

        except Exception as e:
            logger.error(f"Error seeding initial data: {e}")

    def check_database_health(self) -> Dict[str, Any]:
        """
        Check the health and status of the Twitter database.

        Returns:
            Dict containing health check results, with the keys:
                status: 'healthy', 'issues_found' or 'error'
                timestamp: ISO-formatted naive UTC time of the check
                tables: per expected table, whether it exists and its row count
                indexes: per existing table, the number of indexes
                issues: human-readable descriptions of the problems found
                error: only present when the check itself failed
        """
        health_status = {
            'status': 'healthy',
            'timestamp': datetime.utcnow().isoformat(),
            'tables': {},
            'indexes': {},
            'issues': []
        }

        try:
            inspector = inspect(self.engine)
            existing_tables = inspector.get_table_names()
            present = set(existing_tables)

            session = self.SessionLocal()
            try:
                # Check table existence and row counts
                for table_name in self.EXPECTED_TABLES:
                    if table_name not in present:
                        health_status['tables'][table_name] = {'exists': False}
                        health_status['issues'].append(f"Missing table: {table_name}")
                        continue

                    try:
                        # table_name comes from the fixed EXPECTED_TABLES
                        # tuple, never from caller input, so interpolating it
                        # into the statement is safe.
                        count = session.execute(
                            text(f"SELECT COUNT(*) FROM {table_name}")
                        ).scalar()
                        health_status['tables'][table_name] = {
                            'exists': True,
                            'row_count': count
                        }
                    except Exception as e:
                        # Reset the failed transaction so the remaining
                        # tables can still be counted.
                        session.rollback()
                        health_status['tables'][table_name] = {
                            'exists': True,
                            'row_count': 'error',
                            'error': str(e)
                        }
                        health_status['issues'].append(f"Error counting rows in {table_name}: {e}")

                # Check indexes
                for table_name in existing_tables:
                    health_status['indexes'][table_name] = len(inspector.get_indexes(table_name))
            finally:
                session.close()

            # Set overall status
            if health_status['issues']:
                health_status['status'] = 'issues_found'

            return health_status

        except Exception as e:
            health_status['status'] = 'error'
            health_status['error'] = str(e)
            logger.error(f"Error checking database health: {e}")
            return health_status

    def backup_database(self, backup_path: str) -> bool:
        """
        Create a backup of the database.

        Only file-based SQLite databases are supported; the database file is
        copied to ``backup_path`` together with its metadata.

        NOTE(review): this is a plain file copy; confirm nothing writes to
        the database while a backup is taken.

        Args:
            backup_path: Path where to save the backup

        Returns:
            bool: True if successful, False otherwise
        """
        try:
            # Get the database file path
            db_file = self._sqlite_file_path()
            if db_file is None:
                logger.error("Backup currently only supported for SQLite databases")
                return False

            if not os.path.exists(db_file):
                logger.error(f"Database file not found: {db_file}")
                return False

            # Create backup directory if it doesn't exist. A bare filename has
            # an empty dirname, and os.makedirs('') would raise.
            backup_dir = os.path.dirname(backup_path)
            if backup_dir:
                os.makedirs(backup_dir, exist_ok=True)

            # Copy the database file
            import shutil
            shutil.copy2(db_file, backup_path)

            logger.info(f"Database backed up to: {backup_path}")
            return True

        except Exception as e:
            logger.error(f"Error backing up database: {e}")
            return False

    def restore_database(self, backup_path: str) -> bool:
        """
        Restore database from a backup.

        Only file-based SQLite databases are supported; the backup file is
        copied over the database file named in the URL.

        Args:
            backup_path: Path to the backup file

        Returns:
            bool: True if successful, False otherwise
        """
        try:
            # Get the database file path
            db_file = self._sqlite_file_path()
            if db_file is None:
                logger.error("Restore currently only supported for SQLite databases")
                return False

            if not os.path.exists(backup_path):
                logger.error(f"Backup file not found: {backup_path}")
                return False

            # Release pooled connections so none keeps the old file open
            # while it is being overwritten.
            self.engine.dispose()

            # Copy the backup file to the database location
            import shutil
            shutil.copy2(backup_path, db_file)

            logger.info(f"Database restored from: {backup_path}")
            return True

        except Exception as e:
            logger.error(f"Error restoring database: {e}")
            return False

    def migrate_schema(self, migration_scripts: List[str]) -> bool:
        """
        Apply schema migration scripts.

        Each entry is a path to a SQL file. Applied migrations are recorded
        by path in the ``schema_migrations`` table, and a path that is
        already recorded is skipped. All work happens on one connection that
        is committed once at the end, so returning early for a missing
        script leaves the run uncommitted.

        NOTE(review): the tracking table uses AUTOINCREMENT and each script
        is executed as a single ``text()`` statement; confirm both suit the
        target database and that scripts hold a single statement.

        Args:
            migration_scripts: List of SQL migration scripts

        Returns:
            bool: True if successful, False otherwise
        """
        try:
            with self.engine.connect() as conn:
                # Create migration tracking table if it doesn't exist
                conn.execute(text("""
                    CREATE TABLE IF NOT EXISTS schema_migrations (
                        id INTEGER PRIMARY KEY AUTOINCREMENT,
                        migration_name TEXT NOT NULL UNIQUE,
                        applied_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
                    )
                """))

                for script in migration_scripts:
                    # Check if migration was already applied
                    already_applied = conn.execute(
                        text("SELECT COUNT(*) FROM schema_migrations WHERE migration_name = :name"),
                        {"name": script},
                    ).scalar()

                    if already_applied:
                        logger.info(f"Migration already applied: {script}")
                        continue

                    # Apply migration
                    logger.info(f"Applying migration: {script}")

                    script_path = Path(script)
                    if not script_path.is_file():
                        logger.error(f"Migration script not found: {script}")
                        return False

                    # Read with an explicit encoding so the result does not
                    # depend on the platform's locale default.
                    migration_sql = script_path.read_text(encoding="utf-8")
                    conn.execute(text(migration_sql))

                    # Record migration as applied
                    conn.execute(
                        text("INSERT INTO schema_migrations (migration_name) VALUES (:name)"),
                        {"name": script},
                    )

                conn.commit()
                logger.info("Schema migration completed successfully")
                return True

        except Exception as e:
            logger.error(f"Error applying schema migration: {e}")
            return False

    def cleanup_old_data(self, days: int = 90) -> Dict[str, Any]:
        """
        Clean up old data to maintain database performance.

        Deletes TweetAnalytics rows recorded before the cutoff and
        EngagementData rows that occurred before it. The cutoff is midnight
        (naive UTC) of the day ``days`` days ago.

        Args:
            days: Number of days to keep data for; must not be negative

        Returns:
            Dict with cleanup statistics ('tweet_analytics_deleted',
            'engagement_data_deleted', 'cutoff_date'), or ``{'error': ...}``
            if the cleanup failed.
        """
        try:
            if days < 0:
                # A negative window would place the cutoff in the future and
                # delete everything.
                raise ValueError("days must be non-negative")

            cutoff_date = self._cutoff_date(days)

            session = self.SessionLocal()
            try:
                # Query.delete() returns the number of rows matched, so no
                # separate COUNT query is needed.
                tweet_analytics_deleted = session.query(TweetAnalytics).filter(
                    TweetAnalytics.recorded_at < cutoff_date
                ).delete()

                engagement_data_deleted = session.query(EngagementData).filter(
                    EngagementData.occurred_at < cutoff_date
                ).delete()

                session.commit()
            except Exception:
                # Leave no half-applied cleanup behind.
                session.rollback()
                raise
            finally:
                session.close()

            cleanup_stats = {
                'tweet_analytics_deleted': tweet_analytics_deleted,
                'engagement_data_deleted': engagement_data_deleted,
                'cutoff_date': cutoff_date.isoformat()
            }

            logger.info(f"Cleanup completed: {cleanup_stats}")
            return cleanup_stats

        except Exception as e:
            logger.error(f"Error during cleanup: {e}")
            return {'error': str(e)}
|
|
|
|
def initialize_twitter_database(db_url: str = "sqlite:///twitter_data.db", force_recreate: bool = False) -> bool:
    """
    Initialize the Twitter database in a single call.

    Builds a TwitterDatabaseInitializer for ``db_url`` and delegates to its
    ``initialize_database`` method.

    Args:
        db_url: Database URL
        force_recreate: Whether to recreate existing tables

    Returns:
        bool: True if successful, False otherwise
    """
    return TwitterDatabaseInitializer(db_url).initialize_database(force_recreate)
|
|
|
|
def check_twitter_database_health(db_url: str = "sqlite:///twitter_data.db") -> Dict[str, Any]:
    """
    Check Twitter database health in a single call.

    Builds a TwitterDatabaseInitializer for ``db_url`` and delegates to its
    ``check_database_health`` method.

    Args:
        db_url: Database URL

    Returns:
        Dict with health check results
    """
    return TwitterDatabaseInitializer(db_url).check_database_health()
|
|
|
|
def _build_arg_parser():
    """Build the argument parser for the database management command line."""
    import argparse

    parser = argparse.ArgumentParser(description="Twitter Database Management")
    parser.add_argument("--db-url", default="sqlite:///twitter_data.db", help="Database URL")
    parser.add_argument("--init", action="store_true", help="Initialize database")
    parser.add_argument("--force", action="store_true", help="Force recreate tables")
    parser.add_argument("--health", action="store_true", help="Check database health")
    parser.add_argument("--backup", help="Create database backup")
    parser.add_argument("--restore", help="Restore from backup")
    parser.add_argument("--cleanup", type=int, help="Cleanup data older than N days")
    return parser


def main(argv: Optional[List[str]] = None) -> int:
    """
    Run the database management command line.

    The requested actions run in a fixed order: init, health, backup,
    restore, cleanup. Each prints its outcome to stdout.

    Args:
        argv: Argument list to parse; None makes argparse read sys.argv.

    Returns:
        int: 0 if every requested action succeeded, 1 otherwise.
    """
    args = _build_arg_parser().parse_args(argv)
    initializer = TwitterDatabaseInitializer(args.db_url)
    exit_code = 0

    if args.init:
        success = initializer.initialize_database(args.force)
        print(f"Database initialization: {'SUCCESS' if success else 'FAILED'}")
        if not success:
            exit_code = 1

    if args.health:
        health = initializer.check_database_health()
        print(json.dumps(health, indent=2))
        if health.get('status') != 'healthy':
            exit_code = 1

    if args.backup:
        success = initializer.backup_database(args.backup)
        print(f"Database backup: {'SUCCESS' if success else 'FAILED'}")
        if not success:
            exit_code = 1

    if args.restore:
        success = initializer.restore_database(args.restore)
        print(f"Database restore: {'SUCCESS' if success else 'FAILED'}")
        if not success:
            exit_code = 1

    # Compare against None so that an explicit "--cleanup 0" is honoured
    # instead of being skipped as a falsy value.
    if args.cleanup is not None:
        stats = initializer.cleanup_old_data(args.cleanup)
        print(f"Cleanup completed: {stats}")
        if 'error' in stats:
            exit_code = 1

    return exit_code


if __name__ == "__main__":
    # Command line interface for database management
    import sys

    sys.exit(main())