# ALwrity/backend/services/scheduler/utils/user_job_store.py

"""
User Job Store Utilities
Utilities for managing per-user job stores based on website root.
"""

from typing import Optional
from urllib.parse import urlparse
from loguru import logger
from sqlalchemy.orm import Session as SQLSession

from services.database import get_db_session
from models.onboarding import OnboardingSession, WebsiteAnalysis


def extract_domain_root(url: str) -> str:
    """
    Extract domain root from a website URL for use as job store identifier.

    The host is taken from the URL's authority component, so credentials
    (``user:pass@``), port numbers, letter case, surrounding whitespace and a
    trailing root dot do not influence the result. URLs without a scheme are
    supported by treating the first path segment as the host.

    Examples:
        https://www.example.com -> example
        https://blog.example.com -> example
        https://example.co.uk -> example
        http://subdomain.example.com/path -> example
        https://user:pw@Example.COM:8443/ -> example
        http://localhost:8000 -> localhost

    Args:
        url: Website URL

    Returns:
        Domain root (e.g., 'example') or 'default' if extraction fails
    """
    try:
        if not isinstance(url, str):
            # Routed through the handler below so bad input is still logged.
            raise TypeError(f"expected str, got {type(url).__name__}")

        parsed = urlparse(url.strip())
        # Without a scheme ("example.com/path") urlparse leaves netloc empty
        # and puts everything in path, so fall back to the first path segment.
        authority = parsed.netloc or parsed.path.split('/')[0]

        # Re-parse the bare authority so urllib drops userinfo and port and
        # lowercases the host. hostname is None when there is no host at all.
        # A trailing dot (fully-qualified form) would shift the label indexes.
        hostname = (urlparse(f'//{authority}').hostname or '').rstrip('.')

        # Remove www. prefix if present
        if hostname.startswith('www.'):
            hostname = hostname[4:]

        # Pick the registrable label: example.com -> example,
        # example.co.uk -> example (second-level labels such as "co" are skipped).
        labels = hostname.split('.')
        if len(labels) >= 3 and labels[-2] in ('co', 'com', 'net', 'org'):
            root = labels[-3]
        elif len(labels) >= 2:
            root = labels[-2]
        else:
            root = labels[0]

        # Remove invalid characters for job store name
        root = ''.join(c for c in root if c.isalnum() or c in '-_')

        # Too-short or empty names are not useful identifiers
        if len(root) < 2:
            return 'default'

        return root

    except Exception as e:
        logger.warning(f"Failed to extract domain root from URL '{url}': {e}")
        return 'default'


def get_user_job_store_name(user_id: str, db: Optional[SQLSession] = None) -> str:
    """
    Get job store name for a user based on their website root from onboarding.

    Looks up the user's most recently updated onboarding session, then the
    most recently updated website analysis for that session, and derives the
    name from that analysis' website URL via ``extract_domain_root``.

    Args:
        user_id: User ID (Clerk string)
        db: Optional database session. When omitted, a session is obtained
            from ``get_db_session()`` and closed before returning; a session
            supplied by the caller is never closed here.

    Returns:
        Job store name (e.g., 'example'), or 'default' when no database
        session is available, the user has no onboarding session or website
        URL, or any error occurs during the lookup.
    """
    # Only a session created here is ours to close.
    owns_session = db is None
    db_session = db

    try:
        if owns_session:
            db_session = get_db_session()

        if db_session is None:
            logger.warning(f"Could not get database session for user {user_id}, using default job store")
            return 'default'

        # Get user's website URL from onboarding
        # Query directly since user_id is a string (Clerk ID)
        onboarding_session = (
            db_session.query(OnboardingSession)
            .filter(OnboardingSession.user_id == user_id)
            .order_by(OnboardingSession.updated_at.desc())
            .first()
        )

        if onboarding_session is None:
            logger.debug(
                f"[Job Store] No onboarding session found for user {user_id}, using default job store. "
                f"This is normal if user hasn't completed onboarding."
            )
            return 'default'

        # Get the latest website analysis for this session
        website_analysis = (
            db_session.query(WebsiteAnalysis)
            .filter(WebsiteAnalysis.session_id == onboarding_session.id)
            .order_by(WebsiteAnalysis.updated_at.desc())
            .first()
        )

        if website_analysis is None or not website_analysis.website_url:
            logger.debug(
                f"[Job Store] No website URL found for user {user_id} (session_id: {onboarding_session.id}), "
                f"using default job store. This is normal if website analysis wasn't completed."
            )
            return 'default'

        website_url = website_analysis.website_url
        domain_root = extract_domain_root(website_url)

        logger.debug(f"Job store for user {user_id}: {domain_root} (from {website_url})")
        return domain_root

    except Exception as e:
        # Job store resolution is best-effort: never propagate, fall back instead.
        logger.error(f"Error getting job store name for user {user_id}: {e}")
        return 'default'
    finally:
        if owns_session and db_session is not None:
            try:
                db_session.close()
            except Exception as close_error:
                # Still best-effort, but surface the failure instead of hiding it.
                logger.warning(
                    f"[Job Store] Failed to close database session for user {user_id}: {close_error}"
                )