ALwrity/backend/services/linkedin/image_generation/linkedin_image_storage.py

"""
LinkedIn Image Storage Service

This service handles image storage, retrieval, and management for LinkedIn image generation.
It provides secure storage, efficient retrieval, and metadata management for generated images.
"""

import os
import re
import hashlib
import json
import shutil
from typing import Dict, Any, Optional, List, Tuple
from datetime import datetime, timedelta
from pathlib import Path
from PIL import Image
from io import BytesIO
from loguru import logger

# Import existing infrastructure
from ...onboarding.api_key_manager import APIKeyManager


class LinkedInImageStorage:
    """
    Handles storage and management of LinkedIn generated images.

    This service provides secure storage, efficient retrieval, metadata management,
    and cleanup functionality for LinkedIn image generation.
    """

    def __init__(self, storage_path: Optional[str] = None, api_key_manager: Optional[APIKeyManager] = None):
        """
        Initialize the LinkedIn Image Storage service.

        Args:
            storage_path: Base path for image storage
            api_key_manager: API key manager for authentication
        """
        self.api_key_manager = api_key_manager or APIKeyManager()

        # Set up storage paths
        if storage_path:
            self.base_storage_path = Path(storage_path)
        else:
            # Default to project-relative path: root/data/media/linkedin_images
            # services/linkedin/image_generation/linkedin_image_storage.py -> image_generation -> linkedin -> services -> backend -> root
            root_dir = Path(__file__).parent.parent.parent.parent.parent
            self.base_storage_path = root_dir / "data" / "media" / "linkedin_images"

        # Create storage directories
        self.images_path = self.base_storage_path / "images"
        self.metadata_path = self.base_storage_path / "metadata"
        self.temp_path = self.base_storage_path / "temp"

        # Ensure directories exist
        self._create_storage_directories()

        # Storage configuration
        self.max_storage_size_gb = 10  # Maximum storage size in GB
        self.image_retention_days = 30  # Days to keep images
        self.max_image_size_mb = 10    # Maximum individual image size in MB
        self.max_images_per_user = 100  # Maximum images per user
        self._uuid_pattern = re.compile(r'^[a-f0-9]{16}$')

        logger.info(f"LinkedIn Image Storage initialized at {self.base_storage_path}")

    def _create_storage_directories(self):
        """Create necessary storage directories."""
        try:
            self.images_path.mkdir(parents=True, exist_ok=True)
            self.metadata_path.mkdir(parents=True, exist_ok=True)
            self.temp_path.mkdir(parents=True, exist_ok=True)

            # Create subdirectories for organization
            (self.images_path / "posts").mkdir(exist_ok=True)
            (self.images_path / "articles").mkdir(exist_ok=True)
            (self.images_path / "carousels").mkdir(exist_ok=True)
            (self.images_path / "video_scripts").mkdir(exist_ok=True)

            logger.info("Storage directories created successfully")

        except Exception as e:
            logger.error(f"Error creating storage directories: {str(e)}")
            raise

    async def store_image(
        self,
        image_data: bytes,
        metadata: Dict[str, Any],
        content_type: str = "post",
        user_id: Optional[str] = None
    ) -> Dict[str, Any]:
        """
        Store generated image with metadata.

        Args:
            image_data: Image data in bytes
            metadata: Image metadata and context
            content_type: Type of LinkedIn content (post, article, carousel, video_script)
            user_id: Optional user ID for workspace storage

        Returns:
            Dict containing storage result and image ID
        """
        try:
            start_time = datetime.now()

            # Check per-user storage quota
            if user_id:
                user_count = await self._count_user_images(user_id)
                if user_count >= self.max_images_per_user:
                    return {
                        'success': False,
                        'error': f"User image limit ({self.max_images_per_user}) reached. Delete existing images or increase limit."
                    }

            # Check disk space
            if not await self._check_disk_space(len(image_data)):
                return {
                    'success': False,
                    'error': "Insufficient disk space for image storage."
                }

            # Generate unique image ID
            image_id = self._generate_image_id(image_data, metadata)

            # Validate image data
            validation_result = await self._validate_image_for_storage(image_data)
            if not validation_result['valid']:
                return {
                    'success': False,
                    'error': f"Image validation failed: {validation_result['error']}"
                }

            # Determine storage path based on content type
            storage_path = self._get_storage_path(content_type, image_id, user_id)

            # Store image file
            image_stored = await self._store_image_file(image_data, storage_path)
            if not image_stored:
                return {
                    'success': False,
                    'error': 'Failed to store image file'
                }

            # Store metadata
            metadata_stored = await self._store_metadata(image_id, metadata, storage_path, user_id)
            if not metadata_stored:
                # Clean up image file if metadata storage fails
                await self._cleanup_failed_storage(storage_path)
                return {
                    'success': False,
                    'error': 'Failed to store image metadata'
                }

            # Update storage statistics
            await self._update_storage_stats()

            storage_time = (datetime.now() - start_time).total_seconds()

            return {
                'success': True,
                'image_id': image_id,
                'storage_path': str(storage_path),
                'metadata': {
                    'stored_at': datetime.now().isoformat(),
                    'storage_time': storage_time,
                    'file_size': len(image_data),
                    'content_type': content_type
                }
            }

        except Exception as e:
            logger.error(f"Error storing LinkedIn image: {str(e)}")
            return {
                'success': False,
                'error': f"Image storage failed: {str(e)}"
            }

    async def retrieve_image(self, image_id: str, user_id: Optional[str] = None) -> Dict[str, Any]:
        """
        Retrieve stored image by ID.

        Args:
            image_id: Unique image identifier
            user_id: Optional user ID to locate the image

        Returns:
            Dict containing image data and metadata
        """
        try:
            if not self._validate_image_id(image_id):
                return {'success': False, 'error': f'Invalid image ID format: {image_id}'}

            # Find image file
            image_path = await self._find_image_by_id(image_id, user_id)
            if not image_path:
                return {
                    'success': False,
                    'error': f'Image not found: {image_id}'
                }

            # Load metadata
            metadata = await self._load_metadata(image_id, user_id)
            if not metadata:
                return {
                    'success': False,
                    'error': f'Metadata not found for image: {image_id}'
                }

            # Read image data
            with open(image_path, 'rb') as f:
                image_data = f.read()

            return {
                'success': True,
                'image_data': image_data,
                'metadata': metadata,
                'image_path': str(image_path)
            }

        except Exception as e:
            logger.error(f"Error retrieving LinkedIn image {image_id}: {str(e)}")
            return {
                'success': False,
                'error': f"Image retrieval failed: {str(e)}"
            }

    async def delete_image(self, image_id: str, user_id: Optional[str] = None) -> Dict[str, Any]:
        """
        Delete stored image and metadata.

        Args:
            image_id: Unique image identifier
            user_id: Optional user ID to locate the image

        Returns:
            Dict containing deletion result
        """
        try:
            if not self._validate_image_id(image_id):
                return {'success': False, 'error': f'Invalid image ID format: {image_id}'}

            # Find image file
            image_path = await self._find_image_by_id(image_id, user_id)
            if not image_path:
                return {
                    'success': False,
                    'error': f'Image not found: {image_id}'
                }

            # Delete image file
            if image_path.exists():
                image_path.unlink()
                logger.info(f"Deleted image file: {image_path}")

            # Delete metadata
            _, metadata_base = self._get_workspace_paths(user_id)
            metadata_path = metadata_base / f"{image_id}.json"
            if metadata_path.exists():
                metadata_path.unlink()
                logger.info(f"Deleted metadata file: {metadata_path}")

            # Update storage statistics
            await self._update_storage_stats()

            return {
                'success': True,
                'message': f'Image {image_id} deleted successfully'
            }

        except Exception as e:
            logger.error(f"Error deleting LinkedIn image {image_id}: {str(e)}")
            return {
                'success': False,
                'error': f"Image deletion failed: {str(e)}"
            }

    async def list_images(
        self,
        content_type: Optional[str] = None,
        limit: int = 50,
        offset: int = 0
    ) -> Dict[str, Any]:
        """
        List stored images with optional filtering.

        Args:
            content_type: Filter by content type
            limit: Maximum number of images to return
            offset: Number of images to skip

        Returns:
            Dict containing list of images and metadata
        """
        try:
            images = []

            # Scan metadata directory
            metadata_files = list(self.metadata_path.glob("*.json"))

            for metadata_file in metadata_files[offset:offset + limit]:
                try:
                    with open(metadata_file, 'r') as f:
                        metadata = json.load(f)

                    # Apply content type filter
                    if content_type and metadata.get('content_type') != content_type:
                        continue

                    # Check if image file still exists
                    image_id = metadata_file.stem
                    image_path = await self._find_image_by_id(image_id)

                    if image_path and image_path.exists():
                        # Add file size and last modified info
                        stat = image_path.stat()
                        metadata['file_size'] = stat.st_size
                        metadata['last_modified'] = datetime.fromtimestamp(stat.st_mtime).isoformat()

                        images.append(metadata)

                except Exception as e:
                    logger.warning(f"Error reading metadata file {metadata_file}: {str(e)}")
                    continue

            return {
                'success': True,
                'images': images,
                'total_count': len(images),
                'limit': limit,
                'offset': offset
            }

        except Exception as e:
            logger.error(f"Error listing LinkedIn images: {str(e)}")
            return {
                'success': False,
                'error': f"Image listing failed: {str(e)}"
            }

    async def cleanup_old_images(self, days_old: Optional[int] = None) -> Dict[str, Any]:
        """
        Clean up old images based on retention policy.

        Args:
            days_old: Minimum age in days for cleanup (defaults to retention policy)

        Returns:
            Dict containing cleanup results
        """
        try:
            if days_old is None:
                days_old = self.image_retention_days

            cutoff_date = datetime.now() - timedelta(days=days_old)
            deleted_count = 0
            errors = []

            # Scan metadata directory
            metadata_files = list(self.metadata_path.glob("*.json"))

            for metadata_file in metadata_files:
                try:
                    with open(metadata_file, 'r') as f:
                        metadata = json.load(f)

                    # Check creation date
                    created_at = metadata.get('stored_at')
                    if created_at:
                        created_date = datetime.fromisoformat(created_at)
                        if created_date < cutoff_date:
                            # Delete old image
                            image_id = metadata_file.stem
                            delete_result = await self.delete_image(image_id)

                            if delete_result['success']:
                                deleted_count += 1
                            else:
                                errors.append(f"Failed to delete {image_id}: {delete_result['error']}")

                except Exception as e:
                    logger.warning(f"Error processing metadata file {metadata_file}: {str(e)}")
                    continue

            return {
                'success': True,
                'deleted_count': deleted_count,
                'errors': errors,
                'cutoff_date': cutoff_date.isoformat()
            }

        except Exception as e:
            logger.error(f"Error cleaning up old LinkedIn images: {str(e)}")
            return {
                'success': False,
                'error': f"Cleanup failed: {str(e)}"
            }

    async def get_storage_stats(self) -> Dict[str, Any]:
        """
        Get storage statistics and usage information.

        Returns:
            Dict containing storage statistics
        """
        try:
            total_size = 0
            total_files = 0
            content_type_counts = {}

            # Calculate storage usage
            for content_type_dir in self.images_path.iterdir():
                if content_type_dir.is_dir():
                    content_type = content_type_dir.name
                    content_type_counts[content_type] = 0

                    for image_file in content_type_dir.glob("*"):
                        if image_file.is_file():
                            total_size += image_file.stat().st_size
                            total_files += 1
                            content_type_counts[content_type] += 1

            # Check storage limits
            total_size_gb = total_size / (1024 ** 3)
            storage_limit_exceeded = total_size_gb > self.max_storage_size_gb

            return {
                'success': True,
                'total_size_bytes': total_size,
                'total_size_gb': round(total_size_gb, 2),
                'total_files': total_files,
                'content_type_counts': content_type_counts,
                'storage_limit_gb': self.max_storage_size_gb,
                'storage_limit_exceeded': storage_limit_exceeded,
                'retention_days': self.image_retention_days
            }

        except Exception as e:
            logger.error(f"Error getting storage stats: {str(e)}")
            return {
                'success': False,
                'error': f"Failed to get storage stats: {str(e)}"
            }

    def _validate_image_id(self, image_id: str) -> bool:
        """Validate image_id against expected format to prevent path traversal."""
        return bool(self._uuid_pattern.match(image_id))

    async def _count_user_images(self, user_id: str) -> int:
        """Count total images stored for a given user."""
        try:
            images_path, _ = self._get_workspace_paths(user_id)
            count = 0
            if images_path.exists():
                for content_dir in images_path.iterdir():
                    if content_dir.is_dir():
                        count += sum(1 for f in content_dir.glob("*.png") if f.is_file())
            return count
        except Exception as e:
            logger.warning(f"Error counting images for user {user_id}: {e}")
            return 0

    async def _check_disk_space(self, required_bytes: int) -> bool:
        """Check if sufficient disk space is available."""
        try:
            usage = shutil.disk_usage(self.base_storage_path)
            return usage.free > required_bytes * 2  # require 2x headroom
        except Exception:
            return True  # if we can't check, allow the write

    def _generate_image_id(self, image_data: bytes, metadata: Dict[str, Any]) -> str:
        """Generate unique image ID based on content and metadata."""
        # Create hash from image data and key metadata
        hash_input = f"{image_data[:1000]}{metadata.get('topic', '')}{metadata.get('industry', '')}{datetime.now().isoformat()}"
        return hashlib.sha256(hash_input.encode()).hexdigest()[:16]

    async def _validate_image_for_storage(self, image_data: bytes) -> Dict[str, Any]:
        """Validate image data before storage."""
        try:
            # Check file size
            if len(image_data) > self.max_image_size_mb * 1024 * 1024:
                return {
                    'valid': False,
                    'error': f'Image size {len(image_data) / (1024*1024):.2f}MB exceeds maximum {self.max_image_size_mb}MB'
                }

            # Validate image format
            try:
                image = Image.open(BytesIO(image_data))
                if image.format not in ['PNG', 'JPEG', 'JPG']:
                    return {
                        'valid': False,
                        'error': f'Unsupported image format: {image.format}'
                    }
            except Exception as e:
                return {
                    'valid': False,
                    'error': f'Invalid image data: {str(e)}'
                }

            return {'valid': True}

        except Exception as e:
            return {
                'valid': False,
                'error': f'Validation error: {str(e)}'
            }

    def _get_workspace_paths(self, user_id: Optional[str]) -> Tuple[Path, Path]:
        """
        Get images and metadata paths for a user or default global paths.
        Returns (images_path, metadata_path).
        """
        if user_id:
            try:
                # Use local import to avoid circular dependency
                from services.database import get_db
                from services.user_workspace_manager import UserWorkspaceManager

                db_gen = get_db()
                db = next(db_gen)
                try:
                    workspace_manager = UserWorkspaceManager(db)
                    workspace = workspace_manager.get_user_workspace(user_id)
                    if workspace:
                        # Align with global structure: linkedin_images/images and linkedin_images/metadata
                        base = Path(workspace['workspace_path']) / "media" / "linkedin_images"
                        return (base / "images", base / "metadata")
                finally:
                    if 'db' in locals():
                        db.close()
            except Exception as e:
                logger.warning(f"Failed to resolve user workspace path: {e}")

        return (self.images_path, self.metadata_path)

    def _get_storage_path(self, content_type: str, image_id: str, user_id: Optional[str] = None) -> Path:
        """Get storage path for image based on content type."""
        # Map content types to directory names
        content_type_map = {
            'post': 'posts',
            'article': 'articles',
            'carousel': 'carousels',
            'video_script': 'video_scripts'
        }

        directory = content_type_map.get(content_type, 'posts')

        images_path, _ = self._get_workspace_paths(user_id)
        return images_path / directory / f"{image_id}.png"

    async def _store_image_file(self, image_data: bytes, storage_path: Path) -> bool:
        """Store image file to disk."""
        try:
            # Ensure directory exists
            storage_path.parent.mkdir(parents=True, exist_ok=True)

            # Write image data
            with open(storage_path, 'wb') as f:
                f.write(image_data)

            logger.info(f"Stored image file: {storage_path}")
            return True

        except Exception as e:
            logger.error(f"Error storing image file: {str(e)}")
            return False

    async def _store_metadata(self, image_id: str, metadata: Dict[str, Any], storage_path: Path, user_id: Optional[str] = None) -> bool:
        """Store image metadata to JSON file."""
        try:
            # Add storage metadata
            metadata['image_id'] = image_id
            metadata['storage_path'] = str(storage_path)
            metadata['stored_at'] = datetime.now().isoformat()

            # Determine metadata path
            _, metadata_base = self._get_workspace_paths(user_id)
            metadata_base.mkdir(parents=True, exist_ok=True)

            # Write metadata file
            metadata_path = metadata_base / f"{image_id}.json"
            with open(metadata_path, 'w') as f:
                json.dump(metadata, f, indent=2, default=str)

            logger.info(f"Stored metadata: {metadata_path}")
            return True

        except Exception as e:
            logger.error(f"Error storing metadata: {str(e)}")
            return False

    async def _find_image_by_id(self, image_id: str, user_id: Optional[str] = None) -> Optional[Path]:
        """Find image file by ID across all content type directories."""
        images_path, _ = self._get_workspace_paths(user_id)

        # If user_id is NOT provided, we might want to check global path only,
        # OR we might want to check if it's a global image.
        # Current implementation assumes if user_id is provided, look there.
        # If not provided, look in global.

        if images_path.exists():
            for content_dir in images_path.iterdir():
                if content_dir.is_dir():
                    image_path = content_dir / f"{image_id}.png"
                    if image_path.exists():
                        return image_path

        return None

    async def get_image_metadata(self, image_id: str, user_id: Optional[str] = None) -> Optional[Dict[str, Any]]:
        """
        Get metadata for an image.

        Args:
            image_id: Unique image identifier
            user_id: Optional user ID

        Returns:
            Dict containing image metadata if found
        """
        if not self._validate_image_id(image_id):
            logger.warning(f"Invalid image ID format in metadata request: {image_id}")
            return None
        return await self._load_metadata(image_id, user_id)

    async def _load_metadata(self, image_id: str, user_id: Optional[str] = None) -> Optional[Dict[str, Any]]:
        """Load metadata for image ID."""
        try:
            _, metadata_base = self._get_workspace_paths(user_id)
            metadata_path = metadata_base / f"{image_id}.json"
            if metadata_path.exists():
                with open(metadata_path, 'r') as f:
                    return json.load(f)
        except Exception as e:
            logger.error(f"Error loading metadata for {image_id}: {str(e)}")

        return None

    async def _cleanup_failed_storage(self, storage_path: Path):
        """Clean up files if storage operation fails."""
        try:
            if storage_path.exists():
                storage_path.unlink()
                logger.info(f"Cleaned up failed storage: {storage_path}")
        except Exception as e:
            logger.error(f"Error cleaning up failed storage: {str(e)}")

    async def _update_storage_stats(self):
        """Update storage statistics (placeholder for future implementation)."""
        # This could be implemented to track storage usage over time
        pass