ALwrity Prompts - AI Integration Plan

2025-09-03 23:16:39 +05:30
parent 5efee4235d
commit c19fc3f225
104 changed files with 9392 additions and 17462 deletions
--- a/backend/services/linkedin/image_generation/linkedin_image_storage.py
+++ b/backend/services/linkedin/image_generation/linkedin_image_storage.py
@@ -0,0 +1,536 @@
+"""
+LinkedIn Image Storage Service
+
+This service handles image storage, retrieval, and management for LinkedIn image generation.
+It provides secure storage, efficient retrieval, and metadata management for generated images.
+"""
+
+import os
+import hashlib
+import json
+from typing import Dict, Any, Optional, List, Tuple
+from datetime import datetime, timedelta
+from pathlib import Path
+from PIL import Image
+from io import BytesIO
+from loguru import logger
+
+# Import existing infrastructure
+from ...api_key_manager import APIKeyManager
+
+
+class LinkedInImageStorage:
+    """
+    Handles storage and management of LinkedIn generated images.
+    
+    This service provides secure storage, efficient retrieval, metadata management,
+    and cleanup functionality for LinkedIn image generation.
+    """
+    
+    def __init__(self, storage_path: Optional[str] = None, api_key_manager: Optional[APIKeyManager] = None):
+        """
+        Set up the storage service and make sure its directory layout exists.
+
+        Args:
+            storage_path: Root directory for stored images. When omitted (or empty),
+                a "linkedin_images" directory under the fourth parent directory of
+                this file is used.
+            api_key_manager: Key manager to keep on the instance; a new
+                APIKeyManager is created when none is supplied.
+        """
+        self.api_key_manager = api_key_manager or APIKeyManager()
+
+        # An explicit path wins; otherwise fall back to the project-relative default.
+        default_root = Path(__file__).parent.parent.parent.parent / "linkedin_images"
+        self.base_storage_path = Path(storage_path) if storage_path else default_root
+
+        # Everything the service writes lives under one of these three roots.
+        root = self.base_storage_path
+        self.images_path = root / "images"
+        self.metadata_path = root / "metadata"
+        self.temp_path = root / "temp"
+
+        self._create_storage_directories()
+
+        # Storage limits and retention policy.
+        self.max_storage_size_gb = 10   # total storage budget, in GB
+        self.image_retention_days = 30  # age after which cleanup removes images
+        self.max_image_size_mb = 10     # per-image upper bound, in MB
+
+        logger.info(f"LinkedIn Image Storage initialized at {self.base_storage_path}")
+    
+    def _create_storage_directories(self):
+        """
+        Create the images/metadata/temp roots and the per-content-type image folders.
+
+        Existing directories are left untouched. Any failure is logged and re-raised.
+        """
+        try:
+            # Top-level roots may need their parents created as well.
+            for root_dir in (self.images_path, self.metadata_path, self.temp_path):
+                root_dir.mkdir(parents=True, exist_ok=True)
+
+            # One folder per LinkedIn content type, directly under the images root.
+            for subdir_name in ("posts", "articles", "carousels", "video_scripts"):
+                (self.images_path / subdir_name).mkdir(exist_ok=True)
+
+            logger.info("Storage directories created successfully")
+
+        except Exception as e:
+            logger.error(f"Error creating storage directories: {str(e)}")
+            raise
+    
+    async def store_image(
+        self,
+        image_data: bytes,
+        metadata: Dict[str, Any],
+        content_type: str = "post"
+    ) -> Dict[str, Any]:
+        """
+        Store generated image with metadata.
+
+        The image is validated, written under the directory for ``content_type``,
+        and a JSON metadata record is written next to it. The record includes
+        ``content_type`` so that listings can filter on it; a ``content_type`` key
+        already present in ``metadata`` takes precedence. The caller's ``metadata``
+        dict is not modified.
+
+        Args:
+            image_data: Image data in bytes
+            metadata: Image metadata and context
+            content_type: Type of LinkedIn content (post, article, carousel, video_script)
+
+        Returns:
+            Dict with ``success``. On success it also carries ``image_id``,
+            ``storage_path`` and a ``metadata`` summary (stored_at, storage_time,
+            file_size, content_type); on failure it carries ``error``.
+        """
+        try:
+            start_time = datetime.now()
+
+            # Generate unique image ID
+            image_id = self._generate_image_id(image_data, metadata)
+
+            # Validate image data
+            validation_result = await self._validate_image_for_storage(image_data)
+            if not validation_result['valid']:
+                return {
+                    'success': False,
+                    'error': f"Image validation failed: {validation_result['error']}"
+                }
+
+            # Determine storage path based on content type
+            storage_path = self._get_storage_path(content_type, image_id)
+
+            # Store image file
+            image_stored = await self._store_image_file(image_data, storage_path)
+            if not image_stored:
+                return {
+                    'success': False,
+                    'error': 'Failed to store image file'
+                }
+
+            # Persist a copy that records the content type: the listing filter
+            # reads 'content_type' from the stored record, and working on a copy
+            # keeps the caller's dict free of storage bookkeeping keys.
+            metadata_record = {'content_type': content_type, **metadata}
+
+            # Store metadata
+            metadata_stored = await self._store_metadata(image_id, metadata_record, storage_path)
+            if not metadata_stored:
+                # Clean up image file if metadata storage fails
+                await self._cleanup_failed_storage(storage_path)
+                return {
+                    'success': False,
+                    'error': 'Failed to store image metadata'
+                }
+
+            # Update storage statistics
+            await self._update_storage_stats()
+
+            storage_time = (datetime.now() - start_time).total_seconds()
+
+            return {
+                'success': True,
+                'image_id': image_id,
+                'storage_path': str(storage_path),
+                'metadata': {
+                    'stored_at': datetime.now().isoformat(),
+                    'storage_time': storage_time,
+                    'file_size': len(image_data),
+                    'content_type': content_type
+                }
+            }
+
+        except Exception as e:
+            logger.error(f"Error storing LinkedIn image: {str(e)}")
+            return {
+                'success': False,
+                'error': f"Image storage failed: {str(e)}"
+            }
+    
+    async def retrieve_image(self, image_id: str) -> Dict[str, Any]:
+        """
+        Load a stored image and its metadata record.
+
+        Args:
+            image_id: Unique image identifier
+
+        Returns:
+            Dict with ``success``. On success it carries ``image_data`` (bytes),
+            ``metadata`` and ``image_path``; otherwise ``error`` explains whether
+            the image file or its metadata was missing, or what went wrong.
+        """
+        try:
+            image_path = await self._find_image_by_id(image_id)
+            if not image_path:
+                return {'success': False, 'error': f'Image not found: {image_id}'}
+
+            # An absent (or empty) metadata record is treated as a failed lookup.
+            metadata = await self._load_metadata(image_id)
+            if not metadata:
+                return {'success': False, 'error': f'Metadata not found for image: {image_id}'}
+
+            image_bytes = image_path.read_bytes()
+
+            return {
+                'success': True,
+                'image_data': image_bytes,
+                'metadata': metadata,
+                'image_path': str(image_path)
+            }
+
+        except Exception as e:
+            logger.error(f"Error retrieving LinkedIn image {image_id}: {str(e)}")
+            return {
+                'success': False,
+                'error': f"Image retrieval failed: {str(e)}"
+            }
+    
+    async def delete_image(self, image_id: str) -> Dict[str, Any]:
+        """
+        Delete stored image and metadata.
+
+        Whichever of the two files exists is removed, so a metadata record whose
+        image file has already disappeared (or the reverse) can still be cleaned
+        up. Only when neither exists is the image reported as not found.
+
+        Args:
+            image_id: Unique image identifier
+
+        Returns:
+            Dict with ``success`` and either ``message`` or ``error``
+        """
+        try:
+            # The id is interpolated into file names below; refuse anything that
+            # is not a single path component so it cannot point outside storage.
+            id_text = str(image_id)
+            if not id_text or Path(id_text).name != id_text:
+                return {
+                    'success': False,
+                    'error': f'Image not found: {image_id}'
+                }
+
+            image_path = await self._find_image_by_id(image_id)
+            metadata_path = self.metadata_path / f"{image_id}.json"
+
+            if image_path is None and not metadata_path.exists():
+                return {
+                    'success': False,
+                    'error': f'Image not found: {image_id}'
+                }
+
+            # Delete image file
+            if image_path is not None and image_path.exists():
+                image_path.unlink()
+                logger.info(f"Deleted image file: {image_path}")
+
+            # Delete metadata (also covers orphaned records with no image file)
+            if metadata_path.exists():
+                metadata_path.unlink()
+                logger.info(f"Deleted metadata file: {metadata_path}")
+
+            # Update storage statistics
+            await self._update_storage_stats()
+
+            return {
+                'success': True,
+                'message': f'Image {image_id} deleted successfully'
+            }
+
+        except Exception as e:
+            logger.error(f"Error deleting LinkedIn image {image_id}: {str(e)}")
+            return {
+                'success': False,
+                'error': f"Image deletion failed: {str(e)}"
+            }
+    
+    async def list_images(
+        self,
+        content_type: Optional[str] = None,
+        limit: int = 50,
+        offset: int = 0
+    ) -> Dict[str, Any]:
+        """
+        List stored images with optional filtering and stable pagination.
+
+        Metadata records are visited in file-name order, filtered, and only then
+        paginated, so every page holds up to ``limit`` matching images and pages
+        do not shift between calls. Records whose image file no longer exists
+        are skipped, as are records that cannot be read (a warning is logged).
+
+        Args:
+            content_type: Filter by content type. A record without a
+                ``content_type`` key is matched by the directory its image is
+                stored in.
+            limit: Maximum number of images to return (negative values count as 0)
+            offset: Number of matching images to skip (negative values count as 0)
+
+        Returns:
+            Dict with ``success``. On success: ``images`` (the requested page, each
+            record extended with ``file_size`` and ``last_modified``),
+            ``total_count`` (number of matching images across all pages), and the
+            effective ``limit`` and ``offset``. On failure: ``error``.
+        """
+        try:
+            # Negative values would otherwise turn into from-the-end slicing.
+            limit = max(limit, 0)
+            offset = max(offset, 0)
+
+            matching = []
+
+            # glob() order depends on the filesystem; sort for repeatable pages.
+            for metadata_file in sorted(self.metadata_path.glob("*.json")):
+                try:
+                    with open(metadata_file, 'r') as f:
+                        metadata = json.load(f)
+
+                    recorded_type = metadata.get('content_type')
+                    if content_type and recorded_type is not None and recorded_type != content_type:
+                        continue
+
+                    # Check if image file still exists
+                    image_id = metadata_file.stem
+                    image_path = await self._find_image_by_id(image_id)
+                    if image_path is None:
+                        continue
+
+                    # No content type recorded: decide by where the image is stored.
+                    if content_type and recorded_type is None:
+                        expected_dir = self._get_storage_path(content_type, image_id).parent
+                        if image_path.parent != expected_dir:
+                            continue
+
+                    # Add file size and last modified info
+                    stat = image_path.stat()
+                    metadata['file_size'] = stat.st_size
+                    metadata['last_modified'] = datetime.fromtimestamp(stat.st_mtime).isoformat()
+
+                    matching.append(metadata)
+
+                except Exception as e:
+                    logger.warning(f"Error reading metadata file {metadata_file}: {str(e)}")
+                    continue
+
+            return {
+                'success': True,
+                'images': matching[offset:offset + limit],
+                'total_count': len(matching),
+                'limit': limit,
+                'offset': offset
+            }
+
+        except Exception as e:
+            logger.error(f"Error listing LinkedIn images: {str(e)}")
+            return {
+                'success': False,
+                'error': f"Image listing failed: {str(e)}"
+            }
+    
+    async def cleanup_old_images(self, days_old: Optional[int] = None) -> Dict[str, Any]:
+        """
+        Delete images whose metadata says they were stored before the cutoff.
+
+        Args:
+            days_old: Minimum age in days for deletion; the configured retention
+                period is used when this is None
+
+        Returns:
+            Dict with ``success``. On success: ``deleted_count``, ``errors`` (one
+            message per image that could not be deleted) and ``cutoff_date``.
+            On failure: ``error``.
+        """
+        try:
+            age_limit_days = self.image_retention_days if days_old is None else days_old
+            cutoff_date = datetime.now() - timedelta(days=age_limit_days)
+
+            deleted_count = 0
+            errors = []
+
+            # Snapshot the listing first: records are deleted while we walk it.
+            for metadata_file in list(self.metadata_path.glob("*.json")):
+                try:
+                    with open(metadata_file, 'r') as f:
+                        metadata = json.load(f)
+
+                    # Records without a storage timestamp are left alone.
+                    created_at = metadata.get('stored_at')
+                    if not created_at:
+                        continue
+
+                    # Still within the retention window.
+                    if datetime.fromisoformat(created_at) >= cutoff_date:
+                        continue
+
+                    image_id = metadata_file.stem
+                    delete_result = await self.delete_image(image_id)
+                    if delete_result['success']:
+                        deleted_count += 1
+                    else:
+                        errors.append(f"Failed to delete {image_id}: {delete_result['error']}")
+
+                except Exception as e:
+                    logger.warning(f"Error processing metadata file {metadata_file}: {str(e)}")
+                    continue
+
+            return {
+                'success': True,
+                'deleted_count': deleted_count,
+                'errors': errors,
+                'cutoff_date': cutoff_date.isoformat()
+            }
+
+        except Exception as e:
+            logger.error(f"Error cleaning up old LinkedIn images: {str(e)}")
+            return {
+                'success': False,
+                'error': f"Cleanup failed: {str(e)}"
+            }
+    
+    async def get_storage_stats(self) -> Dict[str, Any]:
+        """
+        Summarise how much is stored under the images directory.
+
+        Returns:
+            Dict with ``success``. On success: total size (bytes and GB rounded to
+            two decimals), total file count, file count per content-type
+            directory, the configured size limit and whether it is exceeded, and
+            the retention period. On failure: ``error``.
+        """
+        try:
+            total_size = 0
+            total_files = 0
+            content_type_counts = {}
+
+            for content_type_dir in self.images_path.iterdir():
+                # Only the per-content-type subdirectories are counted.
+                if not content_type_dir.is_dir():
+                    continue
+
+                file_sizes = [
+                    entry.stat().st_size
+                    for entry in content_type_dir.glob("*")
+                    if entry.is_file()
+                ]
+                content_type_counts[content_type_dir.name] = len(file_sizes)
+                total_files += len(file_sizes)
+                total_size += sum(file_sizes)
+
+            total_size_gb = total_size / (1024 ** 3)
+
+            return {
+                'success': True,
+                'total_size_bytes': total_size,
+                'total_size_gb': round(total_size_gb, 2),
+                'total_files': total_files,
+                'content_type_counts': content_type_counts,
+                'storage_limit_gb': self.max_storage_size_gb,
+                'storage_limit_exceeded': total_size_gb > self.max_storage_size_gb,
+                'retention_days': self.image_retention_days
+            }
+
+        except Exception as e:
+            logger.error(f"Error getting storage stats: {str(e)}")
+            return {
+                'success': False,
+                'error': f"Failed to get storage stats: {str(e)}"
+            }
+    
+    def _generate_image_id(self, image_data: bytes, metadata: Dict[str, Any]) -> str:
+        """
+        Generate a 16-character hexadecimal image ID.
+
+        The ID is a truncated SHA-256 over the first 1000 bytes of the image, the
+        ``topic`` and ``industry`` metadata values, and the current timestamp, so
+        storing the same image twice yields different IDs.
+
+        Args:
+            image_data: Raw image bytes
+            metadata: Image metadata; only ``topic`` and ``industry`` are read
+
+        Returns:
+            First 16 hex characters of the digest
+        """
+        digest = hashlib.sha256()
+
+        # Hash the raw bytes directly. Interpolating bytes into an f-string hashes
+        # their repr() text instead and raises BytesWarning under `python -b`.
+        digest.update(bytes(image_data[:1000]))
+
+        context = f"{metadata.get('topic', '')}{metadata.get('industry', '')}{datetime.now().isoformat()}"
+        digest.update(context.encode())
+
+        return digest.hexdigest()[:16]
+    
+    async def _validate_image_for_storage(self, image_data: bytes) -> Dict[str, Any]:
+        """
+        Check size and format of image data before it is written to disk.
+
+        Returns:
+            ``{'valid': True}`` when the data is within the size limit and opens
+            as a PNG or JPEG image; otherwise ``{'valid': False, 'error': ...}``.
+        """
+        try:
+            size_in_bytes = len(image_data)
+            if size_in_bytes > self.max_image_size_mb * 1024 * 1024:
+                size_in_mb = size_in_bytes / (1024*1024)
+                return {
+                    'valid': False,
+                    'error': f'Image size {size_in_mb:.2f}MB exceeds maximum {self.max_image_size_mb}MB'
+                }
+
+            accepted_formats = ('PNG', 'JPEG', 'JPG')
+            try:
+                # Opening the data is what tells us whether it is an image at all.
+                opened = Image.open(BytesIO(image_data))
+                if opened.format not in accepted_formats:
+                    return {
+                        'valid': False,
+                        'error': f'Unsupported image format: {opened.format}'
+                    }
+            except Exception as e:
+                return {
+                    'valid': False,
+                    'error': f'Invalid image data: {str(e)}'
+                }
+
+            return {'valid': True}
+
+        except Exception as e:
+            return {
+                'valid': False,
+                'error': f'Validation error: {str(e)}'
+            }
+    
+    def _get_storage_path(self, content_type: str, image_id: str) -> Path:
+        """
+        Build the file path for an image: ``<images>/<type directory>/<image_id>.png``.
+
+        Unrecognised content types are stored alongside posts.
+        """
+        directory_for_type = dict(
+            post='posts',
+            article='articles',
+            carousel='carousels',
+            video_script='video_scripts',
+        )
+        try:
+            directory = directory_for_type[content_type]
+        except KeyError:
+            directory = 'posts'
+
+        file_name = f"{image_id}.png"
+        return self.images_path / directory / file_name
+    
+    async def _store_image_file(self, image_data: bytes, storage_path: Path) -> bool:
+        """
+        Write the image bytes to ``storage_path``, creating its directory if needed.
+
+        Returns:
+            True when the file was written; False when any error occurred (the
+            error is logged rather than raised).
+        """
+        try:
+            target_dir = storage_path.parent
+            target_dir.mkdir(parents=True, exist_ok=True)
+
+            with storage_path.open('wb') as image_file:
+                image_file.write(image_data)
+
+            logger.info(f"Stored image file: {storage_path}")
+            return True
+
+        except Exception as e:
+            logger.error(f"Error storing image file: {str(e)}")
+            return False
+    
+    async def _store_metadata(self, image_id: str, metadata: Dict[str, Any], storage_path: Path) -> bool:
+        """
+        Store image metadata to a JSON file named ``<image_id>.json``.
+
+        The written record is a copy of ``metadata`` extended with ``image_id``,
+        ``storage_path`` and ``stored_at``; the dict passed in is left unchanged.
+        Values that JSON cannot represent are written as their ``str()`` form.
+
+        Returns:
+            True when the file was written; False when any error occurred (the
+            error is logged rather than raised).
+        """
+        try:
+            # Build a separate record so storage bookkeeping never leaks into
+            # the caller's dict.
+            record = dict(metadata)
+            record['image_id'] = image_id
+            record['storage_path'] = str(storage_path)
+            record['stored_at'] = datetime.now().isoformat()
+
+            # Write metadata file
+            metadata_path = self.metadata_path / f"{image_id}.json"
+            with open(metadata_path, 'w') as f:
+                json.dump(record, f, indent=2, default=str)
+
+            logger.info(f"Stored metadata: {metadata_path}")
+            return True
+
+        except Exception as e:
+            logger.error(f"Error storing metadata: {str(e)}")
+            return False
+    
+    async def _find_image_by_id(self, image_id: str) -> Optional[Path]:
+        """
+        Locate ``<image_id>.png`` in the subdirectories of the images root.
+
+        Returns:
+            Path of the first match in directory-listing order, or None when no
+            subdirectory contains the file.
+        """
+        file_name = f"{image_id}.png"
+        candidates = (
+            entry / file_name
+            for entry in self.images_path.iterdir()
+            if entry.is_dir()
+        )
+        # Generators keep the scan lazy: it stops at the first existing file.
+        return next((candidate for candidate in candidates if candidate.exists()), None)
+    
+    async def _load_metadata(self, image_id: str) -> Optional[Dict[str, Any]]:
+        """
+        Read the JSON metadata record stored for ``image_id``.
+
+        Returns:
+            The parsed record, or None when the file does not exist or cannot be
+            read/parsed (the latter is logged).
+        """
+        try:
+            record_file = self.metadata_path / f"{image_id}.json"
+            if not record_file.exists():
+                return None
+            with open(record_file, 'r') as f:
+                return json.load(f)
+        except Exception as e:
+            logger.error(f"Error loading metadata for {image_id}: {str(e)}")
+            return None
+    
+    async def _cleanup_failed_storage(self, storage_path: Path):
+        """
+        Remove the file at ``storage_path`` if it exists.
+
+        Used to roll back a partially completed store; errors are logged, not raised.
+        """
+        try:
+            if not storage_path.exists():
+                return
+            storage_path.unlink()
+            logger.info(f"Cleaned up failed storage: {storage_path}")
+        except Exception as e:
+            logger.error(f"Error cleaning up failed storage: {str(e)}")
+    
+    async def _update_storage_stats(self):
+        """
+        Hook called after images are stored or deleted; currently does nothing.
+
+        Placeholder for future implementation, e.g. tracking storage usage over time.
+        """
+        return None