feat: LinkedIn LLM alignment - Phase 1-3 complete

Phase 1: Dead Code Cleanup - Remove GeminiGroundedProvider import and property from linkedin_service.py - Remove fallback_provider property (gemini_provider imports) - Fix routers/linkedin.py edit endpoint to use llm_text_gen - Delete dead LinkedInImageEditor class - Remove dead _transform_gemini_sources from content_generator.py Phase 2: Research Infrastructure Alignment - Add user_id to _conduct_research() for pre-flight validation - Add validate_exa_research_operations() before Exa/Tavily calls - Pass user_id to provider.simple_search() for usage tracking - Inject research content into LLM prompts via _build_research_context() - Fix Google engine path to fallback to Exa - Add Exa → Tavily fallback on research failure Phase 3: Cosmetic Cleanup - Rename _generate_prompts_with_gemini → _generate_prompts_with_llm - Rename _build_gemini_prompt → _build_image_prompt - Rename _parse_gemini_response → _parse_llm_response - Remove all Gemini references from LinkedIn code (0 remaining) - Update docstrings and log messages Additional: - Research caching using existing ResearchCache - Shared ExaContentResearchProvider in services/research/ - Persona service uses llm_text_gen instead of gemini_structured_json_response - LinkedInWriter.tsx ChatMessage → ChatMsg type mapping fix - RegisterLinkedInActionsEnhanced.tsx content_format_rules typing fix
2026-06-12 18:58:53 +05:30
parent e54aaa7a3e
commit 63a0df2536
37 changed files with 2891 additions and 1355 deletions
--- a/backend/services/linkedin/image_generation/linkedin_image_storage.py
+++ b/backend/services/linkedin/image_generation/linkedin_image_storage.py
@@ -6,8 +6,10 @@ It provides secure storage, efficient retrieval, and metadata management for gen
 """

 import os
+import re
 import hashlib
 import json
+import shutil
 from typing import Dict, Any, Optional, List, Tuple
 from datetime import datetime, timedelta
 from pathlib import Path
@@ -58,6 +60,8 @@ class LinkedInImageStorage:
        self.max_storage_size_gb = 10  # Maximum storage size in GB
        self.image_retention_days = 30  # Days to keep images
        self.max_image_size_mb = 10    # Maximum individual image size in MB
+        self.max_images_per_user = 100  # Maximum images per user
+        self._uuid_pattern = re.compile(r'^[a-f0-9]{16}$')
        
        logger.info(f"LinkedIn Image Storage initialized at {self.base_storage_path}")
    
@@ -102,6 +106,22 @@ class LinkedInImageStorage:
        try:
            start_time = datetime.now()
            
+            # Check per-user storage quota
+            if user_id:
+                user_count = await self._count_user_images(user_id)
+                if user_count >= self.max_images_per_user:
+                    return {
+                        'success': False,
+                        'error': f"User image limit ({self.max_images_per_user}) reached. Delete existing images or increase limit."
+                    }
+            
+            # Check disk space
+            if not await self._check_disk_space(len(image_data)):
+                return {
+                    'success': False,
+                    'error': "Insufficient disk space for image storage."
+                }
+            
            # Generate unique image ID
            image_id = self._generate_image_id(image_data, metadata)
            
@@ -170,6 +190,9 @@ class LinkedInImageStorage:
            Dict containing image data and metadata
        """
        try:
+            if not self._validate_image_id(image_id):
+                return {'success': False, 'error': f'Invalid image ID format: {image_id}'}
+            
            # Find image file
            image_path = await self._find_image_by_id(image_id, user_id)
            if not image_path:
@@ -216,6 +239,9 @@ class LinkedInImageStorage:
            Dict containing deletion result
        """
        try:
+            if not self._validate_image_id(image_id):
+                return {'success': False, 'error': f'Invalid image ID format: {image_id}'}
+            
            # Find image file
            image_path = await self._find_image_by_id(image_id, user_id)
            if not image_path:
@@ -418,6 +444,32 @@ class LinkedInImageStorage:
                'error': f"Failed to get storage stats: {str(e)}"
            }
    
+    def _validate_image_id(self, image_id: str) -> bool:
+        """Validate image_id against expected format to prevent path traversal."""
+        return bool(self._uuid_pattern.match(image_id))
+    
+    async def _count_user_images(self, user_id: str) -> int:
+        """Count total images stored for a given user."""
+        try:
+            images_path, _ = self._get_workspace_paths(user_id)
+            count = 0
+            if images_path.exists():
+                for content_dir in images_path.iterdir():
+                    if content_dir.is_dir():
+                        count += sum(1 for f in content_dir.glob("*.png") if f.is_file())
+            return count
+        except Exception as e:
+            logger.warning(f"Error counting images for user {user_id}: {e}")
+            return 0
+    
+    async def _check_disk_space(self, required_bytes: int) -> bool:
+        """Check if sufficient disk space is available."""
+        try:
+            usage = shutil.disk_usage(self.base_storage_path)
+            return usage.free > required_bytes * 2  # require 2x headroom
+        except Exception:
+            return True  # if we can't check, allow the write
+    
    def _generate_image_id(self, image_data: bytes, metadata: Dict[str, Any]) -> str:
        """Generate unique image ID based on content and metadata."""
        # Create hash from image data and key metadata
@@ -569,6 +621,9 @@ class LinkedInImageStorage:
        Returns:
            Dict containing image metadata if found
        """
+        if not self._validate_image_id(image_id):
+            logger.warning(f"Invalid image ID format in metadata request: {image_id}")
+            return None
        return await self._load_metadata(image_id, user_id)

    async def _load_metadata(self, image_id: str, user_id: Optional[str] = None) -> Optional[Dict[str, Any]]: