feat: LinkedIn LLM alignment - Phase 1-3 complete
Phase 1: Dead Code Cleanup
- Remove GeminiGroundedProvider import and property from linkedin_service.py
- Remove fallback_provider property (gemini_provider imports)
- Fix routers/linkedin.py edit endpoint to use llm_text_gen
- Delete dead LinkedInImageEditor class
- Remove dead _transform_gemini_sources from content_generator.py

Phase 2: Research Infrastructure Alignment
- Add user_id to _conduct_research() for pre-flight validation
- Add validate_exa_research_operations() before Exa/Tavily calls
- Pass user_id to provider.simple_search() for usage tracking
- Inject research content into LLM prompts via _build_research_context()
- Fix Google engine path to fallback to Exa
- Add Exa → Tavily fallback on research failure

Phase 3: Cosmetic Cleanup
- Rename _generate_prompts_with_gemini → _generate_prompts_with_llm
- Rename _build_gemini_prompt → _build_image_prompt
- Rename _parse_gemini_response → _parse_llm_response
- Remove all Gemini references from LinkedIn code (0 remaining)
- Update docstrings and log messages

Additional:
- Research caching using existing ResearchCache
- Shared ExaContentResearchProvider in services/research/
- Persona service uses llm_text_gen instead of gemini_structured_json_response
- LinkedInWriter.tsx ChatMessage → ChatMsg type mapping fix
- RegisterLinkedInActionsEnhanced.tsx content_format_rules typing fix
This commit is contained in:
@@ -6,8 +6,10 @@ It provides secure storage, efficient retrieval, and metadata management for gen
|
||||
"""
|
||||
|
||||
import os
|
||||
import re
|
||||
import hashlib
|
||||
import json
|
||||
import shutil
|
||||
from typing import Dict, Any, Optional, List, Tuple
|
||||
from datetime import datetime, timedelta
|
||||
from pathlib import Path
|
||||
@@ -58,6 +60,8 @@ class LinkedInImageStorage:
|
||||
self.max_storage_size_gb = 10 # Maximum storage size in GB
|
||||
self.image_retention_days = 30 # Days to keep images
|
||||
self.max_image_size_mb = 10 # Maximum individual image size in MB
|
||||
self.max_images_per_user = 100 # Maximum images per user
|
||||
self._uuid_pattern = re.compile(r'^[a-f0-9]{16}$')
|
||||
|
||||
logger.info(f"LinkedIn Image Storage initialized at {self.base_storage_path}")
|
||||
|
||||
@@ -102,6 +106,22 @@ class LinkedInImageStorage:
|
||||
try:
|
||||
start_time = datetime.now()
|
||||
|
||||
# Check per-user storage quota
|
||||
if user_id:
|
||||
user_count = await self._count_user_images(user_id)
|
||||
if user_count >= self.max_images_per_user:
|
||||
return {
|
||||
'success': False,
|
||||
'error': f"User image limit ({self.max_images_per_user}) reached. Delete existing images or increase limit."
|
||||
}
|
||||
|
||||
# Check disk space
|
||||
if not await self._check_disk_space(len(image_data)):
|
||||
return {
|
||||
'success': False,
|
||||
'error': "Insufficient disk space for image storage."
|
||||
}
|
||||
|
||||
# Generate unique image ID
|
||||
image_id = self._generate_image_id(image_data, metadata)
|
||||
|
||||
@@ -170,6 +190,9 @@ class LinkedInImageStorage:
|
||||
Dict containing image data and metadata
|
||||
"""
|
||||
try:
|
||||
if not self._validate_image_id(image_id):
|
||||
return {'success': False, 'error': f'Invalid image ID format: {image_id}'}
|
||||
|
||||
# Find image file
|
||||
image_path = await self._find_image_by_id(image_id, user_id)
|
||||
if not image_path:
|
||||
@@ -216,6 +239,9 @@ class LinkedInImageStorage:
|
||||
Dict containing deletion result
|
||||
"""
|
||||
try:
|
||||
if not self._validate_image_id(image_id):
|
||||
return {'success': False, 'error': f'Invalid image ID format: {image_id}'}
|
||||
|
||||
# Find image file
|
||||
image_path = await self._find_image_by_id(image_id, user_id)
|
||||
if not image_path:
|
||||
@@ -418,6 +444,32 @@ class LinkedInImageStorage:
|
||||
'error': f"Failed to get storage stats: {str(e)}"
|
||||
}
|
||||
|
||||
def _validate_image_id(self, image_id: str) -> bool:
|
||||
"""Validate image_id against expected format to prevent path traversal."""
|
||||
return bool(self._uuid_pattern.match(image_id))
|
||||
|
||||
async def _count_user_images(self, user_id: str) -> int:
|
||||
"""Count total images stored for a given user."""
|
||||
try:
|
||||
images_path, _ = self._get_workspace_paths(user_id)
|
||||
count = 0
|
||||
if images_path.exists():
|
||||
for content_dir in images_path.iterdir():
|
||||
if content_dir.is_dir():
|
||||
count += sum(1 for f in content_dir.glob("*.png") if f.is_file())
|
||||
return count
|
||||
except Exception as e:
|
||||
logger.warning(f"Error counting images for user {user_id}: {e}")
|
||||
return 0
|
||||
|
||||
async def _check_disk_space(self, required_bytes: int) -> bool:
|
||||
"""Check if sufficient disk space is available."""
|
||||
try:
|
||||
usage = shutil.disk_usage(self.base_storage_path)
|
||||
return usage.free > required_bytes * 2 # require 2x headroom
|
||||
except Exception:
|
||||
return True # if we can't check, allow the write
|
||||
|
||||
def _generate_image_id(self, image_data: bytes, metadata: Dict[str, Any]) -> str:
|
||||
"""Generate unique image ID based on content and metadata."""
|
||||
# Create hash from image data and key metadata
|
||||
@@ -569,6 +621,9 @@ class LinkedInImageStorage:
|
||||
Returns:
|
||||
Dict containing image metadata if found
|
||||
"""
|
||||
if not self._validate_image_id(image_id):
|
||||
logger.warning(f"Invalid image ID format in metadata request: {image_id}")
|
||||
return None
|
||||
return await self._load_metadata(image_id, user_id)
|
||||
|
||||
async def _load_metadata(self, image_id: str, user_id: Optional[str] = None) -> Optional[Dict[str, Any]]:
|
||||
|
||||
Reference in New Issue
Block a user