feat: LinkedIn LLM alignment - Phases 1-3 complete

Phase 1: Dead Code Cleanup
- Remove GeminiGroundedProvider import and property from linkedin_service.py
- Remove fallback_provider property (gemini_provider imports)
- Fix routers/linkedin.py edit endpoint to use llm_text_gen
- Delete dead LinkedInImageEditor class
- Remove dead _transform_gemini_sources from content_generator.py

Phase 2: Research Infrastructure Alignment
- Add user_id to _conduct_research() for pre-flight validation
- Add validate_exa_research_operations() before Exa/Tavily calls
- Pass user_id to provider.simple_search() for usage tracking
- Inject research content into LLM prompts via _build_research_context()
- Fix Google engine path to fall back to Exa
- Add Exa → Tavily fallback on research failure

Phase 3: Cosmetic Cleanup
- Rename _generate_prompts_with_gemini → _generate_prompts_with_llm
- Rename _build_gemini_prompt → _build_image_prompt
- Rename _parse_gemini_response → _parse_llm_response
- Remove all Gemini references from LinkedIn code (0 remaining)
- Update docstrings and log messages

Additional:
- Research caching using existing ResearchCache
- Shared ExaContentResearchProvider in services/research/
- Persona service uses llm_text_gen instead of gemini_structured_json_response
- LinkedInWriter.tsx ChatMessage → ChatMsg type mapping fix
- RegisterLinkedInActionsEnhanced.tsx content_format_rules typing fix
2026-06-12 18:58:53 +05:30
parent e54aaa7a3e
commit 63a0df2536
37 changed files with 2891 additions and 1355 deletions
--- a/backend/services/linkedin/research_handler.py
+++ b/backend/services/linkedin/research_handler.py
@@ -2,9 +2,10 @@
 Research Handler for LinkedIn Content Generation

 Handles research operations and timing for content generation.
+Uses common Exa/Tavily infrastructure with pre-flight validation.
 """

-from typing import List
+from typing import List, Optional
 from datetime import datetime
 from loguru import logger
 from models.linkedin_models import ResearchSource
@@ -21,11 +22,19 @@ class ResearchHandler:
        request,
        research_enabled: bool,
        search_engine: str,
-        max_results: int = 10
+        max_results: int = 10,
+        user_id: Optional[str] = None
    ) -> tuple[List[ResearchSource], float]:
        """
        Conduct research if enabled and return sources with timing.
        
+        Args:
+            request: Generation request object
+            research_enabled: Whether research is enabled
+            search_engine: Search engine to use (exa, tavily)
+            max_results: Maximum number of results
+            user_id: User ID for pre-flight validation and usage tracking
+            
        Returns:
            Tuple of (research_sources, research_time)
        """
@@ -33,7 +42,6 @@ class ResearchHandler:
        research_time = 0
        
        if research_enabled:
-            # Debug: Log the search engine value being passed
            logger.info(f"ResearchHandler: search_engine='{search_engine}' (type: {type(search_engine)})")
            
            research_start = datetime.now()
@@ -41,7 +49,8 @@ class ResearchHandler:
                topic=request.topic,
                industry=request.industry,
                search_engine=search_engine,
-                max_results=max_results
+                max_results=max_results,
+                user_id=user_id
            )
            research_time = (datetime.now() - research_start).total_seconds()
            logger.info(f"Research completed in {research_time:.2f}s, found {len(research_sources)} sources")
@@ -67,10 +76,5 @@ class ResearchHandler:
        if not research_enabled or level == 'none':
            return False
        
-        # For Google native grounding, Gemini returns sources in the generation metadata,
-        # so we should not require pre-fetched research_sources.
-        if engine_str == 'google':
-            return True
-        
        # For other engines, require that research actually returned sources
        return bool(research_sources)