feat: LinkedIn LLM alignment - Phase 1-3 complete

Phase 1: Dead Code Cleanup
- Remove GeminiGroundedProvider import and property from linkedin_service.py
- Remove fallback_provider property (gemini_provider imports)
- Fix routers/linkedin.py edit endpoint to use llm_text_gen
- Delete dead LinkedInImageEditor class
- Remove dead _transform_gemini_sources from content_generator.py

Phase 2: Research Infrastructure Alignment
- Add user_id to _conduct_research() for pre-flight validation
- Add validate_exa_research_operations() before Exa/Tavily calls
- Pass user_id to provider.simple_search() for usage tracking
- Inject research content into LLM prompts via _build_research_context()
- Fix Google engine path to fallback to Exa
- Add Exa → Tavily fallback on research failure

Phase 3: Cosmetic Cleanup
- Rename _generate_prompts_with_gemini → _generate_prompts_with_llm
- Rename _build_gemini_prompt → _build_image_prompt
- Rename _parse_gemini_response → _parse_llm_response
- Remove all Gemini references from LinkedIn code (0 remaining)
- Update docstrings and log messages

Additional:
- Research caching using existing ResearchCache
- Shared ExaContentResearchProvider in services/research/
- Persona service uses llm_text_gen instead of gemini_structured_json_response
- LinkedInWriter.tsx ChatMessage → ChatMsg type mapping fix
- RegisterLinkedInActionsEnhanced.tsx content_format_rules typing fix
This commit is contained in:
ajaysi
2026-06-12 18:58:53 +05:30
parent e54aaa7a3e
commit 63a0df2536
37 changed files with 2891 additions and 1355 deletions

View File

@@ -2,9 +2,10 @@
Research Handler for LinkedIn Content Generation
Handles research operations and timing for content generation.
Uses common Exa/Tavily infrastructure with pre-flight validation.
"""
from typing import List
from typing import List, Optional
from datetime import datetime
from loguru import logger
from models.linkedin_models import ResearchSource
@@ -21,11 +22,19 @@ class ResearchHandler:
request,
research_enabled: bool,
search_engine: str,
max_results: int = 10
max_results: int = 10,
user_id: Optional[str] = None
) -> tuple[List[ResearchSource], float]:
"""
Conduct research if enabled and return sources with timing.
Args:
request: Generation request object
research_enabled: Whether research is enabled
search_engine: Search engine to use (exa, tavily)
max_results: Maximum number of results
user_id: User ID for pre-flight validation and usage tracking
Returns:
Tuple of (research_sources, research_time)
"""
@@ -33,7 +42,6 @@ class ResearchHandler:
research_time = 0
if research_enabled:
# Debug: Log the search engine value being passed
logger.info(f"ResearchHandler: search_engine='{search_engine}' (type: {type(search_engine)})")
research_start = datetime.now()
@@ -41,7 +49,8 @@ class ResearchHandler:
topic=request.topic,
industry=request.industry,
search_engine=search_engine,
max_results=max_results
max_results=max_results,
user_id=user_id
)
research_time = (datetime.now() - research_start).total_seconds()
logger.info(f"Research completed in {research_time:.2f}s, found {len(research_sources)} sources")
@@ -67,10 +76,5 @@ class ResearchHandler:
if not research_enabled or level == 'none':
return False
# For Google native grounding, Gemini returns sources in the generation metadata,
# so we should not require pre-fetched research_sources.
if engine_str == 'google':
return True
# For other engines, require that research actually returned sources
return bool(research_sources)