Files
ALwrity/backend/services/linkedin/research_handler.py
ajaysi 63a0df2536 feat: LinkedIn LLM alignment - Phase 1-3 complete
Phase 1: Dead Code Cleanup
- Remove GeminiGroundedProvider import and property from linkedin_service.py
- Remove fallback_provider property (gemini_provider imports)
- Fix routers/linkedin.py edit endpoint to use llm_text_gen
- Delete dead LinkedInImageEditor class
- Remove dead _transform_gemini_sources from content_generator.py

Phase 2: Research Infrastructure Alignment
- Add user_id to _conduct_research() for pre-flight validation
- Add validate_exa_research_operations() before Exa/Tavily calls
- Pass user_id to provider.simple_search() for usage tracking
- Inject research content into LLM prompts via _build_research_context()
- Fix Google engine path to fall back to Exa
- Add Exa → Tavily fallback on research failure

Phase 3: Cosmetic Cleanup
- Rename _generate_prompts_with_gemini → _generate_prompts_with_llm
- Rename _build_gemini_prompt → _build_image_prompt
- Rename _parse_gemini_response → _parse_llm_response
- Remove all Gemini references from LinkedIn code (0 remaining)
- Update docstrings and log messages

Additional:
- Research caching using existing ResearchCache
- Shared ExaContentResearchProvider in services/research/
- Persona service uses llm_text_gen instead of gemini_structured_json_response
- LinkedInWriter.tsx ChatMessage → ChatMsg type mapping fix
- RegisterLinkedInActionsEnhanced.tsx content_format_rules typing fix
2026-06-12 18:58:53 +05:30

81 lines
3.0 KiB
Python

"""
Research Handler for LinkedIn Content Generation
Handles research operations and timing for content generation.
Uses common Exa/Tavily infrastructure with pre-flight validation.
"""
import time
from datetime import datetime
from typing import List, Optional

from loguru import logger

from models.linkedin_models import ResearchSource
class ResearchHandler:
    """Run optional research for LinkedIn content generation and time it.

    The research itself is delegated to the wrapped service's
    ``_conduct_research`` coroutine; this class adds the enable/disable
    switch, the elapsed-time measurement, and the grounding decision.
    """

    def __init__(self, linkedin_service):
        """Keep a reference to the service whose ``_conduct_research`` is awaited."""
        self.linkedin_service = linkedin_service

    async def conduct_research(
        self,
        request,
        research_enabled: bool,
        search_engine: str,
        max_results: int = 10,
        user_id: Optional[str] = None
    ) -> tuple[List[ResearchSource], float]:
        """
        Conduct research if enabled and return sources with timing.

        Args:
            request: Generation request object; ``topic`` and ``industry``
                are read from it and forwarded to the service.
            research_enabled: Whether research is enabled. When false the
                service is not called at all.
            search_engine: Search engine to use (exa, tavily); forwarded
                to the service unchanged.
            max_results: Maximum number of results to request.
            user_id: User ID forwarded to the service for pre-flight
                validation and usage tracking.

        Returns:
            Tuple of (research_sources, research_time). ``research_time`` is
            the elapsed time of the service call in seconds, or ``0.0`` with
            an empty source list when research is disabled.
        """
        if not research_enabled:
            return [], 0.0

        logger.info(f"ResearchHandler: search_engine='{search_engine}' (type: {type(search_engine)})")

        # perf_counter() is monotonic, so the measured interval cannot jump
        # or go negative when the wall clock is adjusted (NTP step, DST).
        started = time.perf_counter()
        research_sources = await self.linkedin_service._conduct_research(
            topic=request.topic,
            industry=request.industry,
            search_engine=search_engine,
            max_results=max_results,
            user_id=user_id
        )
        research_time = time.perf_counter() - started

        logger.info(f"Research completed in {research_time:.2f}s, found {len(research_sources)} sources")
        return research_sources, research_time

    def determine_grounding_enabled(self, request, research_sources: List[ResearchSource]) -> bool:
        """Determine if grounding should be enabled based on request and research results.

        Args:
            request: Generation request object. ``grounding_level`` (string or
                Enum-like object with a ``value`` attribute, default
                ``'enhanced'``) and ``research_enabled`` (default ``True``)
                are read from it; both attributes are optional.
            research_sources: Sources returned by the research step.

        Returns:
            ``False`` when research is disabled or the grounding level
            normalizes to ``'none'``; otherwise ``True`` only if at least one
            research source is present. The request's search engine is not
            consulted.
        """
        # Normalize the level from a possible Enum or plain string.
        raw_level = getattr(request, 'grounding_level', 'enhanced')
        level_value = getattr(raw_level, 'value', raw_level)
        # Only a string can spell 'none'; any other value (including None)
        # is treated as "not none", which leaves grounding eligible.
        level = level_value.strip().lower() if isinstance(level_value, str) else 'enhanced'

        research_enabled = bool(getattr(request, 'research_enabled', True))
        if not research_enabled or level == 'none':
            return False

        # Grounding needs material to ground on: require actual sources.
        return bool(research_sources)