feat: LinkedIn LLM alignment - Phase 1-3 complete
Phase 1: Dead Code Cleanup
- Remove GeminiGroundedProvider import and property from linkedin_service.py
- Remove fallback_provider property (gemini_provider imports)
- Fix routers/linkedin.py edit endpoint to use llm_text_gen
- Delete dead LinkedInImageEditor class
- Remove dead _transform_gemini_sources from content_generator.py

Phase 2: Research Infrastructure Alignment
- Add user_id to _conduct_research() for pre-flight validation
- Add validate_exa_research_operations() before Exa/Tavily calls
- Pass user_id to provider.simple_search() for usage tracking
- Inject research content into LLM prompts via _build_research_context()
- Fix Google engine path to fall back to Exa
- Add Exa → Tavily fallback on research failure

Phase 3: Cosmetic Cleanup
- Rename _generate_prompts_with_gemini → _generate_prompts_with_llm
- Rename _build_gemini_prompt → _build_image_prompt
- Rename _parse_gemini_response → _parse_llm_response
- Remove all Gemini references from LinkedIn code (0 remaining)
- Update docstrings and log messages

Additional:
- Research caching using existing ResearchCache
- Shared ExaContentResearchProvider in services/research/
- Persona service uses llm_text_gen instead of gemini_structured_json_response
- LinkedInWriter.tsx ChatMessage → ChatMsg type mapping fix
- RegisterLinkedInActionsEnhanced.tsx content_format_rules typing fix
This commit is contained in:
@@ -7,6 +7,7 @@ replacing mock research with real-time industry information.
|
||||
Available Services:
|
||||
- GoogleSearchService: Real-time industry research using Google Custom Search API
|
||||
- ExaService: Competitor discovery and analysis using Exa API
|
||||
- ExaContentResearchProvider: Shared content research provider for LinkedIn/Blog
|
||||
- TavilyService: AI-powered web search with real-time information
|
||||
- Source ranking and credibility assessment
|
||||
- Content extraction and insight generation
|
||||
@@ -17,12 +18,13 @@ Core Module (v2.0):
|
||||
- ParameterOptimizer: AI-driven parameter optimization
|
||||
|
||||
Author: ALwrity Team
|
||||
Version: 2.0
|
||||
Last Updated: December 2025
|
||||
Version: 2.1
|
||||
Last Updated: June 2026
|
||||
"""
|
||||
|
||||
from .google_search_service import GoogleSearchService
|
||||
from .exa_service import ExaService
|
||||
from .exa_content_research import ExaContentResearchProvider, get_exa_content_provider
|
||||
from .tavily_service import TavilyService
|
||||
|
||||
# Core Research Engine (v2.0)
|
||||
@@ -43,6 +45,10 @@ __all__ = [
|
||||
"ExaService",
|
||||
"TavilyService",
|
||||
|
||||
# Shared content research provider
|
||||
"ExaContentResearchProvider",
|
||||
"get_exa_content_provider",
|
||||
|
||||
# Core Research Engine (v2.0)
|
||||
"ResearchEngine",
|
||||
"ResearchContext",
|
||||
|
||||
198
backend/services/research/exa_content_research.py
Normal file
198
backend/services/research/exa_content_research.py
Normal file
@@ -0,0 +1,198 @@
|
||||
"""
|
||||
Exa Content Research Provider
|
||||
|
||||
Shared Exa neural search provider for content research across ALwrity modules.
|
||||
Provides simple_search() for fact-checking, content grounding, and research.
|
||||
|
||||
Used by:
|
||||
- LinkedIn Writer (content generation research)
|
||||
- Blog Writer (fact-checking and writing assistance)
|
||||
|
||||
This is the content-research variant. For competitor discovery/analysis,
|
||||
use ExaService in exa_service.py.
|
||||
"""
|
||||
|
||||
import os
|
||||
import asyncio
|
||||
from typing import List, Dict, Any, Optional
|
||||
from loguru import logger
|
||||
|
||||
|
||||
class ExaContentResearchProvider:
    """Exa neural search provider for content research.

    Wraps an ``exa_py.Exa`` client and exposes :meth:`simple_search`, which
    runs a subscription preflight check, performs the search (with one
    simplified retry), normalizes the results into plain dicts, and records
    usage for the calling user.
    """

    # Flat per-search cost (USD) recorded against the user's usage summary;
    # roughly half a cent per search.
    SEARCH_COST_USD = 0.005

    def __init__(self):
        """Initialize the Exa content research provider.

        Raises:
            RuntimeError: If the ``EXA_API_KEY`` environment variable is unset
                or empty.
        """
        self.api_key = os.getenv("EXA_API_KEY")
        if not self.api_key:
            raise RuntimeError("EXA_API_KEY not configured")

        # Imported at construction time rather than at module import time.
        from exa_py import Exa

        self.exa = Exa(self.api_key)
        logger.info("✅ Exa Content Research Provider initialized")

    async def simple_search(
        self,
        query: str,
        num_results: int = 5,
        user_id: Optional[str] = None,
        include_domains: Optional[List[str]] = None,
        exclude_domains: Optional[List[str]] = None,
    ) -> List[Dict[str, Any]]:
        """Run a simple Exa search for content research and fact-checking.

        Handles the subscription preflight check and usage tracking when a
        ``user_id`` is supplied.

        Args:
            query: Search query string.
            num_results: Number of results to request (default 5).
            user_id: Optional user ID; when given, usage limits are checked
                before the search and usage is recorded after it.
            include_domains: Only return results from these domains.
            exclude_domains: Exclude results from these domains.

        Returns:
            List of source dicts with ``title``, ``url``, ``text``,
            ``publishedDate``, ``author`` and ``score`` keys. Missing or
            ``None`` fields are replaced by defaults (``'Untitled'``, ``''``
            or ``0.5``).

        Raises:
            HTTPException: With status 429 if the preflight check reports
                that the user has exceeded subscription limits.
            RuntimeError: If both the initial search and the simplified
                retry fail.
        """
        if user_id:
            self._preflight_usage_check(user_id)

        # Domain filters are shared between the first attempt and the retry.
        domain_filters: Dict[str, Any] = {}
        if include_domains:
            domain_filters["include_domains"] = include_domains
        if exclude_domains:
            domain_filters["exclude_domains"] = exclude_domains

        search_kwargs = {
            "type": "auto",
            "num_results": num_results,
            "text": {"max_characters": 1000},
            "highlights": {"num_sentences": 2, "highlights_per_url": 2},
            **domain_filters,
        }

        # Resolve the loop once, before the try, so the retry path never
        # depends on a name bound inside the failed attempt.
        loop = asyncio.get_running_loop()

        try:
            # The Exa client call is synchronous; run it in the default
            # executor so the event loop is not blocked.
            results = await loop.run_in_executor(
                None,
                lambda: self.exa.search_and_contents(query, **search_kwargs),
            )
        except Exception as e:
            logger.error(f"[Exa simple_search] API call failed: {e}")
            # Retry with simpler parameters
            retry_kwargs = {
                "type": "auto",
                "num_results": num_results,
                "text": True,
                **domain_filters,
            }
            try:
                logger.info("[Exa simple_search] Retrying with simplified parameters")
                results = await loop.run_in_executor(
                    None,
                    lambda: self.exa.search_and_contents(query, **retry_kwargs),
                )
            except Exception as retry_error:
                logger.error(f"[Exa simple_search] Retry also failed: {retry_error}")
                raise RuntimeError(f"Exa search failed: {str(retry_error)}") from retry_error

        # Tolerate a response whose ``results`` is missing or None.
        raw_results = getattr(results, 'results', None) or []
        sources = [self._result_to_source(result) for result in raw_results]

        # Track usage (best effort: a tracking failure must not fail the search)
        if user_id:
            try:
                self.track_usage(user_id, self.SEARCH_COST_USD)
            except Exception as e:
                logger.warning(f"[Exa simple_search] Failed to track usage: {e}")

        logger.info(f"[Exa simple_search] Found {len(sources)} sources for query: {query[:80]}...")
        return sources

    def _preflight_usage_check(self, user_id: str) -> None:
        """Raise HTTP 429 if the pricing service says the user cannot proceed.

        Any other failure during the check is logged and ignored, so a broken
        preflight never blocks the search itself. If no DB session can be
        obtained the check is skipped.
        """
        from models.subscription_models import APIProvider
        from services.subscription import PricingService
        from services.database import get_session_for_user
        from fastapi import HTTPException

        db = get_session_for_user(user_id)
        if not db:
            return

        try:
            pricing_service = PricingService(db)
            can_proceed, message, usage_info = pricing_service.check_usage_limits(
                user_id=user_id,
                provider=APIProvider.EXA,
                tokens_requested=0,
                actual_provider_name="exa",
            )
            if not can_proceed:
                raise HTTPException(status_code=429, detail={
                    'error': 'insufficient_balance',
                    'message': message,
                    'provider': 'exa',
                    'usage_info': usage_info or {}
                })
        except HTTPException:
            # The limit-exceeded signal must reach the caller untouched.
            raise
        except Exception as e:
            logger.warning(f"[Exa simple_search] Preflight check failed: {e}")
        finally:
            try:
                db.close()
            except Exception as close_error:
                # Closing is best effort, but leave a trace instead of
                # swallowing the failure silently.
                logger.debug(f"[Exa simple_search] Failed to close DB session: {close_error}")

    @staticmethod
    def _result_to_source(result: Any) -> Dict[str, Any]:
        """Convert one Exa result object into the plain source dict.

        A default is used when an attribute is missing *or* present but
        ``None``. The published date is read from ``published_date`` first
        and from the camelCase ``publishedDate`` as a fallback.
        """
        def first_set(*names: str, default: Any) -> Any:
            for name in names:
                value = getattr(result, name, None)
                if value is not None:
                    return value
            return default

        return {
            'title': first_set('title', default='Untitled'),
            'url': first_set('url', default=''),
            'text': first_set('text', default=''),
            'publishedDate': first_set('published_date', 'publishedDate', default=''),
            'author': first_set('author', default=''),
            'score': first_set('score', default=0.5),
        }

    def track_usage(self, user_id: str, cost: float):
        """Track Exa API usage after a successful call.

        Increments the Exa and total call/cost counters on the user's
        ``usage_summaries`` row for the current billing period. The statement
        is a plain UPDATE, so nothing is recorded when no such row exists; a
        warning is logged in that case. Database errors are logged and rolled
        back rather than raised.

        Args:
            user_id: User whose usage summary is updated.
            cost: Cost to add to ``exa_cost`` and ``total_cost``.
        """
        from services.database import get_session_for_user
        from services.subscription import PricingService
        from sqlalchemy import text

        db = get_session_for_user(user_id)
        if not db:
            logger.warning(f"[track_usage] Could not get DB session for user {user_id}")
            return

        try:
            pricing_service = PricingService(db)
            current_period = pricing_service.get_current_billing_period(user_id)

            # Update exa_calls and exa_cost via SQL UPDATE
            update_query = text("""
                UPDATE usage_summaries
                SET exa_calls = COALESCE(exa_calls, 0) + 1,
                    exa_cost = COALESCE(exa_cost, 0) + :cost,
                    total_calls = total_calls + 1,
                    total_cost = total_cost + :cost
                WHERE user_id = :user_id AND billing_period = :period
            """)
            result = db.execute(update_query, {
                'cost': cost,
                'user_id': user_id,
                'period': current_period
            })
            # Read defensively: only warn when the driver positively reports
            # that zero rows matched.
            updated_rows = getattr(result, 'rowcount', None)
            db.commit()

            if updated_rows == 0:
                logger.warning(
                    f"[Exa] No usage summary row for user={user_id}, "
                    f"period={current_period}; usage not recorded"
                )
            else:
                logger.info(f"[Exa] Tracked usage: user={user_id}, cost=${cost}")
        except Exception as e:
            logger.error(f"[Exa] Failed to track usage: {e}")
            db.rollback()
        finally:
            db.close()
|
||||
|
||||
|
||||
# Module-wide provider instance; stays None until the first request for it.
_exa_content_provider: Optional[ExaContentResearchProvider] = None


def get_exa_content_provider() -> ExaContentResearchProvider:
    """Return the shared Exa content research provider, building it on first use.

    The instance is stored in the module-level ``_exa_content_provider`` and
    returned unchanged by later calls. If construction raises, nothing is
    stored, so the next call attempts construction again.
    """
    global _exa_content_provider

    provider = _exa_content_provider
    if provider is None:
        provider = ExaContentResearchProvider()
        _exa_content_provider = provider
    return provider
|
||||
Reference in New Issue
Block a user