Files
ALwrity/backend/services/research/exa_content_research.py
ajaysi 63a0df2536 feat: LinkedIn LLM alignment - Phase 1-3 complete
Phase 1: Dead Code Cleanup
- Remove GeminiGroundedProvider import and property from linkedin_service.py
- Remove fallback_provider property (gemini_provider imports)
- Fix routers/linkedin.py edit endpoint to use llm_text_gen
- Delete dead LinkedInImageEditor class
- Remove dead _transform_gemini_sources from content_generator.py

Phase 2: Research Infrastructure Alignment
- Add user_id to _conduct_research() for pre-flight validation
- Add validate_exa_research_operations() before Exa/Tavily calls
- Pass user_id to provider.simple_search() for usage tracking
- Inject research content into LLM prompts via _build_research_context()
- Fix Google engine path to fallback to Exa
- Add Exa → Tavily fallback on research failure

Phase 3: Cosmetic Cleanup
- Rename _generate_prompts_with_gemini → _generate_prompts_with_llm
- Rename _build_gemini_prompt → _build_image_prompt
- Rename _parse_gemini_response → _parse_llm_response
- Remove all Gemini references from LinkedIn code (0 remaining)
- Update docstrings and log messages

Additional:
- Research caching using existing ResearchCache
- Shared ExaContentResearchProvider in services/research/
- Persona service uses llm_text_gen instead of gemini_structured_json_response
- LinkedInWriter.tsx ChatMessage → ChatMsg type mapping fix
- RegisterLinkedInActionsEnhanced.tsx content_format_rules typing fix
2026-06-12 18:58:53 +05:30

199 lines
7.6 KiB
Python

"""
Exa Content Research Provider
Shared Exa neural search provider for content research across ALwrity modules.
Provides simple_search() for fact-checking, content grounding, and research.
Used by:
- LinkedIn Writer (content generation research)
- Blog Writer (fact-checking and writing assistance)
This is the content-research variant. For competitor discovery/analysis,
use ExaService in exa_service.py.
"""
import os
import asyncio
from typing import List, Dict, Any, Optional
from loguru import logger
class ExaContentResearchProvider:
"""Exa neural search provider for content research."""
def __init__(self):
"""Initialize the Exa content research provider."""
self.api_key = os.getenv("EXA_API_KEY")
if not self.api_key:
raise RuntimeError("EXA_API_KEY not configured")
from exa_py import Exa
self.exa = Exa(self.api_key)
logger.info("✅ Exa Content Research Provider initialized")
async def simple_search(
self,
query: str,
num_results: int = 5,
user_id: str = None,
include_domains: List[str] = None,
exclude_domains: List[str] = None,
) -> List[Dict[str, Any]]:
"""
Simple Exa search for content research and fact-checking.
Handles subscription preflight check and usage tracking.
Args:
query: Search query string
num_results: Number of results to return (default 5)
user_id: Optional user ID for subscription checking
include_domains: Only return results from these domains
exclude_domains: Exclude results from these domains
Returns:
List of source dicts with title, url, text, publishedDate, author, score keys
Raises:
HTTPException(429): If user has exceeded subscription limits
Exception: If Exa API key not configured or search fails
"""
# Preflight subscription check
if user_id:
from models.subscription_models import APIProvider
from services.subscription import PricingService
from services.database import get_session_for_user
from fastapi import HTTPException
db = get_session_for_user(user_id)
if db:
try:
pricing_service = PricingService(db)
can_proceed, message, usage_info = pricing_service.check_usage_limits(
user_id=user_id,
provider=APIProvider.EXA,
tokens_requested=0,
actual_provider_name="exa",
)
if not can_proceed:
raise HTTPException(status_code=429, detail={
'error': 'insufficient_balance',
'message': message,
'provider': 'exa',
'usage_info': usage_info or {}
})
except HTTPException:
raise
except Exception as e:
logger.warning(f"[Exa simple_search] Preflight check failed: {e}")
finally:
try:
db.close()
except Exception:
pass
search_kwargs = {
"type": "auto",
"num_results": num_results,
"text": {"max_characters": 1000},
"highlights": {"num_sentences": 2, "highlights_per_url": 2},
}
if include_domains:
search_kwargs["include_domains"] = include_domains
if exclude_domains:
search_kwargs["exclude_domains"] = exclude_domains
try:
loop = asyncio.get_running_loop()
results = await loop.run_in_executor(
None,
lambda: self.exa.search_and_contents(query, **search_kwargs),
)
except Exception as e:
logger.error(f"[Exa simple_search] API call failed: {e}")
# Retry with simpler parameters
retry_kwargs = {"type": "auto", "num_results": num_results, "text": True}
if include_domains:
retry_kwargs["include_domains"] = include_domains
if exclude_domains:
retry_kwargs["exclude_domains"] = exclude_domains
try:
logger.info("[Exa simple_search] Retrying with simplified parameters")
results = await loop.run_in_executor(
None,
lambda: self.exa.search_and_contents(query, **retry_kwargs),
)
except Exception as retry_error:
logger.error(f"[Exa simple_search] Retry also failed: {retry_error}")
raise RuntimeError(f"Exa search failed: {str(retry_error)}") from retry_error
sources = []
for result in results.results:
sources.append({
'title': getattr(result, 'title', 'Untitled'),
'url': getattr(result, 'url', ''),
'text': getattr(result, 'text', ''),
'publishedDate': getattr(result, 'publishedDate', ''),
'author': getattr(result, 'author', ''),
'score': (lambda v: v if v is not None else 0.5)(getattr(result, 'score', 0.5)),
})
# Track usage
if user_id:
cost = 0.005 # ~0.5 cents per search
try:
self.track_usage(user_id, cost)
except Exception as e:
logger.warning(f"[Exa simple_search] Failed to track usage: {e}")
logger.info(f"[Exa simple_search] Found {len(sources)} sources for query: {query[:80]}...")
return sources
def track_usage(self, user_id: str, cost: float):
"""Track Exa API usage after successful call."""
from services.database import get_session_for_user
from services.subscription import PricingService
from sqlalchemy import text
db = get_session_for_user(user_id)
if not db:
logger.warning(f"[track_usage] Could not get DB session for user {user_id}")
return
try:
pricing_service = PricingService(db)
current_period = pricing_service.get_current_billing_period(user_id)
# Update exa_calls and exa_cost via SQL UPDATE
update_query = text("""
UPDATE usage_summaries
SET exa_calls = COALESCE(exa_calls, 0) + 1,
exa_cost = COALESCE(exa_cost, 0) + :cost,
total_calls = total_calls + 1,
total_cost = total_cost + :cost
WHERE user_id = :user_id AND billing_period = :period
""")
db.execute(update_query, {
'cost': cost,
'user_id': user_id,
'period': current_period
})
db.commit()
logger.info(f"[Exa] Tracked usage: user={user_id}, cost=${cost}")
except Exception as e:
logger.error(f"[Exa] Failed to track usage: {e}")
db.rollback()
finally:
db.close()
# Global singleton instance
_exa_content_provider: Optional[ExaContentResearchProvider] = None
def get_exa_content_provider() -> ExaContentResearchProvider:
"""Get or create the global Exa content research provider instance."""
global _exa_content_provider
if _exa_content_provider is None:
_exa_content_provider = ExaContentResearchProvider()
return _exa_content_provider