AI platform insights monitoring and website analysis monitoring services added

2025-11-11 15:57:45 +05:30
parent d99c7c83a7
commit 7191c7e7f0
81 changed files with 10860 additions and 1567 deletions
--- a/backend/services/blog_writer/research/init.py
+++ b/backend/services/blog_writer/research/init.py
@@ -16,6 +16,7 @@ from .data_filter import ResearchDataFilter
 from .base_provider import ResearchProvider as BaseResearchProvider
 from .google_provider import GoogleResearchProvider
 from .exa_provider import ExaResearchProvider
+from .tavily_provider import TavilyResearchProvider

 __all__ = [
    'ResearchService',
@@ -26,4 +27,5 @@ __all__ = [
    'BaseResearchProvider',
    'GoogleResearchProvider',
    'ExaResearchProvider',
+    'TavilyResearchProvider',
 ]
--- a/backend/services/blog_writer/research/research_service.py
+++ b/backend/services/blog_writer/research/research_service.py
@@ -150,8 +150,94 @@ class ResearchService:
                        raw_result = None
                    else:
                        raise
+            
+            elif config.provider == ResearchProvider.TAVILY:
+                # Tavily research workflow
+                from .tavily_provider import TavilyResearchProvider
+                from services.database import get_db
+                from services.subscription import PricingService
+                import os
+                import time
                
-            if config.provider != ResearchProvider.EXA:
+                # Pre-flight validation (similar to Exa)
+                db_val = next(get_db())
+                try:
+                    pricing_service = PricingService(db_val)
+                    # Check Tavily usage limits
+                    limits = pricing_service.get_user_limits(user_id)
+                    tavily_limit = limits.get('limits', {}).get('tavily_calls', 0) if limits else 0
+                    
+                    # Get current usage
+                    from models.subscription_models import UsageSummary
+                    from datetime import datetime
+                    current_period = pricing_service.get_current_billing_period(user_id) or datetime.now().strftime("%Y-%m")
+                    usage = db_val.query(UsageSummary).filter(
+                        UsageSummary.user_id == user_id,
+                        UsageSummary.billing_period == current_period
+                    ).first()
+                    
+                    current_calls = getattr(usage, 'tavily_calls', 0) or 0 if usage else 0
+                    
+                    if tavily_limit > 0 and current_calls >= tavily_limit:
+                        raise HTTPException(
+                            status_code=429,
+                            detail={
+                                'error': 'Tavily API call limit exceeded',
+                                'message': f'You have reached your Tavily API call limit ({tavily_limit} calls). Please upgrade your plan or wait for the next billing period.',
+                                'provider': 'tavily',
+                                'usage_info': {
+                                    'current': current_calls,
+                                    'limit': tavily_limit
+                                }
+                            }
+                        )
+                except HTTPException:
+                    raise
+                except Exception as e:
+                    logger.warning(f"Error checking Tavily limits: {e}")
+                finally:
+                    db_val.close()
+                
+                # Execute Tavily search
+                api_start_time = time.time()
+                try:
+                    tavily_provider = TavilyResearchProvider()
+                    raw_result = await tavily_provider.search(
+                        research_prompt, topic, industry, target_audience, config, user_id
+                    )
+                    api_duration_ms = (time.time() - api_start_time) * 1000
+                    
+                    # Track usage
+                    cost = raw_result.get('cost', {}).get('total', 0.001) if isinstance(raw_result.get('cost'), dict) else 0.001
+                    search_depth = config.tavily_search_depth or "basic"
+                    tavily_provider.track_tavily_usage(user_id, cost, search_depth)
+                    
+                    # Log API call performance
+                    blog_writer_logger.log_api_call(
+                        "tavily_search",
+                        "search",
+                        api_duration_ms,
+                        token_usage={},
+                        content_length=len(raw_result.get('content', ''))
+                    )
+                    
+                    # Extract content for downstream analysis
+                    content = raw_result.get('content', '')
+                    sources = raw_result.get('sources', [])
+                    search_widget = ""  # Tavily doesn't provide search widgets
+                    search_queries = raw_result.get('search_queries', [])
+                    grounding_metadata = None  # Tavily doesn't provide grounding metadata
+                    
+                except RuntimeError as e:
+                    if "TAVILY_API_KEY not configured" in str(e):
+                        logger.warning("Tavily not configured, falling back to Google")
+                        config.provider = ResearchProvider.GOOGLE
+                        # Continue to Google flow below
+                        raw_result = None
+                    else:
+                        raise
+                
+            if config.provider not in [ResearchProvider.EXA, ResearchProvider.TAVILY]:
                # Google research (existing flow) or fallback from Exa
                from .google_provider import GoogleResearchProvider
                import time
@@ -412,8 +498,94 @@ class ResearchService:
                        # Continue to Google flow below
                    else:
                        raise
+            
+            elif config.provider == ResearchProvider.TAVILY:
+                # Tavily research workflow
+                from .tavily_provider import TavilyResearchProvider
+                from services.database import get_db
+                from services.subscription import PricingService
+                import os
                
-            if config.provider != ResearchProvider.EXA:
+                await task_manager.update_progress(task_id, "🌐 Connecting to Tavily AI search...")
+                
+                # Pre-flight validation
+                db_val = next(get_db())
+                try:
+                    pricing_service = PricingService(db_val)
+                    # Check Tavily usage limits
+                    limits = pricing_service.get_user_limits(user_id)
+                    tavily_limit = limits.get('limits', {}).get('tavily_calls', 0) if limits else 0
+                    
+                    # Get current usage
+                    from models.subscription_models import UsageSummary
+                    from datetime import datetime
+                    current_period = pricing_service.get_current_billing_period(user_id) or datetime.now().strftime("%Y-%m")
+                    usage = db_val.query(UsageSummary).filter(
+                        UsageSummary.user_id == user_id,
+                        UsageSummary.billing_period == current_period
+                    ).first()
+                    
+                    current_calls = getattr(usage, 'tavily_calls', 0) or 0 if usage else 0
+                    
+                    if tavily_limit > 0 and current_calls >= tavily_limit:
+                        await task_manager.update_progress(task_id, f"❌ Tavily API call limit exceeded ({current_calls}/{tavily_limit})")
+                        raise HTTPException(
+                            status_code=429,
+                            detail={
+                                'error': 'Tavily API call limit exceeded',
+                                'message': f'You have reached your Tavily API call limit ({tavily_limit} calls). Please upgrade your plan or wait for the next billing period.',
+                                'provider': 'tavily',
+                                'usage_info': {
+                                    'current': current_calls,
+                                    'limit': tavily_limit
+                                }
+                            }
+                        )
+                except HTTPException:
+                    raise
+                except Exception as e:
+                    logger.warning(f"Error checking Tavily limits: {e}")
+                finally:
+                    db_val.close()
+                
+                # Execute Tavily search
+                await task_manager.update_progress(task_id, "🤖 Executing Tavily AI search...")
+                try:
+                    tavily_provider = TavilyResearchProvider()
+                    raw_result = await tavily_provider.search(
+                        research_prompt, topic, industry, target_audience, config, user_id
+                    )
+                    
+                    # Track usage
+                    cost = raw_result.get('cost', {}).get('total', 0.001) if isinstance(raw_result.get('cost'), dict) else 0.001
+                    search_depth = config.tavily_search_depth or "basic"
+                    tavily_provider.track_tavily_usage(user_id, cost, search_depth)
+                    
+                    # Extract content for downstream analysis
+                    if raw_result is None:
+                        logger.error("raw_result is None after Tavily search")
+                        raise ValueError("Tavily research result is None - search operation failed unexpectedly")
+                    
+                    if not isinstance(raw_result, dict):
+                        logger.warning(f"raw_result is not a dict (type: {type(raw_result)}), using defaults")
+                        raw_result = {}
+                    
+                    content = raw_result.get('content', '')
+                    sources = raw_result.get('sources', []) or []
+                    search_widget = ""  # Tavily doesn't provide search widgets
+                    search_queries = raw_result.get('search_queries', []) or []
+                    grounding_metadata = None  # Tavily doesn't provide grounding metadata
+                    
+                except RuntimeError as e:
+                    if "TAVILY_API_KEY not configured" in str(e):
+                        logger.warning("Tavily not configured, falling back to Google")
+                        await task_manager.update_progress(task_id, "⚠️ Tavily not configured, falling back to Google Search")
+                        config.provider = ResearchProvider.GOOGLE
+                        # Continue to Google flow below
+                    else:
+                        raise
+                
+            if config.provider not in [ResearchProvider.EXA, ResearchProvider.TAVILY]:
                # Google research (existing flow)
                from .google_provider import GoogleResearchProvider
                
--- a/backend/services/blog_writer/research/tavily_provider.py
+++ b/backend/services/blog_writer/research/tavily_provider.py
@@ -0,0 +1,169 @@
+"""
+Tavily Research Provider
+
+AI-powered search implementation using Tavily API for high-quality research.
+"""
+
+import os
+from loguru import logger
+from models.subscription_models import APIProvider
+from services.research.tavily_service import TavilyService
+from .base_provider import ResearchProvider as BaseProvider
+
+
+class TavilyResearchProvider(BaseProvider):
+    """Research provider backed by the Tavily search API.
+
+    Delegates the actual request to ``TavilyService`` and reshapes the
+    response into the dict returned by :meth:`search` (sources, aggregated
+    content, estimated cost). Usage accounting is done separately through
+    :meth:`track_tavily_usage`.
+    """
+
+    def __init__(self):
+        """Read ``TAVILY_API_KEY`` from the environment and build the service.
+
+        Raises:
+            RuntimeError: If ``TAVILY_API_KEY`` is unset or empty. The message
+                text is matched verbatim by ``ResearchService`` to trigger its
+                Google fallback, so it must not be reworded.
+        """
+        self.api_key = os.getenv("TAVILY_API_KEY")
+        if not self.api_key:
+            raise RuntimeError("TAVILY_API_KEY not configured")
+        self.tavily_service = TavilyService()
+        logger.info("✅ Tavily Research Provider initialized")
+
+    async def search(self, prompt, topic, industry, target_audience, config, user_id):
+        """Execute a Tavily search and return standardized results.
+
+        Args:
+            prompt: Research prompt. Not used by this provider; the query is
+                built from ``topic``, ``industry`` and ``target_audience``.
+            topic: Subject to research; first part of the query.
+            industry: Second part of the query; skipped when empty/None.
+            target_audience: Third part of the query; skipped when empty/None.
+            config: Object exposing ``max_sources`` and the ``tavily_*``
+                options read below.
+            user_id: Not used inside this method.
+
+        Returns:
+            dict with keys ``sources``, ``content``, ``search_type``,
+            ``provider``, ``search_queries``, ``cost``, ``answer`` and
+            ``images``.
+
+        Raises:
+            RuntimeError: If the service response has no truthy ``success``.
+        """
+        # Join only the parts that are set, so a missing industry or audience
+        # does not put the literal text "None" into the search query.
+        query = " ".join(
+            str(part) for part in (topic, industry, target_audience) if part
+        )
+
+        # Kept in its own name: the Tavily "topic" option is unrelated to the
+        # research ``topic`` argument and must not overwrite it.
+        tavily_topic = config.tavily_topic or "general"
+        search_depth = config.tavily_search_depth or "basic"
+
+        logger.info(f"[Tavily Research] Executing search: {query}")
+
+        result = await self.tavily_service.search(
+            query=query,
+            topic=tavily_topic,
+            search_depth=search_depth,
+            max_results=min(config.max_sources, 20),
+            include_domains=config.tavily_include_domains or None,
+            exclude_domains=config.tavily_exclude_domains or None,
+            include_answer=config.tavily_include_answer or False,
+            include_raw_content=config.tavily_include_raw_content or False,
+            include_images=config.tavily_include_images or False,
+            include_image_descriptions=config.tavily_include_image_descriptions or False,
+            time_range=config.tavily_time_range,
+            start_date=config.tavily_start_date,
+            end_date=config.tavily_end_date,
+            country=config.tavily_country,
+            chunks_per_source=config.tavily_chunks_per_source or 3,
+            auto_parameters=config.tavily_auto_parameters or False,
+        )
+
+        if not result.get("success"):
+            raise RuntimeError(f"Tavily search failed: {result.get('error', 'Unknown error')}")
+
+        # ``or []`` also covers a key that is present but null.
+        hits = result.get("results") or []
+        sources = self._transform_sources(hits)
+        content = self._aggregate_content(hits)
+
+        # Flat per-search estimate: 0.001 for "basic", 0.002 for any other depth.
+        cost = 0.001 if search_depth == "basic" else 0.002
+
+        logger.info(f"[Tavily Research] Search completed: {len(sources)} sources, depth: {search_depth}")
+
+        return {
+            'sources': sources,
+            'content': content,
+            'search_type': search_depth,
+            'provider': 'tavily',
+            'search_queries': [query],
+            'cost': {'total': cost},
+            'answer': result.get("answer"),  # Only set if include_answer was requested
+            'images': result.get("images") or [],
+        }
+
+    def get_provider_enum(self):
+        """Return ``APIProvider.TAVILY`` for subscription tracking."""
+        return APIProvider.TAVILY
+
+    def estimate_tokens(self) -> int:
+        """Return 0: Tavily is billed per search, not per token."""
+        return 0
+
+    def _transform_sources(self, results):
+        """Convert raw Tavily result dicts into source dicts.
+
+        Each output dict carries the result's position in ``index`` and a
+        500-character ``excerpt`` cut from its ``content``.
+        """
+        sources = []
+        for idx, result in enumerate(results):
+            url = result.get("url", "")
+            # Treat a null content field like a missing one so slicing is safe.
+            body = result.get("content") or ""
+
+            sources.append({
+                'title': result.get("title", ""),
+                'url': url,
+                'excerpt': body[:500],  # First 500 chars
+                # NOTE(review): reads "relevance_score" while 'score' below
+                # prefers "score" -- confirm which key TavilyService emits.
+                'credibility_score': result.get("relevance_score", 0.5),
+                'published_at': result.get("published_date"),
+                'index': idx,
+                'source_type': self._determine_source_type(url),
+                'content': body,
+                'raw_content': result.get("raw_content"),  # Only set if include_raw_content was requested
+                'score': result.get("score", result.get("relevance_score", 0.5)),
+                'favicon': result.get("favicon"),
+            })
+
+        return sources
+
+    def _determine_source_type(self, url):
+        """Classify a URL as academic, news, expert, government or web.
+
+        Matching is a case-insensitive substring test on the whole URL, and
+        the first rule that matches wins (academic is checked first).
+        """
+        if not url:
+            return 'web'
+
+        url_lower = url.lower()
+        news_domains = ('cnn.com', 'bbc.com', 'reuters.com', 'theguardian.com', 'nytimes.com')
+
+        if 'arxiv.org' in url_lower or 'research' in url_lower or '.edu' in url_lower:
+            return 'academic'
+        if any(domain in url_lower for domain in news_domains):
+            return 'news'
+        if 'linkedin.com' in url_lower:
+            return 'expert'
+        if '.gov' in url_lower:
+            return 'government'
+        return 'web'
+
+    def _aggregate_content(self, results):
+        """Join the non-empty result contents into one text for LLM analysis.
+
+        Entries are labelled ``Source N`` with N being the 1-based position in
+        ``results``, so numbering skips results that have no content.
+        """
+        content_parts = [
+            f"Source {position}: {item.get('content')}"
+            for position, item in enumerate(results, start=1)
+            if item.get("content")
+        ]
+        return "\n\n".join(content_parts)
+
+    def track_tavily_usage(self, user_id: str, cost: float, search_depth: str):
+        """Record one Tavily call and its cost on the user's usage summary.
+
+        Best effort: any failure is logged and swallowed so that usage
+        accounting never fails the search that triggered it.
+
+        Args:
+            user_id: Owner of the ``usage_summaries`` row to update.
+            cost: Amount added to ``tavily_cost`` and ``total_cost``.
+            search_depth: Only used in the log message.
+        """
+        from datetime import datetime
+        from services.database import get_db
+        from services.subscription import PricingService
+        from sqlalchemy import text
+
+        db = next(get_db())
+        try:
+            pricing_service = PricingService(db)
+            # Same fallback as the pre-flight limit check in ResearchService,
+            # so both read and write target the same billing period.
+            current_period = (
+                pricing_service.get_current_billing_period(user_id)
+                or datetime.now().strftime("%Y-%m")
+            )
+
+            # Update tavily_calls and tavily_cost via SQL UPDATE
+            update_query = text("""
+                UPDATE usage_summaries 
+                SET tavily_calls = COALESCE(tavily_calls, 0) + 1,
+                    tavily_cost = COALESCE(tavily_cost, 0) + :cost,
+                    total_calls = COALESCE(total_calls, 0) + 1,
+                    total_cost = COALESCE(total_cost, 0) + :cost
+                WHERE user_id = :user_id AND billing_period = :period
+            """)
+            outcome = db.execute(update_query, {
+                'cost': cost,
+                'user_id': user_id,
+                'period': current_period
+            })
+            db.commit()
+
+            if outcome.rowcount == 0:
+                # No summary row for this period: nothing was recorded, so do
+                # not claim the usage was tracked.
+                logger.warning(
+                    f"[Tavily] No usage summary row to update: user={user_id}, period={current_period}"
+                )
+            else:
+                logger.info(f"[Tavily] Tracked usage: user={user_id}, cost=${cost}, depth={search_depth}")
+        except Exception as e:
+            # Roll back first so a logging problem can never skip it.
+            db.rollback()
+            # loguru str.format()s the message when extra arguments are given;
+            # pass the exception as an argument instead of pre-rendering it,
+            # because DB error text often contains literal braces.
+            logger.opt(exception=True).error("[Tavily] Failed to track usage: {}", e)
+        finally:
+            db.close()
+