AI platform insights monitoring and website analysis monitoring services added
This commit is contained in:
@@ -81,6 +81,30 @@ class OnboardingCompletionService:
|
||||
# Non-critical: log but don't fail onboarding completion
|
||||
logger.warning(f"Failed to create OAuth token monitoring tasks for user {user_id}: {e}")
|
||||
|
||||
# Create website analysis tasks for user's website and competitors
|
||||
try:
|
||||
from services.database import SessionLocal
|
||||
from services.website_analysis_monitoring_service import create_website_analysis_tasks
|
||||
db = SessionLocal()
|
||||
try:
|
||||
result = create_website_analysis_tasks(user_id=user_id, db=db)
|
||||
if result.get('success'):
|
||||
tasks_count = result.get('tasks_created', 0)
|
||||
logger.info(
|
||||
f"Created {tasks_count} website analysis tasks for user {user_id} "
|
||||
f"on onboarding completion"
|
||||
)
|
||||
else:
|
||||
error = result.get('error', 'Unknown error')
|
||||
logger.warning(
|
||||
f"Failed to create website analysis tasks for user {user_id}: {error}"
|
||||
)
|
||||
finally:
|
||||
db.close()
|
||||
except Exception as e:
|
||||
# Non-critical: log but don't fail onboarding completion
|
||||
logger.warning(f"Failed to create website analysis tasks for user {user_id}: {e}")
|
||||
|
||||
return {
|
||||
"message": "Onboarding completed successfully",
|
||||
"completed_at": datetime.now().isoformat(),
|
||||
|
||||
@@ -432,13 +432,13 @@ class Step3ResearchService:
|
||||
logger.error(f"Error storing research data: {str(e)}")
|
||||
return False
|
||||
|
||||
async def get_research_data(self, session_id: str) -> Dict[str, Any]:
|
||||
async def get_research_data(self, session_id: str) -> Dict[str, Any]:
|
||||
"""
|
||||
Retrieve research data for a session.
|
||||
|
||||
|
||||
Args:
|
||||
session_id: Onboarding session ID
|
||||
|
||||
|
||||
Returns:
|
||||
Dictionary containing research data
|
||||
"""
|
||||
@@ -447,25 +447,76 @@ class Step3ResearchService:
|
||||
session = db.query(OnboardingSession).filter(
|
||||
OnboardingSession.id == session_id
|
||||
).first()
|
||||
|
||||
|
||||
if not session:
|
||||
return {
|
||||
"success": False,
|
||||
"error": "Session not found"
|
||||
}
|
||||
|
||||
research_data = session.step_data.get("step3_research_data") if session.step_data else None
|
||||
|
||||
|
||||
# Check if step_data attribute exists (it may not be in the model)
|
||||
# If it doesn't exist, try to get data from CompetitorAnalysis table
|
||||
research_data = None
|
||||
if hasattr(session, 'step_data') and session.step_data:
|
||||
research_data = session.step_data.get("step3_research_data") if isinstance(session.step_data, dict) else None
|
||||
|
||||
# If not found in step_data, try CompetitorAnalysis table
|
||||
if not research_data:
|
||||
try:
|
||||
from models.onboarding import CompetitorAnalysis
|
||||
competitor_records = db.query(CompetitorAnalysis).filter(
|
||||
CompetitorAnalysis.session_id == session.id
|
||||
).all()
|
||||
|
||||
if competitor_records:
|
||||
competitors = []
|
||||
for record in competitor_records:
|
||||
analysis_data = record.analysis_data or {}
|
||||
competitor_info = {
|
||||
"url": record.competitor_url,
|
||||
"domain": record.competitor_domain or record.competitor_url,
|
||||
"title": analysis_data.get("title", record.competitor_domain or ""),
|
||||
"summary": analysis_data.get("summary", ""),
|
||||
"relevance_score": analysis_data.get("relevance_score", 0.5),
|
||||
"highlights": analysis_data.get("highlights", []),
|
||||
"favicon": analysis_data.get("favicon"),
|
||||
"image": analysis_data.get("image"),
|
||||
"published_date": analysis_data.get("published_date"),
|
||||
"author": analysis_data.get("author"),
|
||||
"competitive_insights": analysis_data.get("competitive_analysis", {}),
|
||||
"content_insights": analysis_data.get("content_insights", {})
|
||||
}
|
||||
competitors.append(competitor_info)
|
||||
|
||||
if competitors:
|
||||
# Map competitor fields to match frontend expectations
|
||||
mapped_competitors = []
|
||||
for comp in competitors:
|
||||
mapped_comp = {
|
||||
**comp, # Keep all original fields
|
||||
"name": comp.get("title") or comp.get("name") or comp.get("domain", ""),
|
||||
"description": comp.get("summary") or comp.get("description", ""),
|
||||
"similarity_score": comp.get("relevance_score") or comp.get("similarity_score", 0.5)
|
||||
}
|
||||
mapped_competitors.append(mapped_comp)
|
||||
|
||||
research_data = {
|
||||
"competitors": mapped_competitors,
|
||||
"completed_at": competitor_records[0].created_at.isoformat() if competitor_records[0].created_at else None
|
||||
}
|
||||
except Exception as e:
|
||||
logger.warning(f"Could not retrieve competitors from CompetitorAnalysis table: {e}")
|
||||
|
||||
if not research_data:
|
||||
return {
|
||||
"success": False,
|
||||
"error": "No research data found for this session"
|
||||
"error": "No research data found for this session"
|
||||
}
|
||||
|
||||
return {
|
||||
"success": True,
|
||||
"research_data": research_data,
|
||||
"session_id": session_id
|
||||
"step3_research_data": research_data,
|
||||
"research_data": research_data # Keep for backward compatibility
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
|
||||
@@ -4,12 +4,12 @@ Provides provider availability and persona-aware defaults for research.
|
||||
"""
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException, Query
|
||||
from typing import Dict, Any, Optional
|
||||
from typing import Dict, Any, Optional, List
|
||||
from loguru import logger
|
||||
from pydantic import BaseModel
|
||||
|
||||
from middleware.auth_middleware import get_current_user
|
||||
from services.user_api_key_context import get_exa_key, get_gemini_key
|
||||
from services.user_api_key_context import get_exa_key, get_gemini_key, get_tavily_key
|
||||
from services.onboarding.database_service import OnboardingDatabaseService
|
||||
from services.onboarding.progress_service import get_onboarding_progress_service
|
||||
from services.database import get_db
|
||||
@@ -26,8 +26,10 @@ class ProviderAvailability(BaseModel):
|
||||
"""Provider availability status."""
|
||||
google_available: bool
|
||||
exa_available: bool
|
||||
tavily_available: bool
|
||||
gemini_key_status: str # 'configured' | 'missing'
|
||||
exa_key_status: str # 'configured' | 'missing'
|
||||
tavily_key_status: str # 'configured' | 'missing'
|
||||
|
||||
|
||||
class PersonaDefaults(BaseModel):
|
||||
@@ -47,6 +49,17 @@ class ResearchConfigResponse(BaseModel):
|
||||
persona_scheduled: bool = False
|
||||
|
||||
|
||||
class CompetitorAnalysisResponse(BaseModel):
    """Response payload for the competitor-analysis endpoint.

    Only ``success`` is required; every other field defaults to ``None`` so a
    failure response can carry just ``success=False`` and an ``error`` message.
    """

    # Whether competitor data was found and is included in this response.
    success: bool

    # One dict per competitor; the schema of each dict is not enforced here.
    competitors: Optional[List[Dict[str, Any]]] = None

    # String-to-string mapping of social media accounts
    # (presumably platform -> handle/URL; confirm against the producer).
    social_media_accounts: Optional[Dict[str, str]] = None

    # Free-form citation records for the social media data.
    social_media_citations: Optional[List[Dict[str, Any]]] = None

    # Free-form summary of the research run.
    research_summary: Optional[Dict[str, Any]] = None

    # Timestamp of the analysis as a string, when known.
    analysis_timestamp: Optional[str] = None

    # Human-readable failure reason, when there is one.
    error: Optional[str] = None
|
||||
|
||||
|
||||
@router.get("/provider-availability", response_model=ProviderAvailability)
|
||||
async def get_provider_availability(
|
||||
current_user: Dict = Depends(get_current_user)
|
||||
@@ -57,6 +70,7 @@ async def get_provider_availability(
|
||||
Returns:
|
||||
- google_available: True if Gemini key is configured
|
||||
- exa_available: True if Exa key is configured
|
||||
- tavily_available: True if Tavily key is configured
|
||||
- Key status for each provider
|
||||
"""
|
||||
try:
|
||||
@@ -65,15 +79,19 @@ async def get_provider_availability(
|
||||
# Check API key availability
|
||||
gemini_key = get_gemini_key(user_id)
|
||||
exa_key = get_exa_key(user_id)
|
||||
tavily_key = get_tavily_key(user_id)
|
||||
|
||||
google_available = bool(gemini_key and gemini_key.strip())
|
||||
exa_available = bool(exa_key and exa_key.strip())
|
||||
tavily_available = bool(tavily_key and tavily_key.strip())
|
||||
|
||||
return ProviderAvailability(
|
||||
google_available=google_available,
|
||||
exa_available=exa_available,
|
||||
tavily_available=tavily_available,
|
||||
gemini_key_status='configured' if google_available else 'missing',
|
||||
exa_key_status='configured' if exa_available else 'missing'
|
||||
exa_key_status='configured' if exa_available else 'missing',
|
||||
tavily_key_status='configured' if tavily_available else 'missing'
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(f"[ResearchConfig] Error checking provider availability for user {user_id if 'user_id' in locals() else 'unknown'}: {e}", exc_info=True)
|
||||
@@ -211,15 +229,19 @@ async def get_research_config(
|
||||
logger.debug(f"[ResearchConfig] Getting provider availability for user {user_id}")
|
||||
gemini_key = get_gemini_key(user_id)
|
||||
exa_key = get_exa_key(user_id)
|
||||
tavily_key = get_tavily_key(user_id)
|
||||
|
||||
google_available = bool(gemini_key and gemini_key.strip())
|
||||
exa_available = bool(exa_key and exa_key.strip())
|
||||
tavily_available = bool(tavily_key and tavily_key.strip())
|
||||
|
||||
provider_availability = ProviderAvailability(
|
||||
google_available=google_available,
|
||||
exa_available=exa_available,
|
||||
tavily_available=tavily_available,
|
||||
gemini_key_status='configured' if google_available else 'missing',
|
||||
exa_key_status='configured' if exa_available else 'missing'
|
||||
exa_key_status='configured' if exa_available else 'missing',
|
||||
tavily_key_status='configured' if tavily_available else 'missing'
|
||||
)
|
||||
|
||||
# Get persona defaults
|
||||
@@ -355,11 +377,190 @@ async def get_research_config(
|
||||
import traceback
|
||||
logger.error(f"[ResearchConfig] Full traceback:\n{traceback.format_exc()}")
|
||||
raise HTTPException(
|
||||
status_code=500,
|
||||
status_code=500,
|
||||
detail=f"Failed to get research config: {str(e)}"
|
||||
)
|
||||
|
||||
|
||||
def _map_competitors_for_frontend(competitors: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """Add the ``name``/``description``/``similarity_score`` keys the frontend reads.

    All original keys of each competitor dict are preserved; the three derived
    keys are added (or overwritten) on a copy, so the input is not mutated.

    Args:
        competitors: Competitor dicts as returned by the storage layer.

    Returns:
        A new list of dicts, one per input competitor.
    """
    mapped = []
    for comp in competitors:
        # Explicit None checks: a relevance score of 0 is a real value and must
        # not be replaced by the 0.5 default the way a plain `or` chain would.
        score = comp.get("relevance_score")
        if score is None:
            score = comp.get("similarity_score")
        if score is None:
            score = 0.5

        mapped.append({
            **comp,  # Keep all original fields
            "name": comp.get("title") or comp.get("name") or comp.get("domain", ""),
            "description": comp.get("summary") or comp.get("description", ""),
            "similarity_score": score,
        })
    return mapped


@router.get("/competitor-analysis", response_model=CompetitorAnalysisResponse)
async def get_competitor_analysis(
    current_user: Dict = Depends(get_current_user),
    db: Session = Depends(get_db)
):
    """
    Get competitor analysis data from onboarding for the current user.

    Returns competitor data including competitors list, social media accounts,
    social media citations, and research summary that was collected during onboarding step 3.

    Lookup order:
        1. ``OnboardingDatabaseService.get_competitor_analysis`` (CompetitorAnalysis table).
        2. ``Step3ResearchService.get_research_data`` for the user's onboarding session.

    A failure in either source is logged and the next source is tried; when
    neither yields competitors a ``success=False`` response is returned.

    Raises:
        HTTPException: 500 when the database session is unavailable or an
            unexpected error occurs outside the two lookup attempts.
    """
    user_id = None
    try:
        user_id = str(current_user.get('id'))
        logger.info(f"[ResearchConfig] Getting competitor analysis for user {user_id}")

        if not db:
            logger.error(f"[ResearchConfig] Database session is None for user {user_id}")
            raise HTTPException(status_code=500, detail="Database session not available")

        db_service = OnboardingDatabaseService(db=db)

        # Get onboarding session - using same pattern as onboarding completion check
        session = db_service.get_session_by_user(user_id, db)
        if not session:
            logger.warning(f"[ResearchConfig] No onboarding session found for user {user_id}")
            return CompetitorAnalysisResponse(
                success=False,
                error="No onboarding session found. Please complete onboarding first."
            )

        # Step 3 counts as completed when research preferences exist or the
        # session has reached step 3. A missing current_step is treated as 0.
        research_preferences = db_service.get_research_preferences(user_id, db)
        current_step = session.current_step or 0
        if not research_preferences and current_step < 3:
            logger.info(f"[ResearchConfig] Step 3 not completed for user {user_id} (current_step={session.current_step})")
            return CompetitorAnalysisResponse(
                success=False,
                error="Onboarding step 3 (Competitor Analysis) is not completed. Please complete onboarding step 3 first."
            )

        # Method 1: CompetitorAnalysis table via OnboardingDatabaseService
        try:
            competitors = db_service.get_competitor_analysis(user_id, db)

            if competitors:
                mapped_competitors = _map_competitors_for_frontend(competitors)
                logger.info(f"[ResearchConfig] Found {len(mapped_competitors)} competitors from CompetitorAnalysis table for user {user_id}")
                return CompetitorAnalysisResponse(
                    success=True,
                    competitors=mapped_competitors,
                    social_media_accounts={},
                    social_media_citations=[],
                    research_summary={
                        "total_competitors": len(mapped_competitors),
                        "market_insights": f"Found {len(mapped_competitors)} competitors analyzed during onboarding"
                    },
                    analysis_timestamp=None
                )

            logger.debug(f"[ResearchConfig] No competitor records in CompetitorAnalysis table for user {user_id}")

        except Exception as e:
            # Best effort: fall through to Method 2. The exception is passed as a
            # format argument so braces in its text cannot break loguru formatting.
            logger.opt(exception=True).warning(
                "[ResearchConfig] Could not retrieve competitor data from CompetitorAnalysis table: {}", e
            )

        # Method 2: Step3ResearchService, keyed by the onboarding session id
        try:
            # Imported lazily, as in the original code (presumably to avoid a
            # circular import between the api modules - confirm before hoisting).
            from api.onboarding_utils.step3_research_service import Step3ResearchService

            step3_service = Step3ResearchService()
            research_data_result = await step3_service.get_research_data(str(session.id))

            if research_data_result.get('success'):
                # Handle both 'research_data' and 'step3_research_data' keys
                research_data = research_data_result.get('step3_research_data') or research_data_result.get('research_data', {})
                competitors_list = research_data.get('competitors') if isinstance(research_data, dict) else None

                if competitors_list:
                    # `or {}` guards against keys that are present but set to None
                    analysis_metadata = research_data.get('analysis_metadata') or {}
                    social_media_data = analysis_metadata.get('social_media_data') or {}

                    mapped_competitors = _map_competitors_for_frontend(competitors_list)
                    logger.info(f"[ResearchConfig] Found {len(mapped_competitors)} competitors from step_data via Step3ResearchService for user {user_id}")
                    return CompetitorAnalysisResponse(
                        success=True,
                        competitors=mapped_competitors,
                        social_media_accounts=social_media_data.get('social_media_accounts', {}),
                        social_media_citations=social_media_data.get('citations', []),
                        research_summary=research_data.get('research_summary'),
                        analysis_timestamp=research_data.get('completed_at')
                    )

                logger.debug(f"[ResearchConfig] Step3ResearchService returned no competitors for user {user_id}")
            else:
                error_msg = research_data_result.get('error', 'Unknown error')
                logger.debug(f"[ResearchConfig] Step3ResearchService returned success=False for user {user_id}: {error_msg}")

        except Exception as e:
            logger.opt(exception=True).warning(
                "[ResearchConfig] Could not retrieve competitor data from Step3ResearchService: {}", e
            )

        # Fallback: Return empty response with helpful message
        logger.info(f"[ResearchConfig] No competitor analysis data found for user {user_id} (step 3 completed but no data found)")
        return CompetitorAnalysisResponse(
            success=False,
            error="Competitor analysis data was not found in the database. Please re-run competitor discovery in Step 3 of onboarding to generate and save competitor data."
        )

    except HTTPException:
        # Already carries the intended status code; do not wrap it in a 500.
        raise
    except Exception as e:
        logger.opt(exception=True).error(
            "[ResearchConfig] Error getting competitor analysis for user {}: {}",
            user_id if user_id else 'unknown',
            e,
        )
        raise HTTPException(
            status_code=500,
            detail=f"Failed to get competitor analysis: {str(e)}"
        ) from e
|
||||
|
||||
|
||||
# Helper functions from RESEARCH_AI_HYPERPERSONALIZATION.md
|
||||
|
||||
def _get_domain_suggestions(industry: str) -> list[str]:
|
||||
|
||||
@@ -18,11 +18,68 @@ from middleware.auth_middleware import get_current_user
|
||||
from models.monitoring_models import TaskExecutionLog, MonitoringTask
|
||||
from models.scheduler_models import SchedulerEventLog
|
||||
from models.oauth_token_monitoring_models import OAuthTokenMonitoringTask
|
||||
from sqlalchemy import func
|
||||
from models.platform_insights_monitoring_models import PlatformInsightsTask, PlatformInsightsExecutionLog
|
||||
from models.website_analysis_monitoring_models import WebsiteAnalysisTask, WebsiteAnalysisExecutionLog
|
||||
|
||||
router = APIRouter(prefix="/api/scheduler", tags=["scheduler-dashboard"])
|
||||
|
||||
|
||||
def _rebuild_cumulative_stats_from_events(db: Session) -> Dict[str, int]:
    """
    Recompute the cumulative scheduler stats from the event log.

    Runs one aggregate query over ``SchedulerEventLog`` rows whose event type is
    ``'check_cycle'``: a row count plus the sums of ``tasks_found``,
    ``tasks_executed`` and ``tasks_failed``.

    Args:
        db: Database session

    Returns:
        Dictionary with the keys ``total_check_cycles``,
        ``cumulative_tasks_found``, ``cumulative_tasks_executed``,
        ``cumulative_tasks_failed`` and ``cumulative_tasks_skipped``.
        Every value is 0 when the query yields no row or raises.
    """
    # The first four keys line up positionally with the aggregate columns below.
    stat_keys = (
        'total_check_cycles',
        'cumulative_tasks_found',
        'cumulative_tasks_executed',
        'cumulative_tasks_failed',
        'cumulative_tasks_skipped',
    )

    try:
        aggregate_row = (
            db.query(
                func.count(SchedulerEventLog.id),
                func.sum(SchedulerEventLog.tasks_found),
                func.sum(SchedulerEventLog.tasks_executed),
                func.sum(SchedulerEventLog.tasks_failed),
            )
            .filter(SchedulerEventLog.event_type == 'check_cycle')
            .first()
        )

        if not aggregate_row:
            return dict.fromkeys(stat_keys, 0)

        # SUM yields NULL (None) when no rows match, so None is mapped to 0
        # before the int() conversion.
        rebuilt = {
            key: int(aggregate_row[position] if aggregate_row[position] is not None else 0)
            for position, key in enumerate(stat_keys[:4])
        }
        rebuilt['cumulative_tasks_skipped'] = 0  # Not tracked in event logs currently
        return rebuilt
    except Exception as e:
        logger.error(f"[Dashboard] Error rebuilding cumulative stats from events: {e}", exc_info=True)
        return dict.fromkeys(stat_keys, 0)
|
||||
|
||||
|
||||
@router.get("/dashboard")
|
||||
async def get_scheduler_dashboard(
|
||||
current_user: Dict[str, Any] = Depends(get_current_user),
|
||||
@@ -139,98 +196,172 @@ async def get_scheduler_dashboard(
|
||||
except Exception as e:
|
||||
logger.error(f"Error loading OAuth token monitoring tasks: {e}", exc_info=True)
|
||||
|
||||
# Load website analysis tasks
|
||||
try:
|
||||
website_analysis_tasks = db.query(WebsiteAnalysisTask).filter(
|
||||
WebsiteAnalysisTask.status == 'active'
|
||||
).all()
|
||||
|
||||
# Filter by user if user_id_str is provided
|
||||
if user_id_str:
|
||||
website_analysis_tasks = [t for t in website_analysis_tasks if t.user_id == user_id_str]
|
||||
|
||||
for task in website_analysis_tasks:
|
||||
try:
|
||||
user_job_store = get_user_job_store_name(task.user_id, db)
|
||||
except Exception as e:
|
||||
user_job_store = 'default'
|
||||
logger.debug(f"Could not get job store for user {task.user_id}: {e}")
|
||||
|
||||
# Format as recurring job
|
||||
job_info = {
|
||||
'id': f"website_analysis_{task.task_type}_{task.user_id}_{task.id}",
|
||||
'trigger_type': 'CronTrigger', # Recurring based on frequency_days
|
||||
'next_run_time': task.next_check.isoformat() if task.next_check else None,
|
||||
'user_id': task.user_id,
|
||||
'job_store': 'default',
|
||||
'user_job_store': user_job_store,
|
||||
'function_name': 'website_analysis_executor.execute_task',
|
||||
'task_type': task.task_type, # 'user_website' or 'competitor'
|
||||
'website_url': task.website_url,
|
||||
'competitor_id': task.competitor_id,
|
||||
'task_id': task.id,
|
||||
'is_database_task': True,
|
||||
'frequency': f'Every {task.frequency_days} days',
|
||||
'task_category': 'website_analysis'
|
||||
}
|
||||
|
||||
formatted_jobs.append(job_info)
|
||||
except Exception as e:
|
||||
logger.error(f"Error loading website analysis tasks: {e}", exc_info=True)
|
||||
|
||||
# Load platform insights tasks (GSC and Bing)
|
||||
try:
|
||||
insights_tasks = db.query(PlatformInsightsTask).filter(
|
||||
PlatformInsightsTask.status == 'active'
|
||||
).all()
|
||||
|
||||
# Filter by user if user_id_str is provided
|
||||
if user_id_str:
|
||||
insights_tasks = [t for t in insights_tasks if t.user_id == user_id_str]
|
||||
|
||||
for task in insights_tasks:
|
||||
try:
|
||||
user_job_store = get_user_job_store_name(task.user_id, db)
|
||||
except Exception as e:
|
||||
user_job_store = 'default'
|
||||
logger.debug(f"Could not get job store for user {task.user_id}: {e}")
|
||||
|
||||
# Format as recurring weekly job
|
||||
job_info = {
|
||||
'id': f"platform_insights_{task.platform}_{task.user_id}",
|
||||
'trigger_type': 'CronTrigger', # Weekly recurring
|
||||
'next_run_time': task.next_check.isoformat() if task.next_check else None,
|
||||
'user_id': task.user_id,
|
||||
'job_store': 'default',
|
||||
'user_job_store': user_job_store,
|
||||
'function_name': f'{task.platform}_insights_executor.execute_task',
|
||||
'platform': task.platform,
|
||||
'task_id': task.id,
|
||||
'is_database_task': True,
|
||||
'frequency': 'Weekly',
|
||||
'task_category': 'platform_insights'
|
||||
}
|
||||
|
||||
formatted_jobs.append(job_info)
|
||||
except Exception as e:
|
||||
logger.error(f"Error loading platform insights tasks: {e}", exc_info=True)
|
||||
|
||||
# Get active strategies count
|
||||
active_strategies = stats.get('active_strategies_count', 0)
|
||||
|
||||
# Get last_update from stats (added by scheduler for frontend polling)
|
||||
last_update = stats.get('last_update')
|
||||
|
||||
# Calculate cumulative/historical values from scheduler_event_logs
|
||||
# Calculate cumulative/historical values from persistent cumulative stats table
|
||||
# Fallback to event logs aggregation if cumulative stats table doesn't exist or is invalid
|
||||
cumulative_stats = {}
|
||||
try:
|
||||
# First, check total events in database for debugging
|
||||
total_events = db.query(func.count(SchedulerEventLog.id)).scalar() or 0
|
||||
from models.scheduler_cumulative_stats_model import SchedulerCumulativeStats
|
||||
|
||||
# Check for check_cycle events specifically
|
||||
check_cycle_count = db.query(func.count(SchedulerEventLog.id)).filter(
|
||||
SchedulerEventLog.event_type == 'check_cycle'
|
||||
).scalar() or 0
|
||||
# Try to get cumulative stats from dedicated table (persistent across restarts)
|
||||
cumulative_stats_row = db.query(SchedulerCumulativeStats).filter(
|
||||
SchedulerCumulativeStats.id == 1
|
||||
).first()
|
||||
|
||||
# Also check for other event types that might have task counts
|
||||
job_failed_count = db.query(func.count(SchedulerEventLog.id)).filter(
|
||||
SchedulerEventLog.event_type == 'job_failed'
|
||||
).scalar() or 0
|
||||
job_completed_count = db.query(func.count(SchedulerEventLog.id)).filter(
|
||||
SchedulerEventLog.event_type == 'job_completed'
|
||||
).scalar() or 0
|
||||
|
||||
logger.warning(
|
||||
f"[Dashboard] Database stats: {total_events} total events, "
|
||||
f"{check_cycle_count} check_cycles, {job_failed_count} job_failed, "
|
||||
f"{job_completed_count} job_completed"
|
||||
)
|
||||
|
||||
if check_cycle_count > 0:
|
||||
logger.warning(f"[Dashboard] Found {check_cycle_count} check cycle events in database")
|
||||
# Aggregate check cycle events for cumulative totals
|
||||
result = db.query(
|
||||
func.count(SchedulerEventLog.id),
|
||||
func.sum(SchedulerEventLog.tasks_found),
|
||||
func.sum(SchedulerEventLog.tasks_executed),
|
||||
func.sum(SchedulerEventLog.tasks_failed)
|
||||
).filter(
|
||||
SchedulerEventLog.event_type == 'check_cycle'
|
||||
).first()
|
||||
if cumulative_stats_row:
|
||||
# Use persistent cumulative stats
|
||||
cumulative_stats = {
|
||||
'total_check_cycles': int(cumulative_stats_row.total_check_cycles or 0),
|
||||
'cumulative_tasks_found': int(cumulative_stats_row.cumulative_tasks_found or 0),
|
||||
'cumulative_tasks_executed': int(cumulative_stats_row.cumulative_tasks_executed or 0),
|
||||
'cumulative_tasks_failed': int(cumulative_stats_row.cumulative_tasks_failed or 0),
|
||||
'cumulative_tasks_skipped': int(cumulative_stats_row.cumulative_tasks_skipped or 0),
|
||||
'cumulative_job_completed': int(cumulative_stats_row.cumulative_job_completed or 0),
|
||||
'cumulative_job_failed': int(cumulative_stats_row.cumulative_job_failed or 0)
|
||||
}
|
||||
|
||||
if result:
|
||||
# SQLAlchemy returns tuple for multi-column queries
|
||||
# SUM returns NULL when no rows, handle that
|
||||
total_cycles = result[0] if result[0] is not None else 0
|
||||
total_found = result[1] if result[1] is not None else 0
|
||||
total_executed = result[2] if result[2] is not None else 0
|
||||
total_failed = result[3] if result[3] is not None else 0
|
||||
|
||||
cumulative_stats = {
|
||||
'total_check_cycles': int(total_cycles),
|
||||
'cumulative_tasks_found': int(total_found),
|
||||
'cumulative_tasks_executed': int(total_executed),
|
||||
'cumulative_tasks_failed': int(total_failed)
|
||||
}
|
||||
|
||||
logger.warning(f"[Dashboard] Cumulative stats from check_cycles: {cumulative_stats}")
|
||||
else:
|
||||
# No results (shouldn't happen with COUNT, but handle it)
|
||||
cumulative_stats = {
|
||||
'total_check_cycles': 0,
|
||||
'cumulative_tasks_found': 0,
|
||||
'cumulative_tasks_executed': 0,
|
||||
'cumulative_tasks_failed': 0
|
||||
}
|
||||
logger.warning("[Dashboard] Query returned None (no check cycle events)")
|
||||
logger.debug(
|
||||
f"[Dashboard] Using persistent cumulative stats: "
|
||||
f"cycles={cumulative_stats['total_check_cycles']}, "
|
||||
f"found={cumulative_stats['cumulative_tasks_found']}, "
|
||||
f"executed={cumulative_stats['cumulative_tasks_executed']}, "
|
||||
f"failed={cumulative_stats['cumulative_tasks_failed']}"
|
||||
)
|
||||
|
||||
# Validate cumulative stats by comparing with event logs (for verification)
|
||||
check_cycle_count = db.query(func.count(SchedulerEventLog.id)).filter(
|
||||
SchedulerEventLog.event_type == 'check_cycle'
|
||||
).scalar() or 0
|
||||
|
||||
if cumulative_stats['total_check_cycles'] != check_cycle_count:
|
||||
logger.warning(
|
||||
f"[Dashboard] ⚠️ Cumulative stats validation mismatch: "
|
||||
f"cumulative_stats.total_check_cycles={cumulative_stats['total_check_cycles']} "
|
||||
f"vs event_logs.count={check_cycle_count}. "
|
||||
f"Rebuilding cumulative stats from event logs..."
|
||||
)
|
||||
# Rebuild cumulative stats from event logs
|
||||
cumulative_stats = _rebuild_cumulative_stats_from_events(db)
|
||||
# Update the persistent table
|
||||
if cumulative_stats_row:
|
||||
cumulative_stats_row.total_check_cycles = cumulative_stats['total_check_cycles']
|
||||
cumulative_stats_row.cumulative_tasks_found = cumulative_stats['cumulative_tasks_found']
|
||||
cumulative_stats_row.cumulative_tasks_executed = cumulative_stats['cumulative_tasks_executed']
|
||||
cumulative_stats_row.cumulative_tasks_failed = cumulative_stats['cumulative_tasks_failed']
|
||||
cumulative_stats_row.cumulative_tasks_skipped = cumulative_stats.get('cumulative_tasks_skipped', 0)
|
||||
db.commit()
|
||||
logger.warning(f"[Dashboard] ✅ Rebuilt cumulative stats: {cumulative_stats}")
|
||||
else:
|
||||
# No check cycles yet, but we can still show job counts
|
||||
# Log detailed info about why cumulative stats are 0
|
||||
if stats.get('total_checks', 0) > 0:
|
||||
logger.warning(
|
||||
f"[Dashboard] ⚠️ Scheduler shows {stats.get('total_checks', 0)} checks in memory, "
|
||||
f"but NO check_cycle events found in database. "
|
||||
f"This suggests check_cycle events are not being saved properly."
|
||||
)
|
||||
else:
|
||||
logger.warning(
|
||||
f"[Dashboard] No check_cycle events yet. "
|
||||
f"Scheduler interval: {stats.get('check_interval_minutes', 60)}min. "
|
||||
f"First check cycle will run after interval expires. "
|
||||
f"One-time jobs: {job_completed_count} completed, {job_failed_count} failed"
|
||||
)
|
||||
# Cumulative stats table doesn't exist or is empty, rebuild from event logs
|
||||
logger.warning(
|
||||
"[Dashboard] Cumulative stats table not found or empty. "
|
||||
"Rebuilding from event logs..."
|
||||
)
|
||||
cumulative_stats = _rebuild_cumulative_stats_from_events(db)
|
||||
|
||||
# Create/update the persistent table
|
||||
cumulative_stats_row = SchedulerCumulativeStats.get_or_create(db)
|
||||
cumulative_stats_row.total_check_cycles = cumulative_stats['total_check_cycles']
|
||||
cumulative_stats_row.cumulative_tasks_found = cumulative_stats['cumulative_tasks_found']
|
||||
cumulative_stats_row.cumulative_tasks_executed = cumulative_stats['cumulative_tasks_executed']
|
||||
cumulative_stats_row.cumulative_tasks_failed = cumulative_stats['cumulative_tasks_failed']
|
||||
cumulative_stats_row.cumulative_tasks_skipped = cumulative_stats.get('cumulative_tasks_skipped', 0)
|
||||
db.commit()
|
||||
logger.warning(f"[Dashboard] ✅ Created/updated cumulative stats: {cumulative_stats}")
|
||||
|
||||
except ImportError:
|
||||
# Cumulative stats model doesn't exist yet (migration not run)
|
||||
logger.warning(
|
||||
"[Dashboard] Cumulative stats model not found. "
|
||||
"Falling back to event logs aggregation. "
|
||||
"Run migration: create_scheduler_cumulative_stats.sql"
|
||||
)
|
||||
cumulative_stats = _rebuild_cumulative_stats_from_events(db)
|
||||
except Exception as e:
|
||||
logger.error(f"Error calculating cumulative stats: {e}", exc_info=True)
|
||||
cumulative_stats = {
|
||||
'total_check_cycles': 0,
|
||||
'cumulative_tasks_found': 0,
|
||||
'cumulative_tasks_executed': 0,
|
||||
'cumulative_tasks_failed': 0
|
||||
}
|
||||
logger.error(f"[Dashboard] Error getting cumulative stats: {e}", exc_info=True)
|
||||
# Fallback to event logs aggregation
|
||||
cumulative_stats = _rebuild_cumulative_stats_from_events(db)
|
||||
|
||||
return {
|
||||
'stats': {
|
||||
@@ -259,8 +390,9 @@ async def get_scheduler_dashboard(
|
||||
},
|
||||
'jobs': formatted_jobs,
|
||||
'job_count': len(formatted_jobs),
|
||||
'recurring_jobs': 1 + len([j for j in formatted_jobs if j.get('is_database_task')]), # check_due_tasks + OAuth tasks
|
||||
'recurring_jobs': 1 + len([j for j in formatted_jobs if j.get('is_database_task')]), # check_due_tasks + all DB tasks
|
||||
'one_time_jobs': len([j for j in formatted_jobs if not j.get('is_database_task') and j.get('trigger_type') == 'DateTrigger']),
|
||||
'registered_task_types': stats.get('registered_types', []), # Include registered task types
|
||||
'user_isolation': {
|
||||
'enabled': True,
|
||||
'current_user_id': user_id_str
|
||||
@@ -704,3 +836,381 @@ async def get_recent_scheduler_logs(
|
||||
logger.error(f"Error getting recent scheduler logs: {e}")
|
||||
raise HTTPException(status_code=500, detail=f"Failed to get recent scheduler logs: {str(e)}")
|
||||
|
||||
|
||||
@router.get("/platform-insights/status/{user_id}")
async def get_platform_insights_status(
    user_id: str,
    db: Session = Depends(get_db),
    current_user: Dict[str, Any] = Depends(get_current_user)
):
    """
    Get platform insights task status for a user.

    In addition to reading the stored tasks, this endpoint creates an insights
    task for every connected insights platform ('gsc', 'bing') that does not
    have one yet. That provisioning step is best-effort: any failure is logged
    and the tasks that already exist are still returned.

    Args:
        user_id: ID of the user whose tasks are requested; must match the
            authenticated user's ID.
        db: Database session.
        current_user: Authenticated user payload (its 'id' is compared to user_id).

    Returns:
        Dict with 'success', 'user_id', 'gsc_tasks', 'bing_tasks' (formatted
        task dicts) and 'total_tasks'.

    Raises:
        HTTPException: 403 when user_id is not the authenticated user,
            500 on any other unexpected error.
    """
    try:
        # Verify user can only access their own data
        if str(current_user.get('id')) != user_id:
            raise HTTPException(status_code=403, detail="Access denied")

        logger.debug(f"[Platform Insights Status] Getting status for user: {user_id}")

        def load_tasks():
            """Fetch all insights tasks of the user ordered by platform and creation time."""
            return db.query(PlatformInsightsTask).filter(
                PlatformInsightsTask.user_id == user_id
            ).order_by(PlatformInsightsTask.platform, PlatformInsightsTask.created_at).all()

        tasks = load_tasks()

        # Check if user has connected platforms but missing insights tasks.
        # Discovery is best-effort: a failure while importing the services or
        # listing connected platforms must not turn a status read into a 500.
        missing_platforms = []
        create_platform_insights_task = None
        try:
            from services.oauth_token_monitoring_service import get_connected_platforms
            from services.platform_insights_monitoring_service import create_platform_insights_task

            connected_platforms = get_connected_platforms(user_id)
            insights_platforms = ['gsc', 'bing']
            existing_platforms = {task.platform for task in tasks}
            missing_platforms = [
                p for p in connected_platforms
                if p in insights_platforms and p not in existing_platforms
            ]
        except Exception as e:
            missing_platforms = []
            logger.warning(
                f"[Platform Insights Status] Could not determine connected platforms "
                f"for user {user_id}: {e}",
                exc_info=True
            )

        if missing_platforms:
            logger.info(
                f"[Platform Insights Status] User {user_id} has connected platforms {missing_platforms} "
                f"but missing insights tasks. Creating tasks..."
            )

            for platform in missing_platforms:
                try:
                    # Don't fetch site_url here - it requires API calls.
                    # The executor will fetch it when the task runs, which keeps
                    # status checks free of external API traffic.
                    result = create_platform_insights_task(
                        user_id=user_id,
                        platform=platform,
                        site_url=None,  # Will be fetched by executor when task runs
                        db=db
                    )

                    if result.get('success'):
                        logger.info(f"[Platform Insights Status] Created {platform.upper()} insights task for user {user_id}")
                    else:
                        logger.warning(f"[Platform Insights Status] Failed to create {platform} task: {result.get('error')}")
                except Exception as e:
                    logger.warning(f"[Platform Insights Status] Error creating {platform} task: {e}", exc_info=True)

            # Re-query tasks after creation
            tasks = load_tasks()

        # Group tasks by platform
        gsc_tasks = [t for t in tasks if t.platform == 'gsc']
        bing_tasks = [t for t in tasks if t.platform == 'bing']

        logger.debug(
            f"[Platform Insights Status] Found {len(tasks)} total tasks: "
            f"{len(gsc_tasks)} GSC, {len(bing_tasks)} Bing"
        )

        # Format tasks
        def format_task(task: PlatformInsightsTask) -> Dict[str, Any]:
            """Convert a task row into a JSON-serializable dict (datetimes as ISO strings)."""
            return {
                'id': task.id,
                'platform': task.platform,
                'site_url': task.site_url,
                'status': task.status,
                'last_check': task.last_check.isoformat() if task.last_check else None,
                'last_success': task.last_success.isoformat() if task.last_success else None,
                'last_failure': task.last_failure.isoformat() if task.last_failure else None,
                'failure_reason': task.failure_reason,
                'next_check': task.next_check.isoformat() if task.next_check else None,
                'created_at': task.created_at.isoformat() if task.created_at else None,
                'updated_at': task.updated_at.isoformat() if task.updated_at else None
            }

        return {
            'success': True,
            'user_id': user_id,
            'gsc_tasks': [format_task(t) for t in gsc_tasks],
            'bing_tasks': [format_task(t) for t in bing_tasks],
            'total_tasks': len(tasks)
        }

    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error getting platform insights status for user {user_id}: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=f"Failed to get platform insights status: {str(e)}")
|
||||
|
||||
|
||||
@router.get("/website-analysis/status/{user_id}")
async def get_website_analysis_status(
    user_id: str,
    db: Session = Depends(get_db),
    current_user: Dict[str, Any] = Depends(get_current_user)
):
    """
    Report the website analysis tasks that belong to a user.

    Args:
        user_id: ID of the user whose tasks are requested; must match the
            authenticated user's ID.
        db: Database session.
        current_user: Authenticated user payload.

    Returns:
        Dict with 'success' and a 'data' dict holding 'user_id',
        'user_website_tasks', 'competitor_tasks', 'total_tasks',
        'active_tasks' and 'failed_tasks'.

    Raises:
        HTTPException: 403 on a user mismatch, 500 on unexpected errors.
    """
    try:
        # Users may only read their own tasks
        if str(current_user.get('id')) != user_id:
            raise HTTPException(status_code=403, detail="Access denied")

        logger.debug(f"[Website Analysis Status] Getting status for user: {user_id}")

        # Load every website analysis task of the user, ordered by type then age
        task_query = db.query(WebsiteAnalysisTask).filter(
            WebsiteAnalysisTask.user_id == user_id
        )
        tasks = task_query.order_by(
            WebsiteAnalysisTask.task_type, WebsiteAnalysisTask.created_at
        ).all()

        # Split into the user's own site and competitor sites
        user_website_tasks = []
        competitor_tasks = []
        for row in tasks:
            if row.task_type == 'user_website':
                user_website_tasks.append(row)
            elif row.task_type == 'competitor':
                competitor_tasks.append(row)

        logger.debug(
            f"[Website Analysis Status] Found {len(tasks)} tasks for user {user_id}: "
            f"{len(user_website_tasks)} user website, {len(competitor_tasks)} competitors"
        )

        def format_task(task: WebsiteAnalysisTask) -> Dict[str, Any]:
            """Serialize one task row; datetimes become ISO strings or None."""
            payload: Dict[str, Any] = {
                'id': task.id,
                'website_url': task.website_url,
                'task_type': task.task_type,
                'competitor_id': task.competitor_id,
                'status': task.status,
            }
            payload['last_check'] = task.last_check.isoformat() if task.last_check else None
            payload['last_success'] = task.last_success.isoformat() if task.last_success else None
            payload['last_failure'] = task.last_failure.isoformat() if task.last_failure else None
            payload['failure_reason'] = task.failure_reason
            payload['next_check'] = task.next_check.isoformat() if task.next_check else None
            payload['frequency_days'] = task.frequency_days
            payload['created_at'] = task.created_at.isoformat() if task.created_at else None
            payload['updated_at'] = task.updated_at.isoformat() if task.updated_at else None
            return payload

        # Status counters across all of the user's tasks
        active_tasks = sum(1 for row in tasks if row.status == 'active')
        failed_tasks = sum(1 for row in tasks if row.status == 'failed')

        data = {
            'user_id': user_id,
            'user_website_tasks': [format_task(row) for row in user_website_tasks],
            'competitor_tasks': [format_task(row) for row in competitor_tasks],
            'total_tasks': len(tasks),
            'active_tasks': active_tasks,
            'failed_tasks': failed_tasks
        }
        return {'success': True, 'data': data}

    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error getting website analysis status for user {user_id}: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=f"Failed to get website analysis status: {str(e)}")
|
||||
|
||||
|
||||
@router.get("/website-analysis/logs/{user_id}")
async def get_website_analysis_logs(
    user_id: str,
    task_id: Optional[int] = Query(None),
    limit: int = Query(10, ge=1, le=100),
    offset: int = Query(0, ge=0),
    db: Session = Depends(get_db),
    current_user: Dict[str, Any] = Depends(get_current_user)
):
    """
    Get execution logs for website analysis tasks.

    Args:
        user_id: User ID; must match the authenticated user's ID.
        task_id: Optional task ID to filter logs
        limit: Maximum number of logs to return
        offset: Pagination offset
        db: Database session.
        current_user: Authenticated user payload.

    Returns:
        Dict with 'logs' (newest first), 'total_count' (all matching logs,
        ignoring pagination), 'limit', 'offset' and 'has_more'.

    Raises:
        HTTPException: 403 on a user mismatch, 500 on unexpected errors.
    """
    try:
        # Verify user can only access their own data
        if str(current_user.get('id')) != user_id:
            raise HTTPException(status_code=403, detail="Access denied")

        # The join restricts logs to tasks owned by this user
        query = db.query(WebsiteAnalysisExecutionLog).join(
            WebsiteAnalysisTask,
            WebsiteAnalysisExecutionLog.task_id == WebsiteAnalysisTask.id
        ).filter(
            WebsiteAnalysisTask.user_id == user_id
        )

        # Compare against None so that a task_id of 0 is still applied as a filter
        if task_id is not None:
            query = query.filter(WebsiteAnalysisExecutionLog.task_id == task_id)

        # Get total count (before pagination is applied)
        total_count = query.count()

        logs = query.order_by(
            desc(WebsiteAnalysisExecutionLog.execution_date)
        ).offset(offset).limit(limit).all()

        # Load the tasks referenced by this page in ONE query instead of one
        # query per log row.
        tasks_by_id = {}
        page_task_ids = list({log.task_id for log in logs})
        if page_task_ids:
            page_tasks = db.query(WebsiteAnalysisTask).filter(
                WebsiteAnalysisTask.id.in_(page_task_ids)
            ).all()
            tasks_by_id = {task.id: task for task in page_tasks}

        # Format logs
        formatted_logs = []
        for log in logs:
            task = tasks_by_id.get(log.task_id)

            formatted_logs.append({
                'id': log.id,
                'task_id': log.task_id,
                'website_url': task.website_url if task else None,
                'task_type': task.task_type if task else None,
                'execution_date': log.execution_date.isoformat() if log.execution_date else None,
                'status': log.status,
                'result_data': log.result_data,
                'error_message': log.error_message,
                'execution_time_ms': log.execution_time_ms,
                'created_at': log.created_at.isoformat() if log.created_at else None
            })

        return {
            'logs': formatted_logs,
            'total_count': total_count,
            'limit': limit,
            'offset': offset,
            'has_more': (offset + limit) < total_count
        }

    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error getting website analysis logs for user {user_id}: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=f"Failed to get website analysis logs: {str(e)}")
|
||||
|
||||
|
||||
@router.post("/website-analysis/retry/{task_id}")
async def retry_website_analysis(
    task_id: int,
    db: Session = Depends(get_db),
    current_user: Dict[str, Any] = Depends(get_current_user)
):
    """
    Re-arm a website analysis task so that it is due immediately.

    The task is set back to 'active', its failure reason is cleared and its
    next check time is set to the current UTC time.

    Args:
        task_id: Task ID to retry
        db: Database session.
        current_user: Authenticated user payload.

    Returns:
        Dict with 'success', a human-readable 'message' and a 'task' summary
        ('id', 'website_url', 'status', 'next_check').

    Raises:
        HTTPException: 404 when the task does not exist, 403 when it belongs
            to another user, 500 on unexpected errors.
    """
    try:
        # Look the task up by primary key
        lookup = db.query(WebsiteAnalysisTask).filter(WebsiteAnalysisTask.id == task_id)
        task = lookup.first()
        if not task:
            raise HTTPException(status_code=404, detail="Task not found")

        # Only the owner may retry the task
        requester_id = str(current_user.get('id'))
        if requester_id != task.user_id:
            raise HTTPException(status_code=403, detail="Access denied")

        # Clear the failure state and make the task due right away
        task.status = 'active'
        task.failure_reason = None
        task.next_check = datetime.utcnow()  # Schedule immediately
        task.updated_at = datetime.utcnow()
        db.commit()

        logger.info(f"Manually retried website analysis task {task_id} for user {task.user_id}")

        task_summary = {
            'id': task.id,
            'website_url': task.website_url,
            'status': task.status,
            'next_check': task.next_check.isoformat() if task.next_check else None
        }
        return {
            'success': True,
            'message': f'Website analysis task {task_id} scheduled for immediate execution',
            'task': task_summary
        }

    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error retrying website analysis task {task_id}: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=f"Failed to retry website analysis: {str(e)}")
|
||||
|
||||
|
||||
@router.get("/platform-insights/logs/{user_id}")
async def get_platform_insights_logs(
    user_id: str,
    task_id: Optional[int] = Query(None),
    limit: int = Query(10, ge=1, le=100),
    db: Session = Depends(get_db),
    current_user: Dict[str, Any] = Depends(get_current_user),
    offset: int = Query(0, ge=0)
):
    """
    Get execution logs for platform insights tasks.

    Args:
        user_id: User ID; must match the authenticated user's ID.
        task_id: Optional task ID to filter logs
        limit: Maximum number of logs to return
        db: Database session.
        current_user: Authenticated user payload.
        offset: Pagination offset (defaults to 0, i.e. the newest logs).

    Returns:
        Dict with 'success', 'logs' (newest first), 'total_count' (all matching
        logs, ignoring pagination), 'limit', 'offset' and 'has_more'.

    Raises:
        HTTPException: 403 on a user mismatch, 500 on unexpected errors.
    """
    try:
        # Verify user can only access their own data
        if str(current_user.get('id')) != user_id:
            raise HTTPException(status_code=403, detail="Access denied")

        # The join restricts logs to tasks owned by this user
        query = db.query(PlatformInsightsExecutionLog).join(
            PlatformInsightsTask,
            PlatformInsightsExecutionLog.task_id == PlatformInsightsTask.id
        ).filter(
            PlatformInsightsTask.user_id == user_id
        )

        # Compare against None so that a task_id of 0 is still applied as a filter
        if task_id is not None:
            query = query.filter(PlatformInsightsExecutionLog.task_id == task_id)

        # Count all matching rows before pagination so 'total_count' is the
        # real total rather than the size of the returned page.
        total_count = query.count()

        logs = query.order_by(
            desc(PlatformInsightsExecutionLog.execution_date)
        ).offset(offset).limit(limit).all()

        def format_log(log: PlatformInsightsExecutionLog) -> Dict[str, Any]:
            """Convert a log row into a JSON-serializable dict (datetimes as ISO strings)."""
            return {
                'id': log.id,
                'task_id': log.task_id,
                'execution_date': log.execution_date.isoformat() if log.execution_date else None,
                'status': log.status,
                'result_data': log.result_data,
                'error_message': log.error_message,
                'execution_time_ms': log.execution_time_ms,
                'data_source': log.data_source,
                'created_at': log.created_at.isoformat() if log.created_at else None
            }

        return {
            'success': True,
            'logs': [format_log(log) for log in logs],
            'total_count': total_count,
            'limit': limit,
            'offset': offset,
            'has_more': (offset + limit) < total_count
        }

    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error getting platform insights logs for user {user_id}: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=f"Failed to get platform insights logs: {str(e)}")
|
||||
|
||||
|
||||
@@ -5,18 +5,24 @@ Handles Wix authentication, connection status, and blog publishing.
|
||||
"""
|
||||
|
||||
from fastapi import APIRouter, HTTPException, Depends, Request
|
||||
from fastapi.responses import HTMLResponse
|
||||
from typing import Dict, Any, Optional
|
||||
from loguru import logger
|
||||
from pydantic import BaseModel
|
||||
|
||||
from services.wix_service import WixService
|
||||
from services.integrations.wix_oauth import WixOAuthService
|
||||
from middleware.auth_middleware import get_current_user
|
||||
import os
|
||||
|
||||
router = APIRouter(prefix="/api/wix", tags=["Wix Integration"])
|
||||
|
||||
# Initialize Wix service
|
||||
wix_service = WixService()
|
||||
|
||||
# Initialize Wix OAuth service for token storage
|
||||
wix_oauth_service = WixOAuthService(db_path=os.path.abspath("alwrity.db"))
|
||||
|
||||
|
||||
class WixAuthRequest(BaseModel):
|
||||
"""Request model for Wix authentication"""
|
||||
@@ -88,17 +94,41 @@ async def handle_oauth_callback(request: WixAuthRequest, current_user: dict = De
|
||||
Token information and connection status
|
||||
"""
|
||||
try:
|
||||
user_id = current_user.get('id')
|
||||
if not user_id:
|
||||
raise HTTPException(status_code=400, detail="User ID not found")
|
||||
|
||||
# Exchange code for tokens
|
||||
tokens = wix_service.exchange_code_for_tokens(request.code)
|
||||
|
||||
# Get site information
|
||||
# Get site information to extract site_id and member_id
|
||||
site_info = wix_service.get_site_info(tokens['access_token'])
|
||||
site_id = site_info.get('siteId') or site_info.get('site_id')
|
||||
|
||||
# Extract member_id from token if possible
|
||||
member_id = None
|
||||
try:
|
||||
member_id = wix_service.extract_member_id_from_access_token(tokens['access_token'])
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Check permissions
|
||||
permissions = wix_service.check_blog_permissions(tokens['access_token'])
|
||||
|
||||
# TODO: Store tokens securely in database associated with current_user
|
||||
# For now, we'll return them (in production, store in encrypted database)
|
||||
# Store tokens securely in database
|
||||
stored = wix_oauth_service.store_tokens(
|
||||
user_id=user_id,
|
||||
access_token=tokens['access_token'],
|
||||
refresh_token=tokens.get('refresh_token'),
|
||||
expires_in=tokens.get('expires_in'),
|
||||
token_type=tokens.get('token_type', 'Bearer'),
|
||||
scope=tokens.get('scope'),
|
||||
site_id=site_id,
|
||||
member_id=member_id
|
||||
)
|
||||
|
||||
if not stored:
|
||||
logger.warning(f"Failed to store Wix tokens for user {user_id}, but OAuth succeeded")
|
||||
|
||||
return {
|
||||
"success": True,
|
||||
@@ -125,6 +155,29 @@ async def handle_oauth_callback_get(code: str, state: Optional[str] = None, requ
|
||||
tokens = wix_service.exchange_code_for_tokens(code)
|
||||
site_info = wix_service.get_site_info(tokens['access_token'])
|
||||
permissions = wix_service.check_blog_permissions(tokens['access_token'])
|
||||
|
||||
# Store tokens in database if we have user_id
|
||||
user_id = current_user.get('id') if current_user else None
|
||||
if user_id:
|
||||
site_id = site_info.get('siteId') or site_info.get('site_id')
|
||||
member_id = None
|
||||
try:
|
||||
member_id = wix_service.extract_member_id_from_access_token(tokens['access_token'])
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
stored = wix_oauth_service.store_tokens(
|
||||
user_id=user_id,
|
||||
access_token=tokens['access_token'],
|
||||
refresh_token=tokens.get('refresh_token'),
|
||||
expires_in=tokens.get('expires_in'),
|
||||
token_type=tokens.get('token_type', 'Bearer'),
|
||||
scope=tokens.get('scope'),
|
||||
site_id=site_id,
|
||||
member_id=member_id
|
||||
)
|
||||
if not stored:
|
||||
logger.warning(f"Failed to store Wix tokens for user {user_id} in GET callback")
|
||||
|
||||
# Build success payload for postMessage
|
||||
payload = {
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
from pydantic import BaseModel, Field
|
||||
from typing import List, Optional, Dict, Any
|
||||
from typing import List, Optional, Dict, Any, Union
|
||||
from enum import Enum
|
||||
|
||||
|
||||
@@ -81,6 +81,7 @@ class ResearchProvider(str, Enum):
|
||||
"""Research provider options."""
|
||||
GOOGLE = "google" # Gemini native grounding
|
||||
EXA = "exa" # Exa neural search
|
||||
TAVILY = "tavily" # Tavily AI-powered search
|
||||
|
||||
|
||||
class ResearchConfig(BaseModel):
|
||||
@@ -100,6 +101,23 @@ class ResearchConfig(BaseModel):
|
||||
exa_include_domains: List[str] = [] # Domain whitelist
|
||||
exa_exclude_domains: List[str] = [] # Domain blacklist
|
||||
exa_search_type: Optional[str] = "auto" # "auto", "keyword", "neural"
|
||||
|
||||
# Tavily-specific options
|
||||
tavily_topic: Optional[str] = "general" # general, news, finance
|
||||
tavily_search_depth: Optional[str] = "basic" # basic (1 credit), advanced (2 credits)
|
||||
tavily_include_domains: List[str] = [] # Domain whitelist (max 300)
|
||||
tavily_exclude_domains: List[str] = [] # Domain blacklist (max 150)
|
||||
tavily_include_answer: Union[bool, str] = False # basic, advanced, true, false
|
||||
tavily_include_raw_content: Union[bool, str] = False # markdown, text, true, false
|
||||
tavily_include_images: bool = False
|
||||
tavily_include_image_descriptions: bool = False
|
||||
tavily_include_favicon: bool = False
|
||||
tavily_time_range: Optional[str] = None # day, week, month, year, d, w, m, y
|
||||
tavily_start_date: Optional[str] = None # YYYY-MM-DD
|
||||
tavily_end_date: Optional[str] = None # YYYY-MM-DD
|
||||
tavily_country: Optional[str] = None # Country code (only for general topic)
|
||||
tavily_chunks_per_source: int = 3 # 1-3 (only for advanced search)
|
||||
tavily_auto_parameters: bool = False # Auto-configure parameters based on query
|
||||
|
||||
|
||||
class BlogResearchRequest(BaseModel):
|
||||
|
||||
@@ -17,6 +17,7 @@ class OnboardingSession(Base):
|
||||
website_analyses = relationship('WebsiteAnalysis', back_populates='session', cascade="all, delete-orphan")
|
||||
research_preferences = relationship('ResearchPreferences', back_populates='session', cascade="all, delete-orphan", uselist=False)
|
||||
persona_data = relationship('PersonaData', back_populates='session', cascade="all, delete-orphan", uselist=False)
|
||||
competitor_analyses = relationship('CompetitorAnalysis', back_populates='session', cascade="all, delete-orphan")
|
||||
|
||||
def __repr__(self):
|
||||
return f"<OnboardingSession(id={self.id}, user_id={self.user_id}, step={self.current_step}, progress={self.progress})>"
|
||||
@@ -188,4 +189,46 @@ class PersonaData(Base):
|
||||
'research_persona_generated_at': self.research_persona_generated_at.isoformat() if self.research_persona_generated_at else None,
|
||||
'created_at': self.created_at.isoformat() if self.created_at else None,
|
||||
'updated_at': self.updated_at.isoformat() if self.updated_at else None
|
||||
}
|
||||
|
||||
class CompetitorAnalysis(Base):
    """Competitor website analysis result attached to an onboarding session."""

    __tablename__ = 'competitor_analyses'

    id = Column(Integer, primary_key=True, autoincrement=True)
    # Owning onboarding session; the FK is declared with ON DELETE CASCADE
    session_id = Column(
        Integer,
        ForeignKey('onboarding_sessions.id', ondelete='CASCADE'),
        nullable=False,
    )
    competitor_url = Column(String(500), nullable=False)
    # Domain stored separately from the URL for easier queries
    competitor_domain = Column(String(255), nullable=True)
    analysis_date = Column(DateTime, default=func.now())

    # Full analysis payload (style_analysis, crawl_result, style_patterns,
    # style_guidelines - same structure as WebsiteAnalysis)
    analysis_data = Column(JSON)

    # Outcome of the analysis run: completed, failed, in_progress
    status = Column(String(50), default='completed')
    error_message = Column(Text, nullable=True)
    warning_message = Column(Text, nullable=True)
    created_at = Column(DateTime, default=func.now())
    updated_at = Column(DateTime, default=func.now(), onupdate=func.now())

    # Back-reference to the owning onboarding session
    session = relationship('OnboardingSession', back_populates='competitor_analyses')

    def __repr__(self):
        return f"<CompetitorAnalysis(id={self.id}, url={self.competitor_url}, status={self.status})>"

    def to_dict(self):
        """Return the row as a plain dict suitable for API responses."""

        def iso(moment):
            # Datetimes are emitted as ISO strings; unset values stay None
            return moment.isoformat() if moment else None

        payload = {
            'id': self.id,
            'session_id': self.session_id,
            'competitor_url': self.competitor_url,
            'competitor_domain': self.competitor_domain,
            'analysis_date': iso(self.analysis_date),
            'analysis_data': self.analysis_data,
            'status': self.status,
            'error_message': self.error_message,
            'warning_message': self.warning_message,
            'created_at': iso(self.created_at),
            'updated_at': iso(self.updated_at),
        }
        return payload
|
||||
100
backend/models/platform_insights_monitoring_models.py
Normal file
100
backend/models/platform_insights_monitoring_models.py
Normal file
@@ -0,0 +1,100 @@
|
||||
"""
|
||||
Platform Insights Monitoring Models
|
||||
Database models for tracking platform insights (GSC/Bing) fetch tasks.
|
||||
"""
|
||||
|
||||
from sqlalchemy import Column, Integer, String, Text, DateTime, JSON, Index, ForeignKey
|
||||
from sqlalchemy.orm import relationship
|
||||
from datetime import datetime
|
||||
|
||||
# Import the same Base from enhanced_strategy_models
|
||||
from models.enhanced_strategy_models import Base
|
||||
|
||||
|
||||
class PlatformInsightsTask(Base):
    """
    A recurring insights-fetch task for one user on one platform.

    One row describes the fetch state (status, last/next check times and the
    latest failure reason) for a user/platform pair.
    """
    __tablename__ = "platform_insights_tasks"

    id = Column(Integer, primary_key=True, index=True)

    # --- Who and what -------------------------------------------------------
    # Clerk user ID (string)
    user_id = Column(String(255), nullable=False, index=True)
    # 'gsc' or 'bing'
    platform = Column(String(50), nullable=False)
    # Optional: specific site URL
    site_url = Column(String(500), nullable=True)

    # --- State --------------------------------------------------------------
    # 'active', 'failed', 'paused'
    status = Column(String(50), default='active')

    # --- Execution history ---------------------------------------------------
    last_check = Column(DateTime, nullable=True)
    last_success = Column(DateTime, nullable=True)
    last_failure = Column(DateTime, nullable=True)
    failure_reason = Column(Text, nullable=True)

    # --- Scheduling ----------------------------------------------------------
    # Next scheduled check time
    next_check = Column(DateTime, nullable=True, index=True)

    # --- Bookkeeping ---------------------------------------------------------
    created_at = Column(DateTime, default=datetime.utcnow)
    updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)

    # Logs of individual runs; they are deleted together with the task
    execution_logs = relationship(
        "PlatformInsightsExecutionLog",
        back_populates="task",
        cascade="all, delete-orphan",
    )

    # Secondary indexes
    __table_args__ = (
        Index('idx_platform_insights_user_platform', 'user_id', 'platform'),
        Index('idx_platform_insights_next_check', 'next_check'),
        Index('idx_platform_insights_status', 'status'),
    )

    def __repr__(self):
        return f"<PlatformInsightsTask(id={self.id}, user_id={self.user_id}, platform={self.platform}, status={self.status})>"
|
||||
|
||||
|
||||
class PlatformInsightsExecutionLog(Base):
    """
    One execution attempt of a platform insights task.

    Stores the outcome, optional result payload, error text and timing of a
    single run.
    """
    __tablename__ = "platform_insights_execution_logs"

    id = Column(Integer, primary_key=True, index=True)

    # Task this run belongs to
    task_id = Column(
        Integer,
        ForeignKey("platform_insights_tasks.id"),
        nullable=False,
        index=True,
    )

    # --- Run details ---------------------------------------------------------
    execution_date = Column(DateTime, default=datetime.utcnow, nullable=False)
    # 'success', 'failed', 'skipped'
    status = Column(String(50), nullable=False)

    # --- Outcome -------------------------------------------------------------
    # Insights data, metrics, etc.
    result_data = Column(JSON, nullable=True)
    error_message = Column(Text, nullable=True)
    execution_time_ms = Column(Integer, nullable=True)
    # 'cached', 'api', 'onboarding'
    data_source = Column(String(50), nullable=True)

    # --- Bookkeeping ---------------------------------------------------------
    created_at = Column(DateTime, default=datetime.utcnow)

    # Parent task (inverse side of PlatformInsightsTask.execution_logs)
    task = relationship("PlatformInsightsTask", back_populates="execution_logs")

    # Secondary indexes
    __table_args__ = (
        Index('idx_platform_insights_log_task_execution_date', 'task_id', 'execution_date'),
        Index('idx_platform_insights_log_status', 'status'),
    )

    def __repr__(self):
        return f"<PlatformInsightsExecutionLog(id={self.id}, task_id={self.task_id}, status={self.status}, execution_date={self.execution_date})>"
|
||||
|
||||
48
backend/models/scheduler_cumulative_stats_model.py
Normal file
48
backend/models/scheduler_cumulative_stats_model.py
Normal file
@@ -0,0 +1,48 @@
|
||||
"""
|
||||
Scheduler Cumulative Stats Model
|
||||
Model for storing persistent cumulative scheduler metrics that survive restarts.
|
||||
"""
|
||||
|
||||
from sqlalchemy import Column, Integer, DateTime, Index
|
||||
from datetime import datetime
|
||||
from models.enhanced_strategy_models import Base
|
||||
|
||||
|
||||
class SchedulerCumulativeStats(Base):
    """Single-row table of cumulative scheduler counters kept in the database."""
    __tablename__ = "scheduler_cumulative_stats"

    # The table is used as a singleton: the row with id=1 holds all counters
    id = Column(Integer, primary_key=True, index=True, default=1)

    # Counters
    total_check_cycles = Column(Integer, default=0, nullable=False)
    cumulative_tasks_found = Column(Integer, default=0, nullable=False)
    cumulative_tasks_executed = Column(Integer, default=0, nullable=False)
    cumulative_tasks_failed = Column(Integer, default=0, nullable=False)
    cumulative_tasks_skipped = Column(Integer, default=0, nullable=False)
    cumulative_job_completed = Column(Integer, default=0, nullable=False)
    cumulative_job_failed = Column(Integer, default=0, nullable=False)

    last_updated = Column(
        DateTime, default=datetime.utcnow, nullable=False, onupdate=datetime.utcnow
    )
    # Reference to last check_cycle event log ID
    last_check_cycle_id = Column(Integer, nullable=True)

    created_at = Column(DateTime, default=datetime.utcnow, nullable=False)
    updated_at = Column(
        DateTime, default=datetime.utcnow, nullable=False, onupdate=datetime.utcnow
    )

    __table_args__ = (
        Index('idx_scheduler_cumulative_stats_single_row', 'id', unique=True),
    )

    @classmethod
    def get_or_create(cls, db_session):
        """
        Fetch the singleton stats row (id=1), inserting it first when absent.

        A newly inserted row is committed on the given session before it is
        returned.

        Returns:
            SchedulerCumulativeStats instance
        """
        existing = db_session.query(cls).filter(cls.id == 1).first()
        if existing:
            return existing

        created = cls(id=1)
        db_session.add(created)
        db_session.commit()
        return created
|
||||
|
||||
105
backend/models/website_analysis_monitoring_models.py
Normal file
105
backend/models/website_analysis_monitoring_models.py
Normal file
@@ -0,0 +1,105 @@
|
||||
"""
|
||||
Website Analysis Monitoring Models
|
||||
Database models for tracking website analysis tasks and execution logs.
|
||||
"""
|
||||
|
||||
from sqlalchemy import Column, Integer, String, Text, DateTime, Boolean, JSON, Index, ForeignKey
|
||||
from sqlalchemy.orm import relationship
|
||||
from datetime import datetime
|
||||
|
||||
# Import the same Base from enhanced_strategy_models
|
||||
from models.enhanced_strategy_models import Base
|
||||
|
||||
|
||||
class WebsiteAnalysisTask(Base):
    """
    A website analysis monitoring task.

    One row per user and URL; the scheduling columns describe a recurring
    check (``next_check`` plus ``frequency_days``).
    """

    __tablename__ = "website_analysis_tasks"

    id = Column(Integer, primary_key=True, index=True)

    # --- Who the task belongs to and what it analyzes ---
    user_id = Column(  # Clerk user ID (string)
        String(255),
        nullable=False,
        index=True,
    )
    website_url = Column(String(500), nullable=False)  # URL to analyze
    task_type = Column(String(50), nullable=False)  # 'user_website' or 'competitor'
    competitor_id = Column(  # Competitor tasks only: domain or identifier
        String(255),
        nullable=True,
    )

    # --- Current state: 'active', 'failed' or 'paused' ---
    status = Column(String(50), default='active')

    # --- Outcome of previous runs ---
    last_check = Column(DateTime, nullable=True)
    last_success = Column(DateTime, nullable=True)
    last_failure = Column(DateTime, nullable=True)
    failure_reason = Column(Text, nullable=True)

    # --- Scheduling ---
    # NOTE(review): next_check is indexed both here (index=True) and by the
    # named Index in __table_args__ below; one of the two looks redundant.
    next_check = Column(DateTime, nullable=True, index=True)  # Next scheduled check time
    frequency_days = Column(Integer, default=10)  # Recurring frequency in days

    # --- Row bookkeeping ---
    created_at = Column(DateTime, default=datetime.utcnow)
    updated_at = Column(
        DateTime,
        default=datetime.utcnow,
        onupdate=datetime.utcnow,
    )

    # Execution history; log rows are deleted together with their task.
    execution_logs = relationship(
        "WebsiteAnalysisExecutionLog",
        back_populates="task",
        cascade="all, delete-orphan"
    )

    # Named indexes. The names are kept in sync with the migration script
    # to avoid conflicts.
    __table_args__ = (
        Index('idx_website_analysis_tasks_user_url', 'user_id', 'website_url'),
        Index('idx_website_analysis_tasks_user_task_type', 'user_id', 'task_type'),
        Index('idx_website_analysis_tasks_next_check', 'next_check'),
        Index('idx_website_analysis_tasks_status', 'status'),
        Index('idx_website_analysis_tasks_task_type', 'task_type'),
    )

    def __repr__(self):
        """Return a short debug representation with the identifying fields."""
        return (
            f"<WebsiteAnalysisTask(id={self.id}, user_id={self.user_id}, "
            f"url={self.website_url}, type={self.task_type}, status={self.status})>"
        )
|
||||
|
||||
|
||||
class WebsiteAnalysisExecutionLog(Base):
    """
    One execution attempt of a website analysis task.

    Stores the attempt's status together with its result payload or error
    details.
    """

    __tablename__ = "website_analysis_execution_logs"

    id = Column(Integer, primary_key=True, index=True)

    # Owning task
    task_id = Column(
        Integer,
        ForeignKey("website_analysis_tasks.id"),
        nullable=False,
        index=True,
    )

    # When the attempt ran and how it ended:
    # 'success', 'failed', 'skipped' or 'running'
    execution_date = Column(DateTime, default=datetime.utcnow, nullable=False)
    status = Column(String(50), nullable=False)

    # Outcome payload
    result_data = Column(JSON, nullable=True)  # Analysis results (style_analysis, crawl_result, etc.)
    error_message = Column(Text, nullable=True)
    execution_time_ms = Column(Integer, nullable=True)

    # Row bookkeeping
    created_at = Column(DateTime, default=datetime.utcnow)

    # Back-reference to the owning task
    task = relationship("WebsiteAnalysisTask", back_populates="execution_logs")

    # Named indexes. The names are kept in sync with the migration script
    # to avoid conflicts.
    __table_args__ = (
        Index('idx_website_analysis_execution_logs_task_execution_date', 'task_id', 'execution_date'),
        Index('idx_website_analysis_execution_logs_status', 'status'),
    )

    def __repr__(self):
        """Return a short debug representation with the identifying fields."""
        return (
            f"<WebsiteAnalysisExecutionLog(id={self.id}, task_id={self.task_id}, "
            f"status={self.status}, execution_date={self.execution_date})>"
        )
|
||||
|
||||
@@ -160,6 +160,43 @@ async def handle_bing_callback(
|
||||
"""
|
||||
return HTMLResponse(content=html_content)
|
||||
|
||||
# Create Bing insights task immediately after successful connection
|
||||
try:
|
||||
from services.database import SessionLocal
|
||||
from services.platform_insights_monitoring_service import create_platform_insights_task
|
||||
|
||||
# Get user_id from state (stored during OAuth flow)
|
||||
db = SessionLocal()
|
||||
try:
|
||||
# Get user_id from Bing OAuth service state lookup
|
||||
import sqlite3
|
||||
with sqlite3.connect(oauth_service.db_path) as conn:
|
||||
cursor = conn.cursor()
|
||||
cursor.execute('SELECT user_id FROM bing_oauth_states WHERE state = ?', (state,))
|
||||
result_db = cursor.fetchone()
|
||||
if result_db:
|
||||
user_id = result_db[0]
|
||||
|
||||
# Don't fetch site_url here - it requires API calls
|
||||
# The executor will fetch it when the task runs (weekly)
|
||||
# Create insights task without site_url to avoid API calls
|
||||
task_result = create_platform_insights_task(
|
||||
user_id=user_id,
|
||||
platform='bing',
|
||||
site_url=None, # Will be fetched by executor when task runs
|
||||
db=db
|
||||
)
|
||||
|
||||
if task_result.get('success'):
|
||||
logger.info(f"Created Bing insights task for user {user_id}")
|
||||
else:
|
||||
logger.warning(f"Failed to create Bing insights task: {task_result.get('error')}")
|
||||
finally:
|
||||
db.close()
|
||||
except Exception as e:
|
||||
# Non-critical: log but don't fail OAuth callback
|
||||
logger.warning(f"Failed to create Bing insights task after OAuth: {e}")
|
||||
|
||||
# Return success page with postMessage script
|
||||
html_content = f"""
|
||||
<!DOCTYPE html>
|
||||
|
||||
@@ -66,6 +66,45 @@ async def handle_gsc_callback(
|
||||
|
||||
if success:
|
||||
logger.info("GSC OAuth callback handled successfully")
|
||||
|
||||
# Create GSC insights task immediately after successful connection
|
||||
try:
|
||||
from services.database import SessionLocal
|
||||
from services.platform_insights_monitoring_service import create_platform_insights_task
|
||||
|
||||
# Get user_id from state (stored during OAuth flow)
|
||||
# Note: handle_oauth_callback already deleted state, so we need to get user_id from recent credentials
|
||||
db = SessionLocal()
|
||||
try:
|
||||
# Get user_id from most recent GSC credentials (since state was deleted)
|
||||
import sqlite3
|
||||
with sqlite3.connect(gsc_service.db_path) as conn:
|
||||
cursor = conn.cursor()
|
||||
cursor.execute('SELECT user_id FROM gsc_credentials ORDER BY updated_at DESC LIMIT 1')
|
||||
result = cursor.fetchone()
|
||||
if result:
|
||||
user_id = result[0]
|
||||
|
||||
# Don't fetch site_url here - it requires API calls
|
||||
# The executor will fetch it when the task runs (weekly)
|
||||
# Create insights task without site_url to avoid API calls
|
||||
task_result = create_platform_insights_task(
|
||||
user_id=user_id,
|
||||
platform='gsc',
|
||||
site_url=None, # Will be fetched by executor when task runs
|
||||
db=db
|
||||
)
|
||||
|
||||
if task_result.get('success'):
|
||||
logger.info(f"Created GSC insights task for user {user_id}")
|
||||
else:
|
||||
logger.warning(f"Failed to create GSC insights task: {task_result.get('error')}")
|
||||
finally:
|
||||
db.close()
|
||||
except Exception as e:
|
||||
# Non-critical: log but don't fail OAuth callback
|
||||
logger.warning(f"Failed to create GSC insights task after OAuth: {e}", exc_info=True)
|
||||
|
||||
html = """
|
||||
<!doctype html>
|
||||
<html>
|
||||
|
||||
90
backend/scripts/fix_website_analysis_indexes.py
Normal file
90
backend/scripts/fix_website_analysis_indexes.py
Normal file
@@ -0,0 +1,90 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Fix website analysis index name conflicts.
|
||||
Drops old conflicting indexes and ensures proper index names.
|
||||
"""
|
||||
|
||||
import sys
|
||||
import os
|
||||
import sqlite3
|
||||
from pathlib import Path
|
||||
from loguru import logger
|
||||
|
||||
# Add the backend directory to the Python path
|
||||
backend_dir = Path(__file__).parent.parent
|
||||
sys.path.insert(0, str(backend_dir))
|
||||
|
||||
def fix_indexes():
    """
    Drop the old, conflicting ``idx_status`` indexes and list what remains.

    Looks in ``<backend_dir>/alwrity.db`` for indexes named ``idx_status`` on
    the website analysis tables, drops them, then logs every index whose name
    contains ``website_analysis`` or ``competitor_analyses``.

    Returns:
        True when the database was found and every conflicting index was
        dropped (or none existed); False when the database is missing, any
        drop failed, or an unexpected error occurred.
    """
    db_path = backend_dir / "alwrity.db"

    if not db_path.exists():
        logger.error(f"Database not found at {db_path}")
        return False

    conn = sqlite3.connect(str(db_path))
    try:
        cursor = conn.cursor()

        # Check for old conflicting indexes
        cursor.execute("""
            SELECT name, tbl_name
            FROM sqlite_master
            WHERE type='index'
            AND name = 'idx_status'
            AND tbl_name IN ('website_analysis_tasks', 'website_analysis_execution_logs')
        """)
        conflicting = cursor.fetchall()

        # Names of indexes that could not be dropped; decides the return value.
        failed = []

        if conflicting:
            logger.warning(f"Found {len(conflicting)} conflicting indexes:")
            for name, tbl_name in conflicting:
                logger.warning(f" - {name} on {tbl_name}")

            # Drop old indexes
            for name, tbl_name in conflicting:
                try:
                    # Identifiers cannot be bound as parameters, so quote the
                    # name explicitly before interpolating it.
                    quoted = '"' + name.replace('"', '""') + '"'
                    cursor.execute(f"DROP INDEX IF EXISTS {quoted}")
                    logger.info(f"✅ Dropped old index: {name} on {tbl_name}")
                except sqlite3.Error as e:
                    logger.error(f"❌ Error dropping index {name}: {e}")
                    failed.append(name)

            conn.commit()
            if failed:
                # Do not claim success when a drop did not go through.
                logger.error(
                    f"❌ Could not drop {len(failed)} conflicting index(es): {', '.join(failed)}"
                )
            else:
                logger.info("✅ Index conflicts resolved")
        else:
            logger.info("✅ No conflicting indexes found")

        # Verify correct indexes exist
        cursor.execute("""
            SELECT name, tbl_name
            FROM sqlite_master
            WHERE type='index'
            AND (name LIKE '%website_analysis%' OR name LIKE '%competitor_analyses%')
            ORDER BY tbl_name, name
        """)
        indexes = cursor.fetchall()

        logger.info(f"\n📋 Current website analysis indexes ({len(indexes)}):")
        for name, tbl_name in indexes:
            logger.info(f" - {name} on {tbl_name}")

        return not failed

    except Exception as e:
        logger.error(f"Error fixing indexes: {e}")
        conn.rollback()
        return False
    finally:
        conn.close()
|
||||
|
||||
if __name__ == "__main__":
    # Script entry point: run the fix and mirror its outcome in the exit code.
    logger.info("🔧 Fixing website analysis index conflicts...")
    if fix_indexes():
        logger.info("✅ Index fix complete. You can now restart the backend.")
        exit_code = 0
    else:
        logger.error("❌ Index fix failed")
        exit_code = 1
    sys.exit(exit_code)
|
||||
|
||||
35
backend/scripts/run_cumulative_stats_migration.py
Normal file
35
backend/scripts/run_cumulative_stats_migration.py
Normal file
@@ -0,0 +1,35 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Script to run the cumulative stats migration.
|
||||
This creates the scheduler_cumulative_stats table.
|
||||
"""
|
||||
|
||||
import sqlite3
|
||||
import os
|
||||
import sys
|
||||
|
||||
# Locate the database and the migration file relative to this script:
# <backend>/scripts/<this file>  ->  <backend>/alwrity.db
script_dir = os.path.dirname(os.path.abspath(__file__))
backend_dir = os.path.dirname(script_dir)
db_path = os.path.join(backend_dir, 'alwrity.db')
migration_path = os.path.join(backend_dir, 'database', 'migrations', 'create_scheduler_cumulative_stats.sql')

if not os.path.exists(db_path):
    print(f"❌ Database not found at {db_path}")
    sys.exit(1)

if not os.path.exists(migration_path):
    print(f"❌ Migration file not found at {migration_path}")
    sys.exit(1)

try:
    # Read the SQL first, with an explicit encoding, so a bad file never
    # leaves a connection open.
    with open(migration_path, 'r', encoding='utf-8') as f:
        migration_sql = f.read()

    conn = sqlite3.connect(db_path)
    try:
        conn.executescript(migration_sql)
        conn.commit()
    finally:
        # Close the connection even when the migration script fails.
        conn.close()

    print("✅ Migration executed successfully")
except Exception as e:
    print(f"❌ Error running migration: {e}")
    sys.exit(1)
|
||||
|
||||
30
backend/scripts/verify_cumulative_stats.py
Normal file
30
backend/scripts/verify_cumulative_stats.py
Normal file
@@ -0,0 +1,30 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Verify cumulative stats table exists and has data"""
|
||||
|
||||
import sqlite3
|
||||
import os
|
||||
|
||||
# Locate the database relative to this script:
# <backend>/scripts/<this file>  ->  <backend>/alwrity.db
script_dir = os.path.dirname(os.path.abspath(__file__))
backend_dir = os.path.dirname(script_dir)
db_path = os.path.join(backend_dir, 'alwrity.db')

# sqlite3.connect() creates an empty database file when the path does not
# exist; a verification script should not have that side effect.
if not os.path.exists(db_path):
    print(f"❌ Database not found at {db_path}")
    raise SystemExit(1)

conn = sqlite3.connect(db_path)
try:
    cursor = conn.cursor()

    # Check if table exists
    cursor.execute("SELECT name FROM sqlite_master WHERE type='table' AND name='scheduler_cumulative_stats'")
    result = cursor.fetchone()
    print(f"Table exists: {result is not None}")

    if result:
        # The stats table is expected to hold a single row with id=1.
        cursor.execute("SELECT * FROM scheduler_cumulative_stats WHERE id=1")
        row = cursor.fetchone()
        if row:
            print(f"Row data: {row}")
        else:
            print("Table exists but no row with id=1")
    else:
        print("Table does not exist")
finally:
    # Release the connection even if one of the queries raises.
    conn.close()
|
||||
|
||||
@@ -16,6 +16,7 @@ from .data_filter import ResearchDataFilter
|
||||
from .base_provider import ResearchProvider as BaseResearchProvider
|
||||
from .google_provider import GoogleResearchProvider
|
||||
from .exa_provider import ExaResearchProvider
|
||||
from .tavily_provider import TavilyResearchProvider
|
||||
|
||||
__all__ = [
|
||||
'ResearchService',
|
||||
@@ -26,4 +27,5 @@ __all__ = [
|
||||
'BaseResearchProvider',
|
||||
'GoogleResearchProvider',
|
||||
'ExaResearchProvider',
|
||||
'TavilyResearchProvider',
|
||||
]
|
||||
|
||||
@@ -150,8 +150,94 @@ class ResearchService:
|
||||
raw_result = None
|
||||
else:
|
||||
raise
|
||||
|
||||
elif config.provider == ResearchProvider.TAVILY:
|
||||
# Tavily research workflow
|
||||
from .tavily_provider import TavilyResearchProvider
|
||||
from services.database import get_db
|
||||
from services.subscription import PricingService
|
||||
import os
|
||||
import time
|
||||
|
||||
if config.provider != ResearchProvider.EXA:
|
||||
# Pre-flight validation (similar to Exa)
|
||||
db_val = next(get_db())
|
||||
try:
|
||||
pricing_service = PricingService(db_val)
|
||||
# Check Tavily usage limits
|
||||
limits = pricing_service.get_user_limits(user_id)
|
||||
tavily_limit = limits.get('limits', {}).get('tavily_calls', 0) if limits else 0
|
||||
|
||||
# Get current usage
|
||||
from models.subscription_models import UsageSummary
|
||||
from datetime import datetime
|
||||
current_period = pricing_service.get_current_billing_period(user_id) or datetime.now().strftime("%Y-%m")
|
||||
usage = db_val.query(UsageSummary).filter(
|
||||
UsageSummary.user_id == user_id,
|
||||
UsageSummary.billing_period == current_period
|
||||
).first()
|
||||
|
||||
current_calls = getattr(usage, 'tavily_calls', 0) or 0 if usage else 0
|
||||
|
||||
if tavily_limit > 0 and current_calls >= tavily_limit:
|
||||
raise HTTPException(
|
||||
status_code=429,
|
||||
detail={
|
||||
'error': 'Tavily API call limit exceeded',
|
||||
'message': f'You have reached your Tavily API call limit ({tavily_limit} calls). Please upgrade your plan or wait for the next billing period.',
|
||||
'provider': 'tavily',
|
||||
'usage_info': {
|
||||
'current': current_calls,
|
||||
'limit': tavily_limit
|
||||
}
|
||||
}
|
||||
)
|
||||
except HTTPException:
|
||||
raise
|
||||
except Exception as e:
|
||||
logger.warning(f"Error checking Tavily limits: {e}")
|
||||
finally:
|
||||
db_val.close()
|
||||
|
||||
# Execute Tavily search
|
||||
api_start_time = time.time()
|
||||
try:
|
||||
tavily_provider = TavilyResearchProvider()
|
||||
raw_result = await tavily_provider.search(
|
||||
research_prompt, topic, industry, target_audience, config, user_id
|
||||
)
|
||||
api_duration_ms = (time.time() - api_start_time) * 1000
|
||||
|
||||
# Track usage
|
||||
cost = raw_result.get('cost', {}).get('total', 0.001) if isinstance(raw_result.get('cost'), dict) else 0.001
|
||||
search_depth = config.tavily_search_depth or "basic"
|
||||
tavily_provider.track_tavily_usage(user_id, cost, search_depth)
|
||||
|
||||
# Log API call performance
|
||||
blog_writer_logger.log_api_call(
|
||||
"tavily_search",
|
||||
"search",
|
||||
api_duration_ms,
|
||||
token_usage={},
|
||||
content_length=len(raw_result.get('content', ''))
|
||||
)
|
||||
|
||||
# Extract content for downstream analysis
|
||||
content = raw_result.get('content', '')
|
||||
sources = raw_result.get('sources', [])
|
||||
search_widget = "" # Tavily doesn't provide search widgets
|
||||
search_queries = raw_result.get('search_queries', [])
|
||||
grounding_metadata = None # Tavily doesn't provide grounding metadata
|
||||
|
||||
except RuntimeError as e:
|
||||
if "TAVILY_API_KEY not configured" in str(e):
|
||||
logger.warning("Tavily not configured, falling back to Google")
|
||||
config.provider = ResearchProvider.GOOGLE
|
||||
# Continue to Google flow below
|
||||
raw_result = None
|
||||
else:
|
||||
raise
|
||||
|
||||
if config.provider not in [ResearchProvider.EXA, ResearchProvider.TAVILY]:
|
||||
# Google research (existing flow) or fallback from Exa
|
||||
from .google_provider import GoogleResearchProvider
|
||||
import time
|
||||
@@ -412,8 +498,94 @@ class ResearchService:
|
||||
# Continue to Google flow below
|
||||
else:
|
||||
raise
|
||||
|
||||
elif config.provider == ResearchProvider.TAVILY:
|
||||
# Tavily research workflow
|
||||
from .tavily_provider import TavilyResearchProvider
|
||||
from services.database import get_db
|
||||
from services.subscription import PricingService
|
||||
import os
|
||||
|
||||
if config.provider != ResearchProvider.EXA:
|
||||
await task_manager.update_progress(task_id, "🌐 Connecting to Tavily AI search...")
|
||||
|
||||
# Pre-flight validation
|
||||
db_val = next(get_db())
|
||||
try:
|
||||
pricing_service = PricingService(db_val)
|
||||
# Check Tavily usage limits
|
||||
limits = pricing_service.get_user_limits(user_id)
|
||||
tavily_limit = limits.get('limits', {}).get('tavily_calls', 0) if limits else 0
|
||||
|
||||
# Get current usage
|
||||
from models.subscription_models import UsageSummary
|
||||
from datetime import datetime
|
||||
current_period = pricing_service.get_current_billing_period(user_id) or datetime.now().strftime("%Y-%m")
|
||||
usage = db_val.query(UsageSummary).filter(
|
||||
UsageSummary.user_id == user_id,
|
||||
UsageSummary.billing_period == current_period
|
||||
).first()
|
||||
|
||||
current_calls = getattr(usage, 'tavily_calls', 0) or 0 if usage else 0
|
||||
|
||||
if tavily_limit > 0 and current_calls >= tavily_limit:
|
||||
await task_manager.update_progress(task_id, f"❌ Tavily API call limit exceeded ({current_calls}/{tavily_limit})")
|
||||
raise HTTPException(
|
||||
status_code=429,
|
||||
detail={
|
||||
'error': 'Tavily API call limit exceeded',
|
||||
'message': f'You have reached your Tavily API call limit ({tavily_limit} calls). Please upgrade your plan or wait for the next billing period.',
|
||||
'provider': 'tavily',
|
||||
'usage_info': {
|
||||
'current': current_calls,
|
||||
'limit': tavily_limit
|
||||
}
|
||||
}
|
||||
)
|
||||
except HTTPException:
|
||||
raise
|
||||
except Exception as e:
|
||||
logger.warning(f"Error checking Tavily limits: {e}")
|
||||
finally:
|
||||
db_val.close()
|
||||
|
||||
# Execute Tavily search
|
||||
await task_manager.update_progress(task_id, "🤖 Executing Tavily AI search...")
|
||||
try:
|
||||
tavily_provider = TavilyResearchProvider()
|
||||
raw_result = await tavily_provider.search(
|
||||
research_prompt, topic, industry, target_audience, config, user_id
|
||||
)
|
||||
|
||||
# Track usage
|
||||
cost = raw_result.get('cost', {}).get('total', 0.001) if isinstance(raw_result.get('cost'), dict) else 0.001
|
||||
search_depth = config.tavily_search_depth or "basic"
|
||||
tavily_provider.track_tavily_usage(user_id, cost, search_depth)
|
||||
|
||||
# Extract content for downstream analysis
|
||||
if raw_result is None:
|
||||
logger.error("raw_result is None after Tavily search")
|
||||
raise ValueError("Tavily research result is None - search operation failed unexpectedly")
|
||||
|
||||
if not isinstance(raw_result, dict):
|
||||
logger.warning(f"raw_result is not a dict (type: {type(raw_result)}), using defaults")
|
||||
raw_result = {}
|
||||
|
||||
content = raw_result.get('content', '')
|
||||
sources = raw_result.get('sources', []) or []
|
||||
search_widget = "" # Tavily doesn't provide search widgets
|
||||
search_queries = raw_result.get('search_queries', []) or []
|
||||
grounding_metadata = None # Tavily doesn't provide grounding metadata
|
||||
|
||||
except RuntimeError as e:
|
||||
if "TAVILY_API_KEY not configured" in str(e):
|
||||
logger.warning("Tavily not configured, falling back to Google")
|
||||
await task_manager.update_progress(task_id, "⚠️ Tavily not configured, falling back to Google Search")
|
||||
config.provider = ResearchProvider.GOOGLE
|
||||
# Continue to Google flow below
|
||||
else:
|
||||
raise
|
||||
|
||||
if config.provider not in [ResearchProvider.EXA, ResearchProvider.TAVILY]:
|
||||
# Google research (existing flow)
|
||||
from .google_provider import GoogleResearchProvider
|
||||
|
||||
|
||||
169
backend/services/blog_writer/research/tavily_provider.py
Normal file
169
backend/services/blog_writer/research/tavily_provider.py
Normal file
@@ -0,0 +1,169 @@
|
||||
"""
|
||||
Tavily Research Provider
|
||||
|
||||
AI-powered search implementation using Tavily API for high-quality research.
|
||||
"""
|
||||
|
||||
import os
|
||||
from loguru import logger
|
||||
from models.subscription_models import APIProvider
|
||||
from services.research.tavily_service import TavilyService
|
||||
from .base_provider import ResearchProvider as BaseProvider
|
||||
|
||||
|
||||
class TavilyResearchProvider(BaseProvider):
    """Tavily AI-powered search provider.

    Wraps ``TavilyService`` and converts its response into the dictionary
    shape used by the research pipeline (``sources``, ``content``,
    ``search_queries``, ``cost`` ...).
    """

    def __init__(self):
        """Read ``TAVILY_API_KEY`` from the environment and create the client.

        Raises:
            RuntimeError: If ``TAVILY_API_KEY`` is not set. ResearchService
                matches on the text "TAVILY_API_KEY not configured" to fall
                back to Google, so the message must stay as is.
        """
        self.api_key = os.getenv("TAVILY_API_KEY")
        if not self.api_key:
            raise RuntimeError("TAVILY_API_KEY not configured")
        self.tavily_service = TavilyService()
        logger.info("✅ Tavily Research Provider initialized")

    async def search(self, prompt, topic, industry, target_audience, config, user_id):
        """Execute Tavily search and return standardized results.

        Args:
            prompt: Not used by this method.
            topic: Research topic; first part of the search query.
            industry: Second part of the search query.
            target_audience: Third part of the search query.
            config: Research configuration providing ``max_sources`` and the
                ``tavily_*`` options.
            user_id: Not used by this method.

        Returns:
            Dict with ``sources``, ``content``, ``search_type``, ``provider``,
            ``search_queries``, ``cost``, ``answer`` and ``images``.

        Raises:
            RuntimeError: If the Tavily service reports an unsuccessful search.
        """
        # Build Tavily query
        query = f"{topic} {industry} {target_audience}"

        # Tavily-specific options. A separate name is used so the ``topic``
        # argument (the research subject) is not shadowed by Tavily's topic
        # category.
        tavily_topic = config.tavily_topic or "general"
        search_depth = config.tavily_search_depth or "basic"

        logger.info(f"[Tavily Research] Executing search: {query}")

        # Execute Tavily search
        result = await self.tavily_service.search(
            query=query,
            topic=tavily_topic,
            search_depth=search_depth,
            max_results=min(config.max_sources, 20),
            include_domains=config.tavily_include_domains or None,
            exclude_domains=config.tavily_exclude_domains or None,
            include_answer=config.tavily_include_answer or False,
            include_raw_content=config.tavily_include_raw_content or False,
            include_images=config.tavily_include_images or False,
            include_image_descriptions=config.tavily_include_image_descriptions or False,
            time_range=config.tavily_time_range,
            start_date=config.tavily_start_date,
            end_date=config.tavily_end_date,
            country=config.tavily_country,
            chunks_per_source=config.tavily_chunks_per_source or 3,
            auto_parameters=config.tavily_auto_parameters or False
        )

        if not result.get("success"):
            raise RuntimeError(f"Tavily search failed: {result.get('error', 'Unknown error')}")

        # ``or []`` also covers a response where "results" is present but null.
        raw_results = result.get("results") or []

        # Transform to standardized format
        sources = self._transform_sources(raw_results)
        content = self._aggregate_content(raw_results)

        # Estimated cost per search: 0.001 for "basic" depth, 0.002 otherwise.
        cost = 0.001 if search_depth == "basic" else 0.002

        logger.info(f"[Tavily Research] Search completed: {len(sources)} sources, depth: {search_depth}")

        return {
            'sources': sources,
            'content': content,
            'search_type': search_depth,
            'provider': 'tavily',
            'search_queries': [query],
            'cost': {'total': cost},
            'answer': result.get("answer"),  # If include_answer was requested
            'images': result.get("images", [])
        }

    def get_provider_enum(self):
        """Return TAVILY provider enum for subscription tracking."""
        return APIProvider.TAVILY

    def estimate_tokens(self) -> int:
        """Estimate token usage for Tavily (not token-based, but we estimate API calls)."""
        return 0  # Tavily is per-search, not token-based

    def _transform_sources(self, results):
        """Transform Tavily results to ResearchSource format.

        Args:
            results: Iterable of result dicts from the Tavily response.

        Returns:
            List of source dicts, one per input result, in the same order.
        """
        sources = []
        for idx, result in enumerate(results):
            url = result.get("url", "")
            # A null "content" value must not break the excerpt slice below.
            text = result.get("content") or ""

            sources.append({
                'title': result.get("title", ""),
                'url': url,
                'excerpt': text[:500],  # First 500 chars
                'credibility_score': result.get("relevance_score", 0.5),
                'published_at': result.get("published_date"),
                'index': idx,
                'source_type': self._determine_source_type(url),
                'content': text,
                'raw_content': result.get("raw_content"),  # If include_raw_content was requested
                'score': result.get("score", result.get("relevance_score", 0.5)),
                'favicon': result.get("favicon")
            })

        return sources

    def _determine_source_type(self, url):
        """Determine source type from URL.

        Classification is a case-insensitive substring match, checked in this
        order: academic, news, expert, government, then ``'web'`` as default.
        """
        if not url:
            return 'web'

        url_lower = url.lower()
        if 'arxiv.org' in url_lower or 'research' in url_lower or '.edu' in url_lower:
            return 'academic'
        elif any(news in url_lower for news in ['cnn.com', 'bbc.com', 'reuters.com', 'theguardian.com', 'nytimes.com']):
            return 'news'
        elif 'linkedin.com' in url_lower:
            return 'expert'
        elif '.gov' in url_lower:
            return 'government'
        else:
            return 'web'

    def _aggregate_content(self, results):
        """Aggregate content from Tavily results for LLM analysis.

        Results without content are skipped; the ``Source N`` label keeps the
        result's 1-based position in the original list.
        """
        content_parts = []

        for idx, result in enumerate(results):
            content = result.get("content", "")
            if content:
                content_parts.append(f"Source {idx + 1}: {content}")

        return "\n\n".join(content_parts)

    def track_tavily_usage(self, user_id: str, cost: float, search_depth: str):
        """Track Tavily API usage after successful call.

        Increments the Tavily and total call/cost counters on the user's
        ``usage_summaries`` row for the current billing period. Failures are
        logged and rolled back, never raised.

        Args:
            user_id: User whose usage row is updated.
            cost: Cost to add to ``tavily_cost`` and ``total_cost``.
            search_depth: Search depth of the call; only used in the log line.
        """
        from datetime import datetime

        from services.database import get_db
        from services.subscription import PricingService
        from sqlalchemy import text

        db = next(get_db())
        try:
            pricing_service = PricingService(db)
            # Same fallback as the pre-flight limit check in ResearchService,
            # so the counter is written to the period that check reads from.
            current_period = (
                pricing_service.get_current_billing_period(user_id)
                or datetime.now().strftime("%Y-%m")
            )

            # Update tavily_calls and tavily_cost via SQL UPDATE
            update_query = text("""
                UPDATE usage_summaries
                SET tavily_calls = COALESCE(tavily_calls, 0) + 1,
                    tavily_cost = COALESCE(tavily_cost, 0) + :cost,
                    total_calls = COALESCE(total_calls, 0) + 1,
                    total_cost = COALESCE(total_cost, 0) + :cost
                WHERE user_id = :user_id AND billing_period = :period
            """)
            outcome = db.execute(update_query, {
                'cost': cost,
                'user_id': user_id,
                'period': current_period
            })
            db.commit()

            if outcome.rowcount == 0:
                # No summary row matched: nothing was recorded, so do not
                # report the call as tracked.
                logger.warning(
                    f"[Tavily] No usage summary row for user={user_id}, period={current_period}; "
                    f"usage was not recorded"
                )
            else:
                logger.info(f"[Tavily] Tracked usage: user={user_id}, cost=${cost}, depth={search_depth}")
        except Exception as e:
            logger.error(f"[Tavily] Failed to track usage: {e}", exc_info=True)
            db.rollback()
        finally:
            db.close()
|
||||
|
||||
265
backend/services/integrations/wix_oauth.py
Normal file
265
backend/services/integrations/wix_oauth.py
Normal file
@@ -0,0 +1,265 @@
|
||||
"""
|
||||
Wix OAuth2 Service
|
||||
Handles Wix OAuth2 authentication flow and token storage.
|
||||
"""
|
||||
|
||||
import os
|
||||
import sqlite3
|
||||
from typing import Optional, Dict, Any, List
|
||||
from datetime import datetime, timedelta
|
||||
from loguru import logger
|
||||
|
||||
|
||||
class WixOAuthService:
|
||||
"""Manages Wix OAuth2 authentication flow and token storage."""
|
||||
|
||||
def __init__(self, db_path: str = "alwrity.db"):
    """
    Remember the SQLite database location and make sure the schema exists.

    Args:
        db_path: Path of the SQLite database file holding the Wix tokens.
    """
    self.db_path = db_path
    # Create the token table up front so later calls can assume it exists.
    self._init_db()
|
||||
|
||||
def _init_db(self):
    """Initialize database tables for OAuth tokens.

    Creates ``wix_oauth_tokens`` in the database at ``self.db_path`` if it
    does not exist yet.
    """
    from contextlib import closing

    # sqlite3's own context manager only commits/rolls back the transaction;
    # closing() is what actually releases the connection.
    with closing(sqlite3.connect(self.db_path)) as conn, conn:
        conn.execute('''
            CREATE TABLE IF NOT EXISTS wix_oauth_tokens (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                user_id TEXT NOT NULL,
                access_token TEXT NOT NULL,
                refresh_token TEXT,
                token_type TEXT DEFAULT 'bearer',
                expires_at TIMESTAMP,
                expires_in INTEGER,
                scope TEXT,
                site_id TEXT,
                member_id TEXT,
                created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                is_active BOOLEAN DEFAULT TRUE
            )
        ''')
    logger.info("Wix OAuth database initialized.")
|
||||
|
||||
def store_tokens(
    self,
    user_id: str,
    access_token: str,
    refresh_token: Optional[str] = None,
    expires_in: Optional[int] = None,
    token_type: str = 'bearer',
    scope: Optional[str] = None,
    site_id: Optional[str] = None,
    member_id: Optional[str] = None
) -> bool:
    """
    Store Wix OAuth tokens in the database.

    A new row is inserted on every call; existing rows for the user are not
    modified.

    Args:
        user_id: User ID (Clerk string)
        access_token: Access token from Wix
        refresh_token: Optional refresh token
        expires_in: Optional expiration time in seconds
        token_type: Token type (default: 'bearer')
        scope: Optional OAuth scope
        site_id: Optional Wix site ID
        member_id: Optional Wix member ID

    Returns:
        True if tokens were stored successfully, False if any error occurred
        (the error is logged, not raised).
    """
    from contextlib import closing

    try:
        expires_at = None
        # NOTE(review): a falsy expires_in (None or 0) stores no expiry at
        # all - confirm that 0 is never meant as "already expired".
        if expires_in:
            # Stored as naive local time in "YYYY-MM-DD HH:MM:SS[.ffffff]"
            # form. Converting explicitly yields the same text the implicit
            # sqlite3 datetime adapter produced, without relying on that
            # adapter (deprecated since Python 3.12).
            expires_at = (datetime.now() + timedelta(seconds=expires_in)).isoformat(sep=" ")

        # closing() releases the connection; the inner ``conn`` context
        # commits on success and rolls back on error.
        with closing(sqlite3.connect(self.db_path)) as conn, conn:
            conn.execute('''
                INSERT INTO wix_oauth_tokens
                (user_id, access_token, refresh_token, token_type, expires_at, expires_in, scope, site_id, member_id)
                VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
            ''', (user_id, access_token, refresh_token, token_type, expires_at, expires_in, scope, site_id, member_id))

        logger.info(f"Wix OAuth: Token inserted into database for user {user_id}")
        return True

    except Exception as e:
        logger.error(f"Error storing Wix tokens for user {user_id}: {e}")
        return False
|
||||
|
||||
def get_user_tokens(self, user_id: str) -> List[Dict[str, Any]]:
    """Get all active Wix tokens for a user.

    A token is returned when ``is_active`` is true and it either has no
    expiry or its ``expires_at`` lies in the future.

    Args:
        user_id: User whose tokens are looked up.

    Returns:
        List of token dicts, newest first (by ``created_at``). Empty when the
        user has no usable tokens or when the lookup fails (the error is
        logged, not raised).
    """
    from contextlib import closing

    columns = (
        "id", "access_token", "refresh_token", "token_type", "expires_at",
        "expires_in", "scope", "site_id", "member_id", "created_at",
    )
    try:
        # closing() releases the connection; sqlite3's own context manager
        # would only end the transaction.
        with closing(sqlite3.connect(self.db_path)) as conn:
            # store_tokens writes expires_at as naive *local* time, while
            # SQLite's datetime('now') is UTC - compare against local time so
            # tokens are not treated as expired too early or too late.
            rows = conn.execute('''
                SELECT id, access_token, refresh_token, token_type, expires_at, expires_in, scope, site_id, member_id, created_at
                FROM wix_oauth_tokens
                WHERE user_id = ? AND is_active = TRUE AND (expires_at IS NULL OR expires_at > datetime('now', 'localtime'))
                ORDER BY created_at DESC
            ''', (user_id,)).fetchall()

        return [dict(zip(columns, row)) for row in rows]

    except Exception as e:
        logger.error(f"Error getting Wix tokens for user {user_id}: {e}")
        return []
|
||||
|
||||
def get_user_token_status(self, user_id: str) -> Dict[str, Any]:
    """Report every stored Wix token for a user, split into usable and unusable.

    A token is listed under ``active_tokens`` only when its ``is_active`` flag
    is set and its ``expires_at`` is empty or still in the future. Every other
    token (expired or deactivated) is listed under ``expired_tokens``.

    Args:
        user_id: ID of the user whose tokens are inspected.

    Returns:
        Dict with ``has_tokens``, ``has_active_tokens``, ``has_expired_tokens``,
        ``active_tokens``, ``expired_tokens``, ``total_tokens`` and
        ``last_token_date`` (``created_at`` of the newest token, or None).
        If the lookup fails, the same keys come back with empty values plus an
        ``error`` message; the failure is logged, not raised.
    """
    field_names = (
        "id", "access_token", "refresh_token", "token_type", "expires_at",
        "expires_in", "scope", "site_id", "member_id", "created_at", "is_active",
    )

    try:
        with sqlite3.connect(self.db_path) as conn:
            cursor = conn.cursor()

            def is_unexpired(expiry) -> bool:
                """Return True when ``expiry`` is empty or lies in the future."""
                if not expiry:
                    # No expiry stored => consider not expired
                    return True
                try:
                    try:
                        # First try Python parsing
                        parsed = datetime.fromisoformat(expiry) if isinstance(expiry, str) else expiry
                        return parsed > datetime.now()
                    except Exception:
                        # Fallback to SQLite comparison
                        cursor.execute("SELECT datetime('now') < ?", (expiry,))
                        return cursor.fetchone()[0] == 1
                except Exception:
                    # Expiry could not be evaluated at all: treat as expired
                    return False

            # Get all tokens (active and expired)
            cursor.execute('''
                SELECT id, access_token, refresh_token, token_type, expires_at, expires_in, scope, site_id, member_id, created_at, is_active
                FROM wix_oauth_tokens
                WHERE user_id = ?
                ORDER BY created_at DESC
            ''', (user_id,))

            every_token, usable, unusable = [], [], []
            for record in cursor.fetchall():
                token = dict(zip(field_names, record))
                token["is_active"] = bool(token["is_active"])
                every_token.append(token)

                unexpired = is_unexpired(record[4])
                if token["is_active"] and unexpired:
                    usable.append(token)
                else:
                    unusable.append(token)

            newest_created = every_token[0]["created_at"] if every_token else None
            return {
                "has_tokens": len(every_token) > 0,
                "has_active_tokens": len(usable) > 0,
                "has_expired_tokens": len(unusable) > 0,
                "active_tokens": usable,
                "expired_tokens": unusable,
                "total_tokens": len(every_token),
                "last_token_date": newest_created
            }

    except Exception as e:
        logger.error(f"Error getting Wix token status for user {user_id}: {e}")
        return {
            "has_tokens": False,
            "has_active_tokens": False,
            "has_expired_tokens": False,
            "active_tokens": [],
            "expired_tokens": [],
            "total_tokens": 0,
            "last_token_date": None,
            "error": str(e)
        }
|
||||
|
||||
def update_tokens(
    self,
    user_id: str,
    access_token: str,
    refresh_token: Optional[str] = None,
    expires_in: Optional[int] = None
) -> bool:
    """Update a user's stored Wix tokens (e.g., after a refresh).

    When ``refresh_token`` is given, the row already holding that refresh
    token is updated. If no row holds it (the provider rotated the refresh
    token), or no refresh token was given, the user's most recently created
    row is updated instead. The updated row is re-activated.

    Args:
        user_id: ID of the user owning the tokens.
        access_token: New access token to store.
        refresh_token: New refresh token; the stored one is kept when omitted.
        expires_in: Lifetime of the access token in seconds; when falsy the
            stored expiry is cleared.

    Returns:
        True when a stored token row was updated. False when the user has no
        stored token to update or the database operation failed (logged).
    """
    try:
        expires_at = None
        if expires_in:
            # Stored as "YYYY-MM-DD HH:MM:SS[.ffffff]" text: the same value
            # sqlite3's default datetime adapter would write, without relying
            # on that adapter (deprecated since Python 3.12).
            expires_at = (datetime.now() + timedelta(seconds=expires_in)).isoformat(" ")

        with sqlite3.connect(self.db_path) as conn:
            cursor = conn.cursor()
            updated = 0

            if refresh_token:
                cursor.execute('''
                    UPDATE wix_oauth_tokens
                    SET access_token = ?, refresh_token = ?, expires_at = ?, expires_in = ?,
                    is_active = TRUE, updated_at = datetime('now')
                    WHERE user_id = ? AND refresh_token = ?
                ''', (access_token, refresh_token, expires_at, expires_in, user_id, refresh_token))
                updated = cursor.rowcount

            if updated == 0:
                # No refresh token supplied, or it was rotated so no row holds
                # the new value yet: update the newest row. COALESCE keeps the
                # stored refresh token when none was supplied.
                cursor.execute('''
                    UPDATE wix_oauth_tokens
                    SET access_token = ?, refresh_token = COALESCE(?, refresh_token),
                    expires_at = ?, expires_in = ?,
                    is_active = TRUE, updated_at = datetime('now')
                    WHERE user_id = ? AND id = (SELECT id FROM wix_oauth_tokens WHERE user_id = ? ORDER BY created_at DESC LIMIT 1)
                ''', (access_token, refresh_token or None, expires_at, expires_in, user_id, user_id))
                updated = cursor.rowcount

            conn.commit()

        if updated == 0:
            # Nothing matched: report it rather than claiming success.
            logger.warning(f"Wix OAuth: No stored token found to update for user {user_id}")
            return False

        logger.info(f"Wix OAuth: Tokens updated for user {user_id}")
        return True

    except Exception as e:
        logger.error(f"Error updating Wix tokens for user {user_id}: {e}")
        return False
|
||||
|
||||
def revoke_token(self, user_id: str, token_id: int) -> bool:
    """Deactivate one of a user's stored Wix OAuth tokens.

    Args:
        user_id: ID of the user who owns the token.
        token_id: Row id of the token to deactivate.

    Returns:
        True when a row owned by ``user_id`` with that id was marked inactive.
        False when no such row exists or the database operation failed
        (failures are logged, not raised).
    """
    try:
        with sqlite3.connect(self.db_path) as conn:
            cursor = conn.cursor()
            cursor.execute('''
                UPDATE wix_oauth_tokens
                SET is_active = FALSE, updated_at = datetime('now')
                WHERE user_id = ? AND id = ?
            ''', (user_id, token_id))
            conn.commit()

            # rowcount tells whether the (user, id) pair matched a stored token
            if cursor.rowcount <= 0:
                return False

            logger.info(f"Wix token {token_id} revoked for user {user_id}")
            return True

    except Exception as e:
        logger.error(f"Error revoking Wix token: {e}")
        return False
|
||||
|
||||
@@ -218,6 +218,87 @@ class WordPressOAuthService:
|
||||
logger.error(f"Error getting WordPress tokens for user {user_id}: {e}")
|
||||
return []
|
||||
|
||||
def get_user_token_status(self, user_id: str) -> Dict[str, Any]:
    """Report every stored WordPress token for a user, usable or not.

    A token counts as active only when its ``is_active`` flag is set and its
    ``expires_at`` is empty or still in the future. All remaining tokens
    (expired or deactivated) are reported under ``expired_tokens``.

    Args:
        user_id: ID of the user whose tokens are inspected.

    Returns:
        Dict with ``has_tokens``, ``has_active_tokens``, ``has_expired_tokens``,
        ``active_tokens``, ``expired_tokens``, ``total_tokens`` and
        ``last_token_date`` (``created_at`` of the newest token, or None).
        If the lookup fails, the same keys come back with empty values plus an
        ``error`` message; the failure is logged, not raised.
    """
    field_names = (
        "id", "access_token", "refresh_token", "token_type", "expires_at",
        "scope", "blog_id", "blog_url", "created_at", "is_active",
    )

    try:
        with sqlite3.connect(self.db_path) as conn:
            cursor = conn.cursor()

            def is_unexpired(expiry) -> bool:
                """Return True when ``expiry`` is empty or lies in the future."""
                if not expiry:
                    # No expiry stored => consider not expired
                    return True
                try:
                    try:
                        # First try Python parsing
                        parsed = datetime.fromisoformat(expiry) if isinstance(expiry, str) else expiry
                        return parsed > datetime.now()
                    except Exception:
                        # Fallback to SQLite comparison
                        cursor.execute("SELECT datetime('now') < ?", (expiry,))
                        return cursor.fetchone()[0] == 1
                except Exception:
                    # Expiry could not be evaluated at all: treat as expired
                    return False

            # Get all tokens (active and expired)
            cursor.execute('''
                SELECT id, access_token, refresh_token, token_type, expires_at, scope, blog_id, blog_url, created_at, is_active
                FROM wordpress_oauth_tokens
                WHERE user_id = ?
                ORDER BY created_at DESC
            ''', (user_id,))

            every_token, usable, unusable = [], [], []
            for record in cursor.fetchall():
                token = dict(zip(field_names, record))
                token["is_active"] = bool(token["is_active"])
                every_token.append(token)

                unexpired = is_unexpired(record[4])
                if token["is_active"] and unexpired:
                    usable.append(token)
                else:
                    unusable.append(token)

            newest_created = every_token[0]["created_at"] if every_token else None
            return {
                "has_tokens": len(every_token) > 0,
                "has_active_tokens": len(usable) > 0,
                "has_expired_tokens": len(unusable) > 0,
                "active_tokens": usable,
                "expired_tokens": unusable,
                "total_tokens": len(every_token),
                "last_token_date": newest_created
            }

    except Exception as e:
        logger.error(f"Error getting WordPress token status for user {user_id}: {e}")
        return {
            "has_tokens": False,
            "has_active_tokens": False,
            "has_expired_tokens": False,
            "active_tokens": [],
            "expired_tokens": [],
            "total_tokens": 0,
            "last_token_date": None,
            "error": str(e)
        }
|
||||
|
||||
def test_token(self, access_token: str) -> bool:
|
||||
"""Test if a WordPress access token is valid."""
|
||||
try:
|
||||
|
||||
@@ -16,9 +16,7 @@ from models.oauth_token_monitoring_models import OAuthTokenMonitoringTask
|
||||
from services.gsc_service import GSCService
|
||||
from services.integrations.bing_oauth import BingOAuthService
|
||||
from services.integrations.wordpress_oauth import WordPressOAuthService
|
||||
|
||||
# Note: Wix tokens are stored in frontend sessionStorage, not backend database
|
||||
# So we cannot check for Wix connections from the backend yet
|
||||
from services.integrations.wix_oauth import WixOAuthService
|
||||
|
||||
|
||||
def get_connected_platforms(user_id: str) -> List[str]:
|
||||
@@ -29,7 +27,7 @@ def get_connected_platforms(user_id: str) -> List[str]:
|
||||
- GSC: gsc_credentials table
|
||||
- Bing: bing_oauth_tokens table
|
||||
- WordPress: wordpress_oauth_tokens table
|
||||
- Wix: Not checked (tokens in frontend sessionStorage)
|
||||
- Wix: wix_oauth_tokens table
|
||||
|
||||
Args:
|
||||
user_id: User ID (Clerk string)
|
||||
@@ -39,57 +37,84 @@ def get_connected_platforms(user_id: str) -> List[str]:
|
||||
"""
|
||||
connected = []
|
||||
|
||||
logger.warning(f"[OAuth Monitoring] Checking connected platforms for user: {user_id}")
|
||||
# Use DEBUG level for routine checks (called frequently by dashboard)
|
||||
logger.debug(f"[OAuth Monitoring] Checking connected platforms for user: {user_id}")
|
||||
|
||||
try:
|
||||
# Check GSC - use absolute database path
|
||||
db_path = os.path.abspath("alwrity.db")
|
||||
logger.warning(f"[OAuth Monitoring] Checking GSC with db_path: {db_path}")
|
||||
gsc_service = GSCService(db_path=db_path)
|
||||
gsc_credentials = gsc_service.load_user_credentials(user_id)
|
||||
if gsc_credentials:
|
||||
connected.append('gsc')
|
||||
logger.warning(f"[OAuth Monitoring] ✅ GSC connected for user {user_id}")
|
||||
logger.debug(f"[OAuth Monitoring] ✅ GSC connected for user {user_id}")
|
||||
else:
|
||||
logger.warning(f"[OAuth Monitoring] ❌ GSC not connected for user {user_id} (no credentials found)")
|
||||
logger.debug(f"[OAuth Monitoring] ❌ GSC not connected for user {user_id}")
|
||||
except Exception as e:
|
||||
logger.warning(f"[OAuth Monitoring] ⚠️ GSC check failed for user {user_id}: {e}", exc_info=True)
|
||||
|
||||
try:
|
||||
# Check Bing - use absolute database path
|
||||
db_path = os.path.abspath("alwrity.db")
|
||||
logger.warning(f"[OAuth Monitoring] Checking Bing with db_path: {db_path}")
|
||||
bing_service = BingOAuthService(db_path=db_path)
|
||||
token_status = bing_service.get_user_token_status(user_id)
|
||||
has_tokens = token_status.get('has_active_tokens', False)
|
||||
logger.warning(f"[OAuth Monitoring] Bing token_status keys: {list(token_status.keys())}, has_active_tokens: {has_tokens}")
|
||||
if has_tokens:
|
||||
has_active_tokens = token_status.get('has_active_tokens', False)
|
||||
has_expired_tokens = token_status.get('has_expired_tokens', False)
|
||||
expired_tokens = token_status.get('expired_tokens', [])
|
||||
|
||||
# Check if expired tokens have refresh tokens (can be refreshed)
|
||||
has_refreshable_tokens = any(token.get('refresh_token') for token in expired_tokens)
|
||||
|
||||
# Consider connected if user has active tokens OR expired tokens with refresh tokens
|
||||
if has_active_tokens or (has_expired_tokens and has_refreshable_tokens):
|
||||
connected.append('bing')
|
||||
logger.warning(f"[OAuth Monitoring] ✅ Bing connected for user {user_id}")
|
||||
logger.debug(f"[OAuth Monitoring] ✅ Bing connected for user {user_id}")
|
||||
else:
|
||||
logger.warning(f"[OAuth Monitoring] ❌ Bing not connected for user {user_id} (no active tokens)")
|
||||
logger.debug(f"[OAuth Monitoring] ❌ Bing not connected for user {user_id}")
|
||||
except Exception as e:
|
||||
logger.warning(f"[OAuth Monitoring] ⚠️ Bing check failed for user {user_id}: {e}", exc_info=True)
|
||||
|
||||
try:
|
||||
# Check WordPress - use absolute database path
|
||||
db_path = os.path.abspath("alwrity.db")
|
||||
logger.warning(f"[OAuth Monitoring] Checking WordPress with db_path: {db_path}")
|
||||
wordpress_service = WordPressOAuthService(db_path=db_path)
|
||||
tokens = wordpress_service.get_user_tokens(user_id)
|
||||
logger.warning(f"[OAuth Monitoring] WordPress tokens found: {len(tokens) if tokens else 0}")
|
||||
if tokens and len(tokens) > 0:
|
||||
token_status = wordpress_service.get_user_token_status(user_id)
|
||||
has_active_tokens = token_status.get('has_active_tokens', False)
|
||||
has_tokens = token_status.get('has_tokens', False)
|
||||
|
||||
# Consider connected if user has any tokens (WordPress tokens may not have refresh tokens)
|
||||
# If tokens exist, user was connected even if expired (may need re-auth)
|
||||
if has_tokens:
|
||||
connected.append('wordpress')
|
||||
logger.warning(f"[OAuth Monitoring] ✅ WordPress connected for user {user_id} ({len(tokens)} token(s))")
|
||||
logger.debug(f"[OAuth Monitoring] ✅ WordPress connected for user {user_id}")
|
||||
else:
|
||||
logger.warning(f"[OAuth Monitoring] ❌ WordPress not connected for user {user_id} (no tokens found)")
|
||||
logger.debug(f"[OAuth Monitoring] ❌ WordPress not connected for user {user_id}")
|
||||
except Exception as e:
|
||||
logger.warning(f"[OAuth Monitoring] ⚠️ WordPress check failed for user {user_id}: {e}", exc_info=True)
|
||||
|
||||
# Wix: Not checked (tokens in frontend sessionStorage)
|
||||
# TODO: Once backend storage is implemented, check wix_tokens table
|
||||
try:
|
||||
# Check Wix - use absolute database path
|
||||
db_path = os.path.abspath("alwrity.db")
|
||||
wix_service = WixOAuthService(db_path=db_path)
|
||||
token_status = wix_service.get_user_token_status(user_id)
|
||||
has_active_tokens = token_status.get('has_active_tokens', False)
|
||||
has_expired_tokens = token_status.get('has_expired_tokens', False)
|
||||
expired_tokens = token_status.get('expired_tokens', [])
|
||||
|
||||
# Check if expired tokens have refresh tokens (can be refreshed)
|
||||
has_refreshable_tokens = any(token.get('refresh_token') for token in expired_tokens)
|
||||
|
||||
# Consider connected if user has active tokens OR expired tokens with refresh tokens
|
||||
if has_active_tokens or (has_expired_tokens and has_refreshable_tokens):
|
||||
connected.append('wix')
|
||||
logger.debug(f"[OAuth Monitoring] ✅ Wix connected for user {user_id}")
|
||||
else:
|
||||
logger.debug(f"[OAuth Monitoring] ❌ Wix not connected for user {user_id}")
|
||||
except Exception as e:
|
||||
logger.warning(f"[OAuth Monitoring] ⚠️ Wix check failed for user {user_id}: {e}", exc_info=True)
|
||||
|
||||
logger.warning(f"[OAuth Monitoring] Connected platforms for user {user_id}: {connected}")
|
||||
# Don't log here - let the caller log a formatted summary if needed
|
||||
# This function is called frequently and should be silent
|
||||
return connected
|
||||
|
||||
|
||||
|
||||
@@ -265,7 +265,27 @@ class OnboardingProgress:
|
||||
# Log database save confirmation
|
||||
logger.info(f"✅ DATABASE: API key for {provider} saved to database for user {self.user_id}")
|
||||
elif step.step_number == 2: # Website Analysis
|
||||
self.db_service.save_website_analysis(self.user_id, step.data, db)
|
||||
# Transform frontend data structure to match database schema
|
||||
# Frontend sends: { website: "url", analysis: {...} }
|
||||
# Database expects: { website_url: "url", ...analysis (flattened) }
|
||||
analysis_for_db = {}
|
||||
if step.data:
|
||||
# Extract website_url from 'website' or 'website_url' field
|
||||
website_url = step.data.get('website') or step.data.get('website_url')
|
||||
if website_url:
|
||||
analysis_for_db['website_url'] = website_url
|
||||
# Flatten nested 'analysis' object if it exists
|
||||
if 'analysis' in step.data and isinstance(step.data['analysis'], dict):
|
||||
analysis_for_db.update(step.data['analysis'])
|
||||
# Also include any other top-level fields (except 'website' and 'analysis')
|
||||
for key, value in step.data.items():
|
||||
if key not in ['website', 'website_url', 'analysis']:
|
||||
analysis_for_db[key] = value
|
||||
# Ensure status is set
|
||||
if 'status' not in analysis_for_db:
|
||||
analysis_for_db['status'] = 'completed'
|
||||
|
||||
self.db_service.save_website_analysis(self.user_id, analysis_for_db, db)
|
||||
logger.info(f"✅ DATABASE: Website analysis saved to database for user {self.user_id}")
|
||||
elif step.step_number == 3: # Research Preferences
|
||||
self.db_service.save_research_preferences(self.user_id, step.data, db)
|
||||
|
||||
@@ -336,8 +336,13 @@ class OnboardingDatabaseService:
|
||||
).first()
|
||||
|
||||
if existing:
|
||||
# Update existing
|
||||
existing.website_url = normalized.get('website_url', existing.website_url)
|
||||
# Update existing - only update website_url if normalized value is not empty
|
||||
# This prevents overwriting a valid URL with an empty string when step.data
|
||||
# doesn't include the website field
|
||||
normalized_url = normalized.get('website_url', '').strip() if normalized.get('website_url') else ''
|
||||
if normalized_url:
|
||||
existing.website_url = normalized_url
|
||||
# If normalized_url is empty, keep existing.website_url unchanged
|
||||
existing.writing_style = normalized.get('writing_style')
|
||||
existing.content_characteristics = normalized.get('content_characteristics')
|
||||
existing.target_audience = normalized.get('target_audience')
|
||||
@@ -522,6 +527,52 @@ class OnboardingDatabaseService:
|
||||
logger.error(f"Error getting research preferences: {e}")
|
||||
return None
|
||||
|
||||
def get_competitor_analysis(self, user_id: str, db: Session = None) -> Optional[List[Dict[str, Any]]]:
    """Load the competitor analysis saved during onboarding for a user.

    Args:
        user_id: ID of the user whose competitors are requested.
        db: Database session to use; falls back to ``self.db`` when omitted.

    Returns:
        A list with one dict per stored competitor (``url``, ``domain``,
        ``title``, ``summary``, ``relevance_score``, ``highlights``,
        ``favicon``, ``image``, ``published_date``, ``author``,
        ``competitive_insights``, ``content_insights``), or None when the
        user has no onboarding session, no competitor records, or the query
        raised a SQLAlchemy error (which is logged).

    Raises:
        ValueError: If neither ``db`` nor ``self.db`` provides a session.
    """
    active_db = db or self.db
    if not active_db:
        raise ValueError("Database session required")

    def to_dict(record) -> Dict[str, Any]:
        """Flatten one CompetitorAnalysis row and its analysis_data payload."""
        details = record.analysis_data or {}
        return {
            "url": record.competitor_url,
            "domain": record.competitor_domain or record.competitor_url,
            "title": details.get("title", record.competitor_domain or ""),
            "summary": details.get("summary", ""),
            "relevance_score": details.get("relevance_score", 0.5),
            "highlights": details.get("highlights", []),
            "favicon": details.get("favicon"),
            "image": details.get("image"),
            "published_date": details.get("published_date"),
            "author": details.get("author"),
            # Stored under "competitive_analysis", exposed as "competitive_insights"
            "competitive_insights": details.get("competitive_analysis", {}),
            "content_insights": details.get("content_insights", {})
        }

    try:
        from models.onboarding import CompetitorAnalysis

        onboarding_session = self.get_session_by_user(user_id, active_db)
        if not onboarding_session:
            return None

        # Query CompetitorAnalysis table
        records = active_db.query(CompetitorAnalysis).filter(
            CompetitorAnalysis.session_id == onboarding_session.id
        ).all()
        if not records:
            return None

        return [to_dict(record) for record in records]

    except SQLAlchemyError as e:
        logger.error(f"Error getting competitor analysis: {e}")
        return None
|
||||
|
||||
def get_persona_data(self, user_id: str, db: Session = None) -> Optional[Dict[str, Any]]:
|
||||
"""Get persona data for user."""
|
||||
session_db = db or self.db
|
||||
|
||||
136
backend/services/platform_insights_monitoring_service.py
Normal file
136
backend/services/platform_insights_monitoring_service.py
Normal file
@@ -0,0 +1,136 @@
|
||||
"""
|
||||
Platform Insights Monitoring Service
|
||||
Creates and manages platform insights (GSC/Bing) fetch tasks.
|
||||
"""
|
||||
|
||||
from datetime import datetime, timedelta
|
||||
from typing import Dict, Any, Optional, List
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from models.platform_insights_monitoring_models import PlatformInsightsTask
|
||||
from utils.logger_utils import get_service_logger
|
||||
|
||||
logger = get_service_logger("platform_insights_monitoring")
|
||||
|
||||
|
||||
def create_platform_insights_task(
    user_id: str,
    platform: str,  # 'gsc' or 'bing'
    site_url: Optional[str] = None,
    db: Session = None
) -> Dict[str, Any]:
    """
    Create a platform insights fetch task for a user.

    This should be called when user connects GSC or Bing in Step 5.
    At most one task is kept per (user, platform): when one already exists
    it is returned instead of creating a duplicate.

    Args:
        user_id: Clerk user ID (string)
        platform: Platform name ('gsc' or 'bing')
        site_url: Optional site URL (for GSC/Bing specific site)
        db: Database session (required despite the None default)

    Returns:
        Dictionary with success status and task details. On success it holds
        ``success``, ``task_id`` and ``message`` plus either ``existing``
        (task already present) or ``platform`` and ``next_check`` (new task).
        On failure it holds ``success: False`` and ``error``; this function
        does not raise.
    """
    if db is None:
        # Without this guard the error handler below would call rollback()
        # on None and raise instead of returning the error dict.
        logger.error(
            f"Error creating {platform} insights task for user {user_id}: no database session provided"
        )
        return {
            'success': False,
            'error': 'Database session is required'
        }

    try:
        logger.info(
            f"[Platform Insights] Creating {platform} insights task for user: {user_id}"
        )

        # Check if task already exists
        existing = db.query(PlatformInsightsTask).filter(
            PlatformInsightsTask.user_id == user_id,
            PlatformInsightsTask.platform == platform
        ).first()

        if existing:
            logger.info(
                f"[Platform Insights] Task already exists for user {user_id}, platform {platform}"
            )
            return {
                'success': True,
                'task_id': existing.id,
                'message': 'Task already exists',
                'existing': True
            }

        # Take one timestamp so created_at, updated_at and next_check agree
        now = datetime.utcnow()

        # Calculate next check (7 days from now, weekly schedule)
        next_check = now + timedelta(days=7)

        # Create new task
        task = PlatformInsightsTask(
            user_id=user_id,
            platform=platform,
            site_url=site_url,
            status='active',
            next_check=next_check,
            created_at=now,
            updated_at=now
        )

        db.add(task)
        db.commit()
        db.refresh(task)

        logger.info(
            f"[Platform Insights] Created {platform} insights task {task.id} for user {user_id}, "
            f"next_check: {next_check}"
        )

        return {
            'success': True,
            'task_id': task.id,
            'platform': platform,
            'next_check': next_check.isoformat(),
            'message': f'{platform.upper()} insights task created successfully'
        }

    except Exception as e:
        logger.error(
            f"Error creating {platform} insights task for user {user_id}: {e}",
            exc_info=True
        )
        try:
            db.rollback()
        except Exception as rollback_error:
            # A failed rollback must not hide the original error from the caller
            logger.error(
                f"Rollback failed after {platform} insights task error for user {user_id}: {rollback_error}"
            )
        return {
            'success': False,
            'error': str(e)
        }
|
||||
|
||||
|
||||
def get_user_insights_tasks(
    user_id: str,
    platform: Optional[str] = None,
    db: Session = None
) -> List[PlatformInsightsTask]:
    """
    Fetch the platform insights tasks stored for a user.

    Args:
        user_id: Clerk user ID (string)
        platform: Optional platform filter ('gsc' or 'bing'); all platforms
            are returned when omitted
        db: Database session

    Returns:
        List of PlatformInsightsTask instances. The list is empty when the
        user has no tasks or when the lookup fails (failures are logged).
    """
    try:
        by_user = db.query(PlatformInsightsTask).filter(
            PlatformInsightsTask.user_id == user_id
        )
        # Narrow to a single platform only when one was requested
        scoped = by_user.filter(PlatformInsightsTask.platform == platform) if platform else by_user
        found = scoped.all()

        logger.debug(
            f"[Platform Insights] Found {len(found)} insights tasks for user {user_id}"
        )
        return found

    except Exception as e:
        logger.error(f"Error getting insights tasks for user {user_id}: {e}", exc_info=True)
        return []
|
||||
|
||||
@@ -17,8 +17,10 @@ Last Updated: January 2025
|
||||
|
||||
from .google_search_service import GoogleSearchService
|
||||
from .exa_service import ExaService
|
||||
from .tavily_service import TavilyService
|
||||
|
||||
__all__ = [
|
||||
"GoogleSearchService",
|
||||
"ExaService"
|
||||
"ExaService",
|
||||
"TavilyService"
|
||||
]
|
||||
|
||||
425
backend/services/research/tavily_service.py
Normal file
425
backend/services/research/tavily_service.py
Normal file
@@ -0,0 +1,425 @@
|
||||
"""
|
||||
Tavily API Service for ALwrity
|
||||
|
||||
This service provides web search and research capabilities using the Tavily API,
|
||||
which offers AI-powered search with real-time information retrieval.
|
||||
|
||||
Key Features:
|
||||
- Web search with AI-powered results
|
||||
- Content extraction and summarization
|
||||
- Real-time information retrieval
|
||||
- Topic-based search (general, news, finance)
|
||||
- Advanced search depth options
|
||||
- Cost-effective API usage with caching
|
||||
|
||||
Dependencies:
|
||||
- aiohttp (for async HTTP requests)
|
||||
- os (for environment variables)
|
||||
- loguru (for logging)
|
||||
|
||||
Author: ALwrity Team
|
||||
Version: 1.0
|
||||
Last Updated: January 2025
|
||||
"""
|
||||
|
||||
import os
|
||||
import json
|
||||
import aiohttp
|
||||
from typing import Dict, List, Optional, Any, Union
|
||||
from datetime import datetime, timedelta
|
||||
from loguru import logger
|
||||
from urllib.parse import urlparse
|
||||
|
||||
|
||||
class TavilyService:
|
||||
"""
|
||||
Service for web search and research using the Tavily API.
|
||||
|
||||
This service provides AI-powered search capabilities to find relevant
|
||||
content and information for research purposes.
|
||||
"""
|
||||
|
||||
def __init__(self):
    """Set up the Tavily endpoint and try to enable the service from the environment."""
    self.base_url = "https://api.tavily.com"
    self.api_key = os.getenv("TAVILY_API_KEY")

    # Start disabled: the key may not be available at import time in
    # production (it can be injected per-request via middleware), so the
    # actual enabling is left to _try_initialize(), which is retried later.
    self.enabled = False
    self._try_initialize()
|
||||
|
||||
def _try_initialize(self) -> None:
    """Enable the service if a TAVILY_API_KEY is present in the environment.

    Does nothing when the service is already enabled with a key. Otherwise
    the key is re-read from the environment and ``self.enabled`` is set to
    reflect whether one was found. Never raises; failures leave the service
    disabled.
    """
    already_ready = self.enabled and self.api_key
    if already_ready:
        return

    try:
        key = os.getenv("TAVILY_API_KEY")
        self.api_key = key
        if key:
            self.enabled = True
            logger.info("Tavily Service initialized successfully")
        else:
            # Leave disabled; caller may try again after middleware injection
            logger.warning("TAVILY_API_KEY not configured; Tavily service will be disabled")
            self.enabled = False
    except Exception as e:
        logger.error(f"Failed to initialize Tavily service: {e}")
        self.enabled = False
|
||||
|
||||
async def search(
    self,
    query: str,
    topic: str = "general",
    search_depth: str = "basic",
    max_results: int = 10,
    include_domains: Optional[List[str]] = None,
    exclude_domains: Optional[List[str]] = None,
    include_answer: Union[bool, str] = False,
    include_raw_content: Union[bool, str] = False,
    include_images: bool = False,
    include_image_descriptions: bool = False,
    include_favicon: bool = False,
    time_range: Optional[str] = None,
    start_date: Optional[str] = None,
    end_date: Optional[str] = None,
    country: Optional[str] = None,
    chunks_per_source: int = 3,
    auto_parameters: bool = False
) -> Dict[str, Any]:
    """
    Execute a search query using Tavily API.

    Args:
        query: The search query to execute
        topic: Category of search (general, news, finance)
        search_depth: Depth of search (basic, advanced) - basic costs 1 credit, advanced costs 2
        max_results: Maximum number of results to return (0-20)
        include_domains: List of domains to specifically include
        exclude_domains: List of domains to specifically exclude
        include_answer: Include LLM-generated answer (basic/advanced/true/false)
        include_raw_content: Include raw page content (markdown/text/true/false)
        include_images: Include image search results
        include_image_descriptions: Include image descriptions (only sent together with include_images)
        include_favicon: Include favicon URLs
        time_range: Time range filter (day, week, month, year, d, w, m, y)
        start_date: Start date filter (YYYY-MM-DD)
        end_date: End date filter (YYYY-MM-DD)
        country: Country filter (boost results from specific country; only sent for the general topic)
        chunks_per_source: Maximum chunks per source (1-3, only for advanced search)
        auto_parameters: Auto-configure parameters based on query

    Returns:
        Dictionary containing search results. On success: ``success: True``
        plus ``query``, ``answer``, ``results``, ``images``,
        ``response_time``, ``request_id``, ``auto_parameters``,
        ``total_results`` and ``timestamp``. On any failure (service
        disabled, timeout, non-200 response, network error):
        ``success: False`` with ``error`` and ``details``. This method does
        not raise.
    """
    # Local import: aiohttp signals an expired total timeout with
    # asyncio.TimeoutError. aiohttp.ClientTimeout is only the configuration
    # object; naming it in an ``except`` clause makes Python raise TypeError
    # while matching handlers, so no error dict would ever be returned.
    import asyncio

    try:
        # Ensure we pick up any per-request injected key
        self._try_initialize()
        if not self.enabled:
            raise ValueError("Tavily Service is not enabled - API key missing")

        logger.info(f"Starting Tavily search for: {query}")

        # Build request payload
        payload = {
            "api_key": self.api_key,
            "query": query,
            "topic": topic,
            "search_depth": search_depth,
            "max_results": min(max_results, 20),  # Tavily limit
            "include_favicon": include_favicon
        }

        # Add optional parameters
        if include_domains:
            payload["include_domains"] = include_domains[:300]  # Tavily limit

        if exclude_domains:
            payload["exclude_domains"] = exclude_domains[:150]  # Tavily limit

        if include_answer:
            payload["include_answer"] = include_answer

        if include_raw_content:
            payload["include_raw_content"] = include_raw_content

        if include_images:
            payload["include_images"] = include_images
            if include_image_descriptions:
                payload["include_image_descriptions"] = include_image_descriptions

        if time_range:
            payload["time_range"] = time_range

        if start_date:
            payload["start_date"] = start_date

        if end_date:
            payload["end_date"] = end_date

        if country and topic == "general":
            payload["country"] = country

        if search_depth == "advanced" and 1 <= chunks_per_source <= 3:
            payload["chunks_per_source"] = chunks_per_source

        if auto_parameters:
            payload["auto_parameters"] = True

        # Make API request
        async with aiohttp.ClientSession() as session:
            async with session.post(
                f"{self.base_url}/search",
                json=payload,
                headers={"Content-Type": "application/json"},
                timeout=aiohttp.ClientTimeout(total=60)
            ) as response:
                if response.status == 200:
                    result = await response.json()
                    logger.info(f"Tavily search completed successfully. Found {len(result.get('results', []))} results.")

                    # Process and structure results
                    processed_results = self._process_search_results(result, query)

                    return {
                        "success": True,
                        "query": result.get("query", query),
                        "answer": result.get("answer"),  # If include_answer was requested
                        "results": processed_results,
                        "images": result.get("images", []),
                        "response_time": result.get("response_time"),
                        "request_id": result.get("request_id"),
                        "auto_parameters": result.get("auto_parameters"),
                        "total_results": len(processed_results),
                        "timestamp": datetime.utcnow().isoformat()
                    }
                else:
                    error_text = await response.text()
                    logger.error(f"Tavily API error: {response.status} - {error_text}")
                    # Raised inside the try so the generic handler below
                    # turns it into the standard error dict
                    raise RuntimeError(f"Tavily API error: {response.status} - {error_text}")

    except asyncio.TimeoutError:
        logger.error("Tavily API request timed out")
        return {
            "success": False,
            "error": "Request timed out",
            "details": "The search request took too long to complete"
        }
    except Exception as e:
        logger.error(f"Error in Tavily search: {str(e)}")
        return {
            "success": False,
            "error": str(e),
            "details": "An unexpected error occurred during search"
        }
|
||||
|
||||
def _process_search_results(self, api_response: Dict[str, Any], query: str) -> List[Dict[str, Any]]:
|
||||
"""
|
||||
Process and structure Tavily API response into standardized format.
|
||||
|
||||
Args:
|
||||
api_response: Raw response from Tavily API
|
||||
query: Original search query
|
||||
|
||||
Returns:
|
||||
List of processed search results
|
||||
"""
|
||||
results = []
|
||||
raw_results = api_response.get("results", [])
|
||||
|
||||
for result in raw_results:
|
||||
try:
|
||||
# Extract domain from URL
|
||||
url = result.get("url", "")
|
||||
domain = urlparse(url).netloc if url else ""
|
||||
|
||||
# Calculate relevance score (Tavily provides score field)
|
||||
relevance_score = result.get("score", 0.5)
|
||||
|
||||
processed_result = {
|
||||
"url": url,
|
||||
"domain": domain,
|
||||
"title": result.get("title", ""),
|
||||
"content": result.get("content", ""),
|
||||
"raw_content": result.get("raw_content"), # If include_raw_content was requested
|
||||
"score": relevance_score,
|
||||
"relevance_score": relevance_score, # Alias for compatibility
|
||||
"favicon": result.get("favicon"),
|
||||
"published_date": result.get("published_date"),
|
||||
}
|
||||
|
||||
results.append(processed_result)
|
||||
|
||||
except Exception as e:
|
||||
logger.warning(f"Error processing Tavily result: {str(e)}")
|
||||
continue
|
||||
|
||||
# Sort by relevance score (highest first)
|
||||
results.sort(key=lambda x: x.get("relevance_score", 0), reverse=True)
|
||||
|
||||
return results
|
||||
|
||||
async def search_industry_trends(
    self,
    topic: str,
    industry: str,
    max_results: int = 10,
    search_depth: str = "basic"
) -> Dict[str, Any]:
    """
    Look up recent trends and insights for a topic within an industry.

    The request is delegated to ``self.search`` with a short generated
    answer, favicons enabled and the time window limited to the last month.

    Args:
        topic: The specific topic to research
        industry: The industry context for the search
        max_results: Maximum number of search results to return
        search_depth: Depth of search (basic or advanced)

    Returns:
        Whatever ``self.search`` returns for the industry-specific query
    """
    # The news topic is only used for basic-depth searches; any other depth
    # searches the general index.
    if search_depth == "basic":
        search_topic = "news"
    else:
        search_topic = "general"

    search_kwargs = {
        "query": f"{topic} {industry} trends insights",
        "topic": search_topic,
        "search_depth": search_depth,
        "max_results": max_results,
        "include_answer": "basic",
        "include_favicon": True,
        "time_range": "month",  # Last month for current trends
    }
    return await self.search(**search_kwargs)
|
||||
|
||||
async def discover_competitors(
    self,
    user_url: str,
    num_results: int = 10,
    include_domains: Optional[List[str]] = None,
    exclude_domains: Optional[List[str]] = None,
    industry_context: Optional[str] = None,
    website_analysis_data: Optional[Dict[str, Any]] = None
) -> Dict[str, Any]:
    """
    Discover competitors for a given website using Tavily search.

    Args:
        user_url: The website URL to find competitors for
        num_results: Number of competitor results to return
        include_domains: List of domains to include in search
        exclude_domains: List of domains to exclude from search. The list is
            copied; the user's own domain is added to the copy, never to the
            caller's list.
        industry_context: Industry context for better competitor discovery
        website_analysis_data: Optional website analysis payload; when
            ``analysis.target_audience.primary_audience`` is a non-empty
            string it is appended to the search query

    Returns:
        Dictionary containing competitor analysis results. On success it has
        ``success``, ``user_url``, ``competitors``, ``total_competitors``,
        ``analysis_timestamp``, ``industry_context`` and ``request_id``.
        If the underlying search fails, its result dict is returned
        unchanged. Any exception is reported as a dict with
        ``success: False``, ``error`` and ``details`` instead of propagating.
    """
    try:
        # Ensure we pick up any per-request injected key
        self._try_initialize()
        if not self.enabled:
            raise ValueError("Tavily Service is not enabled - API key missing")

        logger.info(f"Starting competitor discovery for: {user_url}")

        # Extract user domain for exclusion. urlparse() only fills netloc
        # when the URL has a scheme (or starts with "//"), so retry
        # scheme-less input such as "example.com/path" with a "//" prefix.
        user_domain = urlparse(user_url).netloc
        if not user_domain and user_url:
            user_domain = urlparse(f"//{user_url}").netloc

        # Work on a copy so the caller's list is not mutated, and never send
        # an empty or duplicate domain to the API.
        exclude_domains_list = list(exclude_domains or [])
        if user_domain and user_domain not in exclude_domains_list:
            exclude_domains_list.append(user_domain)

        # Build search query
        query_parts = ["similar websites", "competitors"]
        if industry_context:
            query_parts.append(f"in {industry_context}")

        # Extract insights from website analysis if available. Every level
        # is type-checked so a null or oddly shaped payload only skips the
        # enrichment instead of failing the whole discovery.
        if website_analysis_data:
            analysis = website_analysis_data.get('analysis') or {}
            audience = analysis.get('target_audience') if isinstance(analysis, dict) else None
            if isinstance(audience, dict):
                primary_audience = audience.get('primary_audience')
                if isinstance(primary_audience, str) and primary_audience:
                    query_parts.append(primary_audience)

        search_query = " ".join(query_parts)

        # Perform search
        search_result = await self.search(
            query=search_query,
            topic="general",
            search_depth="advanced",  # Use advanced for better competitor discovery
            max_results=num_results,
            include_domains=include_domains,
            exclude_domains=exclude_domains_list,
            include_favicon=True,
            chunks_per_source=3
        )

        if not search_result.get("success"):
            return search_result

        # Process results into competitor format
        competitors = []
        for result in search_result.get("results", []):
            summary = result.get("content", "")
            competitors.append({
                "url": result.get("url"),
                "domain": result.get("domain"),
                "title": result.get("title"),
                "summary": summary,
                "relevance_score": result.get("relevance_score", 0.5),
                "favicon": result.get("favicon"),
                "published_date": result.get("published_date"),
                "highlights": self._extract_highlights(summary),
                "competitive_insights": self._extract_competitive_insights(result),
                "content_insights": self._analyze_content_quality(result)
            })

        logger.info(f"Successfully discovered {len(competitors)} competitors for {user_url}")

        return {
            "success": True,
            "user_url": user_url,
            "competitors": competitors,
            "total_competitors": len(competitors),
            "analysis_timestamp": datetime.utcnow().isoformat(),
            "industry_context": industry_context,
            "request_id": search_result.get("request_id")
        }

    except Exception as e:
        logger.error(f"Error in competitor discovery: {str(e)}")
        return {
            "success": False,
            "error": str(e),
            "details": "An unexpected error occurred during competitor discovery"
        }
|
||||
|
||||
def _extract_highlights(self, content: str, num_sentences: int = 3) -> List[str]:
|
||||
"""Extract key highlights from content."""
|
||||
if not content:
|
||||
return []
|
||||
|
||||
# Simple sentence extraction (can be enhanced with NLP)
|
||||
sentences = [s.strip() for s in content.split('.') if s.strip()]
|
||||
return sentences[:num_sentences]
|
||||
|
||||
def _extract_competitive_insights(self, result: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""Extract competitive insights from search result."""
|
||||
content = result.get("content", "")
|
||||
title = result.get("title", "")
|
||||
|
||||
return {
|
||||
"business_model": "unknown",
|
||||
"target_audience": "unknown",
|
||||
"key_differentiators": []
|
||||
}
|
||||
|
||||
def _analyze_content_quality(self, result: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""Analyze content quality metrics."""
|
||||
content = result.get("content", "")
|
||||
|
||||
return {
|
||||
"content_focus": "general",
|
||||
"content_quality": "medium",
|
||||
"publishing_frequency": "unknown"
|
||||
}
|
||||
|
||||
@@ -3,6 +3,8 @@ Task Scheduler Package
|
||||
Modular, pluggable scheduler for ALwrity tasks.
|
||||
"""
|
||||
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from .core.scheduler import TaskScheduler
|
||||
from .core.executor_interface import TaskExecutor, TaskExecutionResult
|
||||
from .core.exception_handler import (
|
||||
@@ -11,8 +13,13 @@ from .core.exception_handler import (
|
||||
)
|
||||
from .executors.monitoring_task_executor import MonitoringTaskExecutor
|
||||
from .executors.oauth_token_monitoring_executor import OAuthTokenMonitoringExecutor
|
||||
from .executors.website_analysis_executor import WebsiteAnalysisExecutor
|
||||
from .executors.gsc_insights_executor import GSCInsightsExecutor
|
||||
from .executors.bing_insights_executor import BingInsightsExecutor
|
||||
from .utils.task_loader import load_due_monitoring_tasks
|
||||
from .utils.oauth_token_task_loader import load_due_oauth_token_monitoring_tasks
|
||||
from .utils.website_analysis_task_loader import load_due_website_analysis_tasks
|
||||
from .utils.platform_insights_task_loader import load_due_platform_insights_tasks
|
||||
|
||||
# Global scheduler instance (initialized on first access)
|
||||
_scheduler_instance: TaskScheduler = None
|
||||
@@ -47,6 +54,37 @@ def get_scheduler() -> TaskScheduler:
|
||||
oauth_token_executor,
|
||||
load_due_oauth_token_monitoring_tasks
|
||||
)
|
||||
|
||||
# Register website analysis executor
|
||||
website_analysis_executor = WebsiteAnalysisExecutor()
|
||||
_scheduler_instance.register_executor(
|
||||
'website_analysis',
|
||||
website_analysis_executor,
|
||||
load_due_website_analysis_tasks
|
||||
)
|
||||
|
||||
# Register platform insights executors
|
||||
# GSC insights executor
|
||||
def load_due_gsc_insights_tasks(db: Session, user_id=None):
|
||||
return load_due_platform_insights_tasks(db, user_id, platform='gsc')
|
||||
|
||||
gsc_insights_executor = GSCInsightsExecutor()
|
||||
_scheduler_instance.register_executor(
|
||||
'gsc_insights',
|
||||
gsc_insights_executor,
|
||||
load_due_gsc_insights_tasks
|
||||
)
|
||||
|
||||
# Bing insights executor
|
||||
def load_due_bing_insights_tasks(db: Session, user_id=None):
|
||||
return load_due_platform_insights_tasks(db, user_id, platform='bing')
|
||||
|
||||
bing_insights_executor = BingInsightsExecutor()
|
||||
_scheduler_instance.register_executor(
|
||||
'bing_insights',
|
||||
bing_insights_executor,
|
||||
load_due_bing_insights_tasks
|
||||
)
|
||||
|
||||
return _scheduler_instance
|
||||
|
||||
@@ -57,6 +95,9 @@ __all__ = [
|
||||
'TaskExecutionResult',
|
||||
'MonitoringTaskExecutor',
|
||||
'OAuthTokenMonitoringExecutor',
|
||||
'WebsiteAnalysisExecutor',
|
||||
'GSCInsightsExecutor',
|
||||
'BingInsightsExecutor',
|
||||
'get_scheduler',
|
||||
# Exception handling
|
||||
'SchedulerExceptionHandler',
|
||||
|
||||
@@ -10,6 +10,7 @@ from sqlalchemy.orm import Session
|
||||
from services.database import get_db_session
|
||||
from utils.logger_utils import get_service_logger
|
||||
from models.scheduler_models import SchedulerEventLog
|
||||
from models.scheduler_cumulative_stats_model import SchedulerCumulativeStats
|
||||
from .exception_handler import DatabaseError
|
||||
from .interval_manager import adjust_check_interval_if_needed
|
||||
|
||||
@@ -100,6 +101,7 @@ async def check_and_execute_due_tasks(scheduler: 'TaskScheduler'):
|
||||
logger.warning("\n".join(check_lines))
|
||||
|
||||
# Save check cycle event to database for historical tracking
|
||||
event_log_id = None
|
||||
try:
|
||||
event_log = SchedulerEventLog(
|
||||
event_type='check_cycle',
|
||||
@@ -119,11 +121,63 @@ async def check_and_execute_due_tasks(scheduler: 'TaskScheduler'):
|
||||
}
|
||||
)
|
||||
db.add(event_log)
|
||||
db.flush() # Flush to get the ID without committing
|
||||
event_log_id = event_log.id
|
||||
db.commit()
|
||||
logger.debug(f"[Check Cycle] Saved event log with ID: {event_log_id}")
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to save check cycle event log: {e}")
|
||||
logger.error(f"[Check Cycle] ❌ Failed to save check cycle event log: {e}", exc_info=True)
|
||||
if db:
|
||||
db.rollback()
|
||||
# Continue execution even if event log save fails
|
||||
|
||||
# Update cumulative stats table (persistent across restarts)
|
||||
try:
|
||||
cumulative_stats = SchedulerCumulativeStats.get_or_create(db)
|
||||
|
||||
# Update cumulative metrics by adding this cycle's values
|
||||
# Get current cycle values (incremental, not total)
|
||||
cycle_tasks_found = cycle_summary.get('total_found', 0)
|
||||
cycle_tasks_executed = cycle_summary.get('total_executed', 0)
|
||||
cycle_tasks_failed = cycle_summary.get('total_failed', 0)
|
||||
|
||||
# Update cumulative totals (additive)
|
||||
cumulative_stats.total_check_cycles += 1
|
||||
cumulative_stats.cumulative_tasks_found += cycle_tasks_found
|
||||
cumulative_stats.cumulative_tasks_executed += cycle_tasks_executed
|
||||
cumulative_stats.cumulative_tasks_failed += cycle_tasks_failed
|
||||
# Note: tasks_skipped in scheduler.stats is a running total, not per-cycle
|
||||
# We track it as-is from scheduler.stats (it's already cumulative)
|
||||
# This ensures we don't double-count skipped tasks
|
||||
if cumulative_stats.cumulative_tasks_skipped is None:
|
||||
cumulative_stats.cumulative_tasks_skipped = 0
|
||||
# Update to current total from scheduler (which is already cumulative)
|
||||
current_skipped = scheduler.stats.get('tasks_skipped', 0)
|
||||
if current_skipped > cumulative_stats.cumulative_tasks_skipped:
|
||||
cumulative_stats.cumulative_tasks_skipped = current_skipped
|
||||
cumulative_stats.last_check_cycle_id = event_log_id
|
||||
cumulative_stats.last_updated = datetime.utcnow()
|
||||
cumulative_stats.updated_at = datetime.utcnow()
|
||||
|
||||
db.commit()
|
||||
# Log at DEBUG level to avoid noise during normal operation
|
||||
# This is expected behavior, not a warning
|
||||
logger.debug(
|
||||
f"[Check Cycle] Updated cumulative stats: "
|
||||
f"cycles={cumulative_stats.total_check_cycles}, "
|
||||
f"found={cumulative_stats.cumulative_tasks_found}, "
|
||||
f"executed={cumulative_stats.cumulative_tasks_executed}, "
|
||||
f"failed={cumulative_stats.cumulative_tasks_failed}"
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(f"[Check Cycle] ❌ Failed to update cumulative stats: {e}", exc_info=True)
|
||||
if db:
|
||||
db.rollback()
|
||||
# Log warning but continue - cumulative stats can be rebuilt from event logs
|
||||
logger.warning(
|
||||
"[Check Cycle] ⚠️ Cumulative stats update failed. "
|
||||
"Stats can be rebuilt from event logs on next dashboard load."
|
||||
)
|
||||
|
||||
# Update last_update timestamp for frontend polling
|
||||
scheduler.stats['last_update'] = datetime.utcnow().isoformat()
|
||||
|
||||
@@ -104,19 +104,16 @@ async def restore_oauth_monitoring_tasks(scheduler):
|
||||
# Fallback to users with existing tasks only
|
||||
|
||||
total_created = 0
|
||||
restoration_summary = [] # Collect summary for single log
|
||||
|
||||
for user_id in users_to_check:
|
||||
try:
|
||||
# Get connected platforms for this user
|
||||
# Get connected platforms for this user (silent - no logging)
|
||||
connected_platforms = get_connected_platforms(user_id)
|
||||
|
||||
logger.warning(
|
||||
f"[OAuth Task Restoration] User {user_id}: "
|
||||
f"Connected platforms: {connected_platforms}"
|
||||
)
|
||||
|
||||
if not connected_platforms:
|
||||
logger.debug(
|
||||
f"[OAuth Task Restoration] No connected platforms for user {user_id}, skipping"
|
||||
f"[OAuth Task Restoration] No connected platforms for user {user_id[:20]}..., skipping"
|
||||
)
|
||||
continue
|
||||
|
||||
@@ -134,11 +131,6 @@ async def restore_oauth_monitoring_tasks(scheduler):
|
||||
]
|
||||
|
||||
if missing_platforms:
|
||||
logger.warning(
|
||||
f"[OAuth Task Restoration] ⚠️ User {user_id} has connected platforms "
|
||||
f"{connected_platforms} but missing tasks for: {missing_platforms}"
|
||||
)
|
||||
|
||||
# Create missing tasks
|
||||
created = create_oauth_monitoring_tasks(
|
||||
user_id=user_id,
|
||||
@@ -147,15 +139,10 @@ async def restore_oauth_monitoring_tasks(scheduler):
|
||||
)
|
||||
|
||||
total_created += len(created)
|
||||
|
||||
logger.warning(
|
||||
f"[OAuth Task Restoration] ✅ Created {len(created)} missing OAuth tasks "
|
||||
f"for user {user_id}, platforms: {missing_platforms}"
|
||||
)
|
||||
else:
|
||||
logger.warning(
|
||||
f"[OAuth Task Restoration] ✅ User {user_id} has all required tasks "
|
||||
f"for connected platforms: {connected_platforms}"
|
||||
# Collect summary info instead of logging immediately
|
||||
platforms_str = ", ".join([p.upper() for p in missing_platforms])
|
||||
restoration_summary.append(
|
||||
f" ├─ User {user_id[:20]}...: {len(created)} tasks ({platforms_str})"
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
@@ -173,16 +160,23 @@ async def restore_oauth_monitoring_tasks(scheduler):
|
||||
|
||||
final_platform_summary = ", ".join([f"{p}: {c}" for p, c in sorted(final_by_platform.items())])
|
||||
|
||||
# Single formatted summary log (similar to scheduler startup)
|
||||
if total_created > 0:
|
||||
summary_lines = "\n".join(restoration_summary[:5]) # Show first 5 users
|
||||
if len(restoration_summary) > 5:
|
||||
summary_lines += f"\n └─ ... and {len(restoration_summary) - 5} more users"
|
||||
|
||||
logger.warning(
|
||||
f"[OAuth Task Restoration] ✅ Created {total_created} missing OAuth monitoring tasks. "
|
||||
f"Final platform breakdown: {final_platform_summary}"
|
||||
f"[OAuth Task Restoration] ✅ OAuth Monitoring Tasks Restored\n"
|
||||
f" ├─ Tasks Created: {total_created}\n"
|
||||
f" ├─ Users Processed: {len(users_to_check)}\n"
|
||||
f" ├─ Platform Breakdown: {final_platform_summary}\n"
|
||||
+ summary_lines
|
||||
)
|
||||
else:
|
||||
logger.warning(
|
||||
f"[OAuth Task Restoration] ✅ All users have required OAuth monitoring tasks. "
|
||||
f"Checked {len(users_to_check)} users, found {len(existing_tasks)} existing tasks. "
|
||||
f"Platform breakdown: {final_platform_summary}"
|
||||
f"Checked {len(users_to_check)} users. Platform breakdown: {final_platform_summary}"
|
||||
)
|
||||
|
||||
finally:
|
||||
|
||||
@@ -0,0 +1,152 @@
|
||||
"""
|
||||
Platform Insights Task Restoration
|
||||
Automatically creates missing platform insights tasks for users who have connected platforms
|
||||
but don't have insights tasks created yet.
|
||||
"""
|
||||
|
||||
from datetime import datetime, timedelta
|
||||
from typing import List
|
||||
from sqlalchemy.orm import Session
|
||||
from utils.logger_utils import get_service_logger
|
||||
|
||||
from services.database import get_db_session
|
||||
from models.platform_insights_monitoring_models import PlatformInsightsTask
|
||||
from services.platform_insights_monitoring_service import create_platform_insights_task
|
||||
from services.oauth_token_monitoring_service import get_connected_platforms
|
||||
from models.oauth_token_monitoring_models import OAuthTokenMonitoringTask
|
||||
|
||||
logger = get_service_logger("platform_insights_task_restoration")
|
||||
|
||||
|
||||
async def restore_platform_insights_tasks(scheduler):
    """
    Restore/create missing platform insights tasks for all users.

    Candidate users are those with an OAuth monitoring task for GSC or Bing.
    For each candidate, the currently connected platforms are looked up via
    ``get_connected_platforms`` and a ``PlatformInsightsTask`` is created for
    every connected GSC/Bing platform that does not have one yet.

    Errors for a single user or platform are logged at debug level and do
    not stop the restoration; any other error is logged and swallowed so
    the caller is never interrupted.

    Args:
        scheduler: TaskScheduler instance (not used by the restoration
            itself)
    """
    try:
        logger.warning("[Platform Insights Restoration] Starting platform insights task restoration...")
        db = get_db_session()
        if not db:
            logger.warning("[Platform Insights Restoration] Could not get database session")
            return

        try:
            # Index existing insights tasks by user once, so the per-user
            # check below is a dict lookup instead of a rescan of all tasks.
            existing_tasks = db.query(PlatformInsightsTask).all()
            existing_platforms_by_user = {}
            for task in existing_tasks:
                existing_platforms_by_user.setdefault(task.user_id, set()).add(task.platform)

            # Platforms that support insights (GSC and Bing only)
            insights_platforms = ['gsc', 'bing']

            # Users who have OAuth tasks for GSC or Bing
            oauth_tasks = db.query(OAuthTokenMonitoringTask).all()
            users_to_check = {
                task.user_id
                for task in oauth_tasks
                if task.platform in insights_platforms
            }

            logger.warning(
                f"[Platform Insights Restoration] Found {len(existing_tasks)} existing insights tasks "
                f"for {len(existing_platforms_by_user)} users. Checking {len(users_to_check)} users "
                f"with GSC/Bing OAuth connections."
            )

            if not users_to_check:
                logger.warning("[Platform Insights Restoration] No users with GSC/Bing connections found")
                return

            total_created = 0
            total_failed = 0
            restoration_summary = []

            for user_id in users_to_check:
                try:
                    # Get connected platforms for this user, filtered to GSC and Bing
                    connected_platforms = get_connected_platforms(user_id)
                    insights_connected = [p for p in connected_platforms if p in insights_platforms]

                    if not insights_connected:
                        logger.debug(
                            f"[Platform Insights Restoration] No GSC/Bing connections for user {user_id[:20]}..., skipping"
                        )
                        continue

                    # Check which platforms are missing insights tasks
                    existing_platforms = existing_platforms_by_user.get(user_id, set())
                    missing_platforms = [
                        platform
                        for platform in insights_connected
                        if platform not in existing_platforms
                    ]

                    for platform in missing_platforms:
                        try:
                            # Don't fetch site_url here - it requires API calls.
                            # The executor will fetch it when the task runs (weekly),
                            # which avoids API calls during restoration.
                            result = create_platform_insights_task(
                                user_id=user_id,
                                platform=platform,
                                site_url=None,  # Will be fetched by executor when task runs
                                db=db
                            )

                            if result.get('success'):
                                total_created += 1
                                restoration_summary.append(
                                    f" ├─ User {user_id[:20]}...: {platform.upper()} task created"
                                )
                            else:
                                total_failed += 1
                                logger.debug(
                                    f"[Platform Insights Restoration] Failed to create {platform} task "
                                    f"for user {user_id}: {result.get('error')}"
                                )
                        except Exception as e:
                            total_failed += 1
                            logger.debug(
                                f"[Platform Insights Restoration] Error creating {platform} task "
                                f"for user {user_id}: {e}"
                            )
                            continue

                except Exception as e:
                    total_failed += 1
                    logger.debug(
                        f"[Platform Insights Restoration] Error processing user {user_id}: {e}"
                    )
                    continue

            # Log summary
            if total_created > 0:
                logger.warning(
                    f"[Platform Insights Restoration] ✅ Created {total_created} platform insights tasks:\n" +
                    "\n".join(restoration_summary)
                )

            if total_failed > 0:
                # Do not claim that everything is in place when some users or
                # platforms could not be processed.
                logger.warning(
                    f"[Platform Insights Restoration] ⚠️ Encountered {total_failed} error(s) while restoring "
                    f"platform insights tasks; see debug log for details."
                )
            elif total_created == 0:
                logger.warning(
                    f"[Platform Insights Restoration] ✅ All users have required platform insights tasks. "
                    f"Checked {len(users_to_check)} users, found {len(existing_tasks)} existing tasks."
                )

        finally:
            db.close()

    except Exception as e:
        logger.error(f"[Platform Insights Restoration] Error during restoration: {e}", exc_info=True)
|
||||
|
||||
@@ -26,6 +26,8 @@ from models.scheduler_models import SchedulerEventLog
|
||||
from .interval_manager import determine_optimal_interval, adjust_check_interval_if_needed
|
||||
from .job_restoration import restore_persona_jobs
|
||||
from .oauth_task_restoration import restore_oauth_monitoring_tasks
|
||||
from .website_analysis_task_restoration import restore_website_analysis_tasks
|
||||
from .platform_insights_task_restoration import restore_platform_insights_tasks
|
||||
from .check_cycle_handler import check_and_execute_due_tasks
|
||||
from .task_execution_handler import execute_task_async
|
||||
|
||||
@@ -185,6 +187,15 @@ class TaskScheduler:
|
||||
# Restore/create missing OAuth token monitoring tasks for connected platforms
|
||||
await restore_oauth_monitoring_tasks(self)
|
||||
|
||||
# Restore/create missing website analysis tasks for users who completed onboarding
|
||||
await restore_website_analysis_tasks(self)
|
||||
|
||||
# Restore/create missing platform insights tasks for users with connected GSC/Bing
|
||||
await restore_platform_insights_tasks(self)
|
||||
|
||||
# Validate and rebuild cumulative stats if needed
|
||||
await self._validate_and_rebuild_cumulative_stats()
|
||||
|
||||
# Get all scheduled APScheduler jobs (including one-time tasks)
|
||||
all_jobs = self.scheduler.get_jobs()
|
||||
registered_types = self.registry.get_registered_types()
|
||||
@@ -260,27 +271,55 @@ class TaskScheduler:
|
||||
f"tasks haven't been created. Error type: {type(e).__name__}"
|
||||
)
|
||||
|
||||
# Get website analysis tasks count
|
||||
website_analysis_tasks_count = 0
|
||||
try:
|
||||
from models.website_analysis_monitoring_models import WebsiteAnalysisTask
|
||||
website_analysis_tasks_count = db.query(WebsiteAnalysisTask).filter(
|
||||
WebsiteAnalysisTask.status == 'active'
|
||||
).count()
|
||||
except Exception as e:
|
||||
logger.debug(f"Could not get website analysis tasks count: {e}")
|
||||
|
||||
# Get platform insights tasks count
|
||||
platform_insights_tasks_count = 0
|
||||
try:
|
||||
from models.platform_insights_monitoring_models import PlatformInsightsTask
|
||||
platform_insights_tasks_count = db.query(PlatformInsightsTask).filter(
|
||||
PlatformInsightsTask.status == 'active'
|
||||
).count()
|
||||
except Exception as e:
|
||||
logger.debug(f"Could not get platform insights tasks count: {e}")
|
||||
|
||||
# Calculate job counts
|
||||
apscheduler_recurring = 1 # check_due_tasks
|
||||
apscheduler_one_time = len(all_jobs) - 1
|
||||
total_recurring = apscheduler_recurring + oauth_tasks_count
|
||||
total_jobs = len(all_jobs) + oauth_tasks_count
|
||||
total_recurring = apscheduler_recurring + oauth_tasks_count + website_analysis_tasks_count + platform_insights_tasks_count
|
||||
total_jobs = len(all_jobs) + oauth_tasks_count + website_analysis_tasks_count + platform_insights_tasks_count
|
||||
|
||||
# Build comprehensive startup log message
|
||||
recurring_breakdown = f"check_due_tasks: {apscheduler_recurring}"
|
||||
if oauth_tasks_count > 0:
|
||||
recurring_breakdown += f", OAuth monitoring: {oauth_tasks_count}"
|
||||
if website_analysis_tasks_count > 0:
|
||||
recurring_breakdown += f", Website analysis: {website_analysis_tasks_count}"
|
||||
if platform_insights_tasks_count > 0:
|
||||
recurring_breakdown += f", Platform insights: {platform_insights_tasks_count}"
|
||||
|
||||
startup_lines = [
|
||||
f"[Scheduler] ✅ Task Scheduler Started",
|
||||
f" ├─ Check Interval: {initial_interval} minutes",
|
||||
f" ├─ Registered Task Types: {len(registered_types)} ({', '.join(registered_types) if registered_types else 'none'})",
|
||||
f" ├─ Active Strategies: {active_strategies}",
|
||||
f" ├─ Total Scheduled Jobs: {total_jobs}",
|
||||
f" ├─ Recurring Jobs: {total_recurring} (check_due_tasks: {apscheduler_recurring}, OAuth monitoring: {oauth_tasks_count})",
|
||||
f" ├─ Recurring Jobs: {total_recurring} ({recurring_breakdown})",
|
||||
f" └─ One-Time Jobs: {apscheduler_one_time}"
|
||||
]
|
||||
|
||||
# Add APScheduler job details
|
||||
if all_jobs:
|
||||
for idx, job in enumerate(all_jobs):
|
||||
is_last = idx == len(all_jobs) - 1 and oauth_tasks_count == 0
|
||||
is_last = idx == len(all_jobs) - 1 and oauth_tasks_count == 0 and website_analysis_tasks_count == 0 and platform_insights_tasks_count == 0
|
||||
prefix = " └─" if is_last else " ├─"
|
||||
next_run = job.next_run_time
|
||||
trigger_type = type(job.trigger).__name__
|
||||
@@ -338,7 +377,7 @@ class TaskScheduler:
|
||||
oauth_tasks = db.query(OAuthTokenMonitoringTask).all()
|
||||
|
||||
for idx, task in enumerate(oauth_tasks):
|
||||
is_last = idx == len(oauth_tasks) - 1 and len(all_jobs) == 0
|
||||
is_last = idx == len(oauth_tasks) - 1 and website_analysis_tasks_count == 0 and platform_insights_tasks_count == 0 and len(all_jobs) == 0
|
||||
prefix = " └─" if is_last else " ├─"
|
||||
|
||||
try:
|
||||
@@ -367,6 +406,71 @@ class TaskScheduler:
|
||||
except Exception as e:
|
||||
logger.debug(f"Could not get OAuth token monitoring task details: {e}")
|
||||
|
||||
# Add website analysis tasks details
|
||||
if website_analysis_tasks_count > 0:
|
||||
try:
|
||||
db = get_db_session()
|
||||
if db:
|
||||
from models.website_analysis_monitoring_models import WebsiteAnalysisTask
|
||||
website_analysis_tasks = db.query(WebsiteAnalysisTask).all()
|
||||
|
||||
for idx, task in enumerate(website_analysis_tasks):
|
||||
is_last = idx == len(website_analysis_tasks) - 1 and platform_insights_tasks_count == 0 and len(all_jobs) == 0 and total_oauth_tasks == 0
|
||||
prefix = " └─" if is_last else " ├─"
|
||||
|
||||
try:
|
||||
user_job_store = get_user_job_store_name(task.user_id, db)
|
||||
except Exception as e:
|
||||
logger.debug(f"Could not extract job store name for user {task.user_id}: {e}")
|
||||
user_job_store = 'default'
|
||||
|
||||
next_check = task.next_check.isoformat() if task.next_check else 'Not scheduled'
|
||||
frequency = f"Every {task.frequency_days} days"
|
||||
task_type_label = "User Website" if task.task_type == 'user_website' else "Competitor"
|
||||
status_indicator = "✅" if task.status == 'active' else f"[{task.status}]"
|
||||
website_display = task.website_url[:50] + "..." if task.website_url and len(task.website_url) > 50 else (task.website_url or 'N/A')
|
||||
|
||||
startup_lines.append(
|
||||
f"{prefix} Job: website_analysis_{task.task_type}_{task.user_id}_{task.id} | "
|
||||
f"Trigger: CronTrigger ({frequency}) | Next Run: {next_check} | "
|
||||
f"User: {task.user_id} | Store: {user_job_store} | Type: {task_type_label} | URL: {website_display} {status_indicator}"
|
||||
)
|
||||
db.close()
|
||||
except Exception as e:
|
||||
logger.debug(f"Could not get website analysis task details: {e}")
|
||||
|
||||
# Add platform insights tasks details
|
||||
if platform_insights_tasks_count > 0:
|
||||
try:
|
||||
db = get_db_session()
|
||||
if db:
|
||||
from models.platform_insights_monitoring_models import PlatformInsightsTask
|
||||
platform_insights_tasks = db.query(PlatformInsightsTask).all()
|
||||
|
||||
for idx, task in enumerate(platform_insights_tasks):
|
||||
is_last = idx == len(platform_insights_tasks) - 1 and len(all_jobs) == 0 and total_oauth_tasks == 0 and website_analysis_tasks_count == 0
|
||||
prefix = " └─" if is_last else " ├─"
|
||||
|
||||
try:
|
||||
user_job_store = get_user_job_store_name(task.user_id, db)
|
||||
except Exception as e:
|
||||
logger.debug(f"Could not extract job store name for user {task.user_id}: {e}")
|
||||
user_job_store = 'default'
|
||||
|
||||
next_check = task.next_check.isoformat() if task.next_check else 'Not scheduled'
|
||||
platform_label = task.platform.upper() if task.platform else 'Unknown'
|
||||
site_display = task.site_url[:50] + "..." if task.site_url and len(task.site_url) > 50 else (task.site_url or 'N/A')
|
||||
status_indicator = "✅" if task.status == 'active' else f"[{task.status}]"
|
||||
|
||||
startup_lines.append(
|
||||
f"{prefix} Job: platform_insights_{task.platform}_{task.user_id} | "
|
||||
f"Trigger: CronTrigger (Weekly) | Next Run: {next_check} | "
|
||||
f"User: {task.user_id} | Store: {user_job_store} | Platform: {platform_label} | Site: {site_display} {status_indicator}"
|
||||
)
|
||||
db.close()
|
||||
except Exception as e:
|
||||
logger.debug(f"Could not get platform insights task details: {e}")
|
||||
|
||||
# Log comprehensive startup information in single message
|
||||
logger.warning("\n".join(startup_lines))
|
||||
|
||||
@@ -384,7 +488,9 @@ class TaskScheduler:
|
||||
'total_jobs': total_jobs,
|
||||
'recurring_jobs': total_recurring,
|
||||
'one_time_jobs': apscheduler_one_time,
|
||||
'oauth_monitoring_tasks': oauth_tasks_count
|
||||
'oauth_monitoring_tasks': oauth_tasks_count,
|
||||
'website_analysis_tasks': website_analysis_tasks_count,
|
||||
'platform_insights_tasks': platform_insights_tasks_count
|
||||
}
|
||||
)
|
||||
db.add(event_log)
|
||||
@@ -533,6 +639,128 @@ class TaskScheduler:
|
||||
except Exception as e:
|
||||
logger.warning(f"Error triggering interval adjustment: {e}")
|
||||
|
||||
async def _validate_and_rebuild_cumulative_stats(self):
    """
    Reconcile the persisted cumulative stats with the scheduler event log.

    The stats row with ``id == 1`` is compared against the number of
    ``check_cycle`` events stored in the event log:

    - row present and counts equal: only a confirmation is logged;
    - row present but counts differ: the row is overwritten with totals
      aggregated from the event log and committed;
    - row missing: a row is obtained through
      ``SchedulerCumulativeStats.get_or_create`` and filled the same way.

    Returns:
        None. Failures are logged, never raised, and the session obtained
        here is always closed.
    """
    db = None
    try:
        db = get_db_session()
        if not db:
            logger.warning("[Scheduler] Could not get database session for cumulative stats validation")
            return

        try:
            # Imported lazily so that a missing model is reported as a
            # "migration not run" warning by the ImportError handler below.
            from models.scheduler_cumulative_stats_model import SchedulerCumulativeStats
            from models.scheduler_models import SchedulerEventLog
            from sqlalchemy import func

            def load_event_log_totals():
                # One aggregate row: (cycle count, found, executed, failed).
                return db.query(
                    func.count(SchedulerEventLog.id),
                    func.sum(SchedulerEventLog.tasks_found),
                    func.sum(SchedulerEventLog.tasks_executed),
                    func.sum(SchedulerEventLog.tasks_failed)
                ).filter(
                    SchedulerEventLog.event_type == 'check_cycle'
                ).first()

            def apply_totals(stats_row, totals_row):
                # Aggregates may come back as None; treat those as 0.
                cycles, found, executed, failed = [
                    0 if totals_row[position] is None else totals_row[position]
                    for position in range(4)
                ]
                stats_row.total_check_cycles = int(cycles)
                stats_row.cumulative_tasks_found = int(found)
                stats_row.cumulative_tasks_executed = int(executed)
                stats_row.cumulative_tasks_failed = int(failed)
                stats_row.last_updated = datetime.utcnow()
                stats_row.updated_at = datetime.utcnow()
                return cycles, found, executed, failed

            # Persisted counters (single well-known row).
            persisted = db.query(SchedulerCumulativeStats).filter(
                SchedulerCumulativeStats.id == 1
            ).first()

            # Ground truth: number of check_cycle events actually logged.
            logged_cycles = db.query(func.count(SchedulerEventLog.id)).filter(
                SchedulerEventLog.event_type == 'check_cycle'
            ).scalar() or 0

            if not persisted:
                # No stats row yet: build one from the event log.
                logger.warning(
                    "[Scheduler] Cumulative stats table not found. "
                    "Creating from event logs..."
                )
                totals_row = load_event_log_totals()
                if totals_row:
                    persisted = SchedulerCumulativeStats.get_or_create(db)
                    cycles, found, executed, failed = apply_totals(persisted, totals_row)
                    db.commit()
                    logger.warning(
                        f"[Scheduler] ✅ Created cumulative stats from event logs: "
                        f"cycles={cycles}, found={found}, "
                        f"executed={executed}, failed={failed}"
                    )
            elif persisted.total_check_cycles == logged_cycles:
                logger.warning(
                    f"[Scheduler] ✅ Cumulative stats validated: "
                    f"{persisted.total_check_cycles} check cycles match event logs"
                )
            else:
                logger.warning(
                    f"[Scheduler] ⚠️ Cumulative stats validation failed on startup: "
                    f"cumulative_stats.total_check_cycles={persisted.total_check_cycles} "
                    f"vs event_logs.count={logged_cycles}. "
                    f"Rebuilding cumulative stats from event logs..."
                )
                totals_row = load_event_log_totals()
                if totals_row:
                    cycles, found, executed, failed = apply_totals(persisted, totals_row)
                    db.commit()
                    logger.warning(
                        f"[Scheduler] ✅ Rebuilt cumulative stats on startup: "
                        f"cycles={cycles}, found={found}, "
                        f"executed={executed}, failed={failed}"
                    )
                else:
                    logger.warning("[Scheduler] No check_cycle events found to rebuild from")
        except ImportError:
            logger.warning(
                "[Scheduler] Cumulative stats model not available. "
                "Migration may not have been run yet. "
                "Run: python backend/scripts/run_cumulative_stats_migration.py"
            )
    except Exception as e:
        logger.error(f"[Scheduler] Error validating cumulative stats: {e}", exc_info=True)
    finally:
        if db:
            db.close()
|
||||
|
||||
async def _process_task_type(self, task_type: str, db: Session, cycle_summary: Dict[str, Any] = None) -> Optional[Dict[str, Any]]:
|
||||
"""
|
||||
Process due tasks for a specific task type.
|
||||
|
||||
@@ -0,0 +1,193 @@
|
||||
"""
|
||||
Website Analysis Task Restoration
|
||||
Automatically creates missing website analysis tasks for users who completed onboarding
|
||||
but don't have monitoring tasks created yet.
|
||||
"""
|
||||
|
||||
from typing import List
|
||||
from sqlalchemy.orm import Session
|
||||
from utils.logger_utils import get_service_logger
|
||||
|
||||
from services.database import get_db_session
|
||||
from models.website_analysis_monitoring_models import WebsiteAnalysisTask
|
||||
from services.website_analysis_monitoring_service import create_website_analysis_tasks
|
||||
from models.onboarding import OnboardingSession
|
||||
from sqlalchemy import or_
|
||||
|
||||
# Use service logger for consistent logging (WARNING level visible in production)
|
||||
logger = get_service_logger("website_analysis_restoration")
|
||||
|
||||
|
||||
async def restore_website_analysis_tasks(scheduler):
    """
    Restore/create missing website analysis tasks for all users.

    Collects every user that either already owns a ``WebsiteAnalysisTask`` or
    has a completed onboarding session (``current_step >= 6`` or
    ``progress >= 100.0``, re-validated through the onboarding progress
    service), and calls ``create_website_analysis_tasks`` for each user that
    has no task yet. Users that already have tasks are skipped.

    All errors are logged and swallowed; a failure for one user does not stop
    the processing of the remaining users.

    Args:
        scheduler: TaskScheduler instance (not read by this function).

    Returns:
        None
    """
    try:
        logger.warning("[Website Analysis Restoration] Starting website analysis task restoration...")
        db = get_db_session()
        if not db:
            logger.warning("[Website Analysis Restoration] Could not get database session")
            return

        try:
            # Check if table exists (may not exist if migration hasn't run)
            try:
                existing_tasks = db.query(WebsiteAnalysisTask).all()
            except Exception as table_error:
                logger.error(
                    f"[Website Analysis Restoration] ⚠️ WebsiteAnalysisTask table may not exist: {table_error}. "
                    f"Please run database migration: create_website_analysis_monitoring_tables.sql"
                )
                return

            # Index the existing tasks once (per user and per type) so the
            # per-user loop below does not rescan the whole task list.
            tasks_per_user = {}
            existing_by_type = {}
            for task in existing_tasks:
                tasks_per_user[task.user_id] = tasks_per_user.get(task.user_id, 0) + 1
                existing_by_type[task.task_type] = existing_by_type.get(task.task_type, 0) + 1
            user_ids_with_tasks = set(tasks_per_user)

            type_summary = ", ".join([f"{t}: {c}" for t, c in sorted(existing_by_type.items())])
            logger.warning(
                f"[Website Analysis Restoration] Found {len(existing_tasks)} existing website analysis tasks "
                f"for {len(user_ids_with_tasks)} users. Types: {type_summary}"
            )

            # Start from users who already have at least one task ...
            users_to_check = list(user_ids_with_tasks)

            # ... and add users who completed onboarding but never got tasks.
            # Completion follows OnboardingProgressService.get_onboarding_status():
            #   is_completed = (current_step >= 6) or (progress >= 100.0)
            try:
                # Local import kept from the original code.
                from services.onboarding.progress_service import get_onboarding_progress_service

                progress_service = get_onboarding_progress_service()

                completed_sessions = db.query(OnboardingSession).filter(
                    or_(
                        OnboardingSession.current_step >= 6,
                        OnboardingSession.progress >= 100.0
                    )
                ).all()

                # Re-validate through the service so the result matches the rest of the app.
                onboarding_user_ids = set()
                for session in completed_sessions:
                    status = progress_service.get_onboarding_status(session.user_id)
                    if status.get('is_completed', False):
                        onboarding_user_ids.add(session.user_id)

                # Set difference: users that completed onboarding and own no task.
                # (Subtracting the two set sizes would miscount, and can even go
                # negative, whenever the sets only partially overlap.)
                new_user_ids = onboarding_user_ids - user_ids_with_tasks
                users_to_check.extend(new_user_ids)

                logger.warning(
                    f"[Website Analysis Restoration] Checking {len(users_to_check)} users "
                    f"({len(user_ids_with_tasks)} with existing tasks, "
                    f"{len(onboarding_user_ids)} from onboarding sessions, "
                    f"{len(new_user_ids)} new users to check)"
                )
            except Exception as e:
                logger.warning(f"[Website Analysis Restoration] Could not query onboarding users: {e}")
                # Fallback to users with existing tasks only
                users_to_check = list(user_ids_with_tasks)

            total_created = 0
            users_processed = 0

            for user_id in users_to_check:
                try:
                    users_processed += 1

                    existing_count = tasks_per_user.get(user_id, 0)
                    if existing_count:
                        logger.debug(
                            f"[Website Analysis Restoration] User {user_id} already has "
                            f"{existing_count} website analysis tasks, skipping"
                        )
                        continue

                    logger.warning(
                        f"[Website Analysis Restoration] ⚠️ User {user_id} completed onboarding "
                        f"but has no website analysis tasks. Creating tasks..."
                    )

                    # Create missing tasks
                    result = create_website_analysis_tasks(user_id=user_id, db=db)

                    if result.get('success'):
                        tasks_count = result.get('tasks_created', 0)
                        total_created += tasks_count
                        logger.warning(
                            f"[Website Analysis Restoration] ✅ Created {tasks_count} website analysis tasks "
                            f"for user {user_id}"
                        )
                    else:
                        error = result.get('error', 'Unknown error')
                        logger.warning(
                            f"[Website Analysis Restoration] ⚠️ Could not create tasks for user {user_id}: {error}"
                        )

                except Exception as e:
                    logger.warning(
                        f"[Website Analysis Restoration] Error checking/creating tasks for user {user_id}: {e}",
                        exc_info=True
                    )
                    # A failed flush/commit leaves the session unusable until it
                    # is rolled back; without this, every following user and the
                    # final summary query would fail as well.
                    try:
                        db.rollback()
                    except Exception as rollback_error:
                        logger.warning(
                            f"[Website Analysis Restoration] Rollback failed after error for user {user_id}: "
                            f"{rollback_error}"
                        )
                    continue

            # Final summary log
            final_existing_tasks = db.query(WebsiteAnalysisTask).all()
            final_by_type = {}
            for task in final_existing_tasks:
                final_by_type[task.task_type] = final_by_type.get(task.task_type, 0) + 1

            final_type_summary = ", ".join([f"{t}: {c}" for t, c in sorted(final_by_type.items())])

            if total_created > 0:
                logger.warning(
                    f"[Website Analysis Restoration] ✅ Created {total_created} missing website analysis tasks. "
                    f"Processed {users_processed} users. Final type breakdown: {final_type_summary}"
                )
            else:
                logger.warning(
                    f"[Website Analysis Restoration] ✅ All users have required website analysis tasks. "
                    f"Checked {users_processed} users, found {len(existing_tasks)} existing tasks. "
                    f"Type breakdown: {final_type_summary}"
                )

        finally:
            db.close()

    except Exception as e:
        logger.error(
            f"[Website Analysis Restoration] Error restoring website analysis tasks: {e}",
            exc_info=True
        )
|
||||
|
||||
298
backend/services/scheduler/executors/bing_insights_executor.py
Normal file
298
backend/services/scheduler/executors/bing_insights_executor.py
Normal file
@@ -0,0 +1,298 @@
|
||||
"""
|
||||
Bing Insights Task Executor
|
||||
Handles execution of Bing insights fetch tasks for connected platforms.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import os
|
||||
import time
|
||||
from datetime import datetime, timedelta
|
||||
from typing import Dict, Any, Optional
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from ..core.executor_interface import TaskExecutor, TaskExecutionResult
|
||||
from ..core.exception_handler import TaskExecutionError, DatabaseError, SchedulerExceptionHandler
|
||||
from models.platform_insights_monitoring_models import PlatformInsightsTask, PlatformInsightsExecutionLog
|
||||
from services.bing_analytics_storage_service import BingAnalyticsStorageService
|
||||
from services.integrations.bing_oauth import BingOAuthService
|
||||
from utils.logger_utils import get_service_logger
|
||||
|
||||
logger = get_service_logger("bing_insights_executor")
|
||||
|
||||
|
||||
class BingInsightsExecutor(TaskExecutor):
    """
    Executor for Bing insights fetch tasks.

    Handles:
    - Running a Bing insights task and recording a PlatformInsightsExecutionLog
    - On first run (task has no ``last_success``): loading cached analytics
    - On subsequent runs: verifying tokens/sites, then building the insights
      from stored analytics (no live Bing API call is made here yet)
    - Updating task status and scheduling the next check (7 days after a
      success, 1 day after a failure)
    """

    def __init__(self):
        """Wire up the logger, exception handler, analytics storage and OAuth service."""
        self.logger = logger
        self.exception_handler = SchedulerExceptionHandler()
        # Storage service is pointed at DATABASE_URL, with a local SQLite fallback.
        database_url = os.getenv('DATABASE_URL', 'sqlite:///alwrity.db')
        self.storage_service = BingAnalyticsStorageService(database_url)
        self.bing_oauth = BingOAuthService()

    async def execute_task(self, task: PlatformInsightsTask, db: Session) -> TaskExecutionResult:
        """
        Execute a Bing insights fetch task.

        Creates an execution log row, fetches the insights, then stores the
        outcome on both the log row and the task and commits.

        Args:
            task: PlatformInsightsTask instance
            db: Database session

        Returns:
            TaskExecutionResult: the fetch result, or the result produced by
            the exception handler when an unexpected error occurs.
        """
        start_time = time.time()
        user_id = task.user_id
        site_url = task.site_url

        try:
            self.logger.info(
                f"Executing Bing insights fetch: task_id={task.id} | "
                f"user_id={user_id} | site_url={site_url}"
            )

            # Create execution log
            execution_log = PlatformInsightsExecutionLog(
                task_id=task.id,
                execution_date=datetime.utcnow(),
                status='running'
            )
            db.add(execution_log)
            db.flush()

            # Fetch insights
            result = await self._fetch_insights(task, db)

            # Update execution log
            execution_time_ms = int((time.time() - start_time) * 1000)
            execution_log.status = 'success' if result.success else 'failed'
            execution_log.result_data = result.result_data
            execution_log.error_message = result.error_message
            execution_log.execution_time_ms = execution_time_ms
            # result_data may be absent; do not let that turn a success into an error.
            execution_log.data_source = (
                (result.result_data or {}).get('data_source') if result.success else None
            )

            # Update task based on result
            task.last_check = datetime.utcnow()

            if result.success:
                task.last_success = datetime.utcnow()
                task.status = 'active'
                task.failure_reason = None
                # Schedule next check (7 days from now)
                task.next_check = self.calculate_next_execution(
                    task=task,
                    frequency='Weekly',
                    last_execution=task.last_check
                )
            else:
                task.last_failure = datetime.utcnow()
                task.failure_reason = result.error_message
                task.status = 'failed'
                # Schedule retry in 1 day
                task.next_check = datetime.utcnow() + timedelta(days=1)

            task.updated_at = datetime.utcnow()
            db.commit()

            return result

        except Exception as e:
            execution_time_ms = int((time.time() - start_time) * 1000)

            # Set database session for exception handler
            self.exception_handler.db = db

            error_result = self.exception_handler.handle_task_execution_error(
                task=task,
                error=e,
                execution_time_ms=execution_time_ms,
                context="Bing insights fetch"
            )

            # Persist the failure on the task. If the original error already
            # invalidated the session, this commit fails too; roll back so the
            # caller's session stays usable and still return the error result
            # instead of raising out of the error handler.
            try:
                task.last_check = datetime.utcnow()
                task.last_failure = datetime.utcnow()
                task.failure_reason = str(e)
                task.status = 'failed'
                task.next_check = datetime.utcnow() + timedelta(days=1)
                task.updated_at = datetime.utcnow()
                db.commit()
            except Exception as persist_error:
                db.rollback()
                self.logger.error(
                    f"Could not persist failure state for Bing insights task: {persist_error}"
                )

            return error_result

    async def _fetch_insights(self, task: PlatformInsightsTask, db: Session) -> TaskExecutionResult:
        """
        Fetch Bing insights data.

        On first run (no last_success), cached data is returned when present.
        Otherwise - and on every subsequent run - the work is delegated to
        ``_fetch_fresh_data``.

        Args:
            task: task whose user_id/site_url/last_success drive the fetch.
            db: database session (not used here; part of the call signature).

        Returns:
            TaskExecutionResult; unexpected errors become a failed result.
        """
        user_id = task.user_id
        site_url = task.site_url

        try:
            if task.last_success is None:
                # First run: Try to load from cache
                self.logger.info(f"First run for Bing insights task {task.id} - loading cached data")
                cached_data = self._load_cached_data(user_id, site_url)

                if cached_data:
                    self.logger.info(f"Loaded cached Bing data for user {user_id}")
                    return TaskExecutionResult(
                        success=True,
                        result_data={
                            'data_source': 'cached',
                            'insights': cached_data,
                            'message': 'Loaded from cached data (first run)'
                        }
                    )

                # No cached data - fall through to a fresh fetch
                self.logger.info("No cached data found, fetching from Bing API")
            else:
                # Subsequent run: Always fetch fresh data
                self.logger.info(f"Subsequent run for Bing insights task {task.id} - fetching fresh data")

            return await self._fetch_fresh_data(user_id, site_url)

        except Exception as e:
            self.logger.error(f"Error fetching Bing insights for user {user_id}: {e}", exc_info=True)
            return TaskExecutionResult(
                success=False,
                error_message=f"Failed to fetch Bing insights: {str(e)}",
                result_data={'error': str(e)}
            )

    def _load_cached_data(self, user_id: str, site_url: Optional[str]) -> Optional[Dict[str, Any]]:
        """
        Load the stored 30-day analytics summary for the user/site.

        Returns:
            The summary when the storage service returns a non-empty dict,
            otherwise None (also on any error, which is logged).
        """
        try:
            # Get analytics summary from storage service
            summary = self.storage_service.get_analytics_summary(
                user_id=user_id,
                site_url=site_url or '',
                days=30
            )

            if summary and isinstance(summary, dict):
                self.logger.info(f"Found cached Bing data for user {user_id}")
                return summary

            return None

        except Exception as e:
            self.logger.warning(f"Error loading cached Bing data: {e}")
            return None

    async def _fetch_fresh_data(self, user_id: str, site_url: Optional[str]) -> TaskExecutionResult:
        """
        Build fresh Bing insights for the user.

        Verifies that the user has active tokens and at least one site, falls
        back to the first site when ``site_url`` is empty, then formats the
        stored 30-day analytics summary as the insights payload.

        Returns:
            TaskExecutionResult with ``data_source='storage'`` on success, or a
            failed result describing the missing prerequisite/data.
        """
        try:
            # Check if user has active tokens
            token_status = self.bing_oauth.get_user_token_status(user_id)

            if not token_status.get('has_active_tokens'):
                return TaskExecutionResult(
                    success=False,
                    error_message="Bing Webmaster tokens not available or expired",
                    result_data={'error': 'No active tokens'}
                )

            # Get user's sites
            sites = self.bing_oauth.get_user_sites(user_id)

            if not sites:
                return TaskExecutionResult(
                    success=False,
                    error_message="No Bing Webmaster sites found",
                    result_data={'error': 'No sites found'}
                )

            # Use provided site_url or first site
            if not site_url:
                first_site = sites[0]
                site_url = first_site.get('Url', '') if isinstance(first_site, dict) else first_site

            # Get active token
            active_tokens = token_status.get('active_tokens', [])
            if not active_tokens:
                return TaskExecutionResult(
                    success=False,
                    error_message="No active Bing Webmaster tokens",
                    result_data={'error': 'No tokens'}
                )

            # For now, use stored analytics data (Bing API integration can be added later)
            # This ensures we have data available even if the API class doesn't exist yet
            summary = self.storage_service.get_analytics_summary(user_id, site_url, days=30)

            if not (summary and isinstance(summary, dict)):
                # No stored data available
                return TaskExecutionResult(
                    success=False,
                    error_message="No Bing analytics data available. Data will be collected during next onboarding refresh.",
                    result_data={'error': 'No stored data available'}
                )

            # Take a single "now" so both ends of the range come from the same instant.
            range_end = datetime.now()
            insights_data = {
                'site_url': site_url,
                'date_range': {
                    'start': (range_end - timedelta(days=30)).strftime('%Y-%m-%d'),
                    'end': range_end.strftime('%Y-%m-%d')
                },
                'summary': summary.get('summary', {}),
                'fetched_at': datetime.utcnow().isoformat()
            }

            self.logger.info(
                f"Successfully loaded Bing insights from storage for user {user_id}, site {site_url}"
            )

            return TaskExecutionResult(
                success=True,
                result_data={
                    'data_source': 'storage',
                    'insights': insights_data,
                    'message': 'Loaded from stored analytics data'
                }
            )

        except Exception as e:
            self.logger.error(f"Error fetching fresh Bing data: {e}", exc_info=True)
            return TaskExecutionResult(
                success=False,
                error_message=f"API fetch failed: {str(e)}",
                result_data={'error': str(e)}
            )

    def calculate_next_execution(
        self,
        task: PlatformInsightsTask,
        frequency: str,
        last_execution: Optional[datetime] = None
    ) -> datetime:
        """
        Calculate next execution time based on frequency.

        Args:
            task: the task being scheduled (not read here).
            frequency: 'Daily' adds one day; 'Weekly' and any other value add
                seven days.
            last_execution: base timestamp; defaults to the current UTC time.

        Returns:
            ``last_execution`` shifted by the interval for ``frequency``.
        """
        if last_execution is None:
            last_execution = datetime.utcnow()

        # Weekly is both the explicit and the default cadence.
        interval_days = 1 if frequency == 'Daily' else 7
        return last_execution + timedelta(days=interval_days)
|
||||
|
||||
307
backend/services/scheduler/executors/gsc_insights_executor.py
Normal file
307
backend/services/scheduler/executors/gsc_insights_executor.py
Normal file
@@ -0,0 +1,307 @@
|
||||
"""
|
||||
GSC Insights Task Executor
|
||||
Handles execution of GSC insights fetch tasks for connected platforms.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import os
|
||||
import time
|
||||
import json
|
||||
from datetime import datetime, timedelta
|
||||
from typing import Dict, Any, Optional
|
||||
from sqlalchemy.orm import Session
|
||||
import sqlite3
|
||||
|
||||
from ..core.executor_interface import TaskExecutor, TaskExecutionResult
|
||||
from ..core.exception_handler import TaskExecutionError, DatabaseError, SchedulerExceptionHandler
|
||||
from models.platform_insights_monitoring_models import PlatformInsightsTask, PlatformInsightsExecutionLog
|
||||
from services.gsc_service import GSCService
|
||||
from utils.logger_utils import get_service_logger
|
||||
|
||||
logger = get_service_logger("gsc_insights_executor")
|
||||
|
||||
|
||||
class GSCInsightsExecutor(TaskExecutor):
|
||||
"""
|
||||
Executor for GSC insights fetch tasks.
|
||||
|
||||
Handles:
|
||||
- Fetching GSC insights data weekly
|
||||
- On first run: Loads existing cached data
|
||||
- On subsequent runs: Fetches fresh data from GSC API
|
||||
- Logging results and updating task status
|
||||
"""
|
||||
|
||||
def __init__(self):
    """Set up the executor's collaborators: logger, exception handler and GSC service."""
    # Module-level service logger, exposed on the instance for the methods below.
    self.logger = logger
    # Produces the TaskExecutionResult returned when execute_task hits an unexpected error.
    self.exception_handler = SchedulerExceptionHandler()
    # Used to list sites, query search analytics and locate the cache database.
    self.gsc_service = GSCService()
|
||||
|
||||
async def execute_task(self, task: PlatformInsightsTask, db: Session) -> TaskExecutionResult:
    """
    Execute a GSC insights fetch task.

    Creates an execution log row, fetches the insights, then stores the
    outcome on both the log row and the task and commits. After a success the
    next check is scheduled weekly; after a failure a retry is scheduled one
    day later.

    Args:
        task: PlatformInsightsTask instance
        db: Database session

    Returns:
        TaskExecutionResult: the fetch result, or the result produced by the
        exception handler when an unexpected error occurs.
    """
    start_time = time.time()
    user_id = task.user_id
    site_url = task.site_url

    try:
        self.logger.info(
            f"Executing GSC insights fetch: task_id={task.id} | "
            f"user_id={user_id} | site_url={site_url}"
        )

        # Create execution log
        execution_log = PlatformInsightsExecutionLog(
            task_id=task.id,
            execution_date=datetime.utcnow(),
            status='running'
        )
        db.add(execution_log)
        db.flush()

        # Fetch insights
        result = await self._fetch_insights(task, db)

        # Update execution log
        execution_time_ms = int((time.time() - start_time) * 1000)
        execution_log.status = 'success' if result.success else 'failed'
        execution_log.result_data = result.result_data
        execution_log.error_message = result.error_message
        execution_log.execution_time_ms = execution_time_ms
        # result_data may be absent; do not let that turn a success into an error.
        execution_log.data_source = (
            (result.result_data or {}).get('data_source') if result.success else None
        )

        # Update task based on result
        task.last_check = datetime.utcnow()

        if result.success:
            task.last_success = datetime.utcnow()
            task.status = 'active'
            task.failure_reason = None
            # Schedule next check (7 days from now)
            task.next_check = self.calculate_next_execution(
                task=task,
                frequency='Weekly',
                last_execution=task.last_check
            )
        else:
            task.last_failure = datetime.utcnow()
            task.failure_reason = result.error_message
            task.status = 'failed'
            # Schedule retry in 1 day
            task.next_check = datetime.utcnow() + timedelta(days=1)

        task.updated_at = datetime.utcnow()
        db.commit()

        return result

    except Exception as e:
        execution_time_ms = int((time.time() - start_time) * 1000)

        # Set database session for exception handler
        self.exception_handler.db = db

        error_result = self.exception_handler.handle_task_execution_error(
            task=task,
            error=e,
            execution_time_ms=execution_time_ms,
            context="GSC insights fetch"
        )

        # Persist the failure on the task. If the original error already
        # invalidated the session, this commit fails too; roll back so the
        # caller's session stays usable and still return the error result
        # instead of raising out of the error handler.
        try:
            task.last_check = datetime.utcnow()
            task.last_failure = datetime.utcnow()
            task.failure_reason = str(e)
            task.status = 'failed'
            task.next_check = datetime.utcnow() + timedelta(days=1)
            task.updated_at = datetime.utcnow()
            db.commit()
        except Exception as persist_error:
            db.rollback()
            self.logger.error(
                f"Could not persist failure state for GSC insights task: {persist_error}"
            )

        return error_result
|
||||
|
||||
async def _fetch_insights(self, task: PlatformInsightsTask, db: Session) -> TaskExecutionResult:
    """
    Produce the insights result for one GSC task.

    A task that has never succeeded (``last_success`` is None) is served from
    the cache when cached data exists. In every other case the result comes
    from ``_fetch_fresh_data``. Unexpected errors are logged and returned as a
    failed TaskExecutionResult rather than raised.

    Args:
        task: task providing ``user_id``, ``site_url``, ``id`` and ``last_success``.
        db: database session (not used here; part of the call signature).
    """
    user_id = task.user_id
    site_url = task.site_url

    try:
        if task.last_success is not None:
            # The task has succeeded before: go straight to a fresh fetch.
            self.logger.info(f"Subsequent run for GSC insights task {task.id} - fetching fresh data")
        else:
            # Never succeeded yet: prefer whatever is already cached.
            self.logger.info(f"First run for GSC insights task {task.id} - loading cached data")
            cached_payload = self._load_cached_data(user_id, site_url)

            if cached_payload:
                self.logger.info(f"Loaded cached GSC data for user {user_id}")
                first_run_payload = {
                    'data_source': 'cached',
                    'insights': cached_payload,
                    'message': 'Loaded from cached data (first run)'
                }
                return TaskExecutionResult(success=True, result_data=first_run_payload)

            # Nothing cached, so fall through to the fresh fetch below.
            self.logger.info("No cached data found, fetching from GSC API")

        return await self._fetch_fresh_data(user_id, site_url)

    except Exception as e:
        self.logger.error(f"Error fetching GSC insights for user {user_id}: {e}", exc_info=True)
        failure_detail = str(e)
        return TaskExecutionResult(
            success=False,
            error_message=f"Failed to fetch GSC insights: {failure_detail}",
            result_data={'error': failure_detail}
        )
|
||||
|
||||
def _load_cached_data(self, user_id: str, site_url: Optional[str]) -> Optional[Dict[str, Any]]:
|
||||
"""Load most recent cached GSC data from database."""
|
||||
try:
|
||||
db_path = self.gsc_service.db_path
|
||||
|
||||
with sqlite3.connect(db_path) as conn:
|
||||
cursor = conn.cursor()
|
||||
|
||||
# Find most recent cached data
|
||||
if site_url:
|
||||
cursor.execute('''
|
||||
SELECT data_json, created_at
|
||||
FROM gsc_data_cache
|
||||
WHERE user_id = ? AND site_url = ? AND data_type = 'analytics'
|
||||
ORDER BY created_at DESC
|
||||
LIMIT 1
|
||||
''', (user_id, site_url))
|
||||
else:
|
||||
cursor.execute('''
|
||||
SELECT data_json, created_at
|
||||
FROM gsc_data_cache
|
||||
WHERE user_id = ? AND data_type = 'analytics'
|
||||
ORDER BY created_at DESC
|
||||
LIMIT 1
|
||||
''', (user_id,))
|
||||
|
||||
result = cursor.fetchone()
|
||||
|
||||
if result:
|
||||
data_json, created_at = result
|
||||
insights_data = json.loads(data_json) if isinstance(data_json, str) else data_json
|
||||
|
||||
self.logger.info(
|
||||
f"Found cached GSC data from {created_at} for user {user_id}"
|
||||
)
|
||||
|
||||
return insights_data
|
||||
|
||||
return None
|
||||
|
||||
except Exception as e:
|
||||
self.logger.warning(f"Error loading cached GSC data: {e}")
|
||||
return None
|
||||
|
||||
async def _fetch_fresh_data(self, user_id: str, site_url: Optional[str]) -> TaskExecutionResult:
    """
    Fetch the last 30 days of GSC search analytics for one site.

    Args:
        user_id: User whose GSC connection is queried.
        site_url: Site to report on. When falsy, the first entry returned by
            ``gsc_service.get_site_list`` is used instead.

    Returns:
        A successful TaskExecutionResult whose ``result_data`` carries the
        formatted insights under ``'insights'``, or a failed one when the
        user has no sites, the service reports an ``'error'`` key, or any
        exception is raised along the way.
    """
    try:
        target_site = site_url
        if not target_site:
            # No explicit site: fall back to the first site on the account.
            available_sites = self.gsc_service.get_site_list(user_id)
            if not available_sites:
                return TaskExecutionResult(
                    success=False,
                    error_message="No GSC sites found for user",
                    result_data={'error': 'No sites found'}
                )
            target_site = available_sites[0]['siteUrl']

        # Reporting window: today and the 30 days before it (local time).
        day_format = '%Y-%m-%d'
        end_date = datetime.now().strftime(day_format)
        start_date = (datetime.now() - timedelta(days=30)).strftime(day_format)

        search_analytics = self.gsc_service.get_search_analytics(
            user_id=user_id,
            site_url=target_site,
            start_date=start_date,
            end_date=end_date
        )

        # The service signals failure with an 'error' key instead of raising.
        if 'error' in search_analytics:
            return TaskExecutionResult(
                success=False,
                error_message=search_analytics.get('error', 'Unknown error'),
                result_data=search_analytics
            )

        insights_data = {
            'site_url': target_site,
            'date_range': {'start': start_date, 'end': end_date},
            'overall_metrics': search_analytics.get('overall_metrics', {}),
            'query_data': search_analytics.get('query_data', {}),
            'fetched_at': datetime.utcnow().isoformat()
        }

        self.logger.info(
            f"Successfully fetched GSC insights for user {user_id}, site {target_site}"
        )

        payload = {
            'data_source': 'api',
            'insights': insights_data,
            'message': 'Fetched fresh data from GSC API'
        }
        return TaskExecutionResult(success=True, result_data=payload)

    except Exception as e:
        self.logger.error(f"Error fetching fresh GSC data: {e}", exc_info=True)
        return TaskExecutionResult(
            success=False,
            error_message=f"API fetch failed: {str(e)}",
            result_data={'error': str(e)}
        )
|
||||
|
||||
def calculate_next_execution(
    self,
    task: PlatformInsightsTask,
    frequency: str,
    last_execution: Optional[datetime] = None
) -> datetime:
    """
    Return when the task should run next.

    Args:
        task: Task being scheduled (not inspected here).
        frequency: ``'Daily'`` schedules one day ahead; ``'Weekly'`` and any
            other value schedule seven days ahead.
        last_execution: Reference time; defaults to the current UTC time.

    Returns:
        ``last_execution`` advanced by the interval for ``frequency``.
    """
    reference = datetime.utcnow() if last_execution is None else last_execution
    # 'Daily' is the only non-weekly cadence; everything else is weekly.
    interval_days = 1 if frequency == 'Daily' else 7
    return reference + timedelta(days=interval_days)
|
||||
|
||||
@@ -197,7 +197,7 @@ class OAuthTokenMonitoringExecutor(TaskExecutor):
|
||||
- GSC: gsc_credentials table (via GSCService)
|
||||
- Bing: bing_oauth_tokens table (via BingOAuthService)
|
||||
- WordPress: wordpress_oauth_tokens table (via WordPressOAuthService)
|
||||
- Wix: Currently in frontend sessionStorage (backend storage TODO)
|
||||
- Wix: wix_oauth_tokens table (via WixOAuthService)
|
||||
|
||||
Args:
|
||||
task: OAuthTokenMonitoringTask instance
|
||||
|
||||
@@ -0,0 +1,458 @@
|
||||
"""
|
||||
Website Analysis Task Executor
|
||||
Handles execution of website analysis tasks for user and competitor websites.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import os
|
||||
import time
|
||||
import asyncio
|
||||
from datetime import datetime, timedelta
|
||||
from typing import Dict, Any, Optional
|
||||
from sqlalchemy.orm import Session
|
||||
from functools import partial
|
||||
from urllib.parse import urlparse
|
||||
|
||||
from ..core.executor_interface import TaskExecutor, TaskExecutionResult
|
||||
from ..core.exception_handler import TaskExecutionError, DatabaseError, SchedulerExceptionHandler
|
||||
from models.website_analysis_monitoring_models import WebsiteAnalysisTask, WebsiteAnalysisExecutionLog
|
||||
from models.onboarding import CompetitorAnalysis, OnboardingSession
|
||||
from utils.logger_utils import get_service_logger
|
||||
|
||||
# Import website analysis services
|
||||
from services.component_logic.web_crawler_logic import WebCrawlerLogic
|
||||
from services.component_logic.style_detection_logic import StyleDetectionLogic
|
||||
from services.website_analysis_service import WebsiteAnalysisService
|
||||
|
||||
logger = get_service_logger("website_analysis_executor")
|
||||
|
||||
|
||||
class WebsiteAnalysisExecutor(TaskExecutor):
|
||||
"""
|
||||
Executor for website analysis tasks.
|
||||
|
||||
Handles:
|
||||
- Analyzing user's website (updates existing WebsiteAnalysis record)
|
||||
- Analyzing competitor websites (stores in CompetitorAnalysis table)
|
||||
- Logging results and updating task status
|
||||
- Scheduling next execution based on frequency_days
|
||||
"""
|
||||
|
||||
def __init__(self):
    """Set up the shared logger, the scheduler exception handler and the crawl/style helpers."""
    # Module-level service logger shared by every executor instance.
    self.logger = logger
    # execute_task assigns the active session to this handler before use.
    self.exception_handler = SchedulerExceptionHandler()
    # Helpers used by _perform_website_analysis for crawling and style analysis.
    self.crawler_logic = WebCrawlerLogic()
    self.style_logic = StyleDetectionLogic()
|
||||
|
||||
async def execute_task(
    self,
    task: WebsiteAnalysisTask,
    db: Session
) -> TaskExecutionResult:
    """
    Execute a website analysis task and record the outcome.

    A 'running' execution log is flushed first, the analysis is performed,
    and the log plus the task's bookkeeping fields are then updated and
    committed. On success the next check is scheduled from
    ``task.frequency_days``; on failure ``next_check`` is left untouched so
    the task waits for a manual retry.

    Args:
        task: WebsiteAnalysisTask instance
        db: Database session

    Returns:
        The TaskExecutionResult produced by the analysis, or a failed,
        retryable result when an unexpected exception occurs.
    """
    start_time = time.time()
    user_id = task.user_id
    website_url = task.website_url
    task_type = task.task_type
    # Captured up front: the error path rolls the session back, which
    # expires ``task`` and would otherwise force a reload to read its id.
    task_id = task.id

    try:
        self.logger.info(
            f"Executing website analysis: task_id={task_id} | "
            f"user_id={user_id} | url={website_url} | type={task_type}"
        )

        # Create execution log
        execution_log = WebsiteAnalysisExecutionLog(
            task_id=task_id,
            execution_date=datetime.utcnow(),
            status='running'
        )
        db.add(execution_log)
        db.flush()

        # Perform website analysis
        result = await self._perform_website_analysis(
            website_url=website_url,
            user_id=user_id,
            task_type=task_type,
            task=task,
            db=db
        )

        # One timestamp for every field written for this outcome.
        finished_at = datetime.utcnow()
        execution_time_ms = int((time.time() - start_time) * 1000)

        # Update execution log
        execution_log.status = 'success' if result.success else 'failed'
        execution_log.result_data = result.result_data
        execution_log.error_message = result.error_message
        execution_log.execution_time_ms = execution_time_ms

        # Update task based on result
        task.last_check = finished_at
        task.updated_at = finished_at

        if result.success:
            task.last_success = finished_at
            task.status = 'active'
            task.failure_reason = None
            # Schedule next check based on frequency_days
            task.next_check = self.calculate_next_execution(
                task=task,
                frequency='Custom',
                last_execution=task.last_check,
                custom_days=task.frequency_days
            )

            db.commit()

            self.logger.info(
                f"Website analysis completed successfully for task {task_id}. "
                f"Next check scheduled for {task.next_check}"
            )
        else:
            task.last_failure = finished_at
            task.failure_reason = result.error_message
            task.status = 'failed'
            # Do NOT update next_check - wait for manual retry

            db.commit()

            self.logger.warning(
                f"Website analysis failed for task {task_id}. "
                f"Error: {result.error_message}. Waiting for manual retry."
            )

        return result

    except Exception as e:
        execution_time_ms = int((time.time() - start_time) * 1000)

        # Discard whatever the failed attempt left pending (notably the
        # flushed 'running' log row) so it is not committed alongside the
        # failure record below, and so the session is usable again.
        try:
            db.rollback()
        except Exception as rollback_error:
            self.logger.warning(
                f"Rollback after failed website analysis task {task_id} raised: {rollback_error}"
            )

        # Set database session for exception handler
        self.exception_handler.db = db

        # Create structured error
        error = TaskExecutionError(
            message=f"Error executing website analysis task {task_id}: {str(e)}",
            user_id=user_id,
            task_id=task_id,
            task_type="website_analysis",
            execution_time_ms=execution_time_ms,
            context={
                "website_url": website_url,
                "task_type": task_type,
                "user_id": user_id
            },
            original_error=e
        )

        # Handle exception with structured logging
        self.exception_handler.handle_exception(error)

        # Record the failure as a single, fresh execution log row.
        try:
            failed_at = datetime.utcnow()
            execution_log = WebsiteAnalysisExecutionLog(
                task_id=task_id,
                execution_date=failed_at,
                status='failed',
                error_message=str(e),
                execution_time_ms=execution_time_ms,
                result_data={
                    "error_type": error.error_type.value,
                    "severity": error.severity.value,
                    "context": error.context
                }
            )
            db.add(execution_log)

            task.last_failure = failed_at
            task.failure_reason = str(e)
            task.status = 'failed'
            task.last_check = failed_at
            task.updated_at = failed_at
            # Do NOT update next_check - wait for manual retry

            db.commit()
        except Exception as commit_error:
            db_error = DatabaseError(
                message=f"Error saving execution log: {str(commit_error)}",
                user_id=user_id,
                task_id=task_id,
                original_error=commit_error
            )
            self.exception_handler.handle_exception(db_error)
            db.rollback()

        return TaskExecutionResult(
            success=False,
            error_message=str(e),
            execution_time_ms=execution_time_ms,
            retryable=True
        )
|
||||
|
||||
async def _perform_website_analysis(
    self,
    website_url: str,
    user_id: str,
    task_type: str,
    task: WebsiteAnalysisTask,
    db: Session
) -> TaskExecutionResult:
    """
    Crawl a website, analyse its writing style and store the outcome.

    Steps: crawl the site, run style and pattern analysis concurrently in
    the default executor, derive style guidelines from a successful style
    analysis, then store the combined data according to ``task_type``
    (``'user_website'`` or ``'competitor'``; other values store nothing).

    Returns:
        A successful result carrying the combined analysis data, or a
        failed, retryable result when crawling fails, style analysis raises,
        or any other exception occurs. A failing pattern analysis is only
        logged and recorded as ``None``.
    """
    try:
        # Step 1: crawl the site.
        self.logger.info(f"Crawling website: {website_url}")
        crawl_result = await self.crawler_logic.crawl_website(website_url)

        if not crawl_result.get('success'):
            crawl_error = crawl_result.get('error', 'Crawling failed')
            self.logger.error(f"Crawling failed for {website_url}: {crawl_error}")
            return TaskExecutionResult(
                success=False,
                error_message=f"Crawling failed: {crawl_error}",
                result_data={'crawl_result': crawl_result},
                retryable=True
            )

        # Step 2: style and pattern analysis, side by side.
        self.logger.info(f"Running style analysis for {website_url}")

        async def offload(analyzer_name):
            """Run one style_logic method on the crawled content in the default executor."""
            # Method and content are looked up inside the coroutine so that a
            # lookup failure is reported by gather() as this analysis' error.
            event_loop = asyncio.get_event_loop()
            job = partial(getattr(self.style_logic, analyzer_name), crawl_result['content'])
            return await event_loop.run_in_executor(None, job)

        style_analysis, patterns_result = await asyncio.gather(
            offload('analyze_content_style'),
            offload('analyze_style_patterns'),
            return_exceptions=True
        )

        # Style analysis is mandatory; pattern analysis is optional.
        if isinstance(style_analysis, Exception):
            self.logger.error(f"Style analysis exception: {style_analysis}")
            return TaskExecutionResult(
                success=False,
                error_message=f"Style analysis failed: {str(style_analysis)}",
                retryable=True
            )

        if isinstance(patterns_result, Exception):
            self.logger.warning(f"Patterns analysis exception: {patterns_result}")
            patterns_result = None

        style_succeeded = bool(style_analysis and style_analysis.get('success'))

        # Step 3: guidelines are derived only from a successful style analysis.
        style_guidelines = None
        if style_succeeded:
            event_loop = asyncio.get_event_loop()
            guidelines_result = await event_loop.run_in_executor(
                None,
                partial(self.style_logic.generate_style_guidelines, style_analysis.get('analysis', {}))
            )
            if guidelines_result and guidelines_result.get('success'):
                style_guidelines = guidelines_result.get('guidelines')

        analysis_data = {
            'crawl_result': crawl_result,
            'style_analysis': style_analysis.get('analysis') if style_succeeded else None,
            'style_patterns': patterns_result or None,
            'style_guidelines': style_guidelines,
        }

        # Step 4: store results based on task type.
        if task_type == 'user_website':
            # Update existing WebsiteAnalysis record
            await self._update_user_website_analysis(
                user_id=user_id,
                website_url=website_url,
                analysis_data=analysis_data,
                db=db
            )
        elif task_type == 'competitor':
            # Store in CompetitorAnalysis table
            await self._store_competitor_analysis(
                user_id=user_id,
                competitor_url=website_url,
                competitor_id=task.competitor_id,
                analysis_data=analysis_data,
                db=db
            )

        self.logger.info(f"Website analysis completed successfully for {website_url}")

        return TaskExecutionResult(
            success=True,
            result_data=analysis_data,
            retryable=False
        )

    except Exception as e:
        self.logger.error(f"Error performing website analysis: {e}", exc_info=True)
        return TaskExecutionResult(
            success=False,
            error_message=str(e),
            retryable=True
        )
|
||||
|
||||
async def _update_user_website_analysis(
    self,
    user_id: str,
    website_url: str,
    analysis_data: Dict[str, Any],
    db: Session
):
    """
    Save the refreshed analysis of the user's own website.

    The record is written through ``WebsiteAnalysisService.save_analysis``
    under an integer session id derived from the Clerk user id.

    Args:
        user_id: Clerk user ID (string).
        website_url: URL that was analysed.
        analysis_data: Dict with ``crawl_result``, ``style_analysis``,
            ``style_patterns`` and ``style_guidelines`` entries.
        db: Database session.

    Raises:
        Exception: anything raised while saving is logged and re-raised.
    """
    try:
        import hashlib

        # Derive the session id exactly like clerk_user_id_to_int() in
        # services/website_analysis_monitoring_service.py (first 8 hex digits
        # of the SHA-256, reduced modulo 2147483647). The previous [:15]
        # slice produced a different key, so the monitoring service could
        # never find rows written here.
        digest = hashlib.sha256(user_id.encode()).hexdigest()
        user_id_int = int(digest[:8], 16) % 2147483647

        # Use WebsiteAnalysisService to update
        analysis_service = WebsiteAnalysisService(db)

        # Prepare data in format expected by save_analysis
        response_data = {
            'crawl_result': analysis_data.get('crawl_result'),
            'style_analysis': analysis_data.get('style_analysis'),
            'style_patterns': analysis_data.get('style_patterns'),
            'style_guidelines': analysis_data.get('style_guidelines'),
        }

        # Save/update analysis
        analysis_id = analysis_service.save_analysis(
            session_id=user_id_int,
            website_url=website_url,
            analysis_data=response_data
        )

        if analysis_id:
            self.logger.info(f"Updated user website analysis for {website_url} (analysis_id: {analysis_id})")
        else:
            # A falsy id is only logged, not raised.
            self.logger.warning(f"Failed to update user website analysis for {website_url}")

    except Exception as e:
        self.logger.error(f"Error updating user website analysis: {e}", exc_info=True)
        raise
|
||||
|
||||
async def _store_competitor_analysis(
    self,
    user_id: str,
    competitor_url: str,
    competitor_id: Optional[str],
    analysis_data: Dict[str, Any],
    db: Session
):
    """
    Insert or refresh the CompetitorAnalysis row for one competitor URL.

    The row is keyed by the user's onboarding session id and the competitor
    URL. The change is committed here; on any error the session is rolled
    back, the error is logged and the exception is re-raised.

    Raises:
        ValueError: when the user has no onboarding session.
    """
    try:
        onboarding_session = (
            db.query(OnboardingSession)
            .filter(OnboardingSession.user_id == user_id)
            .first()
        )
        if not onboarding_session:
            raise ValueError(f"No onboarding session found for user {user_id}")

        # The host part of the URL is the domain; fall back to the
        # competitor identifier when the URL has none.
        competitor_domain = urlparse(competitor_url).netloc or competitor_id

        record = (
            db.query(CompetitorAnalysis)
            .filter(
                CompetitorAnalysis.session_id == onboarding_session.id,
                CompetitorAnalysis.competitor_url == competitor_url
            )
            .first()
        )

        if not record:
            # First analysis for this competitor: create the row.
            db.add(
                CompetitorAnalysis(
                    session_id=onboarding_session.id,
                    competitor_url=competitor_url,
                    competitor_domain=competitor_domain,
                    analysis_data=analysis_data,
                    status='completed',
                    analysis_date=datetime.utcnow()
                )
            )
            self.logger.info(f"Created new competitor analysis for {competitor_url}")
        else:
            # Refresh the existing row and clear any earlier error state.
            record.analysis_data = analysis_data
            record.analysis_date = datetime.utcnow()
            record.status = 'completed'
            record.error_message = None
            record.warning_message = None
            record.updated_at = datetime.utcnow()
            self.logger.info(f"Updated competitor analysis for {competitor_url}")

        db.commit()

    except Exception as e:
        db.rollback()
        self.logger.error(f"Error storing competitor analysis: {e}", exc_info=True)
        raise
|
||||
|
||||
def calculate_next_execution(
    self,
    task: WebsiteAnalysisTask,
    frequency: str,
    last_execution: Optional[datetime] = None,
    custom_days: Optional[int] = None
) -> datetime:
    """
    Calculate next execution time based on frequency or custom days.

    Args:
        task: WebsiteAnalysisTask instance
        frequency: Frequency string ('Custom' for website analysis)
        last_execution: Last execution datetime (defaults to task.last_check or now)
        custom_days: Custom number of days (from task.frequency_days);
            only honoured when ``frequency`` is 'Custom'

    Returns:
        Next execution datetime. The interval is the first positive value
        among ``custom_days`` ('Custom' only) and ``task.frequency_days``;
        when neither is usable, a 10-day default is applied instead of
        raising.
    """
    # Same value as the default frequency_days used when tasks are created.
    default_days = 10

    if last_execution is None:
        last_execution = task.last_check if task.last_check else datetime.utcnow()

    if frequency == 'Custom':
        candidates = (custom_days, task.frequency_days)
    else:
        self.logger.warning(
            f"Unknown frequency '{frequency}' for website analysis task {task.id}. "
            f"Using frequency_days={task.frequency_days}."
        )
        candidates = (task.frequency_days,)

    # None, zero or negative intervals would crash timedelta() or schedule
    # the task in the past, so they are skipped.
    days = next(
        (value for value in candidates if isinstance(value, (int, float)) and value > 0),
        None
    )
    if days is None:
        self.logger.warning(
            f"No valid interval for website analysis task {task.id} "
            f"(custom_days={custom_days}, frequency_days={task.frequency_days}). "
            f"Falling back to {default_days} days."
        )
        days = default_days

    return last_execution + timedelta(days=days)
|
||||
|
||||
@@ -0,0 +1,60 @@
|
||||
"""
|
||||
Platform Insights Task Loader
|
||||
Functions to load due platform insights tasks from database.
|
||||
"""
|
||||
|
||||
from datetime import datetime
|
||||
from typing import List, Optional, Union
|
||||
from sqlalchemy.orm import Session
|
||||
from sqlalchemy import and_, or_
|
||||
|
||||
from models.platform_insights_monitoring_models import PlatformInsightsTask
|
||||
|
||||
|
||||
def load_due_platform_insights_tasks(
    db: Session,
    user_id: Optional[Union[str, int]] = None,
    platform: Optional[str] = None
) -> List[PlatformInsightsTask]:
    """
    Return every platform insights task that should run now.

    A task is due when its status is 'active' and its ``next_check`` is
    either unset (never executed) or not later than the current UTC time.

    Args:
        db: Database session
        user_id: Optional user ID; compared as a string when given
        platform: Optional platform filter ('gsc' or 'bing')

    Returns:
        List of due PlatformInsightsTask instances
    """
    now = datetime.utcnow()

    is_due = or_(
        PlatformInsightsTask.next_check <= now,
        PlatformInsightsTask.next_check.is_(None)
    )
    criteria = [PlatformInsightsTask.status == 'active', is_due]

    # Optional narrowing filters.
    if user_id is not None:
        criteria.append(PlatformInsightsTask.user_id == str(user_id))
    if platform is not None:
        criteria.append(PlatformInsightsTask.platform == platform)

    return db.query(PlatformInsightsTask).filter(and_(*criteria)).all()
|
||||
|
||||
@@ -0,0 +1,54 @@
|
||||
"""
|
||||
Website Analysis Task Loader
|
||||
Functions to load due website analysis tasks from database.
|
||||
"""
|
||||
|
||||
from datetime import datetime
|
||||
from typing import List, Optional, Union
|
||||
from sqlalchemy.orm import Session
|
||||
from sqlalchemy import and_, or_
|
||||
|
||||
from models.website_analysis_monitoring_models import WebsiteAnalysisTask
|
||||
|
||||
|
||||
def load_due_website_analysis_tasks(
    db: Session,
    user_id: Optional[Union[str, int]] = None
) -> List[WebsiteAnalysisTask]:
    """
    Return every website analysis task that should run now.

    A task is due when its status is 'active' and its ``next_check`` is
    either unset (never executed) or not later than the current UTC time.
    Passing ``user_id`` restricts the result to that user's tasks; leaving
    it out returns due tasks for all users.

    Args:
        db: Database session
        user_id: Optional user ID; compared as a string when given

    Returns:
        List of due WebsiteAnalysisTask instances
    """
    now = datetime.utcnow()

    is_due = or_(
        WebsiteAnalysisTask.next_check <= now,
        WebsiteAnalysisTask.next_check.is_(None)
    )
    criteria = [WebsiteAnalysisTask.status == 'active', is_due]

    # Per-user isolation when a user id is supplied.
    if user_id is not None:
        criteria.append(WebsiteAnalysisTask.user_id == str(user_id))

    return db.query(WebsiteAnalysisTask).filter(and_(*criteria)).all()
|
||||
|
||||
@@ -144,6 +144,11 @@ def get_exa_key(user_id: Optional[str] = None) -> Optional[str]:
|
||||
return UserAPIKeyContext.get_user_key(user_id, 'exa')
|
||||
|
||||
|
||||
def get_tavily_key(user_id: Optional[str] = None) -> Optional[str]:
    """Return the Tavily API key that UserAPIKeyContext resolves for ``user_id``."""
    provider = 'tavily'
    return UserAPIKeyContext.get_user_key(user_id, provider)
|
||||
|
||||
|
||||
def get_copilotkit_key(user_id: Optional[str] = None) -> Optional[str]:
    """Return the CopilotKit API key that UserAPIKeyContext resolves for ``user_id``."""
    provider = 'copilotkit'
    return UserAPIKeyContext.get_user_key(user_id, provider)
|
||||
|
||||
369
backend/services/website_analysis_monitoring_service.py
Normal file
369
backend/services/website_analysis_monitoring_service.py
Normal file
@@ -0,0 +1,369 @@
|
||||
"""
|
||||
Website Analysis Monitoring Service
|
||||
Creates and manages website analysis monitoring tasks.
|
||||
"""
|
||||
|
||||
from datetime import datetime, timedelta
|
||||
from typing import List, Dict, Any, Optional
|
||||
from sqlalchemy.orm import Session
|
||||
from urllib.parse import urlparse
|
||||
import hashlib
|
||||
|
||||
from models.website_analysis_monitoring_models import WebsiteAnalysisTask
|
||||
from models.onboarding import OnboardingSession
|
||||
from services.onboarding.database_service import OnboardingDatabaseService
|
||||
from utils.logger_utils import get_service_logger
|
||||
|
||||
logger = get_service_logger("website_analysis_monitoring")
|
||||
|
||||
|
||||
def clerk_user_id_to_int(user_id: str) -> int:
    """
    Map a Clerk user ID to the integer used as the database session_id.

    The value is the first four bytes of the SHA-256 digest of the ID
    (equivalently, its first eight hex digits), read as a big-endian
    integer and reduced modulo 2147483647. The result is deterministic.
    This MUST match the pattern used in component_logic.py for onboarding.

    Args:
        user_id: Clerk user ID (e.g., 'user_33Gz1FPI86VDXhRY8QN4ragRFGN')

    Returns:
        int: Deterministic integer derived from user ID
    """
    leading_bytes = hashlib.sha256(user_id.encode()).digest()[:4]
    return int.from_bytes(leading_bytes, 'big') % 2147483647
|
||||
|
||||
|
||||
def _normalize_website_url(value: Any) -> Optional[str]:
    """Return ``value`` stripped of surrounding whitespace, or None when it is not a non-blank string."""
    if not isinstance(value, str):
        return None
    return value.strip() or None


def _query_latest_website_analysis(user_id: str, db: Session):
    """Return ``(session_id, record)`` for the newest WebsiteAnalysis row stored under the user's hashed session id."""
    from models.onboarding import WebsiteAnalysis

    session_id_int = clerk_user_id_to_int(user_id)
    logger.info(
        f"[Website Analysis Tasks] Querying WebsiteAnalysis with hash-based session_id: {session_id_int} "
        f"for user {user_id}"
    )
    record = (
        db.query(WebsiteAnalysis)
        .filter(WebsiteAnalysis.session_id == session_id_int)
        .order_by(WebsiteAnalysis.created_at.desc())
        .first()
    )
    return session_id_int, record


def _extract_url_from_crawl_result(crawl_result: Any) -> Optional[str]:
    """Recover a full URL from a stored crawl result dict, or return None when it holds none."""
    if not isinstance(crawl_result, dict) or not crawl_result:
        return None

    candidate = crawl_result.get('url') or crawl_result.get('website_url')
    if not candidate:
        # Fall back to the domain information nested under 'content'.
        content = crawl_result.get('content')
        domain_info = content.get('domain_info') if isinstance(content, dict) else None
        if isinstance(domain_info, dict):
            candidate = domain_info.get('domain') or domain_info.get('url')

    candidate = _normalize_website_url(candidate)
    # Bare domains are promoted to full URLs.
    if candidate and not candidate.startswith(('http://', 'https://')):
        candidate = f"https://{candidate}"
    return candidate


def create_website_analysis_tasks(user_id: str, db: Session) -> Dict[str, Any]:
    """
    Create website analysis tasks for user's website and all competitors.

    This should be called after onboarding completion. The user's website
    gets a 30-day recurring task and each competitor a 10-day recurring
    task. The website URL comes from the onboarding website analysis; when
    that is missing or blank, the newest WebsiteAnalysis row for the user's
    hashed session id (its ``website_url`` or, failing that, its
    ``crawl_result``) is used instead.

    Args:
        user_id: Clerk user ID (string)
        db: Database session

    Returns:
        ``{'success': True, 'tasks_created': int, 'tasks': [...]}`` on
        success, otherwise ``{'success': False, 'error': str}``. Errors are
        logged, the session is rolled back, and nothing is raised.
    """
    try:
        logger.info(f"[Website Analysis Tasks] Creating tasks for user: {user_id}")

        # Get user's website URL from onboarding
        onboarding_service = OnboardingDatabaseService(db=db)
        website_analysis = onboarding_service.get_website_analysis(user_id, db)

        if not website_analysis:
            logger.warning(f"[Website Analysis Tasks] No website analysis found for user {user_id}")
            # Try direct query using hash-based session_id (must match onboarding pattern)
            try:
                _, analysis = _query_latest_website_analysis(user_id, db)
                if analysis:
                    logger.info(f"[Website Analysis Tasks] ✅ Found analysis via hash-based query: {analysis.website_url}")
                    website_analysis = analysis.to_dict()
            except Exception as e:
                logger.debug(f"[Website Analysis Tasks] Direct query fallback failed: {e}")

            if not website_analysis:
                return {
                    'success': False,
                    'error': 'No website analysis found. Complete onboarding first.'
                }

        raw_website_url = website_analysis.get('website_url')

        # Log the actual value for debugging (always log, not just debug level)
        logger.info(
            f"[Website Analysis Tasks] website_url from dict: {repr(raw_website_url)} "
            f"(type: {type(raw_website_url).__name__}, truthy: {bool(raw_website_url)})"
        )

        # Blank and whitespace-only values count as missing from here on, so
        # they can no longer slip past the final guard below.
        website_url = _normalize_website_url(raw_website_url)

        if not website_url:
            logger.warning(
                f"[Website Analysis Tasks] No website URL found for user {user_id}. "
                f"Analysis keys: {list(website_analysis.keys())}, "
                f"website_url value: {repr(raw_website_url)}"
            )

            # Try direct access to the model using hash-based session_id
            try:
                session_id_int, analysis = _query_latest_website_analysis(user_id, db)

                if not analysis:
                    logger.warning(
                        f"[Website Analysis Tasks] No WebsiteAnalysis record found for "
                        f"hash-based session_id {session_id_int} (user {user_id})"
                    )
                else:
                    logger.info(
                        f"[Website Analysis Tasks] Direct model access - "
                        f"website_url: {repr(analysis.website_url)}, "
                        f"id: {analysis.id}, session_id: {analysis.session_id}"
                    )

                    website_url = _normalize_website_url(analysis.website_url)
                    if website_url:
                        logger.info(f"[Website Analysis Tasks] ✅ Retrieved website_url via hash-based query: {website_url}")
                    else:
                        # website_url is NULL/blank: try the stored crawl result.
                        website_url = _extract_url_from_crawl_result(analysis.crawl_result)
                        if website_url:
                            logger.info(f"[Website Analysis Tasks] ✅ Extracted website_url from crawl_result: {website_url}")
                        else:
                            logger.warning(
                                f"[Website Analysis Tasks] Cannot extract URL from crawl_result. "
                                f"crawl_result type: {type(analysis.crawl_result).__name__}, "
                                f"Analysis ID: {analysis.id}"
                            )
            except Exception as e:
                logger.warning(f"[Website Analysis Tasks] Hash-based query fallback failed: {e}", exc_info=True)

            if not website_url:
                return {
                    'success': False,
                    'error': 'No website URL found in onboarding data. Please complete step 2 (Website Analysis) in onboarding.'
                }

        logger.info(f"[Website Analysis Tasks] User website URL: {website_url}")

        tasks_created = []

        # 1. Create task for user's website (optional recurring every 30 days)
        user_task = _create_or_update_task(
            db=db,
            user_id=user_id,
            website_url=website_url,
            task_type='user_website',
            frequency_days=30  # Optional: recurring every 30 days
        )
        if user_task:
            tasks_created.append(user_task)
            logger.info(f"Created user website analysis task for {website_url}")

        # 2. Get competitors from onboarding
        competitors = _get_competitors_from_onboarding(user_id, db)
        logger.info(
            f"[Website Analysis Tasks] Found {len(competitors)} competitors for user {user_id}. "
            f"Competitors: {[c.get('url') or c.get('website_url') or c.get('domain') for c in competitors]}"
        )

        # 3. Create task for each competitor
        for competitor in competitors:
            competitor_url = competitor.get('url') or competitor.get('website_url')
            if not competitor_url:
                # Nothing to crawl without a URL.
                continue

            # Extract competitor identifier
            competitor_id = competitor.get('domain') or competitor.get('id') or _extract_domain(competitor_url)

            competitor_task = _create_or_update_task(
                db=db,
                user_id=user_id,
                website_url=competitor_url,
                task_type='competitor',
                competitor_id=competitor_id,
                frequency_days=10  # Recurring every 10 days
            )
            if competitor_task:
                tasks_created.append(competitor_task)
                logger.info(f"Created competitor analysis task for {competitor_url}")

        db.commit()

        logger.info(f"Created {len(tasks_created)} website analysis tasks for user {user_id}")

        return {
            'success': True,
            'tasks_created': len(tasks_created),
            'tasks': [{
                'id': t.id,
                'url': t.website_url,
                'type': t.task_type,
                'next_check': t.next_check.isoformat() if t.next_check else None
            } for t in tasks_created]
        }

    except Exception as e:
        logger.error(f"Error creating website analysis tasks for user {user_id}: {e}", exc_info=True)
        db.rollback()
        return {
            'success': False,
            'error': str(e)
        }
|
||||
|
||||
|
||||
def _create_or_update_task(
    db: Session,
    user_id: str,
    website_url: str,
    task_type: str,
    competitor_id: Optional[str] = None,
    frequency_days: int = 10
) -> Optional[WebsiteAnalysisTask]:
    """
    Upsert the website analysis task keyed by (user_id, website_url, task_type).

    When a matching row exists it is re-activated and rescheduled; otherwise a
    new row is added to the session and flushed. This function never commits;
    that is left to the caller.

    Args:
        db: Session used for the lookup and for adding/flushing the new row.
        user_id: Owner of the task.
        website_url: URL the task analyses.
        task_type: Task category stored on the row.
        competitor_id: Optional competitor identifier; on an existing row it is
            only overwritten when a truthy value is supplied.
        frequency_days: Interval in days, stored on the row and used to compute
            the next check time from the current UTC time.

    Returns:
        The updated or newly created task, or None if any exception was raised
        (the exception is logged, not propagated).
    """
    try:
        # Look for a task already registered for this user / URL / type.
        matching = db.query(WebsiteAnalysisTask).filter(
            WebsiteAnalysisTask.user_id == user_id,
            WebsiteAnalysisTask.website_url == website_url,
            WebsiteAnalysisTask.task_type == task_type
        )
        current = matching.first()

        if not current:
            # Nothing registered yet: build a fresh row and flush it so the
            # database-assigned id is available for the log line below.
            new_task = WebsiteAnalysisTask(
                user_id=user_id,
                website_url=website_url,
                task_type=task_type,
                competitor_id=competitor_id,
                status='active',
                frequency_days=frequency_days,
                next_check=datetime.utcnow() + timedelta(days=frequency_days)
            )
            db.add(new_task)
            db.flush()
            logger.info(f"Created new website analysis task {new_task.id} for {website_url}")
            return new_task

        # Re-activate and reschedule the task that is already there.
        current.status = 'active'
        current.frequency_days = frequency_days
        current.next_check = datetime.utcnow() + timedelta(days=frequency_days)
        current.updated_at = datetime.utcnow()
        if competitor_id:
            current.competitor_id = competitor_id
        logger.info(f"Updated existing website analysis task {current.id}")
        return current

    except Exception as e:
        logger.error(f"Error creating/updating task: {e}", exc_info=True)
        return None
|
||||
|
||||
|
||||
def _get_competitors_from_onboarding(user_id: str, db: Session) -> List[Dict[str, Any]]:
    """
    Get competitors from onboarding database.

    Competitors are read from
    onboarding_sessions.step_data['step3_research_data']['competitors'];
    when that yields nothing, Step3ResearchService.get_research_data() is
    queried for the same session instead.

    Args:
        user_id: User whose onboarding session is looked up.
        db: Session handed to OnboardingDatabaseService.

    Returns:
        The list of competitor entries, or an empty list when no session or no
        competitors are found. Failures are logged and never propagated.
    """

    def _competitor_list(container: Any) -> List[Dict[str, Any]]:
        """Return container['competitors'] when it is a list, otherwise []."""
        if not isinstance(container, dict):
            return []
        found = container.get('competitors')
        return found if isinstance(found, list) else []

    try:
        # Get onboarding session
        onboarding_service = OnboardingDatabaseService(db=db)
        session = onboarding_service.get_session_by_user(user_id, db)

        if not session:
            logger.warning(f"No onboarding session found for user {user_id}")
            return []

        competitors: List[Dict[str, Any]] = []

        # Method 1: Check if step_data column exists and has competitors.
        # Missing, None or non-dict payloads count as "no competitors" so that
        # Method 2 still gets a chance instead of the whole lookup aborting.
        if hasattr(session, 'step_data') and session.step_data:
            step_data = session.step_data if isinstance(session.step_data, dict) else {}
            competitors = _competitor_list(step_data.get('step3_research_data'))
            logger.info(f"[Competitor Retrieval] Method 1 (step_data): found {len(competitors)} competitors")

        # Method 2: If not found, try Step3ResearchService
        if not competitors:
            logger.info(f"[Competitor Retrieval] Attempting Step3ResearchService for user {user_id}, session_id: {session.id}")
            try:
                from api.onboarding_utils.step3_research_service import Step3ResearchService
                import asyncio
                from concurrent.futures import ThreadPoolExecutor

                step3_service = Step3ResearchService()

                # Drive the coroutine on a private event loop in a short-lived
                # worker thread. loop.run_until_complete() raises when the
                # calling thread already has a running loop (e.g. when reached
                # from an async request handler); asyncio.run() in its own
                # thread works in both situations, closes its loop afterwards
                # and leaves the caller's loop untouched.
                with ThreadPoolExecutor(max_workers=1) as executor:
                    research_data_result = executor.submit(
                        asyncio.run,
                        step3_service.get_research_data(str(session.id))
                    ).result()

                if not isinstance(research_data_result, dict):
                    research_data_result = {}

                logger.info(f"[Competitor Retrieval] Step3ResearchService result: {research_data_result.get('success')}")

                if research_data_result.get('success'):
                    research_data = research_data_result.get('research_data')
                    if not isinstance(research_data, dict):
                        research_data = {}
                    competitors = _competitor_list(research_data.get('step3_research_data'))
                    logger.info(f"[Competitor Retrieval] Retrieved {len(competitors)} competitors from Step3ResearchService")
                else:
                    logger.warning(f"[Competitor Retrieval] Step3ResearchService returned error: {research_data_result.get('error')}")
            except Exception as e:
                logger.warning(f"[Competitor Retrieval] Could not fetch competitors from Step3ResearchService: {e}", exc_info=True)

        # _competitor_list guarantees a list here, so no further type check is needed.
        logger.info(f"Found {len(competitors)} competitors for user {user_id}")
        return competitors

    except Exception as e:
        logger.error(f"Error getting competitors from onboarding: {e}", exc_info=True)
        return []
|
||||
|
||||
|
||||
def _extract_domain(url: str) -> str:
    """
    Extract domain from URL.

    Args:
        url: Absolute URL ('https://example.com/path') or a scheme-less one
            ('example.com/path').

    Returns:
        The network location (host, plus port/credentials if present). If no
        network location can be determined, or parsing fails, the input is
        returned unchanged.
    """
    try:
        parsed = urlparse(url)
        if parsed.netloc:
            return parsed.netloc

        # urlparse only recognises a netloc that is introduced by '//', so a
        # scheme-less 'example.com/path' ends up entirely in .path. Re-parse
        # such input as a network-path reference to recover the host.
        if isinstance(url, str) and '://' not in url:
            candidate = url.strip()
            if candidate:
                netloc = urlparse(f"//{candidate}").netloc
                if netloc:
                    return netloc

        return url
    except Exception:
        return url
|
||||
|
||||
Reference in New Issue
Block a user