AI platform insights monitoring and website analysis monitoring services added
This commit is contained in:
369
backend/services/website_analysis_monitoring_service.py
Normal file
369
backend/services/website_analysis_monitoring_service.py
Normal file
@@ -0,0 +1,369 @@
|
||||
"""
|
||||
Website Analysis Monitoring Service
|
||||
Creates and manages website analysis monitoring tasks.
|
||||
"""
|
||||
|
||||
from datetime import datetime, timedelta
|
||||
from typing import List, Dict, Any, Optional
|
||||
from sqlalchemy.orm import Session
|
||||
from urllib.parse import urlparse
|
||||
import hashlib
|
||||
|
||||
from models.website_analysis_monitoring_models import WebsiteAnalysisTask
|
||||
from models.onboarding import OnboardingSession
|
||||
from services.onboarding.database_service import OnboardingDatabaseService
|
||||
from utils.logger_utils import get_service_logger
|
||||
|
||||
# Module-level logger shared by every function in this service.
logger = get_service_logger("website_analysis_monitoring")
|
||||
|
||||
|
||||
def clerk_user_id_to_int(user_id: str) -> int:
    """
    Map a Clerk user ID onto the integer used as the database session_id.

    The ID is hashed with SHA256 and the leading 32 bits of the digest are
    reduced modulo 2147483647, so a given ID always yields the same integer.
    This MUST match the pattern used in component_logic.py for onboarding.

    Args:
        user_id: Clerk user ID (e.g., 'user_33Gz1FPI86VDXhRY8QN4ragRFGN')

    Returns:
        int: Deterministic integer derived from user ID
    """
    digest = hashlib.sha256(user_id.encode()).digest()
    # The first four digest bytes read big-endian are the same number as the
    # first eight characters of the hex digest parsed in base 16.
    leading_bits = int.from_bytes(digest[:4], "big")
    return leading_bits % 2147483647
|
||||
|
||||
|
||||
def _clean_url(value: Any) -> Optional[str]:
    """Return ``value`` stripped of surrounding whitespace, or None if it is not a non-blank string."""
    if isinstance(value, str):
        stripped = value.strip()
        if stripped:
            return stripped
    return None


def _latest_analysis_for_session(session_id_int: int, db: Session) -> Any:
    """Return the most recently created WebsiteAnalysis row for ``session_id_int``, or None."""
    # Imported lazily, exactly as the original fallback code paths did.
    from models.onboarding import WebsiteAnalysis

    return (
        db.query(WebsiteAnalysis)
        .filter(WebsiteAnalysis.session_id == session_id_int)
        .order_by(WebsiteAnalysis.created_at.desc())
        .first()
    )


def _url_from_crawl_result(crawl_result: Any) -> Optional[str]:
    """Recover a site URL from a stored crawl_result payload, or None if none is usable."""
    if not isinstance(crawl_result, dict):
        return None

    content = crawl_result.get('content')
    domain_info = content.get('domain_info') if isinstance(content, dict) else None
    if not isinstance(domain_info, dict):
        domain_info = {}

    # Same lookup order as before: top-level keys first, then domain_info.
    candidates = (
        crawl_result.get('url'),
        crawl_result.get('website_url'),
        domain_info.get('domain'),
        domain_info.get('url'),
    )
    for candidate in candidates:
        url = _clean_url(candidate)
        if url is None:
            # Blank or non-string values are skipped instead of raising.
            continue
        # Ensure it's a full URL (add https:// if missing)
        if not url.startswith(('http://', 'https://')):
            url = f"https://{url}"
        return url
    return None


def _recover_website_url(user_id: str, db: Session) -> Optional[str]:
    """Second-chance lookup of the user's site URL directly on the WebsiteAnalysis model.

    Tries the row's ``website_url`` column first and then its ``crawl_result``
    payload. Any exception is logged and reported as "not found" (None).
    """
    try:
        # This MUST use the same hash function as onboarding (clerk_user_id_to_int)
        session_id_int = clerk_user_id_to_int(user_id)
        logger.info(
            f"[Website Analysis Tasks] Querying WebsiteAnalysis with hash-based session_id: {session_id_int} "
            f"for user {user_id}"
        )
        analysis = _latest_analysis_for_session(session_id_int, db)

        if not analysis:
            logger.warning(
                f"[Website Analysis Tasks] No WebsiteAnalysis record found for "
                f"hash-based session_id {session_id_int} (user {user_id})"
            )
            return None

        logger.info(
            f"[Website Analysis Tasks] Direct model access - "
            f"website_url: {repr(analysis.website_url)}, "
            f"type: {type(analysis.website_url).__name__ if analysis.website_url else 'None'}, "
            f"id: {analysis.id}, session_id: {analysis.session_id}"
        )

        column_url = _clean_url(analysis.website_url)
        if column_url:
            logger.info(f"[Website Analysis Tasks] ✅ Retrieved website_url via hash-based query: {column_url}")
            return column_url

        # Try to extract URL from crawl_result if website_url is NULL/blank
        if analysis.crawl_result and isinstance(analysis.crawl_result, dict):
            crawl_url = _url_from_crawl_result(analysis.crawl_result)
            if crawl_url:
                logger.info(f"[Website Analysis Tasks] ✅ Extracted website_url from crawl_result: {crawl_url}")
                return crawl_url
            logger.warning(
                f"[Website Analysis Tasks] Cannot extract URL from crawl_result. "
                f"crawl_result keys: {list(analysis.crawl_result.keys()) if isinstance(analysis.crawl_result, dict) else 'not a dict'}, "
                f"Analysis ID: {analysis.id}"
            )
        else:
            logger.warning(
                f"[Website Analysis Tasks] website_url is NULL and crawl_result is empty or invalid. "
                f"Analysis ID: {analysis.id}, Status: {analysis.status}, "
                f"crawl_result type: {type(analysis.crawl_result).__name__ if analysis.crawl_result else 'None'}"
            )
    except Exception as e:
        logger.warning(f"[Website Analysis Tasks] Hash-based query fallback failed: {e}", exc_info=True)
    return None


def create_website_analysis_tasks(user_id: str, db: Session) -> Dict[str, Any]:
    """
    Create website analysis tasks for user's website and all competitors.

    This should be called after onboarding completion.

    The user's website URL is read from the onboarding website analysis. When
    that lookup returns nothing, or returns a missing/blank URL, the
    WebsiteAnalysis table is queried directly using the hash-based session_id.
    URLs are stripped of surrounding whitespace before use, and a
    whitespace-only URL is treated as missing.

    Args:
        user_id: Clerk user ID (string)
        db: Database session

    Returns:
        Dictionary with success status and task details. On success:
        ``{'success': True, 'tasks_created': <int>, 'tasks': [...]}``; on
        failure: ``{'success': False, 'error': <message>}``. Exceptions are
        logged, the session is rolled back, and a failure dict is returned.
    """
    try:
        logger.info(f"[Website Analysis Tasks] Creating tasks for user: {user_id}")

        # Get user's website URL from onboarding
        onboarding_service = OnboardingDatabaseService(db=db)
        website_analysis = onboarding_service.get_website_analysis(user_id, db)

        if not website_analysis:
            logger.warning(f"[Website Analysis Tasks] No website analysis found for user {user_id}")
            # Try direct query using hash-based session_id (must match onboarding pattern)
            try:
                session_id_int = clerk_user_id_to_int(user_id)
                logger.info(
                    f"[Website Analysis Tasks] Querying WebsiteAnalysis with hash-based session_id: {session_id_int}"
                )
                analysis = _latest_analysis_for_session(session_id_int, db)
                if analysis:
                    logger.info(f"[Website Analysis Tasks] ✅ Found analysis via hash-based query: {analysis.website_url}")
                    website_analysis = analysis.to_dict()
            except Exception as e:
                # Logged at warning level (with traceback) like the other fallback,
                # so a failing query is visible in normal logs.
                logger.warning(f"[Website Analysis Tasks] Direct query fallback failed: {e}", exc_info=True)

        if not website_analysis:
            return {
                'success': False,
                'error': 'No website analysis found. Complete onboarding first.'
            }

        raw_website_url = website_analysis.get('website_url')

        # Log the actual value for debugging (always log, not just debug level)
        logger.info(
            f"[Website Analysis Tasks] website_url from dict: {repr(raw_website_url)} "
            f"(type: {type(raw_website_url).__name__}, truthy: {bool(raw_website_url)})"
        )

        # None, empty, whitespace-only and non-string values all count as missing.
        website_url = _clean_url(raw_website_url)
        if not website_url:
            logger.warning(
                f"[Website Analysis Tasks] No website URL found for user {user_id}. "
                f"Analysis keys: {list(website_analysis.keys()) if website_analysis else 'None'}, "
                f"website_url value: {repr(raw_website_url)}"
            )
            website_url = _recover_website_url(user_id, db)

        if not website_url:
            return {
                'success': False,
                'error': 'No website URL found in onboarding data. Please complete step 2 (Website Analysis) in onboarding.'
            }

        logger.info(f"[Website Analysis Tasks] User website URL: {website_url}")

        tasks_created = []

        # 1. Create task for user's website (optional recurring every 30 days)
        user_task = _create_or_update_task(
            db=db,
            user_id=user_id,
            website_url=website_url,
            task_type='user_website',
            frequency_days=30  # Optional: recurring every 30 days
        )
        if user_task:
            tasks_created.append(user_task)
            logger.info(f"Created user website analysis task for {website_url}")

        # 2. Get competitors from onboarding; entries that are not dicts cannot
        #    carry a URL and would break the .get() calls below, so drop them.
        competitors = [
            entry for entry in _get_competitors_from_onboarding(user_id, db)
            if isinstance(entry, dict)
        ]
        logger.info(
            f"[Website Analysis Tasks] Found {len(competitors)} competitors for user {user_id}. "
            f"Competitors: {[c.get('url') or c.get('website_url') or c.get('domain') for c in competitors]}"
        )

        # 3. Create task for each competitor
        for competitor in competitors:
            competitor_url = _clean_url(competitor.get('url')) or _clean_url(competitor.get('website_url'))
            if not competitor_url:
                continue

            # Extract competitor identifier
            competitor_id = competitor.get('domain') or competitor.get('id') or _extract_domain(competitor_url)

            competitor_task = _create_or_update_task(
                db=db,
                user_id=user_id,
                website_url=competitor_url,
                task_type='competitor',
                competitor_id=competitor_id,
                frequency_days=10  # Recurring every 10 days
            )
            if competitor_task:
                tasks_created.append(competitor_task)
                logger.info(f"Created competitor analysis task for {competitor_url}")

        db.commit()

        logger.info(f"Created {len(tasks_created)} website analysis tasks for user {user_id}")

        return {
            'success': True,
            'tasks_created': len(tasks_created),
            'tasks': [{
                'id': t.id,
                'url': t.website_url,
                'type': t.task_type,
                'next_check': t.next_check.isoformat() if t.next_check else None
            } for t in tasks_created]
        }

    except Exception as e:
        logger.error(f"Error creating website analysis tasks for user {user_id}: {e}", exc_info=True)
        db.rollback()
        return {
            'success': False,
            'error': str(e)
        }
|
||||
|
||||
|
||||
def _create_or_update_task(
    db: Session,
    user_id: str,
    website_url: str,
    task_type: str,
    competitor_id: Optional[str] = None,
    frequency_days: int = 10
) -> Optional[WebsiteAnalysisTask]:
    """
    Upsert the monitoring task identified by (user_id, website_url, task_type).

    An existing row is re-activated and rescheduled; otherwise a new row is
    added and flushed (not committed) so its id is available to the caller.

    Args:
        db: Database session
        user_id: Clerk user ID that owns the task
        website_url: URL the task monitors
        task_type: Task category (callers in this file pass 'user_website' or 'competitor')
        competitor_id: Optional competitor identifier; only written when truthy
        frequency_days: Days between checks, also used to schedule next_check

    Returns:
        The created or updated task, or None if any exception occurred
        (the exception is logged, not re-raised).
    """
    try:
        # Look for a task that already covers this user/URL/type combination
        current = (
            db.query(WebsiteAnalysisTask)
            .filter(
                WebsiteAnalysisTask.user_id == user_id,
                WebsiteAnalysisTask.website_url == website_url,
                WebsiteAnalysisTask.task_type == task_type
            )
            .first()
        )

        if not current:
            # Nothing to update: insert a fresh, active task
            fresh = WebsiteAnalysisTask(
                user_id=user_id,
                website_url=website_url,
                task_type=task_type,
                competitor_id=competitor_id,
                status='active',
                frequency_days=frequency_days,
                next_check=datetime.utcnow() + timedelta(days=frequency_days)
            )
            db.add(fresh)
            # Flush so the primary key is populated before it is logged/returned
            db.flush()
            logger.info(f"Created new website analysis task {fresh.id} for {website_url}")
            return fresh

        # Re-activate and reschedule the task that is already stored
        current.status = 'active'
        current.frequency_days = frequency_days
        current.next_check = datetime.utcnow() + timedelta(days=frequency_days)
        current.updated_at = datetime.utcnow()
        if competitor_id:
            current.competitor_id = competitor_id
        logger.info(f"Updated existing website analysis task {current.id}")
        return current

    except Exception as e:
        logger.error(f"Error creating/updating task: {e}", exc_info=True)
        return None
|
||||
|
||||
|
||||
def _run_coroutine_blocking(coro_factory: Any) -> Any:
    """
    Run a coroutine to completion from synchronous code and return its result.

    Args:
        coro_factory: Zero-argument callable that returns the coroutine to run.
            A factory is used so the coroutine is created on the thread that
            actually awaits it.

    Returns:
        Whatever the coroutine returns; exceptions it raises propagate.
    """
    import asyncio
    from concurrent.futures import ThreadPoolExecutor

    try:
        asyncio.get_running_loop()
    except RuntimeError:
        # No loop is running in this thread: drive a private loop and close it
        # afterwards so it is not leaked.
        private_loop = asyncio.new_event_loop()
        try:
            return private_loop.run_until_complete(coro_factory())
        finally:
            private_loop.close()

    # A loop is already running in this thread, so run_until_complete() would
    # raise "This event loop is already running". Use a worker thread with its
    # own loop instead and wait for the result.
    with ThreadPoolExecutor(max_workers=1) as pool:
        return pool.submit(lambda: asyncio.run(coro_factory())).result()


def _competitors_from_payload(payload: Any) -> List[Any]:
    """Return ``payload['step3_research_data']['competitors']`` if it is a list, else ``[]``."""
    if not isinstance(payload, dict):
        return []
    research = payload.get('step3_research_data')
    if not isinstance(research, dict):
        return []
    found = research.get('competitors')
    return found if isinstance(found, list) else []


def _get_competitors_from_onboarding(user_id: str, db: Session) -> List[Dict[str, Any]]:
    """
    Get competitors from onboarding database.

    Competitors are stored in onboarding_sessions.step_data['step3_research_data']['competitors']
    or via Step3ResearchService. The step_data column is tried first; the
    service is only consulted when that yields no competitors. Missing or
    malformed data at any level is treated as "no competitors" rather than an
    error, so a bad step_data payload no longer prevents the service fallback.

    Args:
        user_id: Clerk user ID (string)
        db: Database session

    Returns:
        List of competitor dicts (non-dict entries are dropped); an empty list
        when there is no onboarding session, no competitors, or any error.
    """
    try:
        # Get onboarding session
        onboarding_service = OnboardingDatabaseService(db=db)
        session = onboarding_service.get_session_by_user(user_id, db)

        if not session:
            logger.warning(f"No onboarding session found for user {user_id}")
            return []

        competitors: List[Any] = []

        # Method 1: Check if step_data column exists and has competitors
        step_data = getattr(session, 'step_data', None)
        if step_data:
            competitors = _competitors_from_payload(step_data)
            logger.info(f"[Competitor Retrieval] Method 1 (step_data): found {len(competitors)} competitors")

        # Method 2: If not found, try Step3ResearchService
        if not competitors:
            logger.info(f"[Competitor Retrieval] Attempting Step3ResearchService for user {user_id}, session_id: {session.id}")
            try:
                from api.onboarding_utils.step3_research_service import Step3ResearchService

                step3_service = Step3ResearchService()
                session_key = str(session.id)

                # get_research_data is async; run it to completion whether or
                # not an event loop is already running in this thread.
                research_data_result = _run_coroutine_blocking(
                    lambda: step3_service.get_research_data(session_key)
                )
                if not isinstance(research_data_result, dict):
                    # Treat an unexpected return shape as a failed lookup.
                    research_data_result = {}

                logger.info(f"[Competitor Retrieval] Step3ResearchService result: {research_data_result.get('success')}")

                if research_data_result.get('success'):
                    competitors = _competitors_from_payload(research_data_result.get('research_data'))
                    logger.info(f"[Competitor Retrieval] Retrieved {len(competitors)} competitors from Step3ResearchService")
                else:
                    logger.warning(f"[Competitor Retrieval] Step3ResearchService returned error: {research_data_result.get('error')}")
            except Exception as e:
                logger.warning(f"[Competitor Retrieval] Could not fetch competitors from Step3ResearchService: {e}", exc_info=True)

        # Callers read each competitor with .get(), so keep only dict entries
        competitors = [entry for entry in competitors if isinstance(entry, dict)]

        logger.info(f"Found {len(competitors)} competitors for user {user_id}")
        return competitors

    except Exception as e:
        logger.error(f"Error getting competitors from onboarding: {e}", exc_info=True)
        return []
|
||||
|
||||
|
||||
def _extract_domain(url: str) -> str:
    """
    Extract the network location (host, plus port if present) from a URL.

    Scheme-less input such as ``example.com/pricing`` is also handled:
    ``urlparse`` reports an empty netloc for it, so the value is re-parsed as
    a network-path reference (``//example.com/pricing``) to recover the host.

    Args:
        url: Absolute or scheme-less URL

    Returns:
        The netloc when one can be determined; otherwise ``url`` unchanged
        (including when the input cannot be parsed at all).
    """
    try:
        netloc = urlparse(url).netloc
        if netloc:
            return netloc

        if isinstance(url, str) and url.strip():
            # No scheme: the host ended up in the path (or was read as the
            # scheme). Prefixing "//" makes urlparse treat it as the netloc.
            bare_netloc = urlparse(f"//{url.strip()}").netloc
            if bare_netloc:
                return bare_netloc

        return url
    except (ValueError, TypeError, AttributeError):
        # Malformed URLs (e.g. an unbalanced IPv6 bracket) or non-string input:
        # fall back to the raw value, as before.
        return url
|
||||
|
||||
Reference in New Issue
Block a user