Files
ALwrity/backend/services/intelligence/monitoring/semantic_dashboard.py

586 lines
24 KiB
Python

"""
Phase 2B: Real-Time Semantic Dashboard
This module implements a real-time semantic monitoring dashboard for ongoing
content analysis, competitor tracking, and semantic health monitoring.
"""
import asyncio
import json
import time
from typing import Dict, List, Any, Optional, Set
from datetime import datetime, timedelta
from dataclasses import dataclass, asdict
from loguru import logger
from ..txtai_service import TxtaiIntelligenceService
from ..semantic_cache import semantic_cache_manager
from ..sif_integration import SIFIntegrationService
# Agent imports will be done lazily to avoid circular imports
@dataclass
class SemanticHealthMetric:
    """A single semantic health measurement taken during monitoring.

    Attributes:
        metric_name: Identifier of the metric being reported.
        value: Measured score for the metric.
        threshold: Cut-off the value is judged against.
        status: One of "healthy", "warning" or "critical".
        timestamp: When the measurement was taken, as a string.
        description: Human-readable summary of the measurement.
        recommendations: Suggested follow-up actions; may be empty.
    """

    metric_name: str
    value: float
    threshold: float
    status: str
    timestamp: str
    description: str
    recommendations: List[str]
@dataclass
class CompetitorSemanticSnapshot:
    """Point-in-time view of one competitor's semantic positioning.

    Attributes:
        competitor_id: Identifier assigned to the competitor.
        competitor_name: Display name of the competitor.
        semantic_overlap: Overlap score between the competitor and the user.
        unique_topics: Topics attributed only to the competitor.
        content_volume: Count of competitor content items.
        authority_score: Authority score attributed to the competitor.
        last_updated: When the snapshot was taken, as a string.
        trending_topics: Topics currently trending for the competitor.
    """

    competitor_id: str
    competitor_name: str
    semantic_overlap: float
    unique_topics: List[str]
    content_volume: int
    authority_score: float
    last_updated: str
    trending_topics: List[str]
@dataclass
class ContentSemanticInsight:
    """A semantic insight surfaced while monitoring content.

    Attributes:
        insight_id: Identifier of the insight.
        insight_type: One of "gap", "opportunity", "trend" or "threat".
        title: Short headline for the insight.
        description: Longer explanation of the insight.
        confidence_score: Confidence attached to the insight.
        impact_score: Estimated impact of the insight.
        related_topics: Topics the insight relates to.
        suggested_actions: Actions proposed in response.
        created_at: Creation time, as a string.
        expires_at: Expiry time, as a string.
    """

    insight_id: str
    insight_type: str
    title: str
    description: str
    confidence_score: float
    impact_score: float
    related_topics: List[str]
    suggested_actions: List[str]
    created_at: str
    expires_at: str
class RealTimeSemanticMonitor:
    """
    Real-time semantic monitoring system for content and competitor analysis.

    Features:
    - Continuous semantic health monitoring
    - Real-time competitor tracking
    - Content performance analysis
    - Automated alerting system
    - Trend detection and forecasting

    A monitor is bound to a single user. ``start_monitoring`` launches one
    background asyncio task that runs a monitoring cycle every
    ``monitoring_interval`` seconds. Each cycle's snapshot is appended to
    ``monitoring_history`` and written to the semantic cache, where
    ``get_dashboard_data`` reads it back.
    """

    # Maps an alert "type" to the prefix used for its persistence dedupe key.
    _DEDUPE_PREFIXES = {
        "health_critical": "semantic_health_critical",
        "high_impact_insight": "semantic_high_impact",
    }

    # Maps an in-memory alert severity to the severity stored with the alert.
    _SEVERITY_MAP = {"critical": "error", "warning": "warning"}

    @dataclass
    class HealthResult:
        """Aggregated health summary returned by ``check_semantic_health``.

        The defaults describe the "no metrics available" state.
        """

        status: str = "unknown"
        value: float = 0.0

    def __init__(self, user_id: str):
        """Create a monitor for one user.

        Args:
            user_id: Identifier handed to the intelligence and SIF services
                and used in cache keys and log messages.
        """
        self.user_id = user_id
        self.intelligence_service = TxtaiIntelligenceService(user_id)
        self.cache_manager = semantic_cache_manager
        self.sif_service = SIFIntegrationService(user_id)

        # Initialize monitoring agents (lazy initialization to avoid circular imports)
        self.strategy_agent = None
        self.guardian_agent = None
        self.link_agent = None

        # Monitoring configuration
        self.monitoring_interval = 300  # seconds between cycles (5 minutes)
        self.health_thresholds = {
            "semantic_diversity": 0.6,
            "content_freshness": 0.7,
            "competitor_gap": 0.5,
            "authority_score": 0.4,
        }

        # Monitoring state
        self.is_monitoring = False
        self.monitored_competitors: Set[str] = set()
        self.alert_subscribers: List[str] = []
        self.monitoring_history: List[Dict[str, Any]] = []
        # Strong reference to the background loop task. The event loop only
        # keeps weak references to tasks, so a task nobody else references
        # may be garbage-collected before it finishes.
        self._monitoring_task: Optional[asyncio.Task] = None

        logger.info(f"Real-time semantic monitor initialized for user {user_id}")

    async def check_semantic_health(self, user_id: Optional[str] = None) -> Any:
        """
        Public wrapper for semantic health check.
        Aggregates metrics into a single health status object.

        Args:
            user_id: Accepted for call compatibility but ignored; the check
                always runs for ``self.user_id``.

        Returns:
            A ``HealthResult`` with ``status`` and ``value`` attributes.
            ``status`` is "critical" if any metric is critical, otherwise
            "warning" if any metric is a warning, otherwise "healthy".
            ``value`` is the mean of the metric values. When no metrics are
            available the result is ``status="unknown"``, ``value=0.0``.
        """
        metrics = await self._check_semantic_health()
        if not metrics:
            return self.HealthResult()

        statuses = {m.status for m in metrics}
        if "critical" in statuses:
            status = "critical"
        elif "warning" in statuses:
            status = "warning"
        else:
            status = "healthy"

        avg_value = sum(m.value for m in metrics) / len(metrics)
        return self.HealthResult(status=status, value=avg_value)

    async def start_monitoring(self, competitors: Optional[List[str]] = None) -> bool:
        """Start real-time semantic monitoring.

        Calling this while a monitoring task is already running only updates
        the competitor set; it does not start a second loop.

        Args:
            competitors: Competitors to track. When empty or ``None`` the
                current competitor set is left unchanged.

        Returns:
            True on success, False if starting failed (the error is logged).
        """
        try:
            self.is_monitoring = True
            if competitors:
                self.monitored_competitors = set(competitors)
            logger.info(f"Started semantic monitoring for user {self.user_id}")
            logger.info(f"Monitoring {len(self.monitored_competitors)} competitors")

            # Start the background task only if none is alive, so repeated
            # calls cannot stack up concurrent monitoring loops.
            task = self._monitoring_task
            if task is None or task.done():
                self._monitoring_task = asyncio.create_task(self._monitoring_loop())
            return True
        except Exception as e:
            logger.error(f"Failed to start semantic monitoring: {e}")
            return False

    async def stop_monitoring(self) -> bool:
        """Stop real-time semantic monitoring.

        Clears the monitoring flag and cancels the background task so the
        loop stops immediately instead of after its current sleep.

        Returns:
            True on success, False if stopping failed (the error is logged).
        """
        try:
            self.is_monitoring = False
            task, self._monitoring_task = self._monitoring_task, None
            if task is not None and not task.done():
                task.cancel()
            logger.info(f"Stopped semantic monitoring for user {self.user_id}")
            return True
        except Exception as e:
            logger.error(f"Failed to stop semantic monitoring: {e}")
            return False

    async def _monitoring_loop(self):
        """Main monitoring loop; runs one cycle per interval while enabled.

        Each cycle collects health metrics, competitor snapshots and content
        insights, records a snapshot, prunes history to the last 24 hours,
        raises alerts and caches the snapshot. An error in a cycle is logged
        and the loop continues after the usual interval.
        """
        while self.is_monitoring:
            try:
                logger.info(f"Running semantic health check for user {self.user_id}")

                # Perform comprehensive semantic analysis
                health_metrics = await self._check_semantic_health()
                competitor_updates = await self._monitor_competitors()
                content_insights = await self._analyze_content_performance()

                # Store monitoring snapshot
                snapshot = {
                    "timestamp": datetime.now().isoformat(),
                    "user_id": self.user_id,
                    "health_metrics": [asdict(metric) for metric in health_metrics],
                    "competitor_updates": [asdict(update) for update in competitor_updates],
                    "content_insights": [asdict(insight) for insight in content_insights],
                }
                self.monitoring_history.append(snapshot)

                # Keep only last 24 hours of history
                self.monitoring_history = self._history_since(
                    datetime.now() - timedelta(hours=24)
                )

                # Check for alerts
                await self._check_alerts(health_metrics, competitor_updates, content_insights)

                # Cache results for dashboard
                await self._cache_monitoring_results(snapshot)

                logger.info(
                    f"Semantic monitoring cycle completed. Next check in {self.monitoring_interval}s"
                )

                # Wait for next cycle
                await asyncio.sleep(self.monitoring_interval)
            except Exception as e:
                logger.error(f"Error in semantic monitoring loop: {e}")
                await asyncio.sleep(self.monitoring_interval)  # Continue even on error

    async def _check_semantic_health(self) -> List["SemanticHealthMetric"]:
        """Check overall semantic health of the user's content.

        Returns:
            Metrics for semantic diversity, content freshness and authority.
            The list is empty when the insights lookup reports an error, and
            may be partial when an exception interrupts the computation (the
            exception is logged, not raised).
        """
        metrics = []
        try:
            # Get current semantic insights
            insights = await self.sif_service.get_semantic_insights({"user_id": self.user_id})
            if insights.get("source") == "error":
                logger.warning("Failed to get semantic insights for health check")
                return metrics

            insights_data = insights.get("insights", {})

            # Semantic diversity metric: pillar count scaled so that ten
            # pillars map to 1.0, clamped so the score stays within 0-1.
            content_pillars = insights_data.get("content_pillars", [])
            semantic_diversity = min(len(content_pillars) / 10.0, 1.0)
            diversity_threshold = self.health_thresholds["semantic_diversity"]
            diversity_status = "healthy" if semantic_diversity >= diversity_threshold else "warning"
            metrics.append(SemanticHealthMetric(
                metric_name="semantic_diversity",
                value=semantic_diversity,
                threshold=diversity_threshold,
                status=diversity_status,
                timestamp=datetime.now().isoformat(),
                description=f"Content covers {len(content_pillars)} semantic pillars",
                recommendations=(
                    ["Expand content topics", "Explore new semantic areas"]
                    if diversity_status == "warning" else []
                ),
            ))

            # Content freshness metric (based on recent updates)
            freshness_score = await self._calculate_content_freshness()
            freshness_threshold = self.health_thresholds["content_freshness"]
            freshness_status = "healthy" if freshness_score >= freshness_threshold else "warning"
            metrics.append(SemanticHealthMetric(
                metric_name="content_freshness",
                value=freshness_score,
                threshold=freshness_threshold,
                status=freshness_status,
                timestamp=datetime.now().isoformat(),
                description="Content freshness based on recent semantic updates",
                recommendations=(
                    ["Update content regularly", "Monitor trending topics"]
                    if freshness_status == "warning" else []
                ),
            ))

            # Authority score metric; falling below the threshold is
            # reported as "critical" rather than "warning".
            authority_score = await self._calculate_authority_score()
            authority_threshold = self.health_thresholds["authority_score"]
            authority_status = "healthy" if authority_score >= authority_threshold else "critical"
            metrics.append(SemanticHealthMetric(
                metric_name="authority_score",
                value=authority_score,
                threshold=authority_threshold,
                status=authority_status,
                timestamp=datetime.now().isoformat(),
                description="Semantic authority based on content depth and relevance",
                recommendations=(
                    ["Create authoritative content", "Build topical expertise"]
                    if authority_status != "healthy" else []
                ),
            ))
        except Exception as e:
            logger.error(f"Failed to check semantic health: {e}")
        return metrics

    async def _monitor_competitors(self) -> List["CompetitorSemanticSnapshot"]:
        """Monitor competitor semantic positioning.

        Returns:
            One snapshot per monitored competitor. The values are currently
            sample data (fixed topics and overlap, randomised volume and
            authority), not the result of a real analysis.
        """
        # Function-scope import: the module never imports ``random`` at the
        # top level, so without this every snapshot failed with NameError
        # and the result was always empty.
        import random

        snapshots = []
        for competitor in self.monitored_competitors:
            try:
                # This would perform actual competitor analysis
                # For now, return sample data
                snapshots.append(CompetitorSemanticSnapshot(
                    competitor_id=f"comp_{competitor}",
                    competitor_name=competitor,
                    semantic_overlap=0.65,
                    unique_topics=["AI automation", "Voice search", "Video marketing"],
                    content_volume=random.randint(50, 200),
                    authority_score=random.uniform(0.4, 0.9),
                    last_updated=datetime.now().isoformat(),
                    trending_topics=["AI content", "Voice optimization"],
                ))
            except Exception as e:
                logger.error(f"Failed to monitor competitor {competitor}: {e}")
        return snapshots

    async def _analyze_content_performance(self) -> List["ContentSemanticInsight"]:
        """Analyze content performance and identify insights.

        Returns:
            A gap, a trend and a threat insight. The insights are hard-coded
            sample data; only their creation and expiry times vary.
        """
        insights = []
        try:
            current_time = datetime.now()
            created_at = current_time.isoformat()

            # Content gap insight
            insights.append(ContentSemanticInsight(
                insight_id="gap_001",
                insight_type="gap",
                title="Voice Search Optimization Gap",
                description="Competitors are covering voice search topics 40% more than your content",
                confidence_score=0.85,
                impact_score=8.5,
                related_topics=["voice search", "featured snippets", "conversational AI"],
                suggested_actions=["Create voice search content", "Optimize for featured snippets"],
                created_at=created_at,
                expires_at=(current_time + timedelta(days=7)).isoformat(),
            ))

            # Trending opportunity insight
            insights.append(ContentSemanticInsight(
                insight_id="trend_001",
                insight_type="trend",
                title="AI Content Tools Trending",
                description="AI content creation tools showing 300% increase in search volume",
                confidence_score=0.92,
                impact_score=9.2,
                related_topics=["AI content", "content automation", "AI writing tools"],
                suggested_actions=["Create AI tool reviews", "Develop AI content strategy"],
                created_at=created_at,
                expires_at=(current_time + timedelta(days=14)).isoformat(),
            ))

            # Threat insight
            insights.append(ContentSemanticInsight(
                insight_id="threat_001",
                insight_type="threat",
                title="Competitor Content Surge",
                description="Top competitor increased content production by 150% in your key topics",
                confidence_score=0.78,
                impact_score=7.8,
                related_topics=["content strategy", "competitor analysis"],
                suggested_actions=["Increase content frequency", "Focus on unique angles"],
                created_at=created_at,
                expires_at=(current_time + timedelta(days=5)).isoformat(),
            ))
        except Exception as e:
            logger.error(f"Failed to analyze content performance: {e}")
        return insights

    async def _calculate_content_freshness(self) -> float:
        """Calculate content freshness score (currently a fixed placeholder)."""
        # This would analyze actual content timestamps and updates
        return 0.85  # Placeholder

    async def _calculate_authority_score(self) -> float:
        """Calculate semantic authority score (currently a fixed placeholder)."""
        # This would analyze content depth, backlinks, engagement, etc.
        return 0.72  # Placeholder

    async def _check_alerts(self, health_metrics: List["SemanticHealthMetric"],
                            competitor_updates: List["CompetitorSemanticSnapshot"],
                            content_insights: List["ContentSemanticInsight"]):
        """Check for alert conditions and notify subscribers.

        An alert is raised for every metric whose status is "critical" and
        for every insight whose impact score is at least 8.0. Alerts are
        persisted on a best-effort basis and then sent.

        Args:
            health_metrics: Metrics from the current cycle.
            competitor_updates: Competitor snapshots from the current cycle;
                not inspected at present.
            content_insights: Insights from the current cycle.
        """
        alerts = []

        # Check health metrics for critical conditions
        for metric in health_metrics:
            if metric.status == "critical":
                alerts.append({
                    "type": "health_critical",
                    "title": f"Critical: {metric.metric_name}",
                    "message": metric.description,
                    "severity": "critical",
                    "timestamp": datetime.now().isoformat(),
                })

        # Check for high-impact insights
        for insight in content_insights:
            if insight.impact_score >= 8.0:
                alerts.append({
                    "type": "high_impact_insight",
                    "title": f"High Impact: {insight.title}",
                    "message": insight.description,
                    "severity": "warning",
                    "timestamp": datetime.now().isoformat(),
                })

        if alerts:
            self._persist_alerts(alerts)
            await self._send_alerts(alerts)

    def _persist_alerts(self, alerts: List[Dict[str, Any]]) -> None:
        """Store alerts through the agent activity service, best effort.

        Any failure (missing modules, no session, a rejected alert) is logged
        and swallowed so that persistence problems never block alert
        delivery. The database session is always closed once opened.
        """
        from datetime import timezone

        try:
            from services.agent_activity_service import AgentActivityService
            from services.database import get_session_for_user

            db = get_session_for_user(self.user_id)
            if not db:
                return
            try:
                service = AgentActivityService(db, self.user_id)
                # The dedupe key includes the UTC date, so a given alert is
                # deduplicated per day.
                today = datetime.now(timezone.utc).date().isoformat()
                for alert in alerts:
                    alert_type = alert.get("type") or "semantic_alert"
                    severity = alert.get("severity") or "info"
                    prefix = self._DEDUPE_PREFIXES.get(alert_type)
                    dedupe_key = (
                        f"{prefix}:{alert.get('title')}:{today}" if prefix else None
                    )
                    service.create_alert(
                        alert_type=alert_type,
                        title=alert.get("title") or "Semantic alert",
                        message=alert.get("message") or "",
                        severity=self._SEVERITY_MAP.get(severity, "info"),
                        payload=alert,
                        cta_path="/seo-dashboard",
                        dedupe_key=dedupe_key,
                    )
            finally:
                db.close()
        except Exception as e:
            logger.warning(
                f"Failed to persist semantic alerts for user {self.user_id}: {e}"
            )

    async def get_cache_stats(self) -> Dict[str, Any]:
        """Get semantic cache statistics."""
        return self.cache_manager.get_stats()

    async def _send_alerts(self, alerts: List[Dict[str, Any]]):
        """Send alerts to subscribed users (currently by logging a warning)."""
        for alert in alerts:
            logger.warning(f"ALERT: {alert['title']} - {alert['message']}")
            # Here you would integrate with notification systems (email, Slack, etc.)

    async def _cache_monitoring_results(self, snapshot: Dict[str, Any]):
        """Cache monitoring results for dashboard access.

        Failures are logged and not raised.
        """
        try:
            cache_key = f"semantic_monitoring_{self.user_id}"
            self.cache_manager.set(
                cache_key,
                self.user_id,
                snapshot,
                ttl=300,  # 5 minutes
            )
            logger.debug(f"Cached monitoring results for user {self.user_id}")
        except Exception as e:
            logger.error(f"Failed to cache monitoring results: {e}")

    def get_dashboard_data(self) -> Dict[str, Any]:
        """Get current dashboard data for the user.

        Returns:
            The most recently cached snapshot together with the monitor's
            status and configuration. When nothing is cached, an empty
            payload whose status is "inactive" and whose ``last_updated`` is
            the current time. On failure, ``{"error": <message>}``.
        """
        try:
            # Get cached monitoring results
            cache_key = f"semantic_monitoring_{self.user_id}"
            cached_data = self.cache_manager.get(cache_key, self.user_id)

            if cached_data:
                return {
                    "status": "active" if self.is_monitoring else "inactive",
                    "last_updated": cached_data.get("timestamp"),
                    "health_metrics": cached_data.get("health_metrics", []),
                    "competitor_updates": cached_data.get("competitor_updates", []),
                    "content_insights": cached_data.get("content_insights", []),
                    "monitored_competitors": list(self.monitored_competitors),
                    "monitoring_interval": self.monitoring_interval,
                }

            # Return default data if no cache
            return {
                "status": "inactive",
                "last_updated": datetime.now().isoformat(),
                "health_metrics": [],
                "competitor_updates": [],
                "content_insights": [],
                "monitored_competitors": list(self.monitored_competitors),
                "monitoring_interval": self.monitoring_interval,
            }
        except Exception as e:
            logger.error(f"Failed to get dashboard data: {e}")
            return {"error": str(e)}

    def get_monitoring_history(self, hours: int = 24) -> List[Dict[str, Any]]:
        """Get monitoring history for the specified number of hours.

        Args:
            hours: Size of the look-back window, counted back from now.

        Returns:
            Snapshots whose timestamp is newer than the window start.
        """
        return self._history_since(datetime.now() - timedelta(hours=hours))

    def _history_since(self, cutoff_time: datetime) -> List[Dict[str, Any]]:
        """Return the recorded snapshots strictly newer than ``cutoff_time``."""
        return [
            h for h in self.monitoring_history
            if datetime.fromisoformat(h["timestamp"]) > cutoff_time
        ]
class SemanticDashboardAPI:
    """Entry point for the dashboard: keeps one semantic monitor per user."""

    def __init__(self):
        # user_id -> monitor, filled lazily by get_monitor().
        self.monitors: Dict[str, RealTimeSemanticMonitor] = {}

    def get_monitor(self, user_id: str) -> RealTimeSemanticMonitor:
        """Return the user's monitor, creating and registering it on first use."""
        try:
            return self.monitors[user_id]
        except KeyError:
            created = RealTimeSemanticMonitor(user_id)
            self.monitors[user_id] = created
            return created

    async def start_dashboard_monitoring(self, user_id: str, competitors: List[str] = None) -> Dict[str, Any]:
        """Begin monitoring for a user and report whether it started.

        Returns a dict with the user id, the start outcome, the requested
        competitors (an empty list when none were given) and a timestamp.
        """
        started = await self.get_monitor(user_id).start_monitoring(competitors)
        return {
            "user_id": user_id,
            "monitoring_started": started,
            "competitors": competitors or [],
            "timestamp": datetime.now().isoformat(),
        }

    async def stop_dashboard_monitoring(self, user_id: str) -> Dict[str, Any]:
        """End monitoring for a user and report whether it stopped.

        Returns a dict with the user id, the stop outcome and a timestamp.
        """
        stopped = await self.get_monitor(user_id).stop_monitoring()
        return {
            "user_id": user_id,
            "monitoring_stopped": stopped,
            "timestamp": datetime.now().isoformat(),
        }

    def get_dashboard_data(self, user_id: str) -> Dict[str, Any]:
        """Return the dashboard payload produced by the user's monitor."""
        return self.get_monitor(user_id).get_dashboard_data()

    def get_monitoring_history(self, user_id: str, hours: int = 24) -> List[Dict[str, Any]]:
        """Return the user's monitoring history for the last ``hours`` hours."""
        return self.get_monitor(user_id).get_monitoring_history(hours)
# Global API instance: a single module-level SemanticDashboardAPI, created at
# import time. test_semantic_dashboard() in this module drives it.
semantic_dashboard_api = SemanticDashboardAPI()
# Example usage and testing
async def test_semantic_dashboard():
    """Exercise the dashboard end to end through the global API instance.

    Starts monitoring for a test user, waits ten seconds, logs the dashboard
    payload and the last hour of history, then stops monitoring.
    """
    api = semantic_dashboard_api
    logger.info("Testing Real-Time Semantic Dashboard")

    # Fixture: one test user tracking three competitors.
    test_user = "test_user_dashboard"
    tracked = ["competitor1.com", "competitor2.com", "competitor3.com"]

    # Kick off monitoring.
    logger.info("Starting semantic monitoring...")
    started = await api.start_dashboard_monitoring(test_user, tracked)
    logger.info(f"Monitoring started: {started}")

    # Give the background loop time to produce a snapshot.
    logger.info("Waiting for monitoring data collection...")
    await asyncio.sleep(10)

    # Report what the dashboard currently exposes.
    logger.info("Getting dashboard data...")
    dashboard_data = api.get_dashboard_data(test_user)
    logger.info(f"Dashboard status: {dashboard_data.get('status')}")
    logger.info(f"Health metrics: {len(dashboard_data.get('health_metrics', []))}")
    logger.info(f"Competitor updates: {len(dashboard_data.get('competitor_updates', []))}")
    logger.info(f"Content insights: {len(dashboard_data.get('content_insights', []))}")

    # Report how many snapshots were recorded in the last hour.
    logger.info("Getting monitoring history...")
    recent = api.get_monitoring_history(test_user, hours=1)
    logger.info(f"Monitoring history entries: {len(recent)}")

    # Shut monitoring down again.
    logger.info("Stopping semantic monitoring...")
    stopped = await api.stop_dashboard_monitoring(test_user)
    logger.info(f"Monitoring stopped: {stopped}")

    logger.info("Semantic Dashboard test completed successfully!")
if __name__ == "__main__":
    # Run the dashboard test coroutine when this module is executed as a script.
    asyncio.run(test_semantic_dashboard())