586 lines
24 KiB
Python
586 lines
24 KiB
Python
"""
|
|
Phase 2B: Real-Time Semantic Dashboard
|
|
|
|
This module implements a real-time semantic monitoring dashboard for ongoing
|
|
content analysis, competitor tracking, and semantic health monitoring.
|
|
"""
|
|
|
|
import asyncio
|
|
import json
|
|
import time
|
|
from typing import Dict, List, Any, Optional, Set
|
|
from datetime import datetime, timedelta
|
|
from dataclasses import dataclass, asdict
|
|
from loguru import logger
|
|
|
|
from ..txtai_service import TxtaiIntelligenceService
|
|
from ..semantic_cache import semantic_cache_manager
|
|
from ..sif_integration import SIFIntegrationService
|
|
# Agent imports will be done lazily to avoid circular imports
|
|
|
|
|
|
@dataclass
class SemanticHealthMetric:
    """One named health measurement together with its threshold verdict."""

    # Identifier of the measurement, e.g. "semantic_diversity".
    metric_name: str
    # Measured score that is compared against ``threshold``.
    value: float
    # Minimum score at which the metric counts as healthy.
    threshold: float
    # One of "healthy", "warning", "critical".
    status: str
    # ISO-8601 time at which the metric was computed.
    timestamp: str
    # Human-readable summary of what was measured.
    description: str
    # Suggested follow-ups; empty when the metric is healthy.
    recommendations: List[str]
|
|
|
|
|
|
@dataclass
class CompetitorSemanticSnapshot:
    """Point-in-time record of one competitor's semantic positioning."""

    # Stable identifier for the competitor.
    competitor_id: str
    # Display name of the competitor.
    competitor_name: str
    # Overlap score between the competitor's topics and the user's.
    semantic_overlap: float
    # Topics attributed to the competitor only.
    unique_topics: List[str]
    # Amount of competitor content observed.
    content_volume: int
    # Competitor authority score.
    authority_score: float
    # ISO-8601 time at which the snapshot was taken.
    last_updated: str
    # Topics currently trending for the competitor.
    trending_topics: List[str]
|
|
|
|
|
|
@dataclass
class ContentSemanticInsight:
    """A single insight surfaced by content monitoring."""

    # Identifier of the insight.
    insight_id: str
    # One of "gap", "opportunity", "trend", "threat".
    insight_type: str
    # Short headline for the insight.
    title: str
    # Longer explanation of the finding.
    description: str
    # Confidence attached to the finding.
    confidence_score: float
    # Estimated impact of the finding.
    impact_score: float
    # Topics the insight relates to.
    related_topics: List[str]
    # Recommended next steps.
    suggested_actions: List[str]
    # ISO-8601 creation time.
    created_at: str
    # ISO-8601 time after which the insight is considered stale.
    expires_at: str
|
|
|
|
|
|
class RealTimeSemanticMonitor:
    """
    Real-time semantic monitoring system for content and competitor analysis.

    Features:
    - Continuous semantic health monitoring
    - Real-time competitor tracking
    - Content performance analysis
    - Automated alerting system
    - Trend detection and forecasting

    A monitor is bound to one ``user_id``. ``start_monitoring`` launches a
    background asyncio task that runs one analysis cycle every
    ``monitoring_interval`` seconds, appends a snapshot to
    ``monitoring_history`` and caches it for ``get_dashboard_data``.
    """

    @dataclass
    class HealthResult:
        """Aggregate returned by ``check_semantic_health``."""

        # Worst status among the metrics, or "unknown" when there are none.
        status: str = "unknown"
        # Mean of the metric values, or 0.0 when there are none.
        value: float = 0.0

    def __init__(self, user_id: str):
        """Create the service clients and default configuration for *user_id*."""
        self.user_id = user_id
        self.intelligence_service = TxtaiIntelligenceService(user_id)
        self.cache_manager = semantic_cache_manager
        self.sif_service = SIFIntegrationService(user_id)

        # Initialize monitoring agents (lazy initialization to avoid circular imports)
        self.strategy_agent = None
        self.guardian_agent = None
        self.link_agent = None

        # Monitoring configuration
        self.monitoring_interval = 300  # 5 minutes
        self.health_thresholds = {
            "semantic_diversity": 0.6,
            "content_freshness": 0.7,
            "competitor_gap": 0.5,
            "authority_score": 0.4,
        }

        # Monitoring state
        self.is_monitoring = False
        self.monitored_competitors: Set[str] = set()
        self.alert_subscribers: List[str] = []
        self.monitoring_history: List[Dict[str, Any]] = []
        # Strong reference to the background loop: asyncio only keeps weak
        # references to tasks, so an unreferenced task may be collected.
        self._monitoring_task: Optional[asyncio.Task] = None

        logger.info(f"Real-time semantic monitor initialized for user {user_id}")

    async def check_semantic_health(self, user_id: Optional[str] = None) -> Any:
        """
        Public wrapper for semantic health check.

        Aggregates metrics into a single health status object.

        Args:
            user_id: Accepted for call compatibility but ignored; the check
                always runs for ``self.user_id``.

        Returns:
            An object with ``status`` and ``value`` attributes. ``status`` is
            "critical" if any metric is critical, else "warning" if any metric
            is in warning, else "healthy"; ``value`` is the mean metric value.
            When no metrics are available the result is ("unknown", 0.0).
        """
        metrics = await self._check_semantic_health()
        if not metrics:
            return self.HealthResult()

        seen = {m.status for m in metrics}
        if "critical" in seen:
            status = "critical"
        elif "warning" in seen:
            status = "warning"
        else:
            status = "healthy"

        avg_value = sum(m.value for m in metrics) / len(metrics)
        return self.HealthResult(status=status, value=avg_value)

    async def start_monitoring(self, competitors: Optional[List[str]] = None) -> bool:
        """
        Start real-time semantic monitoring.

        Args:
            competitors: Competitors to track. A non-empty list replaces the
                current set; ``None`` or an empty list leaves it unchanged.

        Returns:
            True when monitoring is running after the call, False if starting
            the background task failed.
        """
        try:
            if competitors:
                self.monitored_competitors = set(competitors)
            self.is_monitoring = True

            logger.info(f"Started semantic monitoring for user {self.user_id}")
            logger.info(f"Monitoring {len(self.monitored_competitors)} competitors")

            # Start background monitoring task, unless one is already alive:
            # a second loop would duplicate every cycle and every alert.
            running = getattr(self, "_monitoring_task", None)
            if running is None or running.done():
                self._monitoring_task = asyncio.create_task(self._monitoring_loop())

            return True

        except Exception as e:
            self.is_monitoring = False
            logger.error(f"Failed to start semantic monitoring: {e}")
            return False

    async def stop_monitoring(self) -> bool:
        """
        Stop real-time semantic monitoring.

        Clears the running flag and cancels the background task so it does not
        linger in its sleep and resume if monitoring is restarted.

        Returns:
            True on success, False if stopping raised.
        """
        try:
            self.is_monitoring = False
            task = getattr(self, "_monitoring_task", None)
            self._monitoring_task = None
            if task is not None and not task.done():
                task.cancel()
            logger.info(f"Stopped semantic monitoring for user {self.user_id}")
            return True

        except Exception as e:
            logger.error(f"Failed to stop semantic monitoring: {e}")
            return False

    async def _monitoring_loop(self):
        """Main monitoring loop; runs one cycle per interval while enabled."""
        while self.is_monitoring:
            try:
                logger.info(f"Running semantic health check for user {self.user_id}")

                # Perform comprehensive semantic analysis
                health_metrics = await self._check_semantic_health()
                competitor_updates = await self._monitor_competitors()
                content_insights = await self._analyze_content_performance()

                # Store monitoring snapshot
                snapshot = {
                    "timestamp": datetime.now().isoformat(),
                    "user_id": self.user_id,
                    "health_metrics": [asdict(metric) for metric in health_metrics],
                    "competitor_updates": [asdict(update) for update in competitor_updates],
                    "content_insights": [asdict(insight) for insight in content_insights],
                }
                self.monitoring_history.append(snapshot)

                # Keep only last 24 hours of history
                self.monitoring_history = self.get_monitoring_history(hours=24)

                # Check for alerts
                await self._check_alerts(health_metrics, competitor_updates, content_insights)

                # Cache results for dashboard
                await self._cache_monitoring_results(snapshot)

                logger.info(f"Semantic monitoring cycle completed. Next check in {self.monitoring_interval}s")

            except asyncio.CancelledError:
                # Cancellation must end the loop, never be treated as a cycle error.
                raise
            except Exception as e:
                # Continue even on error
                logger.error(f"Error in semantic monitoring loop: {e}")

            # Wait for next cycle
            await asyncio.sleep(self.monitoring_interval)

    def _make_metric(self, name: str, value: float, failing_status: str,
                     description: str, recommendations: List[str]) -> "SemanticHealthMetric":
        """Build a metric, grading *value* against ``health_thresholds[name]``."""
        threshold = self.health_thresholds[name]
        status = "healthy" if value >= threshold else failing_status
        return SemanticHealthMetric(
            metric_name=name,
            value=value,
            threshold=threshold,
            status=status,
            timestamp=datetime.now().isoformat(),
            description=description,
            recommendations=list(recommendations) if status != "healthy" else [],
        )

    async def _check_semantic_health(self) -> "List[SemanticHealthMetric]":
        """
        Check overall semantic health of user's content.

        Returns:
            The metrics computed so far. The list is empty when the insight
            service reports an error, and may be partial if a later metric
            raised; failures are logged, never propagated.
        """
        metrics = []

        try:
            # Get current semantic insights
            insights = await self.sif_service.get_semantic_insights({"user_id": self.user_id})

            if insights.get("source") == "error":
                logger.warning("Failed to get semantic insights for health check")
                return metrics

            # ``or`` guards against explicit None payloads as well as missing keys.
            insights_data = insights.get("insights") or {}

            # Semantic diversity metric: pillar count normalized to 0-1 and
            # capped so that more than ten pillars cannot exceed the scale.
            content_pillars = insights_data.get("content_pillars") or []
            semantic_diversity = min(len(content_pillars) / 10.0, 1.0)
            metrics.append(self._make_metric(
                "semantic_diversity",
                semantic_diversity,
                "warning",
                f"Content covers {len(content_pillars)} semantic pillars",
                ["Expand content topics", "Explore new semantic areas"],
            ))

            # Content freshness metric (based on recent updates)
            freshness_score = await self._calculate_content_freshness()
            metrics.append(self._make_metric(
                "content_freshness",
                freshness_score,
                "warning",
                "Content freshness based on recent semantic updates",
                ["Update content regularly", "Monitor trending topics"],
            ))

            # Authority score metric: the only one that escalates to "critical".
            authority_score = await self._calculate_authority_score()
            metrics.append(self._make_metric(
                "authority_score",
                authority_score,
                "critical",
                "Semantic authority based on content depth and relevance",
                ["Create authoritative content", "Build topical expertise"],
            ))

        except Exception as e:
            logger.error(f"Failed to check semantic health: {e}")

        return metrics

    async def _monitor_competitors(self) -> "List[CompetitorSemanticSnapshot]":
        """
        Monitor competitor semantic positioning.

        Returns:
            One snapshot per monitored competitor. The data is sample data:
            fixed topics and overlap, randomized volume and authority.
        """
        # Imported here because the module's import block does not provide
        # ``random``; without it every snapshot failed with a swallowed NameError.
        import random

        snapshots = []

        for competitor in self.monitored_competitors:
            try:
                # This would perform actual competitor analysis
                # For now, return sample data
                snapshots.append(CompetitorSemanticSnapshot(
                    competitor_id=f"comp_{competitor}",
                    competitor_name=competitor,
                    semantic_overlap=0.65,
                    unique_topics=["AI automation", "Voice search", "Video marketing"],
                    content_volume=random.randint(50, 200),
                    authority_score=random.uniform(0.4, 0.9),
                    last_updated=datetime.now().isoformat(),
                    trending_topics=["AI content", "Voice optimization"],
                ))

            except Exception as e:
                logger.error(f"Failed to monitor competitor {competitor}: {e}")

        return snapshots

    async def _analyze_content_performance(self) -> "List[ContentSemanticInsight]":
        """
        Analyze content performance and identify insights.

        Returns:
            Three fixed placeholder insights (gap, trend, threat) stamped with
            the current time; failures are logged and yield a shorter list.
        """
        insights = []

        try:
            # Generate various types of insights
            current_time = datetime.now()
            created = current_time.isoformat()

            # Content gap insight
            insights.append(ContentSemanticInsight(
                insight_id="gap_001",
                insight_type="gap",
                title="Voice Search Optimization Gap",
                description="Competitors are covering voice search topics 40% more than your content",
                confidence_score=0.85,
                impact_score=8.5,
                related_topics=["voice search", "featured snippets", "conversational AI"],
                suggested_actions=["Create voice search content", "Optimize for featured snippets"],
                created_at=created,
                expires_at=(current_time + timedelta(days=7)).isoformat(),
            ))

            # Trending opportunity insight
            insights.append(ContentSemanticInsight(
                insight_id="trend_001",
                insight_type="trend",
                title="AI Content Tools Trending",
                description="AI content creation tools showing 300% increase in search volume",
                confidence_score=0.92,
                impact_score=9.2,
                related_topics=["AI content", "content automation", "AI writing tools"],
                suggested_actions=["Create AI tool reviews", "Develop AI content strategy"],
                created_at=created,
                expires_at=(current_time + timedelta(days=14)).isoformat(),
            ))

            # Threat insight
            insights.append(ContentSemanticInsight(
                insight_id="threat_001",
                insight_type="threat",
                title="Competitor Content Surge",
                description="Top competitor increased content production by 150% in your key topics",
                confidence_score=0.78,
                impact_score=7.8,
                related_topics=["content strategy", "competitor analysis"],
                suggested_actions=["Increase content frequency", "Focus on unique angles"],
                created_at=created,
                expires_at=(current_time + timedelta(days=5)).isoformat(),
            ))

        except Exception as e:
            logger.error(f"Failed to analyze content performance: {e}")

        return insights

    async def _calculate_content_freshness(self) -> float:
        """Calculate content freshness score."""
        # This would analyze actual content timestamps and updates
        return 0.85  # Placeholder

    async def _calculate_authority_score(self) -> float:
        """Calculate semantic authority score."""
        # This would analyze content depth, backlinks, engagement, etc.
        return 0.72  # Placeholder

    async def _check_alerts(self, health_metrics: "List[SemanticHealthMetric]",
                            competitor_updates: "List[CompetitorSemanticSnapshot]",
                            content_insights: "List[ContentSemanticInsight]"):
        """
        Check for alert conditions and notify subscribers.

        Raises an alert for every critical health metric and for every insight
        whose ``impact_score`` is at least 8.0. ``competitor_updates`` is
        accepted but not inspected.
        """
        alerts = []

        # Check health metrics for critical conditions
        for metric in health_metrics:
            if metric.status == "critical":
                alerts.append({
                    "type": "health_critical",
                    "title": f"Critical: {metric.metric_name}",
                    "message": metric.description,
                    "severity": "critical",
                    "timestamp": datetime.now().isoformat(),
                })

        # Check for high-impact insights
        for insight in content_insights:
            if insight.impact_score >= 8.0:
                alerts.append({
                    "type": "high_impact_insight",
                    "title": f"High Impact: {insight.title}",
                    "message": insight.description,
                    "severity": "warning",
                    "timestamp": datetime.now().isoformat(),
                })

        if not alerts:
            return

        # Send alerts to subscribers
        self._persist_alerts(alerts)
        await self._send_alerts(alerts)

    def _persist_alerts(self, alerts: List[Dict[str, Any]]) -> None:
        """Best-effort write of *alerts* to the agent activity store."""
        dedupe_prefixes = {
            "health_critical": "semantic_health_critical",
            "high_impact_insight": "semantic_high_impact",
        }
        severity_map = {"critical": "error", "warning": "warning"}

        try:
            # Imported lazily, as in the rest of this module's agent wiring.
            from services.agent_activity_service import AgentActivityService
            from services.database import get_session_for_user

            db = get_session_for_user(self.user_id)
            if not db:
                return

            try:
                service = AgentActivityService(db, self.user_id)
                # Dedupe keys are scoped to the current UTC date.
                today = datetime.utcnow().date().isoformat()

                for alert in alerts:
                    alert_type = alert.get("type") or "semantic_alert"
                    severity = alert.get("severity") or "info"
                    prefix = dedupe_prefixes.get(alert_type)
                    dedupe_key = f"{prefix}:{alert.get('title')}:{today}" if prefix else None

                    service.create_alert(
                        alert_type=alert_type,
                        title=alert.get("title") or "Semantic alert",
                        message=alert.get("message") or "",
                        severity=severity_map.get(severity, "info"),
                        payload=alert,
                        cta_path="/seo-dashboard",
                        dedupe_key=dedupe_key,
                    )
            finally:
                # Release the session even when an alert write fails.
                db.close()

        except Exception as e:
            # Persistence is optional; log instead of failing the cycle.
            logger.warning(f"Failed to persist semantic alerts: {e}")

    async def get_cache_stats(self) -> Dict[str, Any]:
        """Get semantic cache statistics."""
        return self.cache_manager.get_stats()

    async def _send_alerts(self, alerts: List[Dict[str, Any]]):
        """Send alerts to subscribed users (currently: log each one)."""
        for alert in alerts:
            logger.warning(f"ALERT: {alert['title']} - {alert['message']}")
            # Here you would integrate with notification systems (email, Slack, etc.)

    async def _cache_monitoring_results(self, snapshot: Dict[str, Any]):
        """Cache monitoring results for dashboard access."""
        try:
            cache_key = f"semantic_monitoring_{self.user_id}"
            # NOTE(review): the TTL equals the default monitoring interval, so
            # the entry may expire just before the next cycle refreshes it.
            self.cache_manager.set(
                cache_key,
                self.user_id,
                snapshot,
                ttl=300  # 5 minutes
            )

            logger.debug(f"Cached monitoring results for user {self.user_id}")

        except Exception as e:
            logger.error(f"Failed to cache monitoring results: {e}")

    def get_dashboard_data(self) -> Dict[str, Any]:
        """
        Get current dashboard data for the user.

        Returns:
            The cached snapshot's metrics, updates and insights plus monitor
            state; empty lists when nothing is cached yet. ``status`` reflects
            whether monitoring is running in both cases. On failure the result
            is ``{"error": <message>}``.
        """
        try:
            # Get cached monitoring results
            cache_key = f"semantic_monitoring_{self.user_id}"
            cached_data = self.cache_manager.get(cache_key, self.user_id)

            if cached_data:
                last_updated = cached_data.get("timestamp")
                health_metrics = cached_data.get("health_metrics", [])
                competitor_updates = cached_data.get("competitor_updates", [])
                content_insights = cached_data.get("content_insights", [])
            else:
                # Default data if no cache
                last_updated = datetime.now().isoformat()
                health_metrics, competitor_updates, content_insights = [], [], []

            return {
                "status": "active" if self.is_monitoring else "inactive",
                "last_updated": last_updated,
                "health_metrics": health_metrics,
                "competitor_updates": competitor_updates,
                "content_insights": content_insights,
                "monitored_competitors": list(self.monitored_competitors),
                "monitoring_interval": self.monitoring_interval,
            }

        except Exception as e:
            logger.error(f"Failed to get dashboard data: {e}")
            return {"error": str(e)}

    def get_monitoring_history(self, hours: int = 24) -> List[Dict[str, Any]]:
        """Get monitoring history for the specified number of hours."""
        cutoff_time = datetime.now() - timedelta(hours=hours)
        return [
            h for h in self.monitoring_history
            if datetime.fromisoformat(h["timestamp"]) > cutoff_time
        ]
|
|
|
|
|
|
class SemanticDashboardAPI:
    """API interface for the semantic monitoring dashboard.

    Keeps one ``RealTimeSemanticMonitor`` per user id in ``self.monitors`` and
    forwards dashboard calls to it.
    """

    def __init__(self):
        # user_id -> monitor, filled lazily by ``get_monitor``.
        self.monitors: Dict[str, "RealTimeSemanticMonitor"] = {}

    def get_monitor(self, user_id: str) -> "RealTimeSemanticMonitor":
        """Get or create a semantic monitor for a user."""
        monitor = self.monitors.get(user_id)
        if monitor is None:
            monitor = RealTimeSemanticMonitor(user_id)
            self.monitors[user_id] = monitor
        return monitor

    async def start_dashboard_monitoring(self, user_id: str,
                                         competitors: Optional[List[str]] = None) -> Dict[str, Any]:
        """Start semantic monitoring for a user.

        Returns:
            A dict with ``user_id``, ``monitoring_started`` (the monitor's
            result), the requested ``competitors`` (``[]`` when omitted) and an
            ISO-8601 ``timestamp``.
        """
        monitor = self.get_monitor(user_id)
        started = await monitor.start_monitoring(competitors)

        return {
            "user_id": user_id,
            "monitoring_started": started,
            "competitors": competitors or [],
            "timestamp": datetime.now().isoformat(),
        }

    async def stop_dashboard_monitoring(self, user_id: str) -> Dict[str, Any]:
        """Stop semantic monitoring for a user.

        A user without a monitor has nothing to stop: success is reported
        without constructing a monitor (and its service clients) for them.

        Returns:
            A dict with ``user_id``, ``monitoring_stopped`` and an ISO-8601
            ``timestamp``.
        """
        monitor = self.monitors.get(user_id)
        stopped = True if monitor is None else await monitor.stop_monitoring()

        return {
            "user_id": user_id,
            "monitoring_stopped": stopped,
            "timestamp": datetime.now().isoformat(),
        }

    def get_dashboard_data(self, user_id: str) -> Dict[str, Any]:
        """Get current dashboard data for a user."""
        return self.get_monitor(user_id).get_dashboard_data()

    def get_monitoring_history(self, user_id: str, hours: int = 24) -> List[Dict[str, Any]]:
        """Get monitoring history for a user."""
        return self.get_monitor(user_id).get_monitoring_history(hours)
|
|
|
|
|
|
# Global API instance
|
|
# Created once at import time; holds the per-user monitors in memory.
semantic_dashboard_api = SemanticDashboardAPI()
|
|
|
|
|
|
# Example usage and testing
|
|
async def test_semantic_dashboard():
    """Smoke-test the dashboard: start, wait, read data and history, stop.

    Results are only logged; nothing is asserted or returned.
    """
    logger.info("Testing Real-Time Semantic Dashboard")

    # Test fixture: one user and three competitor domains
    api = semantic_dashboard_api
    test_user = "test_user_dashboard"
    tracked = ["competitor1.com", "competitor2.com", "competitor3.com"]

    # Start monitoring
    logger.info("Starting semantic monitoring...")
    started = await api.start_dashboard_monitoring(test_user, tracked)
    logger.info(f"Monitoring started: {started}")

    # Give the background loop time to complete its first cycle
    logger.info("Waiting for monitoring data collection...")
    await asyncio.sleep(10)

    # Read back the dashboard payload and report the size of each section
    logger.info("Getting dashboard data...")
    data = api.get_dashboard_data(test_user)
    logger.info(f"Dashboard status: {data.get('status')}")
    sections = (
        ("Health metrics", "health_metrics"),
        ("Competitor updates", "competitor_updates"),
        ("Content insights", "content_insights"),
    )
    for label, key in sections:
        logger.info(f"{label}: {len(data.get(key, []))}")

    # Read back the last hour of history
    logger.info("Getting monitoring history...")
    recent = api.get_monitoring_history(test_user, hours=1)
    logger.info(f"Monitoring history entries: {len(recent)}")

    # Stop monitoring
    logger.info("Stopping semantic monitoring...")
    stopped = await api.stop_dashboard_monitoring(test_user)
    logger.info(f"Monitoring stopped: {stopped}")

    logger.info("Semantic Dashboard test completed successfully!")
|
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: run the dashboard smoke test on a fresh event loop.
    asyncio.run(test_semantic_dashboard())
|