Recovered state: integrated TrendSurferAgent, restored frontend/backend files, and cleaned up recovery scripts
This commit is contained in:
1
backend/services/intelligence/__init__.py
Normal file
1
backend/services/intelligence/__init__.py
Normal file
@@ -0,0 +1 @@
|
||||
|
||||
601
backend/services/intelligence/agents.py
Normal file
601
backend/services/intelligence/agents.py
Normal file
@@ -0,0 +1,601 @@
|
||||
"""
|
||||
SIF Agent Interfaces
|
||||
Defines the specialized agents for digital marketing and SEO.
|
||||
Each agent leverages TxtaiIntelligenceService for semantic operations.
|
||||
"""
|
||||
|
||||
import traceback
|
||||
from typing import List, Dict, Any, Optional
|
||||
from datetime import datetime
|
||||
from loguru import logger
|
||||
from .txtai_service import TxtaiIntelligenceService
|
||||
|
||||
class SIFBaseAgent:
    """Shared base for the SIF agents.

    Stores the intelligence service every agent works through and offers a
    single helper for uniformly prefixed log output.
    """

    def __init__(self, intelligence_service: TxtaiIntelligenceService):
        # All semantic operations of the subclasses go through this object.
        self.intelligence = intelligence_service

    def _log_agent_operation(self, operation: str, **kwargs):
        """Log ``operation`` at INFO level and any keyword details at DEBUG level.

        Both messages are prefixed with the concrete agent class name so that
        output from different agents can be told apart.
        """
        agent_name = self.__class__.__name__
        logger.info(f"[{agent_name}] {operation}")
        if not kwargs:
            return
        logger.debug(f"[{agent_name}] Parameters: {kwargs}")
|
||||
|
||||
class StrategyArchitectAgent(SIFBaseAgent):
    """Agent for discovering content pillars and identifying strategic gaps."""

    async def discover_pillars(self) -> List[Dict[str, Any]]:
        """Group indexed content into pillars using the service's clustering.

        Returns:
            One dict per cluster with ``pillar_id``, ``indices``, ``size`` and
            ``confidence``. An empty list is returned when the service is not
            initialized, when no clusters come back, or when anything raises.
        """
        self._log_agent_operation("Discovering content pillars")
        prefix = f"[{self.__class__.__name__}]"

        try:
            # Clustering is pointless without an initialized service.
            if not self.intelligence.is_initialized():
                logger.error(f"{prefix} Intelligence service not initialized")
                return []

            clusters = await self.intelligence.cluster(min_score=0.6)
            if not clusters:
                logger.warning(f"{prefix} No clusters found")
                return []

            # Wrap every cluster in a pillar record carrying its metadata.
            pillars = []
            for position, members in enumerate(clusters):
                record = {
                    "pillar_id": f"pillar_{position}",
                    "indices": members,
                    "size": len(members),
                    "confidence": self._calculate_cluster_confidence(members),
                }
                pillars.append(record)
                logger.debug(f"{prefix} Created pillar {record['pillar_id']} with {record['size']} items")

            logger.info(f"{prefix} Discovered {len(pillars)} content pillars")
            return pillars

        except Exception as e:
            logger.error(f"{prefix} Failed to discover pillars: {e}")
            logger.error(f"{prefix} Full traceback: {traceback.format_exc()}")
            return []

    def _calculate_cluster_confidence(self, cluster_indices: List[int]) -> float:
        """Return a size-based confidence: one tenth per member, capped at 1.0."""
        member_count = len(cluster_indices)
        return min(1.0, member_count / 10.0)

    async def find_semantic_gaps(self, competitor_indices: List[int]) -> List[Dict[str, Any]]:
        """Report topics competitors cover that the user does not.

        NOTE: this is still a stub. ``competitor_indices`` is only used for
        logging its length and the returned entries are fixed placeholders.
        """
        self._log_agent_operation("Finding semantic content gaps", competitor_count=len(competitor_indices))
        prefix = f"[{self.__class__.__name__}]"

        try:
            # STUB: a real implementation would compare the user's topics with
            # the competitors' topics and keep those only the competitors cover.
            logger.info(f"{prefix} Found semantic gaps analysis stub")
            placeholder_gaps = [
                {"topic": "Topic A", "priority": "high", "reason": "Competitor coverage gap"},
                {"topic": "Topic B", "priority": "medium", "reason": "Emerging trend"},
            ]
            return placeholder_gaps

        except Exception as e:
            logger.error(f"{prefix} Failed to find semantic gaps: {e}")
            logger.error(f"{prefix} Full traceback: {traceback.format_exc()}")
            return []
|
||||
|
||||
class ContentGuardianAgent(SIFBaseAgent):
    """Agent for preventing cannibalization and ensuring content originality.

    In addition to the similarity checks it offers two heuristic tools,
    ``style_enforcer`` and ``safety_filter``; both are simple placeholders
    for model-based analysis.
    """

    CANNIBALIZATION_THRESHOLD = 0.85  # Similarity threshold for cannibalization warning
    ORIGINALITY_THRESHOLD = 0.75  # Minimum originality score

    def __init__(self, intelligence_service: TxtaiIntelligenceService, sif_service: Any = None):
        super().__init__(intelligence_service)
        # Optional; its presence enables the guideline lookup in style_enforcer.
        self.sif_service = sif_service

    async def check_cannibalization(self, new_draft: str) -> Dict[str, Any]:
        """Check if a new draft competes semantically with existing pages.

        Returns:
            A dict that always contains ``warning``. When the best match scores
            above ``CANNIBALIZATION_THRESHOLD`` it also carries ``similar_to``,
            ``score``, ``threshold`` and ``recommendation``; otherwise it holds
            ``uniqueness_score``, ``reason`` or ``error`` depending on the path.
        """
        # `or ""` keeps the logging call from raising before the None guard below.
        self._log_agent_operation("Checking for semantic cannibalization", draft_length=len(new_draft or ""))

        try:
            if not self.intelligence.is_initialized():
                logger.error(f"[{self.__class__.__name__}] Intelligence service not initialized")
                return {"warning": False, "error": "Service not initialized"}

            if not new_draft or len(new_draft.strip()) < 50:
                logger.warning(f"[{self.__class__.__name__}] Draft too short for meaningful analysis")
                return {"warning": False, "reason": "Draft too short"}

            # Only the single closest page matters for a cannibalization verdict.
            results = await self.intelligence.search(new_draft, limit=1)

            if not results:
                logger.info(f"[{self.__class__.__name__}] No similar content found - draft is unique")
                return {"warning": False, "uniqueness_score": 1.0}

            top_result = results[0]
            # Treat a missing or None score as "no similarity".
            similarity_score = top_result.get('score') or 0.0

            logger.debug(f"[{self.__class__.__name__}] Top similarity score: {similarity_score:.4f}")

            if similarity_score > self.CANNIBALIZATION_THRESHOLD:
                warning_data = {
                    "warning": True,
                    "similar_to": top_result.get('id', 'unknown'),
                    "score": similarity_score,
                    "threshold": self.CANNIBALIZATION_THRESHOLD,
                    "recommendation": "Consider revising the draft to target a different angle or merge with existing content"
                }
                logger.warning(f"[{self.__class__.__name__}] Cannibalization detected: {warning_data}")
                return warning_data

            logger.info(f"[{self.__class__.__name__}] No cannibalization detected. Draft is sufficiently unique.")
            return {"warning": False, "uniqueness_score": 1.0 - similarity_score}

        except Exception as e:
            logger.error(f"[{self.__class__.__name__}] Failed to check cannibalization: {e}")
            logger.error(f"[{self.__class__.__name__}] Full traceback: {traceback.format_exc()}")
            return {"warning": False, "error": str(e)}

    async def verify_originality(self, text: str, competitor_index: Any) -> Dict[str, Any]:
        """Verify originality against competitor content index.

        NOTE: still a stub. ``competitor_index`` is not consulted and the
        returned score is a fixed placeholder.
        """
        self._log_agent_operation("Verifying originality against competitors", text_length=len(text or ""))

        try:
            if not text or len(text.strip()) < 50:
                logger.warning(f"[{self.__class__.__name__}] Text too short for meaningful originality check")
                return {"originality_score": 0.0, "reason": "Text too short"}

            # STUB: Implement cross-index search against competitor content
            # This would search the text against a competitor-specific index

            logger.info(f"[{self.__class__.__name__}] Originality verification stub completed")
            return {
                "originality_score": 0.95,  # Placeholder
                "confidence": 0.8,
                "method": "semantic_comparison",
                "notes": "Competitor index integration pending"
            }

        except Exception as e:
            logger.error(f"[{self.__class__.__name__}] Failed to verify originality: {e}")
            logger.error(f"[{self.__class__.__name__}] Full traceback: {traceback.format_exc()}")
            return {"originality_score": 0.0, "error": str(e)}

    async def _fetch_style_guidelines(self) -> Optional[Dict[str, Any]]:
        """Best-effort lookup of brand voice/style guidelines in the index.

        Searches for the most relevant document and uses it only when its
        metadata is of type ``website_analysis``. Returns ``None`` when nothing
        usable is found or when the lookup raises (the failure is logged).
        """
        import json

        try:
            results = await self.intelligence.search("website analysis brand voice style", limit=1)
            if not results:
                return None

            res = results[0]
            metadata_str = res.get('object')
            # The metadata may be stored as a JSON string, as a dict, or inline in the result.
            metadata = json.loads(metadata_str) if isinstance(metadata_str, str) else (metadata_str or res)

            if metadata.get('type') != 'website_analysis':
                return None

            report = metadata.get('full_report', {})
            guidelines = {
                "tone": report.get('brand_analysis', {}).get('brand_voice', 'neutral'),
                "style_patterns": report.get('style_patterns', {}),
                "writing_style": report.get('writing_style', {})
            }
            logger.info(f"[{self.__class__.__name__}] Retrieved style guidelines from SIF: {guidelines.get('tone')}")
            return guidelines
        except Exception as e:
            logger.warning(f"[{self.__class__.__name__}] Failed to retrieve style guidelines from SIF: {e}")
            return None

    async def style_enforcer(self, text: str, style_guidelines: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
        """
        Tool: Ensures content adheres to brand voice and style guidelines.

        Args:
            text: Content to check.
            style_guidelines: Optional guidelines; when omitted and a SIF
                service is configured, they are looked up in the index.

        Returns:
            ``compliance_score``, ``issues``, ``is_compliant`` and
            ``guidelines_source`` ("sif_index" when the index lookup was used,
            otherwise "provided"); ``{"error": ...}`` on unexpected failure.
        """
        self._log_agent_operation("Enforcing style guidelines", text_length=len(text or ""))

        try:
            if not text:
                return {"compliance_score": 0.0, "issues": ["No text provided"]}

            # Decide the reported source BEFORE the lookup can populate
            # style_guidelines; evaluating it afterwards would always say "provided".
            guidelines_source = "sif_index" if not style_guidelines and self.sif_service else "provided"

            # 1. Fetch Style Guidelines from SIF if not provided
            if guidelines_source == "sif_index":
                fetched = await self._fetch_style_guidelines()
                if fetched:
                    style_guidelines = fetched

            issues = []
            score = 1.0

            # Basic Heuristic Checks (Placeholder for LLM-based style analysis)

            # 1. Tone Check: formal/professional guidelines disallow contractions.
            # str(...) guards against a tone value that is None or not a string.
            tone = str(style_guidelines.get('tone') or '').lower() if style_guidelines else ''
            if 'formal' in tone or 'professional' in tone:
                text_lower = text.lower()
                contractions = ["can't", "won't", "don't", "it's"]
                found_contractions = [c for c in contractions if c in text_lower]
                if found_contractions:
                    issues.append(f"Found contractions in formal text: {', '.join(found_contractions[:3])}...")
                    score -= 0.1

            # 2. Length/Sentence Structure (simple metric)
            # Blank fragments (e.g. after the final period) are dropped so they
            # do not dilute the average.
            sentences = [s for s in text.split('.') if s.strip()]
            avg_len = sum(len(s.split()) for s in sentences) / max(1, len(sentences))
            if avg_len > 25:
                issues.append("Average sentence length is too high (>25 words). Consider shortening.")
                score -= 0.1

            return {
                "compliance_score": max(0.0, score),
                "issues": issues,
                "is_compliant": score > 0.8,
                "guidelines_source": guidelines_source
            }

        except Exception as e:
            logger.error(f"[{self.__class__.__name__}] Style enforcement failed: {e}")
            return {"error": str(e)}

    async def safety_filter(self, text: str) -> Dict[str, Any]:
        """
        Tool: Flags potentially harmful, offensive, or sensitive content.

        Returns:
            ``is_safe``, ``flags`` (matched keywords), ``safety_score`` (1.0 or
            0.0) and ``action`` ("approve" or "flag_for_review");
            ``{"error": ...}`` on unexpected failure.
        """
        import re

        self._log_agent_operation("Running safety filter", text_length=len(text or ""))

        try:
            # Basic Keyword Blocklist (Placeholder for LLM/Safety Model)
            # In production, this should call a dedicated safety API (e.g., OpenAI Moderation, Llama Guard)
            unsafe_keywords = [
                "hate", "kill", "murder", "attack", "destroy",  # Violent
                "scam", "fraud", "steal",  # Illegal
                "explicit", "adult"  # NSFW
            ]

            text_lower = (text or "").lower()

            # \b matches at string edges and next to punctuation, which a
            # space-padded substring test would miss ("kill.", "Scam!").
            found_flags = [
                keyword
                for keyword in unsafe_keywords
                if re.search(rf"\b{re.escape(keyword)}\b", text_lower)
            ]

            is_safe = not found_flags

            return {
                "is_safe": is_safe,
                "flags": found_flags,
                "safety_score": 1.0 if is_safe else 0.0,
                "action": "approve" if is_safe else "flag_for_review"
            }

        except Exception as e:
            logger.error(f"[{self.__class__.__name__}] Safety filter failed: {e}")
            return {"error": str(e)}
|
||||
|
||||
class LinkGraphAgent(SIFBaseAgent):
    """
    Agent for internal link suggestions, graph management, and authority analysis.
    Implements the semantic link graph using SIF and GSC/Bing data.
    """

    RELEVANCE_THRESHOLD = 0.6  # Minimum relevance score for link suggestions
    MAX_SUGGESTIONS = 10  # Maximum number of link suggestions

    def __init__(self, intelligence_service: TxtaiIntelligenceService, sif_service: Any = None):
        super().__init__(intelligence_service)
        # Optional; required by authority_analyzer and used for authority data.
        self.sif_service = sif_service

    async def suggest_internal_links(self, draft: str) -> List[Dict[str, Any]]:
        """Suggest internal links based on semantic proximity and authority."""
        return await self.link_suggester(draft)

    async def _load_authority_map(self) -> Dict[str, float]:
        """Best-effort fetch of per-URL authority multipliers.

        The dashboard context is requested when a SIF service is configured,
        but no per-page authority is extracted from it yet, so the returned
        map is currently always empty. Failures are logged and ignored.
        """
        authority_map: Dict[str, float] = {}
        if not self.sif_service:
            return authority_map

        try:
            # Note: This relies on what's available in the SIF index/dashboard summary
            dashboard_context = await self.sif_service.get_seo_dashboard_context()
            if "error" not in dashboard_context:
                # Placeholder: once the summary exposes top pages, map
                # URL -> authority multiplier here.
                pass
        except Exception as e:
            logger.warning(f"Failed to fetch authority data: {e}")

        return authority_map

    async def link_suggester(self, draft: str) -> List[Dict[str, Any]]:
        """
        Tool: Suggests internal links.
        Analyzes draft content and finds semantically relevant pages, boosted by authority.

        Returns:
            Suggestions sorted by ``final_score`` (descending), each with
            ``url``, ``relevance``, ``final_score``, ``confidence`` and
            ``reason``. Empty when the service is not initialized, the draft is
            shorter than 50 characters, nothing matches, or an error occurs.
        """
        # `or ""` keeps the logging call from raising before the None guard below.
        self._log_agent_operation("Suggesting internal links", draft_length=len(draft or ""))

        try:
            if not self.intelligence.is_initialized():
                logger.error(f"[{self.__class__.__name__}] Intelligence service not initialized")
                return []

            if not draft or len(draft.strip()) < 50:  # Reduced threshold for testing
                logger.warning(f"[{self.__class__.__name__}] Draft too short for meaningful link suggestions")
                return []

            # 1. Get Semantic Candidates
            results = await self.intelligence.search(draft, limit=self.MAX_SUGGESTIONS)

            if not results:
                logger.info(f"[{self.__class__.__name__}] No relevant internal pages found")
                return []

            # 2. Get Authority Data (if available)
            authority_map = await self._load_authority_map()

            suggestions = []
            for result in results:
                # Treat a missing or None score as zero relevance.
                relevance_score = result.get('score') or 0.0
                url = result.get('id', 'unknown')

                # URLs without authority data keep a neutral multiplier of 1.0.
                authority_boost = authority_map.get(url, 1.0)
                final_score = relevance_score * authority_boost

                if final_score >= self.RELEVANCE_THRESHOLD:
                    suggestion = {
                        "url": url,
                        "relevance": relevance_score,
                        "final_score": final_score,
                        "confidence": self._calculate_link_confidence(final_score),
                        "reason": f"Semantic similarity: {relevance_score:.3f}"
                    }
                    suggestions.append(suggestion)
                    logger.debug(f"[{self.__class__.__name__}] Added link suggestion: {url} (score: {final_score:.3f})")

            # Sort by final score
            suggestions.sort(key=lambda x: x['final_score'], reverse=True)

            logger.info(f"[{self.__class__.__name__}] Generated {len(suggestions)} internal link suggestions")
            return suggestions

        except Exception as e:
            logger.error(f"[{self.__class__.__name__}] Failed to suggest internal links: {e}")
            logger.error(f"[{self.__class__.__name__}] Full traceback: {traceback.format_exc()}")
            return []

    async def graph_builder(self) -> Dict[str, Any]:
        """
        Tool: Builds/Visualizes the semantic link graph.
        Returns the structure of the graph (nodes and edges) for visualization or analysis.

        Returns:
            ``graph_stats`` (cluster and item counts), ``structure``, ``nodes``
            (one ``topic_cluster`` node per cluster), ``edges`` (currently
            always empty) and ``timestamp``; ``{"error": ...}`` when the
            service is not initialized or an error occurs.
        """
        self._log_agent_operation("Building semantic link graph")

        try:
            if not self.intelligence.is_initialized():
                return {"error": "Intelligence service not initialized"}

            # 1. Get Clusters (Nodes). A None result is treated as "no clusters".
            clusters = await self.intelligence.cluster(min_score=0.5) or []

            nodes = [
                {
                    "id": f"cluster_{i}",
                    "type": "topic_cluster",
                    "size": len(cluster)
                }
                for i, cluster in enumerate(clusters)
            ]
            # Item-level nodes and edges need per-item metadata that the
            # cluster call does not provide here, so none are produced yet.
            edges: List[Dict[str, Any]] = []

            return {
                "graph_stats": {
                    "total_clusters": len(clusters),
                    "total_nodes": sum(len(c) for c in clusters)
                },
                "structure": "hierarchical",  # vs flat
                "nodes": nodes,
                "edges": edges,
                "timestamp": datetime.utcnow().isoformat()
            }

        except Exception as e:
            logger.error(f"[{self.__class__.__name__}] Failed to build graph: {e}")
            return {"error": str(e)}

    async def authority_analyzer(self, target_url: Optional[str] = None) -> Dict[str, Any]:
        """
        Tool: Analyzes the authority of the site or specific pages using GSC/Bing data.

        ``target_url`` is currently only logged; the report is site-level.

        Returns:
            ``domain_authority_proxy`` (health score, clicks, average
            position), a ``page_authority`` placeholder and ``timestamp``. The
            dashboard context is returned unchanged when it contains an
            ``error`` key; ``{"error": ...}`` when no SIF service is configured
            or an error occurs.
        """
        self._log_agent_operation("Analyzing authority", target_url=target_url)

        if not self.sif_service:
            return {"error": "SIF Service unavailable for authority analysis"}

        try:
            # 1. Get Dashboard Context
            context = await self.sif_service.get_seo_dashboard_context()

            if "error" in context:
                return context

            data = context.get("dashboard_data", {})
            summary = data.get("summary", {})
            health = data.get("health_score", {})

            # 2. Extract Authority Metrics
            authority_report = {
                "domain_authority_proxy": {
                    "health_score": health.get("score"),
                    "total_clicks": summary.get("clicks"),
                    "avg_position": summary.get("position")
                },
                "page_authority": "Page-level authority requires granular GSC data (Planned)",  # Placeholder
                "timestamp": datetime.utcnow().isoformat()
            }

            return authority_report

        except Exception as e:
            logger.error(f"[{self.__class__.__name__}] Authority analysis failed: {e}")
            return {"error": str(e)}

    def _calculate_link_confidence(self, relevance_score: float) -> float:
        """Return 1.5x the relevance score, capped at 1.0."""
        return min(1.0, relevance_score * 1.5)

    async def optimize_anchor_text(self, target_url: str, context: str) -> str:
        """Suggest the best anchor text for a given link based on target page context.

        NOTE: still a stub. It returns a fixed placeholder string, or
        "click here" if logging raises.
        """
        self._log_agent_operation("Optimizing anchor text", target_url=target_url, context_length=len(context or ""))

        try:
            # In a real implementation, we would fetch the target page content via SIF
            # and use an LLM to generate the anchor text.
            logger.info(f"[{self.__class__.__name__}] Anchor text optimization stub completed")
            return "relevant anchor text"  # Placeholder

        except Exception as e:
            logger.error(f"[{self.__class__.__name__}] Failed to optimize anchor text: {e}")
            logger.error(f"[{self.__class__.__name__}] Full traceback: {traceback.format_exc()}")
            return "click here"  # Fallback anchor text
|
||||
|
||||
class CitationExpert(SIFBaseAgent):
    """
    Agent for fact-checking, citation generation, and evidence verification.
    """

    EVIDENCE_THRESHOLD = 0.7  # Minimum relevance score for evidence
    MAX_EVIDENCE = 5  # Maximum number of evidence pieces to return
    CITATION_THRESHOLD = 0.6  # Relevance a result must exceed to be offered as a citation

    @staticmethod
    def _truncate(text: Optional[str], limit: int) -> str:
        """Return ``text`` cut to ``limit`` characters, adding "..." only if it was cut."""
        text = text or ""
        return (text[:limit] + "...") if len(text) > limit else text

    async def fact_checker(self, claim: str) -> List[Dict[str, Any]]:
        """
        Tool: Verifies facts against trusted research data.
        Returns supporting or contradicting evidence.
        """
        return await self.verify_facts(claim)

    async def citation_finder(self, topic: str) -> List[Dict[str, Any]]:
        """
        Tool: Suggests authoritative citations for a given topic.

        Returns:
            One dict per result scoring above ``CITATION_THRESHOLD`` with
            ``source``, ``title`` (first 100 characters of the text),
            ``relevance`` and ``citation_text``. Empty when the service is not
            initialized or an error occurs.
        """
        self._log_agent_operation("Finding citations", topic=topic)

        try:
            if not self.intelligence.is_initialized():
                return []

            # Search for highly relevant content
            results = await self.intelligence.search(topic, limit=self.MAX_EVIDENCE)

            citations = []
            for result in results or []:
                relevance = result.get('score') or 0.0
                if relevance > self.CITATION_THRESHOLD:
                    citations.append({
                        "source": result.get('id'),
                        # Ellipsis only when the text was actually shortened.
                        "title": self._truncate(result.get('text'), 100),
                        "relevance": relevance,
                        "citation_text": f"Source: {result.get('id')} (Relevance: {relevance:.2f})"
                    })

            return citations

        except Exception as e:
            logger.error(f"[{self.__class__.__name__}] Citation finder failed: {e}")
            return []

    async def claim_verifier(self, content: str) -> Dict[str, Any]:
        """
        Tool: Detects unsupported statements and hallucinations.

        Sentences containing a digit, "show" or "study" and longer than 20
        characters are treated as claims; the first five are checked with
        ``verify_facts``.

        Returns:
            ``{"status": "no_claims_detected", "verified_claims": []}`` when no
            claim is found; otherwise ``status``, ``total_claims``,
            ``verified_claims``, ``unsupported_count`` and ``timestamp``.
        """
        import re

        content = content or ""
        self._log_agent_operation("Verifying claims in content", content_length=len(content))

        # 1. Extract potential claims (heuristic: numbers, 'research shows', etc.)
        # This is a simplified extraction. A real implementation would use NLP/LLM.
        # A period directly followed by a digit is a decimal point ("3.5"), not
        # a sentence end, so it must not split the sentence.
        claims = []
        for sent in re.split(r"\.(?!\d)", content):
            candidate = sent.strip()
            lowered = candidate.lower()
            looks_like_claim = (
                any(char.isdigit() for char in candidate)
                or "show" in lowered
                or "study" in lowered
            )
            if looks_like_claim and len(candidate) > 20:
                claims.append(candidate)

        if not claims:
            return {"status": "no_claims_detected", "verified_claims": []}

        verified_results = []
        for claim in claims[:5]:  # Limit to top 5 claims for performance
            evidence = await self.verify_facts(claim)
            status = "supported" if evidence else "unsupported"
            verified_results.append({
                "claim": claim,
                "status": status,
                "evidence_count": len(evidence),
                "top_evidence": evidence[0]['source'] if evidence else None
            })

        return {
            "status": "verification_complete",
            "total_claims": len(claims),
            "verified_claims": verified_results,
            "unsupported_count": sum(1 for c in verified_results if c['status'] == 'unsupported'),
            "timestamp": datetime.utcnow().isoformat()
        }

    async def verify_facts(self, claim: str) -> List[Dict[str, Any]]:
        """Find supporting or contradicting evidence in the indexed research.

        Returns:
            One dict per result scoring at least ``EVIDENCE_THRESHOLD`` with
            ``source``, ``relevance``, ``confidence``, ``type`` ("supporting"
            above 0.8, otherwise "related") and ``excerpt`` (up to 200
            characters). Empty when the service is not initialized, the claim
            is shorter than 20 characters, nothing matches, or an error occurs.
        """
        self._log_agent_operation("Verifying facts", claim_length=len(claim or ""))

        try:
            if not self.intelligence.is_initialized():
                logger.error(f"[{self.__class__.__name__}] Intelligence service not initialized")
                return []

            if not claim or len(claim.strip()) < 20:
                logger.warning(f"[{self.__class__.__name__}] Claim too short for meaningful verification")
                return []

            results = await self.intelligence.search(claim, limit=self.MAX_EVIDENCE)

            if not results:
                logger.info(f"[{self.__class__.__name__}] No evidence found for claim")
                return []

            evidence = []
            for result in results:
                # Treat a missing or None score as zero relevance.
                relevance_score = result.get('score') or 0.0

                if relevance_score >= self.EVIDENCE_THRESHOLD:
                    evidence_piece = {
                        "source": result.get('id', 'unknown'),
                        "relevance": relevance_score,
                        "confidence": self._calculate_evidence_confidence(relevance_score),
                        "type": "supporting" if relevance_score > 0.8 else "related",
                        "excerpt": self._truncate(result.get('text'), 200)
                    }
                    evidence.append(evidence_piece)
                    logger.debug(f"[{self.__class__.__name__}] Found evidence: {evidence_piece['source']} (score: {relevance_score:.3f})")

            logger.info(f"[{self.__class__.__name__}] Found {len(evidence)} pieces of evidence for claim")
            return evidence

        except Exception as e:
            logger.error(f"[{self.__class__.__name__}] Failed to verify facts: {e}")
            logger.error(f"[{self.__class__.__name__}] Full traceback: {traceback.format_exc()}")
            return []

    def _calculate_evidence_confidence(self, relevance_score: float) -> float:
        """Return 1.2x the relevance score, capped at 1.0."""
        return min(1.0, relevance_score * 1.2)
|
||||
73
backend/services/intelligence/agents/__init__.py
Normal file
73
backend/services/intelligence/agents/__init__.py
Normal file
@@ -0,0 +1,73 @@
|
||||
"""
|
||||
ALwrity Autonomous Marketing Agents Module
|
||||
|
||||
This module provides autonomous marketing agents built on txtai's native agent framework.
|
||||
The agents work together to monitor market conditions, analyze competitor activities,
|
||||
and execute coordinated marketing strategies without human intervention.
|
||||
"""
|
||||
|
||||
# Core agent framework
|
||||
from .core_agent_framework import (
|
||||
BaseALwrityAgent,
|
||||
AgentAction,
|
||||
AgentPerformance,
|
||||
StrategyOrchestratorAgent
|
||||
)
|
||||
|
||||
# Market signal detection
|
||||
from .market_signal_detector import (
|
||||
MarketSignal,
|
||||
MarketSignalDetector,
|
||||
MarketTrendAnalyzer
|
||||
)
|
||||
|
||||
# Performance monitoring
|
||||
from .performance_monitor import (
|
||||
PerformanceMonitor,
|
||||
performance_monitor,
|
||||
PerformanceMetric,
|
||||
AgentPerformanceMetrics
|
||||
)
|
||||
|
||||
# Specialized agents
|
||||
from .specialized_agents import (
|
||||
ContentGuardianAgent,
|
||||
LinkGraphAgent,
|
||||
StrategyArchitectAgent,
|
||||
ContentStrategyAgent,
|
||||
CompetitorResponseAgent,
|
||||
SEOOptimizationAgent,
|
||||
SocialAmplificationAgent
|
||||
)
|
||||
|
||||
from .trend_surfer_agent import TrendSurferAgent
|
||||
|
||||
# Agent Orchestrator
|
||||
from .agent_orchestrator import (
|
||||
ALwrityAgentOrchestrator,
|
||||
orchestration_service
|
||||
)
|
||||
|
||||
# Names exported by `from ... import *`; the list follows the order of the
# import groups: core framework, market signals, performance monitoring,
# specialized agents, trend surfer, orchestrator.
__all__ = [
    'BaseALwrityAgent',
    'AgentAction',
    'AgentPerformance',
    'StrategyOrchestratorAgent',
    'MarketSignal',
    'MarketSignalDetector',
    'MarketTrendAnalyzer',
    'PerformanceMonitor',
    'performance_monitor',
    'PerformanceMetric',
    'AgentPerformanceMetrics',
    'ContentGuardianAgent',
    'LinkGraphAgent',
    'StrategyArchitectAgent',
    'ContentStrategyAgent',
    'CompetitorResponseAgent',
    'SEOOptimizationAgent',
    'SocialAmplificationAgent',
    'TrendSurferAgent',
    'ALwrityAgentOrchestrator',
    'orchestration_service'
]
|
||||
429
backend/services/intelligence/agents/agent_orchestrator.py
Normal file
429
backend/services/intelligence/agents/agent_orchestrator.py
Normal file
@@ -0,0 +1,429 @@
|
||||
"""
|
||||
ALwrity Agent Orchestration System
|
||||
Main orchestration system that coordinates all autonomous marketing agents
|
||||
Built on txtai's native agent framework
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
from datetime import datetime
|
||||
from typing import Dict, List, Any, Optional
|
||||
from dataclasses import dataclass, asdict
|
||||
|
||||
# txtai imports for native agent framework
|
||||
try:
|
||||
from txtai import Agent, LLM
|
||||
TXTAI_AVAILABLE = Agent.__module__ != "txtai.agent.placeholder"
|
||||
except ImportError:
|
||||
TXTAI_AVAILABLE = False
|
||||
logging.warning("txtai not available, using fallback implementation")
|
||||
|
||||
from utils.logger_utils import get_service_logger
|
||||
from services.intelligence.agents.core_agent_framework import (
|
||||
BaseALwrityAgent, AgentAction, AgentPerformance, StrategyOrchestratorAgent
|
||||
)
|
||||
from services.intelligence.agents.specialized_agents import (
|
||||
ContentStrategyAgent, CompetitorResponseAgent, SEOOptimizationAgent, SocialAmplificationAgent
|
||||
)
|
||||
from services.intelligence.agents.trend_surfer_agent import TrendSurferAgent
|
||||
from services.intelligence.agents.market_signal_detector import (
|
||||
MarketSignal, MarketSignalDetector
|
||||
)
|
||||
from services.intelligence.agents.safety_framework import (
|
||||
SafetyConstraintManager, RollbackManager, UserApprovalSystem, get_safety_framework
|
||||
)
|
||||
from services.intelligence.agents.performance_monitor import (
|
||||
PerformanceMetric, AgentStatus, AgentPerformanceMonitor, performance_service
|
||||
)
|
||||
|
||||
logger = get_service_logger(__name__)
|
||||
|
||||
@dataclass
class AgentTeamConfiguration:
    """Configuration for the complete agent team.

    Only ``user_id`` is required. ``created_at`` is filled in with the
    current UTC time (ISO 8601, naive) when it is not supplied.
    """
    # Identifier of the user this agent team belongs to.
    user_id: str
    # Model identifier handed to the shared LLM.
    shared_llm: str = "Qwen/Qwen3-4B-Instruct-2507"
    max_iterations: int = 15
    # Feature switches for the optional subsystems.
    enable_safety: bool = True
    enable_performance_monitoring: bool = True
    enable_market_signals: bool = True
    # None means "stamp at construction time"; hence Optional rather than str.
    created_at: Optional[str] = None

    def __post_init__(self):
        """Stamp ``created_at`` when the caller did not provide one."""
        if self.created_at is None:
            self.created_at = datetime.utcnow().isoformat()
|
||||
|
||||
class ALwrityAgentOrchestrator:
    """Main orchestrator for ALwrity autonomous marketing agents.

    For one user it builds the shared LLM, the optional market-signal
    detector, performance monitor and safety framework, the specialized
    agents, and a master orchestrator agent that the specialized agents are
    registered with as sub-agents.
    """

    def __init__(self, config: AgentTeamConfiguration):
        """Store ``config`` and build every component for ``config.user_id``.

        Raises:
            Exception: re-raised from ``_initialize_components`` when a
                component cannot be constructed.
        """
        self.config = config
        self.user_id = config.user_id
        self.agents: Dict[str, BaseALwrityAgent] = {}
        self.orchestrator_agent: Optional[Agent] = None
        self.market_detector: Optional[MarketSignalDetector] = None
        self.performance_monitor: Optional[AgentPerformanceMonitor] = None
        self.safety_framework: Optional[Dict[str, Any]] = None
        # Assigned for real in _initialize_components; declared here so the
        # attribute always exists.
        self.llm = None

        # Initialize components
        self._initialize_components()

        logger.info(f"Initialized ALwrityAgentOrchestrator for user: {self.user_id}")

    def _initialize_components(self):
        """Initialize all agent system components.

        Optional components are only built when the matching
        ``config.enable_*`` flag is set.  Any failure is logged and re-raised.
        """
        try:
            # Initialize shared LLM (only when txtai is available)
            self.llm = LLM(self.config.shared_llm) if TXTAI_AVAILABLE else None

            # Initialize market signal detector
            if self.config.enable_market_signals:
                self.market_detector = MarketSignalDetector(self.user_id)

            # Initialize performance monitoring
            if self.config.enable_performance_monitoring:
                self.performance_monitor = AgentPerformanceMonitor(self.user_id)

            # Initialize safety framework
            if self.config.enable_safety:
                self.safety_framework = get_safety_framework(self.user_id)

            # Create specialized agents
            self._create_specialized_agents()

            # Create master orchestrator agent
            self._create_orchestrator_agent()

        except Exception as e:
            logger.error(f"Error initializing components for user {self.user_id}: {e}")
            raise

    def _load_enabled_agent_flags(self) -> Dict[str, bool]:
        """Read the per-agent enabled flags stored for this user.

        Returns:
            Mapping of ``agent_key`` to its enabled flag.  Empty when the user
            has no stored profiles or when the lookup fails; callers treat a
            missing key as "enabled".
        """
        enabled_by_key: Dict[str, bool] = {}
        db = None
        try:
            # Imported lazily, as in the original code path.
            from services.database import get_session_for_user
            from models.agent_activity_models import AgentProfile

            db = get_session_for_user(self.user_id)
            if db:
                profiles = db.query(AgentProfile).filter(AgentProfile.user_id == self.user_id).all()
                enabled_by_key = {
                    p.agent_key: bool(p.enabled)
                    for p in profiles
                    if p and p.agent_key and p.enabled is not None
                }
        except Exception as e:
            # Best effort: fall back to "everything enabled", but leave a trace
            # instead of hiding the failure completely.
            logger.warning(
                f"Could not load agent profiles for user {self.user_id}; "
                f"enabling all agents by default: {e}"
            )
            enabled_by_key = {}
        finally:
            if db:
                try:
                    db.close()
                except Exception as e:
                    logger.debug(f"Error closing agent profile session for user {self.user_id}: {e}")
        return enabled_by_key

    def _create_specialized_agents(self):
        """Create the specialized marketing agents and register them in ``self.agents``.

        An agent is skipped only when its stored profile explicitly disables
        it.  Any construction failure is logged and re-raised.
        """
        try:
            enabled_by_key = self._load_enabled_agent_flags()

            # Content Strategy Agent
            if enabled_by_key.get("content_strategist", True):
                self.content_agent = ContentStrategyAgent(self.user_id, self.config.shared_llm, llm=self.llm)
                self.agents['content'] = self.content_agent

            # Competitor Response Agent
            if enabled_by_key.get("competitor_analyst", True):
                self.competitor_agent = CompetitorResponseAgent(self.user_id, self.config.shared_llm, llm=self.llm)
                self.agents['competitor'] = self.competitor_agent

            # SEO Optimization Agent
            if enabled_by_key.get("seo_specialist", True):
                self.seo_agent = SEOOptimizationAgent(self.user_id, self.config.shared_llm, llm=self.llm)
                self.agents['seo'] = self.seo_agent

            # Social Amplification Agent
            if enabled_by_key.get("social_media_manager", True):
                self.social_agent = SocialAmplificationAgent(self.user_id, self.config.shared_llm, llm=self.llm)
                self.agents['social'] = self.social_agent

            # Trend Surfer Agent
            if enabled_by_key.get("trend_surfer", True):
                # TrendSurferAgent's constructor is (intelligence_service, user_id).
                # The orchestrator does not hold a TxtaiIntelligenceService
                # itself, so one is created here for the agent.
                from services.intelligence.txtai_service import TxtaiIntelligenceService

                intel_service = TxtaiIntelligenceService(self.user_id)
                self.trend_surfer_agent = TrendSurferAgent(intel_service, self.user_id)
                self.agents['trend'] = self.trend_surfer_agent

            logger.info(f"Created {len(self.agents)} specialized agents for user {self.user_id}")

        except Exception as e:
            logger.error(f"Error creating specialized agents for user {self.user_id}: {e}")
            raise

    # Specialized agent creation methods have been moved to specialized_agents.py

    def _create_orchestrator_agent(self):
        """Create the master orchestrator agent and hand it the sub-agents.

        If ``StrategyOrchestratorAgent`` cannot be set up, the error is logged
        and a plain ``Agent`` built from the shared LLM is used instead.
        """
        try:
            self.orchestrator_agent = StrategyOrchestratorAgent(
                user_id=self.user_id,
                market_detector=self.market_detector,
                performance_monitor=self.performance_monitor,
                llm=self.llm
            )

            # Set sub-agents
            self.orchestrator_agent.set_sub_agents(self.agents)

            logger.info(f"Created StrategyOrchestratorAgent for user {self.user_id}")

        except Exception as e:
            logger.error(f"Error creating orchestrator agent: {e}")
            # Fallback to simple agent if class instantiation fails
            self.orchestrator_agent = Agent(llm=self.llm)

    async def execute_marketing_strategy(self, market_context: Dict[str, Any]) -> Dict[str, Any]:
        """Execute a coordinated marketing strategy using the agent team.

        Args:
            market_context: Caller-supplied market conditions; embedded in the
                context handed to the orchestrator agent.

        Returns:
            ``{"success": True, "strategy": <agent result>, "timestamp": ...}``
            on success, or ``{"success": False, "error": <message>,
            "timestamp": ...}`` when anything raises.  Never raises.
        """
        try:
            logger.info(f"Executing marketing strategy for user {self.user_id}")

            # Prepare comprehensive context
            context = await self._prepare_orchestrator_context(market_context)

            # Execute orchestrator with full team.
            # The StrategyOrchestratorAgent will autonomously delegate tasks to sub-agents.
            instruction = (
                "Analyze current market conditions and coordinate our marketing team to respond effectively.\n\n"
                "Please:\n"
                "1. Analyze the market situation.\n"
                "2. DELEGATE tasks to specific agents using the 'task_delegator' tool.\n"
                "3. Synthesize their results into a unified strategy.\n"
                "4. Provide specific action recommendations.\n\n"
                "Return a comprehensive strategy with specific actions, priorities, and expected outcomes."
            )
            orchestrator_prompt = self.orchestrator_agent.build_task_prompt(instruction=instruction, task_context=context)
            result = await self.orchestrator_agent.run(orchestrator_prompt)

            # No orchestration-level metric is recorded here; per-action
            # metrics are assumed to be tracked by the agents themselves.

            logger.info(f"Marketing strategy execution completed for user {self.user_id}")

            return {
                "success": True,
                "strategy": result,
                "timestamp": datetime.utcnow().isoformat(),
                # In a real system, we might parse the result to extract structured data
            }

        except Exception as e:
            logger.error(f"Agent team execution failed for user {self.user_id}: {e}")

            return {
                "success": False,
                "error": str(e),
                "timestamp": datetime.utcnow().isoformat()
            }

    async def process_market_signals(self) -> List[MarketSignal]:
        """Return the market signals currently detected for this user.

        Returns an empty list when no detector is configured or detection fails.
        """
        try:
            if not self.market_detector:
                return []

            # Detect market signals
            signals = await self.market_detector.detect_market_signals()

            logger.info(f"Processed {len(signals)} market signals for user {self.user_id}")

            return signals

        except Exception as e:
            logger.error(f"Error processing market signals for user {self.user_id}: {e}")
            return []

    async def get_agent_status(self) -> Dict[str, Any]:
        """Collect the status of every agent plus the monitoring summary.

        Returns:
            A dict with ``user_id``, ``timestamp``, ``agent_statuses`` (only
            agents exposing ``get_current_status``), ``performance_summary``
            keyed by agent id, and the three feature flags.  On failure a dict
            with ``error`` and ``timestamp`` is returned instead.
        """
        try:
            agent_statuses = {}

            for agent_type, agent in self.agents.items():
                if hasattr(agent, 'get_current_status'):
                    agent_statuses[agent_type] = await agent.get_current_status()

            # Get performance metrics if available.  Not every monitor
            # implementation exposes get_all_agents_performance(); without the
            # guard a missing method would turn the whole report into an error.
            performance_summary = {}
            if self.performance_monitor:
                fetch_all = getattr(self.performance_monitor, "get_all_agents_performance", None)
                if callable(fetch_all):
                    performance_summary = {perf['agent_id']: perf for perf in fetch_all()}

            return {
                "user_id": self.user_id,
                "timestamp": datetime.utcnow().isoformat(),
                "agent_statuses": agent_statuses,
                "performance_summary": performance_summary,
                "market_signals_active": self.config.enable_market_signals,
                "safety_enabled": self.config.enable_safety,
                "performance_monitoring_enabled": self.config.enable_performance_monitoring
            }

        except Exception as e:
            logger.error(f"Error getting agent status for user {self.user_id}: {e}")
            return {
                "error": str(e),
                "timestamp": datetime.utcnow().isoformat()
            }

    # Tool implementations for txtai agents have been moved to StrategyOrchestratorAgent class

    # Specialized agent tools have been moved to specialized_agents.py

    # Helper methods

    async def _prepare_orchestrator_context(self, market_context: Dict[str, Any]) -> Dict[str, Any]:
        """Build the context dict handed to the orchestrator agent.

        Combines the caller's market context with the registered agent keys,
        the static capability table and the current system status.
        """
        context = {
            "user_id": self.user_id,
            "market_conditions": market_context,
            "timestamp": datetime.utcnow().isoformat(),
            "available_agents": list(self.agents.keys()),
            "agent_capabilities": self._get_agent_capabilities(),
            "system_status": await self.get_agent_status()
        }

        return context

    def _get_agent_capabilities(self) -> Dict[str, List[str]]:
        """Return the static capability list for each agent type.

        The table is fixed; it is not filtered by which agents are enabled.
        """
        return {
            "content": ["Content analysis", "Gap detection", "Optimization", "Performance tracking"],
            "competitor": ["Competitor monitoring", "Threat analysis", "Response generation", "Strategy execution"],
            "seo": ["SEO auditing", "Issue prioritization", "Auto-fixing", "Strategy generation"],
            "social": ["Social monitoring", "Content adaptation", "Engagement optimization", "Distribution management"],
            "trend": ["Trend detection", "Opportunity analysis", "Content angle generation"]
        }
|
||||
|
||||
# Service class for agent orchestration
|
||||
class AgentOrchestrationService:
    """Service class for managing agent orchestration.

    Keeps one ``ALwrityAgentOrchestrator`` per user id and an in-memory,
    size-capped log of strategy executions.
    """

    # Maximum number of execution records kept in memory.
    _MAX_HISTORY = 1000

    def __init__(self):
        self.orchestrators: Dict[str, ALwrityAgentOrchestrator] = {}
        self.execution_history: List[Dict[str, Any]] = []

        logger.info("Initialized AgentOrchestrationService")

    async def get_or_create_orchestrator(self, user_id: str) -> ALwrityAgentOrchestrator:
        """Return the orchestrator for ``user_id``, creating it on first use.

        A new orchestrator is built from ``AgentTeamConfiguration(user_id=...)``
        with otherwise default settings.
        """
        if user_id not in self.orchestrators:
            config = AgentTeamConfiguration(user_id=user_id)
            self.orchestrators[user_id] = ALwrityAgentOrchestrator(config)
            logger.info(f"Created new orchestrator for user: {user_id}")

        return self.orchestrators[user_id]

    async def execute_marketing_strategy(self, user_id: str, market_context: Dict[str, Any]) -> Dict[str, Any]:
        """Execute the marketing strategy for a user and record the outcome.

        Returns:
            The orchestrator's result dict, or ``{"success": False, "error":
            ..., "timestamp": ...}`` if anything raises.  Never raises.
        """
        try:
            orchestrator = await self.get_or_create_orchestrator(user_id)
            result = await orchestrator.execute_marketing_strategy(market_context)

            # Record in history
            execution_record = {
                "user_id": user_id,
                "timestamp": datetime.utcnow().isoformat(),
                "market_context": market_context,
                "result": result,
                "success": result.get("success", False)
            }
            self.execution_history.append(execution_record)

            # Keep only recent history
            if len(self.execution_history) > self._MAX_HISTORY:
                self.execution_history = self.execution_history[-self._MAX_HISTORY:]

            return result

        except Exception as e:
            logger.error(f"Error executing marketing strategy for user {user_id}: {e}")
            return {
                "success": False,
                "error": str(e),
                "timestamp": datetime.utcnow().isoformat()
            }

    async def get_agent_status(self, user_id: str) -> Dict[str, Any]:
        """Return the agent status report for a user, or an error dict on failure."""
        try:
            orchestrator = await self.get_or_create_orchestrator(user_id)
            return await orchestrator.get_agent_status()

        except Exception as e:
            logger.error(f"Error getting agent status for user {user_id}: {e}")
            return {
                "success": False,
                "error": str(e),
                "timestamp": datetime.utcnow().isoformat()
            }

    async def process_market_signals(self, user_id: str) -> List[MarketSignal]:
        """Return the detected market signals for a user, or ``[]`` on failure."""
        try:
            orchestrator = await self.get_or_create_orchestrator(user_id)
            return await orchestrator.process_market_signals()

        except Exception as e:
            logger.error(f"Error processing market signals for user {user_id}: {e}")
            return []

    def get_execution_history(self, user_id: Optional[str] = None, limit: int = 100) -> List[Dict[str, Any]]:
        """Return the most recent execution records, oldest first.

        Args:
            user_id: When truthy, only that user's records are considered.
            limit: Maximum number of records to return.  Zero or a negative
                value yields an empty list.

        Returns:
            A new list holding at most ``limit`` records.
        """
        # ``records[-0:]`` is the whole list and a negative limit would slice
        # from the front, so non-positive limits are handled explicitly.
        if limit <= 0:
            return []

        records = self.execution_history
        if user_id:
            records = [record for record in records if record["user_id"] == user_id]
        return records[-limit:]

    def get_global_performance_stats(self) -> Dict[str, Any]:
        """Summarise the recorded executions across all users.

        Returns:
            ``{}`` when nothing has been recorded; otherwise totals, the
            success rate, the number of distinct users and a timestamp.
        """
        if not self.execution_history:
            return {}

        total_executions = len(self.execution_history)
        successful_executions = sum(1 for r in self.execution_history if r.get("success", False))
        unique_users = len({r["user_id"] for r in self.execution_history})

        return {
            "total_executions": total_executions,
            "successful_executions": successful_executions,
            "success_rate": successful_executions / total_executions if total_executions > 0 else 0.0,
            "unique_users": unique_users,
            "timestamp": datetime.utcnow().isoformat()
        }
|
||||
|
||||
# Global service instance
|
||||
# Module-level AgentOrchestrationService, constructed once at import time and
# shared by the convenience wrappers defined after it.
orchestration_service = AgentOrchestrationService()
|
||||
|
||||
# Convenience functions for external use
|
||||
async def execute_marketing_strategy(user_id: str, market_context: Dict[str, Any]) -> Dict[str, Any]:
    """Run the marketing strategy for ``user_id`` through the shared service.

    Delegates to ``orchestration_service.execute_marketing_strategy`` and
    returns its result dict unchanged.
    """
    outcome = await orchestration_service.execute_marketing_strategy(user_id, market_context)
    return outcome
|
||||
|
||||
async def get_agent_system_status(user_id: str) -> Dict[str, Any]:
    """Fetch the agent status report for ``user_id`` from the shared service.

    Delegates to ``orchestration_service.get_agent_status`` and returns its
    result dict unchanged.
    """
    status_report = await orchestration_service.get_agent_status(user_id)
    return status_report
|
||||
|
||||
async def process_market_signals_for_user(user_id: str) -> List[MarketSignal]:
    """Return the market signals detected for ``user_id`` via the shared service.

    Delegates to ``orchestration_service.process_market_signals``.
    """
    detected = await orchestration_service.process_market_signals(user_id)
    return detected
|
||||
1004
backend/services/intelligence/agents/core_agent_framework.py
Normal file
1004
backend/services/intelligence/agents/core_agent_framework.py
Normal file
File diff suppressed because it is too large
Load Diff
250
backend/services/intelligence/agents/market_signal_detector.py
Normal file
250
backend/services/intelligence/agents/market_signal_detector.py
Normal file
@@ -0,0 +1,250 @@
|
||||
"""
|
||||
Market Signal Detection System for ALwrity Autonomous Agents
|
||||
Built on txtai's semantic intelligence and existing monitoring infrastructure
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
from datetime import datetime, timedelta
|
||||
from typing import Dict, List, Any, Optional, Set
|
||||
from dataclasses import dataclass, asdict
|
||||
from enum import Enum
|
||||
|
||||
# Integration with existing ALwrity services
|
||||
from services.intelligence.monitoring.semantic_dashboard import RealTimeSemanticMonitor
|
||||
from services.intelligence.semantic_cache import SemanticCacheManager
|
||||
from services.seo_analyzer import ComprehensiveSEOAnalyzer
|
||||
from utils.logger_utils import get_service_logger
|
||||
|
||||
logger = get_service_logger(__name__)
|
||||
|
||||
class SignalType(Enum):
    """Kinds of market signal an agent can detect; each value is a short string tag."""

    # Signals about other market participants and search results.
    COMPETITOR_CHANGE = "competitor"
    SERP_FLUCTUATION = "serp"

    # Signals from the wider environment.
    SOCIAL_TREND = "social"
    INDUSTRY_NEWS = "industry"

    # Signals about the user's own site and content.
    PERFORMANCE_CHANGE = "performance"
    CONTENT_GAP = "content_gap"
    SEO_OPPORTUNITY = "seo_opportunity"
|
||||
|
||||
class UrgencyLevel(Enum):
    """How urgently a market signal should be acted on.

    Members are declared from least to most urgent.  The values are plain
    strings, so comparing or sorting the raw values is alphabetical, not by
    urgency.
    """

    LOW = "low"
    MEDIUM = "medium"
    HIGH = "high"
    CRITICAL = "critical"
|
||||
|
||||
@dataclass
class MarketSignal:
    """Represents a detected market signal.

    ``detected_at`` and ``expires_at`` are ISO-8601 strings of naive UTC
    datetimes.  Both are filled in by ``__post_init__`` when left as ``None``.
    """
    signal_id: str
    signal_type: SignalType
    source: str
    description: str
    impact_score: float  # 0.0 to 1.0
    urgency_level: UrgencyLevel
    confidence_score: float  # 0.0 to 1.0
    related_topics: List[str]
    suggested_actions: List[str]
    metadata: Dict[str, Any]
    detected_at: Optional[str] = None
    expires_at: Optional[str] = None

    def __post_init__(self):
        """Fill in ``detected_at`` and ``expires_at`` when they were not given.

        The default lifetime depends on urgency: 1 hour for CRITICAL, 6 for
        HIGH, 24 for MEDIUM and 72 for anything else, counted from now.
        """
        now = datetime.utcnow()

        if self.detected_at is None:
            self.detected_at = now.isoformat()

        if self.expires_at is None:
            # Default expiration based on urgency
            if self.urgency_level == UrgencyLevel.CRITICAL:
                expires_hours = 1
            elif self.urgency_level == UrgencyLevel.HIGH:
                expires_hours = 6
            elif self.urgency_level == UrgencyLevel.MEDIUM:
                expires_hours = 24
            else:
                expires_hours = 72

            # Plain timedelta arithmetic keeps the value in naive UTC.  Going
            # through timestamp()/fromtimestamp() would interpret the naive
            # value as local time and can be off by an hour around DST changes.
            self.expires_at = (now + timedelta(hours=expires_hours)).isoformat()
|
||||
|
||||
@dataclass
class SignalContext:
    """Snapshot of the inputs handed to the signal detectors for one user.

    ``timestamp`` is an ISO-8601 string; it defaults to the UTC time at
    construction when not supplied.
    """
    user_id: str
    competitor_data: Dict[str, Any]
    semantic_health: Dict[str, Any]
    seo_performance: Dict[str, Any]
    content_analysis: Dict[str, Any]
    historical_data: Dict[str, Any]
    timestamp: Optional[str] = None

    def __post_init__(self):
        """Default ``timestamp`` to the current UTC time."""
        if self.timestamp is not None:
            return
        self.timestamp = datetime.utcnow().isoformat()
|
||||
|
||||
class MarketSignalDetector:
    """Main market signal detection system.

    Runs a fixed set of per-source detectors for one user, then filters,
    prioritizes, records and caches the combined result.  The per-source
    detectors and the context builder are currently placeholders.
    """

    # Rank of each UrgencyLevel value, least to most urgent.  The enum values
    # are plain strings, so sorting on them directly would be alphabetical and
    # put "critical" last.
    _URGENCY_RANK = {"low": 0, "medium": 1, "high": 2, "critical": 3}

    def __init__(self, user_id: str):
        self.user_id = user_id
        self.semantic_monitor = RealTimeSemanticMonitor(user_id)
        self.cache_manager = SemanticCacheManager()
        self.seo_analyzer = ComprehensiveSEOAnalyzer()

        # Signal detection thresholds
        self.thresholds = {
            "competitor_change_threshold": 0.3,  # 30% change in competitor metrics
            "serp_fluctuation_threshold": 0.2,  # 20% change in SERP positions
            "social_trend_threshold": 0.15,  # 15% change in social metrics
            "performance_change_threshold": 0.25,  # 25% change in performance metrics
            "content_gap_threshold": 0.4,  # 40% semantic gap
            "seo_opportunity_threshold": 0.3  # 30% SEO improvement opportunity
        }

        # Historical data for trend analysis
        self.signal_history: List[MarketSignal] = []
        self.baseline_metrics: Dict[str, float] = {}

        logger.info(f"Initialized MarketSignalDetector for user: {user_id}")

    async def detect_market_signals(self) -> List[MarketSignal]:
        """Detect all current market signals.

        Returns:
            The cached signals when a non-empty cache entry exists; otherwise
            the freshly detected signals, most urgent first.  ``[]`` if
            anything raises.
        """
        try:
            logger.info(f"Starting market signal detection for user: {self.user_id}")

            # Check the cache first so the context is only built when needed.
            cache_key = f"market_signals_{self.user_id}"
            cached_signals = self.cache_manager.get(cache_key)

            if cached_signals and self._is_cache_valid(cached_signals):
                logger.info(f"Using cached market signals for user: {self.user_id}")
                return cached_signals

            # Get current context
            context = await self._get_signal_context()

            # Detect signals from multiple sources, in a fixed order.
            detectors = (
                self._detect_competitor_signals,
                self._detect_serp_signals,
                self._detect_social_signals,
                self._detect_industry_signals,
                self._detect_performance_signals,
                self._detect_content_gap_signals,
                self._detect_seo_opportunity_signals,
            )
            signals = []
            for detect in detectors:
                signals.extend(await detect(context))

            # Filter and prioritize signals
            filtered_signals = self._filter_signals(signals)
            prioritized_signals = self._prioritize_signals(filtered_signals)

            # Update history
            self.signal_history.extend(prioritized_signals)
            self._trim_signal_history()

            # Cache results
            self.cache_manager.set(cache_key, prioritized_signals, ttl=300)  # 5 minute cache

            logger.info(f"Detected {len(prioritized_signals)} market signals for user: {self.user_id}")

            return prioritized_signals

        except Exception as e:
            logger.error(f"Error detecting market signals: {str(e)}")
            return []

    async def _get_signal_context(self) -> SignalContext:
        """Fetch current context for signal detection."""
        # Placeholder implementation: every data section is empty.
        return SignalContext(
            user_id=self.user_id,
            competitor_data={},
            semantic_health={},
            seo_performance={},
            content_analysis={},
            historical_data={}
        )

    def _is_cache_valid(self, signals: List[MarketSignal]) -> bool:
        """Check if cached signals are still valid."""
        # Basic check for now: any non-empty cached result counts as valid.
        return bool(signals)

    async def _detect_competitor_signals(self, context: SignalContext) -> List[MarketSignal]:
        """Detect signals from competitor activities (placeholder: none)."""
        return []

    async def _detect_serp_signals(self, context: SignalContext) -> List[MarketSignal]:
        """Detect signals from SERP changes (placeholder: none)."""
        return []

    async def _detect_social_signals(self, context: SignalContext) -> List[MarketSignal]:
        """Detect signals from social trends (placeholder: none)."""
        return []

    async def _detect_industry_signals(self, context: SignalContext) -> List[MarketSignal]:
        """Detect signals from industry news (placeholder: none)."""
        return []

    async def _detect_performance_signals(self, context: SignalContext) -> List[MarketSignal]:
        """Detect signals from site performance (placeholder: none)."""
        return []

    async def _detect_content_gap_signals(self, context: SignalContext) -> List[MarketSignal]:
        """Detect signals from content gaps (placeholder: none)."""
        return []

    async def _detect_seo_opportunity_signals(self, context: SignalContext) -> List[MarketSignal]:
        """Detect signals from SEO opportunities (placeholder: none)."""
        return []

    def _filter_signals(self, signals: List[MarketSignal]) -> List[MarketSignal]:
        """Filter out low-quality or duplicate signals (currently a pass-through)."""
        return signals

    def _prioritize_signals(self, signals: List[MarketSignal]) -> List[MarketSignal]:
        """Order signals by urgency, then by impact score, highest first.

        Urgency is ranked critical > high > medium > low; an unrecognised
        urgency sorts after all known levels.  Returns a new list.
        """
        rank = self._URGENCY_RANK

        def priority(signal):
            # Accept either an enum member (use its value) or a raw string.
            urgency = getattr(signal.urgency_level, "value", signal.urgency_level)
            return (rank.get(urgency, -1), signal.impact_score)

        return sorted(signals, key=priority, reverse=True)

    def _trim_signal_history(self):
        """Keep signal history within limits (the newest 1000 entries)."""
        if len(self.signal_history) > 1000:
            self.signal_history = self.signal_history[-1000:]
|
||||
|
||||
class MarketTrendAnalyzer:
    """
    Analyzer for detecting market trends from aggregated signals.

    Owns a ``MarketSignalDetector`` for the same user; the trend analysis
    itself is still a placeholder.
    """

    def __init__(self, user_id: str):
        self.detector = MarketSignalDetector(user_id)
        self.user_id = user_id

    async def analyze_trends(self, context: Optional[Dict[str, Any]] = None) -> List[MarketSignal]:
        """Analyze current market trends.

        ``context`` is accepted but not used yet; an empty list is always
        returned.
        """
        logger.info(f"Analyzing market trends for user {self.user_id}")
        # Placeholder implementation
        trends: List[MarketSignal] = []
        return trends
|
||||
128
backend/services/intelligence/agents/performance_monitor.py
Normal file
128
backend/services/intelligence/agents/performance_monitor.py
Normal file
@@ -0,0 +1,128 @@
|
||||
"""
|
||||
Agent Performance Monitoring Framework for ALwrity Autonomous Marketing Agents
|
||||
Tracks agent performance, efficiency, and provides optimization recommendations
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
from datetime import datetime, timedelta
|
||||
from typing import Dict, List, Any, Optional, Tuple
|
||||
from dataclasses import dataclass, asdict
|
||||
from enum import Enum
|
||||
from collections import defaultdict, deque
|
||||
|
||||
from utils.logger_utils import get_service_logger
|
||||
from services.database import get_session_for_user
|
||||
|
||||
logger = get_service_logger(__name__)
|
||||
|
||||
class AgentStatus(Enum):
    """States an agent can be reported in; each value is a lowercase string tag."""

    IDLE = "idle"
    BUSY = "busy"
    ERROR = "error"
    OFFLINE = "offline"
    INITIALIZING = "initializing"
|
||||
|
||||
class PerformanceMetric(Enum):
    """Names of the metrics that can be recorded for an agent.

    The string value is used as the key of a recorded metric entry.
    """

    RESPONSE_TIME = "response_time"
    SUCCESS_RATE = "success_rate"
    TOKEN_USAGE = "token_usage"
    COST_PER_ACTION = "cost_per_action"
    RESOURCE_UTILIZATION = "resource_utilization"
    GOAL_COMPLETION_RATE = "goal_completion_rate"
|
||||
|
||||
@dataclass
class AgentPerformanceMetrics:
    """One recorded measurement for an agent."""

    agent_id: str
    timestamp: datetime  # when the measurement was recorded
    metrics: Dict[str, float]  # metric name -> value
    context: Dict[str, Any]  # free-form details supplied by the recorder
|
||||
|
||||
class PerformanceMonitor:
    """
    Monitors and analyzes agent performance metrics.

    Measurements are kept in memory: a bounded buffer of the most recent
    entries across all agents plus a per-agent history list.
    """

    def __init__(self, user_id: Optional[str] = None):
        """Create an empty monitor.

        Args:
            user_id: Optional owner of this monitor.  Stored for reference
                only, so the class can be built both with and without a user.
        """
        self.user_id = user_id
        self.metrics_buffer = deque(maxlen=1000)
        self.performance_history = defaultdict(list)
        self.alert_thresholds = {
            PerformanceMetric.SUCCESS_RATE: 0.8,  # Alert if success rate < 80%
            PerformanceMetric.RESPONSE_TIME: 30.0,  # Alert if response time > 30s
            PerformanceMetric.GOAL_COMPLETION_RATE: 0.7  # Alert if completion < 70%
        }

    async def record_metric(self,
                            agent_id: str,
                            metric_type: PerformanceMetric,
                            value: float,
                            context: Optional[Dict[str, Any]] = None):
        """Record a performance metric for an agent and check its threshold."""
        metric_entry = AgentPerformanceMetrics(
            agent_id=agent_id,
            timestamp=datetime.utcnow(),
            metrics={metric_type.value: value},
            context=context or {}
        )

        self.metrics_buffer.append(metric_entry)
        self.performance_history[agent_id].append(metric_entry)

        # Check thresholds
        await self._check_thresholds(agent_id, metric_type, value)

        # Persist if needed (batching implemented in production)
        # await self._persist_metric(metric_entry)

    def _summarize(self, agent_id: str, time_window_minutes: int) -> Dict[str, Any]:
        """Average each metric recorded for ``agent_id`` within the time window.

        Returns ``{}`` when the agent has no samples in the window.
        """
        cutoff_time = datetime.utcnow() - timedelta(minutes=time_window_minutes)
        # .get() instead of indexing: indexing a defaultdict would create an
        # empty history entry for every unknown agent id that is queried.
        history = self.performance_history.get(agent_id, ())
        relevant_metrics = [m for m in history if m.timestamp > cutoff_time]

        if not relevant_metrics:
            return {}

        aggregated = defaultdict(list)
        for m in relevant_metrics:
            for k, v in m.metrics.items():
                aggregated[k].append(v)

        return {
            "agent_id": agent_id,
            "period_minutes": time_window_minutes,
            "sample_size": len(relevant_metrics),
            "metrics": {
                k: sum(v) / len(v) for k, v in aggregated.items()
            }
        }

    async def get_agent_performance(self, agent_id: str, time_window_minutes: int = 60) -> Dict[str, Any]:
        """Get aggregated performance metrics for an agent.

        Returns:
            ``{}`` when there are no samples in the window; otherwise a dict
            with ``agent_id``, ``period_minutes``, ``sample_size`` and the
            per-metric averages under ``metrics``.
        """
        return self._summarize(agent_id, time_window_minutes)

    def get_all_agents_performance(self, time_window_minutes: int = 60) -> List[Dict[str, Any]]:
        """Get the aggregated metrics of every agent with samples in the window.

        Returns:
            One summary dict per agent, in the same shape as
            ``get_agent_performance``; agents without recent samples are
            omitted.
        """
        summaries = (
            self._summarize(agent_id, time_window_minutes)
            for agent_id in list(self.performance_history)
        )
        return [summary for summary in summaries if summary]

    async def _check_thresholds(self, agent_id: str, metric_type: PerformanceMetric, value: float):
        """Check if metric violates thresholds and log a warning if it does."""
        threshold = self.alert_thresholds.get(metric_type)
        # Compare against None: a configured threshold of 0.0 is still a threshold.
        if threshold is None:
            return

        # Rate metrics alert when they drop below the threshold; every other
        # metric alerts when it rises above it.
        if metric_type in [PerformanceMetric.SUCCESS_RATE, PerformanceMetric.GOAL_COMPLETION_RATE]:
            is_violation = value < threshold
        else:
            is_violation = value > threshold

        if is_violation:
            logger.warning(
                f"Performance alert for agent {agent_id}: "
                f"{metric_type.value} = {value} (Threshold: {threshold})"
            )
            # Trigger alert notification (impl via notification service)
|
||||
|
||||
# Singleton instance
|
||||
# Module-level PerformanceMonitor instance, created when this module is imported.
performance_monitor = PerformanceMonitor()
|
||||
# Alias: AgentPerformanceMonitor is the same class object as PerformanceMonitor.
AgentPerformanceMonitor = PerformanceMonitor
|
||||
# Alias: performance_service refers to the same instance as performance_monitor.
performance_service = performance_monitor
|
||||
899
backend/services/intelligence/agents/safety_framework.py
Normal file
899
backend/services/intelligence/agents/safety_framework.py
Normal file
@@ -0,0 +1,899 @@
|
||||
"""
|
||||
Agent Safety Framework for ALwrity Autonomous Marketing Agents
|
||||
Implements safety constraints, validation, and rollback mechanisms
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
from datetime import datetime, timedelta
|
||||
from typing import Dict, List, Any, Optional, Set
|
||||
from dataclasses import dataclass, asdict
|
||||
from enum import Enum
|
||||
|
||||
from utils.logger_utils import get_service_logger
|
||||
from services.database import get_session_for_user
|
||||
|
||||
logger = get_service_logger(__name__)
|
||||
|
||||
class RiskLevel(Enum):
    """Risk classification of an agent action, declared from lowest to highest.

    The values are plain strings, so the raw values do not sort by severity.
    """

    LOW = "low"
    MEDIUM = "medium"
    HIGH = "high"
    CRITICAL = "critical"
|
||||
|
||||
class ActionCategory(Enum):
    """Kinds of action an agent can take; safety constraints are scoped to these."""

    CONTENT_MODIFICATION = "content_modification"
    SEO_OPTIMIZATION = "seo_optimization"
    COMPETITOR_RESPONSE = "competitor_response"
    SOCIAL_AMPLIFICATION = "social_amplification"
    STRATEGY_CHANGE = "strategy_change"
    SYSTEM_CONFIGURATION = "system_configuration"
|
||||
|
||||
@dataclass
class SafetyConstraint:
    """A safety rule applied to one or more categories of agent action.

    ``created_at`` (ISO-8601 UTC string) and ``conditions`` (empty dict) are
    filled in by ``__post_init__`` when left as ``None``.
    """
    constraint_id: str
    name: str
    description: str
    action_categories: List[ActionCategory]
    risk_threshold: float  # Maximum allowed risk level (0.0 to 1.0)
    approval_required: bool
    auto_approval_threshold: float  # Risk level below which auto-approval is allowed
    daily_limit: Optional[int] = None  # Maximum actions per day
    hourly_limit: Optional[int] = None  # Maximum actions per hour
    conditions: Optional[Dict[str, Any]] = None  # Additional conditions for validation
    created_at: Optional[str] = None

    def __post_init__(self):
        """Replace ``None`` defaults with a fresh dict and a creation timestamp."""
        if self.conditions is None:
            # A new dict per instance, so instances never share state.
            self.conditions = {}
        if self.created_at is None:
            self.created_at = datetime.utcnow().isoformat()
|
||||
|
||||
@dataclass
class ActionCheckpoint:
    """Record of an action and the system state captured for rollback purposes.

    ``created_at`` is an ISO-8601 string; it defaults to the UTC time at
    construction when not supplied.
    """
    checkpoint_id: str
    action_id: str
    agent_id: str
    user_id: str
    action_type: str
    action_data: Dict[str, Any]
    system_state: Dict[str, Any]
    created_at: Optional[str] = None

    def __post_init__(self):
        """Default ``created_at`` to the current UTC time."""
        if self.created_at is not None:
            return
        self.created_at = datetime.utcnow().isoformat()
|
||||
|
||||
@dataclass
class SafetyValidation:
    """Outcome of validating an agent action against the safety constraints.

    ``validation_timestamp`` is an ISO-8601 string; it defaults to the UTC
    time at construction when not supplied.
    """
    is_valid: bool
    risk_level: RiskLevel
    violations: List[str]
    recommendations: List[str]
    requires_approval: bool
    confidence_score: float  # 0.0 to 1.0
    validation_timestamp: Optional[str] = None

    def __post_init__(self):
        """Default ``validation_timestamp`` to the current UTC time."""
        if self.validation_timestamp is not None:
            return
        self.validation_timestamp = datetime.utcnow().isoformat()
|
||||
|
||||
class SafetyConstraintManager:
|
||||
"""Manages safety constraints for agent actions"""
|
||||
|
||||
def __init__(self, user_id: str):
    """Create the manager for ``user_id`` with empty histories and the default constraints."""
    self.user_id = user_id

    # Start with no constraints and no recorded actions or violations.
    self.violation_history: List[Dict[str, Any]] = []
    self.action_history: List[Dict[str, Any]] = []
    self.constraints: Dict[str, SafetyConstraint] = {}

    # Initialize default constraints
    self._initialize_default_constraints()

    logger.info(f"Initialized SafetyConstraintManager for user: {user_id}")
|
||||
|
||||
def _initialize_default_constraints(self):
|
||||
"""Initialize default safety constraints"""
|
||||
default_constraints = [
|
||||
SafetyConstraint(
|
||||
constraint_id="content_modification_limit",
|
||||
name="Content Modification Daily Limit",
|
||||
description="Limit the number of content modifications per day",
|
||||
action_categories=[ActionCategory.CONTENT_MODIFICATION],
|
||||
risk_threshold=0.7,
|
||||
approval_required=False,
|
||||
auto_approval_threshold=0.3,
|
||||
daily_limit=50,
|
||||
hourly_limit=10
|
||||
),
|
||||
SafetyConstraint(
|
||||
constraint_id="high_risk_approval_required",
|
||||
name="High Risk Action Approval",
|
||||
description="Require approval for high-risk actions",
|
||||
action_categories=[ActionCategory.STRATEGY_CHANGE, ActionCategory.SYSTEM_CONFIGURATION],
|
||||
risk_threshold=0.8,
|
||||
approval_required=True,
|
||||
auto_approval_threshold=0.2
|
||||
),
|
||||
SafetyConstraint(
|
||||
constraint_id="competitor_response_cooldown",
|
||||
name="Competitor Response Cooldown",
|
||||
description="Prevent excessive competitor responses",
|
||||
action_categories=[ActionCategory.COMPETITOR_RESPONSE],
|
||||
risk_threshold=0.6,
|
||||
approval_required=False,
|
||||
auto_approval_threshold=0.4,
|
||||
daily_limit=20,
|
||||
hourly_limit=5
|
||||
),
|
||||
SafetyConstraint(
|
||||
constraint_id="seo_optimization_safety",
|
||||
name="SEO Optimization Safety",
|
||||
description="Ensure SEO optimizations don't harm rankings",
|
||||
action_categories=[ActionCategory.SEO_OPTIMIZATION],
|
||||
risk_threshold=0.5,
|
||||
approval_required=False,
|
||||
auto_approval_threshold=0.3,
|
||||
daily_limit=30,
|
||||
hourly_limit=8
|
||||
),
|
||||
SafetyConstraint(
|
||||
constraint_id="social_amplification_limits",
|
||||
name="Social Amplification Limits",
|
||||
description="Limit social media amplification to prevent spam",
|
||||
action_categories=[ActionCategory.SOCIAL_AMPLIFICATION],
|
||||
risk_threshold=0.6,
|
||||
approval_required=False,
|
||||
auto_approval_threshold=0.4,
|
||||
daily_limit=25,
|
||||
hourly_limit=6
|
||||
)
|
||||
]
|
||||
|
||||
for constraint in default_constraints:
|
||||
self.constraints[constraint.constraint_id] = constraint
|
||||
|
||||
async def validate_action(self, action_data: Dict[str, Any]) -> SafetyValidation:
|
||||
"""Validate an action against safety constraints"""
|
||||
try:
|
||||
logger.info(f"Validating action for user {self.user_id}: {action_data.get('action_type', 'unknown')}")
|
||||
|
||||
violations = []
|
||||
recommendations = []
|
||||
requires_approval = False
|
||||
confidence_score = 1.0
|
||||
|
||||
# Extract action details
|
||||
action_type = action_data.get('action_type', 'unknown')
|
||||
action_category = self._determine_action_category(action_type)
|
||||
risk_score = action_data.get('risk_score', 0.5)
|
||||
impact_score = action_data.get('impact_score', 0.5)
|
||||
|
||||
# Determine risk level
|
||||
risk_level = self._calculate_risk_level(risk_score, impact_score)
|
||||
|
||||
# Check against all relevant constraints
|
||||
for constraint in self.constraints.values():
|
||||
if action_category in constraint.action_categories:
|
||||
constraint_result = await self._check_constraint(constraint, action_data, risk_level)
|
||||
|
||||
if not constraint_result['is_valid']:
|
||||
violations.extend(constraint_result['violations'])
|
||||
confidence_score *= 0.9 # Reduce confidence for violations
|
||||
|
||||
if constraint_result['requires_approval']:
|
||||
requires_approval = True
|
||||
|
||||
recommendations.extend(constraint_result['recommendations'])
|
||||
|
||||
# Check rate limits
|
||||
rate_limit_result = await self._check_rate_limits(action_category, action_data)
|
||||
if not rate_limit_result['is_valid']:
|
||||
violations.extend(rate_limit_result['violations'])
|
||||
confidence_score *= 0.8
|
||||
|
||||
# Check for suspicious patterns
|
||||
pattern_result = await self._check_suspicious_patterns(action_data)
|
||||
if not pattern_result['is_valid']:
|
||||
violations.extend(pattern_result['violations'])
|
||||
confidence_score *= 0.7
|
||||
requires_approval = True # Suspicious patterns always require approval
|
||||
|
||||
# Final validation
|
||||
is_valid = len(violations) == 0 and not requires_approval
|
||||
|
||||
logger.info(f"Action validation completed for user {self.user_id}. Valid: {is_valid}, Risk: {risk_level.value}, Violations: {len(violations)}")
|
||||
|
||||
# Record in history
|
||||
await self._record_validation_history(action_data, is_valid, violations)
|
||||
|
||||
return SafetyValidation(
|
||||
is_valid=is_valid,
|
||||
risk_level=risk_level,
|
||||
violations=violations,
|
||||
recommendations=recommendations,
|
||||
requires_approval=requires_approval,
|
||||
confidence_score=max(0.0, min(1.0, confidence_score))
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error validating action for user {self.user_id}: {e}")
|
||||
|
||||
# Return safe default on error
|
||||
return SafetyValidation(
|
||||
is_valid=False,
|
||||
risk_level=RiskLevel.CRITICAL,
|
||||
violations=["Validation system error"],
|
||||
recommendations=["Manual review required"],
|
||||
requires_approval=True,
|
||||
confidence_score=0.0
|
||||
)
|
||||
|
||||
def _determine_action_category(self, action_type: str) -> ActionCategory:
|
||||
"""Determine the category of an action"""
|
||||
action_type_lower = action_type.lower()
|
||||
|
||||
if any(keyword in action_type_lower for keyword in ['content', 'blog', 'article', 'post']):
|
||||
return ActionCategory.CONTENT_MODIFICATION
|
||||
elif any(keyword in action_type_lower for keyword in ['seo', 'meta', 'keyword', 'optimization']):
|
||||
return ActionCategory.SEO_OPTIMIZATION
|
||||
elif any(keyword in action_type_lower for keyword in ['competitor', 'competitive', 'response']):
|
||||
return ActionCategory.COMPETITOR_RESPONSE
|
||||
elif any(keyword in action_type_lower for keyword in ['social', 'share', 'amplify', 'distribute']):
|
||||
return ActionCategory.SOCIAL_AMPLIFICATION
|
||||
elif any(keyword in action_type_lower for keyword in ['strategy', 'plan', 'approach']):
|
||||
return ActionCategory.STRATEGY_CHANGE
|
||||
elif any(keyword in action_type_lower for keyword in ['config', 'setting', 'system']):
|
||||
return ActionCategory.SYSTEM_CONFIGURATION
|
||||
else:
|
||||
return ActionCategory.CONTENT_MODIFICATION # Default category
|
||||
|
||||
def _calculate_risk_level(self, risk_score: float, impact_score: float) -> RiskLevel:
|
||||
"""Calculate overall risk level"""
|
||||
# Weighted combination of risk and impact
|
||||
combined_score = (risk_score * 0.6) + (impact_score * 0.4)
|
||||
|
||||
if combined_score >= 0.8:
|
||||
return RiskLevel.CRITICAL
|
||||
elif combined_score >= 0.6:
|
||||
return RiskLevel.HIGH
|
||||
elif combined_score >= 0.3:
|
||||
return RiskLevel.MEDIUM
|
||||
else:
|
||||
return RiskLevel.LOW
|
||||
|
||||
async def _check_constraint(self, constraint: SafetyConstraint, action_data: Dict[str, Any], risk_level: RiskLevel) -> Dict[str, Any]:
|
||||
"""Check an action against a specific constraint"""
|
||||
violations = []
|
||||
recommendations = []
|
||||
requires_approval = False
|
||||
|
||||
# Check risk threshold
|
||||
if risk_level.value in ['high', 'critical'] and constraint.risk_threshold < 0.8:
|
||||
violations.append(f"Risk level {risk_level.value} exceeds constraint threshold")
|
||||
requires_approval = True
|
||||
|
||||
# Check rate limits
|
||||
if constraint.daily_limit:
|
||||
daily_count = await self._get_daily_action_count(constraint.constraint_id)
|
||||
if daily_count >= constraint.daily_limit:
|
||||
violations.append(f"Daily limit exceeded: {daily_count}/{constraint.daily_limit}")
|
||||
|
||||
if constraint.hourly_limit:
|
||||
hourly_count = await self._get_hourly_action_count(constraint.constraint_id)
|
||||
if hourly_count >= constraint.hourly_limit:
|
||||
violations.append(f"Hourly limit exceeded: {hourly_count}/{constraint.hourly_limit}")
|
||||
|
||||
# Check approval requirement
|
||||
if constraint.approval_required:
|
||||
requires_approval = True
|
||||
recommendations.append("Action requires manual approval due to safety constraints")
|
||||
|
||||
# Check auto-approval threshold
|
||||
risk_score = action_data.get('risk_score', 0.5)
|
||||
if risk_score > constraint.auto_approval_threshold:
|
||||
requires_approval = True
|
||||
|
||||
# Custom condition checks
|
||||
if constraint.conditions:
|
||||
condition_result = await self._check_custom_conditions(constraint.conditions, action_data)
|
||||
if not condition_result['is_valid']:
|
||||
violations.extend(condition_result['violations'])
|
||||
|
||||
is_valid = len(violations) == 0 and not requires_approval
|
||||
|
||||
return {
|
||||
"is_valid": is_valid,
|
||||
"violations": violations,
|
||||
"recommendations": recommendations,
|
||||
"requires_approval": requires_approval
|
||||
}
|
||||
|
||||
async def _check_rate_limits(self, action_category: ActionCategory, action_data: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""Check rate limits for actions"""
|
||||
violations = []
|
||||
|
||||
# Get current time window counts
|
||||
recent_actions = await self._get_recent_actions(hours=1)
|
||||
category_actions = [action for action in recent_actions if self._determine_action_category(action.get('action_type', '')) == action_category]
|
||||
|
||||
# Check hourly limits
|
||||
if len(category_actions) > 50: # Default hourly limit
|
||||
violations.append(f"Hourly action limit exceeded for {action_category.value}")
|
||||
|
||||
# Check daily limits
|
||||
daily_actions = await self._get_recent_actions(hours=24)
|
||||
daily_category_actions = [action for action in daily_actions if self._determine_action_category(action.get('action_type', '')) == action_category]
|
||||
|
||||
if len(daily_category_actions) > 200: # Default daily limit
|
||||
violations.append(f"Daily action limit exceeded for {action_category.value}")
|
||||
|
||||
return {
|
||||
"is_valid": len(violations) == 0,
|
||||
"violations": violations
|
||||
}
|
||||
|
||||
async def _check_suspicious_patterns(self, action_data: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""Check for suspicious patterns in actions"""
|
||||
violations = []
|
||||
|
||||
# Get recent action patterns
|
||||
recent_actions = await self._get_recent_actions(hours=24)
|
||||
|
||||
# Check for rapid repetitive actions
|
||||
action_type = action_data.get('action_type', '')
|
||||
similar_actions = [action for action in recent_actions if action.get('action_type') == action_type]
|
||||
|
||||
if len(similar_actions) > 10: # More than 10 similar actions in 24 hours
|
||||
violations.append(f"Suspicious pattern: {len(similar_actions)} similar actions in 24 hours")
|
||||
|
||||
# Check for unusual timing patterns
|
||||
if len(recent_actions) > 100: # More than 100 actions in 1 hour
|
||||
violations.append("Suspicious pattern: Unusually high action frequency")
|
||||
|
||||
# Check for conflicting actions
|
||||
conflicting_actions = await self._detect_conflicting_actions(action_data, recent_actions)
|
||||
if conflicting_actions:
|
||||
violations.append(f"Conflicting actions detected: {len(conflicting_actions)}")
|
||||
|
||||
return {
|
||||
"is_valid": len(violations) == 0,
|
||||
"violations": violations
|
||||
}
|
||||
|
||||
async def _detect_conflicting_actions(self, current_action: Dict[str, Any], recent_actions: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
|
||||
"""Detect actions that conflict with recent actions"""
|
||||
conflicts = []
|
||||
|
||||
# Simple conflict detection based on action types
|
||||
conflicting_pairs = [
|
||||
("optimize_content", "delete_content"),
|
||||
("increase_keywords", "decrease_keywords"),
|
||||
("enable_feature", "disable_feature")
|
||||
]
|
||||
|
||||
current_action_type = current_action.get('action_type', '')
|
||||
|
||||
for pair in conflicting_pairs:
|
||||
if current_action_type == pair[0]:
|
||||
# Check for recent opposite action
|
||||
for action in recent_actions:
|
||||
if action.get('action_type') == pair[1]:
|
||||
conflicts.append(action)
|
||||
break
|
||||
elif current_action_type == pair[1]:
|
||||
# Check for recent opposite action
|
||||
for action in recent_actions:
|
||||
if action.get('action_type') == pair[0]:
|
||||
conflicts.append(action)
|
||||
break
|
||||
|
||||
return conflicts
|
||||
|
||||
async def _check_custom_conditions(self, conditions: Dict[str, Any], action_data: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""Check custom conditions for constraints"""
|
||||
violations = []
|
||||
|
||||
# Example custom conditions (can be extended)
|
||||
if conditions.get('max_content_length'):
|
||||
content_length = len(action_data.get('content', ''))
|
||||
if content_length > conditions['max_content_length']:
|
||||
violations.append(f"Content length {content_length} exceeds maximum {conditions['max_content_length']}")
|
||||
|
||||
if conditions.get('allowed_keywords'):
|
||||
content = action_data.get('content', '').lower()
|
||||
allowed_keywords = [kw.lower() for kw in conditions['allowed_keywords']]
|
||||
if not any(keyword in content for keyword in allowed_keywords):
|
||||
violations.append("Content does not contain required keywords")
|
||||
|
||||
return {
|
||||
"is_valid": len(violations) == 0,
|
||||
"violations": violations
|
||||
}
|
||||
|
||||
async def _get_recent_actions(self, hours: int = 24) -> List[Dict[str, Any]]:
|
||||
"""Get recent actions from history"""
|
||||
cutoff_time = datetime.utcnow() - timedelta(hours=hours)
|
||||
|
||||
return [
|
||||
action for action in self.action_history
|
||||
if datetime.fromisoformat(action.get('timestamp', datetime.utcnow().isoformat())) > cutoff_time
|
||||
]
|
||||
|
||||
async def _get_daily_action_count(self, constraint_id: str) -> int:
|
||||
"""Get daily action count for a specific constraint"""
|
||||
daily_actions = await self._get_recent_actions(hours=24)
|
||||
return len(daily_actions)
|
||||
|
||||
async def _get_hourly_action_count(self, constraint_id: str) -> int:
|
||||
"""Get hourly action count for a specific constraint"""
|
||||
hourly_actions = await self._get_recent_actions(hours=1)
|
||||
return len(hourly_actions)
|
||||
|
||||
async def _record_validation_history(self, action_data: Dict[str, Any], is_valid: bool, violations: List[str]):
|
||||
"""Record validation in history"""
|
||||
validation_record = {
|
||||
"timestamp": datetime.utcnow().isoformat(),
|
||||
"action_type": action_data.get('action_type', 'unknown'),
|
||||
"is_valid": is_valid,
|
||||
"violations": violations,
|
||||
"action_data": action_data
|
||||
}
|
||||
|
||||
self.action_history.append(validation_record)
|
||||
|
||||
# Keep only recent history (last 1000 records)
|
||||
if len(self.action_history) > 1000:
|
||||
self.action_history = self.action_history[-1000:]
|
||||
|
||||
# Record violations separately
|
||||
if violations:
|
||||
violation_record = {
|
||||
"timestamp": datetime.utcnow().isoformat(),
|
||||
"action_type": action_data.get('action_type', 'unknown'),
|
||||
"violations": violations,
|
||||
"severity": "high" if len(violations) > 2 else "medium"
|
||||
}
|
||||
self.violation_history.append(violation_record)
|
||||
|
||||
# Keep only recent violations (last 500 records)
|
||||
if len(self.violation_history) > 500:
|
||||
self.violation_history = self.violation_history[-500:]
|
||||
|
||||
def add_custom_constraint(self, constraint: SafetyConstraint):
|
||||
"""Add a custom safety constraint"""
|
||||
self.constraints[constraint.constraint_id] = constraint
|
||||
logger.info(f"Added custom constraint for user {self.user_id}: {constraint.constraint_id}")
|
||||
|
||||
def remove_constraint(self, constraint_id: str):
|
||||
"""Remove a safety constraint"""
|
||||
if constraint_id in self.constraints:
|
||||
del self.constraints[constraint_id]
|
||||
logger.info(f"Removed constraint for user {self.user_id}: {constraint_id}")
|
||||
|
||||
def get_constraints(self) -> Dict[str, SafetyConstraint]:
|
||||
"""Get all safety constraints"""
|
||||
return self.constraints.copy()
|
||||
|
||||
def get_validation_history(self, limit: int = 100) -> List[Dict[str, Any]]:
|
||||
"""Get recent validation history"""
|
||||
return self.action_history[-limit:] if self.action_history else []
|
||||
|
||||
def get_violation_history(self, limit: int = 50) -> List[Dict[str, Any]]:
|
||||
"""Get recent violation history"""
|
||||
return self.violation_history[-limit:] if self.violation_history else []
|
||||
|
||||
class RollbackManager:
    """Keep per-user action checkpoints and roll actions back to them.

    Checkpoints and rollback history are held in memory and capped in size.
    The per-action-type rollback handlers are placeholders: they log and
    report a result but do not restore any external state themselves.
    """

    MAX_CHECKPOINTS = 100
    MAX_ROLLBACK_HISTORY = 50

    def __init__(self, user_id: str):
        """Create an empty rollback manager for ``user_id``."""
        self.user_id = user_id
        self.checkpoints: List[ActionCheckpoint] = []
        self.rollback_history: List[Dict[str, Any]] = []

        logger.info(f"Initialized RollbackManager for user: {user_id}")

    def _new_checkpoint_id(self) -> str:
        """Build a checkpoint id that is unique even within the same second.

        The timestamp alone has one-second resolution, so a random suffix is
        appended; otherwise two checkpoints created back to back would share
        an id and only the first could ever be found again.
        """
        import uuid  # local import keeps the module's import block untouched

        stamp = datetime.utcnow().strftime('%Y%m%d%H%M%S')
        return f"checkpoint_{self.user_id}_{stamp}_{uuid.uuid4().hex[:8]}"

    async def create_checkpoint(self, action_data: Dict[str, Any], system_state: Dict[str, Any]) -> str:
        """Record a checkpoint before an action executes and return its id.

        ``action_data`` and ``system_state`` are stored by reference, not
        copied. Any error is logged and re-raised.
        """
        try:
            checkpoint_id = self._new_checkpoint_id()

            checkpoint = ActionCheckpoint(
                checkpoint_id=checkpoint_id,
                action_id=action_data.get('action_id', 'unknown'),
                agent_id=action_data.get('agent_id', 'unknown'),
                user_id=self.user_id,
                action_type=action_data.get('action_type', 'unknown'),
                action_data=action_data,
                system_state=system_state
            )

            self.checkpoints.append(checkpoint)

            # Keep only recent checkpoints
            if len(self.checkpoints) > self.MAX_CHECKPOINTS:
                self.checkpoints = self.checkpoints[-self.MAX_CHECKPOINTS:]

            logger.info(f"Created checkpoint for user {self.user_id}: {checkpoint_id}")
            return checkpoint_id

        except Exception as e:
            logger.error(f"Error creating checkpoint for user {self.user_id}: {e}")
            raise

    async def rollback_to_checkpoint(self, checkpoint_id: str) -> Dict[str, Any]:
        """Run the rollback handler for one checkpoint and log it to history.

        Returns the handler's result dict, or ``{"success": False, "error": ...}``
        when the checkpoint is unknown or an error occurs. Never raises.
        """
        try:
            # Find checkpoint
            checkpoint = next((cp for cp in self.checkpoints if cp.checkpoint_id == checkpoint_id), None)

            if not checkpoint:
                return {
                    "success": False,
                    "error": f"Checkpoint not found: {checkpoint_id}"
                }

            logger.info(f"Rolling back to checkpoint for user {self.user_id}: {checkpoint_id}")

            # Execute rollback (implementation depends on action type)
            rollback_result = await self._execute_rollback(checkpoint)

            # Record in history
            rollback_record = {
                "timestamp": datetime.utcnow().isoformat(),
                "checkpoint_id": checkpoint_id,
                "action_type": checkpoint.action_type,
                "success": rollback_result["success"],
                "details": rollback_result
            }
            self.rollback_history.append(rollback_record)

            # Keep only recent rollback history
            if len(self.rollback_history) > self.MAX_ROLLBACK_HISTORY:
                self.rollback_history = self.rollback_history[-self.MAX_ROLLBACK_HISTORY:]

            return rollback_result

        except Exception as e:
            logger.error(f"Error rolling back to checkpoint {checkpoint_id} for user {self.user_id}: {e}")
            return {
                "success": False,
                "error": str(e)
            }

    async def _execute_rollback(self, checkpoint: ActionCheckpoint) -> Dict[str, Any]:
        """Dispatch to the handler for the checkpoint's exact ``action_type``.

        Unrecognised action types use the generic handler.
        """
        # Bound before the try so the error log below can always reference it.
        action_type = getattr(checkpoint, 'action_type', 'unknown')
        try:
            handlers = {
                "content_modification": self._rollback_content_modification,
                "seo_optimization": self._rollback_seo_optimization,
                "competitor_response": self._rollback_competitor_response,
                "social_amplification": self._rollback_social_amplification,
            }
            # Only strings can match a known type; anything else goes generic.
            handler = handlers.get(action_type) if isinstance(action_type, str) else None
            if handler is None:
                handler = self._rollback_generic

            return await handler(checkpoint.action_data, checkpoint.system_state)

        except Exception as e:
            logger.error(f"Error executing rollback for action {action_type}: {e}")
            return {
                "success": False,
                "error": str(e)
            }

    async def _rollback_content_modification(self, action_data: Dict[str, Any], system_state: Dict[str, Any]) -> Dict[str, Any]:
        """Placeholder rollback for content modifications.

        Restores nothing. ``original_state_restored`` only reports whether
        ``system_state`` carried a non-empty ``original_content``.
        """
        try:
            # Implementation would depend on how content is stored and managed
            original_content = system_state.get('original_content', {})

            logger.info(f"Rolling back content modification: {action_data.get('content_id', 'unknown')}")

            return {
                "success": True,
                "message": "Content modification rolled back successfully",
                "details": {
                    "content_id": action_data.get('content_id'),
                    "rollback_type": "content_modification",
                    "original_state_restored": bool(original_content)
                }
            }

        except Exception as e:
            return {
                "success": False,
                "error": f"Failed to rollback content modification: {str(e)}"
            }

    async def _rollback_seo_optimization(self, action_data: Dict[str, Any], system_state: Dict[str, Any]) -> Dict[str, Any]:
        """Placeholder rollback for SEO optimizations.

        Restores nothing. ``original_state_restored`` only reports whether
        ``system_state`` carried a non-empty ``seo_state``.
        """
        try:
            original_seo_state = system_state.get('seo_state', {})

            logger.info(f"Rolling back SEO optimization: {action_data.get('optimization_type', 'unknown')}")

            return {
                "success": True,
                "message": "SEO optimization rolled back successfully",
                "details": {
                    "optimization_type": action_data.get('optimization_type'),
                    "rollback_type": "seo_optimization",
                    "original_state_restored": bool(original_seo_state)
                }
            }

        except Exception as e:
            return {
                "success": False,
                "error": f"Failed to rollback SEO optimization: {str(e)}"
            }

    async def _rollback_competitor_response(self, action_data: Dict[str, Any], system_state: Dict[str, Any]) -> Dict[str, Any]:
        """Placeholder rollback for competitor responses; always reports success."""
        try:
            logger.info(f"Rolling back competitor response: {action_data.get('response_type', 'unknown')}")

            return {
                "success": True,
                "message": "Competitor response rolled back successfully",
                "details": {
                    "response_type": action_data.get('response_type'),
                    "rollback_type": "competitor_response",
                    "original_state_restored": True
                }
            }

        except Exception as e:
            return {
                "success": False,
                "error": f"Failed to rollback competitor response: {str(e)}"
            }

    async def _rollback_social_amplification(self, action_data: Dict[str, Any], system_state: Dict[str, Any]) -> Dict[str, Any]:
        """Placeholder rollback for social amplification; always reports success."""
        try:
            logger.info(f"Rolling back social amplification: {action_data.get('platform', 'unknown')}")

            return {
                "success": True,
                "message": "Social amplification rolled back successfully",
                "details": {
                    "platform": action_data.get('platform'),
                    "rollback_type": "social_amplification",
                    "original_state_restored": True
                }
            }

        except Exception as e:
            return {
                "success": False,
                "error": f"Failed to rollback social amplification: {str(e)}"
            }

    async def _rollback_generic(self, action_data: Dict[str, Any], system_state: Dict[str, Any]) -> Dict[str, Any]:
        """Fallback rollback for action types without a dedicated handler."""
        try:
            logger.info(f"Performing generic rollback for action: {action_data.get('action_type', 'unknown')}")

            return {
                "success": True,
                "message": "Generic rollback completed",
                "details": {
                    "action_type": action_data.get('action_type'),
                    "rollback_type": "generic",
                    "system_state_available": bool(system_state)
                }
            }

        except Exception as e:
            return {
                "success": False,
                "error": f"Failed to perform generic rollback: {str(e)}"
            }

    async def rollback_latest_actions(self, count: int = 1) -> List[Dict[str, Any]]:
        """Roll back the latest ``count`` checkpoints, newest first.

        A non-positive ``count`` rolls back nothing and returns ``[]``.
        """
        # Guard needed because a slice of [-0:] would select every checkpoint.
        if count <= 0:
            return []

        results = []
        for checkpoint in reversed(self.checkpoints[-count:]):
            result = await self.rollback_to_checkpoint(checkpoint.checkpoint_id)
            results.append(result)

        return results

    def get_checkpoints(self, limit: int = 50) -> List[Dict[str, Any]]:
        """Return summaries of up to ``limit`` most recent checkpoints, oldest first.

        Only the keys of ``system_state`` are exposed, not its values. A
        non-positive ``limit`` yields an empty list.
        """
        if limit <= 0:
            return []

        return [
            {
                "checkpoint_id": checkpoint.checkpoint_id,
                "action_id": checkpoint.action_id,
                "action_type": checkpoint.action_type,
                "agent_id": checkpoint.agent_id,
                "created_at": checkpoint.created_at,
                "system_state_keys": list(checkpoint.system_state.keys())
            }
            for checkpoint in self.checkpoints[-limit:]
        ]

    def get_rollback_history(self, limit: int = 50) -> List[Dict[str, Any]]:
        """Return up to ``limit`` most recent rollback records, oldest first.

        A non-positive ``limit`` yields an empty list.
        """
        return self.rollback_history[-limit:] if limit > 0 else []
|
||||
|
||||
class UserApprovalSystem:
    """Track user approval requests for high-risk actions.

    Pending requests and decided history are held in memory on the instance.
    Decision strings are stored as given; the statistics only recognise the
    exact values ``"approved"`` and ``"rejected"``.
    """

    APPROVAL_TTL_HOURS = 24
    MAX_APPROVAL_HISTORY = 100

    def __init__(self, user_id: str):
        """Create an approval system for ``user_id`` with no pending requests."""
        self.user_id = user_id
        self.pending_approvals: Dict[str, Dict[str, Any]] = {}
        self.approval_history: List[Dict[str, Any]] = []

        logger.info(f"Initialized UserApprovalSystem for user: {user_id}")

    def _new_approval_id(self) -> str:
        """Build an approval id that is unique even within the same second.

        The timestamp alone has one-second resolution, so a random suffix is
        appended; otherwise a second request made in the same second would
        overwrite the first in ``pending_approvals``.
        """
        import uuid  # local import keeps the module's import block untouched

        stamp = datetime.utcnow().strftime('%Y%m%d%H%M%S')
        return f"approval_{self.user_id}_{stamp}_{uuid.uuid4().hex[:8]}"

    async def request_approval(self, action_data: Dict[str, Any]) -> Dict[str, Any]:
        """Queue an approval request that expires after ``APPROVAL_TTL_HOURS``.

        Returns a dict with ``success``, ``approval_id``, ``status`` and
        ``message``, or ``{"success": False, "error": ...}`` on failure.
        """
        try:
            approval_id = self._new_approval_id()
            requested_at = datetime.utcnow()

            approval_request = {
                "approval_id": approval_id,
                "action_data": action_data,
                "requested_at": requested_at.isoformat(),
                "status": "pending",
                "expires_at": (requested_at + timedelta(hours=self.APPROVAL_TTL_HOURS)).isoformat()
            }

            self.pending_approvals[approval_id] = approval_request

            logger.info(f"Created approval request for user {self.user_id}: {approval_id}")

            return {
                "success": True,
                "approval_id": approval_id,
                "status": "pending",
                "message": "Approval request created successfully"
            }

        except Exception as e:
            logger.error(f"Error creating approval request for user {self.user_id}: {e}")
            return {
                "success": False,
                "error": str(e)
            }

    async def approve_action(self, approval_id: str, user_decision: str, user_comments: str = "") -> Dict[str, Any]:
        """Apply the user's decision to a pending request.

        The request moves from pending to history with ``user_decision`` as
        its status. Unknown ids and expired requests return a failure dict;
        expired requests are dropped without being added to history.
        """
        try:
            if approval_id not in self.pending_approvals:
                return {
                    "success": False,
                    "error": "Approval request not found"
                }

            approval_request = self.pending_approvals[approval_id]

            # Check if approval has expired
            expires_at = datetime.fromisoformat(approval_request["expires_at"])
            if datetime.utcnow() > expires_at:
                del self.pending_approvals[approval_id]
                return {
                    "success": False,
                    "error": "Approval request has expired"
                }

            # Process decision
            approval_request["status"] = user_decision
            approval_request["decision_at"] = datetime.utcnow().isoformat()
            approval_request["user_comments"] = user_comments

            # Record in history
            self.approval_history.append(approval_request)

            # Remove from pending
            del self.pending_approvals[approval_id]

            # Keep only recent history
            if len(self.approval_history) > self.MAX_APPROVAL_HISTORY:
                self.approval_history = self.approval_history[-self.MAX_APPROVAL_HISTORY:]

            logger.info(f"Processed approval decision for user {self.user_id}: {approval_id} - {user_decision}")

            return {
                "success": True,
                "approval_id": approval_id,
                "status": user_decision,
                "message": f"Action {user_decision} successfully"
            }

        except Exception as e:
            logger.error(f"Error processing approval decision for user {self.user_id}: {e}")
            return {
                "success": False,
                "error": str(e)
            }

    def get_pending_approvals(self) -> List[Dict[str, Any]]:
        """Return all pending requests, including any that have already expired."""
        return list(self.pending_approvals.values())

    def get_approval_history(self, limit: int = 50) -> List[Dict[str, Any]]:
        """Return up to ``limit`` most recent decided requests, oldest first.

        A non-positive ``limit`` yields an empty list.
        """
        # Guard needed because a slice of [-0:] would return the whole list.
        return self.approval_history[-limit:] if limit > 0 else []

    def get_approval_statistics(self) -> Dict[str, Any]:
        """Summarise decided and pending requests.

        ``approval_rate`` is approved / total decided, or 0.0 when nothing has
        been decided yet.
        """
        total = len(self.approval_history)
        approved = sum(1 for entry in self.approval_history if entry["status"] == "approved")
        rejected = sum(1 for entry in self.approval_history if entry["status"] == "rejected")

        return {
            "total_approvals": total,
            "approved_count": approved,
            "rejected_count": rejected,
            "approval_rate": approved / total if total > 0 else 0.0,
            "pending_count": len(self.pending_approvals)
        }
|
||||
|
||||
# Global safety framework instance
|
||||
# Module-level cache keyed by user_id. Each value is a dict holding that user's
# "constraint_manager", "rollback_manager" and "approval_system" objects;
# entries are created on first use by get_safety_framework().
safety_framework_instances: Dict[str, Dict[str, Any]] = {}
|
||||
|
||||
def get_safety_framework(user_id: str) -> Dict[str, Any]:
    """Return the cached safety components for ``user_id``, building them once.

    The returned dict has the keys ``"constraint_manager"``,
    ``"rollback_manager"`` and ``"approval_system"``. Repeated calls with the
    same ``user_id`` return the same dict object.
    """
    try:
        return safety_framework_instances[user_id]
    except KeyError:
        pass

    framework = {
        "constraint_manager": SafetyConstraintManager(user_id),
        "rollback_manager": RollbackManager(user_id),
        "approval_system": UserApprovalSystem(user_id)
    }
    safety_framework_instances[user_id] = framework
    return framework
|
||||
|
||||
# Convenience functions
|
||||
async def validate_agent_action(user_id: str, action_data: Dict[str, Any]) -> SafetyValidation:
    """Validate ``action_data`` with the given user's constraint manager."""
    constraint_manager = get_safety_framework(user_id)["constraint_manager"]
    return await constraint_manager.validate_action(action_data)
|
||||
|
||||
async def create_action_checkpoint(user_id: str, action_data: Dict[str, Any], system_state: Dict[str, Any]) -> str:
    """Create a checkpoint in the given user's rollback manager and return its id."""
    rollback_manager = get_safety_framework(user_id)["rollback_manager"]
    return await rollback_manager.create_checkpoint(action_data, system_state)
|
||||
|
||||
async def rollback_to_checkpoint(user_id: str, checkpoint_id: str) -> Dict[str, Any]:
    """Roll back one checkpoint through the given user's rollback manager."""
    rollback_manager = get_safety_framework(user_id)["rollback_manager"]
    return await rollback_manager.rollback_to_checkpoint(checkpoint_id)
|
||||
|
||||
async def request_user_approval(user_id: str, action_data: Dict[str, Any]) -> Dict[str, Any]:
    """Submit ``action_data`` to the user's approval system.

    Returns the result of the approval system's
    ``request_approval(action_data)``.
    """
    approval_system = get_safety_framework(user_id)["approval_system"]
    return await approval_system.request_approval(action_data)
|
||||
1689
backend/services/intelligence/agents/specialized_agents.py
Normal file
1689
backend/services/intelligence/agents/specialized_agents.py
Normal file
File diff suppressed because it is too large
Load Diff
223
backend/services/intelligence/agents/team_catalog.py
Normal file
223
backend/services/intelligence/agents/team_catalog.py
Normal file
@@ -0,0 +1,223 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
|
||||
# One catalog entry is a plain dict; see AGENT_TEAM_CATALOG for the keys used.
AgentCatalogEntry = Dict[str, Any]


# Static catalog of the marketing agent team, one dict per agent.
#
# Keys present on every entry below:
#   agent_key         unique lookup key (matched by get_agent_catalog_entry)
#   agent_type        type identifier for the agent
#   role              human-readable role title
#   responsibilities  list of short responsibility statements
#   tools             list of tool names available to the agent
#   defaults          default configuration: display_name_template, enabled,
#                     schedule, system_prompt_template, task_prompt_template
#
# NOTE(review): the prompt templates mix "{website_name}" placeholders with
# literal JSON braces ("{\n" ... "}\n"). Rendering them with str.format would
# raise on those literal braces, so they are presumably filled by targeted
# substitution of "{website_name}" -- confirm against the rendering code.
AGENT_TEAM_CATALOG: List[AgentCatalogEntry] = [
    # Team lead: coordinates the other agents; runs on demand.
    # NOTE(review): agent_type is PascalCase here while every other entry uses
    # snake_case -- confirm this is intentional before normalizing.
    {
        "agent_key": "strategy_orchestrator",
        "agent_type": "StrategyOrchestrator",
        "role": "Team Lead",
        "responsibilities": [
            "Coordinate all marketing agents and delegate work",
            "Synthesize a unified daily strategy across channels",
            "Prioritize actions based on impact and urgency",
            "Maintain safety constraints and request approval when needed",
        ],
        "tools": [
            "market_signal_detector",
            "google_trends_fetcher",
            "agent_coordinator",
            "performance_analyzer",
            "strategy_synthesizer",
            "task_delegator",
        ],
        "defaults": {
            "display_name_template": "{website_name} Marketing Team Lead",
            "enabled": True,
            "schedule": {"mode": "on_demand"},
            "system_prompt_template": (
                "You are the Marketing Strategy Orchestrator for {website_name}.\n\n"
                "Mission: coordinate the AI marketing team to help {website_name} win in digital marketing.\n\n"
                "Non-negotiables:\n"
                "- Delegate tasks to specialists using the available team tools.\n"
                "- Keep outputs practical for non-technical users.\n"
                "- Maintain safety constraints and request approval for high-risk actions.\n\n"
                "Context you may receive:\n"
                "- website_url, brand_voice, target_audience, competitors, content pillars\n\n"
                "Output style:\n"
                "- Provide a concise plan with priorities, expected outcomes, and next steps."
            ),
            "task_prompt_template": (
                "Task: Create a unified marketing plan for today.\n"
                "Use the provided context and delegate specialized work when needed.\n\n"
                "Return JSON with:\n"
                "{\n"
                " \"summary\": string,\n"
                " \"priorities\": [string],\n"
                " \"delegations\": [{\"agent\": string, \"task\": string}],\n"
                " \"next_actions\": [{\"title\": string, \"why\": string, \"expected_outcome\": string, \"risk_level\": \"low\"|\"medium\"|\"high\"}]\n"
                "}\n"
            ),
        },
    },
    # Content strategist: weekly, Mondays 09:00.
    {
        "agent_key": "content_strategist",
        "agent_type": "content_strategist",
        "role": "Content Strategist",
        "responsibilities": [
            "Analyze content performance and engagement signals",
            "Identify content gaps using semantic and sitemap analysis",
            "Optimize content for clarity, SEO, and conversions",
            "Track performance over time and recommend next actions",
        ],
        "tools": [
            "content_analyzer",
            "semantic_gap_detector",
            "content_optimizer",
            "performance_tracker",
            "sitemap_analyzer",
        ],
        "defaults": {
            "display_name_template": "{website_name} Content Strategist",
            "enabled": True,
            "schedule": {"mode": "weekly", "days": ["mon"], "time": "09:00"},
            "system_prompt_template": (
                "You are the Content Strategy Agent for {website_name}.\n\n"
                "Mission: help {website_name} publish content that matches the brand voice and grows traffic.\n\n"
                "Operating principles:\n"
                "- Be specific, actionable, and non-technical.\n"
                "- Prefer high-impact, low-effort recommendations first.\n"
                "- Maintain brand consistency.\n\n"
                "When you respond, include:\n"
                "- What to do, why it matters, and what success looks like."
            ),
            "task_prompt_template": (
                "Task: Propose the next 5 content actions for {website_name}.\n"
                "Inputs may include: website analysis, competitors, content pillars, recent results.\n\n"
                "Return JSON with:\n"
                "{\n"
                " \"actions\": [{\"title\": string, \"why\": string, \"outline\": [string], \"cta\": string, \"risk_level\": \"low\"|\"medium\"|\"high\"}],\n"
                " \"notes\": [string]\n"
                "}\n"
            ),
        },
    },
    # Competitor analyst: weekly, Wednesdays 10:00.
    {
        "agent_key": "competitor_analyst",
        "agent_type": "competitor_analyst",
        "role": "Competitor Analyst",
        "responsibilities": [
            "Monitor competitor strategy and positioning using SIF",
            "Assess threats and opportunities from competitor moves",
            "Generate counter-strategy recommendations",
            "Execute safe response actions (with approvals when needed)",
        ],
        "tools": [
            "competitor_monitor",
            "threat_analyzer",
            "response_generator",
            "strategy_executor",
        ],
        "defaults": {
            "display_name_template": "{website_name} Competitor Analyst",
            "enabled": True,
            "schedule": {"mode": "weekly", "days": ["wed"], "time": "10:00"},
            "system_prompt_template": (
                "You are the Competitor Response Agent for {website_name}.\n\n"
                "Mission: monitor competitor moves and translate them into clear actions for {website_name}.\n\n"
                "Rules:\n"
                "- Use semantic insights to avoid guesswork.\n"
                "- Avoid panic. Prioritize only meaningful threats.\n"
                "- Keep outputs concise and actionable."
            ),
            "task_prompt_template": (
                "Task: Summarize competitor moves and recommend responses.\n\n"
                "Return JSON with:\n"
                "{\n"
                " \"threat_level\": \"low\"|\"medium\"|\"high\",\n"
                " \"signals\": [string],\n"
                " \"responses\": [{\"title\": string, \"why\": string, \"expected_outcome\": string, \"risk_level\": \"low\"|\"medium\"|\"high\"}]\n"
                "}\n"
            ),
        },
    },
    # SEO specialist: weekly, Fridays 11:00.
    {
        "agent_key": "seo_specialist",
        "agent_type": "seo_specialist",
        "role": "SEO Specialist",
        "responsibilities": [
            "Audit technical SEO and prioritize fixes by impact",
            "Generate safe SEO fixes and improvements",
            "Adjust keyword strategy based on data and trends",
            "Validate changes against safety and quality constraints",
        ],
        "tools": [
            "seo_auditor",
            "issue_prioritizer",
            "auto_fix_executor",
            "strategy_generator",
            "query_seo_knowledge_base",
        ],
        "defaults": {
            "display_name_template": "{website_name} SEO Specialist",
            "enabled": True,
            "schedule": {"mode": "weekly", "days": ["fri"], "time": "11:00"},
            "system_prompt_template": (
                "You are the SEO Optimization Agent for {website_name}.\n\n"
                "Mission: continuously improve technical SEO and on-page basics while preserving user experience.\n\n"
                "Rules:\n"
                "- Prioritize high-impact, low-risk fixes.\n"
                "- Explain recommendations in simple language.\n"
                "- If an action is risky, require approval."
            ),
            "task_prompt_template": (
                "Task: Produce a weekly SEO fix list for {website_name}.\n\n"
                "Return JSON with:\n"
                "{\n"
                " \"fixes\": [{\"title\": string, \"why\": string, \"steps\": [string], \"risk_level\": \"low\"|\"medium\"|\"high\"}],\n"
                " \"metrics_to_watch\": [string]\n"
                "}\n"
            ),
        },
    },
    # Social media manager: weekly, Tuesdays 09:30.
    {
        "agent_key": "social_media_manager",
        "agent_type": "social_media_manager",
        "role": "Social Media Manager",
        "responsibilities": [
            "Monitor social trends and identify opportunities",
            "Adapt content for platform-specific distribution",
            "Optimize engagement signals (timing, hooks, hashtags)",
            "Coordinate distribution safely (with approvals when needed)",
        ],
        "tools": [
            "social_monitor",
            "content_adapter",
            "engagement_optimizer",
            "distribution_manager",
        ],
        "defaults": {
            "display_name_template": "{website_name} Social Media Manager",
            "enabled": True,
            "schedule": {"mode": "weekly", "days": ["tue"], "time": "09:30"},
            "system_prompt_template": (
                "You are the Social Media Manager for {website_name}.\n\n"
                "Mission: help {website_name} distribute content effectively without spam.\n\n"
                "Rules:\n"
                "- Adapt to platform norms.\n"
                "- Optimize for engagement ethically.\n"
                "- Keep messages aligned with brand voice."
            ),
            "task_prompt_template": (
                "Task: Suggest a weekly distribution plan for {website_name}.\n\n"
                "Return JSON with:\n"
                "{\n"
                " \"posts\": [{\"platform\": string, \"post\": string, \"best_time\": string, \"hashtags\": [string]}],\n"
                " \"notes\": [string]\n"
                "}\n"
            ),
        },
    },
]
|
||||
|
||||
|
||||
def get_agent_catalog_entry(agent_key: str) -> Optional[AgentCatalogEntry]:
    """Look up a catalog entry by its ``agent_key``.

    The key is stripped of surrounding whitespace before comparison; a falsy
    key (``None`` or ``""``) is treated as the empty string. Returns the first
    matching entry from ``AGENT_TEAM_CATALOG``, or ``None`` when nothing
    matches.
    """
    wanted_key = (agent_key or "").strip()
    matches = (
        candidate
        for candidate in AGENT_TEAM_CATALOG
        if candidate.get("agent_key") == wanted_key
    )
    return next(matches, None)
|
||||
165
backend/services/intelligence/agents/trend_surfer_agent.py
Normal file
165
backend/services/intelligence/agents/trend_surfer_agent.py
Normal file
@@ -0,0 +1,165 @@
|
||||
"""
|
||||
Trend Surfer Agent
|
||||
Agent for identifying and capitalizing on emerging market trends.
|
||||
"""
|
||||
|
||||
import traceback
|
||||
from typing import List, Dict, Any, Optional
|
||||
from loguru import logger
|
||||
|
||||
from services.intelligence.agents.specialized_agents import SIFBaseAgent
|
||||
from services.intelligence.agents.market_signal_detector import MarketSignalDetector, MarketSignal, UrgencyLevel, SignalType
|
||||
from services.intelligence.txtai_service import TxtaiIntelligenceService
|
||||
from services.research.trends.google_trends_service import GoogleTrendsService
|
||||
|
||||
class TrendSurferAgent(SIFBaseAgent):
    """
    Agent for identifying and capitalizing on emerging market trends.

    "Surfs" the trends detected by MarketSignalDetector, combined with
    real-time Google Trends keywords, to propose timely content opportunities.
    """

    # Only this many trending keywords get a detailed lookup per run, to stay
    # clear of Google Trends rate limits.
    MAX_TRENDS_ANALYZED = 5

    def __init__(self, intelligence_service: TxtaiIntelligenceService, user_id: str, geo: str = "US"):
        """
        Args:
            intelligence_service: Semantic service used to check existing content coverage.
            user_id: User whose signals and trends are analysed.
            geo: Region code passed to the Google Trends analysis (defaults to "US").
        """
        super().__init__(intelligence_service)
        self.user_id = user_id
        self.geo = geo
        self.signal_detector = MarketSignalDetector(user_id)
        self.trends_service = GoogleTrendsService()

    async def surf_trends(self) -> List[Dict[str, Any]]:
        """
        Identify high-potential trends and suggest content angles.

        Integrates real-time Google Trends data with MarketSignalDetector signals.

        Returns:
            A list of opportunity dicts (see ``_analyze_opportunity``); an empty
            list when nothing is actionable or the run fails.
        """
        self._log_agent_operation("Surfing market trends")

        try:
            # 1. Get real-time trending searches from Google Trends. A failure
            # here must not discard the internally detected signals, so it is
            # contained and treated as "no real-time trends".
            try:
                realtime_trends = await self.trends_service.get_trending_searches(user_id=self.user_id)
            except Exception as e:
                logger.warning(f"[{self.__class__.__name__}] Could not fetch real-time trends: {e}")
                realtime_trends = None
            if realtime_trends is None:
                realtime_trends = []
            logger.info(f"[{self.__class__.__name__}] Found {len(realtime_trends)} real-time trends")

            # 2. Detect internal market signals (competitors, SERP, etc.)
            detected_signals = await self.signal_detector.detect_market_signals()

            # 3. Analyze real-time trends and convert to signals if actionable
            trend_signals = await self._analyze_realtime_trends(realtime_trends)

            # Build a fresh list instead of extending the detector's return
            # value in place, in case the detector keeps a reference to it.
            signals = list(detected_signals if detected_signals is not None else []) + trend_signals

            if not signals:
                logger.info(f"[{self.__class__.__name__}] No active market signals found")
                return []

            # Filter for actionable trends (High/Critical urgency or High impact)
            actionable_trends = [
                s for s in signals
                if s.urgency_level.value in ['high', 'critical'] or s.impact_score > 0.7
            ]

            logger.info(f"[{self.__class__.__name__}] Found {len(actionable_trends)} actionable trends")

            opportunities = []
            for trend in actionable_trends:
                opp = await self._analyze_opportunity(trend)
                if opp:
                    opportunities.append(opp)

            return opportunities

        except Exception as e:
            logger.error(f"[{self.__class__.__name__}] Trend surfing failed: {e}")
            logger.error(f"[{self.__class__.__name__}] Full traceback: {traceback.format_exc()}")
            return []

    async def _analyze_realtime_trends(self, trends: List[str]) -> List[MarketSignal]:
        """
        Convert raw trend keywords into MarketSignals.

        For each of the first ``MAX_TRENDS_ANALYZED`` keywords, fetches 7-day
        interest data through GoogleTrendsService and derives impact and
        urgency from the most recent interest value. Keywords without interest
        data, or whose lookup fails, are skipped.

        NOTE(review): impact and urgency depend only on the latest interest
        value; no "rising versus average" comparison is applied even though
        the signal description says "Surging".
        """
        signals: List[MarketSignal] = []
        top_trends = trends[:self.MAX_TRENDS_ANALYZED] if trends is not None else []

        for trend_kw in top_trends:
            try:
                # Get detailed data for the keyword
                trend_data = await self.trends_service.analyze_trends(
                    keywords=[trend_kw],
                    timeframe="now 7-d",  # Last 7 days to see immediate trajectory
                    geo=self.geo,
                )

                interest_over_time = trend_data.get("interest_over_time", [])
                if not interest_over_time:
                    continue

                values = [float(point.get(trend_kw, 0)) for point in interest_over_time if trend_kw in point]
                if not values:
                    continue

                last_interest = values[-1]

                # Interest values are treated as a 0-100 scale: normalise to
                # 0-1 for impact and bucket into urgency levels.
                impact_score = min(last_interest / 100.0, 1.0)
                urgency = UrgencyLevel.MEDIUM
                if last_interest > 80:
                    urgency = UrgencyLevel.CRITICAL
                elif last_interest > 50:
                    urgency = UrgencyLevel.HIGH

                # Tolerate a missing/None "related_topics" section and drop
                # blank titles so they do not pollute later search queries.
                top_related = (trend_data.get("related_topics") or {}).get("top") or []
                related_titles = [t.get("topic_title", "") for t in top_related[:3]]
                related_titles = [title for title in related_titles if title]

                signal = MarketSignal(
                    signal_id=f"trend_{trend_kw.replace(' ', '_')}_{int(last_interest)}",
                    signal_type=SignalType.SOCIAL_TREND,  # Using SOCIAL_TREND as proxy for general search trend
                    source="google_trends",
                    description=f"Surging interest in '{trend_kw}'",
                    impact_score=impact_score,
                    urgency_level=urgency,
                    confidence_score=0.9,
                    related_topics=related_titles,
                    suggested_actions=["Create timely content", "Update social media"],
                    metadata=trend_data
                )
                signals.append(signal)

            except Exception as e:
                # Best effort per keyword: one failing lookup must not abort the rest.
                logger.warning(f"[{self.__class__.__name__}] Failed to analyze trend '{trend_kw}': {e}")
                continue

        return signals

    async def _analyze_opportunity(self, trend: MarketSignal) -> Optional[Dict[str, Any]]:
        """
        Analyze a specific trend signal to generate a content opportunity.

        Runs a semantic search for the trend to estimate how well existing
        content already covers it, then recommends either updating existing
        content (top score above 0.8) or creating new content.

        Returns:
            The opportunity dict, or ``None`` if the analysis fails.
        """
        try:
            related_topics = trend.related_topics or []

            # Use semantic search to find if we already have content covering this
            query = f"{trend.description} {' '.join(related_topics)}".strip()
            existing_content = await self.intelligence.search(query, limit=3)

            coverage_score = 0.0
            if existing_content:
                # If top result has high score, we might already cover it
                coverage_score = existing_content[0].get('score', 0.0) or 0.0

            if coverage_score > 0.8:
                recommendation = "Update existing content"
            else:
                recommendation = "Create new content"

            return {
                "trend_id": trend.signal_id,
                "topic": trend.description,
                "source": trend.source,
                "urgency": trend.urgency_level.value,
                "impact_score": trend.impact_score,
                "current_coverage": coverage_score,
                "recommendation": recommendation,
                "suggested_angle": f"Leverage {trend.source} trend on {related_topics[0] if related_topics else 'topic'}",
                "detected_at": trend.detected_at
            }

        except Exception as e:
            logger.warning(f"[{self.__class__.__name__}] Failed to analyze opportunity for signal {trend.signal_id}: {e}")
            return None
|
||||
145
backend/services/intelligence/harvester.py
Normal file
145
backend/services/intelligence/harvester.py
Normal file
@@ -0,0 +1,145 @@
|
||||
"""
|
||||
Semantic Harvester Service
|
||||
Handles deep content acquisition using Exa AI.
|
||||
Prioritizes Exa for scale (hundreds of URLs) to avoid IP bans.
|
||||
"""
|
||||
|
||||
import traceback
|
||||
from datetime import datetime
|
||||
from typing import List, Dict, Any, Optional
|
||||
from loguru import logger
|
||||
from services.research.exa_service import ExaService
|
||||
|
||||
class SemanticHarvesterService:
    """Acquire page content for a website, or a set of competitor sites, via Exa."""

    # Exa limit per request; larger ``limit`` values are capped to this.
    MAX_RESULTS_PER_REQUEST = 50

    def __init__(self, api_key: Optional[str] = None):
        """
        Args:
            api_key: Accepted for interface compatibility.
                NOTE(review): not forwarded anywhere; ExaService() is built
                without it -- confirm whether it should be.
        """
        self.exa_service = ExaService()
        self._harvest_stats = {
            "total_urls_processed": 0,
            "successful_extractions": 0,
            "failed_extractions": 0,
            "last_harvest_time": None
        }

    @staticmethod
    def _extract_domain(website_url: str) -> str:
        """Return the lower-cased host of ``website_url`` (no scheme, port or path)."""
        from urllib.parse import urlparse

        return urlparse(website_url).hostname or website_url

    @staticmethod
    def _result_to_page(result: Any) -> Dict[str, Any]:
        """Map one Exa result object onto the page dict returned to callers."""
        return {
            "url": getattr(result, 'url', ''),
            "title": getattr(result, 'title', ''),
            # Fall back to the summary, and never hand back None as content.
            "content": getattr(result, 'text', '') or getattr(result, 'summary', '') or '',
            "metadata": {
                "published_date": getattr(result, 'published_date', None),
                "author": getattr(result, 'author', None),
                "highlights": getattr(result, 'highlights', [])
            }
        }

    async def harvest_website(self, website_url: str, limit: int = 100) -> List[Dict[str, Any]]:
        """
        Retrieve pages of a website using Exa AI.

        Args:
            website_url: The root URL to crawl. A missing scheme defaults to https.
            limit: Maximum number of pages to retrieve (capped at
                ``MAX_RESULTS_PER_REQUEST``); non-positive values yield ``[]``.

        Returns:
            List of pages with content and metadata. Empty on invalid input or
            failure; placeholder data when the Exa service is disabled.
        """
        logger.info(f"[SemanticHarvester] Starting harvest for {website_url} (Limit: {limit})")

        try:
            # Validate input
            if not website_url or not website_url.strip():
                logger.error(f"[SemanticHarvester] Invalid website URL provided: {website_url}")
                return []
            if limit <= 0:
                logger.warning(f"[SemanticHarvester] Non-positive limit ({limit}); nothing to harvest")
                return []

            # Normalize URL
            website_url = website_url.strip()
            if not website_url.startswith(('http://', 'https://')):
                website_url = f"https://{website_url}"
                logger.debug(f"[SemanticHarvester] Normalized URL to: {website_url}")

            logger.debug(f"[SemanticHarvester] Processing domain: {website_url}")

            if not self.exa_service.enabled:
                self.exa_service._try_initialize()

            if not self.exa_service.enabled:
                logger.warning("[SemanticHarvester] Exa service disabled. Returning placeholder data.")
                return self._get_placeholder_data(website_url)

            # Restrict the search to the site's host. The bare domain is used
            # (not the full URL with scheme), and include_domains makes the
            # restriction explicit instead of relying on query text alone.
            import asyncio
            import functools

            domain = self._extract_domain(website_url)
            search_call = functools.partial(
                self.exa_service.exa.search_and_contents,
                query=f"site:{domain}",
                include_domains=[domain],
                num_results=min(limit, self.MAX_RESULTS_PER_REQUEST),
                text=True,
                highlights=True
            )
            # The Exa client call is synchronous; run it in the default
            # executor so the event loop is not blocked during the request.
            search_response = await asyncio.get_running_loop().run_in_executor(None, search_call)

            found = getattr(search_response, 'results', None) if search_response else None
            results = [self._result_to_page(result) for result in (found or [])]

            logger.info(f"[SemanticHarvester] Successfully harvested {len(results)} pages from {website_url}")
            return results

        except Exception as e:
            logger.error(f"[SemanticHarvester] Failed to harvest {website_url}: {e}")
            logger.error(f"[SemanticHarvester] Full traceback: {traceback.format_exc()}")
            return []

    def _get_placeholder_data(self, website_url: str) -> List[Dict[str, Any]]:
        """Return placeholder data for testing."""
        return [
            {
                "url": f"{website_url}/page1",
                "title": "Sample Page 1",
                "content": "This is sample content from page 1",
                "metadata": {"word_count": 100}
            }
        ]

    async def harvest_competitors(self, competitor_urls: List[str], pages_per_competitor: int = 10) -> List[Dict[str, Any]]:
        """
        Harvest content from multiple competitors with detailed logging.

        Each competitor is harvested in turn; a competitor that fails or
        yields no content counts as failed but does not stop the others.
        Updates the cumulative harvest statistics.

        Returns:
            All harvested pages across competitors, in input order.
        """
        # Guard first: the log line below calls len(), which would raise on None.
        if not competitor_urls:
            logger.warning("[SemanticHarvester] No competitor URLs provided")
            return []

        logger.info(f"[SemanticHarvester] Starting competitor harvest for {len(competitor_urls)} competitors")

        all_content = []
        successful_harvests = 0
        failed_harvests = 0

        for i, url in enumerate(competitor_urls, 1):
            try:
                logger.debug(f"[SemanticHarvester] Processing competitor {i}/{len(competitor_urls)}: {url}")
                content = await self.harvest_website(url, limit=pages_per_competitor)

                if content:
                    all_content.extend(content)
                    successful_harvests += 1
                    logger.debug(f"[SemanticHarvester] Successfully harvested {len(content)} pages from {url}")
                else:
                    failed_harvests += 1
                    logger.warning(f"[SemanticHarvester] No content harvested from {url}")

            except Exception as e:
                failed_harvests += 1
                logger.error(f"[SemanticHarvester] Failed to harvest competitor {url}: {e}")

        # Update statistics
        self._harvest_stats["total_urls_processed"] += len(competitor_urls)
        self._harvest_stats["successful_extractions"] += successful_harvests
        self._harvest_stats["failed_extractions"] += failed_harvests
        self._harvest_stats["last_harvest_time"] = datetime.now().isoformat()

        logger.info(f"[SemanticHarvester] Competitor harvest completed: {successful_harvests} successful, {failed_harvests} failed")
        logger.info(f"[SemanticHarvester] Total content pieces harvested: {len(all_content)}")

        return all_content

    def get_harvest_stats(self) -> Dict[str, Any]:
        """Get statistics about harvesting operations (a copy; safe to mutate)."""
        return self._harvest_stats.copy()
|
||||
1
backend/services/intelligence/monitoring/__init__.py
Normal file
1
backend/services/intelligence/monitoring/__init__.py
Normal file
@@ -0,0 +1 @@
|
||||
|
||||
585
backend/services/intelligence/monitoring/semantic_dashboard.py
Normal file
585
backend/services/intelligence/monitoring/semantic_dashboard.py
Normal file
@@ -0,0 +1,585 @@
|
||||
"""
|
||||
Phase 2B: Real-Time Semantic Dashboard
|
||||
|
||||
This module implements a real-time semantic monitoring dashboard for ongoing
|
||||
content analysis, competitor tracking, and semantic health monitoring.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import time
|
||||
from typing import Dict, List, Any, Optional, Set
|
||||
from datetime import datetime, timedelta
|
||||
from dataclasses import dataclass, asdict
|
||||
from loguru import logger
|
||||
|
||||
from ..txtai_service import TxtaiIntelligenceService
|
||||
from ..semantic_cache import semantic_cache_manager
|
||||
from ..sif_integration import SIFIntegrationService
|
||||
# Agent imports will be done lazily to avoid circular imports
|
||||
|
||||
|
||||
@dataclass
class SemanticHealthMetric:
    """A single named health measurement together with the threshold it is judged against."""

    # Metric identifier, e.g. "semantic_diversity", "content_freshness", "authority_score".
    metric_name: str
    # Measured value for this metric.
    value: float
    # Threshold the value is compared with to derive ``status``.
    threshold: float
    # One of "healthy", "warning", "critical".
    status: str
    # ISO-8601 timestamp of when the metric was computed.
    timestamp: str
    # Human-readable explanation of the measurement.
    description: str
    # Suggested follow-up actions; may be empty.
    recommendations: List[str]
|
||||
|
||||
|
||||
@dataclass
class CompetitorSemanticSnapshot:
    """Point-in-time view of one competitor's semantic positioning."""

    # Identifier for the competitor.
    competitor_id: str
    # Display name of the competitor.
    competitor_name: str
    # Degree of semantic overlap with the user's content.
    semantic_overlap: float
    # Topics attributed to the competitor as unique.
    unique_topics: List[str]
    # Amount of competitor content counted in this snapshot.
    content_volume: int
    # Authority score for the competitor.
    authority_score: float
    # ISO-8601 timestamp of when the snapshot was taken.
    last_updated: str
    # Topics currently trending for this competitor.
    trending_topics: List[str]
|
||||
|
||||
|
||||
@dataclass
class ContentSemanticInsight:
    """One insight produced by real-time content monitoring."""

    # Identifier of the insight.
    insight_id: str
    # One of "gap", "opportunity", "trend", "threat".
    insight_type: str
    # Short headline for the insight.
    title: str
    # Longer explanation of the finding.
    description: str
    # Confidence in the insight.
    confidence_score: float
    # Estimated impact.
    # NOTE(review): sample insights built in this module pass 8.5 here, so this
    # is not on the same scale as confidence_score -- confirm the intended range.
    impact_score: float
    # Topics the insight relates to.
    related_topics: List[str]
    # Recommended follow-up actions.
    suggested_actions: List[str]
    # ISO-8601 creation timestamp.
    created_at: str
    # ISO-8601 timestamp after which the insight is considered expired.
    expires_at: str
|
||||
|
||||
|
||||
class RealTimeSemanticMonitor:
|
||||
"""
|
||||
Real-time semantic monitoring system for content and competitor analysis.
|
||||
|
||||
Features:
|
||||
- Continuous semantic health monitoring
|
||||
- Real-time competitor tracking
|
||||
- Content performance analysis
|
||||
- Automated alerting system
|
||||
- Trend detection and forecasting
|
||||
"""
|
||||
|
||||
def __init__(self, user_id: str):
    """Set up services, thresholds and monitoring state for ``user_id``.

    Monitoring does not start here; the monitor is created idle
    (``is_monitoring`` is False) with no competitors tracked.
    """
    self.user_id = user_id

    # Service handles used by the monitoring checks.
    self.intelligence_service = TxtaiIntelligenceService(user_id)
    self.cache_manager = semantic_cache_manager
    self.sif_service = SIFIntegrationService(user_id)

    # Agents are created lazily elsewhere to avoid circular imports.
    self.strategy_agent = self.guardian_agent = self.link_agent = None

    # Seconds between monitoring cycles (5 minutes).
    self.monitoring_interval = 300
    # Minimum acceptable value per health metric.
    self.health_thresholds = dict(
        semantic_diversity=0.6,
        content_freshness=0.7,
        competitor_gap=0.5,
        authority_score=0.4,
    )

    # Mutable monitoring state.
    self.is_monitoring = False
    self.monitored_competitors: Set[str] = set()
    self.alert_subscribers: List[str] = []
    self.monitoring_history: List[Dict[str, Any]] = []

    logger.info(f"Real-time semantic monitor initialized for user {user_id}")
|
||||
|
||||
async def check_semantic_health(self, user_id: Optional[str] = None) -> Any:
    """
    Public wrapper for semantic health check.

    Aggregates the individual metrics from ``_check_semantic_health`` into a
    single result object with two attributes:

    - ``status``: "critical" if any metric is critical, else "warning" if any
      metric is a warning, else "healthy"; "unknown" when there are no metrics.
    - ``value``: the mean of the metric values (0.0 when there are no metrics).

    Args:
        user_id: Ignored; the monitor always checks ``self.user_id``. Kept so
            existing callers that pass it keep working.
    """
    # Single result type for both the "no metrics" and the aggregated case;
    # the defaults describe the unknown state.
    @dataclass
    class HealthResult:
        status: str = "unknown"
        value: float = 0.0

    metrics = await self._check_semantic_health()
    if not metrics:
        return HealthResult()

    # Worst status wins.
    statuses = {m.status for m in metrics}
    if "critical" in statuses:
        status = "critical"
    elif "warning" in statuses:
        status = "warning"
    else:
        status = "healthy"

    avg_value = sum(m.value for m in metrics) / len(metrics)
    return HealthResult(status=status, value=avg_value)
|
||||
|
||||
async def start_monitoring(self, competitors: Optional[List[str]] = None) -> bool:
    """Start real-time semantic monitoring.

    Args:
        competitors: Competitors to track. When given and non-empty, replaces
            the currently monitored set; otherwise the set is left unchanged.

    Returns:
        True when monitoring is running after the call, False if starting failed.
    """
    try:
        self.is_monitoring = True
        if competitors:
            self.monitored_competitors = set(competitors)

        logger.info(f"Started semantic monitoring for user {self.user_id}")
        logger.info(f"Monitoring {len(self.monitored_competitors)} competitors")

        # Start the background loop only if one is not already alive, so that
        # repeated calls do not run several loops side by side. The task is
        # kept on the instance because the event loop holds only a weak
        # reference to tasks; an unreferenced task can be garbage-collected
        # before it finishes.
        existing_task = getattr(self, "_monitoring_task", None)
        if existing_task is None or existing_task.done():
            self._monitoring_task = asyncio.create_task(self._monitoring_loop())

        return True

    except Exception as e:
        # Do not report "monitoring" when the loop could not be started.
        self.is_monitoring = False
        logger.error(f"Failed to start semantic monitoring: {e}")
        return False
|
||||
|
||||
async def stop_monitoring(self) -> bool:
    """Request that real-time semantic monitoring stop.

    Clears the ``is_monitoring`` flag, which the monitoring loop checks at the
    start of each cycle. Returns True on success, False if an error occurred.
    """
    try:
        self.is_monitoring = False
        logger.info(f"Stopped semantic monitoring for user {self.user_id}")
    except Exception as exc:
        logger.error(f"Failed to stop semantic monitoring: {exc}")
        return False
    else:
        return True
|
||||
|
||||
async def _monitoring_loop(self):
    """Run monitoring cycles until ``is_monitoring`` is cleared.

    Each cycle gathers health metrics, competitor snapshots and content
    insights, appends them to ``monitoring_history`` (pruned to the last
    24 hours), checks alerts, caches the snapshot, then sleeps for
    ``monitoring_interval`` seconds. An error in a cycle is logged and the
    loop still waits one interval before trying again.
    """
    while self.is_monitoring:
        try:
            logger.info(f"Running semantic health check for user {self.user_id}")

            # Gather this cycle's analysis results.
            health_metrics = await self._check_semantic_health()
            competitor_updates = await self._monitor_competitors()
            content_insights = await self._analyze_content_performance()

            # Serialise everything into one plain-dict snapshot.
            snapshot = dict(
                timestamp=datetime.now().isoformat(),
                user_id=self.user_id,
                health_metrics=list(map(asdict, health_metrics)),
                competitor_updates=list(map(asdict, competitor_updates)),
                content_insights=list(map(asdict, content_insights)),
            )
            self.monitoring_history.append(snapshot)

            # Drop history entries older than 24 hours.
            oldest_allowed = datetime.now() - timedelta(hours=24)
            recent_entries = []
            for entry in self.monitoring_history:
                if datetime.fromisoformat(entry["timestamp"]) > oldest_allowed:
                    recent_entries.append(entry)
            self.monitoring_history = recent_entries

            await self._check_alerts(health_metrics, competitor_updates, content_insights)

            # Make the latest snapshot available to the dashboard.
            await self._cache_monitoring_results(snapshot)

            logger.info(f"Semantic monitoring cycle completed. Next check in {self.monitoring_interval}s")

            await asyncio.sleep(self.monitoring_interval)

        except Exception as exc:
            logger.error(f"Error in semantic monitoring loop: {exc}")
            # Keep the loop alive: wait one interval, then retry.
            await asyncio.sleep(self.monitoring_interval)
|
||||
|
||||
async def _check_semantic_health(self) -> List[SemanticHealthMetric]:
    """Check overall semantic health of user's content.

    Produces up to three metrics, in this order: ``semantic_diversity``,
    ``content_freshness`` and ``authority_score``. Each is compared with its
    entry in ``self.health_thresholds``.

    Returns:
        The metrics computed so far. Empty when the insights lookup reports an
        error; partial if a later computation raises (the error is logged).
    """
    metrics: List[SemanticHealthMetric] = []

    try:
        # Get current semantic insights
        insights = await self.sif_service.get_semantic_insights({"user_id": self.user_id})

        if insights.get("source") == "error":
            logger.warning("Failed to get semantic insights for health check")
            return metrics

        # Tolerate explicit None values in the payload.
        insights_data = insights.get("insights") or {}
        content_pillars = insights_data.get("content_pillars") or []

        # Semantic diversity: 10 or more pillars counts as fully diverse.
        # Clamp so the value really stays within 0-1.
        semantic_diversity = min(len(content_pillars) / 10.0, 1.0)

        diversity_status = "healthy" if semantic_diversity >= self.health_thresholds["semantic_diversity"] else "warning"
        metrics.append(SemanticHealthMetric(
            metric_name="semantic_diversity",
            value=semantic_diversity,
            threshold=self.health_thresholds["semantic_diversity"],
            status=diversity_status,
            timestamp=datetime.now().isoformat(),
            description=f"Content covers {len(content_pillars)} semantic pillars",
            recommendations=["Expand content topics", "Explore new semantic areas"] if diversity_status == "warning" else []
        ))

        # Content freshness metric (based on recent updates)
        freshness_score = await self._calculate_content_freshness()
        freshness_status = "healthy" if freshness_score >= self.health_thresholds["content_freshness"] else "warning"

        metrics.append(SemanticHealthMetric(
            metric_name="content_freshness",
            value=freshness_score,
            threshold=self.health_thresholds["content_freshness"],
            status=freshness_status,
            timestamp=datetime.now().isoformat(),
            description="Content freshness based on recent semantic updates",
            recommendations=["Update content regularly", "Monitor trending topics"] if freshness_status == "warning" else []
        ))

        # Authority score metric: the only one that escalates to "critical".
        authority_score = await self._calculate_authority_score()
        authority_status = "healthy" if authority_score >= self.health_thresholds["authority_score"] else "critical"

        metrics.append(SemanticHealthMetric(
            metric_name="authority_score",
            value=authority_score,
            threshold=self.health_thresholds["authority_score"],
            status=authority_status,
            timestamp=datetime.now().isoformat(),
            description="Semantic authority based on content depth and relevance",
            recommendations=["Create authoritative content", "Build topical expertise"] if authority_status != "healthy" else []
        ))

    except Exception as e:
        logger.error(f"Failed to check semantic health: {e}")

    return metrics
|
||||
|
||||
async def _monitor_competitors(self) -> List[CompetitorSemanticSnapshot]:
|
||||
"""Monitor competitor semantic positioning."""
|
||||
snapshots = []
|
||||
|
||||
for competitor in self.monitored_competitors:
|
||||
try:
|
||||
# This would perform actual competitor analysis
|
||||
# For now, return sample data
|
||||
snapshot = CompetitorSemanticSnapshot(
|
||||
competitor_id=f"comp_{competitor}",
|
||||
competitor_name=competitor,
|
||||
semantic_overlap=0.65,
|
||||
unique_topics=["AI automation", "Voice search", "Video marketing"],
|
||||
content_volume=random.randint(50, 200),
|
||||
authority_score=random.uniform(0.4, 0.9),
|
||||
last_updated=datetime.now().isoformat(),
|
||||
trending_topics=["AI content", "Voice optimization"]
|
||||
)
|
||||
|
||||
snapshots.append(snapshot)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to monitor competitor {competitor}: {e}")
|
||||
|
||||
return snapshots
|
||||
|
||||
async def _analyze_content_performance(self) -> List[ContentSemanticInsight]:
    """Produce the current list of content insights.

    The insights are hard-coded samples (one gap, one trend, one threat).
    Each is stamped with the same creation time and an expiry a fixed
    number of days later. If building an insight fails, the error is
    logged and the insights built so far are returned.
    """
    insights = []

    # (constructor fields, days until the insight expires)
    samples = (
        (
            {
                "insight_id": "gap_001",
                "insight_type": "gap",
                "title": "Voice Search Optimization Gap",
                "description": "Competitors are covering voice search topics 40% more than your content",
                "confidence_score": 0.85,
                "impact_score": 8.5,
                "related_topics": ["voice search", "featured snippets", "conversational AI"],
                "suggested_actions": ["Create voice search content", "Optimize for featured snippets"],
            },
            7,
        ),
        (
            {
                "insight_id": "trend_001",
                "insight_type": "trend",
                "title": "AI Content Tools Trending",
                "description": "AI content creation tools showing 300% increase in search volume",
                "confidence_score": 0.92,
                "impact_score": 9.2,
                "related_topics": ["AI content", "content automation", "AI writing tools"],
                "suggested_actions": ["Create AI tool reviews", "Develop AI content strategy"],
            },
            14,
        ),
        (
            {
                "insight_id": "threat_001",
                "insight_type": "threat",
                "title": "Competitor Content Surge",
                "description": "Top competitor increased content production by 150% in your key topics",
                "confidence_score": 0.78,
                "impact_score": 7.8,
                "related_topics": ["content strategy", "competitor analysis"],
                "suggested_actions": ["Increase content frequency", "Focus on unique angles"],
            },
            5,
        ),
    )

    try:
        current_time = datetime.now()

        for fields, lifetime_days in samples:
            insights.append(
                ContentSemanticInsight(
                    created_at=current_time.isoformat(),
                    expires_at=(current_time + timedelta(days=lifetime_days)).isoformat(),
                    **fields,
                )
            )
    except Exception as e:
        logger.error(f"Failed to analyze content performance: {e}")

    return insights
|
||||
|
||||
async def _calculate_content_freshness(self) -> float:
|
||||
"""Calculate content freshness score."""
|
||||
# This would analyze actual content timestamps and updates
|
||||
return 0.85 # Placeholder
|
||||
|
||||
async def _calculate_authority_score(self) -> float:
|
||||
"""Calculate semantic authority score."""
|
||||
# This would analyze content depth, backlinks, engagement, etc.
|
||||
return 0.72 # Placeholder
|
||||
|
||||
async def _check_alerts(self, health_metrics: List[SemanticHealthMetric],
|
||||
competitor_updates: List[CompetitorSemanticSnapshot],
|
||||
content_insights: List[ContentSemanticInsight]):
|
||||
"""Check for alert conditions and notify subscribers."""
|
||||
alerts = []
|
||||
|
||||
# Check health metrics for critical conditions
|
||||
for metric in health_metrics:
|
||||
if metric.status == "critical":
|
||||
alerts.append({
|
||||
"type": "health_critical",
|
||||
"title": f"Critical: {metric.metric_name}",
|
||||
"message": metric.description,
|
||||
"severity": "critical",
|
||||
"timestamp": datetime.now().isoformat()
|
||||
})
|
||||
|
||||
# Check for high-impact insights
|
||||
for insight in content_insights:
|
||||
if insight.impact_score >= 8.0:
|
||||
alerts.append({
|
||||
"type": "high_impact_insight",
|
||||
"title": f"High Impact: {insight.title}",
|
||||
"message": insight.description,
|
||||
"severity": "warning",
|
||||
"timestamp": datetime.now().isoformat()
|
||||
})
|
||||
|
||||
# Send alerts to subscribers
|
||||
if alerts:
|
||||
try:
|
||||
from services.agent_activity_service import AgentActivityService
|
||||
from services.database import get_session_for_user
|
||||
|
||||
db = get_session_for_user(self.user_id)
|
||||
if db:
|
||||
service = AgentActivityService(db, self.user_id)
|
||||
for alert in alerts:
|
||||
alert_type = alert.get("type") or "semantic_alert"
|
||||
severity = alert.get("severity") or "info"
|
||||
mapped_severity = "error" if severity == "critical" else ("warning" if severity == "warning" else "info")
|
||||
dedupe_key = None
|
||||
if alert_type == "health_critical":
|
||||
dedupe_key = f"semantic_health_critical:{alert.get('title')}:{datetime.utcnow().date().isoformat()}"
|
||||
elif alert_type == "high_impact_insight":
|
||||
dedupe_key = f"semantic_high_impact:{alert.get('title')}:{datetime.utcnow().date().isoformat()}"
|
||||
|
||||
service.create_alert(
|
||||
alert_type=alert_type,
|
||||
title=alert.get("title") or "Semantic alert",
|
||||
message=alert.get("message") or "",
|
||||
severity=mapped_severity,
|
||||
payload=alert,
|
||||
cta_path="/seo-dashboard",
|
||||
dedupe_key=dedupe_key,
|
||||
)
|
||||
db.close()
|
||||
except Exception:
|
||||
pass
|
||||
await self._send_alerts(alerts)
|
||||
|
||||
async def get_cache_stats(self) -> Dict[str, Any]:
    """Return the statistics reported by this monitor's cache manager."""
    stats = self.cache_manager.get_stats()
    return stats
|
||||
|
||||
async def _send_alerts(self, alerts: List[Dict[str, Any]]):
    """Emit each alert as a warning-level log line.

    Every alert must carry ``title`` and ``message`` keys. No external
    notification channel (email, Slack, ...) is contacted yet.
    """
    for item in alerts:
        title, message = item['title'], item['message']
        logger.warning(f"ALERT: {title} - {message}")
        # Here you would integrate with notification systems (email, Slack, etc.)
|
||||
|
||||
async def _cache_monitoring_results(self, snapshot: Dict[str, Any]):
    """Store the latest monitoring snapshot for dashboard reads.

    The snapshot is written under ``semantic_monitoring_<user_id>`` with a
    five minute TTL. Any failure is logged and otherwise ignored.

    NOTE(review): this relies on ``self.cache_manager`` exposing
    ``set(key, user_id, value, ttl=...)`` - confirm the configured cache
    manager actually provides that method.
    """
    five_minutes = 300  # 5 minutes
    try:
        key = f"semantic_monitoring_{self.user_id}"
        self.cache_manager.set(key, self.user_id, snapshot, ttl=five_minutes)
        logger.debug(f"Cached monitoring results for user {self.user_id}")
    except Exception as e:
        logger.error(f"Failed to cache monitoring results: {e}")
|
||||
|
||||
def get_dashboard_data(self) -> Dict[str, Any]:
    """Assemble the dashboard payload for this monitor's user.

    Reads the cached monitoring snapshot. When one exists, its sections
    are returned together with the live monitoring status; otherwise an
    "inactive" payload with empty sections is returned.

    Returns:
        The dashboard dictionary, or ``{"error": <message>}`` if building
        it failed.
    """
    try:
        snapshot = self.cache_manager.get(
            f"semantic_monitoring_{self.user_id}", self.user_id
        )

        if not snapshot:
            # Nothing cached yet: report an empty, inactive dashboard.
            return {
                "status": "inactive",
                "last_updated": datetime.now().isoformat(),
                "health_metrics": [],
                "competitor_updates": [],
                "content_insights": [],
                "monitored_competitors": list(self.monitored_competitors),
                "monitoring_interval": self.monitoring_interval
            }

        if self.is_monitoring:
            status = "active"
        else:
            status = "inactive"

        payload = {"status": status, "last_updated": snapshot.get("timestamp")}
        for section in ("health_metrics", "competitor_updates", "content_insights"):
            payload[section] = snapshot.get(section, [])
        payload["monitored_competitors"] = list(self.monitored_competitors)
        payload["monitoring_interval"] = self.monitoring_interval
        return payload

    except Exception as e:
        logger.error(f"Failed to get dashboard data: {e}")
        return {"error": str(e)}
|
||||
|
||||
def get_monitoring_history(self, hours: int = 24) -> List[Dict[str, Any]]:
    """Return history records newer than ``hours`` hours ago.

    Each record's ``"timestamp"`` must be an ISO-8601 string; records are
    returned in their stored order.
    """
    oldest_allowed = datetime.now() - timedelta(hours=hours)

    recent = []
    for record in self.monitoring_history:
        recorded_at = datetime.fromisoformat(record["timestamp"])
        if recorded_at > oldest_allowed:
            recent.append(record)
    return recent
|
||||
|
||||
|
||||
class SemanticDashboardAPI:
    """Entry point for the semantic monitoring dashboard.

    Keeps one RealTimeSemanticMonitor per user id and forwards every
    request to that user's monitor, creating it on first use.
    """

    def __init__(self):
        # user_id -> monitor, populated lazily by get_monitor().
        self.monitors: Dict[str, RealTimeSemanticMonitor] = {}

    def get_monitor(self, user_id: str) -> RealTimeSemanticMonitor:
        """Return the monitor for ``user_id``, creating it if necessary."""
        try:
            return self.monitors[user_id]
        except KeyError:
            created = self.monitors[user_id] = RealTimeSemanticMonitor(user_id)
            return created

    async def start_dashboard_monitoring(self, user_id: str, competitors: List[str] = None) -> Dict[str, Any]:
        """Start monitoring for a user and report the outcome."""
        started = await self.get_monitor(user_id).start_monitoring(competitors)

        report = {
            "user_id": user_id,
            "monitoring_started": started,
            "competitors": competitors or [],
            "timestamp": datetime.now().isoformat()
        }
        return report

    async def stop_dashboard_monitoring(self, user_id: str) -> Dict[str, Any]:
        """Stop monitoring for a user and report the outcome."""
        stopped = await self.get_monitor(user_id).stop_monitoring()

        report = {
            "user_id": user_id,
            "monitoring_stopped": stopped,
            "timestamp": datetime.now().isoformat()
        }
        return report

    def get_dashboard_data(self, user_id: str) -> Dict[str, Any]:
        """Return the current dashboard payload of the user's monitor."""
        return self.get_monitor(user_id).get_dashboard_data()

    def get_monitoring_history(self, user_id: str, hours: int = 24) -> List[Dict[str, Any]]:
        """Return the user's monitoring history for the last ``hours`` hours."""
        return self.get_monitor(user_id).get_monitoring_history(hours)
|
||||
|
||||
|
||||
# Global API instance
|
||||
# Shared module-level API object; it keeps the per-user monitors in its `monitors` dict.
semantic_dashboard_api = SemanticDashboardAPI()
|
||||
|
||||
|
||||
# Example usage and testing
|
||||
async def test_semantic_dashboard():
    """Manual smoke test: start monitoring, read the dashboard, stop.

    Runs against the module-level ``semantic_dashboard_api`` and pauses
    ten seconds so the monitor has time to collect a first snapshot.
    Results are only logged; nothing is asserted.
    """
    logger.info("Testing Real-Time Semantic Dashboard")

    # Test fixture
    user_id = "test_user_dashboard"
    competitors = ["competitor1.com", "competitor2.com", "competitor3.com"]
    api = semantic_dashboard_api

    # Start monitoring
    logger.info("Starting semantic monitoring...")
    start_result = await api.start_dashboard_monitoring(user_id, competitors)
    logger.info(f"Monitoring started: {start_result}")

    # Give the monitor time to collect data
    logger.info("Waiting for monitoring data collection...")
    await asyncio.sleep(10)

    # Read the dashboard
    logger.info("Getting dashboard data...")
    dashboard_data = api.get_dashboard_data(user_id)
    logger.info(f"Dashboard status: {dashboard_data.get('status')}")
    for label, section in (
        ("Health metrics", "health_metrics"),
        ("Competitor updates", "competitor_updates"),
        ("Content insights", "content_insights"),
    ):
        logger.info(f"{label}: {len(dashboard_data.get(section, []))}")

    # Read the history
    logger.info("Getting monitoring history...")
    history = api.get_monitoring_history(user_id, hours=1)
    logger.info(f"Monitoring history entries: {len(history)}")

    # Stop monitoring
    logger.info("Stopping semantic monitoring...")
    stop_result = await api.stop_dashboard_monitoring(user_id)
    logger.info(f"Monitoring stopped: {stop_result}")

    logger.info("Semantic Dashboard test completed successfully!")
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Executed as a script: run the dashboard smoke test once.
    asyncio.run(test_semantic_dashboard())
|
||||
556
backend/services/intelligence/semantic_cache.py
Normal file
556
backend/services/intelligence/semantic_cache.py
Normal file
@@ -0,0 +1,556 @@
|
||||
"""
|
||||
Enhanced Semantic Caching System for ALwrity SIF
|
||||
|
||||
Provides intelligent caching for semantic operations including:
|
||||
- User-specific semantic indices with TTL management
|
||||
- Query result caching with relevance-based invalidation
|
||||
- Content analysis caching with versioning
|
||||
- Intelligent cache warming based on user behavior
|
||||
"""
|
||||
|
||||
import json
|
||||
import hashlib
|
||||
import time
|
||||
from typing import Dict, List, Optional, Any, Union
|
||||
from datetime import datetime, timedelta
|
||||
from dataclasses import dataclass, asdict
|
||||
from functools import wraps
|
||||
import logging
|
||||
from collections import OrderedDict
|
||||
import asyncio
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@dataclass
class CacheEntry:
    """One cached value together with its bookkeeping data."""

    # The cached payload itself.
    data: Any
    # time.time() value recorded when the entry was stored.
    timestamp: float
    # Lifetime in seconds, measured from `timestamp`.
    ttl: int  # Time to live in seconds
    # Analysis version the entry was produced with.
    version: str
    # Free-form bookkeeping attached by whoever stored the entry.
    metadata: Dict[str, Any]
    # Number of reads recorded for this entry.
    access_count: int = 0
    # time.time() value of the most recent read.
    last_accessed: float = 0.0
|
||||
|
||||
|
||||
@dataclass
class SemanticCacheStats:
    """Counters and gauges describing semantic cache performance."""

    # Lookups answered from the cache.
    total_hits: int = 0
    # Lookups that found nothing usable.
    total_misses: int = 0
    # Entries discarded because they were invalid or force-refreshed.
    total_invalidations: int = 0
    # Number of entries currently held.
    cache_size: int = 0
    # Estimated size of the held entries, in megabytes.
    memory_usage_mb: float = 0.0
    average_hit_time_ms: float = 0.0
    # total_hits / (total_hits + total_misses)
    hit_rate: float = 0.0
|
||||
|
||||
|
||||
class SemanticCacheManager:
    """
    In-memory cache for semantic intelligence operations.

    Features:
    - LRU-ordered in-memory store with a soft memory limit
    - TTL- and version-based expiration
    - Per-user lookup of the latest semantic insights
    - Operation-aware invalidation (entries are tagged with their
      operation and owning user in ``metadata``)
    - Hit/miss/invalidation statistics

    The persistent-cache settings (``enable_persistent_cache``,
    ``cache_dir``) are stored but not used by any method of this class.
    """

    _BYTES_PER_MB = 1024 * 1024

    def __init__(
        self,
        max_memory_size_mb: int = 512,
        default_ttl_seconds: int = 3600,
        cleanup_interval_seconds: int = 300,
        enable_persistent_cache: bool = True,
        cache_dir: str = "/tmp/semantic_cache"
    ):
        """Create an empty cache.

        Args:
            max_memory_size_mb: Soft limit for the estimated cache size.
            default_ttl_seconds: TTL used when a caller gives none.
            cleanup_interval_seconds: Pause between passes of
                ``_periodic_cleanup``; the loop is never started here.
            enable_persistent_cache: Stored only.
            cache_dir: Stored only.
        """
        self.max_memory_size_mb = max_memory_size_mb
        self.default_ttl = default_ttl_seconds
        self.cleanup_interval = cleanup_interval_seconds
        self.enable_persistent_cache = enable_persistent_cache
        self.cache_dir = cache_dir

        # In-memory cache with LRU eviction (oldest entry first)
        self.memory_cache: Dict[str, CacheEntry] = OrderedDict()
        self.user_indices: Dict[str, str] = {}  # user_id -> cache key of latest insights

        # Statistics
        self.stats = SemanticCacheStats()
        self._stats_lock = asyncio.Lock()

        # Thread pool for background operations
        self.executor = ThreadPoolExecutor(max_workers=4)

        # The cleanup loop is not started automatically; callers start it.
        self.cleanup_task = None

        logger.info(f"SemanticCacheManager initialized with {max_memory_size_mb}MB limit")

    def _generate_cache_key(
        self,
        operation: str,
        user_id: str,
        params: Dict[str, Any]
    ) -> str:
        """Return a deterministic SHA-256 key for operation, user and params."""
        key_data = {
            "operation": operation,
            "user_id": user_id,
            "params": self._serialize_params(params)
        }
        key_str = json.dumps(key_data, sort_keys=True)
        return hashlib.sha256(key_str.encode()).hexdigest()

    def _serialize_params(self, params: Dict[str, Any]) -> Dict[str, Any]:
        """Turn parameter values into strings suitable for hashing.

        Lists and dicts are dumped as key-sorted JSON; nested values JSON
        cannot represent are stringified instead of raising. Every other
        value is converted with ``str``.
        """
        serialized = {}
        for key, value in params.items():
            if isinstance(value, (list, dict)):
                serialized[key] = json.dumps(value, sort_keys=True, default=str)
            else:
                serialized[key] = str(value)
        return serialized

    def _is_entry_valid(self, entry: CacheEntry) -> bool:
        """Return True while the entry is within its TTL and current version."""
        if time.time() - entry.timestamp > entry.ttl:
            return False
        return entry.version == self._get_current_version()

    def _get_current_version(self) -> str:
        """Get current semantic analysis version"""
        # This could be based on model versions, algorithm updates, etc.
        return "v1.0.0"

    @staticmethod
    def _entry_size_bytes(entry: CacheEntry) -> int:
        """Roughly estimate the size of one entry in bytes.

        Uses the JSON length of the entry. Payloads that cannot be copied
        or serialized fall back to the length of their ``repr``, so one
        unusual payload never makes size accounting raise.
        """
        try:
            return len(json.dumps(asdict(entry), default=str).encode())
        except (TypeError, ValueError, RecursionError):
            return len(repr(entry).encode())

    def _calculate_memory_usage(self) -> float:
        """Return the estimated size of all cached entries in MB."""
        total_bytes = sum(
            self._entry_size_bytes(entry) for entry in self.memory_cache.values()
        )
        return total_bytes / self._BYTES_PER_MB

    def _forget_indexed_keys(self, removed_keys) -> None:
        """Drop user-index entries that point at any of ``removed_keys``."""
        if not removed_keys:
            return
        stale_users = [
            user for user, key in self.user_indices.items() if key in removed_keys
        ]
        for user in stale_users:
            del self.user_indices[user]

    def _evict_lru_entries(self, target_size_mb: float):
        """Evict least recently used entries until the target size is met."""
        target_bytes = target_size_mb * self._BYTES_PER_MB
        # Size the cache once and subtract per eviction instead of
        # re-measuring every remaining entry on every iteration.
        current_bytes = sum(
            self._entry_size_bytes(entry) for entry in self.memory_cache.values()
        )

        evicted = set()
        while current_bytes > target_bytes and self.memory_cache:
            oldest_key = next(iter(self.memory_cache))
            current_bytes -= self._entry_size_bytes(self.memory_cache.pop(oldest_key))
            evicted.add(oldest_key)
            logger.debug(f"Evicted cache entry: {oldest_key}")

        self._forget_indexed_keys(evicted)

    def _ensure_capacity(self, incoming_mb: float) -> None:
        """Evict down to 80% of the limit if adding ``incoming_mb`` would exceed it."""
        if self._calculate_memory_usage() + incoming_mb > self.max_memory_size_mb:
            self._evict_lru_entries(self.max_memory_size_mb * 0.8)

    def _periodic_cleanup(self):
        """Loop forever: sleep, purge expired entries, refresh size stats.

        Uses ``time.sleep`` and therefore blocks the calling thread.
        """
        while True:
            try:
                time.sleep(self.cleanup_interval)
                self.cleanup_expired_entries()

                # Update statistics
                self.stats.cache_size = len(self.memory_cache)
                self.stats.memory_usage_mb = self._calculate_memory_usage()

            except Exception as e:
                logger.error(f"Error in periodic cleanup: {e}")

    def cache_semantic_insights(
        self,
        user_id: str,
        insights: Dict[str, Any],
        ttl: Optional[int] = None,
        metadata: Optional[Dict[str, Any]] = None
    ) -> bool:
        """
        Cache semantic insights for a user, replacing any previous ones.

        The entry is tagged with ``operation="semantic_insights"`` and the
        owning ``user_id`` so operation-scoped invalidation can find it;
        caller-supplied metadata keys take precedence over these tags.

        Args:
            user_id: User identifier
            insights: Semantic insights data (must be JSON-serializable)
            ttl: Time to live in seconds (uses default if None or 0)
            metadata: Additional metadata for cache management

        Returns:
            True if caching was successful
        """
        try:
            now = time.time()
            # Measure first: a non-serializable payload must fail before
            # any existing state is touched.
            incoming_mb = len(json.dumps(insights).encode()) / self._BYTES_PER_MB

            cache_key = self._generate_cache_key(
                "semantic_insights",
                user_id,
                {"timestamp": now}
            )
            entry = CacheEntry(
                data=insights,
                timestamp=now,
                ttl=ttl or self.default_ttl,
                version=self._get_current_version(),
                metadata={
                    "operation": "semantic_insights",
                    "user_id": user_id,
                    **(metadata or {}),
                },
                access_count=1,
                last_accessed=now
            )

            # The key is time-based, so the previous entry would otherwise
            # stay in memory, unreachable, until its TTL runs out.
            previous_key = self.user_indices.get(user_id)
            if previous_key is not None:
                self.memory_cache.pop(previous_key, None)

            self._ensure_capacity(incoming_mb)

            self.memory_cache[cache_key] = entry
            self.memory_cache.move_to_end(cache_key)  # Mark as recently used
            self.user_indices[user_id] = cache_key

            logger.info(f"Cached semantic insights for user {user_id}")
            return True

        except Exception as e:
            logger.error(f"Failed to cache semantic insights: {e}")
            return False

    def get_stats(self) -> Dict[str, Any]:
        """Return up-to-date cache statistics as a plain dictionary."""
        return asdict(self.get_cache_stats())

    def clear_cache(self) -> bool:
        """Remove every entry and reset the size statistics."""
        try:
            self.memory_cache.clear()
            self.user_indices.clear()
            self.stats.cache_size = 0
            self.stats.memory_usage_mb = 0.0
            return True
        except Exception as e:
            logger.error(f"Error clearing cache: {e}")
            return False

    def get_cached_semantic_insights(
        self,
        user_id: str,
        force_refresh: bool = False
    ) -> Optional[Dict[str, Any]]:
        """
        Retrieve cached semantic insights for a user

        Args:
            user_id: User identifier
            force_refresh: Discard the cached entry even if it is valid

        Returns:
            Cached insights or None if not found/expired
        """
        try:
            cache_key = self.user_indices.get(user_id)
            entry = self.memory_cache.get(cache_key) if cache_key else None
            if not entry:
                self.stats.total_misses += 1
                return None

            if force_refresh or not self._is_entry_valid(entry):
                del self.memory_cache[cache_key]
                del self.user_indices[user_id]
                self.stats.total_invalidations += 1
                return None

            # Update access statistics
            entry.access_count += 1
            entry.last_accessed = time.time()
            self.memory_cache.move_to_end(cache_key)
            self.stats.total_hits += 1

            logger.debug(f"Retrieved cached semantic insights for user {user_id}")
            return entry.data

        except Exception as e:
            logger.error(f"Failed to retrieve cached semantic insights: {e}")
            return None

    def cache_query_results(
        self,
        query: str,
        results: List[Dict[str, Any]],
        relevance_threshold: float = 0.7,
        ttl: Optional[int] = None
    ) -> bool:
        """
        Cache semantic search results in the global (non-user) query cache.

        Args:
            query: Search query
            results: Query results; each may carry a ``score``
            relevance_threshold: Results are cached only if the best score
                reaches this value
            ttl: Time to live in seconds (half the default TTL if None or 0)

        Returns:
            True if caching was successful
        """
        try:
            # Only cache high-quality results
            if not results or max(r.get('score', 0) for r in results) < relevance_threshold:
                return False

            cache_key = self._generate_cache_key(
                "semantic_query",
                "global",  # Global query cache
                {"query": query, "threshold": relevance_threshold}
            )

            entry = CacheEntry(
                data=results,
                timestamp=time.time(),
                ttl=ttl or (self.default_ttl // 2),  # Shorter TTL for queries
                version=self._get_current_version(),
                metadata={
                    "operation": "semantic_query",
                    "query": query,
                    "relevance_threshold": relevance_threshold,
                    "result_count": len(results)
                }
            )

            # Query results count against the same memory limit as insights.
            self._ensure_capacity(self._entry_size_bytes(entry) / self._BYTES_PER_MB)

            self.memory_cache[cache_key] = entry
            self.memory_cache.move_to_end(cache_key)

            logger.info(f"Cached semantic query results for: {query}")
            return True

        except Exception as e:
            logger.error(f"Failed to cache query results: {e}")
            return False

    def get_cached_query_results(
        self,
        query: str,
        relevance_threshold: float = 0.7
    ) -> Optional[List[Dict[str, Any]]]:
        """Retrieve cached results for a query/threshold pair, if still valid."""
        try:
            cache_key = self._generate_cache_key(
                "semantic_query",
                "global",
                {"query": query, "threshold": relevance_threshold}
            )

            entry = self.memory_cache.get(cache_key)
            if not entry:
                self.stats.total_misses += 1
                return None
            if not self._is_entry_valid(entry):
                # Expired entries are dropped right away instead of waiting
                # for the next cleanup pass.
                self.memory_cache.pop(cache_key, None)
                self.stats.total_misses += 1
                return None

            # Update access statistics
            entry.access_count += 1
            entry.last_accessed = time.time()
            self.memory_cache.move_to_end(cache_key)
            self.stats.total_hits += 1

            logger.debug(f"Retrieved cached query results for: {query}")
            return entry.data

        except Exception as e:
            logger.error(f"Failed to retrieve cached query results: {e}")
            return None

    def invalidate_user_cache(self, user_id: str, operation_type: Optional[str] = None):
        """
        Invalidate cache entries for a specific user

        An entry belongs to the user if the user index points at it or its
        metadata carries the same ``user_id``.

        Args:
            user_id: User identifier
            operation_type: Only remove entries whose metadata ``operation``
                equals this value (optional)
        """
        try:
            def _matches(entry: CacheEntry) -> bool:
                return operation_type is None or entry.metadata.get("operation") == operation_type

            doomed = set()

            # Entry referenced by the user index
            indexed_key = self.user_indices.get(user_id)
            if indexed_key is not None:
                indexed_entry = self.memory_cache.get(indexed_key)
                if indexed_entry is not None and _matches(indexed_entry):
                    doomed.add(indexed_key)

            # Entries tagged with the user in their metadata
            for cache_key, entry in list(self.memory_cache.items()):
                if entry.metadata.get("user_id") == user_id and _matches(entry):
                    doomed.add(cache_key)

            for cache_key in doomed:
                del self.memory_cache[cache_key]
            self._forget_indexed_keys(doomed)
            self.stats.total_invalidations += len(doomed)

            logger.info(f"Invalidated {len(doomed)} cache entries for user {user_id}")

        except Exception as e:
            logger.error(f"Failed to invalidate user cache: {e}")

    def invalidate_on_content_update(self, user_id: str, content_type: str):
        """
        Invalidate relevant cache entries when user content is updated

        Args:
            user_id: User identifier
            content_type: Type of content updated (e.g., 'blog_post', 'page', etc.)
        """
        try:
            # Invalidate semantic insights for this user
            self.invalidate_user_cache(user_id, "semantic_insights")

            # Content changes also affect pillar-related caches
            if content_type in ["blog_post", "page", "content"]:
                self.invalidate_user_cache(user_id, "semantic_pillars")

            logger.info(f"Invalidated cache for user {user_id} content update: {content_type}")

        except Exception as e:
            logger.error(f"Failed to invalidate cache on content update: {e}")

    def cleanup_expired_entries(self):
        """Remove every entry that is past its TTL or from an old version."""
        try:
            expired_keys = [
                cache_key
                for cache_key, entry in list(self.memory_cache.items())
                if not self._is_entry_valid(entry)
            ]

            for cache_key in expired_keys:
                self.memory_cache.pop(cache_key, None)
            # One pass over the user index instead of one scan per key.
            self._forget_indexed_keys(set(expired_keys))

            if expired_keys:
                logger.info(f"Cleaned up {len(expired_keys)} expired cache entries")

        except Exception as e:
            logger.error(f"Error during cache cleanup: {e}")

    def get_cache_stats(self) -> SemanticCacheStats:
        """Refresh hit rate, entry count and memory usage, then return the stats."""
        try:
            total_requests = self.stats.total_hits + self.stats.total_misses
            if total_requests > 0:
                self.stats.hit_rate = self.stats.total_hits / total_requests

            self.stats.cache_size = len(self.memory_cache)
            self.stats.memory_usage_mb = self._calculate_memory_usage()

            return self.stats

        except Exception as e:
            logger.error(f"Failed to get cache stats: {e}")
            return self.stats

    def warm_cache_for_user(self, user_id: str, common_queries: List[str]):
        """
        Pre-populate cache with common semantic queries for a user

        Currently this only logs the request; no analysis is executed and
        nothing is stored.

        Args:
            user_id: User identifier
            common_queries: List of common semantic queries to pre-cache
        """
        try:
            logger.info(f"Warming cache for user {user_id} with {len(common_queries)} queries")

            # Possible warming scenarios once implemented:
            # 1. Pre-analyze user's top content pillars
            # 2. Cache common competitor comparisons
            # 3. Pre-compute semantic similarity scores

            logger.info(f"Cache warming initiated for user {user_id}")

        except Exception as e:
            logger.error(f"Failed to warm cache for user: {e}")
|
||||
|
||||
|
||||
def semantic_cache_decorator(ttl: int = 3600, operation_type: str = "generic"):
    """
    Decorator for caching the result of an async semantic operation.

    The decorated method's instance must expose a ``cache_manager``;
    without one the method is simply executed. The cache key is built
    from the operation type, the user id, the decorated function's
    qualified name and the call arguments, so different functions that
    share an ``operation_type`` never read each other's results.

    Only truthy results are cached, so empty or failed results are
    recomputed on the next call.

    Args:
        ttl: Time to live in seconds
        operation_type: Type of semantic operation being cached
    """
    def decorator(func):
        @wraps(func)
        async def wrapper(self, *args, **kwargs):
            # Get cache manager instance (assumes it's available as self.cache_manager)
            cache_manager = getattr(self, 'cache_manager', None)
            if not cache_manager:
                return await func(self, *args, **kwargs)

            # The user is the `user_id` keyword or else the first positional argument.
            user_id = kwargs.get('user_id') or (args[0] if args else 'unknown')
            try:
                cache_key = cache_manager._generate_cache_key(
                    operation_type,
                    user_id,
                    {"func": func.__qualname__, "args": args, "kwargs": kwargs}
                )
            except (TypeError, ValueError) as e:
                # Caching is an optimisation: arguments that cannot be
                # turned into a key must not break the call itself.
                logger.debug(f"Cache bypassed for {operation_type} operation: {e}")
                return await func(self, *args, **kwargs)

            # Try to get from cache
            cached_entry = cache_manager.memory_cache.get(cache_key)
            if cached_entry is not None and cache_manager._is_entry_valid(cached_entry):
                logger.debug(f"Cache hit for {operation_type} operation")
                return cached_entry.data

            # Execute function and cache result
            result = await func(self, *args, **kwargs)

            if result:
                cache_manager.memory_cache[cache_key] = CacheEntry(
                    data=result,
                    timestamp=time.time(),
                    ttl=ttl,
                    version=cache_manager._get_current_version(),
                    metadata={"operation": operation_type, "user_id": user_id}
                )

            return result

        return wrapper
    return decorator
|
||||
|
||||
|
||||
# Global cache manager instance
|
||||
# Shared module-level cache manager, built with the constructor defaults.
semantic_cache_manager = SemanticCacheManager()
|
||||
601
backend/services/intelligence/sif_agents.py
Normal file
601
backend/services/intelligence/sif_agents.py
Normal file
@@ -0,0 +1,601 @@
|
||||
"""
|
||||
SIF Agent Interfaces
|
||||
Defines the specialized agents for digital marketing and SEO.
|
||||
Each agent leverages TxtaiIntelligenceService for semantic operations.
|
||||
"""
|
||||
|
||||
import traceback
|
||||
from typing import List, Dict, Any, Optional
|
||||
from datetime import datetime
|
||||
from loguru import logger
|
||||
from .txtai_service import TxtaiIntelligenceService
|
||||
|
||||
class SIFBaseAgent:
    """Base class for SIF agents: holds the intelligence service and a logging helper."""

    def __init__(self, intelligence_service: TxtaiIntelligenceService):
        # Service the agent delegates its semantic operations to.
        self.intelligence = intelligence_service

    def _log_agent_operation(self, operation: str, **kwargs):
        """Log an operation at info level and its parameters, if any, at debug level."""
        agent_name = self.__class__.__name__
        logger.info(f"[{agent_name}] {operation}")
        if not kwargs:
            return
        logger.debug(f"[{agent_name}] Parameters: {kwargs}")
|
||||
|
||||
class StrategyArchitectAgent(SIFBaseAgent):
    """Agent for discovering content pillars and identifying strategic gaps."""

    async def discover_pillars(self) -> List[Dict[str, Any]]:
        """Cluster the indexed content and describe each cluster as a pillar.

        Returns:
            One dict per cluster with ``pillar_id``, ``indices``, ``size`` and
            ``confidence`` keys. An empty list is returned when the service
            is not initialized, when no clusters come back, or on any error.
        """
        self._log_agent_operation("Discovering content pillars")
        agent_name = self.__class__.__name__

        try:
            # Clustering is pointless without a ready intelligence service.
            if not self.intelligence.is_initialized():
                logger.error(f"[{agent_name}] Intelligence service not initialized")
                return []

            clusters = await self.intelligence.cluster(min_score=0.6)
            if not clusters:
                logger.warning(f"[{agent_name}] No clusters found")
                return []

            # Wrap every cluster in a pillar record carrying its metadata.
            pillars = []
            for position, members in enumerate(clusters):
                entry = {
                    "pillar_id": f"pillar_{position}",
                    "indices": members,
                    "size": len(members),
                    "confidence": self._calculate_cluster_confidence(members),
                }
                pillars.append(entry)
                logger.debug(f"[{agent_name}] Created pillar {entry['pillar_id']} with {entry['size']} items")

            logger.info(f"[{agent_name}] Discovered {len(pillars)} content pillars")
            return pillars

        except Exception as e:
            logger.error(f"[{agent_name}] Failed to discover pillars: {e}")
            logger.error(f"[{agent_name}] Full traceback: {traceback.format_exc()}")
            return []

    def _calculate_cluster_confidence(self, cluster_indices: List[int]) -> float:
        """Return a size-based confidence: one tenth per member, capped at 1.0."""
        ratio = len(cluster_indices) / 10.0
        return ratio if ratio < 1.0 else 1.0

    async def find_semantic_gaps(self, competitor_indices: List[int]) -> List[Dict[str, Any]]:
        """Compare user content with competitor content to find missing topics.

        This is still a stub: it logs and returns two fixed placeholder gaps.
        An empty list is returned if anything raises.
        """
        self._log_agent_operation("Finding semantic content gaps", competitor_count=len(competitor_indices))
        agent_name = self.__class__.__name__

        try:
            # STUB: cross-index comparison is not implemented yet. It would:
            #   1. collect the user's content topics/themes,
            #   2. collect the competitors' content topics/themes,
            #   3. report topics competitors cover but the user does not.
            logger.info(f"[{agent_name}] Found semantic gaps analysis stub")
            placeholder_gaps = [
                {"topic": "Topic A", "priority": "high", "reason": "Competitor coverage gap"},
                {"topic": "Topic B", "priority": "medium", "reason": "Emerging trend"},
            ]
            return placeholder_gaps

        except Exception as e:
            logger.error(f"[{agent_name}] Failed to find semantic gaps: {e}")
            logger.error(f"[{agent_name}] Full traceback: {traceback.format_exc()}")
            return []
|
||||
|
||||
class ContentGuardianAgent(SIFBaseAgent):
    """Agent for preventing cannibalization and ensuring content originality."""

    CANNIBALIZATION_THRESHOLD = 0.85  # Similarity threshold for cannibalization warning
    ORIGINALITY_THRESHOLD = 0.75  # Minimum originality score

    def __init__(self, intelligence_service: TxtaiIntelligenceService, sif_service: Any = None):
        """Store the intelligence service and the optional SIF integration service."""
        super().__init__(intelligence_service)
        self.sif_service = sif_service

    async def check_cannibalization(self, new_draft: str) -> Dict[str, Any]:
        """Check if a new draft competes semantically with existing pages.

        Args:
            new_draft: Draft text; drafts under 50 non-blank characters are
                not analysed.

        Returns:
            A dict whose ``warning`` key is True when the best match scores
            above ``CANNIBALIZATION_THRESHOLD`` (with ``similar_to``,
            ``score``, ``threshold`` and ``recommendation``). Otherwise
            ``warning`` is False and the dict carries ``uniqueness_score``,
            ``reason`` or ``error``.
        """
        # ``or ""`` keeps a None draft from raising before the guard below runs.
        self._log_agent_operation("Checking for semantic cannibalization", draft_length=len(new_draft or ""))

        try:
            if not self.intelligence.is_initialized():
                logger.error(f"[{self.__class__.__name__}] Intelligence service not initialized")
                return {"warning": False, "error": "Service not initialized"}

            if not new_draft or len(new_draft.strip()) < 50:
                logger.warning(f"[{self.__class__.__name__}] Draft too short for meaningful analysis")
                return {"warning": False, "reason": "Draft too short"}

            results = await self.intelligence.search(new_draft, limit=1)

            if not results:
                logger.info(f"[{self.__class__.__name__}] No similar content found - draft is unique")
                return {"warning": False, "uniqueness_score": 1.0}

            top_result = results[0]
            similarity_score = top_result.get('score', 0.0)

            logger.debug(f"[{self.__class__.__name__}] Top similarity score: {similarity_score:.4f}")

            if similarity_score > self.CANNIBALIZATION_THRESHOLD:
                warning_data = {
                    "warning": True,
                    "similar_to": top_result.get('id', 'unknown'),
                    "score": similarity_score,
                    "threshold": self.CANNIBALIZATION_THRESHOLD,
                    "recommendation": "Consider revising the draft to target a different angle or merge with existing content"
                }
                logger.warning(f"[{self.__class__.__name__}] Cannibalization detected: {warning_data}")
                return warning_data

            logger.info(f"[{self.__class__.__name__}] No cannibalization detected. Draft is sufficiently unique.")
            return {"warning": False, "uniqueness_score": 1.0 - similarity_score}

        except Exception as e:
            logger.error(f"[{self.__class__.__name__}] Failed to check cannibalization: {e}")
            logger.error(f"[{self.__class__.__name__}] Full traceback: {traceback.format_exc()}")
            return {"warning": False, "error": str(e)}

    async def verify_originality(self, text: str, competitor_index: Any) -> Dict[str, Any]:
        """Verify originality against competitor content index.

        Still a stub: ``competitor_index`` is not used yet and a fixed
        placeholder result is returned for any text of at least 50 non-blank
        characters. Shorter text yields ``originality_score`` 0.0.
        """
        self._log_agent_operation("Verifying originality against competitors", text_length=len(text or ""))

        try:
            if not text or len(text.strip()) < 50:
                logger.warning(f"[{self.__class__.__name__}] Text too short for meaningful originality check")
                return {"originality_score": 0.0, "reason": "Text too short"}

            # STUB: Implement cross-index search against competitor content.
            # This would search the text against a competitor-specific index.

            logger.info(f"[{self.__class__.__name__}] Originality verification stub completed")
            return {
                "originality_score": 0.95,  # Placeholder
                "confidence": 0.8,
                "method": "semantic_comparison",
                "notes": "Competitor index integration pending"
            }

        except Exception as e:
            logger.error(f"[{self.__class__.__name__}] Failed to verify originality: {e}")
            logger.error(f"[{self.__class__.__name__}] Full traceback: {traceback.format_exc()}")
            return {"originality_score": 0.0, "error": str(e)}

    async def _fetch_style_guidelines(self) -> Optional[Dict[str, Any]]:
        """Look up brand style guidelines in the semantic index.

        Returns the guidelines dict when the top hit is a ``website_analysis``
        document, otherwise None. Lookup errors are logged and yield None.
        """
        import json

        try:
            # We assume the most relevant 'website_analysis' doc contains the guidelines.
            results = await self.intelligence.search("website analysis brand voice style", limit=1)
            if not results:
                return None

            res = results[0]
            metadata_str = res.get('object')
            # Metadata may arrive as a JSON string, as an object, or inline in the hit.
            metadata = json.loads(metadata_str) if isinstance(metadata_str, str) else (metadata_str or res)

            if metadata.get('type') != 'website_analysis':
                return None

            report = metadata.get('full_report', {})
            style_guidelines = {
                "tone": report.get('brand_analysis', {}).get('brand_voice', 'neutral'),
                "style_patterns": report.get('style_patterns', {}),
                "writing_style": report.get('writing_style', {})
            }
            logger.info(f"[{self.__class__.__name__}] Retrieved style guidelines from SIF: {style_guidelines.get('tone')}")
            return style_guidelines
        except Exception as e:
            logger.warning(f"[{self.__class__.__name__}] Failed to retrieve style guidelines from SIF: {e}")
            return None

    async def style_enforcer(self, text: str, style_guidelines: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
        """
        Tool: Ensures content adheres to brand voice and style guidelines.

        Args:
            text: Content to check.
            style_guidelines: Optional guidelines; when missing or empty and a
                SIF service is configured, they are looked up in the index.

        Returns:
            Dict with ``compliance_score``, ``issues``, ``is_compliant`` and
            ``guidelines_source``. The source is "provided" when the caller
            supplied guidelines, "sif_index" when they were retrieved from
            the index, and "none" when no guidelines were available.
            On failure a dict with an ``error`` key is returned.
        """
        self._log_agent_operation("Enforcing style guidelines", text_length=len(text or ""))

        try:
            if not text:
                return {"compliance_score": 0.0, "issues": ["No text provided"]}

            # 1. Resolve guidelines and record where they really came from.
            if style_guidelines:
                guidelines_source = "provided"
            else:
                guidelines_source = "none"
                if self.sif_service:
                    fetched = await self._fetch_style_guidelines()
                    if fetched:
                        style_guidelines = fetched
                        guidelines_source = "sif_index"

            issues = []
            score = 1.0

            # Basic Heuristic Checks (Placeholder for LLM-based style analysis)

            # 2. Tone check: formal/professional text should avoid contractions.
            # ``str(... or '')`` tolerates a missing or non-string tone value.
            tone = str(style_guidelines.get('tone') or '').lower() if style_guidelines else ''
            if 'formal' in tone or 'professional' in tone:
                contractions = ["can't", "won't", "don't", "it's"]
                lowered_text = text.lower()
                found_contractions = [c for c in contractions if c in lowered_text]
                if found_contractions:
                    issues.append(f"Found contractions in formal text: {', '.join(found_contractions[:3])}...")
                    score -= 0.1

            # 3. Sentence length: average only over non-blank fragments so the
            # empty piece after a trailing '.' does not dilute the average.
            sentences = [s for s in text.split('.') if s.strip()]
            avg_len = sum(len(s.split()) for s in sentences) / max(1, len(sentences))
            if avg_len > 25:
                issues.append("Average sentence length is too high (>25 words). Consider shortening.")
                score -= 0.1

            return {
                "compliance_score": max(0.0, score),
                "issues": issues,
                "is_compliant": score > 0.8,
                "guidelines_source": guidelines_source
            }

        except Exception as e:
            logger.error(f"[{self.__class__.__name__}] Style enforcement failed: {e}")
            return {"error": str(e)}

    async def safety_filter(self, text: str) -> Dict[str, Any]:
        """
        Tool: Flags potentially harmful, offensive, or sensitive content.

        Matching is case-insensitive and on whole words, so keywords are also
        caught at the start or end of the text and next to punctuation.

        Returns:
            Dict with ``is_safe``, ``flags`` (matched keywords),
            ``safety_score`` (1.0 or 0.0) and ``action``; or a dict with an
            ``error`` key on failure.
        """
        import re

        self._log_agent_operation("Running safety filter", text_length=len(text or ""))

        try:
            # Basic Keyword Blocklist (Placeholder for LLM/Safety Model)
            # In production, this should call a dedicated safety API (e.g., OpenAI Moderation, Llama Guard)
            unsafe_keywords = [
                "hate", "kill", "murder", "attack", "destroy",  # Violent
                "scam", "fraud", "steal",  # Illegal
                "explicit", "adult"  # NSFW
            ]

            text_lower = (text or "").lower()

            # \b gives a real word boundary; a plain " keyword " substring test
            # misses "Kill them", "a scam." and similar.
            found_flags = [
                keyword
                for keyword in unsafe_keywords
                if re.search(rf"\b{re.escape(keyword)}\b", text_lower)
            ]

            is_safe = not found_flags

            return {
                "is_safe": is_safe,
                "flags": found_flags,
                "safety_score": 1.0 if is_safe else 0.0,
                "action": "approve" if is_safe else "flag_for_review"
            }

        except Exception as e:
            logger.error(f"[{self.__class__.__name__}] Safety filter failed: {e}")
            return {"error": str(e)}
|
||||
|
||||
class LinkGraphAgent(SIFBaseAgent):
    """
    Agent for internal link suggestions, graph management, and authority analysis.
    Implements the semantic link graph using SIF and GSC/Bing data.
    """

    RELEVANCE_THRESHOLD = 0.6  # Minimum relevance score for link suggestions
    MAX_SUGGESTIONS = 10  # Maximum number of link suggestions

    def __init__(self, intelligence_service: TxtaiIntelligenceService, sif_service: Any = None):
        """Store the intelligence service and the optional SIF integration service."""
        super().__init__(intelligence_service)
        self.sif_service = sif_service

    async def suggest_internal_links(self, draft: str) -> List[Dict[str, Any]]:
        """Suggest internal links based on semantic proximity and authority."""
        return await self.link_suggester(draft)

    async def _load_authority_map(self) -> Dict[str, float]:
        """Return per-URL authority multipliers for link scoring.

        The dashboard context is fetched when a SIF service is configured,
        but no per-page authority is derived from it yet, so the map is
        currently always empty and every URL gets a neutral boost.
        """
        if not self.sif_service:
            return {}

        try:
            # Note: This relies on what's available in the SIF index/dashboard summary.
            dashboard_context = await self.sif_service.get_seo_dashboard_context()
            if "error" in dashboard_context:
                return {}
            # TODO: build a URL -> authority score map once the dashboard
            # summary exposes top pages/queries (or fall back to site health).
        except Exception as e:
            logger.warning(f"Failed to fetch authority data: {e}")
        return {}

    async def link_suggester(self, draft: str) -> List[Dict[str, Any]]:
        """
        Tool: Suggests internal links.
        Analyzes draft content and finds semantically relevant pages, boosted by authority.

        Returns:
            Suggestions sorted by ``final_score`` (highest first), each with
            ``url``, ``relevance``, ``final_score``, ``confidence`` and
            ``reason``. Empty when the service is not initialized, the draft
            has fewer than 50 non-blank characters, nothing reaches
            ``RELEVANCE_THRESHOLD``, or an error occurs.
        """
        # ``or ""`` keeps a None draft from raising before the guard below runs.
        self._log_agent_operation("Suggesting internal links", draft_length=len(draft or ""))

        try:
            if not self.intelligence.is_initialized():
                logger.error(f"[{self.__class__.__name__}] Intelligence service not initialized")
                return []

            if not draft or len(draft.strip()) < 50:  # Reduced threshold for testing
                logger.warning(f"[{self.__class__.__name__}] Draft too short for meaningful link suggestions")
                return []

            # 1. Get Semantic Candidates
            results = await self.intelligence.search(draft, limit=self.MAX_SUGGESTIONS)

            if not results:
                logger.info(f"[{self.__class__.__name__}] No relevant internal pages found")
                return []

            # 2. Get Authority Data (if available)
            authority_map = await self._load_authority_map()

            suggestions = []
            for result in results:
                relevance_score = result.get('score', 0.0)
                url = result.get('id', 'unknown')

                # URLs without authority data keep a neutral multiplier of 1.0.
                authority_boost = authority_map.get(url, 1.0)
                final_score = relevance_score * authority_boost

                if final_score >= self.RELEVANCE_THRESHOLD:
                    suggestions.append({
                        "url": url,
                        "relevance": relevance_score,
                        "final_score": final_score,
                        "confidence": self._calculate_link_confidence(final_score),
                        "reason": f"Semantic similarity: {relevance_score:.3f}"
                    })
                    logger.debug(f"[{self.__class__.__name__}] Added link suggestion: {url} (score: {final_score:.3f})")

            # Sort by final score
            suggestions.sort(key=lambda x: x['final_score'], reverse=True)

            logger.info(f"[{self.__class__.__name__}] Generated {len(suggestions)} internal link suggestions")
            return suggestions

        except Exception as e:
            logger.error(f"[{self.__class__.__name__}] Failed to suggest internal links: {e}")
            logger.error(f"[{self.__class__.__name__}] Full traceback: {traceback.format_exc()}")
            return []

    async def graph_builder(self) -> Dict[str, Any]:
        """
        Tool: Builds/Visualizes the semantic link graph.
        Returns the structure of the graph (nodes and edges) for visualization or analysis.

        Returns:
            Dict with ``graph_stats``, ``nodes`` (one ``topic_cluster`` node
            per cluster), ``edges`` (empty until item-level metadata lookup
            is implemented), ``structure`` and ``timestamp``; or a dict with
            an ``error`` key when the service is unavailable or a step fails.
        """
        self._log_agent_operation("Building semantic link graph")

        try:
            if not self.intelligence.is_initialized():
                return {"error": "Intelligence service not initialized"}

            # This is a resource-intensive operation in a real vector DB.
            # Here we simulate the graph structure based on clusters.
            # A falsy result is treated as "no clusters", as discover_pillars does.
            clusters = await self.intelligence.cluster(min_score=0.5) or []

            nodes = [
                {
                    "id": f"cluster_{i}",
                    "type": "topic_cluster",
                    "size": len(cluster)
                }
                for i, cluster in enumerate(clusters)
            ]
            # Clusters hold item indices only; linking items to their cluster
            # needs a metadata lookup by index/ID that is not implemented yet.
            edges: List[Dict[str, Any]] = []

            return {
                "graph_stats": {
                    "total_clusters": len(clusters),
                    "total_nodes": sum(len(c) for c in clusters)
                },
                "nodes": nodes,
                "edges": edges,
                "structure": "hierarchical",  # vs flat
                "timestamp": datetime.utcnow().isoformat()
            }

        except Exception as e:
            logger.error(f"[{self.__class__.__name__}] Failed to build graph: {e}")
            return {"error": str(e)}

    async def authority_analyzer(self, target_url: Optional[str] = None) -> Dict[str, Any]:
        """
        Tool: Analyzes the authority of the site or specific pages using GSC/Bing data.

        ``target_url`` is only logged for now; the report is site-wide.
        Returns a dict with ``domain_authority_proxy``, ``page_authority``
        and ``timestamp``, the dashboard context itself when it contains an
        ``error`` key, or an ``error`` dict when no SIF service is configured
        or a step fails.
        """
        self._log_agent_operation("Analyzing authority", target_url=target_url)

        if not self.sif_service:
            return {"error": "SIF Service unavailable for authority analysis"}

        try:
            # 1. Get Dashboard Context
            context = await self.sif_service.get_seo_dashboard_context()

            if "error" in context:
                return context

            data = context.get("dashboard_data", {})
            summary = data.get("summary", {})
            health = data.get("health_score", {})

            # 2. Extract Authority Metrics
            authority_report = {
                "domain_authority_proxy": {
                    "health_score": health.get("score"),
                    "total_clicks": summary.get("clicks"),
                    "avg_position": summary.get("position")
                },
                "page_authority": "Page-level authority requires granular GSC data (Planned)",  # Placeholder
                "timestamp": datetime.utcnow().isoformat()
            }

            return authority_report

        except Exception as e:
            logger.error(f"[{self.__class__.__name__}] Authority analysis failed: {e}")
            return {"error": str(e)}

    def _calculate_link_confidence(self, relevance_score: float) -> float:
        """Return 1.5 times the relevance score, capped at 1.0."""
        return min(1.0, relevance_score * 1.5)

    async def optimize_anchor_text(self, target_url: str, context: str) -> str:
        """Suggest the best anchor text for a given link based on target page context.

        Still a stub: returns a fixed placeholder, or "click here" if
        anything raises.
        """
        self._log_agent_operation("Optimizing anchor text", target_url=target_url, context_length=len(context or ""))

        try:
            # In a real implementation, we would fetch the target page content via SIF
            # and use an LLM to generate the anchor text.
            logger.info(f"[{self.__class__.__name__}] Anchor text optimization stub completed")
            return "relevant anchor text"  # Placeholder

        except Exception as e:
            logger.error(f"[{self.__class__.__name__}] Failed to optimize anchor text: {e}")
            logger.error(f"[{self.__class__.__name__}] Full traceback: {traceback.format_exc()}")
            return "click here"  # Fallback anchor text
|
||||
|
||||
class CitationExpert(SIFBaseAgent):
    """
    Agent for fact-checking, citation generation, and evidence verification.
    """

    EVIDENCE_THRESHOLD = 0.7  # Minimum relevance score for evidence
    CITATION_THRESHOLD = 0.6  # Relevance a result must exceed to be cited
    MAX_EVIDENCE = 5  # Maximum number of evidence pieces to return

    @staticmethod
    def _truncate(text: Optional[str], limit: int) -> str:
        """Return *text* cut to *limit* characters, adding "..." only when cut.

        None is treated as an empty string.
        """
        text = text or ''
        return text[:limit] + "..." if len(text) > limit else text

    async def fact_checker(self, claim: str) -> List[Dict[str, Any]]:
        """
        Tool: Verifies facts against trusted research data.
        Returns supporting or contradicting evidence.
        """
        return await self.verify_facts(claim)

    async def citation_finder(self, topic: str) -> List[Dict[str, Any]]:
        """
        Tool: Suggests authoritative citations for a given topic.

        Returns:
            One dict per search hit scoring above ``CITATION_THRESHOLD``, with
            ``source``, ``title`` (first 100 characters of the hit text),
            ``relevance`` and ``citation_text``. Empty when the service is
            not initialized or an error occurs.
        """
        self._log_agent_operation("Finding citations", topic=topic)

        try:
            if not self.intelligence.is_initialized():
                return []

            # Search for highly relevant content
            results = await self.intelligence.search(topic, limit=self.MAX_EVIDENCE)

            citations = []
            for result in results or []:
                relevance = result.get('score', 0.0)
                if relevance > self.CITATION_THRESHOLD:
                    citations.append({
                        "source": result.get('id'),
                        # Ellipsis only when the text was actually shortened.
                        "title": self._truncate(result.get('text'), 100),
                        "relevance": relevance,
                        "citation_text": f"Source: {result.get('id')} (Relevance: {relevance:.2f})"
                    })

            return citations

        except Exception as e:
            logger.error(f"[{self.__class__.__name__}] Citation finder failed: {e}")
            return []

    async def claim_verifier(self, content: str) -> Dict[str, Any]:
        """
        Tool: Detects unsupported statements and hallucinations.

        Sentences that contain a digit, "show" or "study" and are longer than
        20 characters are treated as claims; the first five are checked with
        ``verify_facts``.

        Returns:
            ``{"status": "no_claims_detected", "verified_claims": []}`` when
            nothing qualifies, otherwise a dict with ``status``,
            ``total_claims``, ``verified_claims``, ``unsupported_count`` and
            ``timestamp``.
        """
        import re

        self._log_agent_operation("Verifying claims in content", content_length=len(content or ""))

        # 1. Extract potential claims (heuristic: numbers, 'research shows', etc.)
        # This is a simplified extraction. A real implementation would use NLP/LLM.
        # Split only on sentence-ending punctuation followed by whitespace or
        # the end of the text, so decimals such as "3.5%" stay in one claim.
        sentences = re.split(r'[.!?]+(?:\s+|$)', content or "")

        claims = []
        for sent in sentences:
            lowered = sent.lower()
            if any(char.isdigit() for char in sent) or "show" in lowered or "study" in lowered:
                candidate = sent.strip()
                if len(candidate) > 20:
                    claims.append(candidate)

        if not claims:
            return {"status": "no_claims_detected", "verified_claims": []}

        verified_results = []
        for claim in claims[:5]:  # Limit to top 5 claims for performance
            evidence = await self.verify_facts(claim)
            verified_results.append({
                "claim": claim,
                "status": "supported" if evidence else "unsupported",
                "evidence_count": len(evidence),
                "top_evidence": evidence[0]['source'] if evidence else None
            })

        return {
            "status": "verification_complete",
            "total_claims": len(claims),
            "verified_claims": verified_results,
            "unsupported_count": sum(1 for c in verified_results if c['status'] == 'unsupported'),
            "timestamp": datetime.utcnow().isoformat()
        }

    async def verify_facts(self, claim: str) -> List[Dict[str, Any]]:
        """Find supporting or contradicting evidence in the indexed research.

        Returns:
            One dict per search hit scoring at least ``EVIDENCE_THRESHOLD``,
            with ``source``, ``relevance``, ``confidence``, ``type``
            ("supporting" above 0.8, else "related") and ``excerpt`` (first
            200 characters). Empty when the service is not initialized, the
            claim has fewer than 20 non-blank characters, nothing qualifies,
            or an error occurs.
        """
        self._log_agent_operation("Verifying facts", claim_length=len(claim or ""))

        try:
            if not self.intelligence.is_initialized():
                logger.error(f"[{self.__class__.__name__}] Intelligence service not initialized")
                return []

            if not claim or len(claim.strip()) < 20:
                logger.warning(f"[{self.__class__.__name__}] Claim too short for meaningful verification")
                return []

            results = await self.intelligence.search(claim, limit=self.MAX_EVIDENCE)

            if not results:
                logger.info(f"[{self.__class__.__name__}] No evidence found for claim")
                return []

            evidence = []
            for result in results:
                relevance_score = result.get('score', 0.0)

                if relevance_score >= self.EVIDENCE_THRESHOLD:
                    evidence_piece = {
                        "source": result.get('id', 'unknown'),
                        "relevance": relevance_score,
                        "confidence": self._calculate_evidence_confidence(relevance_score),
                        "type": "supporting" if relevance_score > 0.8 else "related",
                        "excerpt": self._truncate(result.get('text'), 200)
                    }
                    evidence.append(evidence_piece)
                    logger.debug(f"[{self.__class__.__name__}] Found evidence: {evidence_piece['source']} (score: {relevance_score:.3f})")

            logger.info(f"[{self.__class__.__name__}] Found {len(evidence)} pieces of evidence for claim")
            return evidence

        except Exception as e:
            logger.error(f"[{self.__class__.__name__}] Failed to verify facts: {e}")
            logger.error(f"[{self.__class__.__name__}] Full traceback: {traceback.format_exc()}")
            return []

    def _calculate_evidence_confidence(self, relevance_score: float) -> float:
        """Return 1.2 times the relevance score, capped at 1.0."""
        return min(1.0, relevance_score * 1.2)
|
||||
1183
backend/services/intelligence/sif_integration.py
Normal file
1183
backend/services/intelligence/sif_integration.py
Normal file
File diff suppressed because it is too large
Load Diff
403
backend/services/intelligence/txtai_service.py
Normal file
403
backend/services/intelligence/txtai_service.py
Normal file
@@ -0,0 +1,403 @@
|
||||
"""
|
||||
Txtai Intelligence Service
|
||||
Core service for semantic indexing, search, and clustering using txtai.
|
||||
Designed to run on modest hardware using lightweight models.
|
||||
Enhanced with intelligent caching for performance optimization.
|
||||
"""
|
||||
|
||||
import os
|
||||
import traceback
|
||||
from typing import List, Dict, Any, Optional, Tuple
|
||||
from loguru import logger
|
||||
from datetime import datetime
|
||||
from .semantic_cache import semantic_cache_manager, semantic_cache_decorator
|
||||
|
||||
# txtai imports (will be available after pip install)
|
||||
try:
|
||||
from txtai import Embeddings
|
||||
from txtai.pipeline import Labels, Extractor
|
||||
TXTAI_AVAILABLE = True
|
||||
except ImportError:
|
||||
logger.warning("txtai not installed. Some features will be disabled.")
|
||||
Embeddings = None
|
||||
Labels = None
|
||||
Extractor = None
|
||||
TXTAI_AVAILABLE = False
|
||||
|
||||
class TxtaiIntelligenceService:
|
||||
def __init__(self, user_id: str, model_path: Optional[str] = None, enable_caching: bool = True):
    """Set up per-user state and immediately try to initialize the embeddings.

    Args:
        user_id: Used to build the on-disk index path.
        model_path: Embedding model identifier; when falsy, the MiniLM
            sentence-transformers default below is used.
        enable_caching: When True the module-level semantic cache manager
            is attached, otherwise ``cache_manager`` is None.
    """
    self.user_id = user_id

    # Caching configuration.
    self.enable_caching = enable_caching
    if enable_caching:
        self.cache_manager = semantic_cache_manager
    else:
        self.cache_manager = None

    # Model and storage locations.
    self.model_path = model_path if model_path else "sentence-transformers/all-MiniLM-L6-v2"
    self.index_path = f"workspace/workspace_{user_id}/indices/txtai"

    # Runtime state, filled in by _initialize_embeddings().
    self.embeddings = None
    self._initialized = False

    self._initialize_embeddings()
|
||||
|
||||
def _initialize_embeddings(self):
    """Initialize txtai embeddings with local storage support and comprehensive error handling.

    Creates the embeddings instance, loads the index saved at
    ``self.index_path`` when one exists (falling back to a fresh, empty
    instance if loading fails), and sets ``self._initialized``. Nothing is
    raised: when txtai is missing the method logs and returns, and on any
    other failure it logs and leaves ``self._initialized`` False.
    """
    if not TXTAI_AVAILABLE:
        logger.error("txtai is not available. Please install with: pip install txtai[pipeline,similarity]")
        return

    def build_embeddings():
        # Single source of truth for the configuration, used both for the
        # initial instance and for the fallback after a failed load.
        return Embeddings({
            "path": self.model_path,
            "content": True,      # Enable content storage for retrieval
            "objects": True,      # Enable object storage for metadata
            "backend": "faiss",   # Use Faiss for efficient similarity search
            "quantize": True,     # Enable quantization for memory efficiency
            "batch": 32,          # Batch size for processing
            "gpu": False,         # Force CPU usage for compatibility
            "limit": 1000         # Maximum number of results for queries
        })

    try:
        logger.info(f"Initializing txtai embeddings for user {self.user_id}")
        logger.debug(f"Model path: {self.model_path}")
        logger.debug(f"Index path: {self.index_path}")

        # Ensure directory exists
        index_dir = os.path.dirname(self.index_path)
        os.makedirs(index_dir, exist_ok=True)
        logger.debug(f"Created index directory: {index_dir}")

        # Initialize embeddings with optimal configuration for ALwrity use case
        self.embeddings = build_embeddings()
        logger.info("Embeddings instance created successfully")

        # Check if existing index exists and load it
        if os.path.exists(self.index_path):
            logger.info(f"Loading existing txtai index from {self.index_path}")
            try:
                self.embeddings.load(self.index_path)
            except Exception as load_error:
                logger.warning(f"Failed to load existing index: {load_error}. Creating new index.")
                # Reset embeddings to create new index
                self.embeddings = build_embeddings()
            else:
                logger.info(f"Successfully loaded existing txtai index for user {self.user_id}")
                # Use the documented count() API rather than len(): a failing
                # size lookup inside the load try-block would discard a
                # perfectly good index and replace it with an empty one.
                logger.debug(f"Index contains {self.embeddings.count()} items")
        else:
            logger.info(f"No existing index found. Creating new txtai index for user {self.user_id}")

        self._initialized = True
        logger.info(f"Txtai Intelligence Service initialized successfully for user {self.user_id}")

    except Exception as e:
        logger.error(f"Critical failure initializing txtai embeddings: {e}")
        logger.error(f"Full traceback: {traceback.format_exc()}")
        logger.error("This may be due to:")
        logger.error("1. Missing model files - try: pip install sentence-transformers")
        logger.error("2. Insufficient memory - try using a smaller model")
        logger.error("3. Missing dependencies - try: pip install txtai[pipeline,similarity]")
        self._initialized = False
|
||||
|
||||
async def index_content(self, items: List[Tuple[str, str, Dict[str, Any]]]):
    """
    Index content for semantic search and clustering.

    Each metadata dict is serialized to a JSON string before the items are
    handed to the embeddings index, and the index is then saved to
    ``self.index_path``. When the service is not initialized, or ``items``
    is empty, the call logs and returns without indexing.

    Args:
        items: List of (id, text, metadata) tuples.

    Raises:
        Exception: Any error raised while indexing or saving is logged and
            re-raised.
    """
    if not self._initialized or not self.embeddings:
        logger.error(f"Cannot index content - service not initialized for user {self.user_id}")
        return

    try:
        logger.info(f"Starting content indexing for user {self.user_id}")
        logger.debug(f"Indexing {len(items)} items")

        # Nothing to do for an empty batch.
        if not items:
            logger.warning("No items provided for indexing")
            return

        import json

        # txtai receives the metadata as a JSON string; falsy metadata becomes "{}".
        processed_items = [
            (id_val, text, json.dumps(metadata) if metadata else "{}")
            for id_val, text, metadata in items
        ]

        self.embeddings.index(processed_items)

        # Persist the freshly built index.
        self.embeddings.save(self.index_path)
        logger.info(f"Successfully indexed {len(items)} items for user {self.user_id}")
        logger.debug(f"Index saved to: {self.index_path}")

    except Exception as e:
        logger.error(f"Error indexing content for user {self.user_id}: {e}")
        logger.error(f"Full traceback: {traceback.format_exc()}")
        item_count = len(items) if items else 0
        logger.error(f"Items count: {item_count}")
        if items and len(items) > 0:
            logger.error(f"Sample item structure: {type(items[0])}")
        raise
|
||||
|
||||
async def search(self, query: str, limit: int = 5) -> List[Dict[str, Any]]:
    """Perform semantic search with intelligent caching.

    Args:
        query: Text to search for.
        limit: Maximum number of results to return.

    Returns:
        The search results (at most ``limit``), served from the cache when a
        cached entry holds at least ``limit`` results. An empty list is
        returned when the service is not initialized or the search fails.
    """
    if not self._initialized or not self.embeddings:
        logger.error(f"Cannot perform search - service not initialized for user {self.user_id}")
        return []

    try:
        use_cache = bool(self.enable_caching and self.cache_manager)

        # Check cache first if enabled
        # NOTE(review): this call site passes only the query text to the cache;
        # if the manager is shared between users, confirm results cannot leak
        # from one user's index to another's.
        if use_cache:
            cached_results = self.cache_manager.get_cached_query_results(
                query=query,
                relevance_threshold=0.5  # Lower threshold for search results
            )
            # The cache lookup does not include ``limit``. An entry stored by
            # a smaller-limit call (e.g. limit=1) must not satisfy a later
            # larger-limit call, or the caller would silently get too few hits.
            if cached_results and len(cached_results) >= limit:
                logger.info(f"Cache hit for search query: '{query}'")
                # Return cached results up to the requested limit
                return cached_results[:limit]
            logger.debug(f"Cache miss for search query: '{query}'")

        logger.debug(f"Searching for query: '{query}' with limit: {limit}")
        results = self.embeddings.search(query, limit=limit)

        # Cache the results if caching is enabled
        if use_cache and results:
            self.cache_manager.cache_query_results(
                query=query,
                results=results,
                relevance_threshold=0.5
            )
            logger.debug(f"Cached search results for query: '{query}'")

        logger.info(f"Search completed successfully for user {self.user_id}. Found {len(results)} results")
        logger.debug(f"Top result score: {results[0]['score'] if results else 'N/A'}")
        return results

    except Exception as e:
        logger.error(f"Search failed for user {self.user_id}: {e}")
        logger.error(f"Query: '{query}'")
        logger.error(f"Full traceback: {traceback.format_exc()}")
        return []
|
||||
|
||||
async def get_similarity(self, text1: str, text2: str) -> float:
    """Get semantic similarity between two texts with caching.

    Args:
        text1: First text (used as the query side of the comparison).
        text2: Second text.

    Returns:
        The similarity score, taken from the cache when available. ``0.0`` is
        returned when the service is not initialized or the calculation
        raises.
    """
    if not self._initialized or not self.embeddings:
        logger.error(f"Cannot calculate similarity - service not initialized for user {self.user_id}")
        return 0.0

    try:
        import hashlib

        # Built-in hash() of str is salted per interpreter process, so it
        # cannot identify the same text across processes; use a content
        # digest for the cache key instead.
        text1_digest = hashlib.sha256(text1.encode("utf-8")).hexdigest()
        text2_digest = hashlib.sha256(text2.encode("utf-8")).hexdigest()
        cache_key = f"similarity_{self.user_id}_{text1_digest}_{text2_digest}"

        use_cache = bool(self.enable_caching and self.cache_manager)

        # Check cache first if enabled
        if use_cache:
            cached_similarity = self.cache_manager.get_cached_semantic_insights(
                user_id=cache_key,
                force_refresh=False,
            )
            if cached_similarity and "similarity" in cached_similarity:
                logger.info("Cache hit for similarity calculation")
                return cached_similarity["similarity"]
            logger.debug("Cache miss for similarity calculation")

        logger.debug(f"Calculating similarity between texts: '{text1[:50]}...' and '{text2[:50]}...'")

        # txtai's Embeddings.similarity compares a query against a *list* of
        # texts and returns (index, score) pairs, so wrap text2 and unpack
        # the single score. A plain number is also accepted in case the
        # backend already returns a scalar.
        raw_scores = self.embeddings.similarity(text1, [text2])
        if isinstance(raw_scores, (int, float)):
            similarity = float(raw_scores)
        elif raw_scores:
            similarity = float(raw_scores[0][1])
        else:
            similarity = 0.0

        # Cache the similarity result
        if use_cache:
            similarity_data = {
                "similarity": similarity,
                "text1_hash": text1_digest,
                "text2_hash": text2_digest,
                "timestamp": datetime.now().isoformat(),
            }
            self.cache_manager.cache_semantic_insights(
                user_id=cache_key,
                insights=similarity_data,
                ttl=3600,  # 1 hour TTL for similarity results
            )
            logger.debug("Cached similarity result")

        logger.info(f"Similarity calculated successfully for user {self.user_id}: {similarity:.4f}")
        return similarity
    except Exception as e:
        logger.error(f"Similarity calculation failed for user {self.user_id}: {e}")
        logger.error(f"Text1 length: {len(text1)}, Text2 length: {len(text2)}")
        logger.error(f"Full traceback: {traceback.format_exc()}")
        return 0.0
|
||||
|
||||
async def cluster(self, min_score: float = 0.5) -> List[List[int]]:
    """Cluster indexed content to find semantic pillars, with caching.

    Graph-based extraction is used when ``self.embeddings`` exposes a truthy
    ``graph`` attribute; otherwise, or when the graph search yields nothing
    or any step raises, ``_fallback_clustering`` is used instead.

    Args:
        min_score: Minimum score a result needs to be placed in a cluster.

    Returns:
        A list of clusters, each a list of integer positions. An empty list
        is returned when the service is not initialized.
    """
    if not self._initialized or not self.embeddings:
        logger.error(f"Cannot cluster content - service not initialized for user {self.user_id}")
        return []

    try:
        use_cache = bool(self.enable_caching and self.cache_manager)
        # Built once and shared by the cache lookup and the cache write.
        cache_key = f"cluster_{self.user_id}_{min_score}"

        # Check cache first if enabled
        if use_cache:
            cached_clusters = self.cache_manager.get_cached_semantic_insights(
                user_id=cache_key,
                force_refresh=False,
            )
            if cached_clusters and "clusters" in cached_clusters:
                logger.info(f"Cache hit for clustering with min_score: {min_score}")
                return cached_clusters["clusters"]
            logger.debug(f"Cache miss for clustering with min_score: {min_score}")

        logger.info(f"Starting content clustering for user {self.user_id} with min_score: {min_score}")

        # Check if we have graph functionality available.
        # NOTE(review): fallback results are returned without being cached.
        if not getattr(self.embeddings, "graph", None):
            logger.warning(f"Graph clustering not available for user {self.user_id}. Using fallback clustering.")
            return self._fallback_clustering(min_score)

        # Perform a search to get graph structure.
        # NOTE(review): txtai documents that search(..., graph=True) returns a
        # graph object rather than a list of dicts - confirm that
        # _extract_clusters_from_graph receives the shape it expects.
        sample_query = "content marketing digital strategy"
        graph_results = self.embeddings.search(sample_query, limit=10, graph=True)

        if not graph_results:
            logger.warning(f"No graph results for clustering user {self.user_id}")
            return self._fallback_clustering(min_score)

        # Extract clusters from graph results
        clusters = self._extract_clusters_from_graph(graph_results, min_score)

        # Cache the clustering results
        if use_cache:
            cluster_data = {
                "clusters": clusters,
                "cluster_count": len(clusters),
                "min_score": min_score,
                "timestamp": datetime.now().isoformat(),
            }
            self.cache_manager.cache_semantic_insights(
                user_id=cache_key,
                insights=cluster_data,
                ttl=1800,  # 30 minutes TTL for clustering results
            )
            logger.debug(f"Cached clustering results for user {self.user_id}")

        logger.info(f"Clustering completed successfully. Found {len(clusters)} clusters for user {self.user_id}")
        logger.debug(f"Cluster sizes: {[len(c) for c in clusters]}")
        return clusters

    except Exception as e:
        logger.error(f"Clustering failed for user {self.user_id}: {e}")
        logger.error(f"Min score: {min_score}")
        logger.error(f"Full traceback: {traceback.format_exc()}")
        return self._fallback_clustering(min_score)
|
||||
|
||||
def _fallback_clustering(self, min_score: float) -> List[List[int]]:
    """Fallback clustering method when graph clustering is not available.

    Runs a fixed set of sample queries against ``self.embeddings`` and, for
    each query whose top result reaches ``min_score``, records the positions
    (within that query's result list) of all results reaching ``min_score``.

    This is a placeholder - in production, you'd implement a proper
    clustering algorithm.

    Args:
        min_score: Minimum score for a result to be included in a cluster.

    Returns:
        The distinct clusters in first-seen order, or an empty list if any
        step raises.
    """
    logger.info(f"Using fallback clustering for user {self.user_id}")

    sample_queries = ("marketing", "SEO", "content", "social media", "email marketing")

    try:
        unique_clusters: List[List[int]] = []
        seen = set()  # tuple form of clusters already collected

        for query in sample_queries:
            results = self.embeddings.search(query, limit=5)

            # Only queries whose best hit clears the threshold form a cluster.
            if not results or not (results[0].get("score", 0) >= min_score):
                continue

            # Position 0 always qualifies here, so the cluster is never empty.
            cluster = [
                position
                for position, result in enumerate(results)
                if result.get("score", 0) >= min_score
            ]

            # Remove duplicate clusters while preserving first-seen order.
            fingerprint = tuple(cluster)
            if fingerprint not in seen:
                seen.add(fingerprint)
                unique_clusters.append(cluster)

        return unique_clusters

    except Exception as e:
        logger.error(f"Fallback clustering failed for user {self.user_id}: {e}")
        return []
|
||||
|
||||
def _extract_clusters_from_graph(self, graph_results: List[Dict], min_score: float) -> List[List[int]]:
    """Extract clusters from graph search results.

    Each maximal run of consecutive results whose ``"score"`` is at least
    ``min_score`` becomes one cluster; a result below the threshold ends the
    current run and is not part of any cluster.

    Args:
        graph_results: Sequence of result mappings; a missing ``"score"`` key
            counts as 0.
        min_score: Threshold a result must reach to join a cluster.

    Returns:
        Clusters as lists of positions into ``graph_results``, or an empty
        list if extraction raises.
    """
    from itertools import groupby

    logger.debug(f"Extracting clusters from graph results for user {self.user_id}")

    try:
        # groupby splits the sequence into consecutive runs that share the
        # same "meets threshold" flag; keep only the runs that meet it.
        runs = groupby(
            enumerate(graph_results),
            key=lambda pair: pair[1].get("score", 0) >= min_score,
        )
        return [
            [position for position, _ in run]
            for qualifies, run in runs
            if qualifies
        ]

    except Exception as e:
        logger.error(f"Graph cluster extraction failed for user {self.user_id}: {e}")
        return []
|
||||
|
||||
async def classify(self, text: str, labels: List[str]) -> List[Tuple[str, float]]:
    """Classify text using zero-shot classification.

    Args:
        text: Text to classify.
        labels: Candidate labels to score the text against.

    Returns:
        The classifier output for ``text`` against ``labels``. An empty list
        is returned when the service is not initialized, ``Labels`` is
        unavailable, ``labels`` is empty, or classification raises.
    """
    if not self._initialized or not Labels:
        logger.error(f"Cannot classify text - service not initialized or Labels not available for user {self.user_id}")
        return []

    if not labels:
        # Nothing to score against; avoid building the pipeline for no work.
        logger.debug(f"No labels supplied for classification for user {self.user_id}")
        return []

    try:
        logger.debug(f"Classifying text: '{text[:100]}...' with labels: {labels}")

        # Build the pipeline once per service instance and reuse it;
        # constructing it on every call repeats the model setup each time.
        classifier = getattr(self, "_labels_pipeline", None)
        if classifier is None:
            classifier = Labels()
            self._labels_pipeline = classifier

        # NOTE(review): txtai's Labels pipeline is documented to return
        # (id, score) pairs where id indexes into ``labels``; the annotation
        # here promises label strings - confirm what callers expect.
        results = classifier(text, labels)

        logger.info(f"Classification completed successfully for user {self.user_id}. Found {len(results)} results")
        logger.debug(f"Classification results: {results}")
        return results
    except Exception as e:
        logger.error(f"Classification failed for user {self.user_id}: {e}")
        logger.error(f"Text length: {len(text)}")
        logger.error(f"Labels count: {len(labels)}")
        logger.error(f"Full traceback: {traceback.format_exc()}")
        return []
|
||||
|
||||
def get_index_stats(self) -> Dict[str, Any]:
    """Get statistics about the current index.

    Returns:
        ``{"status": "not_initialized", ...}`` when the service is not ready,
        ``{"status": "error", ...}`` when collecting the stats raises, and
        otherwise an ``"active"`` record with the index size, model path,
        index path and initialization flag. ``index_size`` is ``"unknown"``
        when the embeddings object exposes no usable ``count``.
    """
    if not self._initialized or not self.embeddings:
        return {"status": "not_initialized", "user_id": self.user_id}

    try:
        # ``count`` may be a method (as on txtai Embeddings) or a plain
        # attribute; call it when callable so the stats carry a number
        # rather than a bound-method object.
        count_attr = getattr(self.embeddings, "count", None)
        index_size = count_attr() if callable(count_attr) else count_attr
        if index_size is None:
            index_size = "unknown"

        return {
            "status": "active",
            "user_id": self.user_id,
            "index_size": index_size,
            "model_path": self.model_path,
            "index_path": self.index_path,
            "initialized": self._initialized,
        }
    except Exception as e:
        logger.error(f"Error getting index stats for user {self.user_id}: {e}")
        return {"status": "error", "user_id": self.user_id, "error": str(e)}
|
||||
|
||||
def is_initialized(self) -> bool:
    """Check if the service is properly initialized.

    Returns:
        ``True`` only when the initialization flag is truthy and an
        embeddings object is present; always a real ``bool``.
    """
    # Coerce the flag so a falsy non-bool value (e.g. None) is not leaked
    # to callers through the ``and`` short-circuit.
    return bool(self._initialized) and self.embeddings is not None
|
||||
Reference in New Issue
Block a user