feat: ContentGuardianAgent, onboarding UX, Team Activity action wiring, docs, agent help modal
ContentGuardianAgent consolidation:
- Merge 3 duplicate classes into single source in specialized/content_guardian.py
- Watchdog audit_committee() with heuristic scoring, coverage gaps, overlaps, alerts
- Remove misleading rejection_rate() helper; use acceptance_rate directly
- Integrate audit + alerts + trend signals into today_workflow_service.py
Team Activity page:
- QualityAuditPanel: health ring, per-agent critiques, coverage gaps, overlaps
- TrendSignalsPanel: opportunity cards with urgency/impact/coverage bars
- AlertBanner: persistent dismiss via POST /alerts/{id}/mark-read
- AgentHelpModal: dialog showing all 8 agents with descriptions, tools, schedule
- QualityAuditPanel action buttons: Fill gap -> /content-planning, Resolve overlap, View CTA on alerts/issues
- TrendSignalsPanel action buttons: Create content from this trend -> /blog-writer with trend context state
Onboarding system:
- Step 4 validation: no auto-pass via basic_ready; requires persona data or explicit progression
- Step 5 validation: logs warning on auto-pass without integration data
- OnboardingCompletionService: single DB session, transactional task creation, upsert pattern
- Business-without-website: nullable website_url on SIFIndexingTask and MarketTrendsTask
- DeepCompetitorAnalysisExecutor: 5-min timeout, 10-competitor cap, asyncio.wait_for
- Persona generation: async with 30s timeout, falls back to scheduler
- OnboardingProgressService.reset_onboarding(): resets session + pauses all DB tasks
- OnboardingControlService.reset_onboarding(): also cancels APScheduler jobs
- FinalStep TaskSchedulingPanel: shows scheduled/failed tasks after completion, 8s auto-redirect
- onboarding_completed agent activity event logged to feed
Documentation:
- docs-site/features/onboarding/: overview, steps, scheduler-tasks, technical-reference (4 pages)
- docs-site/mkdocs.yml: added Onboarding System nav section
- docs-site/features/sif-agents/: overview, agent-directory, committee-system, content-guardian (4 pages)
- docs-site/features/team-activity/: overview, quality-audit, trend-signals, alert-system (4 pages)
- docs-site/features/todays-workflow/: updated overview, technical-architecture, workflow-guide, api-reference
This commit is contained in:
@@ -587,334 +587,6 @@ class StrategyArchitectAgent(SIFBaseAgent):
|
||||
|
||||
return samples
|
||||
|
||||
class ContentGuardianAgent(SIFBaseAgent):
    """Agent for preventing cannibalization and ensuring content originality.

    Exposes a site-wide audit (`perform_site_audit`), per-text quality
    scoring (`assess_content_quality`), similarity checks against the
    user's index (`check_cannibalization`) and against competitor content
    (`verify_originality`), plus two heuristic tools (`style_enforcer`,
    `safety_filter`). Every public coroutine catches its own exceptions and
    reports them in the returned dict instead of raising.
    """

    CANNIBALIZATION_THRESHOLD = 0.85  # Similarity above this triggers a cannibalization warning
    ORIGINALITY_THRESHOLD = 0.75  # Originality below this triggers a warning

    def __init__(self, intelligence_service: TxtaiIntelligenceService, user_id: str, sif_service: Any = None):
        """Initialise the agent.

        Args:
            intelligence_service: Search backend handed to the base agent.
            user_id: Owner of the index this agent operates on.
            sif_service: Optional service handle; when set, `style_enforcer`
                tries to look up brand-voice guidelines in the index.
        """
        super().__init__(intelligence_service, user_id, agent_type="content_guardian")
        self.sif_service = sif_service

    async def perform_site_audit(self, website_url: str) -> Dict[str, Any]:
        """
        Perform a comprehensive content audit on the indexed website content.

        Called by the SIF indexing executor after content sync completes.

        Args:
            website_url: URL used to build the index query and to label the report.

        Returns:
            A report dict with `content_quality`, `brand_voice_consistency`,
            `safety_issues` and `cannibalization_issues` (each `None` when no
            indexed content was found), or a dict carrying `error` on failure.
        """
        self._log_agent_operation("Performing site audit", website_url=website_url)
        try:
            # Search the user's SIF index for website content
            results = await self.intelligence.search(
                f"website content analysis {website_url}", limit=10
            )

            audit: Dict[str, Any] = {
                "website_url": website_url,
                "audit_timestamp": datetime.utcnow().isoformat(),
                "total_pages_crawled": len(results),
                "content_quality": None,
                "brand_voice_consistency": None,
                "safety_issues": None,
                "cannibalization_issues": None,
            }

            if not results:
                logger.warning(f"[{self.__class__.__name__}] No indexed content found for {website_url}")
                return audit

            # Run assessments on each indexed page
            quality_scores = []
            style_scores = []
            safety_flags = []

            for result in results:
                text = result.get("text", "") or result.get("id", "")
                if len(text) < 50:
                    # Too little text for the heuristics to say anything useful.
                    continue

                quality = await self.assess_content_quality({"description": text, "title": website_url})
                quality_scores.append(quality.get("score", 0.0))

                # assess_content_quality already ran both tools on this exact
                # text; reuse its results and only call the tools directly
                # when they are missing (e.g. the assessment itself failed).
                style = quality.get("style_analysis")
                if style is None:
                    style = await self.style_enforcer(text)
                style_scores.append(style.get("compliance_score", 0.0))

                safety = quality.get("safety_analysis")
                if safety is None:
                    safety = await self.safety_filter(text)
                if not safety.get("is_safe", True):
                    safety_flags.append(safety.get("flags", []))

            # max(..., 1) keeps the averages defined when every page was skipped.
            audit["content_quality"] = {
                "score": round(sum(quality_scores) / max(len(quality_scores), 1), 4),
                "pages_analyzed": len(quality_scores),
            }
            audit["brand_voice_consistency"] = {
                "compliance_score": round(sum(style_scores) / max(len(style_scores), 1), 4),
                "pages_checked": len(style_scores),
            }
            audit["safety_issues"] = {
                "has_issues": len(safety_flags) > 0,
                "flagged_pages": len(safety_flags),
            }

            # NOTE(review): the URL itself is passed as the "draft"; URLs shorter
            # than 50 characters are reported as "Draft too short" - confirm intent.
            cannibalization = await self.check_cannibalization(website_url)
            audit["cannibalization_issues"] = cannibalization

            logger.info(
                f"[{self.__class__.__name__}] Site audit complete for {website_url}: "
                f"quality={audit['content_quality']['score']}, "
                f"brand_voice={audit['brand_voice_consistency']['compliance_score']}"
            )
            return audit

        except Exception as e:
            logger.error(f"[{self.__class__.__name__}] Site audit failed for {website_url}: {e}")
            return {
                "website_url": website_url,
                "error": str(e),
                "audit_timestamp": datetime.utcnow().isoformat(),
            }

    async def assess_content_quality(self, website_data: Dict[str, Any]) -> Dict[str, Any]:
        """Assess overall content quality based on website data.

        Args:
            website_data: Dict whose `description` (or, failing that, `title`)
                is the text to analyse.

        Returns:
            Dict with `score`, the raw `style_analysis` and `safety_analysis`
            results and `analyzed_text_length`; `{"score": 0.5, "reason": ...}`
            when there is no text; `{"score": 0.0, "error": ...}` on failure.
        """
        self._log_agent_operation("Assessing content quality")
        try:
            # Extract sample text or description from website_data
            text_to_analyze = website_data.get('description', '') or website_data.get('title', '')
            if not text_to_analyze:
                return {"score": 0.5, "reason": "No content to analyze"}

            # Run style check
            style_result = await self.style_enforcer(text_to_analyze)

            # Run safety check
            safety_result = await self.safety_filter(text_to_analyze)

            # Calculate aggregate score: style compliance, halved when the
            # safety filter flags the text for review.
            base_score = style_result.get('compliance_score', 0.8)
            if safety_result.get('action') == 'flag_for_review':
                base_score *= 0.5

            return {
                "score": base_score,
                "style_analysis": style_result,
                "safety_analysis": safety_result,
                "analyzed_text_length": len(text_to_analyze)
            }
        except Exception as e:
            logger.error(f"[{self.__class__.__name__}] Quality assessment failed: {e}")
            return {"score": 0.0, "error": str(e)}

    async def check_cannibalization(self, new_draft: str) -> Dict[str, Any]:
        """Check if a new draft competes semantically with existing pages.

        Args:
            new_draft: Draft text to compare against the user's index.

        Returns:
            Dict with `warning` (bool). When a match exceeds
            `CANNIBALIZATION_THRESHOLD` it also carries `similar_to`, `score`,
            `threshold` and `recommendation`; otherwise `uniqueness_score`
            (clamped to [0, 1]), a `reason`, or an `error`.
        """
        # `or ""` so a None draft reaches the "too short" guard below instead
        # of raising from len() before the try block.
        self._log_agent_operation("Checking for semantic cannibalization", draft_length=len(new_draft or ""))

        try:
            if not await self._ensure_intelligence_ready():
                logger.error(f"[{self.__class__.__name__}] Intelligence service not initialized")
                return {"warning": False, "error": "Service not initialized"}

            if not new_draft or len(new_draft.strip()) < 50:
                logger.warning(f"[{self.__class__.__name__}] Draft too short for meaningful analysis")
                return {"warning": False, "reason": "Draft too short"}

            results = await self.intelligence.search(new_draft, limit=1)

            if not results:
                logger.info(f"[{self.__class__.__name__}] No similar content found - draft is unique")
                return {"warning": False, "uniqueness_score": 1.0}

            top_result = results[0]
            similarity_score = top_result.get('score', 0.0)

            logger.debug(f"[{self.__class__.__name__}] Top similarity score: {similarity_score:.4f}")

            if similarity_score > self.CANNIBALIZATION_THRESHOLD:
                warning_data = {
                    "warning": True,
                    "similar_to": top_result.get('id', 'unknown'),
                    "score": similarity_score,
                    "threshold": self.CANNIBALIZATION_THRESHOLD,
                    "recommendation": "Consider revising the draft to target a different angle or merge with existing content"
                }
                logger.warning(f"[{self.__class__.__name__}] Cannibalization detected: {warning_data}")
                return warning_data

            logger.info(f"[{self.__class__.__name__}] No cannibalization detected. Draft is sufficiently unique.")
            # Clamp so an out-of-range similarity cannot yield a uniqueness
            # outside [0, 1] (mirrors the clamping in verify_originality).
            uniqueness_score = max(0.0, min(1.0, 1.0 - similarity_score))
            return {"warning": False, "uniqueness_score": uniqueness_score}

        except Exception as e:
            logger.error(f"[{self.__class__.__name__}] Failed to check cannibalization: {e}")
            logger.error(f"[{self.__class__.__name__}] Full traceback: {traceback.format_exc()}")
            return {"warning": False, "error": str(e)}

    async def verify_originality(self, text: str, competitor_index: Any) -> Dict[str, Any]:
        """Verify originality against competitor content index.

        Args:
            text: Text to check.
            competitor_index: Object exposing `search(query, limit=...)` (sync
                or async). When it is `None` or has no `search`, the user's
                own index is searched and filtered to documents whose
                metadata `type` or `source` mentions "competitor".

        Returns:
            Dict with `originality_score` (1 - best competitor similarity),
            `confidence`, `method`, and - when matches exist - `warning`,
            `threshold`, `top_competitor_match` and `matches_evaluated`.
            `{"originality_score": 0.0, ...}` with `reason` or `error`
            for short input or failure.
        """
        # `or ""` so a None text reaches the "too short" guard below instead
        # of raising from len() before the try block.
        self._log_agent_operation("Verifying originality against competitors", text_length=len(text or ""))

        try:
            if not text or len(text.strip()) < 50:
                logger.warning(f"[{self.__class__.__name__}] Text too short for meaningful originality check")
                return {"originality_score": 0.0, "reason": "Text too short"}

            query = text.strip()
            competitor_results = []
            method = "user_index_competitor_filter"

            if competitor_index is not None and hasattr(competitor_index, "search"):
                method = "competitor_index_search"
                raw_results = competitor_index.search(query, limit=5)
                # Support both synchronous and coroutine-returning indexes.
                if asyncio.iscoroutine(raw_results):
                    raw_results = await raw_results
                competitor_results = raw_results or []
            else:
                raw_results = await self.intelligence.search(query, limit=10)
                for result in raw_results or []:
                    # Metadata may arrive as a dict or as a JSON string.
                    metadata_raw = result.get("object")
                    metadata = metadata_raw if isinstance(metadata_raw, dict) else {}
                    if not metadata and isinstance(metadata_raw, str):
                        try:
                            metadata = json.loads(metadata_raw)
                        except Exception:
                            metadata = {}

                    doc_type = str((metadata or {}).get("type", "")).lower()
                    source = str((metadata or {}).get("source", "")).lower()
                    if "competitor" in doc_type or "competitor" in source:
                        competitor_results.append(result)

            if not competitor_results:
                return {
                    "originality_score": 1.0,
                    "confidence": 0.6,
                    "method": method,
                    "notes": "No competitor overlap detected in available index"
                }

            top_match = max(competitor_results, key=lambda item: float(item.get("score", 0.0)))
            top_score = max(0.0, min(1.0, float(top_match.get("score", 0.0))))
            originality_score = max(0.0, round(1.0 - top_score, 4))
            # Confidence grows with the number of matches seen, capped at five.
            confidence = round(min(1.0, 0.55 + (min(len(competitor_results), 5) * 0.07)), 3)
            warning = originality_score < self.ORIGINALITY_THRESHOLD

            return {
                "originality_score": originality_score,
                "confidence": confidence,
                "method": method,
                "warning": warning,
                "threshold": self.ORIGINALITY_THRESHOLD,
                "top_competitor_match": {
                    "id": top_match.get("id"),
                    "score": round(top_score, 4)
                },
                "matches_evaluated": len(competitor_results)
            }

        except Exception as e:
            logger.error(f"[{self.__class__.__name__}] Failed to verify originality: {e}")
            logger.error(f"[{self.__class__.__name__}] Full traceback: {traceback.format_exc()}")
            return {"originality_score": 0.0, "error": str(e)}

    async def style_enforcer(self, text: str, style_guidelines: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
        """
        Tool: Ensures content adheres to brand voice and style guidelines.

        Args:
            text: Content to check.
            style_guidelines: Optional guidelines dict; its `tone` drives the
                contraction check. When omitted and `sif_service` is set, the
                index is searched for a `website_analysis` document to derive
                them from.

        Returns:
            Dict with `compliance_score` (1.0 minus 0.1 per issue, floored at
            0), `issues`, `is_compliant` and `guidelines_source`
            ("provided", "sif_index", or "none" when no guidelines were
            available); `{"error": ...}` on unexpected failure.
        """
        # `or ""` so a None text reaches the "No text provided" guard below
        # instead of raising from len() before the try block.
        self._log_agent_operation("Enforcing style guidelines", text_length=len(text or ""))

        try:
            if not text:
                return {"compliance_score": 0.0, "issues": ["No text provided"]}

            # Record where the guidelines come from *before* the lookup can
            # overwrite style_guidelines, so the reported source is accurate.
            guidelines_source = "provided" if style_guidelines else "none"

            # 1. Fetch Style Guidelines from SIF if not provided
            if not style_guidelines and self.sif_service:
                try:
                    # Search for website analysis to get brand voice/style
                    # We assume the most relevant 'website_analysis' doc contains the guidelines
                    results = await self.intelligence.search("website analysis brand voice style", limit=1)
                    if results:
                        import json
                        res = results[0]
                        metadata_str = res.get('object')
                        metadata = json.loads(metadata_str) if isinstance(metadata_str, str) else (metadata_str or res)

                        if metadata.get('type') == 'website_analysis':
                            report = metadata.get('full_report', {})
                            style_guidelines = {
                                "tone": report.get('brand_analysis', {}).get('brand_voice', 'neutral'),
                                "style_patterns": report.get('style_patterns', {}),
                                "writing_style": report.get('writing_style', {})
                            }
                            guidelines_source = "sif_index"
                            logger.info(f"[{self.__class__.__name__}] Retrieved style guidelines from SIF: {style_guidelines.get('tone')}")
                except Exception as e:
                    # Best effort: fall through to the guideline-free checks.
                    logger.warning(f"[{self.__class__.__name__}] Failed to retrieve style guidelines from SIF: {e}")

            issues = []
            score = 1.0

            # Basic Heuristic Checks (Placeholder for LLM-based style analysis)

            # 1. Tone Check (e.g., formal vs casual)
            # If guidelines specify 'formal', check for contractions.
            # str(... or '') tolerates a missing, None or non-string tone.
            tone = str(style_guidelines.get('tone') or '').lower() if style_guidelines else ''
            if 'formal' in tone or 'professional' in tone:
                contractions = ["can't", "won't", "don't", "it's"]
                text_lower = text.lower()
                found_contractions = [c for c in contractions if c in text_lower]
                if found_contractions:
                    issues.append(f"Found contractions in formal text: {', '.join(found_contractions[:3])}...")
                    score -= 0.1

            # 2. Length/Sentence Structure (simple metric)
            # Drop blank segments (e.g. the empty tail after a final '.') so
            # they do not inflate the sentence count and deflate the average.
            sentences = [s for s in text.split('.') if s.strip()]
            avg_len = sum(len(s.split()) for s in sentences) / max(1, len(sentences))
            if avg_len > 25:
                issues.append("Average sentence length is too high (>25 words). Consider shortening.")
                score -= 0.1

            return {
                "compliance_score": max(0.0, score),
                "issues": issues,
                "is_compliant": score > 0.8,
                "guidelines_source": guidelines_source
            }

        except Exception as e:
            logger.error(f"[{self.__class__.__name__}] Style enforcement failed: {e}")
            return {"error": str(e)}

    async def safety_filter(self, text: str) -> Dict[str, Any]:
        """
        Tool: Flags potentially harmful, offensive, or sensitive content.

        Matching is a case-insensitive whole-word keyword lookup, so a
        keyword is found at the start or end of the text and next to
        punctuation, but not inside a longer word.

        Args:
            text: Content to scan.

        Returns:
            Dict with `is_safe`, `flags` (matched keywords, in blocklist
            order), `safety_score` (1.0 or 0.0) and `action` ("approve" or
            "flag_for_review"); `{"error": ...}` on unexpected failure.
        """
        import re

        # `or ""` so a None text is treated as empty rather than raising.
        self._log_agent_operation("Running safety filter", text_length=len(text or ""))

        try:
            # Basic Keyword Blocklist (Placeholder for LLM/Safety Model)
            # In production, this should call a dedicated safety API (e.g., OpenAI Moderation, Llama Guard)
            unsafe_keywords = [
                "hate", "kill", "murder", "attack", "destroy",  # Violent
                "scam", "fraud", "steal",  # Illegal
                "explicit", "adult"  # NSFW
            ]

            text_lower = (text or "").lower()

            # \b gives a real word-boundary match; the previous " keyword "
            # substring test missed hits such as "a scam." or a leading "Kill".
            found_flags = [
                keyword
                for keyword in unsafe_keywords
                if re.search(rf"\b{re.escape(keyword)}\b", text_lower)
            ]

            is_safe = len(found_flags) == 0

            return {
                "is_safe": is_safe,
                "flags": found_flags,
                "safety_score": 1.0 if is_safe else 0.0,
                "action": "approve" if is_safe else "flag_for_review"
            }

        except Exception as e:
            logger.error(f"[{self.__class__.__name__}] Safety filter failed: {e}")
            return {"error": str(e)}
|
||||
|
||||
class LinkGraphAgent(SIFBaseAgent):
|
||||
"""
|
||||
|
||||
Reference in New Issue
Block a user