Files
ALwrity/backend/services/intelligence/agents/content_gap_radar_agent.py
ajaysi 923fa671fe feat: ContentGuardianAgent, onboarding UX, Team Activity action wiring, docs, agent help modal
ContentGuardianAgent consolidation:
- Merge 3 duplicate classes into single source in specialized/content_guardian.py
- Watchdog audit_committee() with heuristic scoring, coverage gaps, overlaps, alerts
- Remove misleading rejection_rate() helper; use acceptance_rate directly
- Integrate audit + alerts + trend signals into today_workflow_service.py

Team Activity page:
- QualityAuditPanel: health ring, per-agent critiques, coverage gaps, overlaps
- TrendSignalsPanel: opportunity cards with urgency/impact/coverage bars
- AlertBanner: persistent dismiss via POST /alerts/{id}/mark-read
- AgentHelpModal: dialog showing all 8 agents with descriptions, tools, schedule
- QualityAuditPanel action buttons: Fill gap -> /content-planning, Resolve overlap, View CTA on alerts/issues
- TrendSignalsPanel action buttons: Create content from this trend -> /blog-writer with trend context state

Onboarding system:
- Step 4 validation: no auto-pass via basic_ready; requires persona data or explicit progression
- Step 5 validation: logs warning on auto-pass without integration data
- OnboardingCompletionService: single DB session, transactional task creation, upsert pattern
- Business-without-website: nullable website_url on SIFIndexingTask and MarketTrendsTask
- DeepCompetitorAnalysisExecutor: 5-min timeout, 10-competitor cap, asyncio.wait_for
- Persona generation: async with 30s timeout, falls back to scheduler
- OnboardingProgressService.reset_onboarding(): resets session + pauses all DB tasks
- OnboardingControlService.reset_onboarding(): also cancels APScheduler jobs
- FinalStep TaskSchedulingPanel: shows scheduled/failed tasks after completion, 8s auto-redirect
- onboarding_completed agent activity event logged to feed

Documentation:
- docs-site/features/onboarding/: overview, steps, scheduler-tasks, technical-reference (4 pages)
- docs-site/mkdocs.yml: added Onboarding System nav section
- docs-site/features/sif-agents/: overview, agent-directory, committee-system, content-guardian (4 pages)
- docs-site/features/team-activity/: overview, quality-audit, trend-signals, alert-system (4 pages)
- docs-site/features/todays-workflow/: updated overview, technical-architecture, workflow-guide, api-reference
2026-06-01 12:24:31 +05:30

467 lines
18 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
Content Gap Radar Agent
Scores and prioritizes content opportunities by combining SIF semantic gap analysis,
SERP ranking presence (Google CSE), competitor content deep-dive (Exa), and trend
momentum into a single ROI score per topic.
Phase 3 of the Content Gap Radar feature.
"""
import traceback
from typing import List, Dict, Any, Optional
from loguru import logger
from services.intelligence.agents.specialized import SIFBaseAgent
from services.intelligence.agents.specialized.strategy_architect import StrategyArchitectAgent
from services.intelligence.agents.trend_surfer_agent import TrendSurferAgent
from services.intelligence.agents.core_agent_framework import TaskProposal
from services.intelligence.txtai_service import TxtaiIntelligenceService
from services.seo_tools.serp_gap_service import SerpGapService
from services.seo_tools.competitor_content_service import CompetitorContentService
class ContentGapRadarAgent(SIFBaseAgent):
    """
    Content-opportunity scoring agent.

    Blends four signals into one ROI score per topic: SIF semantic gap
    analysis, SERP ranking presence, Exa competitor content, and trend
    momentum. Topics are then prioritized by that score.
    """
def __init__(
    self,
    intelligence_service: TxtaiIntelligenceService,
    user_id: str,
    **kwargs,
) -> None:
    """
    Set up the agent and the collaborators it delegates to.

    Args:
        intelligence_service: Intelligence service forwarded to the base
            agent and to the StrategyArchitectAgent created here.
        user_id: Identifier of the user this agent instance works for.
        **kwargs: Extra options passed through untouched to SIFBaseAgent.
    """
    super().__init__(
        intelligence_service,
        user_id,
        agent_type="content_gap_radar",
        **kwargs,
    )
    self.user_id = user_id

    # Data sources used by analyze(): SERP presence and competitor content.
    self.serp_service = SerpGapService()
    self.competitor_content_service = CompetitorContentService()

    # Supplies the SIF semantic gaps that seed and enrich the topic list.
    self.strategy_architect = StrategyArchitectAgent(intelligence_service, user_id)
async def analyze(
    self,
    competitor_domains: List[str],
    competitor_indices: Optional[List[Any]] = None,
    topics: Optional[List[str]] = None,
    bypass_cache: bool = False,
) -> Dict[str, Any]:
    """
    Run the content gap radar end to end and rank the resulting topics.

    Stages, in order:
        1. Collect SIF semantic gaps. When no topics are passed, the first
           12 gaps become the topic list and a failure here is fatal; when
           topics are passed, the SIF data is only enrichment and a failure
           is logged and ignored.
        2. Query SERP gap data for every topic.
        3. Query Exa competitor content for the first 6 topics only
           (paid API).
        4. Collect trend signals from a TrendSurferAgent.
        5. Score every topic with ``_score_topic``.
        6. Sort by ``roi_score`` (highest first) and count per priority.

    Args:
        competitor_domains: Known competitor domains.
        competitor_indices: SIF index positions for competitor docs.
        topics: Optional explicit topic list (derived from SIF if omitted).
        bypass_cache: Forwarded to the SERP and Exa services to force
            fresh API calls.

    Returns:
        ``{"gaps": [...], "summary": {...}}``. The summary is an empty dict
        when there is nothing to analyze. If any stage raises, the error is
        logged and ``{"gaps": [], "summary": {}, "error": "<message>"}`` is
        returned instead of propagating.
    """
    self._log_agent_operation(
        "Running content gap radar",
        competitor_count=len(competitor_domains),
        topics_provided=bool(topics),
    )
    tag = f"[{self.__class__.__name__}]"

    try:
        sif_gaps = []
        if topics:
            # Caller chose the topics: SIF data is nice-to-have, so a
            # failure here must not abort the run.
            try:
                sif_gaps = await self.strategy_architect.find_semantic_gaps(
                    competitor_indices or []
                )
            except Exception as e:
                logger.warning(f"{tag} SIF gap fetch failed (non-fatal): {e}")
                sif_gaps = []
        else:
            # No topics given: SIF gaps are the only topic source, so any
            # failure falls through to the outer handler.
            sif_gaps = await self.strategy_architect.find_semantic_gaps(
                competitor_indices or []
            )
            topics = [gap["topic"] for gap in sif_gaps[:12]]
            logger.info(f"{tag} Derived {len(topics)} topics from SIF gaps")
            if not topics:
                logger.info(f"{tag} No topics to analyze")
                return {"gaps": [], "summary": {}}

        # Per-topic lookup tables for cross-referencing during scoring.
        sif_map = {gap["topic"]: gap for gap in sif_gaps}

        serp_data = await self.serp_service.analyze_topic_gaps(
            topics, competitor_domains, bypass_cache=bypass_cache
        )
        serp_map = {entry["topic"]: entry for entry in serp_data.get("gaps", [])}

        exa_data = await self.competitor_content_service.deep_dive(
            topics[:6], competitor_domains, bypass_cache=bypass_cache
        )
        exa_map = {entry["topic"]: entry for entry in exa_data.get("results", [])}

        trend_signals = await TrendSurferAgent(
            self.intelligence, self.user_id
        ).surf_trends()

        ranked = sorted(
            (
                self._score_topic(
                    topic=topic,
                    sif_map=sif_map,
                    serp_map=serp_map,
                    exa_map=exa_map,
                    trend_signals=trend_signals,
                )
                for topic in topics
            ),
            key=lambda item: item["roi_score"],
            reverse=True,
        )

        counts = {"high": 0, "medium": 0, "low": 0}
        for item in ranked:
            level = item["priority"]
            if level in counts:
                counts[level] += 1

        logger.info(
            f"{tag} Scored {len(ranked)} gaps: "
            f"{counts['high']} high, {counts['medium']} medium, {counts['low']} low"
        )
        return {
            "gaps": ranked,
            "summary": {
                "total_topics_analyzed": len(topics),
                "high_priority": counts["high"],
                "medium_priority": counts["medium"],
                "low_priority": counts["low"],
            },
        }
    except Exception as e:
        logger.error(f"{tag} Content gap radar failed: {e}")
        logger.error(f"{tag} Full traceback: {traceback.format_exc()}")
        return {"gaps": [], "summary": {}, "error": str(e)}
async def propose_daily_tasks(self, context: Dict[str, Any]) -> List[TaskProposal]:
    """
    Propose high-ROI content tasks from gap radar analysis.

    Integrates with Today's Workflow agent committee polling.

    Competitor domains are read from
    ``context["onboarding_data"]["competitor_focus"]["top_competitor_domains"]``.
    Any level of that path may be missing or null; in that case no analysis
    is run and an empty list is returned.

    Args:
        context: Workflow context dict.

    Returns:
        Up to three TaskProposal objects, one per high/medium priority gap,
        ordered by ROI score (highest first). Empty when there are no
        competitor domains, when the analysis fails, or when no gap reaches
        medium priority.
    """
    proposals: List[TaskProposal] = []

    # ``dict.get(key, {})`` only applies the default when the key is absent;
    # a key stored with a null value would hand back None and break the next
    # ``.get``. ``or`` covers both the missing and the null case.
    onboarding = (context or {}).get("onboarding_data") or {}
    competitor_focus = onboarding.get("competitor_focus") or {}
    competitor_domains = competitor_focus.get("top_competitor_domains") or []

    if not competitor_domains:
        logger.info(f"[{self.__class__.__name__}] No competitor domains in context, skipping")
        return proposals

    try:
        result = await self.analyze(
            competitor_domains=competitor_domains,
            competitor_indices=[],
        )
    except Exception as e:
        logger.error(f"[{self.__class__.__name__}] propose_daily_tasks failed: {e}")
        return proposals

    gaps = result.get("gaps", [])
    scored = [g for g in gaps if g["priority"] in ("high", "medium")]
    scored.sort(key=lambda x: x["roi_score"], reverse=True)

    for gap in scored[:3]:
        pillar_id = self._action_to_pillar(gap["recommended_action"])
        # "generate" tasks open the writer; everything else opens the radar
        # section of the SEO dashboard.
        action_url = (
            "/blog-writer"
            if pillar_id == "generate"
            else "/seo-dashboard#content-gap-radar"
        )
        proposals.append(TaskProposal(
            title=f"Write about: {gap['topic']}",
            description=gap["recommended_action"],
            pillar_id=pillar_id,
            priority=gap["priority"],
            estimated_time=60 if pillar_id == "generate" else 30,
            source_agent="ContentGapRadarAgent",
            reasoning=(
                f"Content gap with {gap['scoring']['gap_size']:.0%} gap size, "
                f"{gap['scoring']['volume']:.0%} volume, "
                f"{gap['scoring']['trend']:.0%} trend momentum, "
                f"ROI {gap['roi_score']:.0%}"
            ),
            action_type="navigate",
            action_url=action_url,
            context_data={"gap": gap},
        ))
    return proposals
@staticmethod
def _action_to_pillar(recommended_action: str) -> str:
    """
    Map a recommended-action sentence to a workflow pillar id.

    Actions mentioning "optimize" (case-insensitive) map to "analyze";
    every other action maps to "generate".
    """
    return "analyze" if "optimize" in recommended_action.lower() else "generate"
def _score_topic(
    self,
    topic: str,
    sif_map: Dict[str, Any],
    serp_map: Dict[str, Any],
    exa_map: Dict[str, Any],
    trend_signals: List[Any],
) -> Dict[str, Any]:
    """
    Compute the ROI score, priority and evidence bundle for one topic.

    Formula:
        ROI = (gap_size × volume × trend × intent) × (1 - 0.3 × competition)

    Inputs to the formula:
        gap_size:    ``coverage_delta`` of the topic's SIF entry (0.5 if absent).
        volume:      ``competitor_count / total_domains_checked`` from the
                     SERP entry, capped at 1.0 (0.0 when there is no entry).
        trend:       result of ``_match_trend_score``.
        intent:      result of ``_classify_intent``.
        competition: Exa ``total_results / 10``, capped at 1.0.

    Priority is "high" for ROI >= 0.6, "medium" for ROI >= 0.3, else "low".

    Returns:
        Dict with ``topic``, ``roi_score``, ``priority``,
        ``recommended_action``, the rounded ``scoring`` components and the
        raw evidence (``sif_gap``, ``serp_evidence``, ``competitor_content``),
        each of which is None when no data exists for the topic.
    """
    sif_entry = sif_map.get(topic, {})
    serp_entry = serp_map.get(topic, {})
    exa_entry = exa_map.get(topic, {})

    gap_size = sif_entry.get("coverage_delta", 0.5)

    # Share of checked domains that already rank for the topic.
    ranking_competitors = serp_entry.get("competitor_count", 0)
    domains_checked = max(serp_entry.get("total_domains_checked", 1), 1)
    volume = min(ranking_competitors / domains_checked, 1.0)

    trend_score = self._match_trend_score(topic, trend_signals)
    intent = self._classify_intent(topic)

    # Depth of existing competitor content acts as a penalty, not a factor.
    competition = min(exa_entry.get("total_results", 0) / 10.0, 1.0)

    roi = (gap_size * volume * trend_score * intent) * (1 - 0.3 * competition)

    priority = next(
        (label for floor, label in ((0.6, "high"), (0.3, "medium")) if roi >= floor),
        "low",
    )

    serp_evidence = None
    if serp_entry:
        serp_evidence = {
            "competitors_found": serp_entry.get("competitors_found", []),
            "competitor_count": ranking_competitors,
            "domains_with_content": serp_entry.get("domains_with_content", []),
        }

    return {
        "topic": topic,
        "roi_score": round(roi, 3),
        "priority": priority,
        "recommended_action": self._recommend_action(gap_size, competition, intent),
        "scoring": {
            "gap_size": round(gap_size, 3),
            "volume": round(volume, 3),
            "trend": round(trend_score, 3),
            "intent": round(intent, 3),
            "competition": round(competition, 3),
        },
        "sif_gap": sif_entry or None,
        "serp_evidence": serp_evidence,
        "competitor_content": exa_entry or None,
    }
def _match_trend_score(self, topic: str, signals: List[Dict[str, Any]]) -> float:
    """
    Estimate trend momentum for a topic from a list of trend signals.

    For every signal, the ``topic``, ``headline`` and ``suggested_angle``
    fields are concatenated and compared against the topic:

    * if the whole topic appears as a substring, the signal's
      ``impact_score`` is a candidate score;
    * otherwise-or-additionally, the share of topic words found in the
      signal text, multiplied by ``impact_score``, is a candidate score.

    The best candidate wins, but the result never drops below the neutral
    value 0.5, which is also returned when there are no signals.

    Args:
        topic: Topic to look up.
        signals: Trend signal dicts. A missing, null or non-numeric
            ``impact_score`` is treated as the neutral 0.5.

    Returns:
        Trend score, at least 0.5.
    """
    if not signals:
        return 0.5

    # Strip so that a blank topic cannot "match" every text as a substring.
    topic_lower = topic.lower().strip()
    topic_words = set(topic_lower.split())
    best_score = 0.0

    for signal in signals:
        # Text fields are already filtered for None below; give the impact
        # score the same tolerance instead of letting max() raise.
        raw_impact = signal.get("impact_score")
        try:
            impact = 0.5 if raw_impact is None else float(raw_impact)
        except (TypeError, ValueError):
            impact = 0.5

        text_lower = " ".join(
            str(part)
            for part in (
                signal.get("topic"),
                signal.get("headline"),
                signal.get("suggested_angle"),
            )
            if part
        ).lower()

        if topic_lower and topic_lower in text_lower:
            best_score = max(best_score, impact)

        overlap = len(topic_words & set(text_lower.split()))
        if overlap > 0:
            word_score = (overlap / max(len(topic_words), 1)) * impact
            best_score = max(best_score, word_score)

    return max(best_score, 0.5)
def _classify_intent(self, topic: str) -> float:
    """
    Classify the commercial intent of a topic with keyword heuristics.

    No LLM is involved; the topic is matched case-insensitively against two
    keyword lists. Keywords must start at a word boundary and may carry a
    simple inflection suffix (s, es, d, ed, ing, er, est), so "reviews" and
    "buying" match while "laptop" (contains "top"), "canvas" (contains
    "vs") or "demographics" (contains "demo") do not.

    Returns:
        0.9 if a transactional keyword is present, otherwise 0.7 if a
        commercial keyword is present, otherwise 0.4 (informational).
    """
    # Function-scope import, in line with the local ``json`` import used
    # elsewhere in this file.
    import re

    topic_lower = topic.lower()

    commercial_words = [
        "best", "top", "review", "vs", "comparison", "alternative",
        "vs.", "versus", "pricing", "cost", "price", "cheap",
        "affordable", "discount", "coupon", "deal", "buy",
    ]
    transactional_words = [
        "buy", "purchase", "order", "subscribe", "sign up",
        "download", "get started", "free trial", "demo",
    ]

    def _mentions(words: List[str]) -> bool:
        # (?<!\w) / (?!\w) instead of \b so that entries ending in
        # punctuation such as "vs." are anchored correctly as well.
        alternation = "|".join(re.escape(word) for word in words)
        pattern = rf"(?<!\w)(?:{alternation})(?:s|es|d|ed|ing|er|est)?(?!\w)"
        return re.search(pattern, topic_lower) is not None

    # Transactional outranks commercial when both are present.
    if _mentions(transactional_words):
        return 0.9
    if _mentions(commercial_words):
        return 0.7
    return 0.4  # Informational default
def _recommend_action(
    self, gap_size: float, competition: float, intent: float
) -> str:
    """
    Pick the recommended action sentence for a scoring profile.

    Rules are checked top to bottom and the first match wins:
        1. gap_size > 0.7 and competition < 0.3 -> pillar page
        2. gap_size > 0.5 and intent > 0.6      -> high-conversion content
        3. competition > 0.7                    -> differentiated content
        4. gap_size < 0.3                       -> optimize existing content
        5. anything else                        -> targeted blog post
    """
    if gap_size > 0.7 and competition < 0.3:
        return "Create comprehensive pillar page — large gap, low competition"

    if gap_size > 0.5 and intent > 0.6:
        return "Create high-conversion content — significant gap, strong intent"

    if competition > 0.7:
        return "Create differentiated content — high competition requires unique angle"

    if gap_size < 0.3:
        return "Optimize existing content — incremental gap, update current pages"

    return "Create targeted blog post — moderate opportunity"
async def generate_content_brief(
    self,
    topic: str,
    recommended_action: str,
    scoring: Optional[Dict[str, float]] = None,
    serp_evidence: Optional[Dict[str, Any]] = None,
    sif_gap: Optional[Dict[str, Any]] = None,
) -> Dict[str, Any]:
    """
    Generate a structured content brief from a gap item.

    Asks the LLM for title options, outline sections, target keywords, a
    writing angle and a word count. If the LLM call fails or its answer
    contains no parseable JSON object, a template brief is used instead.
    If the LLM returns only some of the expected keys, the missing ones
    are filled in from the same template, so the returned brief always
    has ``titles``, ``outline``, ``keywords``, ``angle`` and ``word_count``.

    Args:
        topic: Topic the brief is about.
        recommended_action: Action sentence passed to the prompt and echoed
            in the result.
        scoring: Optional scoring components (``gap_size``, ``volume``,
            ``trend``, ``intent``, ``competition``); each defaults to 0.5.
        serp_evidence: Optional SERP evidence; up to three entries of
            ``competitors_found`` are summarised in the prompt.
        sif_gap: Optional SIF gap entry; ``coverage_delta`` and
            ``confidence`` are mentioned in the prompt.

    Returns:
        ``{"topic", "recommended_action", "brief", "scoring"}``.
    """
    import json as _json

    profile = scoring or {}
    gap_size = profile.get("gap_size", 0.5)
    volume = profile.get("volume", 0.5)
    trend = profile.get("trend", 0.5)
    intent = profile.get("intent", 0.5)
    competition = profile.get("competition", 0.5)

    # Crowded topics get a shorter piece, large gaps a longer one.
    if competition > 0.7:
        word_count = 800
    elif gap_size > 0.5:
        word_count = 1200
    else:
        word_count = 600

    # Prompt context is built outside the try block, so it must tolerate
    # null titles/snippets and non-dict entries instead of raising.
    serp_context = ""
    competitors = (serp_evidence or {}).get("competitors_found") or []
    snippets = []
    for c in competitors[:3]:
        if isinstance(c, dict):
            snippets.append(
                f"- {c.get('title') or ''}: {str(c.get('snippet') or '')[:100]}"
            )
        else:
            snippets.append(f"- {c}")
    if snippets:
        serp_context = "Competitor content already ranking:\n" + "\n".join(snippets)

    sif_context = ""
    if sif_gap:
        sif_context = (
            f"SIF coverage delta: {sif_gap.get('coverage_delta') or 0:.2%}, "
            f"confidence: {sif_gap.get('confidence') or 0:.2%}"
        )

    prompt = f"""You are a senior content strategist. Create a detailed content brief for the topic below.
TOPIC: {topic}
RECOMMENDED ACTION: {recommended_action}
{serp_context}
{sif_context}
Scoring profile:
- Gap size: {gap_size:.0%}
- Search volume: {volume:.0%}
- Trend momentum: {trend:.0%}
- Intent score: {intent:.0%}
- Competition level: {competition:.0%}
- Target word count: {word_count}
Return a JSON object with these exact keys:
{{
"titles": ["Title option 1", "Title option 2", "Title option 3"],
"outline": [
{{"heading": "Section heading", "key_points": ["point 1", "point 2", "point 3"]}}
],
"keywords": ["keyword1", "keyword2", "keyword3", "keyword4", "keyword5"],
"angle": "A single paragraph describing the strategic writing angle",
"word_count": {word_count}
}}
Generate 4-6 outline sections. Only return valid JSON, no other text."""

    # Template brief: full fallback on LLM failure, and the source of
    # defaults for keys a partial LLM answer leaves out.
    topic_words = topic.split()
    if len(topic_words) > 1:
        template_keywords = [topic, topic_words[-1]]
    else:
        template_keywords = [topic, "guide", "strategy"]
    template = {
        "titles": [
            f"The Ultimate Guide to {topic}",
            f"{topic}: Strategies That Actually Work",
            f"Why {topic} Matters More Than Ever",
        ],
        "outline": [
            {"heading": f"Introduction to {topic}", "key_points": ["Context and importance", "What this guide covers"]},
            {"heading": "Why This Matters", "key_points": ["Current landscape", "Key challenges and opportunities"]},
            {"heading": "Key Strategies", "key_points": ["Strategy 1 with examples", "Strategy 2 with implementation tips", "Strategy 3 for advanced practitioners"]},
            {"heading": "Common Pitfalls to Avoid", "key_points": ["Mistake 1 and how to avoid it", "Mistake 2 and how to avoid it"]},
            {"heading": "Measuring Success", "key_points": ["Key metrics to track", "Tools and methods for measurement"]},
            {"heading": "Conclusion & Next Steps", "key_points": ["Summary of key takeaways", "Actionable next steps"]},
        ],
        "keywords": template_keywords,
        "angle": f"Create comprehensive, actionable content about {topic} that fills the gap identified in competitor analysis. Focus on providing unique insights and practical implementation guidance.",
        "word_count": word_count,
    }

    try:
        response = await self._generate_llm_response(prompt)
        # The model may wrap the JSON in prose; keep only the outermost
        # brace-delimited span.
        start = response.find("{")
        end = response.rfind("}") + 1
        if start < 0 or end <= start:
            raise ValueError("No JSON found in LLM response")
        parsed = _json.loads(response[start:end])
        if not isinstance(parsed, dict):
            raise ValueError("LLM response JSON is not an object")
        # LLM values win; template values fill missing or null keys.
        brief = {
            **template,
            **{key: value for key, value in parsed.items() if value is not None},
        }
    except Exception as e:
        logger.warning(
            f"[{self.__class__.__name__}] LLM brief generation failed, using template: {e}"
        )
        brief = template

    return {
        "topic": topic,
        "recommended_action": recommended_action,
        "brief": brief,
        "scoring": scoring,
    }