ContentGuardianAgent consolidation:
- Merge 3 duplicate classes into single source in specialized/content_guardian.py
- Watchdog audit_committee() with heuristic scoring, coverage gaps, overlaps, alerts
- Remove misleading rejection_rate() helper; use acceptance_rate directly
- Integrate audit + alerts + trend signals into today_workflow_service.py
Team Activity page:
- QualityAuditPanel: health ring, per-agent critiques, coverage gaps, overlaps
- TrendSignalsPanel: opportunity cards with urgency/impact/coverage bars
- AlertBanner: persistent dismiss via POST /alerts/{id}/mark-read
- AgentHelpModal: dialog showing all 8 agents with descriptions, tools, schedule
- QualityAuditPanel action buttons: Fill gap -> /content-planning, Resolve overlap, View CTA on alerts/issues
- TrendSignalsPanel action buttons: Create content from this trend -> /blog-writer with trend context state
Onboarding system:
- Step 4 validation: no auto-pass via basic_ready; requires persona data or explicit progression
- Step 5 validation: logs warning on auto-pass without integration data
- OnboardingCompletionService: single DB session, transactional task creation, upsert pattern
- Business-without-website: nullable website_url on SIFIndexingTask and MarketTrendsTask
- DeepCompetitorAnalysisExecutor: 5-min timeout, 10-competitor cap, asyncio.wait_for
- Persona generation: async with 30s timeout, falls back to scheduler
- OnboardingProgressService.reset_onboarding(): resets session + pauses all DB tasks
- OnboardingControlService.reset_onboarding(): also cancels APScheduler jobs
- FinalStep TaskSchedulingPanel: shows scheduled/failed tasks after completion, 8s auto-redirect
- onboarding_completed agent activity event logged to feed
Documentation:
- docs-site/features/onboarding/: overview, steps, scheduler-tasks, technical-reference (4 pages)
- docs-site/mkdocs.yml: added Onboarding System nav section
- docs-site/features/sif-agents/: overview, agent-directory, committee-system, content-guardian (4 pages)
- docs-site/features/team-activity/: overview, quality-audit, trend-signals, alert-system (4 pages)
- docs-site/features/todays-workflow/: updated overview, technical-architecture, workflow-guide, api-reference
467 lines
18 KiB
Python
467 lines
18 KiB
Python
"""
|
||
Content Gap Radar Agent
|
||
|
||
Scores and prioritizes content opportunities by combining SIF semantic gap analysis,
|
||
SERP ranking presence (Google CSE), competitor content deep-dive (Exa), and trend
|
||
momentum into a single ROI score per topic.
|
||
|
||
Phase 3 of the Content Gap Radar feature.
|
||
"""
|
||
|
||
import traceback
|
||
from typing import List, Dict, Any, Optional
|
||
from loguru import logger
|
||
|
||
from services.intelligence.agents.specialized import SIFBaseAgent
|
||
from services.intelligence.agents.specialized.strategy_architect import StrategyArchitectAgent
|
||
from services.intelligence.agents.trend_surfer_agent import TrendSurferAgent
|
||
from services.intelligence.agents.core_agent_framework import TaskProposal
|
||
from services.intelligence.txtai_service import TxtaiIntelligenceService
|
||
from services.seo_tools.serp_gap_service import SerpGapService
|
||
from services.seo_tools.competitor_content_service import CompetitorContentService
|
||
|
||
|
||
class ContentGapRadarAgent(SIFBaseAgent):
|
||
"""
|
||
Agent that scores and prioritizes content opportunities by combining
|
||
SIF semantic gap analysis, SERP ranking presence, Exa competitor content,
|
||
and trend momentum into a single ROI score.
|
||
"""
|
||
|
||
def __init__(self, intelligence_service: TxtaiIntelligenceService, user_id: str, **kwargs):
    """
    Initialise the radar agent and the collaborators it delegates to.

    Args:
        intelligence_service: Service handed to the base agent and to the
            nested StrategyArchitectAgent.
        user_id: Identifier of the user this agent instance works for.
        **kwargs: Forwarded unchanged to the base-class initialiser.
    """
    super().__init__(intelligence_service, user_id, agent_type="content_gap_radar", **kwargs)
    self.user_id = user_id

    # Collaborators consumed by analyze(): SERP presence, competitor content
    # deep-dive, and SIF semantic gap discovery.
    self.serp_service = SerpGapService()
    self.competitor_content_service = CompetitorContentService()
    self.strategy_architect = StrategyArchitectAgent(intelligence_service, user_id)
|
||
|
||
async def analyze(
    self,
    competitor_domains: List[str],
    competitor_indices: Optional[List[Any]] = None,
    topics: Optional[List[str]] = None,
    bypass_cache: bool = False,
) -> Dict[str, Any]:
    """
    Run the content gap radar pipeline and return ROI-ranked topics.

    Stages, in order:
        1. Resolve topics: use the caller's list, or derive up to 12 from
           SIF semantic gaps.
        2. Collect SERP gap data for every topic.
        3. Collect Exa competitor content for the first 6 topics only
           (paid API).
        4. Collect trend signals from a TrendSurferAgent.
        5. Score every topic with ``_score_topic`` and sort by ROI,
           highest first.
        6. Summarise how many topics fall in each priority bucket.

    Args:
        competitor_domains: Known competitor domains.
        competitor_indices: SIF index positions for competitor docs; an
            empty list is used when omitted.
        topics: Optional explicit topic list (derived from SIF if omitted
            or empty).
        bypass_cache: Passed to the SERP and Exa services to force fresh
            API calls.

    Returns:
        ``{"gaps": [...], "summary": {...}}``. Both are empty when no topic
        can be resolved. If any stage raises, the failure is logged and the
        empty result additionally carries an ``"error"`` message.
    """
    agent_name = self.__class__.__name__
    self._log_agent_operation(
        "Running content gap radar",
        competitor_count=len(competitor_domains),
        topics_provided=bool(topics),
    )

    try:
        indices = competitor_indices or []

        if topics:
            # Caller-supplied topics: SIF data only enriches the scores,
            # so a failed fetch is tolerated.
            try:
                sif_gaps = await self.strategy_architect.find_semantic_gaps(indices)
            except Exception as sif_error:
                logger.warning(
                    f"[{agent_name}] SIF gap fetch failed (non-fatal): {sif_error}"
                )
                sif_gaps = []
        else:
            # No topics given: SIF gaps are the topic source, so a failure
            # here is fatal for the run (handled by the outer except).
            sif_gaps = await self.strategy_architect.find_semantic_gaps(indices)
            topics = [gap["topic"] for gap in sif_gaps[:12]]
            logger.info(f"[{agent_name}] Derived {len(topics)} topics from SIF gaps")
            if not topics:
                logger.info(f"[{agent_name}] No topics to analyze")
                return {"gaps": [], "summary": {}}

        # Lookup tables keyed by topic for cross-referencing during scoring.
        sif_map = {gap["topic"]: gap for gap in sif_gaps}

        serp_data = await self.serp_service.analyze_topic_gaps(
            topics, competitor_domains, bypass_cache=bypass_cache
        )
        serp_map = {entry["topic"]: entry for entry in serp_data.get("gaps", [])}

        # Exa is a paid API, so only the first six topics get a deep-dive.
        exa_data = await self.competitor_content_service.deep_dive(
            topics[:6], competitor_domains, bypass_cache=bypass_cache
        )
        exa_map = {entry["topic"]: entry for entry in exa_data.get("results", [])}

        trend_signals = await TrendSurferAgent(self.intelligence, self.user_id).surf_trends()

        scored = sorted(
            (
                self._score_topic(
                    topic=topic,
                    sif_map=sif_map,
                    serp_map=serp_map,
                    exa_map=exa_map,
                    trend_signals=trend_signals,
                )
                for topic in topics
            ),
            key=lambda item: item["roi_score"],
            reverse=True,
        )

        tally = {"high": 0, "medium": 0, "low": 0}
        for item in scored:
            bucket = item["priority"]
            if bucket in tally:
                tally[bucket] += 1

        logger.info(
            f"[{agent_name}] Scored {len(scored)} gaps: "
            f"{tally['high']} high, {tally['medium']} medium, {tally['low']} low"
        )

        return {
            "gaps": scored,
            "summary": {
                "total_topics_analyzed": len(topics),
                "high_priority": tally["high"],
                "medium_priority": tally["medium"],
                "low_priority": tally["low"],
            },
        }

    except Exception as e:
        logger.error(f"[{agent_name}] Content gap radar failed: {e}")
        logger.error(f"[{agent_name}] Full traceback: {traceback.format_exc()}")
        return {"gaps": [], "summary": {}, "error": str(e)}
|
||
|
||
async def propose_daily_tasks(self, context: Dict[str, Any]) -> List[TaskProposal]:
    """
    Turn the best-scoring content gaps into at most three task proposals.

    Competitor domains are read from
    ``context["onboarding_data"]["competitor_focus"]["top_competitor_domains"]``;
    without them no analysis is run. Only gaps with "high" or "medium"
    priority are proposed, highest ROI first.

    Args:
        context: Workflow context supplied by the caller.

    Returns:
        A list of TaskProposal objects; empty when there are no competitor
        domains, when ``analyze`` raises, or when no gap qualifies.
    """
    agent_name = self.__class__.__name__

    competitor_domains = (
        context.get("onboarding_data", {})
        .get("competitor_focus", {})
        .get("top_competitor_domains", [])
    )
    if not competitor_domains:
        logger.info(f"[{agent_name}] No competitor domains in context, skipping")
        return []

    try:
        result = await self.analyze(
            competitor_domains=competitor_domains,
            competitor_indices=[],
        )
    except Exception as e:
        logger.error(f"[{agent_name}] propose_daily_tasks failed: {e}")
        return []

    actionable = sorted(
        (gap for gap in result.get("gaps", []) if gap["priority"] in ("high", "medium")),
        key=lambda gap: gap["roi_score"],
        reverse=True,
    )

    proposals = []
    for gap in actionable[:3]:
        pillar_id = self._action_to_pillar(gap["recommended_action"])
        creates_content = pillar_id == "generate"
        breakdown = gap["scoring"]

        reasoning = (
            f"Content gap with {breakdown['gap_size']:.0%} gap size, "
            f"{breakdown['volume']:.0%} volume, "
            f"{breakdown['trend']:.0%} trend momentum, "
            f"ROI {gap['roi_score']:.0%}"
        )

        proposals.append(
            TaskProposal(
                title=f"Write about: {gap['topic']}",
                description=gap["recommended_action"],
                pillar_id=pillar_id,
                priority=gap["priority"],
                # Writing new content is budgeted at twice the time of an optimisation pass.
                estimated_time=60 if creates_content else 30,
                source_agent="ContentGapRadarAgent",
                reasoning=reasoning,
                action_type="navigate",
                action_url="/blog-writer" if creates_content else "/seo-dashboard#content-gap-radar",
                context_data={"gap": gap},
            )
        )

    return proposals
|
||
|
||
@staticmethod
|
||
def _action_to_pillar(recommended_action: str) -> str:
|
||
action_lower = recommended_action.lower()
|
||
if "optimize" in action_lower:
|
||
return "analyze"
|
||
return "generate"
|
||
|
||
def _score_topic(
|
||
self,
|
||
topic: str,
|
||
sif_map: Dict[str, Any],
|
||
serp_map: Dict[str, Any],
|
||
exa_map: Dict[str, Any],
|
||
trend_signals: List[Any],
|
||
) -> Dict[str, Any]:
|
||
"""Score a single topic with the ROI formula."""
|
||
# gap_size: from SIF coverage_delta
|
||
sif = sif_map.get(topic, {})
|
||
gap_size = sif.get("coverage_delta", 0.5)
|
||
|
||
# volume: from SERP gap — competitors ranking for this topic
|
||
serp = serp_map.get(topic, {})
|
||
comp_count = serp.get("competitor_count", 0)
|
||
total_domains = serp.get("total_domains_checked", 1)
|
||
volume = min(comp_count / max(total_domains, 1), 1.0)
|
||
|
||
# trend: match topic against TrendSurfer signals
|
||
trend_score = self._match_trend_score(topic, trend_signals)
|
||
|
||
# intent: classify topic commercial value
|
||
intent = self._classify_intent(topic)
|
||
|
||
# competition: Exa content depth as penalty
|
||
exa = exa_map.get(topic, {})
|
||
content_count = exa.get("total_results", 0)
|
||
competition = min(content_count / 10.0, 1.0)
|
||
|
||
# ROI = (gap_size × volume × trend × intent) × (1 - 0.3 × competition)
|
||
base_roi = gap_size * volume * trend_score * intent
|
||
roi = base_roi * (1 - 0.3 * competition)
|
||
|
||
# Priority thresholds
|
||
if roi >= 0.6:
|
||
priority = "high"
|
||
elif roi >= 0.3:
|
||
priority = "medium"
|
||
else:
|
||
priority = "low"
|
||
|
||
# Recommended action based on scoring profile
|
||
action = self._recommend_action(gap_size, competition, intent)
|
||
|
||
return {
|
||
"topic": topic,
|
||
"roi_score": round(roi, 3),
|
||
"priority": priority,
|
||
"recommended_action": action,
|
||
"scoring": {
|
||
"gap_size": round(gap_size, 3),
|
||
"volume": round(volume, 3),
|
||
"trend": round(trend_score, 3),
|
||
"intent": round(intent, 3),
|
||
"competition": round(competition, 3),
|
||
},
|
||
"sif_gap": sif if sif else None,
|
||
"serp_evidence": {
|
||
"competitors_found": serp.get("competitors_found", []),
|
||
"competitor_count": comp_count,
|
||
"domains_with_content": serp.get("domains_with_content", []),
|
||
} if serp else None,
|
||
"competitor_content": exa if exa else None,
|
||
}
|
||
|
||
def _match_trend_score(self, topic: str, signals: List[Dict[str, Any]]) -> float:
|
||
if not signals:
|
||
return 0.5
|
||
|
||
topic_lower = topic.lower()
|
||
topic_words = set(topic_lower.split())
|
||
|
||
best_score = 0.0
|
||
for signal in signals:
|
||
impact = signal.get("impact_score", 0.5)
|
||
text_fields = " ".join(filter(None, [
|
||
signal.get("topic", ""),
|
||
signal.get("headline", ""),
|
||
signal.get("suggested_angle", ""),
|
||
]))
|
||
text_lower = text_fields.lower()
|
||
|
||
if topic_lower in text_lower:
|
||
best_score = max(best_score, impact)
|
||
|
||
text_words = set(text_lower.split())
|
||
overlap = len(topic_words & text_words)
|
||
if overlap > 0:
|
||
word_score = (overlap / max(len(topic_words), 1)) * impact
|
||
best_score = max(best_score, word_score)
|
||
|
||
return max(best_score, 0.5)
|
||
|
||
def _classify_intent(self, topic: str) -> float:
|
||
"""
|
||
Classify topic intent using LLM with keyword fallback.
|
||
Returns intent score 0.0-1.0.
|
||
"""
|
||
topic_lower = topic.lower()
|
||
|
||
# Keyword-based heuristics
|
||
commercial_words = [
|
||
"best", "top", "review", "vs", "comparison", "alternative",
|
||
"vs.", "versus", "pricing", "cost", "price", "cheap",
|
||
"affordable", "discount", "coupon", "deal", "buy",
|
||
]
|
||
transactional_words = [
|
||
"buy", "purchase", "order", "subscribe", "sign up",
|
||
"download", "get started", "free trial", "demo",
|
||
]
|
||
|
||
has_commercial = any(w in topic_lower for w in commercial_words)
|
||
has_transactional = any(w in topic_lower for w in transactional_words)
|
||
|
||
if has_transactional:
|
||
return 0.9
|
||
if has_commercial:
|
||
return 0.7
|
||
return 0.4 # Informational default
|
||
|
||
def _recommend_action(
|
||
self, gap_size: float, competition: float, intent: float
|
||
) -> str:
|
||
"""Generate a recommended action based on scoring profile."""
|
||
if gap_size > 0.7 and competition < 0.3:
|
||
return "Create comprehensive pillar page — large gap, low competition"
|
||
elif gap_size > 0.5 and intent > 0.6:
|
||
return "Create high-conversion content — significant gap, strong intent"
|
||
elif competition > 0.7:
|
||
return "Create differentiated content — high competition requires unique angle"
|
||
elif gap_size < 0.3:
|
||
return "Optimize existing content — incremental gap, update current pages"
|
||
else:
|
||
return "Create targeted blog post — moderate opportunity"
|
||
|
||
async def generate_content_brief(
    self,
    topic: str,
    recommended_action: str,
    scoring: Optional[Dict[str, float]] = None,
    serp_evidence: Optional[Dict[str, Any]] = None,
    sif_gap: Optional[Dict[str, Any]] = None,
) -> Dict[str, Any]:
    """
    Generate a structured content brief from a gap item.

    An LLM is asked for title options, outline sections, target keywords,
    a writing angle and a word count. If the call fails or the reply holds
    no parseable JSON object, a template brief is used instead. If the
    reply parses but omits (or nulls) any expected key, that key is filled
    from the template so callers always receive a complete brief.

    Args:
        topic: Topic the brief is about.
        recommended_action: Action sentence attached to the gap.
        scoring: Optional score components (``gap_size``, ``volume``,
            ``trend``, ``intent``, ``competition``); each defaults to 0.5.
        serp_evidence: Optional SERP evidence; up to three entries of
            ``competitors_found`` are quoted in the prompt. Entries that
            are not dicts are skipped, and missing titles/snippets are
            rendered as empty text.
        sif_gap: Optional SIF gap entry; ``coverage_delta`` and
            ``confidence`` are quoted in the prompt (0 when absent).

    Returns:
        ``{"topic", "recommended_action", "brief", "scoring"}`` where
        ``brief`` has the keys ``titles``, ``outline``, ``keywords``,
        ``angle`` and ``word_count``.
    """
    import json as _json

    profile = scoring or {}
    gap_size = profile.get("gap_size", 0.5)
    volume = profile.get("volume", 0.5)
    trend = profile.get("trend", 0.5)
    intent = profile.get("intent", 0.5)
    competition = profile.get("competition", 0.5)
    word_count = 800 if competition > 0.7 else 1200 if gap_size > 0.5 else 600

    # Prompt context is built defensively: it runs outside the LLM try-block,
    # so malformed evidence must not raise here.
    serp_context = ""
    competitors = (serp_evidence or {}).get("competitors_found") or []
    snippets = [
        f"- {c.get('title') or ''}: {(c.get('snippet') or '')[:100]}"
        for c in competitors[:3]
        if isinstance(c, dict)
    ]
    if snippets:
        serp_context = "Competitor content already ranking:\n" + "\n".join(snippets)

    sif_context = ""
    if sif_gap:
        coverage_delta = sif_gap.get("coverage_delta") or 0
        confidence = sif_gap.get("confidence") or 0
        sif_context = (
            f"SIF coverage delta: {coverage_delta:.2%}, "
            f"confidence: {confidence:.2%}"
        )

    prompt = f"""You are a senior content strategist. Create a detailed content brief for the topic below.

TOPIC: {topic}
RECOMMENDED ACTION: {recommended_action}
{serp_context}
{sif_context}

Scoring profile:
- Gap size: {gap_size:.0%}
- Search volume: {volume:.0%}
- Trend momentum: {trend:.0%}
- Intent score: {intent:.0%}
- Competition level: {competition:.0%}
- Target word count: {word_count}

Return a JSON object with these exact keys:
{{
"titles": ["Title option 1", "Title option 2", "Title option 3"],
"outline": [
{{"heading": "Section heading", "key_points": ["point 1", "point 2", "point 3"]}}
],
"keywords": ["keyword1", "keyword2", "keyword3", "keyword4", "keyword5"],
"angle": "A single paragraph describing the strategic writing angle",
"word_count": {word_count}
}}

Generate 4-6 outline sections. Only return valid JSON, no other text."""

    # Template brief: the full fallback, and the source of defaults for any
    # key the LLM leaves out.
    topic_words = topic.split()
    template_brief = {
        "titles": [
            f"The Ultimate Guide to {topic}",
            f"{topic}: Strategies That Actually Work",
            f"Why {topic} Matters More Than Ever",
        ],
        "outline": [
            {"heading": f"Introduction to {topic}", "key_points": ["Context and importance", "What this guide covers"]},
            {"heading": "Why This Matters", "key_points": ["Current landscape", "Key challenges and opportunities"]},
            {"heading": "Key Strategies", "key_points": ["Strategy 1 with examples", "Strategy 2 with implementation tips", "Strategy 3 for advanced practitioners"]},
            {"heading": "Common Pitfalls to Avoid", "key_points": ["Mistake 1 and how to avoid it", "Mistake 2 and how to avoid it"]},
            {"heading": "Measuring Success", "key_points": ["Key metrics to track", "Tools and methods for measurement"]},
            {"heading": "Conclusion & Next Steps", "key_points": ["Summary of key takeaways", "Actionable next steps"]},
        ],
        "keywords": (
            [topic, topic_words[-1]]
            if len(topic_words) > 1
            else [topic, "guide", "strategy"]
        ),
        "angle": f"Create comprehensive, actionable content about {topic} that fills the gap identified in competitor analysis. Focus on providing unique insights and practical implementation guidance.",
        "word_count": word_count,
    }

    try:
        response = await self._generate_llm_response(prompt)
        # Tolerate chatter around the JSON by slicing from the first "{"
        # to the last "}".
        start = response.find("{")
        end = response.rfind("}") + 1
        if start < 0 or end <= start:
            raise ValueError("No JSON found in LLM response")
        brief = _json.loads(response[start:end])
        for key, fallback in template_brief.items():
            if brief.get(key) is None:
                brief[key] = fallback
    except Exception as e:
        logger.warning(
            f"[{self.__class__.__name__}] LLM brief generation failed, using template: {e}"
        )
        brief = template_brief

    return {
        "topic": topic,
        "recommended_action": recommended_action,
        "brief": brief,
        "scoring": scoring,
    }
|