feat: ContentGuardianAgent, onboarding UX, Team Activity action wiring, docs, agent help modal

ContentGuardianAgent consolidation:
- Merge 3 duplicate classes into single source in specialized/content_guardian.py
- Watchdog audit_committee() with heuristic scoring, coverage gaps, overlaps, alerts
- Remove misleading rejection_rate() helper; use acceptance_rate directly
- Integrate audit + alerts + trend signals into today_workflow_service.py

Team Activity page:
- QualityAuditPanel: health ring, per-agent critiques, coverage gaps, overlaps
- TrendSignalsPanel: opportunity cards with urgency/impact/coverage bars
- AlertBanner: persistent dismiss via POST /alerts/{id}/mark-read
- AgentHelpModal: dialog showing all 8 agents with descriptions, tools, schedule
- QualityAuditPanel action buttons: Fill gap -> /content-planning, Resolve overlap, View CTA on alerts/issues
- TrendSignalsPanel action buttons: Create content from this trend -> /blog-writer with trend context state

Onboarding system:
- Step 4 validation: no auto-pass via basic_ready; requires persona data or explicit progression
- Step 5 validation: logs warning on auto-pass without integration data
- OnboardingCompletionService: single DB session, transactional task creation, upsert pattern
- Business-without-website: nullable website_url on SIFIndexingTask and MarketTrendsTask
- DeepCompetitorAnalysisExecutor: 5-min timeout, 10-competitor cap, asyncio.wait_for
- Persona generation: async with 30s timeout, falls back to scheduler
- OnboardingProgressService.reset_onboarding(): resets session + pauses all DB tasks
- OnboardingControlService.reset_onboarding(): also cancels APScheduler jobs
- FinalStep TaskSchedulingPanel: shows scheduled/failed tasks after completion, 8s auto-redirect
- onboarding_completed agent activity event logged to feed

Documentation:
- docs-site/features/onboarding/: overview, steps, scheduler-tasks, technical-reference (4 pages)
- docs-site/mkdocs.yml: added Onboarding System nav section
- docs-site/features/sif-agents/: overview, agent-directory, committee-system, content-guardian (4 pages)
- docs-site/features/team-activity/: overview, quality-audit, trend-signals, alert-system (4 pages)
- docs-site/features/todays-workflow/: updated overview, technical-architecture, workflow-guide, api-reference
This commit is contained in:
ajaysi
2026-06-01 12:24:31 +05:30
parent 9b472f1c18
commit 923fa671fe
90 changed files with 8914 additions and 2731 deletions

View File

@@ -9,6 +9,8 @@ from .on_page_seo_service import OnPageSEOService
from .technical_seo_service import TechnicalSEOService
from .enterprise_seo_service import EnterpriseSEOService
from .content_strategy_service import ContentStrategyService
from .serp_gap_service import SerpGapService
from .competitor_content_service import CompetitorContentService
__all__ = [
'MetaDescriptionService',
@@ -20,4 +22,6 @@ __all__ = [
'TechnicalSEOService',
'EnterpriseSEOService',
'ContentStrategyService',
'SerpGapService',
'CompetitorContentService',
]

View File

@@ -0,0 +1,214 @@
"""
Competitor Content Service for ALwrity
Fetches full competitor content for gap topics using Exa with include_domains.
Phase 2 of the Content Gap Radar feature.
Usage:
service = CompetitorContentService()
result = await service.deep_dive(
topics=["AI content strategy"],
competitor_domains=["example.com"]
)
"""
import os
import asyncio
import hashlib
import json
import time
from typing import Dict, List, Optional, Any
from loguru import logger
class CompetitorContentService:
    """
    Fetches competitor content for gap topics using Exa neural search.

    Uses Exa's `include_domains` to scope searches to known competitor domains,
    returning full text, highlights, and summaries for deeper competitive analysis.
    Results are cached in memory, per instance, for ``CACHE_TTL`` seconds
    (24h by default) to reduce API costs.

    Designed to be consumed by the future ContentGapRadarAgent.
    """

    # Cache lifetime in seconds; overridable via COMPETITOR_CONTENT_CACHE_TTL.
    CACHE_TTL = int(os.getenv("COMPETITOR_CONTENT_CACHE_TTL", "86400"))

    def __init__(self):
        """Read EXA_API_KEY from the environment and start with an empty cache."""
        self.api_key = os.getenv("EXA_API_KEY")
        if not self.api_key:
            logger.warning(
                "EXA_API_KEY not configured; CompetitorContentService disabled"
            )
        self._exa = None
        self._cache: Dict[str, Dict[str, Any]] = {}

    @property
    def exa(self):
        """Return the Exa client, creating it on first access.

        The `exa_py` import is deferred to first use; the property yields
        ``None`` while no API key is configured.
        """
        if self._exa is None and self.api_key:
            from exa_py import Exa
            self._exa = Exa(self.api_key)
        return self._exa

    def _cache_key(
        self,
        topics: List[str],
        domains: List[str],
        max_results: Optional[int] = None,
    ) -> str:
        """Build a deterministic cache key from sorted topics and domains.

        When ``max_results`` is given it becomes part of the key, so requests
        that differ only in their result limit do not share a cache entry.
        """
        payload: Dict[str, Any] = {"t": sorted(topics), "d": sorted(domains)}
        if max_results is not None:
            payload["n"] = max_results
        raw = json.dumps(payload, sort_keys=True)
        # MD5 is used purely as a compact fingerprint, not for security.
        return hashlib.md5(raw.encode()).hexdigest()

    def _get_cached(self, key: str) -> Optional[Dict[str, Any]]:
        """Return the cached payload for ``key`` or ``None`` if absent/expired."""
        entry = self._cache.get(key)
        if entry is None:
            return None
        if (time.time() - entry["ts"]) < self.CACHE_TTL:
            return entry["data"]
        # Evict stale entries so the cache does not grow without bound.
        self._cache.pop(key, None)
        return None

    def _set_cache(self, key: str, data: Dict[str, Any]):
        """Store ``data`` under ``key`` stamped with the current time."""
        self._cache[key] = {"data": data, "ts": time.time()}

    async def deep_dive(
        self,
        topics: List[str],
        competitor_domains: List[str],
        max_total_results: int = 10,
        concurrency: int = 3,
        bypass_cache: bool = False,
    ) -> Dict[str, Any]:
        """
        Fetch competitor content for a list of gap topics.

        For each topic, searches Exa scoped to competitor domains and returns
        full text, highlights, and publishing metadata.

        Args:
            topics: Topic phrases to research (e.g. from SERP gap analysis)
            competitor_domains: Known competitor domains to scope search
            max_total_results: Max results requested per topic (sent to Exa
                as ``num_results``)
            concurrency: Max concurrent Exa API calls; values below 1 are
                treated as 1
            bypass_cache: Force fresh API calls, ignoring cache

        Returns:
            Dict with keys:
                results: List of per-topic competitor content results
                total_topics_analyzed: int
                topics_with_content: int
                cached: bool
                error: str, only present when EXA_API_KEY is not configured
        """
        if not topics or not competitor_domains:
            return {
                "results": [],
                "total_topics_analyzed": 0,
                "topics_with_content": 0,
                "cached": False,
            }

        ck = self._cache_key(topics, competitor_domains, max_total_results)
        if not bypass_cache:
            cached = self._get_cached(ck)
            if cached is not None:
                logger.info("Returning cached competitor content results")
                return {**cached, "cached": True}

        if not self.api_key or not self.exa:
            return {
                "results": [],
                "total_topics_analyzed": len(topics),
                "topics_with_content": 0,
                "cached": False,
                "error": "EXA_API_KEY not configured",
            }

        # Semaphore(0) would block every task forever and a negative value
        # raises, so clamp to at least one slot.
        semaphore = asyncio.Semaphore(max(1, concurrency))
        loop = asyncio.get_running_loop()

        async def search_topic(topic: str) -> Dict[str, Any]:
            async with semaphore:
                return await self._search_single_topic(
                    topic, competitor_domains, max_total_results, loop
                )

        results = await asyncio.gather(*(search_topic(t) for t in topics))
        output = {
            "results": results,
            "total_topics_analyzed": len(topics),
            "topics_with_content": sum(
                1 for r in results if r.get("total_results", 0) > 0
            ),
            "cached": False,
        }

        # A run in which every topic failed carries no information; caching it
        # would make a transient outage look like "no content" for CACHE_TTL.
        if not all("error" in r for r in results):
            self._set_cache(ck, output)

        # Shallow copy so callers adding/replacing top-level keys do not
        # alter the cached entry.
        return dict(output)

    async def _search_single_topic(
        self,
        topic: str,
        competitor_domains: List[str],
        max_results: int,
        loop: asyncio.AbstractEventLoop,
    ) -> Dict[str, Any]:
        """
        Search Exa for a single topic, scoped to competitor domains.

        The blocking SDK call runs in the loop's default executor. Any
        exception is logged and converted into an empty result carrying an
        ``error`` message, so one failing topic does not abort the batch.
        """
        query = topic
        search_kwargs = {
            "type": "auto",
            "num_results": max_results,
            "include_domains": competitor_domains,
            "text": {"max_characters": 2000},
            "highlights": {"num_sentences": 3, "highlights_per_url": 3},
            "summary": {"query": f"Key details about {topic}"},
        }
        try:
            response = await loop.run_in_executor(
                None,
                lambda: self.exa.search_and_contents(query, **search_kwargs),
            )
            content = []
            seen_urls = set()
            for result in getattr(response, "results", []) or []:
                url = getattr(result, "url", "")
                if not url or url in seen_urls:
                    continue
                seen_urls.add(url)
                content.append({
                    "domain": self._extract_domain(url),
                    # `or` also covers attributes that exist but are None.
                    "title": getattr(result, "title", None) or "Untitled",
                    "url": url,
                    "highlights": getattr(result, "highlights", None) or [],
                    "summary": getattr(result, "summary", None) or "",
                    "text": getattr(result, "text", None) or "",
                    "published_date": getattr(result, "published_date", None),
                    "author": getattr(result, "author", None),
                })
            return {
                "topic": topic,
                "competitor_content": content,
                "total_results": len(content),
                "domains_found": list(
                    {c["domain"] for c in content if c["domain"]}
                ),
            }
        except Exception as e:
            logger.warning(f"Exa search failed for topic '{topic}': {e}")
            return {
                "topic": topic,
                "competitor_content": [],
                "total_results": 0,
                "domains_found": [],
                "error": str(e),
            }

    @staticmethod
    def _extract_domain(url: str) -> str:
        """Return the lower-cased network location of ``url``.

        Falls back to the lower-cased input when the URL cannot be parsed.
        """
        try:
            from urllib.parse import urlparse
            return urlparse(url).netloc.lower()
        except Exception:
            return url.lower()

View File

@@ -0,0 +1,175 @@
"""
SERP Gap Service for ALwrity
Detects which competitors rank for target topics using Google Custom Search.
Phase 1 of the Content Gap Radar feature.
Usage:
service = SerpGapService()
result = await service.analyze_topic_gaps(
topics=["AI content strategy", "topic clustering"],
competitor_domains=["example.com", "competitor.org"]
)
"""
import asyncio
import hashlib
import json
import os
import time
from typing import Dict, List, Optional, Any
from loguru import logger
from services.research.google_search_service import GoogleSearchService
class SerpGapService:
    """
    SERP Gap Analysis Service.

    Uses Google Custom Search `site:` queries to detect competitor ranking presence
    for specific topics. Results are cached in memory, per instance, for
    ``CACHE_TTL`` seconds (24h by default) to stay within free-tier quotas
    (100 queries/day). Designed to be consumed by a future ContentGapRadarAgent
    that scores and prioritizes gaps.
    """

    # Cache lifetime in seconds; overridable via SERP_GAP_CACHE_TTL.
    CACHE_TTL = int(os.getenv("SERP_GAP_CACHE_TTL", "86400"))  # 24 hours default

    def __init__(
        self, google_search_service: Optional["GoogleSearchService"] = None
    ):
        """Use the supplied search service, or create a default GoogleSearchService."""
        self.gcs = google_search_service or GoogleSearchService()
        self._cache: Dict[str, Dict[str, Any]] = {}
        logger.info("SerpGapService initialized")

    def _cache_key(
        self,
        topics: List[str],
        domains: List[str],
        max_results: Optional[int] = None,
    ) -> str:
        """Deterministic cache key from sorted topics + domains.

        When ``max_results`` is given it becomes part of the key, so requests
        that differ only in their per-site limit do not share a cache entry.
        """
        payload: Dict[str, Any] = {"t": sorted(topics), "d": sorted(domains)}
        if max_results is not None:
            payload["n"] = max_results
        raw = json.dumps(payload, sort_keys=True)
        # MD5 is used purely as a compact fingerprint, not for security.
        return hashlib.md5(raw.encode()).hexdigest()

    def _get_cached(self, key: str) -> Optional[Dict[str, Any]]:
        """Return the cached payload for ``key`` or ``None`` if absent/expired."""
        entry = self._cache.get(key)
        if entry is None:
            return None
        if (time.time() - entry["ts"]) < self.CACHE_TTL:
            return entry["data"]
        # Evict stale entries so the cache does not grow without bound.
        self._cache.pop(key, None)
        return None

    def _set_cache(self, key: str, data: Dict[str, Any]):
        """Store ``data`` under ``key`` stamped with the current time."""
        self._cache[key] = {"data": data, "ts": time.time()}

    async def analyze_topic_gaps(
        self,
        topics: List[str],
        competitor_domains: List[str],
        max_results_per_site: int = 5,
        concurrency: int = 3,
        bypass_cache: bool = False,
    ) -> Dict[str, Any]:
        """
        Analyze SERP gaps for a list of topics across known competitors.

        For each topic, queries Google with `site:competitor_domain topic` for
        each known competitor to detect ranking presence.

        Args:
            topics: Topic phrases to check (e.g. from find_semantic_gaps())
            competitor_domains: Known competitor domains (e.g. ["example.com"])
            max_results_per_site: Max Google CSE results per site: query (max 10)
            concurrency: Max topics analyzed concurrently to stay under rate
                limits; values below 1 are treated as 1
            bypass_cache: Force fresh API calls, ignoring cache

        Returns:
            Dict with keys:
                gaps: List of per-topic SERP gap results
                total_topics_analyzed: int
                total_competitors: int
                cached: bool
        """
        if not topics or not competitor_domains:
            return {
                "gaps": [],
                "total_topics_analyzed": 0,
                "total_competitors": 0,
                "cached": False,
            }

        ck = self._cache_key(topics, competitor_domains, max_results_per_site)
        if not bypass_cache:
            cached = self._get_cached(ck)
            if cached is not None:
                logger.info("Returning cached SERP gap results")
                return {**cached, "cached": True}

        # Semaphore(0) would block every task forever and a negative value
        # raises, so clamp to at least one slot.
        semaphore = asyncio.Semaphore(max(1, concurrency))

        async def analyze_topic(topic: str) -> Dict[str, Any]:
            async with semaphore:
                return await self._analyze_single_topic(
                    topic, competitor_domains, max_results_per_site
                )

        results = await asyncio.gather(*(analyze_topic(t) for t in topics))
        output = {
            "gaps": results,
            "total_topics_analyzed": len(topics),
            "total_competitors": len(competitor_domains),
            "cached": False,
        }

        # If every single query failed the result says nothing about ranking
        # presence; caching it would report false gaps for CACHE_TTL.
        total_outage = all(
            r["failed_queries"] >= r["total_domains_checked"] for r in results
        )
        if not total_outage:
            self._set_cache(ck, output)

        # Shallow copy so callers adding/replacing top-level keys do not
        # alter the cached entry.
        return dict(output)

    async def _analyze_single_topic(
        self,
        topic: str,
        competitor_domains: List[str],
        max_results: int,
    ) -> Dict[str, Any]:
        """
        Check SERP presence for a single topic across all competitor domains.

        Domains are queried one after another. ``dateRestrict`` and ``sort``
        are passed as ``None`` with the intent of seeing all-time competitor
        content in relevance order rather than only recent results
        (NOTE(review): relies on GoogleSearchService.perform_search honouring
        these overrides; confirm there).

        A failing query is logged and counted in ``failed_queries``; it does
        not abort the remaining domains.
        """
        unique_competitors = []
        seen_urls = set()
        failed_queries = 0

        for domain in competitor_domains:
            query = f"site:{domain} {topic}"
            try:
                raw_results = await self.gcs.perform_search(
                    query,
                    max_results,
                    dateRestrict=None,  # Don't limit to last month
                    sort=None,  # Use relevance sorting, not date
                )
            except Exception as e:
                logger.warning(
                    f"GCS query failed for site:{domain} topic='{topic}': {e}"
                )
                failed_queries += 1
                continue

            # Treat a None response as "no results" rather than a failure.
            for result in raw_results or []:
                url = result.get("link", "")
                # Keep only the first occurrence of each URL across domains.
                if url in seen_urls:
                    continue
                seen_urls.add(url)
                unique_competitors.append({
                    "domain": domain,
                    "title": result.get("title", ""),
                    "url": url,
                    "snippet": result.get("snippet", ""),
                })

        return {
            "topic": topic,
            "competitors_found": unique_competitors,
            "competitor_count": len(unique_competitors),
            "domains_with_content": list(
                {e["domain"] for e in unique_competitors}
            ),
            "failed_queries": failed_queries,
            "total_domains_checked": len(competitor_domains),
        }