feat: ContentGuardianAgent, onboarding UX, Team Activity action wiring, docs, agent help modal
ContentGuardianAgent consolidation:
- Merge 3 duplicate classes into single source in specialized/content_guardian.py
- Watchdog audit_committee() with heuristic scoring, coverage gaps, overlaps, alerts
- Remove misleading rejection_rate() helper; use acceptance_rate directly
- Integrate audit + alerts + trend signals into today_workflow_service.py
Team Activity page:
- QualityAuditPanel: health ring, per-agent critiques, coverage gaps, overlaps
- TrendSignalsPanel: opportunity cards with urgency/impact/coverage bars
- AlertBanner: persistent dismiss via POST /alerts/{id}/mark-read
- AgentHelpModal: dialog showing all 8 agents with descriptions, tools, schedule
- QualityAuditPanel action buttons: Fill gap -> /content-planning, Resolve overlap, View CTA on alerts/issues
- TrendSignalsPanel action buttons: Create content from this trend -> /blog-writer with trend context state
Onboarding system:
- Step 4 validation: no auto-pass via basic_ready; requires persona data or explicit progression
- Step 5 validation: logs warning on auto-pass without integration data
- OnboardingCompletionService: single DB session, transactional task creation, upsert pattern
- Business-without-website: nullable website_url on SIFIndexingTask and MarketTrendsTask
- DeepCompetitorAnalysisExecutor: 5-min timeout, 10-competitor cap, asyncio.wait_for
- Persona generation: async with 30s timeout, falls back to scheduler
- OnboardingProgressService.reset_onboarding(): resets session + pauses all DB tasks
- OnboardingControlService.reset_onboarding(): also cancels APScheduler jobs
- FinalStep TaskSchedulingPanel: shows scheduled/failed tasks after completion, 8s auto-redirect
- onboarding_completed agent activity event logged to feed
Documentation:
- docs-site/features/onboarding/: overview, steps, scheduler-tasks, technical-reference (4 pages)
- docs-site/mkdocs.yml: added Onboarding System nav section
- docs-site/features/sif-agents/: overview, agent-directory, committee-system, content-guardian (4 pages)
- docs-site/features/team-activity/: overview, quality-audit, trend-signals, alert-system (4 pages)
- docs-site/features/todays-workflow/: updated overview, technical-architecture, workflow-guide, api-reference
This commit is contained in:
@@ -9,6 +9,8 @@ from .on_page_seo_service import OnPageSEOService
|
||||
from .technical_seo_service import TechnicalSEOService
|
||||
from .enterprise_seo_service import EnterpriseSEOService
|
||||
from .content_strategy_service import ContentStrategyService
|
||||
from .serp_gap_service import SerpGapService
|
||||
from .competitor_content_service import CompetitorContentService
|
||||
|
||||
__all__ = [
|
||||
'MetaDescriptionService',
|
||||
@@ -20,4 +22,6 @@ __all__ = [
|
||||
'TechnicalSEOService',
|
||||
'EnterpriseSEOService',
|
||||
'ContentStrategyService',
|
||||
'SerpGapService',
|
||||
'CompetitorContentService',
|
||||
]
|
||||
214
backend/services/seo_tools/competitor_content_service.py
Normal file
214
backend/services/seo_tools/competitor_content_service.py
Normal file
@@ -0,0 +1,214 @@
|
||||
"""
|
||||
Competitor Content Service for ALwrity
|
||||
|
||||
Fetches full competitor content for gap topics using Exa with include_domains.
|
||||
Phase 2 of the Content Gap Radar feature.
|
||||
|
||||
Usage:
|
||||
service = CompetitorContentService()
|
||||
result = await service.deep_dive(
|
||||
topics=["AI content strategy"],
|
||||
competitor_domains=["example.com"]
|
||||
)
|
||||
"""
|
||||
|
||||
import os
|
||||
import asyncio
|
||||
import hashlib
|
||||
import json
|
||||
import time
|
||||
from typing import Dict, List, Optional, Any
|
||||
from loguru import logger
|
||||
|
||||
|
||||
class CompetitorContentService:
    """
    Fetches competitor content for gap topics using Exa neural search.

    Uses Exa's `include_domains` to scope searches to known competitor domains,
    returning full text, highlights, and summaries for deeper competitive analysis.
    Results are cached in memory, per instance, for ``CACHE_TTL`` seconds
    (24h by default) to reduce API costs.
    Designed to be consumed by the future ContentGapRadarAgent.
    """

    # Cache lifetime in seconds; override via COMPETITOR_CONTENT_CACHE_TTL.
    CACHE_TTL = int(os.getenv("COMPETITOR_CONTENT_CACHE_TTL", "86400"))

    def __init__(self):
        self.api_key = os.getenv("EXA_API_KEY")
        if not self.api_key:
            logger.warning(
                "EXA_API_KEY not configured; CompetitorContentService disabled"
            )
        self._exa = None
        # cache key -> {"data": <deep_dive payload>, "ts": <time.time() at store>}
        self._cache: Dict[str, Dict[str, Any]] = {}

    @property
    def exa(self):
        """
        Return the Exa client, creating it on first access.

        The SDK import is deferred until an API key is available; returns
        None when no key is configured.
        """
        if self._exa is None and self.api_key:
            from exa_py import Exa

            self._exa = Exa(self.api_key)
        return self._exa

    def _cache_key(
        self,
        topics: List[str],
        domains: List[str],
        max_results: Optional[int] = None,
    ) -> str:
        """
        Build a deterministic cache key from the request parameters.

        Topics and domains are sorted so argument order does not matter.
        The result limit is part of the key so that a request for N results
        is never answered with a payload fetched under a different limit.
        """
        raw = json.dumps(
            {"t": sorted(topics), "d": sorted(domains), "n": max_results},
            sort_keys=True,
        )
        # md5 is used only as a compact fingerprint, not for security.
        return hashlib.md5(raw.encode()).hexdigest()

    def _get_cached(self, key: str) -> Optional[Dict[str, Any]]:
        """Return the cached payload for `key`, or None if missing or expired."""
        entry = self._cache.get(key)
        if entry is None:
            return None
        if (time.time() - entry["ts"]) < self.CACHE_TTL:
            return entry["data"]
        # Expired: evict so stale payloads do not accumulate indefinitely.
        self._cache.pop(key, None)
        return None

    def _set_cache(self, key: str, data: Dict[str, Any]):
        """Store `data` under `key`, stamped with the current time."""
        self._cache[key] = {"data": data, "ts": time.time()}

    async def deep_dive(
        self,
        topics: List[str],
        competitor_domains: List[str],
        max_total_results: int = 10,
        concurrency: int = 3,
        bypass_cache: bool = False,
    ) -> Dict[str, Any]:
        """
        Fetch competitor content for a list of gap topics.

        For each topic, searches Exa scoped to competitor domains and returns
        full text, highlights, and publishing metadata.

        Args:
            topics: Topic phrases to research (e.g. from SERP gap analysis)
            competitor_domains: Known competitor domains to scope search
            max_total_results: Max results per topic total (Exa API limit varies)
            concurrency: Max concurrent Exa API calls (values below 1 are
                treated as 1)
            bypass_cache: Force fresh API calls, ignoring cache

        Returns:
            Dict with keys:
                results: List of per-topic competitor content results
                total_topics_analyzed: int
                topics_with_content: int
                cached: bool
                error: str, present only when EXA_API_KEY is not configured
        """
        if not topics or not competitor_domains:
            return {
                "results": [],
                "total_topics_analyzed": 0,
                "topics_with_content": 0,
                "cached": False,
            }

        ck = self._cache_key(topics, competitor_domains, max_total_results)
        if not bypass_cache:
            cached = self._get_cached(ck)
            if cached:
                logger.info("Returning cached competitor content results")
                return {**cached, "cached": True}

        if not self.api_key or not self.exa:
            return {
                "results": [],
                "total_topics_analyzed": len(topics),
                "topics_with_content": 0,
                "cached": False,
                "error": "EXA_API_KEY not configured",
            }

        # Semaphore(0) would block every task forever, so clamp to at least 1.
        semaphore = asyncio.Semaphore(max(1, concurrency))
        loop = asyncio.get_running_loop()

        async def search_topic(topic: str) -> Dict[str, Any]:
            async with semaphore:
                return await self._search_single_topic(
                    topic, competitor_domains, max_total_results, loop
                )

        results = await asyncio.gather(*(search_topic(topic) for topic in topics))

        output = {
            "results": results,
            "total_topics_analyzed": len(topics),
            "topics_with_content": sum(
                1 for r in results if r.get("total_results", 0) > 0
            ),
            "cached": False,
        }
        # If every lookup failed there is nothing worth remembering; caching
        # it would hide a transient outage for the whole TTL.
        if any("error" not in r for r in results):
            self._set_cache(ck, output)
        # Return a copy so callers cannot mutate the cached top-level dict.
        return dict(output)

    async def _search_single_topic(
        self,
        topic: str,
        competitor_domains: List[str],
        max_results: int,
        loop: asyncio.AbstractEventLoop,
    ) -> Dict[str, Any]:
        """
        Search Exa for a single topic, scoped to competitor domains.

        The SDK call is run in the loop's default executor so it does not
        block the event loop. Any exception is logged and reported in the
        returned dict under "error" instead of being raised.
        """
        query = topic

        search_kwargs = {
            "type": "auto",
            "num_results": max_results,
            "include_domains": competitor_domains,
            "text": {"max_characters": 2000},
            "highlights": {"num_sentences": 3, "highlights_per_url": 3},
            "summary": {"query": f"Key details about {topic}"},
        }

        try:
            response = await loop.run_in_executor(
                None,
                lambda: self.exa.search_and_contents(query, **search_kwargs),
            )

            content = []
            seen_urls = set()
            for result in getattr(response, "results", []) or []:
                url = getattr(result, "url", "")
                # Skip results without a URL and repeated URLs.
                if not url or url in seen_urls:
                    continue
                seen_urls.add(url)
                # `or` fallbacks: a field that exists but holds None would
                # bypass a plain getattr default.
                content.append({
                    "domain": self._extract_domain(url),
                    "title": getattr(result, "title", None) or "Untitled",
                    "url": url,
                    "highlights": getattr(result, "highlights", None) or [],
                    "summary": getattr(result, "summary", None) or "",
                    "text": getattr(result, "text", None) or "",
                    "published_date": getattr(result, "published_date", None),
                    "author": getattr(result, "author", None),
                })

            return {
                "topic": topic,
                "competitor_content": content,
                "total_results": len(content),
                "domains_found": list(
                    set(c["domain"] for c in content if c["domain"])
                ),
            }

        except Exception as e:
            logger.warning(f"Exa search failed for topic '{topic}': {e}")
            return {
                "topic": topic,
                "competitor_content": [],
                "total_results": 0,
                "domains_found": [],
                "error": str(e),
            }

    @staticmethod
    def _extract_domain(url: str) -> str:
        """
        Extract the lowercased network location (host[:port]) from a URL.

        Falls back to the lowercased input when the URL cannot be parsed.
        """
        from urllib.parse import urlparse

        try:
            return urlparse(url).netloc.lower()
        except ValueError:
            # urlparse raises ValueError on malformed input (e.g. bad IPv6 brackets).
            return url.lower()
|
||||
175
backend/services/seo_tools/serp_gap_service.py
Normal file
175
backend/services/seo_tools/serp_gap_service.py
Normal file
@@ -0,0 +1,175 @@
|
||||
"""
|
||||
SERP Gap Service for ALwrity
|
||||
|
||||
Detects which competitors rank for target topics using Google Custom Search.
|
||||
Phase 1 of the Content Gap Radar feature.
|
||||
|
||||
Usage:
|
||||
service = SerpGapService()
|
||||
result = await service.analyze_topic_gaps(
|
||||
topics=["AI content strategy", "topic clustering"],
|
||||
competitor_domains=["example.com", "competitor.org"]
|
||||
)
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import hashlib
|
||||
import json
|
||||
import os
|
||||
import time
|
||||
from typing import Dict, List, Optional, Any
|
||||
from loguru import logger
|
||||
from services.research.google_search_service import GoogleSearchService
|
||||
|
||||
|
||||
class SerpGapService:
    """
    SERP Gap Analysis Service.

    Uses Google Custom Search `site:` queries to detect competitor ranking presence
    for specific topics. Results are cached in memory, per instance, for
    ``CACHE_TTL`` seconds (24h by default) to stay within free-tier quotas
    (100 queries/day). Designed to be consumed by a future ContentGapRadarAgent
    that scores and prioritizes gaps.
    """

    CACHE_TTL = int(os.getenv("SERP_GAP_CACHE_TTL", "86400"))  # 24 hours default
    # Upper bound applied to max_results_per_site (documented as "max 10").
    MAX_RESULTS_PER_QUERY = 10

    def __init__(self, google_search_service: Optional["GoogleSearchService"] = None):
        """Use the injected search service, or create a default GoogleSearchService."""
        self.gcs = google_search_service or GoogleSearchService()
        # cache key -> {"data": <analysis payload>, "ts": <time.time() at store>}
        self._cache: Dict[str, Dict[str, Any]] = {}
        logger.info("SerpGapService initialized")

    def _cache_key(
        self,
        topics: List[str],
        domains: List[str],
        max_results: Optional[int] = None,
    ) -> str:
        """
        Deterministic cache key from sorted topics + domains + result limit.

        The limit is included so a request for N results per site is never
        answered with a payload fetched under a different limit.
        """
        raw = json.dumps(
            {"t": sorted(topics), "d": sorted(domains), "n": max_results},
            sort_keys=True,
        )
        # md5 is used only as a compact fingerprint, not for security.
        return hashlib.md5(raw.encode()).hexdigest()

    def _get_cached(self, key: str) -> Optional[Dict[str, Any]]:
        """Return the cached payload for `key`, or None if missing or expired."""
        entry = self._cache.get(key)
        if entry is None:
            return None
        if (time.time() - entry["ts"]) < self.CACHE_TTL:
            return entry["data"]
        # Expired: evict so stale payloads do not accumulate indefinitely.
        self._cache.pop(key, None)
        return None

    def _set_cache(self, key: str, data: Dict[str, Any]):
        """Store `data` under `key`, stamped with the current time."""
        self._cache[key] = {"data": data, "ts": time.time()}

    async def analyze_topic_gaps(
        self,
        topics: List[str],
        competitor_domains: List[str],
        max_results_per_site: int = 5,
        concurrency: int = 3,
        bypass_cache: bool = False,
    ) -> Dict[str, Any]:
        """
        Analyze SERP gaps for a list of topics across known competitors.

        For each topic, queries Google with `site:competitor_domain topic` for
        each known competitor to detect ranking presence.

        Args:
            topics: Topic phrases to check (e.g. from find_semantic_gaps())
            competitor_domains: Known competitor domains (e.g. ["example.com"])
            max_results_per_site: Max Google CSE results per site: query
                (clamped to the range 1..10)
            concurrency: Max topics analyzed concurrently, to stay under rate
                limits (values below 1 are treated as 1)
            bypass_cache: Force fresh API calls, ignoring cache

        Returns:
            Dict with keys:
                gaps: List of per-topic SERP gap results
                total_topics_analyzed: int
                total_competitors: int
                cached: bool
        """
        if not topics or not competitor_domains:
            return {
                "gaps": [],
                "total_topics_analyzed": 0,
                "total_competitors": 0,
                "cached": False,
            }

        per_site = max(1, min(max_results_per_site, self.MAX_RESULTS_PER_QUERY))

        ck = self._cache_key(topics, competitor_domains, per_site)
        if not bypass_cache:
            cached = self._get_cached(ck)
            if cached:
                logger.info("Returning cached SERP gap results")
                return {**cached, "cached": True}

        # Semaphore(0) would block every task forever, so clamp to at least 1.
        semaphore = asyncio.Semaphore(max(1, concurrency))

        async def analyze_topic(topic: str) -> Dict[str, Any]:
            async with semaphore:
                return await self._analyze_single_topic(
                    topic, competitor_domains, per_site
                )

        results = await asyncio.gather(*(analyze_topic(topic) for topic in topics))

        output = {
            "gaps": results,
            "total_topics_analyzed": len(topics),
            "total_competitors": len(competitor_domains),
            "cached": False,
        }
        # If every single query failed (e.g. quota exhausted) the payload
        # carries no information; caching it would make the outage look like
        # "no competitor ranks" for the whole TTL.
        total_queries = len(topics) * len(competitor_domains)
        failed_queries = sum(r.get("failed_queries", 0) for r in results)
        if failed_queries < total_queries:
            self._set_cache(ck, output)
        # Return a copy so callers cannot mutate the cached top-level dict.
        return dict(output)

    async def _analyze_single_topic(
        self,
        topic: str,
        competitor_domains: List[str],
        max_results: int,
    ) -> Dict[str, Any]:
        """
        Check SERP presence for a single topic across all competitor domains.

        Domains are queried one after another. `dateRestrict=None` and
        `sort=None` are passed to `perform_search`, presumably to override
        date-restricted / date-sorted defaults so all-time content is seen
        (not just last month) — confirm against GoogleSearchService.

        A query that raises is logged, counted in `failed_queries`, and
        skipped; it does not abort the remaining domains.
        """
        unique_competitors = []
        seen_urls = set()
        failed_queries = 0

        for domain in competitor_domains:
            query = f"site:{domain} {topic}"
            try:
                raw_results = await self.gcs.perform_search(
                    query,
                    max_results,
                    dateRestrict=None,  # Don't limit to last month
                    sort=None,  # Use relevance sorting, not date
                )
                for result in raw_results:
                    url = result.get("link", "")
                    # Keep only the first occurrence of each URL.
                    if url in seen_urls:
                        continue
                    seen_urls.add(url)
                    unique_competitors.append({
                        "domain": domain,
                        "title": result.get("title", ""),
                        "url": url,
                        "snippet": result.get("snippet", ""),
                    })
            except Exception as e:
                logger.warning(
                    f"GCS query failed for site:{domain} topic='{topic}': {e}"
                )
                failed_queries += 1
                continue

        return {
            "topic": topic,
            "competitors_found": unique_competitors,
            "competitor_count": len(unique_competitors),
            # Sorted so the output is deterministic across runs.
            "domains_with_content": sorted(
                {e["domain"] for e in unique_competitors}
            ),
            "failed_queries": failed_queries,
            "total_domains_checked": len(competitor_domains),
        }
|
||||
Reference in New Issue
Block a user