ContentGuardianAgent consolidation:
- Merge 3 duplicate classes into single source in specialized/content_guardian.py
- Watchdog audit_committee() with heuristic scoring, coverage gaps, overlaps, alerts
- Remove misleading rejection_rate() helper; use acceptance_rate directly
- Integrate audit + alerts + trend signals into today_workflow_service.py
Team Activity page:
- QualityAuditPanel: health ring, per-agent critiques, coverage gaps, overlaps
- TrendSignalsPanel: opportunity cards with urgency/impact/coverage bars
- AlertBanner: persistent dismiss via POST /alerts/{id}/mark-read
- AgentHelpModal: dialog showing all 8 agents with descriptions, tools, schedule
- QualityAuditPanel action buttons: Fill gap -> /content-planning, Resolve overlap, View CTA on alerts/issues
- TrendSignalsPanel action buttons: Create content from this trend -> /blog-writer with trend context state
Onboarding system:
- Step 4 validation: no auto-pass via basic_ready; requires persona data or explicit progression
- Step 5 validation: logs warning on auto-pass without integration data
- OnboardingCompletionService: single DB session, transactional task creation, upsert pattern
- Business-without-website: nullable website_url on SIFIndexingTask and MarketTrendsTask
- DeepCompetitorAnalysisExecutor: 5-min timeout, 10-competitor cap, asyncio.wait_for
- Persona generation: async with 30s timeout, falls back to scheduler
- OnboardingProgressService.reset_onboarding(): resets session + pauses all DB tasks
- OnboardingControlService.reset_onboarding(): also cancels APScheduler jobs
- FinalStep TaskSchedulingPanel: shows scheduled/failed tasks after completion, 8s auto-redirect
- onboarding_completed agent activity event logged to feed
Documentation:
- docs-site/features/onboarding/: overview, steps, scheduler-tasks, technical-reference (4 pages)
- docs-site/mkdocs.yml: added Onboarding System nav section
- docs-site/features/sif-agents/: overview, agent-directory, committee-system, content-guardian (4 pages)
- docs-site/features/team-activity/: overview, quality-audit, trend-signals, alert-system (4 pages)
- docs-site/features/todays-workflow/: updated overview, technical-architecture, workflow-guide, api-reference
215 lines
7.1 KiB
Python
215 lines
7.1 KiB
Python
"""
Competitor Content Service for ALwrity

Fetches full competitor content for gap topics using Exa with include_domains.
Phase 2 of the Content Gap Radar feature.

Usage:
    service = CompetitorContentService()
    result = await service.deep_dive(
        topics=["AI content strategy"],
        competitor_domains=["example.com"]
    )
"""
|
|
|
|
import os
|
|
import asyncio
|
|
import hashlib
|
|
import json
|
|
import time
|
|
from typing import Dict, List, Optional, Any
|
|
from loguru import logger
|
|
|
|
|
|
class CompetitorContentService:
    """
    Fetches competitor content for gap topics using Exa neural search.

    Uses Exa's `include_domains` to scope searches to known competitor domains,
    returning full text, highlights, and summaries for deeper competitive analysis.
    Successful results are cached in memory for ``CACHE_TTL`` seconds (24h by
    default) to reduce API costs; responses containing per-topic errors are
    never cached so a transient failure is retried on the next call.
    Designed to be consumed by the future ContentGapRadarAgent.
    """

    # Cache lifetime in seconds; overridable via COMPETITOR_CONTENT_CACHE_TTL.
    CACHE_TTL = int(os.getenv("COMPETITOR_CONTENT_CACHE_TTL", "86400"))

    def __init__(self):
        """Read the Exa API key from the environment and set up an empty cache."""
        self.api_key = os.getenv("EXA_API_KEY")
        if not self.api_key:
            logger.warning(
                "EXA_API_KEY not configured; CompetitorContentService disabled"
            )
        self._exa = None
        # Maps cache key -> {"data": <deep_dive payload>, "ts": <epoch seconds>}.
        self._cache: Dict[str, Dict[str, Any]] = {}

    @property
    def exa(self):
        """Lazy-init Exa SDK to allow env injection after import."""
        if self._exa is None and self.api_key:
            from exa_py import Exa

            self._exa = Exa(self.api_key)
        return self._exa

    def _cache_key(
        self,
        topics: List[str],
        domains: List[str],
        max_results: Optional[int] = None,
    ) -> str:
        """
        Build an order-insensitive cache key for a request.

        Args:
            topics: Topic phrases of the request.
            domains: Competitor domains of the request.
            max_results: Per-topic result limit. When given it becomes part of
                the key so requests with different limits never share an entry.
                When omitted the key matches the legacy two-argument form.

        Returns:
            Hex digest identifying the request.
        """
        payload: Dict[str, Any] = {"t": sorted(topics), "d": sorted(domains)}
        if max_results is not None:
            payload["n"] = max_results
        raw = json.dumps(payload, sort_keys=True)
        # md5 is used purely as a compact in-memory key, not for security.
        return hashlib.md5(raw.encode()).hexdigest()

    def _get_cached(self, key: str) -> Optional[Dict[str, Any]]:
        """Return the cached payload for ``key`` if still fresh, else ``None``.

        Expired entries are evicted on read so the cache does not grow without
        bound with stale data.
        """
        entry = self._cache.get(key)
        if entry is None:
            return None
        if (time.time() - entry["ts"]) < self.CACHE_TTL:
            return entry["data"]
        self._cache.pop(key, None)
        return None

    def _set_cache(self, key: str, data: Dict[str, Any]):
        """Store ``data`` under ``key`` stamped with the current time."""
        self._cache[key] = {"data": data, "ts": time.time()}

    async def deep_dive(
        self,
        topics: List[str],
        competitor_domains: List[str],
        max_total_results: int = 10,
        concurrency: int = 3,
        bypass_cache: bool = False,
    ) -> Dict[str, Any]:
        """
        Fetch competitor content for a list of gap topics.

        For each topic, searches Exa scoped to competitor domains and returns
        full text, highlights, and publishing metadata.

        Args:
            topics: Topic phrases to research (e.g. from SERP gap analysis)
            competitor_domains: Known competitor domains to scope search
            max_total_results: Max results per topic total (Exa API limit varies)
            concurrency: Max concurrent Exa API calls; values below 1 are
                treated as 1 so the call can never dead-lock on the semaphore
            bypass_cache: Force fresh API calls, ignoring cache

        Returns:
            Dict with keys:
                results: List of per-topic competitor content results
                total_topics_analyzed: int
                topics_with_content: int
                cached: bool
                error: str, only present when EXA_API_KEY is not configured
        """
        if not topics or not competitor_domains:
            return {
                "results": [],
                "total_topics_analyzed": 0,
                "topics_with_content": 0,
                "cached": False,
            }

        # The result limit is part of the key: a payload fetched with a small
        # limit must not satisfy a later request asking for more results.
        ck = self._cache_key(topics, competitor_domains, max_total_results)
        if not bypass_cache:
            cached = self._get_cached(ck)
            if cached is not None:
                logger.info("Returning cached competitor content results")
                return {**cached, "cached": True}

        if not self.api_key or not self.exa:
            return {
                "results": [],
                "total_topics_analyzed": len(topics),
                "topics_with_content": 0,
                "cached": False,
                "error": "EXA_API_KEY not configured",
            }

        # Semaphore(0) would block every task forever and a negative value
        # raises, so clamp to at least one slot.
        semaphore = asyncio.Semaphore(max(1, concurrency))
        loop = asyncio.get_running_loop()

        async def search_topic(topic: str) -> Dict[str, Any]:
            async with semaphore:
                return await self._search_single_topic(
                    topic, competitor_domains, max_total_results, loop
                )

        results = await asyncio.gather(*(search_topic(topic) for topic in topics))

        output = {
            "results": results,
            "total_topics_analyzed": len(topics),
            "topics_with_content": sum(
                1 for r in results if r.get("total_results", 0) > 0
            ),
            "cached": False,
        }

        # Only cache fully successful runs; caching a failed lookup would pin
        # the failure for the whole TTL. A shallow copy is stored so callers
        # adding or changing top-level keys on the returned dict cannot alter
        # the cached entry.
        if not any("error" in r for r in results):
            self._set_cache(ck, dict(output))
        return output

    async def _search_single_topic(
        self,
        topic: str,
        competitor_domains: List[str],
        max_results: int,
        loop: asyncio.AbstractEventLoop,
    ) -> Dict[str, Any]:
        """
        Search Exa for a single topic, scoped to competitor domains.

        The Exa SDK call is synchronous, so it is run in the loop's default
        executor to keep the event loop responsive.

        Args:
            topic: Topic phrase used verbatim as the search query.
            competitor_domains: Domains passed to Exa as ``include_domains``.
            max_results: Number of results requested from Exa.
            loop: Event loop whose default executor runs the blocking call.

        Returns:
            Dict with ``topic``, ``competitor_content`` (one entry per unique
            URL), ``total_results`` and ``domains_found`` (sorted). On any
            failure the same shape is returned with empty content plus an
            ``error`` message; the exception is logged, not raised.
        """
        query = topic

        search_kwargs = {
            "type": "auto",
            "num_results": max_results,
            "include_domains": competitor_domains,
            "text": {"max_characters": 2000},
            "highlights": {"num_sentences": 3, "highlights_per_url": 3},
            "summary": {"query": f"Key details about {topic}"},
        }

        try:
            response = await loop.run_in_executor(
                None,
                lambda: self.exa.search_and_contents(query, **search_kwargs),
            )

            content = []
            seen_urls = set()
            for result in getattr(response, "results", []) or []:
                url = getattr(result, "url", "")
                if not url or url in seen_urls:
                    continue
                seen_urls.add(url)
                # `or <default>` also covers attributes that exist but are
                # None, which a getattr default alone would let through.
                content.append({
                    "domain": self._extract_domain(url),
                    "title": getattr(result, "title", None) or "Untitled",
                    "url": url,
                    "highlights": getattr(result, "highlights", None) or [],
                    "summary": getattr(result, "summary", None) or "",
                    "text": getattr(result, "text", None) or "",
                    "published_date": getattr(result, "published_date", None),
                    "author": getattr(result, "author", None),
                })

            return {
                "topic": topic,
                "competitor_content": content,
                "total_results": len(content),
                # Sorted so the output is deterministic across runs.
                "domains_found": sorted(
                    {c["domain"] for c in content if c["domain"]}
                ),
            }

        except Exception as e:
            # Best-effort boundary: one failing topic must not abort the
            # whole deep dive, so report the error in the per-topic result.
            logger.warning(f"Exa search failed for topic '{topic}': {e}")
            return {
                "topic": topic,
                "competitor_content": [],
                "total_results": 0,
                "domains_found": [],
                "error": str(e),
            }

    @staticmethod
    def _extract_domain(url: str) -> str:
        """Extract the lower-cased network location from a URL.

        Scheme-less inputs such as ``example.com/post`` are handled by
        re-parsing them as network-path references. If the URL cannot be
        parsed, the lower-cased input is returned unchanged.
        """
        from urllib.parse import urlparse

        try:
            parsed = urlparse(url)
            netloc = parsed.netloc
            if not netloc and not parsed.scheme:
                # Without a scheme urlparse puts the host into `path`.
                netloc = urlparse(f"//{url}").netloc
            return netloc.lower()
        except ValueError:
            return url.lower()
|