ContentGuardianAgent consolidation:
- Merge 3 duplicate classes into single source in specialized/content_guardian.py
- Watchdog audit_committee() with heuristic scoring, coverage gaps, overlaps, alerts
- Remove misleading rejection_rate() helper; use acceptance_rate directly
- Integrate audit + alerts + trend signals into today_workflow_service.py
Team Activity page:
- QualityAuditPanel: health ring, per-agent critiques, coverage gaps, overlaps
- TrendSignalsPanel: opportunity cards with urgency/impact/coverage bars
- AlertBanner: persistent dismiss via POST /alerts/{id}/mark-read
- AgentHelpModal: dialog showing all 8 agents with descriptions, tools, schedule
- QualityAuditPanel action buttons: Fill gap -> /content-planning, Resolve overlap, View CTA on alerts/issues
- TrendSignalsPanel action buttons: Create content from this trend -> /blog-writer with trend context state
Onboarding system:
- Step 4 validation: no auto-pass via basic_ready; requires persona data or explicit progression
- Step 5 validation: logs warning on auto-pass without integration data
- OnboardingCompletionService: single DB session, transactional task creation, upsert pattern
- Business-without-website: nullable website_url on SIFIndexingTask and MarketTrendsTask
- DeepCompetitorAnalysisExecutor: 5-min timeout, 10-competitor cap, asyncio.wait_for
- Persona generation: async with 30s timeout, falls back to scheduler
- OnboardingProgressService.reset_onboarding(): resets session + pauses all DB tasks
- OnboardingControlService.reset_onboarding(): also cancels APScheduler jobs
- FinalStep TaskSchedulingPanel: shows scheduled/failed tasks after completion, 8s auto-redirect
- onboarding_completed agent activity event logged to feed
Documentation:
- docs-site/features/onboarding/: overview, steps, scheduler-tasks, technical-reference (4 pages)
- docs-site/mkdocs.yml: added Onboarding System nav section
- docs-site/features/sif-agents/: overview, agent-directory, committee-system, content-guardian (4 pages)
- docs-site/features/team-activity/: overview, quality-audit, trend-signals, alert-system (4 pages)
- docs-site/features/todays-workflow/: updated overview, technical-architecture, workflow-guide, api-reference
176 lines
6.0 KiB
Python
176 lines
6.0 KiB
Python
"""
SERP Gap Service for ALwrity

Detects which competitors rank for target topics using Google Custom Search.
Phase 1 of the Content Gap Radar feature.

Usage:
    service = SerpGapService()
    result = await service.analyze_topic_gaps(
        topics=["AI content strategy", "topic clustering"],
        competitor_domains=["example.com", "competitor.org"]
    )
"""
|
|
|
|
import asyncio
|
|
import hashlib
|
|
import json
|
|
import os
|
|
import time
|
|
from typing import Dict, List, Optional, Any
|
|
from loguru import logger
|
|
from services.research.google_search_service import GoogleSearchService
|
|
|
|
|
|
class SerpGapService:
    """
    SERP Gap Analysis Service.

    Uses Google Custom Search `site:` queries to detect competitor ranking presence
    for specific topics. Results are cached in memory, per instance, for 24h by
    default to stay within free-tier quotas (100 queries/day). Designed to be
    consumed by a future ContentGapRadarAgent that scores and prioritizes gaps.
    """

    CACHE_TTL = int(os.getenv("SERP_GAP_CACHE_TTL", "86400"))  # 24 hours default

    # Upper bound applied to `max_results_per_site`; matches the "max 10" limit
    # documented on analyze_topic_gaps().
    MAX_RESULTS_PER_QUERY = 10

    def __init__(self, google_search_service: Optional["GoogleSearchService"] = None):
        """
        Args:
            google_search_service: Search backend exposing an awaitable
                `perform_search(query, max_results, **kwargs)`. A new
                GoogleSearchService is created when omitted.
        """
        # Explicit None check so an injected (possibly falsy) service is never
        # silently replaced by a real one.
        if google_search_service is None:
            google_search_service = GoogleSearchService()
        self.gcs = google_search_service
        # key -> {"data": <result dict>, "ts": <epoch seconds when stored>}
        self._cache: Dict[str, Dict[str, Any]] = {}
        logger.info("SerpGapService initialized")

    def _cache_key(
        self,
        topics: List[str],
        domains: List[str],
        max_results: Optional[int] = None,
    ) -> str:
        """
        Deterministic cache key from sorted topics + domains + result limit.

        The key is order-insensitive for topics and domains, so a cache hit
        returns gaps in the order of the request that populated the entry.
        `max_results` is part of the key so that requests asking for a
        different number of results per site never share an entry.
        """
        raw = json.dumps(
            {"t": sorted(topics), "d": sorted(domains), "n": max_results},
            sort_keys=True,
        )
        # Not security-sensitive; sha256 is used only as a stable fingerprint.
        return hashlib.sha256(raw.encode("utf-8")).hexdigest()

    def _get_cached(self, key: str) -> Optional[Dict[str, Any]]:
        """Return the cached result for `key`, or None if missing or expired."""
        entry = self._cache.get(key)
        if entry is None:
            return None
        if (time.time() - entry["ts"]) < self.CACHE_TTL:
            return entry["data"]
        # Expired: evict so stale entries do not accumulate in memory.
        self._cache.pop(key, None)
        return None

    def _set_cache(self, key: str, data: Dict[str, Any]):
        """Store `data` under `key`, stamped with the current time."""
        self._cache[key] = {"data": data, "ts": time.time()}

    async def analyze_topic_gaps(
        self,
        topics: List[str],
        competitor_domains: List[str],
        max_results_per_site: int = 5,
        concurrency: int = 3,
        bypass_cache: bool = False,
    ) -> Dict[str, Any]:
        """
        Analyze SERP gaps for a list of topics across known competitors.

        For each topic, queries Google with `site:competitor_domain topic` for
        each known competitor to detect ranking presence.

        Args:
            topics: Topic phrases to check (e.g. from find_semantic_gaps())
            competitor_domains: Known competitor domains (e.g. ["example.com"])
            max_results_per_site: Max Google CSE results per site: query
                (clamped to the range 1..10)
            concurrency: Max number of topics analyzed at the same time; each
                topic issues its domain queries one after another, so this is
                also the max number of in-flight API calls. Values below 1 are
                treated as 1.
            bypass_cache: Force fresh API calls, ignoring cache

        Returns:
            Dict with keys:
                gaps: List of per-topic SERP gap results
                total_topics_analyzed: int
                total_competitors: int
                cached: bool
        """
        if not topics or not competitor_domains:
            return {
                "gaps": [],
                "total_topics_analyzed": 0,
                "total_competitors": 0,
                "cached": False,
            }

        # Keep the per-query limit inside the documented 1..10 window.
        max_results_per_site = max(
            1, min(self.MAX_RESULTS_PER_QUERY, max_results_per_site)
        )

        ck = self._cache_key(topics, competitor_domains, max_results_per_site)
        if not bypass_cache:
            cached = self._get_cached(ck)
            if cached is not None:
                logger.info("Returning cached SERP gap results")
                return {**cached, "cached": True}

        # Semaphore(0) would block forever and a negative value raises, so
        # always allow at least one topic to run.
        semaphore = asyncio.Semaphore(max(1, concurrency))

        async def analyze_topic(topic: str) -> Dict[str, Any]:
            async with semaphore:
                return await self._analyze_single_topic(
                    topic, competitor_domains, max_results_per_site
                )

        results = await asyncio.gather(*(analyze_topic(topic) for topic in topics))

        output = {
            "gaps": results,
            "total_topics_analyzed": len(topics),
            "total_competitors": len(competitor_domains),
            "cached": False,
        }

        # Only cache complete results: a response containing failed queries
        # (quota, network) would otherwise be served as "no competitor
        # content" until the TTL expires.
        if any(gap.get("failed_queries") for gap in results):
            logger.warning(
                "SERP gap results contain failed queries; not caching this response"
            )
        else:
            self._set_cache(ck, output)
        # Shallow copy so callers cannot replace top-level keys of the cached dict.
        return dict(output)

    async def _analyze_single_topic(
        self,
        topic: str,
        competitor_domains: List[str],
        max_results: int,
    ) -> Dict[str, Any]:
        """
        Check SERP presence for a single topic across all competitor domains.

        Passes dateRestrict=None and sort=None to the search service so the
        query is not limited to recent content or sorted by date (we want
        all-time competitor content, not just last month).

        Args:
            topic: Topic phrase to search for.
            competitor_domains: Domains queried one after another with
                `site:<domain> <topic>`.
            max_results: Result limit forwarded to the search service.

        Returns:
            Dict with keys `topic`, `competitors_found` (entries de-duplicated
            by URL), `competitor_count`, `domains_with_content` (in first-seen
            order), `failed_queries` and `total_domains_checked`.
        """
        competitors_found = []
        failed_queries = 0

        for domain in competitor_domains:
            query = f"site:{domain} {topic}"
            try:
                raw_results = await self.gcs.perform_search(
                    query,
                    max_results,
                    dateRestrict=None,  # Don't limit to last month
                    sort=None,  # Use relevance sorting, not date
                )
                # Build the rows for this domain first so a failure while
                # reading results never leaves a partial set behind.
                domain_hits = [
                    {
                        "domain": domain,
                        "title": result.get("title", ""),
                        "url": result.get("link", ""),
                        "snippet": result.get("snippet", ""),
                    }
                    for result in raw_results
                ]
            except Exception as e:
                # Deliberate best-effort: one failing domain must not abort
                # the analysis of the remaining domains.
                logger.warning(
                    f"GCS query failed for site:{domain} topic='{topic}': {e}"
                )
                failed_queries += 1
                continue
            competitors_found.extend(domain_hits)

        # De-duplicate by URL, keeping the first occurrence.
        seen_urls = set()
        unique_competitors = []
        for entry in competitors_found:
            if entry["url"] not in seen_urls:
                seen_urls.add(entry["url"])
                unique_competitors.append(entry)

        # dict.fromkeys keeps first-seen order, so the output is deterministic
        # (a plain set would yield a run-dependent ordering).
        domains_with_content = list(
            dict.fromkeys(e["domain"] for e in unique_competitors)
        )

        return {
            "topic": topic,
            "competitors_found": unique_competitors,
            "competitor_count": len(unique_competitors),
            "domains_with_content": domains_with_content,
            "failed_queries": failed_queries,
            "total_domains_checked": len(competitor_domains),
        }
|