feat: ContentGuardianAgent, onboarding UX, Team Activity action wiring, docs, agent help modal

ContentGuardianAgent consolidation:
- Merge 3 duplicate classes into single source in specialized/content_guardian.py
- Watchdog audit_committee() with heuristic scoring, coverage gaps, overlaps, alerts
- Remove misleading rejection_rate() helper; use acceptance_rate directly
- Integrate audit + alerts + trend signals into today_workflow_service.py

Team Activity page:
- QualityAuditPanel: health ring, per-agent critiques, coverage gaps, overlaps
- TrendSignalsPanel: opportunity cards with urgency/impact/coverage bars
- AlertBanner: persistent dismiss via POST /alerts/{id}/mark-read
- AgentHelpModal: dialog showing all 8 agents with descriptions, tools, schedule
- QualityAuditPanel action buttons: Fill gap -> /content-planning, Resolve overlap, View CTA on alerts/issues
- TrendSignalsPanel action buttons: Create content from this trend -> /blog-writer with trend context state

Onboarding system:
- Step 4 validation: no auto-pass via basic_ready; requires persona data or explicit progression
- Step 5 validation: always passes (integrations are optional); logs an info message when auto-passed without integration data
- OnboardingCompletionService: single DB session, transactional task creation, upsert pattern
- Business-without-website: nullable website_url on SIFIndexingTask and MarketTrendsTask
- DeepCompetitorAnalysisExecutor: 5-min timeout, 10-competitor cap, asyncio.wait_for
- Persona generation: async with 30s timeout, falls back to scheduler
- OnboardingProgressService.reset_onboarding(): resets session + pauses all DB tasks
- OnboardingControlService.reset_onboarding(): also cancels APScheduler jobs
- FinalStep TaskSchedulingPanel: shows scheduled/failed tasks after completion, 8s auto-redirect
- onboarding_completed agent activity event logged to feed

Documentation:
- docs-site/features/onboarding/: overview, steps, scheduler-tasks, technical-reference (4 pages)
- docs-site/mkdocs.yml: added Onboarding System nav section
- docs-site/features/sif-agents/: overview, agent-directory, committee-system, content-guardian (4 pages)
- docs-site/features/team-activity/: overview, quality-audit, trend-signals, alert-system (4 pages)
- docs-site/features/todays-workflow/: updated overview, technical-architecture, workflow-guide, api-reference
This commit is contained in:
ajaysi
2026-06-01 12:24:31 +05:30
parent 9b472f1c18
commit 923fa671fe
90 changed files with 8914 additions and 2731 deletions

View File

@@ -66,6 +66,7 @@ class RecommendationItem(BaseModel):
class SEOApplyRecommendationsRequest(BaseModel):
title: str = Field(..., description="Current blog title")
introduction: str | None = Field(default=None, description="Current blog introduction text")
sections: List[Dict[str, Any]] = Field(..., description="Array of sections with id, heading, content")
outline: List[Dict[str, Any]] = Field(default_factory=list, description="Outline structure for context")
research: Dict[str, Any] = Field(default_factory=dict, description="Research data used for the blog")
@@ -122,7 +123,7 @@ async def section_originality_tools(
raise HTTPException(status_code=401, detail="User ID not found in authentication token")
from services.intelligence.sif_integration import SIFIntegrationService
from services.intelligence.sif_agents import ContentGuardianAgent
from services.intelligence.agents.specialized import ContentGuardianAgent
sif_service = SIFIntegrationService(user_id)
intelligence = sif_service.intelligence_service

View File

@@ -1,10 +1,17 @@
"""
Onboarding Completion Service
Handles the complex logic for completing the onboarding process.
Phase 1 fixes applied:
- Single DB session with proper context manager (no SessionLocal bypass)
- timezone-aware datetimes (datetime.now(timezone.utc))
- Transactional task creation with partial failure reporting
- Business-without-website users: SIF + Market Trends tasks created without website_url
- Idempotent re-runs: upsert pattern (query-then-update-or-insert) for all tasks; the pattern is not atomic, so concurrent completions still rely on a DB-level unique constraint
"""
from typing import Dict, Any, List
from datetime import datetime, timedelta
from datetime import datetime, timedelta, timezone
import os
from urllib.parse import urlparse
from fastapi import HTTPException
@@ -15,12 +22,13 @@ from services.database import get_session_for_user
from services.persona_analysis_service import PersonaAnalysisService
from services.research.research_persona_scheduler import schedule_research_persona_generation
from services.persona.facebook.facebook_persona_scheduler import schedule_facebook_persona_generation
from services.agent_activity_service import build_agent_event_payload
class OnboardingCompletionService:
"""Service for handling onboarding completion logic."""
def __init__(self):
# Pre-requisite steps; step 6 is the finalization itself
self.required_steps = [1, 2, 3, 4, 5]
def _normalize_competitor_analysis_for_deep_task(self, competitors: Any) -> List[Dict[str, Any]]:
@@ -100,15 +108,31 @@ class OnboardingCompletionService:
if domain.startswith("www."):
domain = domain[4:]
return domain
@staticmethod
def _upsert_task(db, model_cls, user_id: str, filters: dict, defaults: dict):
    """Insert or update the task row identified by ``filters``.

    Looks up the first ``model_cls`` row matching ``filters``. When one
    exists, every entry of ``defaults`` is written onto it; otherwise a new
    row is built from ``filters`` plus ``defaults``. In both cases the row is
    added to ``db`` but NOT committed: the caller owns the transaction.

    NOTE: this is a plain query-then-write sequence. It makes repeated calls
    idempotent, but it is not atomic: two concurrent callers can both miss
    the lookup and both insert. Preventing duplicates under concurrency
    needs a unique constraint (or a database-native upsert) on the filter
    columns.

    Args:
        db: Session-like object exposing ``query()`` and ``add()``.
        model_cls: Mapped task class to look up or instantiate.
        user_id: Unused here (callers pass the user inside ``filters``);
            kept so existing call sites stay valid.
        filters: Column/value pairs that identify the row.
        defaults: Column/value pairs to set on the matched or new row.

    Returns:
        The updated existing row, or the newly created one.
    """
    existing = db.query(model_cls).filter_by(**filters).first()

    if existing is None:
        # Merge instead of splatting both dicts: a key present in both would
        # otherwise raise TypeError. ``defaults`` wins, which matches what
        # the update path below writes.
        row = model_cls(**{**filters, **defaults})
        db.add(row)
        return row

    for key, value in defaults.items():
        setattr(existing, key, value)
    db.add(existing)
    return existing
async def complete_onboarding(self, current_user: Dict[str, Any]) -> Dict[str, Any]:
"""Complete the onboarding process with full validation."""
"""Complete the onboarding process with full validation and task scheduling."""
scheduled_tasks: List[str] = []
failed_tasks: List[Dict[str, str]] = []
try:
from services.onboarding.progress_service import OnboardingProgressService
user_id = str(current_user.get('id'))
progress_service = OnboardingProgressService()
# Strict DB-only validation now that step persistence is solid
missing_steps = await self._validate_required_steps_database(user_id)
if missing_steps:
missing_steps_str = ", ".join(missing_steps)
@@ -117,276 +141,314 @@ class OnboardingCompletionService:
detail=f"Cannot complete onboarding. The following steps must be completed first: {missing_steps_str}"
)
# Require API keys in DB for completion
await self._validate_api_keys(user_id)
# Generate writing persona from onboarding data only if not already present
persona_generated = await self._generate_persona_from_onboarding(user_id)
# Complete the onboarding process in database
success = progress_service.complete_onboarding(user_id)
if not success:
raise HTTPException(status_code=500, detail="Failed to mark onboarding as complete")
# Schedule research persona generation 20 minutes after onboarding completion
# ── APScheduler one-shot tasks (non-blocking) ───────────────────
try:
schedule_research_persona_generation(user_id, delay_minutes=20)
logger.info(f"Scheduled research persona generation for user {user_id} (20 minutes after onboarding)")
scheduled_tasks.append("research_persona")
logger.info(f"Scheduled research persona generation for user {user_id} (20 min delay)")
except Exception as e:
# Non-critical: log but don't fail onboarding completion
failed_tasks.append({"task": "research_persona", "error": str(e)})
logger.warning(f"Failed to schedule research persona generation for user {user_id}: {e}")
# Schedule Facebook persona generation 20 minutes after onboarding completion
try:
schedule_facebook_persona_generation(user_id, delay_minutes=20)
logger.info(f"Scheduled Facebook persona generation for user {user_id} (20 minutes after onboarding)")
scheduled_tasks.append("facebook_persona")
logger.info(f"Scheduled Facebook persona generation for user {user_id} (20 min delay)")
except Exception as e:
# Non-critical: log but don't fail onboarding completion
failed_tasks.append({"task": "facebook_persona", "error": str(e)})
logger.warning(f"Failed to schedule Facebook persona generation for user {user_id}: {e}")
# Create OAuth token monitoring tasks for connected platforms
# ── Local DB tasks — single session, proper context manager ──────
db = get_session_for_user(user_id)
try:
from services.progressive_setup_service import ProgressiveSetupService
db = get_session_for_user(user_id)
# Progressive setup (workspace, features)
try:
# Initialize user environment (create workspace, setup features)
try:
setup_service = ProgressiveSetupService(db)
setup_service.initialize_user_environment(user_id)
logger.info(f"Initialized user environment for {user_id} on onboarding completion")
except Exception as e:
logger.warning(f"Failed to initialize user environment for {user_id}: {e}")
from services.progressive_setup_service import ProgressiveSetupService
setup_service = ProgressiveSetupService(db)
setup_service.initialize_user_environment(user_id)
logger.info(f"Initialized user environment for {user_id}")
except Exception as e:
failed_tasks.append({"task": "progressive_setup", "error": str(e)})
logger.warning(f"Failed to initialize user environment for {user_id}: {e}")
# OAuth token monitoring
try:
from services.oauth_token_monitoring_service import create_oauth_monitoring_tasks
monitoring_tasks = create_oauth_monitoring_tasks(user_id, db)
logger.info(
f"Created {len(monitoring_tasks)} OAuth token monitoring tasks for user {user_id} "
f"on onboarding completion"
)
finally:
db.close()
except Exception as e:
# Non-critical: log but don't fail onboarding completion
logger.warning(f"Failed to create OAuth token monitoring tasks for user {user_id}: {e}")
# Schedule website analysis task creation 5 minutes after onboarding completion
try:
from services.website_analysis_monitoring_service import schedule_website_analysis_task_creation
schedule_website_analysis_task_creation(user_id=user_id, delay_minutes=5)
logger.info(
f"Scheduled website analysis task creation for user {user_id} "
f"(5 minutes after onboarding completion)"
)
except Exception as e:
logger.warning(f"Failed to schedule website analysis task creation for user {user_id}: {e}")
scheduled_tasks.append("oauth_monitoring")
logger.info(f"Created {len(monitoring_tasks)} OAuth monitoring tasks for user {user_id}")
except Exception as e:
failed_tasks.append({"task": "oauth_monitoring", "error": str(e)})
logger.warning(f"Failed to create OAuth monitoring tasks for user {user_id}: {e}")
# Website analysis monitoring (APScheduler one-shot, 5 min delay)
try:
from services.website_analysis_monitoring_service import schedule_website_analysis_task_creation
schedule_website_analysis_task_creation(user_id=user_id, delay_minutes=5)
scheduled_tasks.append("website_analysis")
logger.info(f"Scheduled website analysis task for user {user_id} (5 min delay)")
except Exception as e:
failed_tasks.append({"task": "website_analysis", "error": str(e)})
logger.warning(f"Failed to schedule website analysis task for user {user_id}: {e}")
# ── DB-backed scheduled tasks (single transaction) ───────────
now = datetime.now(timezone.utc)
next_execution = now + timedelta(minutes=5)
# Schedule onboarding full-site SEO audit (non-blocking) ~10 minutes after completion
try:
from services.database import SessionLocal
from models.website_analysis_monitoring_models import (
OnboardingFullWebsiteAnalysisTask,
DeepCompetitorAnalysisTask,
SIFIndexingTask,
MarketTrendsTask
)
from api.content_planning.services.content_strategy.onboarding import OnboardingDataIntegrationService
db = SessionLocal()
try:
integration_service = OnboardingDataIntegrationService()
integrated_data = integration_service.get_integrated_data_sync(user_id, db)
website_analysis = integrated_data.get('website_analysis', {}) if integrated_data else {}
website_url = website_analysis.get('website_url')
integration_service = OnboardingDataIntegrationService()
integrated_data = integration_service.get_integrated_data_sync(user_id, db)
website_analysis = integrated_data.get('website_analysis', {}) if isinstance(integrated_data, dict) else {}
website_url = (website_analysis.get('website_url') or '').strip() or None
if not website_url:
try:
from services.website_analysis_monitoring_service import clerk_user_id_to_int
from models.onboarding import WebsiteAnalysis
session_id_int = clerk_user_id_to_int(user_id)
analysis = db.query(WebsiteAnalysis).filter(
WebsiteAnalysis.session_id == session_id_int
).order_by(WebsiteAnalysis.created_at.desc()).first()
if analysis and analysis.website_url:
website_url = analysis.website_url
except Exception:
website_url = None
if not website_url:
try:
from services.website_analysis_monitoring_service import clerk_user_id_to_int
from models.onboarding import WebsiteAnalysis
session_id_int = clerk_user_id_to_int(user_id)
analysis = db.query(WebsiteAnalysis).filter(
WebsiteAnalysis.session_id == session_id_int
).order_by(WebsiteAnalysis.created_at.desc()).first()
if analysis and analysis.website_url:
website_url = analysis.website_url.strip() or None
except Exception:
website_url = None
if website_url:
# 1. Schedule Full Site SEO Audit
next_execution = datetime.utcnow() + timedelta(minutes=5)
existing = db.query(OnboardingFullWebsiteAnalysisTask).filter(
OnboardingFullWebsiteAnalysisTask.user_id == user_id,
OnboardingFullWebsiteAnalysisTask.website_url == website_url
).first()
payload = {
# --- Tasks that require website_url ---
if website_url:
# 1. Full-Site SEO Audit
try:
payload_audit = {
'website_url': website_url,
'max_urls': 500,
'created_from': 'onboarding_completion'
}
self._upsert_task(
db, OnboardingFullWebsiteAnalysisTask,
user_id=user_id,
filters={"user_id": user_id, "website_url": website_url},
defaults={
"status": "active",
"next_execution": next_execution,
"payload": payload_audit,
}
)
scheduled_tasks.append("full_site_seo_audit")
logger.info(f"Scheduled full-site SEO audit for user {user_id} ({website_url})")
except Exception as e:
failed_tasks.append({"task": "full_site_seo_audit", "error": str(e)})
logger.warning(f"Failed to schedule full-site SEO audit for user {user_id}: {e}")
if existing:
existing.status = 'active'
existing.next_execution = next_execution
existing.payload = payload
db.add(existing)
else:
db.add(OnboardingFullWebsiteAnalysisTask(
user_id=user_id,
website_url=website_url,
status='active',
next_execution=next_execution,
payload=payload
))
# 2. Schedule SIF Indexing Task (Metadata + Content)
# Runs 5 mins after onboarding, then recurring every 48h
existing_sif = db.query(SIFIndexingTask).filter(
SIFIndexingTask.user_id == user_id,
SIFIndexingTask.website_url == website_url
).first()
# 2. SIF Indexing (with website_url)
try:
payload_sif = {
'website_url': website_url,
'mode': 'initial_indexing',
'created_from': 'onboarding_completion'
}
if existing_sif:
existing_sif.status = 'active'
existing_sif.next_execution = next_execution
existing_sif.frequency_hours = 48
existing_sif.payload = payload_sif
db.add(existing_sif)
else:
db.add(SIFIndexingTask(
user_id=user_id,
website_url=website_url,
status='active',
next_execution=next_execution,
frequency_hours=48,
payload=payload_sif
))
logger.info(
f"Scheduled SIF indexing task for user {user_id} "
f"({website_url}) at {next_execution.isoformat()}"
self._upsert_task(
db, SIFIndexingTask,
user_id=user_id,
filters={"user_id": user_id, "website_url": website_url},
defaults={
"status": "active",
"next_execution": next_execution,
"frequency_hours": 48,
"payload": payload_sif,
}
)
scheduled_tasks.append("sif_indexing")
logger.info(f"Scheduled SIF indexing for user {user_id} ({website_url})")
except Exception as e:
failed_tasks.append({"task": "sif_indexing", "error": str(e)})
logger.warning(f"Failed to schedule SIF indexing for user {user_id}: {e}")
# 3. Schedule Market Trends Task (Google Trends) every 72h
existing_trends = db.query(MarketTrendsTask).filter(
MarketTrendsTask.user_id == user_id,
MarketTrendsTask.website_url == website_url
).first()
# 3. Market Trends (with website_url)
try:
payload_trends = {
"website_url": website_url,
"geo": "US",
"timeframe": "today 12-m",
"created_from": "onboarding_completion"
}
self._upsert_task(
db, MarketTrendsTask,
user_id=user_id,
filters={"user_id": user_id, "website_url": website_url},
defaults={
"status": "active",
"next_execution": next_execution,
"frequency_hours": 72,
"payload": payload_trends,
}
)
scheduled_tasks.append("market_trends")
logger.info(f"Scheduled market trends for user {user_id} ({website_url})")
except Exception as e:
failed_tasks.append({"task": "market_trends", "error": str(e)})
logger.warning(f"Failed to schedule market trends for user {user_id}: {e}")
if existing_trends:
existing_trends.status = "active"
existing_trends.next_execution = next_execution
existing_trends.frequency_hours = 72
existing_trends.payload = payload_trends
db.add(existing_trends)
else:
db.add(MarketTrendsTask(
user_id=user_id,
website_url=website_url,
status="active",
next_execution=next_execution,
frequency_hours=72,
payload=payload_trends
))
# 4. Deep Competitor Analysis
try:
research_prefs = integrated_data.get("research_preferences", {}) if isinstance(integrated_data, dict) else {}
research_competitors = research_prefs.get("competitors") if isinstance(research_prefs, dict) else None
competitor_analysis = integrated_data.get("competitor_analysis") if isinstance(integrated_data, dict) else None
normalized_fallback = self._normalize_competitor_analysis_for_deep_task(competitor_analysis)
selected_source = "research_preferences"
competitors = research_competitors
if not isinstance(competitors, list) or len(competitors) == 0:
competitors = normalized_fallback
selected_source = "competitor_analysis"
db.commit()
logger.info(
f"Scheduled onboarding full-site SEO audit for user {user_id} "
f"({website_url}) at {next_execution.isoformat()}"
f"Deep competitor analysis sources for user {user_id}: "
f"research_preferences={len(research_competitors) if isinstance(research_competitors, list) else 0}, "
f"competitor_analysis={len(normalized_fallback)}"
)
try:
research_prefs = integrated_data.get("research_preferences", {}) if isinstance(integrated_data, dict) else {}
research_competitors = research_prefs.get("competitors") if isinstance(research_prefs, dict) else None
competitor_analysis = integrated_data.get("competitor_analysis") if isinstance(integrated_data, dict) else None
normalized_fallback_competitors = self._normalize_competitor_analysis_for_deep_task(competitor_analysis)
selected_source = "research_preferences"
competitors = research_competitors
if not isinstance(competitors, list) or len(competitors) == 0:
competitors = normalized_fallback_competitors
selected_source = "competitor_analysis"
logger.info(
f"Deep competitor analysis source stats for user {user_id}: "
f"research_preferences={len(research_competitors) if isinstance(research_competitors, list) else 0}, "
f"competitor_analysis={len(normalized_fallback_competitors)}"
)
if isinstance(competitors, list) and len(competitors) > 0:
existing_deep = db.query(DeepCompetitorAnalysisTask).filter(
DeepCompetitorAnalysisTask.user_id == user_id,
DeepCompetitorAnalysisTask.website_url == website_url
).first()
payload_deep = {
"website_url": website_url,
"competitors": competitors,
"max_competitors": 25,
"crawl_concurrency": 4,
"mode": "strategic_insights", # Enable recurring weekly strategic insights
"baseline_updated_at": website_analysis.get("updated_at") if isinstance(website_analysis, dict) else None,
"created_from": "onboarding_completion"
if isinstance(competitors, list) and len(competitors) > 0:
payload_deep = {
"website_url": website_url,
"competitors": competitors,
"max_competitors": min(len(competitors), 10),
"crawl_concurrency": 4,
"mode": "strategic_insights",
"baseline_updated_at": website_analysis.get("updated_at") if isinstance(website_analysis, dict) else None,
"created_from": "onboarding_completion"
}
self._upsert_task(
db, DeepCompetitorAnalysisTask,
user_id=user_id,
filters={"user_id": user_id, "website_url": website_url},
defaults={
"status": "active",
"next_execution": next_execution,
"payload": payload_deep,
}
)
scheduled_tasks.append("deep_competitor_analysis")
logger.info(
f"Scheduled deep competitor analysis for user {user_id} "
f"({website_url}) with {len(competitors)} competitors from source={selected_source}"
)
else:
logger.warning(
f"Deep competitor analysis not scheduled for user {user_id}: "
f"no competitors available from research_preferences or competitor_analysis"
)
except Exception as e:
failed_tasks.append({"task": "deep_competitor_analysis", "error": str(e)})
logger.warning(f"Failed to schedule deep competitor analysis for user {user_id}: {e}")
if existing_deep:
existing_deep.status = "active"
existing_deep.next_execution = next_execution
existing_deep.payload = payload_deep
db.add(existing_deep)
else:
db.add(DeepCompetitorAnalysisTask(
user_id=user_id,
website_url=website_url,
status="active",
next_execution=next_execution,
payload=payload_deep
))
else:
# --- No website URL: still schedule SIF + Market Trends (business-without-website) ---
logger.warning(
f"No website_url for user {user_id}: scheduling SIF indexing and Market Trends without website URL, "
f"skipping SEO audit and deep competitor analysis"
)
db.commit()
logger.info(
f"Scheduled deep competitor analysis for user {user_id} "
f"({website_url}) at {next_execution.isoformat()} with {len(competitors)} competitors "
f"from source={selected_source}"
)
else:
logger.warning(
f"Deep competitor analysis not scheduled for user {user_id}: "
f"no competitors available from research_preferences or competitor_analysis"
)
except Exception as e:
logger.warning(f"Failed to schedule deep competitor analysis for user {user_id}: {e}")
else:
logger.warning(
f"Could not schedule onboarding full-site SEO audit for user {user_id}: "
f"website_url missing"
try:
payload_sif_no_url = {
'mode': 'initial_indexing',
'created_from': 'onboarding_completion_no_website'
}
self._upsert_task(
db, SIFIndexingTask,
user_id=user_id,
filters={"user_id": user_id, "website_url": None},
defaults={
"status": "active",
"next_execution": next_execution,
"frequency_hours": 48,
"payload": payload_sif_no_url,
}
)
finally:
db.close()
scheduled_tasks.append("sif_indexing_no_url")
logger.info(f"Scheduled SIF indexing (no website) for user {user_id}")
except Exception as e:
failed_tasks.append({"task": "sif_indexing_no_url", "error": str(e)})
logger.warning(f"Failed to schedule SIF indexing (no website) for user {user_id}: {e}")
try:
payload_trends_no_url = {
"geo": "US",
"timeframe": "today 12-m",
"created_from": "onboarding_completion_no_website"
}
self._upsert_task(
db, MarketTrendsTask,
user_id=user_id,
filters={"user_id": user_id, "website_url": None},
defaults={
"status": "active",
"next_execution": next_execution,
"frequency_hours": 72,
"payload": payload_trends_no_url,
}
)
scheduled_tasks.append("market_trends_no_url")
logger.info(f"Scheduled market trends (no website) for user {user_id}")
except Exception as e:
failed_tasks.append({"task": "market_trends_no_url", "error": str(e)})
logger.warning(f"Failed to schedule market trends (no website) for user {user_id}: {e}")
db.commit()
except Exception as e:
logger.warning(f"Failed to schedule onboarding full-site SEO audit for user {user_id}: {e}")
db.rollback()
failed_tasks.append({"task": "db_scheduled_tasks", "error": str(e)})
logger.error(f"Failed to create DB tasks for user {user_id}: {e}")
finally:
db.close()
try:
from services.agent_activity_service import AgentActivityService
activity_db = get_session_for_user(user_id)
activity_svc = AgentActivityService(activity_db, user_id)
task_summary = ", ".join(scheduled_tasks) if scheduled_tasks else "none"
fail_summary = ", ".join(t.get("task", "?") for t in failed_tasks) if failed_tasks else "none"
activity_svc.log_event(
event_type="onboarding_completed",
severity="info",
message=f"Onboarding completed. Scheduled: {task_summary}. Failed: {fail_summary}.",
payload=build_agent_event_payload(
phase="onboarding",
step="completion",
progress_percent=100.0,
output_summary=f"Scheduled {len(scheduled_tasks)} task(s)",
metadata={
"scheduled_tasks": scheduled_tasks,
"failed_tasks": failed_tasks if failed_tasks else [],
"persona_generated": persona_generated,
},
),
)
activity_db.close()
except Exception as act_err:
logger.warning(f"Failed to log onboarding_completed event for user {user_id}: {act_err}")
return {
"message": "Onboarding completed successfully",
"completed_at": datetime.now().isoformat(),
"completed_at": datetime.now(timezone.utc).isoformat(),
"completion_percentage": 100.0,
"persona_generated": persona_generated
"persona_generated": persona_generated,
"scheduled_tasks": scheduled_tasks,
"failed_tasks": failed_tasks if failed_tasks else None,
}
except HTTPException:
@@ -400,81 +462,72 @@ class OnboardingCompletionService:
missing_steps = []
try:
db = get_session_for_user(user_id)
integration_service = OnboardingDataIntegrationService()
logger.info(f"Validating steps for user {user_id}")
integrated_data = await integration_service.process_onboarding_data(user_id, db)
db.close()
from services.onboarding.progress_service import OnboardingProgressService
progress_service = OnboardingProgressService()
status = progress_service.get_onboarding_status(user_id)
current_step = status.get("current_step", 1)
for step_num in self.required_steps:
step_completed = False
try:
integration_service = OnboardingDataIntegrationService()
if step_num == 1:
api_keys_data = integrated_data.get('api_keys_data', {})
logger.info(f"Step 1 - API Keys: {api_keys_data}")
step_completed = bool(
api_keys_data.get('openai_api_key') or
api_keys_data.get('anthropic_api_key') or
api_keys_data.get('google_api_key')
)
if not step_completed:
has_global_providers = bool(
os.getenv("EXA_API_KEY") or
os.getenv("GEMINI_API_KEY") or
os.getenv("OPENAI_API_KEY") or
os.getenv("ANTHROPIC_API_KEY") or
os.getenv("GOOGLE_API_KEY")
logger.info(f"Validating steps for user {user_id}")
integrated_data = await integration_service.process_onboarding_data(user_id, db)
from services.onboarding.progress_service import OnboardingProgressService
progress_service = OnboardingProgressService()
status = progress_service.get_onboarding_status(user_id)
current_step = status.get("current_step", 1)
for step_num in self.required_steps:
step_completed = False
if step_num == 1:
api_keys_data = integrated_data.get('api_keys_data', {})
step_completed = bool(
api_keys_data.get('openai_api_key') or
api_keys_data.get('anthropic_api_key') or
api_keys_data.get('google_api_key')
)
if has_global_providers:
step_completed = True
logger.info(f"Step 1 completed: {step_completed}")
elif step_num == 2:
website = integrated_data.get('website_analysis', {})
logger.info(f"Step 2 - Website Analysis: {website}")
step_completed = bool(website and (website.get('website_url') or website.get('writing_style')))
logger.info(f"Step 2 completed: {step_completed}")
elif step_num == 3:
research = integrated_data.get('research_preferences', {})
logger.info(f"Step 3 - Research Preferences: {research}")
step_completed = bool(research and (research.get('research_depth') or research.get('content_types')))
logger.info(f"Step 3 completed: {step_completed}")
elif step_num == 4:
persona = integrated_data.get('persona_data', {})
logger.info(f"Step 4 - Persona Data: {persona}")
step_completed = bool(persona and (persona.get('corePersona') or persona.get('platformPersonas')))
if not step_completed:
if not step_completed:
has_global_providers = bool(
os.getenv("EXA_API_KEY") or
os.getenv("GEMINI_API_KEY") or
os.getenv("OPENAI_API_KEY") or
os.getenv("ANTHROPIC_API_KEY") or
os.getenv("GOOGLE_API_KEY")
)
if has_global_providers:
step_completed = True
elif step_num == 2:
website = integrated_data.get('website_analysis', {})
step_completed = bool(website and (website.get('website_url') or website.get('writing_style')))
elif step_num == 3:
research = integrated_data.get('research_preferences', {})
basic_ready = bool(
website and (website.get('website_url') or website.get('writing_style'))
) and bool(research)
if basic_ready:
step_completed = True
logger.info(f"Step 4 completed: {step_completed}")
elif step_num == 5:
step_completed = True
logger.info(f"Step 5 completed: {step_completed}")
step_completed = bool(research and (research.get('research_depth') or research.get('content_types')))
elif step_num == 4:
persona = integrated_data.get('persona_data', {})
step_completed = bool(persona and (persona.get('corePersona') or persona.get('platformPersonas')))
if not step_completed:
logger.warning(
f"Step 4 incomplete for user {user_id}: no persona data found. "
f"Step will be auto-passed only if user has explicitly reached step 4."
)
elif step_num == 5:
integrations_complete = bool(integrated_data.get('integrations'))
step_completed = integrations_complete or True
if step_completed and not integrations_complete:
logger.info(f"Step 5 auto-passed for user {user_id}: integrations are optional")
if not step_completed and current_step >= step_num:
step_completed = True
logger.info(
f"Step {step_num} marked completed based on progress service (current_step={current_step})"
)
if not step_completed and current_step >= step_num:
step_completed = True
if not step_completed:
missing_steps.append(f"Step {step_num}")
if not step_completed:
missing_steps.append(f"Step {step_num}")
logger.info(f"Missing steps: {missing_steps}")
return missing_steps
logger.info(f"Missing steps for user {user_id}: {missing_steps}")
return missing_steps
finally:
db.close()
except Exception as e:
logger.error(f"Error validating required steps: {e}")
logger.error(f"Error validating required steps for user {user_id}: {e}")
return ["Validation error"]
async def _validate_api_keys(self, user_id: str):
@@ -505,9 +558,7 @@ class OnboardingCompletionService:
os.getenv("GEMINI_API_KEY")
)
has_keys = has_user_keys or has_env_keys
if not has_keys:
if not (has_user_keys or has_env_keys):
raise HTTPException(
status_code=400,
detail="Cannot complete onboarding. At least one AI provider API key must be configured in your account."
@@ -520,9 +571,10 @@ class OnboardingCompletionService:
detail="Cannot complete onboarding. API key validation failed."
)
async def _generate_persona_from_onboarding(self, user_id: str) -> bool:
"""Generate writing persona from onboarding data."""
async def _generate_persona_from_onboarding(self, user_id: str) -> bool:
"""Generate writing persona from onboarding data (awaited in an executor with a 30s timeout; returns False on timeout)."""
try:
import asyncio
persona_service = PersonaAnalysisService()
try:
@@ -531,17 +583,27 @@ class OnboardingCompletionService:
logger.info("Persona already exists for user %s; skipping regeneration during completion", user_id)
return False
except Exception:
# Non-fatal; proceed to attempt generation
pass
persona_result = persona_service.generate_persona_from_onboarding(user_id)
try:
persona_result = await asyncio.wait_for(
asyncio.get_event_loop().run_in_executor(
None,
persona_service.generate_persona_from_onboarding,
user_id
),
timeout=30.0
)
except asyncio.TimeoutError:
logger.warning(f"Persona generation timed out (30s) for user {user_id}; will be generated by scheduled task")
return False
if "error" not in persona_result:
logger.info(f"Writing persona generated during onboarding completion: {persona_result.get('persona_id')}")
logger.info(f"Writing persona generated during onboarding completion: {persona_result.get('persona_id')}")
return True
else:
logger.warning(f"⚠️ Persona generation failed during onboarding: {persona_result['error']}")
logger.warning(f"Persona generation failed during onboarding: {persona_result['error']}")
return False
except Exception as e:
logger.warning(f"⚠️ Non-critical error generating persona during onboarding: {str(e)}")
return False
logger.warning(f"Non-critical error generating persona during onboarding: {str(e)}")
return False

View File

@@ -50,22 +50,40 @@ class OnboardingControlService:
db.close()
async def reset_onboarding(self, current_user: Dict[str, Any]) -> Dict[str, Any]:
    """Reset the onboarding progress for a specific user and cancel scheduled tasks.

    The reset itself is delegated to ``OnboardingProgressService.reset_onboarding``.
    When it succeeds, the one-shot scheduler jobs ``research_persona_<user_id>``
    and ``facebook_persona_<user_id>`` are removed on a best-effort basis.

    Args:
        current_user: Authenticated user payload; the user id is read from
            ``clerk_user_id`` and falls back to ``id``.

    Returns:
        Dict with a confirmation message, ``current_step`` reset to 1,
        ``started_at`` (None), the ``user_id`` and ``cancelled_jobs`` (list of
        removed job ids, or None when nothing was removed).

    Raises:
        HTTPException: 500 when the progress reset reports failure or an
            unexpected error occurs.
    """
    try:
        from services.onboarding.progress_service import OnboardingProgressService

        user_id = str(current_user.get('clerk_user_id') or current_user.get('id'))
        progress_service = OnboardingProgressService()
        success = progress_service.reset_onboarding(user_id)
        if not success:
            raise HTTPException(status_code=500, detail="Failed to reset onboarding progress")

        # Cancel APScheduler one-shot jobs for this user. This must run on the
        # success path, so there is no early return before it.
        cancelled_jobs = []
        try:
            from services.scheduler import get_scheduler

            scheduler = get_scheduler()
            for job_id_suffix in ("research_persona", "facebook_persona"):
                job_id = f"{job_id_suffix}_{user_id}"
                try:
                    scheduler.scheduler.remove_job(job_id)
                except Exception as job_err:
                    # Best-effort: presumably the job was never scheduled or
                    # already ran. Keep going, but leave a trace for debugging.
                    logger.debug(f"APScheduler job {job_id} not removed: {job_err}")
                else:
                    cancelled_jobs.append(job_id)
        except Exception as e:
            # Scheduler unavailable: the reset itself already succeeded.
            logger.warning(f"Could not cancel APScheduler jobs for user {user_id}: {e}")

        return {
            "message": "Onboarding progress reset successfully",
            "current_step": 1,
            "started_at": None,
            "user_id": user_id,
            "cancelled_jobs": cancelled_jobs or None,
        }
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error resetting onboarding: {str(e)}")
        raise HTTPException(status_code=500, detail="Internal server error")

View File

@@ -19,7 +19,7 @@ from services.seo import SEODashboardService
from middleware.auth_middleware import get_current_user
from services.llm_providers.main_text_generation import llm_text_gen
from api.content_planning.services.content_strategy.onboarding import OnboardingDataIntegrationService
from models.onboarding import SEOPageAudit, WebsiteAnalysis, OnboardingSession
from models.onboarding import SEOPageAudit, WebsiteAnalysis, OnboardingSession, CompetitorAnalysis
from sqlalchemy.orm.attributes import flag_modified
from sqlalchemy import desc
@@ -752,6 +752,391 @@ async def get_keyword_gaps(
raise HTTPException(status_code=500, detail=f"Failed to get keyword gaps: {str(e)}")
async def get_serp_gaps(
    current_user: dict = Depends(get_current_user),
    topics: Optional[List[str]] = None,
) -> Dict[str, Any]:
    """
    Run SERP gap analysis for a set of topics against the user's competitors.

    When ``topics`` is empty, up to 12 topics are derived from
    ``StrategyArchitectAgent.find_semantic_gaps``. Competitor domains are read
    from the user's ``CompetitorAnalysis`` rows, and the actual analysis is
    delegated to ``SerpGapService.analyze_topic_gaps``.

    Args:
        current_user: Authenticated user payload (``id`` is used as the user id).
        topics: Optional list of topic phrases.

    Returns:
        The service result, or ``{"gaps": [], "message": ...}`` when topics or
        competitor domains are unavailable.

    Raises:
        HTTPException: 500 on any unexpected failure.
    """
    try:
        user_id = str(current_user.get("id"))

        # No explicit topics: try to derive them from SIF semantic gaps.
        if not topics:
            try:
                from services.intelligence.agents.specialized import StrategyArchitectAgent
                from services.intelligence.txtai_service import TxtaiIntelligenceService

                integration = OnboardingDataIntegrationService()
                sif_session = get_session_for_user(user_id)
                if sif_session:
                    try:
                        integrated = integration.get_integrated_data_sync(
                            user_id, sif_session
                        )
                        has_competitors = bool(
                            integrated and integrated.get("competitor_analysis")
                        )
                        competitor_indices = (
                            [
                                position
                                for position, _ in enumerate(
                                    integrated["competitor_analysis"]
                                )
                            ]
                            if has_competitors
                            else []
                        )
                        architect = StrategyArchitectAgent(
                            TxtaiIntelligenceService(user_id), user_id
                        )
                        semantic_gaps = await architect.find_semantic_gaps(
                            competitor_indices
                        )
                        topics = [gap["topic"] for gap in semantic_gaps[:12]]
                    finally:
                        sif_session.close()
            except Exception as derive_err:
                logger.warning(
                    f"Could not derive topics from SIF gaps: {derive_err}. "
                    "Pass topics explicitly."
                )
                return {
                    "gaps": [],
                    "message": "No topics provided and unable to derive from SIF gaps.",
                }

        if not topics:
            return {
                "gaps": [],
                "message": "No topics to analyze. Complete onboarding and SIF indexing first.",
            }

        # Collect the distinct competitor domains recorded during onboarding.
        competitor_domains = []
        domain_session = get_session_for_user(user_id)
        if domain_session:
            try:
                domain_query = domain_session.query(CompetitorAnalysis).join(
                    OnboardingSession,
                    CompetitorAnalysis.session_id == OnboardingSession.id,
                )
                domain_query = domain_query.filter(OnboardingSession.user_id == user_id)
                domain_query = domain_query.filter(
                    CompetitorAnalysis.competitor_domain.isnot(None)
                )
                rows = domain_query.all()
                competitor_domains = list(
                    {row.competitor_domain for row in rows if row.competitor_domain}
                )
            finally:
                domain_session.close()

        if not competitor_domains:
            return {
                "gaps": [],
                "message": "No competitor domains found. Complete onboarding Step 3.",
            }

        # Hand off to the SERP gap service.
        from services.seo_tools.serp_gap_service import SerpGapService

        return await SerpGapService().analyze_topic_gaps(topics, competitor_domains)
    except Exception as e:
        logger.error(f"Failed to get SERP gaps: {e}")
        raise HTTPException(
            status_code=500, detail=f"Failed to get SERP gaps: {str(e)}"
        )
async def get_competitor_content(
    current_user: dict = Depends(get_current_user),
    topics: Optional[List[str]] = None,
) -> Dict[str, Any]:
    """
    Fetch a competitor content deep-dive for the given gap topics.

    When ``topics`` is empty, up to 6 topics are derived from
    ``StrategyArchitectAgent.find_semantic_gaps`` (the cap is lower than for
    SERP gaps because the downstream Exa API is paid). Competitor domains come
    from the user's ``CompetitorAnalysis`` rows, and the deep-dive is delegated
    to ``CompetitorContentService.deep_dive``.

    Args:
        current_user: Authenticated user payload (``id`` is used as the user id).
        topics: Optional list of topic phrases.

    Returns:
        The service result, or ``{"results": [], "message": ...}`` when topics
        or competitor domains are unavailable.

    Raises:
        HTTPException: 500 on any unexpected failure.
    """
    try:
        user_id = str(current_user.get("id"))

        # No explicit topics: try to derive them from SIF semantic gaps.
        if not topics:
            try:
                from services.intelligence.agents.specialized import StrategyArchitectAgent
                from services.intelligence.txtai_service import TxtaiIntelligenceService

                integration = OnboardingDataIntegrationService()
                sif_session = get_session_for_user(user_id)
                if sif_session:
                    try:
                        integrated = integration.get_integrated_data_sync(
                            user_id, sif_session
                        )
                        has_competitors = bool(
                            integrated and integrated.get("competitor_analysis")
                        )
                        competitor_indices = (
                            [
                                position
                                for position, _ in enumerate(
                                    integrated["competitor_analysis"]
                                )
                            ]
                            if has_competitors
                            else []
                        )
                        architect = StrategyArchitectAgent(
                            TxtaiIntelligenceService(user_id), user_id
                        )
                        semantic_gaps = await architect.find_semantic_gaps(
                            competitor_indices
                        )
                        # Keep the topic list short: Exa is a paid API.
                        topics = [gap["topic"] for gap in semantic_gaps[:6]]
                    finally:
                        sif_session.close()
            except Exception as derive_err:
                logger.warning(
                    f"Could not derive topics from SIF gaps: {derive_err}. "
                    "Pass topics explicitly."
                )
                return {
                    "results": [],
                    "message": "No topics provided and unable to derive from SIF gaps.",
                }

        if not topics:
            return {
                "results": [],
                "message": "No topics to analyze. Complete onboarding and SIF indexing first.",
            }

        # Collect the distinct competitor domains recorded during onboarding.
        competitor_domains = []
        domain_session = get_session_for_user(user_id)
        if domain_session:
            try:
                domain_query = domain_session.query(CompetitorAnalysis).join(
                    OnboardingSession,
                    CompetitorAnalysis.session_id == OnboardingSession.id,
                )
                domain_query = domain_query.filter(OnboardingSession.user_id == user_id)
                domain_query = domain_query.filter(
                    CompetitorAnalysis.competitor_domain.isnot(None)
                )
                rows = domain_query.all()
                competitor_domains = list(
                    {row.competitor_domain for row in rows if row.competitor_domain}
                )
            finally:
                domain_session.close()

        if not competitor_domains:
            return {
                "results": [],
                "message": "No competitor domains found. Complete onboarding Step 3.",
            }

        # Hand off to the Exa-backed competitor content service.
        from services.seo_tools.competitor_content_service import (
            CompetitorContentService,
        )

        return await CompetitorContentService().deep_dive(topics, competitor_domains)
    except Exception as e:
        logger.error(f"Failed to get competitor content: {e}")
        raise HTTPException(
            status_code=500, detail=f"Failed to get competitor content: {str(e)}"
        )
async def get_content_gap_radar(
    current_user: dict = Depends(get_current_user),
    bypass_cache: bool = False,
) -> Dict[str, Any]:
    """
    Run the Content Gap Radar agent for the current user.

    Loads the user's distinct competitor domains and the competitor indices
    from the integrated onboarding data, then delegates to
    ``ContentGapRadarAgent.analyze``.

    Args:
        current_user: Authenticated user payload (``id`` is used as the user id).
        bypass_cache: Forwarded unchanged to the agent's ``analyze`` call.

    Returns:
        The agent result, or an empty ``gaps``/``summary`` payload with a
        message when no competitor domains are on record.

    Raises:
        HTTPException: 500 on any unexpected failure.
    """
    try:
        user_id = str(current_user.get("id"))

        # Both inputs default to empty when no DB session is available.
        competitor_domains = []
        competitor_indices = []
        session = get_session_for_user(user_id)
        if session:
            try:
                # Distinct, non-empty competitor domains for this user.
                domain_query = session.query(CompetitorAnalysis).join(
                    OnboardingSession,
                    CompetitorAnalysis.session_id == OnboardingSession.id,
                )
                domain_query = domain_query.filter(OnboardingSession.user_id == user_id)
                domain_query = domain_query.filter(
                    CompetitorAnalysis.competitor_domain.isnot(None)
                )
                rows = domain_query.all()
                competitor_domains = list(
                    {row.competitor_domain for row in rows if row.competitor_domain}
                )

                # Positional indices of the integrated competitor analysis entries.
                integrated = OnboardingDataIntegrationService().get_integrated_data_sync(
                    user_id, session
                )
                if integrated and integrated.get("competitor_analysis"):
                    competitor_indices = [
                        position
                        for position, _ in enumerate(integrated["competitor_analysis"])
                    ]
            finally:
                session.close()

        if not competitor_domains:
            return {
                "gaps": [],
                "summary": {},
                "message": "No competitor domains found. Complete onboarding Step 3.",
            }

        # Delegate to the radar agent.
        from services.intelligence.agents import ContentGapRadarAgent
        from services.intelligence.txtai_service import TxtaiIntelligenceService

        radar = ContentGapRadarAgent(TxtaiIntelligenceService(user_id), user_id)
        return await radar.analyze(
            competitor_domains=competitor_domains,
            competitor_indices=competitor_indices,
            bypass_cache=bypass_cache,
        )
    except Exception as e:
        logger.error(f"Failed to run content gap radar: {e}")
        raise HTTPException(
            status_code=500,
            detail=f"Failed to run content gap radar: {str(e)}",
        )
class GenerateContentRequest(BaseModel):
    """Request body for generating a content brief from a content gap radar item."""
    # Gap topic; also used as the title of the blog asset that gets created.
    topic: str
    # Forwarded to the agent's generate_content_brief call; empty by default.
    recommended_action: str = ""
    # Optional scoring values for the gap, forwarded to the agent unchanged.
    scoring: Optional[Dict[str, float]] = None
    # Optional SERP evidence for the gap, forwarded to the agent unchanged.
    serp_evidence: Optional[Dict[str, Any]] = None
    # Optional SIF gap payload, forwarded to the agent unchanged.
    sif_gap: Optional[Dict[str, Any]] = None
async def generate_content_from_gap(
    request: GenerateContentRequest,
    current_user: dict = Depends(get_current_user),
) -> Dict[str, Any]:
    """
    Generate a content brief from a content gap radar item and save it
    as a blog ContentAsset so the user can resume in the Blog Writer.

    The brief is produced by ``ContentGapRadarAgent.generate_content_brief``.
    Persisting the asset is best-effort: if it fails, the brief is still
    returned and ``asset_id`` is None.

    Args:
        request: Gap item (topic, recommended action, scoring and evidence).
        current_user: Authenticated user payload (``id`` is used as the user id).

    Returns:
        Dict with ``success``, the generated ``brief`` and the ``asset_id`` of
        the stored asset (None when it could not be stored).

    Raises:
        HTTPException: 500 when brief generation fails; HTTPExceptions raised
            by downstream code are propagated unchanged.
    """
    try:
        # Local import: the filename below needs `time`, and a module-level
        # import is not guaranteed for this module.
        import time

        user_id = str(current_user.get("id"))

        from services.intelligence.agents import ContentGapRadarAgent
        from services.intelligence.txtai_service import TxtaiIntelligenceService

        agent = ContentGapRadarAgent(
            TxtaiIntelligenceService(user_id), user_id
        )
        brief_result = await agent.generate_content_brief(
            topic=request.topic,
            recommended_action=request.recommended_action,
            scoring=request.scoring,
            serp_evidence=request.serp_evidence,
            sif_gap=request.sif_gap,
        )

        # Create blog ContentAsset so user can resume in Blog Writer
        from services.content_asset_service import ContentAssetService
        from models.content_asset_models import AssetType, AssetSource
        from services.database import get_db_session

        session = get_db_session()
        asset_id = None
        if session:
            try:
                svc = ContentAssetService(session)
                asset = svc.create_asset(
                    user_id=user_id,
                    asset_type=AssetType.TEXT,
                    source_module=AssetSource.BLOG_WRITER,
                    filename=f"gap_{int(time.time())}.md",
                    file_url="/api/blog/content/pending",
                    title=request.topic,
                    description=f"Content brief from gap analysis: {request.topic}",
                    tags=["content-gap", "seo-dashboard"],
                    asset_metadata={
                        "phase": "research",
                        "research_keywords": request.topic,
                        "topic": request.topic,
                        "research_data": brief_result,
                        "outline_data": None,
                        "content_data": None,
                        "seo_data": None,
                        "publish_data": None,
                    },
                )
                asset_id = asset.id
                logger.info(
                    f"Created blog asset {asset_id} for gap topic '{request.topic}'"
                )
            except Exception as e:
                # Deliberately non-fatal: the brief is still useful without an asset.
                logger.warning(f"Failed to create blog asset: {e}")
            finally:
                session.close()

        return {
            "success": True,
            "brief": brief_result["brief"],
            "asset_id": asset_id,
        }
    except HTTPException:
        # Keep the status code chosen by downstream code instead of masking it as 500.
        raise
    except Exception as e:
        logger.error(f"Failed to generate content from gap: {e}")
        raise HTTPException(
            status_code=500,
            detail=f"Failed to generate content brief: {str(e)}",
        )
async def get_onboarding_task_health(
current_user: dict = Depends(get_current_user),
site_url: Optional[str] = None,

View File

@@ -12,6 +12,7 @@ from pydantic import BaseModel
import os
import uuid
import requests
import time
from services.wix_service import WixService
from services.integrations.wix_oauth import WixOAuthService
@@ -40,25 +41,80 @@ def _get_current_user_id(current_user: dict) -> str:
def _map_wix_error(exc: Exception, fallback: str = "Wix API request failed") -> HTTPException:
    """Map Wix API exceptions to proper HTTP responses with actionable guidance.

    Args:
        exc: Exception raised while calling Wix or while preparing the request.
        fallback: Message used when the exception carries no usable text.

    Returns:
        An HTTPException for the caller to raise:
        - ``exc`` itself when it already is an HTTPException;
        - a fixed actionable message for well-known Wix HTTP statuses;
        - 502 with the Wix-provided detail for other HTTP errors;
        - 502 for other network-level ``requests`` failures;
        - 400 when the message contains "validation failed";
        - 500 with ``fallback`` plus the error text otherwise.
    """
    if isinstance(exc, HTTPException):
        return exc

    # Try to extract a meaningful error from the Wix API response body.
    wix_error_detail = None
    response = getattr(exc, 'response', None)
    if response is not None:
        try:
            err_body = response.json()
            if isinstance(err_body, dict):
                wix_error_detail = (
                    err_body.get('message') or err_body.get('error') or err_body.get('details')
                )
        except Exception:
            # Body is not JSON: fall back to a truncated copy of the raw text.
            raw_text = getattr(response, 'text', None)
            wix_error_detail = raw_text[:300] if raw_text else None

    # HTTPError must be checked before RequestException, its base class.
    if isinstance(exc, requests.HTTPError):
        status = response.status_code if response is not None else None
        unavailable = (502, "Wix service temporarily unavailable. Please try again in a moment.")
        # upstream status -> (status returned to our client, actionable message)
        actionable = {
            401: (401, "Wix authorization failed. Please reconnect your Wix account."),
            403: (403, "Wix permission denied. Ensure your OAuth app has blog permissions (BLOG.CREATE-DRAFT)."),
            404: (502, "Wix API endpoint not found. The blog feature may not be enabled on this site."),
            429: (429, "Wix rate limit exceeded. Please wait a moment and try again."),
            500: (502, "Wix server error. This is usually temporary — please try again."),
            502: unavailable,
            503: unavailable,
            504: unavailable,
        }
        if status in actionable:
            mapped_status, detail = actionable[status]
            return HTTPException(status_code=mapped_status, detail=detail)
        # Prefer the Wix-provided detail, then the exception text, then the fallback.
        return HTTPException(status_code=502, detail=wix_error_detail or str(exc) or fallback)

    if isinstance(exc, requests.RequestException):
        return HTTPException(
            status_code=502,
            detail="Network error connecting to Wix. Please check your connection and try again."
        )

    # For validation errors from blog_publisher
    error_str = str(exc)
    if "validation failed" in error_str.lower():
        return HTTPException(status_code=400, detail=error_str)

    return HTTPException(status_code=500, detail=f"{fallback}: {error_str}")
def _resolve_valid_wix_token(current_user: dict) -> Dict[str, Any]:
user_id = _get_current_user_id(current_user)
tokens = wix_oauth_service.get_user_tokens(user_id)
if tokens:
logger.info(f"Wix token resolved from DB for user {user_id[:8]}...")
return tokens[0]
token_status = wix_oauth_service.get_user_token_status(user_id)
@@ -66,14 +122,25 @@ def _resolve_valid_wix_token(current_user: dict) -> Dict[str, Any]:
if not expired_tokens:
raise HTTPException(status_code=401, detail="Wix account not connected")
MAX_REFRESH_ATTEMPTS = 3
attempt = 0
for candidate in expired_tokens:
if attempt >= MAX_REFRESH_ATTEMPTS:
logger.warning(f"Wix token refresh: reached max {MAX_REFRESH_ATTEMPTS} attempts for user {user_id[:8]}...")
break
refresh_token = candidate.get("refresh_token")
token_id = candidate.get("id")
if not refresh_token:
continue
attempt += 1
if attempt > 1:
backoff = min(2 ** (attempt - 1), 8)
logger.info(f"Wix token refresh: attempt {attempt}/{MAX_REFRESH_ATTEMPTS}, waiting {backoff}s...")
time.sleep(backoff)
try:
refreshed = wix_service.refresh_access_token(refresh_token)
except Exception as exc:
logger.warning(f"Wix token refresh attempt {attempt} failed: {str(exc)[:120]}")
continue
wix_oauth_service.update_tokens(
@@ -83,7 +150,7 @@ def _resolve_valid_wix_token(current_user: dict) -> Dict[str, Any]:
expires_in=refreshed.get("expires_in"),
token_id=token_id,
)
logger.info(f"Wix token refreshed successfully on attempt {attempt} for user {user_id[:8]}...")
return {
"access_token": refreshed.get("access_token"),
"refresh_token": refreshed.get("refresh_token", refresh_token),
@@ -95,9 +162,18 @@ def _resolve_valid_wix_token(current_user: dict) -> Dict[str, Any]:
class WixAuthRequest(BaseModel):
    """Request model for Wix authentication.

    Supports two modes:
    1. Backend exchanges code: provides code + state, ideally with
       code_verifier (the callback handler falls back to the verifier stored
       for that state when it is omitted).
    2. Frontend already exchanged: provides access_token directly, optionally
       with refresh_token / expires_in / token_type.
    """
    # Mode 1: authorization code flow
    code: Optional[str] = None
    state: Optional[str] = None
    code_verifier: Optional[str] = None
    # Mode 2: tokens already obtained by the frontend
    access_token: Optional[str] = None
    refresh_token: Optional[str] = None
    expires_in: Optional[int] = None
    token_type: Optional[str] = "Bearer"
class WixPublishRequest(BaseModel):
@@ -112,6 +188,7 @@ class WixPublishRequest(BaseModel):
publish: bool = True
access_token: Optional[str] = None
member_id: Optional[str] = None
site_id: Optional[str] = None
seo_metadata: Optional[Dict[str, Any]] = None
class WixCreateCategoryRequest(BaseModel):
access_token: str
@@ -217,39 +294,91 @@ async def handle_oauth_callback(request: WixAuthRequest, current_user: dict = De
if not user_id:
raise HTTPException(status_code=400, detail="User ID not found")
if not request.state:
raise HTTPException(status_code=400, detail="Missing OAuth state")
code_verifier = wix_oauth_service.consume_pkce_verifier(user_id=user_id, state=request.state)
if not code_verifier:
raise HTTPException(
status_code=400,
detail="Invalid or expired OAuth state. Please restart Wix connection."
)
# Exchange code for tokens
tokens = wix_service.exchange_code_for_tokens(request.code, code_verifier=code_verifier)
access_token: str | None = None
refresh_token: str | None = None
expires_in: int | None = None
token_type: str = "Bearer"
site_info: dict = {}
site_id: str | None = None
member_id: str | None = None
permissions: dict = {}
# Get site information to extract site_id and member_id
site_info = wix_service.get_site_info(tokens['access_token'])
site_id = site_info.get('siteId') or site_info.get('site_id')
# MODE 2: Frontend already exchanged the code (preferred — avoids PKCE verifier mismatch)
if request.access_token:
logger.info(f"Wix callback mode=FRONTEND_TOKEN for user {user_id}")
access_token = request.access_token
refresh_token = request.refresh_token
expires_in = request.expires_in
token_type = request.token_type or "Bearer"
# Non-fatal enrichment
try:
site_info = wix_service.get_site_info(access_token)
site_id = site_info.get('siteId') or site_info.get('site_id')
except Exception as e:
logger.warning(f"get_site_info failed (non-fatal): {e}")
try:
member_id = wix_service.extract_member_id_from_access_token(access_token)
except Exception:
pass
try:
permissions = wix_service.check_blog_permissions(access_token)
except Exception as e:
logger.warning(f"check_blog_permissions failed (non-fatal): {e}")
# Extract member_id from token if possible
member_id = None
try:
member_id = wix_service.extract_member_id_from_access_token(tokens['access_token'])
except Exception:
pass
# MODE 1: Backend exchanges code (legacy / requires correct code_verifier)
elif request.code:
if not request.state:
raise HTTPException(status_code=400, detail="Missing OAuth state")
code_verifier = request.code_verifier
if not code_verifier:
code_verifier = wix_oauth_service.consume_pkce_verifier(user_id=user_id, state=request.state)
if code_verifier:
logger.info(f"Fallback: using DB-stored code_verifier for user {user_id}")
if not code_verifier:
raise HTTPException(
status_code=400,
detail="Invalid or expired OAuth state. Please restart Wix connection."
)
logger.info(f"Wix callback mode=BACKEND_EXCHANGE for user {user_id}")
tokens = wix_service.exchange_code_for_tokens(request.code, code_verifier=code_verifier)
logger.info(f"Token exchange succeeded for user {user_id}")
access_token = tokens['access_token']
refresh_token = tokens.get('refresh_token')
expires_in = tokens.get('expires_in')
token_type = tokens.get('token_type', 'Bearer')
try:
site_info = wix_service.get_site_info(access_token)
site_id = site_info.get('siteId') or site_info.get('site_id')
except Exception as e:
logger.warning(f"get_site_info failed (non-fatal): {e}")
try:
from services.integrations.wix.utils import extract_meta_from_token
site_id = extract_meta_from_token(access_token) or site_id
except Exception:
pass
try:
member_id = wix_service.extract_member_id_from_access_token(access_token)
except Exception:
pass
try:
permissions = wix_service.check_blog_permissions(access_token)
except Exception as e:
logger.warning(f"check_blog_permissions failed (non-fatal): {e}")
else:
raise HTTPException(status_code=400, detail="Missing code or access_token")
# Check permissions
permissions = wix_service.check_blog_permissions(tokens['access_token'])
if not access_token:
raise HTTPException(status_code=500, detail="No access_token available")
# Store tokens securely in database
stored = wix_oauth_service.store_tokens(
user_id=user_id,
access_token=tokens['access_token'],
refresh_token=tokens.get('refresh_token'),
expires_in=tokens.get('expires_in'),
token_type=tokens.get('token_type', 'Bearer'),
scope=tokens.get('scope'),
access_token=access_token,
refresh_token=refresh_token,
expires_in=expires_in,
token_type=token_type,
site_id=site_id,
member_id=member_id
)
@@ -260,10 +389,10 @@ async def handle_oauth_callback(request: WixAuthRequest, current_user: dict = De
return {
"success": True,
"tokens": {
"access_token": tokens['access_token'],
"refresh_token": tokens.get('refresh_token'),
"expires_in": tokens.get('expires_in'),
"token_type": tokens.get('token_type', 'Bearer')
"access_token": access_token,
"refresh_token": refresh_token,
"expires_in": expires_in,
"token_type": token_type
},
"site_info": site_info,
"permissions": permissions,
@@ -288,11 +417,22 @@ async def handle_oauth_callback_get(code: str, state: Optional[str] = None, requ
if not code_verifier:
raise HTTPException(status_code=400, detail="Invalid or expired OAuth state. Please reconnect Wix.")
tokens = wix_service.exchange_code_for_tokens(code, code_verifier=code_verifier)
site_info = wix_service.get_site_info(tokens['access_token'])
permissions = wix_service.check_blog_permissions(tokens['access_token'])
# Non-fatal: get site info and permissions
site_info = {}
permissions = {}
site_id = None
try:
site_info = wix_service.get_site_info(tokens['access_token'])
site_id = site_info.get('siteId') or site_info.get('site_id')
except Exception as e:
logger.warning(f"GET callback: get_site_info non-fatal: {e}")
try:
permissions = wix_service.check_blog_permissions(tokens['access_token'])
except Exception as e:
logger.warning(f"GET callback: check_blog_permissions non-fatal: {e}")
# Store tokens in database if we have user_id
site_id = site_info.get('siteId') or site_info.get('site_id')
member_id = None
try:
member_id = wix_service.extract_member_id_from_access_token(tokens['access_token'])
@@ -406,13 +546,18 @@ async def publish_to_wix(request: WixPublishRequest, current_user: dict = Depend
access_token unless they want to override the stored one.
"""
try:
site_id = request.site_id
if request.access_token:
from services.integrations.wix.utils import normalize_token_string
access_token = normalize_token_string(request.access_token)
logger.info(f"Wix publish: using frontend-fallback token for user {_get_current_user_id(current_user)[:8]}...")
else:
try:
token_info = _resolve_valid_wix_token(current_user)
access_token = token_info["access_token"]
if not site_id:
site_id = token_info.get("site_id")
logger.info(f"Wix publish: using backend DB token for user {_get_current_user_id(current_user)[:8]}...")
except HTTPException:
access_token = None
@@ -422,19 +567,41 @@ async def publish_to_wix(request: WixPublishRequest, current_user: dict = Depend
"error": "Wix account not connected. Connect your Wix account first.",
}
if not request.content or not request.content.strip():
return {
"success": False,
"error": "Content cannot be empty. Please write your blog post before publishing.",
}
content_length = len(request.content.strip())
if content_length > 50000:
return {
"success": False,
"error": f"Content is {content_length // 1000}K characters — maximum is 50K. Please shorten your content.",
}
content_warning = None
if content_length > 30000:
content_warning = f"Content is {content_length // 1000}K characters. Very long posts may take longer to publish on Wix."
logger.warning(f"Wix publish: large content ({content_length} chars) for user {_get_current_user_id(current_user)[:8]}...")
member_id = request.member_id
if not member_id:
member_id = wix_service.extract_member_id_from_access_token(access_token)
if not member_id:
member_info = wix_service.get_current_member(access_token)
member_id = (member_info.get("member") or {}).get("id") or member_info.get("id")
try:
member_info = wix_service.get_current_member(access_token)
if member_info and isinstance(member_info, dict):
member_id = (member_info.get("member") or {}).get("id") or member_info.get("id")
except Exception as e:
logger.warning(f"Wix: could not resolve member ID from token: {e}")
if not member_id:
return {
"success": False,
"error": "Unable to resolve Wix member ID. Please reconnect your Wix account.",
}
# Resolve categories: accept IDs or names (looked up/created)
# Resolve categories/tags: precedence is top-level params > seo_metadata fallback
category_ids = request.category_ids or request.category_names
tag_ids = request.tag_ids or request.tag_names
@@ -445,6 +612,9 @@ async def publish_to_wix(request: WixPublishRequest, current_user: dict = Depend
if not tag_ids and seo_metadata.get("blog_tags"):
tag_ids = seo_metadata.get("blog_tags")
if seo_metadata.get("url_slug"):
logger.info(f"Wix publish: using SEO url_slug for post slug: {seo_metadata.get('url_slug')[:50]}")
# Ensure category_ids and tag_ids are lists of strings (not ints)
if category_ids:
category_ids = [str(c) for c in category_ids if c is not None]
@@ -461,6 +631,7 @@ async def publish_to_wix(request: WixPublishRequest, current_user: dict = Depend
publish=request.publish,
member_id=member_id,
seo_metadata=seo_metadata,
site_id=site_id,
)
post = result.get("draftPost") or result.get("post") or result
raw_url = post.get("url")
@@ -474,7 +645,8 @@ async def publish_to_wix(request: WixPublishRequest, current_user: dict = Depend
"success": True,
"post_id": str(post.get("id", "")),
"url": post_url,
"publish_state": "PUBLISHED" if request.publish else "DRAFT"
"publish_state": "PUBLISHED" if request.publish else "DRAFT",
**({"warning": content_warning} if content_warning else {}),
}
except Exception as e:
logger.error(f"Failed to publish to Wix: {e}")

View File

@@ -799,12 +799,13 @@ async def startup_event():
else:
logger.info(f"[FEATURE-MODE] Skipping scheduler startup (features: {enabled_features})")
# Check Wix API key configuration
# Check Wix configuration (OAuth-based, API key optional)
wix_api_key = os.getenv('WIX_API_KEY')
if wix_api_key:
logger.warning(f"WIX_API_KEY loaded ({len(wix_api_key)} chars, starts with '{wix_api_key[:10]}...')")
else:
logger.warning("⚠️ WIX_API_KEY not found in environment - Wix publishing may fail")
logger.info(f"WIX_API_KEY loaded ({len(wix_api_key)} chars)")
wix_client_id = os.getenv('WIX_CLIENT_ID')
if not wix_client_id:
logger.warning("⚠️ WIX_CLIENT_ID not found in environment - Wix OAuth connection will fail")
elapsed = time.time() - startup_start
logger.info(f"ALwrity backend started successfully in {elapsed:.1f}s")

Binary file not shown.

After

Width:  |  Height:  |  Size: 525 KiB

View File

@@ -13,7 +13,7 @@ builtins.Union = typing.Union
from models.onboarding import APIKey, WebsiteAnalysis, ResearchPreferences, PersonaData, CompetitorAnalysis
from fastapi import FastAPI, HTTPException, Depends, Request, BackgroundTasks
from fastapi import FastAPI, HTTPException, Depends, Request, BackgroundTasks, Query
from fastapi.middleware.cors import CORSMiddleware
from fastapi.staticfiles import StaticFiles
from fastapi.responses import FileResponse
@@ -137,6 +137,11 @@ from api.seo_dashboard import (
get_sif_indexing_health,
get_guardian_audit,
get_keyword_gaps,
get_serp_gaps,
get_competitor_content,
get_content_gap_radar,
generate_content_from_gap,
GenerateContentRequest,
)
# Initialize FastAPI app
@@ -391,6 +396,64 @@ async def keyword_gaps_endpoint(
return await get_keyword_gaps(current_user, site_url)
@app.get("/api/seo-dashboard/serp-gaps")
async def serp_gaps_endpoint(
    current_user: dict = Depends(get_current_user),
    # A list-typed parameter must be declared with Query(); otherwise FastAPI
    # reads it from the request body, which a GET client cannot sensibly send.
    topics: Optional[List[str]] = Query(
        None,
        description="Topic phrases to analyze; repeat the parameter for multiple values",
    ),
):
    """
    Get SERP gap analysis — detect which competitors rank for given topics.

    Topics are passed as a repeated query parameter
    (``?topics=a&topics=b``). If no topics are provided, they are derived
    from the user's latest SIF semantic gap analysis (up to 12 topics).
    The work is delegated to ``get_serp_gaps``.
    """
    return await get_serp_gaps(current_user, topics)
@app.get("/api/seo-dashboard/competitor-content")
async def competitor_content_endpoint(
    current_user: dict = Depends(get_current_user),
    # A list-typed parameter must be declared with Query(); otherwise FastAPI
    # reads it from the request body, which a GET client cannot sensibly send.
    topics: Optional[List[str]] = Query(
        None,
        description="Topic phrases to analyze; repeat the parameter for multiple values",
    ),
):
    """
    Get competitor content deep-dive for gap topics using Exa.

    Topics are passed as a repeated query parameter
    (``?topics=a&topics=b``). If no topics are provided, up to 6 are derived
    from the latest SIF semantic gaps. The work is delegated to
    ``get_competitor_content``.
    """
    return await get_competitor_content(current_user, topics)
@app.get("/api/seo-dashboard/content-gap-radar")
async def content_gap_radar_endpoint(
    current_user: dict = Depends(get_current_user),
    bypass_cache: bool = Query(False, description="Bypass 24h cache"),
):
    """
    HTTP entry point for the Content Gap Radar pipeline.

    Forwards the authenticated user and the ``bypass_cache`` flag to
    ``get_content_gap_radar`` and returns its result unchanged.
    """
    radar_report = await get_content_gap_radar(current_user, bypass_cache=bypass_cache)
    return radar_report
@app.post("/api/seo-dashboard/content-gap-radar/generate-content")
async def generate_content_from_gap_endpoint(
    request: GenerateContentRequest,
    current_user: dict = Depends(get_current_user),
):
    """
    HTTP entry point that turns a content gap radar item into a content brief.

    Delegates to ``generate_content_from_gap``, which also stores the brief as
    a blog ContentAsset. Clients navigate to /blog-writer with the returned
    ``asset_id`` to continue in the Blog Writer workflow.
    """
    generation_result = await generate_content_from_gap(request, current_user)
    return generation_result
# Comprehensive SEO Analysis endpoints
@app.post("/api/seo-dashboard/analyze-comprehensive")
async def analyze_seo_comprehensive_endpoint(request: SEOAnalysisRequest):

View File

@@ -318,7 +318,7 @@ class SIFIndexingTask(Base):
id = Column(Integer, primary_key=True, index=True)
user_id = Column(String(255), nullable=False, index=True)
website_url = Column(String(500), nullable=False, index=True)
website_url = Column(String(500), nullable=True, index=True)
status = Column(String(50), default='active', index=True)
@@ -331,7 +331,7 @@ class SIFIndexingTask(Base):
failure_pattern = Column(JSON, nullable=True)
next_execution = Column(DateTime, nullable=True, index=True)
frequency_hours = Column(Integer, default=48) # Default 48 hours
frequency_hours = Column(Integer, default=48)
payload = Column(JSON, nullable=True)
@@ -346,6 +346,7 @@ class SIFIndexingTask(Base):
__table_args__ = (
Index('idx_sif_indexing_tasks_user_site', 'user_id', 'website_url'),
Index('idx_sif_indexing_tasks_user_only', 'user_id'),
Index('idx_sif_indexing_tasks_next_execution', 'next_execution'),
Index('idx_sif_indexing_tasks_status', 'status'),
)
@@ -387,7 +388,7 @@ class MarketTrendsTask(Base):
id = Column(Integer, primary_key=True, index=True)
user_id = Column(String(255), nullable=False, index=True)
website_url = Column(String(500), nullable=False, index=True)
website_url = Column(String(500), nullable=True, index=True)
status = Column(String(50), default="active", index=True)
@@ -415,6 +416,7 @@ class MarketTrendsTask(Base):
__table_args__ = (
Index("idx_market_trends_tasks_user_site", "user_id", "website_url"),
Index("idx_market_trends_tasks_user_only", "user_id"),
Index("idx_market_trends_tasks_next_execution", "next_execution"),
Index("idx_market_trends_tasks_status", "status"),
)

View File

@@ -27,6 +27,7 @@ class BlogSEORecommendationApplier:
raise ValueError("user_id is required for subscription checking. Please provide Clerk user ID.")
title = payload.get("title", "Untitled Blog")
introduction = payload.get("introduction") or ""
sections: List[Dict[str, Any]] = payload.get("sections", [])
outline = payload.get("outline", [])
research = payload.get("research", {})
@@ -44,6 +45,7 @@ class BlogSEORecommendationApplier:
prompt = self._build_prompt(
title=title,
introduction=introduction,
sections=sections,
outline=outline,
research=research,
@@ -57,6 +59,7 @@ class BlogSEORecommendationApplier:
"type": "object",
"properties": {
"title": {"type": "string"},
"introduction": {"type": "string"},
"sections": {
"type": "array",
"items": {
@@ -103,6 +106,13 @@ class BlogSEORecommendationApplier:
raw_sections = result.get("sections", []) or []
normalized_sections: List[Dict[str, Any]] = []
# Warn if LLM returned different number of sections (may miss intro/conclusion added as new sections)
if len(raw_sections) != len(sections):
logger.warning(
f"LLM returned {len(raw_sections)} sections but {len(sections)} were sent. "
"Extra sections will be ignored; missing sections fall back to original content."
)
# Build lookup table from updated sections using their identifiers
updated_map: Dict[str, Dict[str, Any]] = {}
for updated in raw_sections:
@@ -180,9 +190,17 @@ class BlogSEORecommendationApplier:
logger.info("SEO recommendations applied successfully")
# Extract updated introduction from LLM response if available
updated_introduction = result.get("introduction") or ""
if updated_introduction and updated_introduction != introduction:
logger.info(f"Introduction updated: {len(updated_introduction)} chars")
elif not updated_introduction:
updated_introduction = introduction # fall back to original
return {
"success": True,
"title": result.get("title", title),
"introduction": updated_introduction,
"sections": normalized_sections,
"applied": applied,
}
@@ -191,6 +209,7 @@ class BlogSEORecommendationApplier:
self,
*,
title: str,
introduction: str,
sections: List[Dict[str, Any]],
outline: List[Dict[str, Any]],
research: Dict[str, Any],
@@ -244,6 +263,9 @@ You are an expert SEO content strategist. Update the blog content to apply the a
Current Title: {title}
Current Introduction:
{introduction if introduction else '(No introduction exists — write a compelling one if the recommendations require it)'}
Primary Keywords (for context): {primary_keywords}
Outline Overview:
@@ -260,10 +282,15 @@ Actionable Recommendations to Apply:
Instructions:
1. Carefully apply the recommendations while preserving factual accuracy and research alignment.
2. Keep section identifiers (IDs) unchanged so the frontend can map updates correctly.
3. Improve clarity, flow, and SEO optimization per the guidance.
4. Return updated sections in the requested JSON format.
5. Provide a short summary of which recommendations were addressed.
2. You MUST return EXACTLY the same number of sections, with EXACTLY the same IDs as provided above. Do NOT add or remove sections.
3. If a recommendation says content is MISSING (e.g. missing introduction or conclusion), incorporate that missing content into the MOST APPROPRIATE existing section:
- Missing introduction → PREPEND introductory content to the FIRST section's existing content.
- Missing conclusion → APPEND concluding content to the LAST section's existing content.
- For other missing content, add it to the section whose heading best matches the recommendation.
4. Additionally, if an introduction is missing or weak, write a compelling introduction in the "introduction" field of your response. If the current introduction is adequate, return it unchanged.
5. Improve clarity, flow, and SEO optimization per the guidance.
6. Return updated sections in the requested JSON format.
7. Provide a short summary of which recommendations were addressed.
"""
return prompt

View File

@@ -47,7 +47,10 @@ class WixAuthService:
'code_verifier': code_verifier,
}
token_url = f'{self.base_url}/oauth2/token'
logger.info(f"Wix token exchange: client_id={self.client_id}, redirect_uri={self.redirect_uri}, code_verifier_prefix={code_verifier[:10]}...")
response = requests.post(token_url, headers=headers, data=data)
if response.status_code != 200:
logger.error(f"Wix token exchange failed: {response.status_code} {response.text}")
response.raise_for_status()
return response.json()

View File

@@ -55,19 +55,20 @@ def get_wix_headers(
if token.startswith('OauthNG.JWS.'):
# Wix OAuth token - use Bearer prefix
headers['Authorization'] = f'Bearer {token}'
logger.debug(f"Using Wix OAuth token with Bearer prefix (OauthNG.JWS. format detected)")
logger.debug("Using Wix OAuth token with Bearer prefix (OauthNG.JWS. format detected)")
elif token.startswith('IST.'):
# Wix Headless API key - send as-is, no Bearer
headers['Authorization'] = token
logger.debug("Using Wix API key for authorization (IST. format detected)")
else:
# Count dots - JWT has exactly 2 dots
# Standard JWT has exactly 2 dots separating header.payload.signature
dot_count = token.count('.')
if dot_count == 2 and len(token) < 500:
# Likely OAuth JWT token - use Bearer prefix
if dot_count == 2:
headers['Authorization'] = f'Bearer {token}'
logger.debug(f"Using OAuth Bearer token (JWT format detected)")
logger.debug("Using OAuth Bearer token (JWT format: 2 dots detected)")
else:
# Likely API key - use directly without Bearer prefix
headers['Authorization'] = token
logger.debug(f"Using API key for authorization (non-JWT format detected)")
logger.debug("Using token as-is (non-JWT format detected)")
if client_id:
headers['wix-client-id'] = client_id
@@ -125,8 +126,10 @@ def should_use_api_key(access_token: Optional[str] = None) -> bool:
access_token = str(access_token)
token = access_token.strip()
if token.count('.') != 2 or len(token) > 500:
if token.startswith('OauthNG.JWS.'):
return False
if token.startswith('IST.'):
return True
return False
# Standard JWT has exactly 2 dots
return token.count('.') != 2

View File

@@ -2,20 +2,22 @@ from typing import Any, Dict, List, Optional
import requests
from loguru import logger
from .retry import wix_api_call_with_retry, WixAPIError
class WixBlogService:
"""Service for Wix Blog API operations with retry logic and error handling."""
def __init__(self, base_url: str, client_id: Optional[str]):
self.base_url = base_url
self.client_id = client_id
def headers(self, access_token: str, extra: Optional[Dict[str, str]] = None) -> Dict[str, str]:
"""Build headers with automatic token type detection."""
h: Dict[str, str] = {
'Content-Type': 'application/json',
}
# Support both OAuth tokens and API keys
# API keys don't use 'Bearer' prefix
# Ensure access_token is a string (defensive check)
if access_token:
# Normalize token to string if needed
if not isinstance(access_token, str):
@@ -28,20 +30,18 @@ class WixBlogService:
token = access_token.strip()
if token:
# CRITICAL: Wix OAuth tokens can have format "OauthNG.JWS.xxx.yyy.zzz"
# These should use "Bearer" prefix even though they have more than 2 dots
if token.startswith('OauthNG.JWS.'):
# Wix OAuth token - use Bearer prefix
h['Authorization'] = f'Bearer {token}'
logger.debug("Using Wix OAuth token with Bearer prefix (OauthNG.JWS. format detected)")
elif '.' not in token or len(token) > 500:
# Likely an API key - use directly without Bearer prefix
elif token.startswith('IST.'):
h['Authorization'] = token
logger.debug("Using API key for authorization")
else:
# Standard JWT OAuth token (xxx.yyy.zzz format) - use Bearer prefix
logger.debug("Using Wix API key for authorization (IST. format detected)")
elif token.count('.') == 2:
h['Authorization'] = f'Bearer {token}'
logger.debug("Using OAuth Bearer token for authorization")
logger.debug("Using OAuth Bearer token for authorization (JWT: 2 dots)")
else:
h['Authorization'] = token
logger.debug("Using token as-is for authorization")
if self.client_id:
h['wix-client-id'] = self.client_id
@@ -50,12 +50,12 @@ class WixBlogService:
return h
def create_draft_post(self, access_token: str, payload: Dict[str, Any], extra_headers: Optional[Dict[str, str]] = None) -> Dict[str, Any]:
"""Create draft post with consolidated logging"""
"""Create draft post with retry logic and consolidated logging."""
from .logger import wix_logger
import json
import traceback as tb
# Build payload summary for logging
# Build payload summary for logging (safe, no sensitive data)
payload_summary = {}
if 'draftPost' in payload:
dp = payload['draftPost']
@@ -66,64 +66,114 @@ class WixBlogService:
}
request_headers = self.headers(access_token, extra_headers)
logger.debug(f"Wix API request headers: {list(request_headers.keys())}")
if 'wix-site-id' in request_headers:
logger.info(f"Wix API call includes wix-site-id: {request_headers['wix-site-id'][:8]}...")
else:
logger.warning("Wix API call MISSING wix-site-id header — this may fail for multi-site tokens")
url = f"{self.base_url}/blog/v3/draft-posts"
try:
response = requests.post(f"{self.base_url}/blog/v3/draft-posts", headers=request_headers, json=payload)
except TypeError as e:
logger.error(f"TypeError during requests.post in create_draft_post: {e}")
logger.error(f"Traceback: {tb.format_exc()}")
logger.error(f"access_token type: {type(access_token)}")
logger.error(f"payload type: {type(payload)}, keys: {list(payload.keys()) if isinstance(payload, dict) else 'N/A'}")
result = wix_api_call_with_retry('POST', url, request_headers, json_payload=payload, max_attempts=3)
wix_logger.log_api_call("POST", "/blog/v3/draft-posts", 200, payload_summary, None)
return result
except WixAPIError as e:
wix_logger.log_api_call("POST", "/blog/v3/draft-posts", e.status_code or 500, payload_summary, e.response_body)
logger.error(f"Wix create_draft_post failed after retries: HTTP {e.status_code} - {e.response_body}")
raise
except Exception as e:
wix_logger.log_api_call("POST", "/blog/v3/draft-posts", 500, payload_summary, str(e)[:200])
logger.error(f"Unexpected error in create_draft_post: {e}")
raise
# Consolidated error logging
error_body = None
if response.status_code >= 400:
try:
error_body = response.json()
except:
error_body = {'message': response.text[:200]}
wix_logger.log_api_call("POST", "/blog/v3/draft-posts", response.status_code, payload_summary, error_body)
if response.status_code >= 400:
# Only show detailed error info for debugging
if response.status_code == 500:
logger.debug(f" Full error: {json.dumps(error_body, indent=2) if isinstance(error_body, dict) else error_body}")
response.raise_for_status()
return response.json()
def publish_draft(self, access_token: str, draft_post_id: str, extra_headers: Optional[Dict[str, str]] = None) -> Dict[str, Any]:
response = requests.post(f"{self.base_url}/blog/v3/draft-posts/{draft_post_id}/publish", headers=self.headers(access_token, extra_headers))
response.raise_for_status()
return response.json()
"""Publish a draft post with retry logic."""
url = f"{self.base_url}/blog/v3/draft-posts/{draft_post_id}/publish"
headers = self.headers(access_token, extra_headers)
try:
return wix_api_call_with_retry('POST', url, headers, max_attempts=3)
except WixAPIError as e:
logger.error(f"Wix publish_draft failed: HTTP {e.status_code} - {e.response_body}")
raise
def list_categories(self, access_token: str, extra_headers: Optional[Dict[str, str]] = None) -> List[Dict[str, Any]]:
response = requests.get(f"{self.base_url}/blog/v3/categories", headers=self.headers(access_token, extra_headers))
response.raise_for_status()
return response.json().get('categories', [])
"""List blog categories with retry logic."""
url = f"{self.base_url}/blog/v3/categories"
headers = self.headers(access_token, extra_headers)
try:
result = wix_api_call_with_retry('GET', url, headers, max_attempts=3)
return result.get('categories', [])
except WixAPIError as e:
logger.error(f"Wix list_categories failed: HTTP {e.status_code}")
raise
def create_category(self, access_token: str, label: str, description: Optional[str] = None, language: Optional[str] = None, extra_headers: Optional[Dict[str, str]] = None) -> Dict[str, Any]:
def create_category(self, access_token: str, label: str, description: Optional[str] = None,
language: Optional[str] = None, extra_headers: Optional[Dict[str, str]] = None) -> Dict[str, Any]:
"""Create a blog category with retry logic."""
url = f"{self.base_url}/blog/v3/categories"
headers = self.headers(access_token, extra_headers)
payload: Dict[str, Any] = {'category': {'label': label}, 'fieldsets': ['URL']}
if description:
payload['category']['description'] = description
if language:
payload['category']['language'] = language
response = requests.post(f"{self.base_url}/blog/v3/categories", headers=self.headers(access_token, extra_headers), json=payload)
response.raise_for_status()
return response.json()
try:
return wix_api_call_with_retry('POST', url, headers, json_payload=payload, max_attempts=3)
except WixAPIError as e:
logger.error(f"Wix create_category failed: HTTP {e.status_code}")
raise
def list_tags(self, access_token: str, extra_headers: Optional[Dict[str, str]] = None) -> List[Dict[str, Any]]:
response = requests.get(f"{self.base_url}/blog/v3/tags", headers=self.headers(access_token, extra_headers))
response.raise_for_status()
return response.json().get('tags', [])
"""List blog tags with retry logic."""
url = f"{self.base_url}/blog/v3/tags"
headers = self.headers(access_token, extra_headers)
try:
result = wix_api_call_with_retry('GET', url, headers, max_attempts=3)
return result.get('tags', [])
except WixAPIError as e:
logger.error(f"Wix list_tags failed: HTTP {e.status_code}")
raise
def create_tag(self, access_token: str, label: str, language: Optional[str] = None, extra_headers: Optional[Dict[str, str]] = None) -> Dict[str, Any]:
def create_tag(self, access_token: str, label: str, language: Optional[str] = None,
extra_headers: Optional[Dict[str, str]] = None) -> Dict[str, Any]:
"""Create a blog tag with retry logic."""
url = f"{self.base_url}/blog/v3/tags"
headers = self.headers(access_token, extra_headers)
payload: Dict[str, Any] = {'label': label, 'fieldsets': ['URL']}
if language:
payload['language'] = language
response = requests.post(f"{self.base_url}/blog/v3/tags", headers=self.headers(access_token, extra_headers), json=payload)
response.raise_for_status()
return response.json()
try:
return wix_api_call_with_retry('POST', url, headers, json_payload=payload, max_attempts=3)
except WixAPIError as e:
logger.error(f"Wix create_tag failed: HTTP {e.status_code}")
raise
def get_draft_post(self, access_token: str, draft_post_id: str,
                   extra_headers: Optional[Dict[str, str]] = None) -> Dict[str, Any]:
    """Fetch a single draft post by ID.

    Args:
        access_token: Token passed to ``self.headers`` to build the request headers.
        draft_post_id: ID interpolated into the ``/blog/v3/draft-posts/{id}`` path.
        extra_headers: Optional additional headers, forwarded to ``self.headers``.

    Returns:
        The value returned by ``wix_api_call_with_retry`` for the GET request.

    Raises:
        WixAPIError: Logged here (status code only) and re-raised unchanged.
    """
    endpoint = f"{self.base_url}/blog/v3/draft-posts/{draft_post_id}"
    request_headers = self.headers(access_token, extra_headers)
    try:
        # The retry helper is asked for at most three attempts.
        draft = wix_api_call_with_retry('GET', endpoint, request_headers, max_attempts=3)
    except WixAPIError as err:
        logger.error(f"Wix get_draft_post failed: HTTP {err.status_code}")
        raise
    return draft
def update_draft_post(self, access_token: str, draft_post_id: str, payload: Dict[str, Any],
                      extra_headers: Optional[Dict[str, str]] = None) -> Dict[str, Any]:
    """Send an update for an existing draft post.

    Args:
        access_token: Token passed to ``self.headers`` to build the request headers.
        draft_post_id: ID interpolated into the ``/blog/v3/draft-posts/{id}`` path.
        payload: JSON body handed to the retry helper as ``json_payload``.
        extra_headers: Optional additional headers, forwarded to ``self.headers``.

    Returns:
        The value returned by ``wix_api_call_with_retry`` for the request.

    Raises:
        WixAPIError: Logged here (status code only) and re-raised unchanged.
    """
    endpoint = f"{self.base_url}/blog/v3/draft-posts/{draft_post_id}"
    request_headers = self.headers(access_token, extra_headers)
    try:
        # NOTE(review): Wix's REST reference appears to list "Update Draft Post"
        # as PATCH on this path -- confirm the API accepts PUT and that the
        # retry helper supports whichever verb is correct.
        updated = wix_api_call_with_retry(
            'PUT', endpoint, request_headers, json_payload=payload, max_attempts=3
        )
    except WixAPIError as err:
        logger.error(f"Wix update_draft_post failed: HTTP {err.status_code}")
        raise
    return updated

View File

@@ -5,6 +5,7 @@ Handles blog post creation, validation, and publishing to Wix.
"""
import json
import os
import re
import uuid
import requests
@@ -193,6 +194,7 @@ def create_blog_post(
tag_ids: List[str] = None,
publish: bool = True,
seo_metadata: Dict[str, Any] = None,
site_id: str = None,
import_image_func = None,
lookup_categories_func = None,
lookup_tags_func = None,
@@ -220,111 +222,50 @@ def create_blog_post(
Returns:
Created blog post information
"""
if not member_id:
raise ValueError("memberId is required for third-party apps creating blog posts")
# ===== PRE-FLIGHT VALIDATION =====
errors = []
# Ensure access_token is a string (handle cases where it might be int, dict, or other type)
# Use normalize_token_string to handle various token formats (dict with accessToken.value, etc.)
if not member_id:
errors.append("memberId is required for third-party apps creating blog posts")
title_clean = str(title).strip() if title else ""
if not title_clean:
errors.append("Title is required")
elif len(title_clean) > 200:
errors.append(f"Title is too long ({len(title_clean)} chars, max 200)")
# Ensure access_token is a string
normalized_token = normalize_token_string(access_token)
if not normalized_token:
raise ValueError("access_token is required and must be a valid string or token object")
access_token = normalized_token.strip()
if not access_token:
raise ValueError("access_token cannot be empty")
errors.append("access_token is required and must be a valid string or token object")
else:
access_token = normalized_token.strip()
if not access_token:
errors.append("access_token cannot be empty")
# BACK TO BASICS MODE: Try simplest possible structure FIRST
# Since posting worked before Ricos/SEO, let's test with absolute minimum
BACK_TO_BASICS_MODE = False # Disabled: full Ricos conversion now produces valid output
content_clean = str(content).strip() if content else ""
if not content_clean:
logger.warning("Content was empty, using default text")
content = "This is a post from ALwrity."
elif len(content_clean) > 100000:
errors.append(f"Content is too long ({len(content_clean)} chars, max 100,000)")
if errors:
raise ValueError(f"Wix publish validation failed: {'; '.join(errors)}")
wix_logger.reset()
wix_logger.log_operation_start("Blog Post Creation", title=title[:50] if title else None, member_id=member_id[:20] if member_id else None)
if BACK_TO_BASICS_MODE:
logger.info("🔧 Wix: BACK TO BASICS MODE - Testing minimal structure")
# Import auth utilities for proper token handling
from .auth_utils import get_wix_headers
# Create absolute minimal Ricos structure
minimal_ricos = {
'nodes': [{
'id': str(uuid.uuid4()),
'type': 'PARAGRAPH',
'nodes': [{
'id': str(uuid.uuid4()),
'type': 'TEXT',
'nodes': [],
'textData': {
'text': (content[:500] if content else "This is a post from ALwrity.").strip(),
'decorations': []
}
}]
}]
}
# Extract wix-site-id from token if possible
extra_headers = {}
try:
token_str = str(access_token)
if token_str and token_str.startswith('OauthNG.JWS.'):
import jwt
import json
jwt_part = token_str[12:]
payload = jwt.decode(jwt_part, options={"verify_signature": False, "verify_aud": False})
data_payload = payload.get('data', {})
if isinstance(data_payload, str):
try:
data_payload = json.loads(data_payload)
except:
pass
instance_data = data_payload.get('instance', {})
meta_site_id = instance_data.get('metaSiteId')
if isinstance(meta_site_id, str) and meta_site_id:
extra_headers['wix-site-id'] = meta_site_id
except Exception:
pass
# Build minimal payload
minimal_blog_data = {
'draftPost': {
'title': str(title).strip() if title else "Untitled",
'memberId': str(member_id).strip(),
'richContent': minimal_ricos
},
'publish': False,
'fieldsets': ['URL']
}
try:
from .blog import WixBlogService
blog_service_test = WixBlogService('https://www.wixapis.com', None)
result = blog_service_test.create_draft_post(access_token, minimal_blog_data, extra_headers if extra_headers else None)
logger.success("✅✅✅ Wix: BACK TO BASICS SUCCEEDED! Issue is with Ricos/SEO structure")
wix_logger.log_operation_result("Back to Basics Test", True, result)
return result
except Exception as e:
logger.error(f"❌ Wix: BACK TO BASICS FAILED - {str(e)[:100]}")
logger.error(" ⚠️ Issue is NOT with Ricos/SEO - likely permissions/token")
wix_logger.add_error(f"Back to Basics: {str(e)[:100]}")
# Import auth utilities for proper token handling
from .auth_utils import get_wix_headers
# Headers for blog post creation (use user's OAuth token)
headers = get_wix_headers(access_token)
# Build valid Ricos rich content
# Ensure content is not empty
if not content or not content.strip():
content = "This is a post from ALwrity."
logger.warning("⚠️ Content was empty, using default text")
# Quick token/permission check (only log if issues found)
has_blog_scope = None
meta_site_id = None
try:
from .utils import decode_wix_token
import json
from .utils import decode_wix_token, extract_meta_from_token
token_data = decode_wix_token(access_token)
if 'scope' in token_data:
scopes = token_data.get('scope')
@@ -332,17 +273,9 @@ def create_blog_post(
scope_list = scopes.split(',') if ',' in scopes else [scopes]
has_blog_scope = any('BLOG' in s.upper() for s in scope_list)
if not has_blog_scope:
logger.error("Wix: Token missing BLOG scopes - verify OAuth app permissions")
if 'data' in token_data:
data = token_data.get('data')
if isinstance(data, str):
try:
data = json.loads(data)
except:
pass
if isinstance(data, dict) and 'instance' in data:
instance = data.get('instance', {})
meta_site_id = instance.get('metaSiteId')
logger.error("Wix: Token missing BLOG scopes - verify OAuth app permissions")
meta_info = extract_meta_from_token(access_token)
meta_site_id = meta_info.get('metaSiteId')
except Exception:
pass
@@ -352,13 +285,12 @@ def create_blog_post(
import requests
test_response = requests.get(f"{base_url}/blog/v3/categories", headers=test_headers, timeout=5)
if test_response.status_code == 403:
logger.error("Wix: Permission denied - OAuth app missing BLOG.CREATE-DRAFT")
logger.error("Wix: Permission denied - OAuth app missing BLOG.CREATE-DRAFT")
elif test_response.status_code == 401:
logger.error("Wix: Unauthorized - token may be expired")
logger.error("Wix: Unauthorized - token may be expired")
except Exception:
pass
# Safely get token length (access_token is already validated as string above)
token_length = len(access_token) if access_token else 0
wix_logger.log_token_info(token_length, has_blog_scope, meta_site_id)
@@ -470,19 +402,20 @@ def create_blog_post(
if cover_image_url and import_image_func:
try:
media_id = import_image_func(access_token, cover_image_url, f'Cover: {title}')
# Ensure media_id is a string and not None
if media_id and isinstance(media_id, str):
# import_image_to_wix now returns Optional[str] — None means failure
if media_id and isinstance(media_id, str) and media_id.strip():
blog_data['draftPost']['media'] = {
'wixMedia': {
'image': {'id': str(media_id).strip()}
'image': {'id': media_id.strip()}
},
'displayed': True,
'custom': True
}
logger.info(f"Cover image imported: {media_id[:16]}...")
else:
logger.warning(f"Invalid media_id type or value: {type(media_id)}, skipping media")
logger.warning(f"Cover image import returned no valid media_id (type={type(media_id)}). Continuing without cover image.")
except Exception as e:
logger.warning(f"Failed to import cover image: {e}")
logger.warning(f"Cover image import failed (non-fatal): {e}. Continuing without cover image.")
# Handle categories - can be either IDs (list of strings) or names (for lookup)
category_ids_to_use = None
@@ -558,34 +491,33 @@ def create_blog_post(
logger.debug("No SEO metadata provided to create_blog_post")
try:
# Extract wix-site-id from token if possible
# Extract wix-site-id from token, parameter, or env var
extra_headers = {}
try:
wix_site_id = site_id or os.getenv('WIX_SITE_ID')
if not wix_site_id:
from .utils import extract_meta_from_token
meta_info = extract_meta_from_token(access_token)
wix_site_id = meta_info.get('metaSiteId')
if wix_site_id:
extra_headers['wix-site-id'] = wix_site_id
logger.info(f"Using wix-site-id: {wix_site_id[:8]}... (source: {'param' if site_id else 'env' if os.getenv('WIX_SITE_ID') else 'token'})")
else:
token_str = str(access_token)
if token_str and token_str.startswith('OauthNG.JWS.'):
import jwt
import json
jwt_part = token_str[12:]
payload = jwt.decode(jwt_part, options={"verify_signature": False, "verify_aud": False})
data_payload = payload.get('data', {})
if isinstance(data_payload, str):
try:
data_payload = json.loads(data_payload)
except:
pass
instance_data = data_payload.get('instance', {})
meta_site_id = instance_data.get('metaSiteId')
if isinstance(meta_site_id, str) and meta_site_id:
extra_headers['wix-site-id'] = meta_site_id
except Exception:
pass
if token_str.startswith('IST.'):
logger.error("❌ IST. API key requires WIX_SITE_ID environment variable or site_id parameter. "
"The token's tenant.id is the account ID, not the site ID. "
"Please set WIX_SITE_ID in your .env file to your Wix site's metaSiteId.")
else:
logger.warning("No wix-site-id found — API calls may fail if token requires it")
except Exception as e:
logger.debug(f"Could not extract wix-site-id from token: {e}")
try:
# Validate payload structure before sending
draft_post = blog_data.get('draftPost', {})
if not isinstance(draft_post, dict):
raise ValueError("draftPost must be a dict object")
# Validate richContent structure
if 'richContent' in draft_post:
rc = draft_post['richContent']
if not isinstance(rc, dict):
@@ -595,8 +527,7 @@ def create_blog_post(
if not isinstance(rc['nodes'], list):
raise ValueError(f"richContent.nodes must be a list, got {type(rc['nodes'])}")
logger.debug(f"✅ richContent validation passed: {len(rc.get('nodes', []))} nodes")
# Validate seoData structure if present
if 'seoData' in draft_post:
seo = draft_post['seoData']
if not isinstance(seo, dict):
@@ -606,46 +537,40 @@ def create_blog_post(
if 'settings' in seo and not isinstance(seo['settings'], dict):
raise ValueError(f"seoData.settings must be a dict, got {type(seo.get('settings'))}")
logger.debug(f"✅ seoData validation passed: {len(seo.get('tags', []))} tags")
# Final validation: Ensure no None values in any nested objects
# Wix API rejects None values and expects proper types
try:
validate_payload_no_none(blog_data, "blog_data")
logger.debug("✅ Payload validation passed: No None values found")
except ValueError as e:
logger.error(f"❌ Payload validation failed: {e}")
raise
# Log payload summary
logger.debug(f"Payload: draftPost keys={list(draft_post.keys())}, "
f"nodes={len(draft_post.get('richContent', {}).get('nodes', []))}, "
f"has_seo={'seoData' in draft_post}")
# Final deep validation: Serialize and deserialize to catch any JSON-serialization issues
try:
import json
json.dumps(blog_data, ensure_ascii=False)
except (TypeError, ValueError) as e:
logger.error(f"❌ Payload JSON serialization failed: {e}")
raise ValueError(f"Payload contains non-serializable data: {e}")
# Clean up None values that Wix API would reject
rc = blog_data['draftPost']['richContent']
for field in ['documentStyle', 'metadata']:
if field in rc and (rc[field] is None or rc[field] == "" or not isinstance(rc[field], dict)):
del rc[field]
logger.info(f"📤 Publishing to Wix: title='{blog_data['draftPost'].get('title', '')}', "
f"nodes={len(rc.get('nodes', []))}")
result = blog_service.create_draft_post(access_token, blog_data, extra_headers or None)
# Log success
draft_post = result.get('draftPost', {})
post_id = draft_post.get('id', 'N/A')
wix_logger.log_operation_result("Create Draft Post", True, result)
logger.success(f"✅ Wix: Blog post created - ID: {post_id}")
return result
except TypeError as e:
import traceback

View File

@@ -5,79 +5,71 @@ from typing import Any, Dict, List
def parse_markdown_inline(text: str) -> List[Dict[str, Any]]:
"""
Parse inline markdown formatting (bold, italic, links) into Ricos text nodes.
Parse inline markdown formatting (bold, italic, links, code, strikethrough) into Ricos text nodes.
Returns a list of text nodes with decorations.
Handles: **bold**, *italic*, [links](url), `code`, and combinations.
Handles: **bold**, *italic*, [links](url), `code`, ~strikethrough~, and combinations.
"""
if not text:
return [{
'id': str(uuid.uuid4()),
'type': 'TEXT',
'nodes': [], # TEXT nodes must have empty nodes array per Wix API
'nodes': [],
'textData': {'text': '', 'decorations': []}
}]
nodes = []
# Process text character by character to handle nested/adjacent formatting
# This is more robust than regex for complex cases
i = 0
current_text = ''
current_decorations = []
def flush_text():
nonlocal current_text
if current_text:
nodes.append({
'id': str(uuid.uuid4()),
'type': 'TEXT',
'nodes': [],
'textData': {'text': current_text, 'decorations': []}
})
current_text = ''
while i < len(text):
# Check for bold **text** (must come before single * check)
# Bold **text**
if i < len(text) - 1 and text[i:i+2] == '**':
# Save any accumulated text
if current_text:
nodes.append({
'id': str(uuid.uuid4()),
'type': 'TEXT',
'nodes': [], # TEXT nodes must have empty nodes array per Wix API
'textData': {
'text': current_text,
'decorations': current_decorations.copy()
}
})
current_text = ''
# Find closing **
flush_text()
end_bold = text.find('**', i + 2)
if end_bold != -1:
bold_text = text[i + 2:end_bold]
# Recursively parse the bold text for nested formatting
bold_nodes = parse_markdown_inline(bold_text)
# Add BOLD decoration to all text nodes within
# Per Wix API: decorations are objects with 'type' field, not strings
for node in bold_nodes:
if node['type'] == 'TEXT':
node_decorations = node['textData'].get('decorations', []).copy()
# Check if BOLD decoration already exists
has_bold = any(d.get('type') == 'BOLD' for d in node_decorations if isinstance(d, dict))
if not has_bold:
node_decorations.append({'type': 'BOLD'})
node['textData']['decorations'] = node_decorations
decs = node['textData'].get('decorations', []).copy()
if not any(d.get('type') == 'BOLD' for d in decs if isinstance(d, dict)):
decs.append({'type': 'BOLD'})
node['textData']['decorations'] = decs
nodes.append(node)
i = end_bold + 2
continue
# Check for link [text](url)
# Strikethrough ~text~
elif text[i] == '~':
flush_text()
end_strike = text.find('~', i + 1)
if end_strike != -1:
strike_text = text[i + 1:end_strike]
strike_nodes = parse_markdown_inline(strike_text)
for node in strike_nodes:
if node['type'] == 'TEXT':
decs = node['textData'].get('decorations', []).copy()
if not any(d.get('type') == 'STRIKETHROUGH' for d in decs if isinstance(d, dict)):
decs.append({'type': 'STRIKETHROUGH'})
node['textData']['decorations'] = decs
nodes.append(node)
i = end_strike + 1
continue
# Link [text](url)
elif text[i] == '[':
# Save any accumulated text
if current_text:
nodes.append({
'id': str(uuid.uuid4()),
'type': 'TEXT',
'nodes': [], # TEXT nodes must have empty nodes array per Wix API
'textData': {
'text': current_text,
'decorations': current_decorations.copy()
}
})
current_text = ''
current_decorations = []
# Find matching ]
flush_text()
link_end = text.find(']', i)
if link_end != -1 and link_end < len(text) - 1 and text[link_end + 1] == '(':
link_text = text[i + 1:link_end]
@@ -85,12 +77,10 @@ def parse_markdown_inline(text: str) -> List[Dict[str, Any]]:
url_end = text.find(')', url_start)
if url_end != -1:
url = text[url_start:url_end]
# Per Wix API: Links are decorations on TEXT nodes, not separate node types
# Create TEXT node with LINK decoration
nodes.append({
'id': str(uuid.uuid4()),
'type': 'TEXT',
'nodes': [], # TEXT nodes must have empty nodes array per Wix API
'nodes': [],
'textData': {
'text': link_text,
'decorations': [{
@@ -98,7 +88,7 @@ def parse_markdown_inline(text: str) -> List[Dict[str, Any]]:
'linkData': {
'link': {
'url': url,
'target': 'BLANK' # Wix API uses 'BLANK', not '_blank'
'target': 'BLANK'
}
}
}]
@@ -107,33 +97,17 @@ def parse_markdown_inline(text: str) -> List[Dict[str, Any]]:
i = url_end + 1
continue
# Check for code `text`
# Inline code `text`
elif text[i] == '`':
# Save any accumulated text
if current_text:
nodes.append({
'id': str(uuid.uuid4()),
'type': 'TEXT',
'nodes': [], # TEXT nodes must have empty nodes array per Wix API
'textData': {
'text': current_text,
'decorations': current_decorations.copy()
}
})
current_text = ''
current_decorations = []
# Find closing `
flush_text()
code_end = text.find('`', i + 1)
if code_end != -1:
code_text = text[i + 1:code_end]
# Per Wix API: CODE is not a valid decoration type, but we'll keep the structure
# Note: Wix uses CODE_BLOCK nodes for code, not CODE decorations
# For inline code, we'll just use plain text for now
# Wix doesn't have a CODE decoration, but we can preserve the text
nodes.append({
'id': str(uuid.uuid4()),
'type': 'TEXT',
'nodes': [], # TEXT nodes must have empty nodes array per Wix API
'nodes': [],
'textData': {
'text': code_text,
'decorations': [] # CODE is not a valid decoration in Wix API
@@ -142,39 +116,21 @@ def parse_markdown_inline(text: str) -> List[Dict[str, Any]]:
i = code_end + 1
continue
# Check for italic *text* (only if not part of **)
# Italic *text* (must come after ** check)
elif text[i] == '*' and (i == 0 or text[i-1] != '*') and (i == len(text) - 1 or text[i+1] != '*'):
# Save any accumulated text
if current_text:
nodes.append({
'id': str(uuid.uuid4()),
'type': 'TEXT',
'nodes': [], # TEXT nodes must have empty nodes array per Wix API
'textData': {
'text': current_text,
'decorations': current_decorations.copy()
}
})
current_text = ''
current_decorations = []
# Find closing * (but not **)
flush_text()
italic_end = text.find('*', i + 1)
if italic_end != -1:
# Make sure it's not part of **
if italic_end == len(text) - 1 or text[italic_end + 1] != '*':
italic_text = text[i + 1:italic_end]
italic_nodes = parse_markdown_inline(italic_text)
# Add ITALIC decoration
# Per Wix API: decorations are objects with 'type' field
for node in italic_nodes:
if node['type'] == 'TEXT':
node_decorations = node['textData'].get('decorations', []).copy()
# Check if ITALIC decoration already exists
has_italic = any(d.get('type') == 'ITALIC' for d in node_decorations if isinstance(d, dict))
if not has_italic:
node_decorations.append({'type': 'ITALIC'})
node['textData']['decorations'] = node_decorations
decs = node['textData'].get('decorations', []).copy()
if not any(d.get('type') == 'ITALIC' for d in decs if isinstance(d, dict)):
decs.append({'type': 'ITALIC'})
node['textData']['decorations'] = decs
nodes.append(node)
i = italic_end + 1
continue
@@ -183,58 +139,116 @@ def parse_markdown_inline(text: str) -> List[Dict[str, Any]]:
current_text += text[i]
i += 1
# Add any remaining text
if current_text:
nodes.append({
'id': str(uuid.uuid4()),
'type': 'TEXT',
'nodes': [], # TEXT nodes must have empty nodes array per Wix API
'textData': {
'text': current_text,
'decorations': current_decorations.copy()
}
})
flush_text()
# If no nodes created, return single plain text node
if not nodes:
nodes.append({
'id': str(uuid.uuid4()),
'type': 'TEXT',
'nodes': [], # TEXT nodes must have empty nodes array per Wix API
'textData': {
'text': text,
'decorations': []
}
'nodes': [],
'textData': {'text': text, 'decorations': []}
})
return nodes
def _make_code_block_node(code_text: str, language: str = '') -> Dict[str, Any]:
"""Create a Ricos CODE_BLOCK node."""
lines = code_text.split('\n')
text_nodes = []
for line in lines:
text_nodes.append({
'id': str(uuid.uuid4()),
'type': 'TEXT',
'nodes': [],
'textData': {'text': line, 'decorations': []}
})
return {
'id': str(uuid.uuid4()),
'type': 'CODE_BLOCK',
'nodes': text_nodes,
'codeBlockData': {
'language': language or 'text',
'textWrap': True
}
}
def _make_horizontal_rule_node() -> Dict[str, Any]:
"""Create a Ricos DIVIDER node."""
return {
'id': str(uuid.uuid4()),
'type': 'DIVIDER',
'nodes': [],
'dividerData': {
'type': 'LINE',
'lineStyle': {
'width': 'LARGE',
'alignment': 'CENTER'
}
}
}
def convert_content_to_ricos(content: str, images: List[str] = None) -> Dict[str, Any]:
"""
Convert markdown content into valid Ricos JSON format.
Supports headings, paragraphs, lists, bold, italic, links, and images.
Supports:
- Headings (# to ######)
- Paragraphs with inline formatting
- Unordered lists (-, *)
- Ordered lists (1., 2.)
- Blockquotes (>)
- Code blocks (```language ... ```)
- Inline images (![alt](url))
- Horizontal rules (---, ***, ___)
"""
if not content:
content = "This is a post from ALwrity."
nodes = []
lines = content.split('\n')
i = 0
while i < len(lines):
line = lines[i].strip()
line = lines[i]
stripped = line.strip()
if not line:
if not stripped:
i += 1
continue
node_id = str(uuid.uuid4())
# Check for headings
if line.startswith('#'):
level = len(line) - len(line.lstrip('#'))
heading_text = line.lstrip('# ').strip()
# Code blocks (```language ... ```)
if stripped.startswith('```'):
language = stripped[3:].strip() or ''
code_lines = []
i += 1
while i < len(lines):
if lines[i].strip() == '```':
i += 1
break
code_lines.append(lines[i])
i += 1
code_text = '\n'.join(code_lines)
if code_text.strip():
nodes.append(_make_code_block_node(code_text, language))
continue
# Horizontal rules
if re.match(r'^(---+|\*\*\*|___+)$', stripped):
nodes.append(_make_horizontal_rule_node())
i += 1
continue
# Headings
if stripped.startswith('#'):
level = len(stripped) - len(stripped.lstrip('#'))
heading_text = stripped.lstrip('# ').strip()
text_nodes = parse_markdown_inline(heading_text)
nodes.append({
'id': node_id,
@@ -243,42 +257,38 @@ def convert_content_to_ricos(content: str, images: List[str] = None) -> Dict[str
'headingData': {'level': min(level, 6)}
})
i += 1
continue
# Check for blockquotes
elif line.startswith('>'):
quote_text = line.lstrip('> ').strip()
# Continue reading consecutive blockquote lines
quote_lines = [quote_text]
# Blockquotes
if stripped.startswith('>'):
quote_lines = [stripped.lstrip('> ').strip()]
i += 1
while i < len(lines) and lines[i].strip().startswith('>'):
quote_lines.append(lines[i].strip().lstrip('> ').strip())
i += 1
quote_content = ' '.join(quote_lines)
text_nodes = parse_markdown_inline(quote_content)
# CRITICAL: TEXT nodes must be wrapped in PARAGRAPH nodes within BLOCKQUOTE
# Wix API: omit empty data objects, don't include them as {}
paragraph_node = {
'id': str(uuid.uuid4()),
'type': 'PARAGRAPH',
'nodes': text_nodes,
}
blockquote_node = {
nodes.append({
'id': node_id,
'type': 'BLOCKQUOTE',
'nodes': [paragraph_node],
}
nodes.append(blockquote_node)
})
continue
# Check for unordered lists (handle both '- ' and '* ' markers)
elif (line.startswith('- ') or line.startswith('* ') or
(line.startswith('-') and len(line) > 1 and line[1] != '-') or
(line.startswith('*') and len(line) > 1 and line[1] != '*')):
# Unordered lists
if (stripped.startswith('- ') or stripped.startswith('* ') or
(stripped.startswith('-') and len(stripped) > 1 and stripped[1] != '-') or
(stripped.startswith('*') and len(stripped) > 1 and stripped[1] != '*')):
list_items = []
list_marker = '- ' if line.startswith('-') else '* '
# Process list items
list_marker = '- ' if stripped.startswith('-') else '* '
while i < len(lines):
current_line = lines[i].strip()
# Check if this is a list item
is_list_item = (current_line.startswith('- ') or current_line.startswith('* ') or
(current_line.startswith('-') and len(current_line) > 1 and current_line[1] != '-') or
(current_line.startswith('*') and len(current_line) > 1 and current_line[1] != '*'))
@@ -286,12 +296,9 @@ def convert_content_to_ricos(content: str, images: List[str] = None) -> Dict[str
if not is_list_item:
break
# Extract item text (handle both '- ' and '-item' formats)
if current_line.startswith('- ') or current_line.startswith('* '):
item_text = current_line[2:].strip()
elif current_line.startswith('-'):
item_text = current_line[1:].strip()
elif current_line.startswith('*'):
elif current_line.startswith('-') or current_line.startswith('*'):
item_text = current_line[1:].strip()
else:
item_text = current_line
@@ -302,52 +309,41 @@ def convert_content_to_ricos(content: str, images: List[str] = None) -> Dict[str
# Check for nested items (indented with 2+ spaces)
while i < len(lines):
next_line = lines[i]
# Must be indented and be a list marker
if next_line.startswith(' ') and (next_line.strip().startswith('- ') or
next_line.strip().startswith('* ') or
(next_line.strip().startswith('-') and len(next_line.strip()) > 1) or
(next_line.strip().startswith('*') and len(next_line.strip()) > 1)):
if (next_line.startswith(' ') and
(next_line.strip().startswith('- ') or next_line.strip().startswith('* '))):
nested_text = next_line.strip()
if nested_text.startswith('- ') or nested_text.startswith('* '):
nested_text = nested_text[2:].strip()
elif nested_text.startswith('-'):
nested_text = nested_text[1:].strip()
elif nested_text.startswith('*'):
elif nested_text.startswith('-') or nested_text.startswith('*'):
nested_text = nested_text[1:].strip()
list_items.append(nested_text)
i += 1
else:
break
# Build list items with proper formatting
# CRITICAL: TEXT nodes must be wrapped in PARAGRAPH nodes within LIST_ITEM
# NOTE: LIST_ITEM nodes do NOT have a data field per Wix API schema
# Wix API: omit empty data objects, don't include them as {}
list_node_items = []
for item_text in list_items:
item_node_id = str(uuid.uuid4())
text_nodes = parse_markdown_inline(item_text)
paragraph_node = {
'id': str(uuid.uuid4()),
'type': 'PARAGRAPH',
'nodes': text_nodes,
}
list_item_node = {
'id': item_node_id,
list_node_items.append({
'id': str(uuid.uuid4()),
'type': 'LIST_ITEM',
'nodes': [paragraph_node]
}
list_node_items.append(list_item_node)
})
bulleted_list_node = {
nodes.append({
'id': node_id,
'type': 'BULLETED_LIST',
'nodes': list_node_items,
}
nodes.append(bulleted_list_node)
})
continue
# Check for ordered lists
elif re.match(r'^\d+\.\s+', line):
# Ordered lists
if re.match(r'^\d+\.\s+', stripped):
list_items = []
while i < len(lines) and re.match(r'^\d+\.\s+', lines[i].strip()):
item_text = re.sub(r'^\d+\.\s+', '', lines[i].strip())
@@ -359,35 +355,30 @@ def convert_content_to_ricos(content: str, images: List[str] = None) -> Dict[str
list_items.append(nested_text)
i += 1
# CRITICAL: TEXT nodes must be wrapped in PARAGRAPH nodes within LIST_ITEM
# NOTE: LIST_ITEM nodes do NOT have a data field per Wix API schema
# Wix API: omit empty data objects, don't include them as {}
list_node_items = []
for item_text in list_items:
item_node_id = str(uuid.uuid4())
text_nodes = parse_markdown_inline(item_text)
paragraph_node = {
'id': str(uuid.uuid4()),
'type': 'PARAGRAPH',
'nodes': text_nodes,
}
list_item_node = {
'id': item_node_id,
list_node_items.append({
'id': str(uuid.uuid4()),
'type': 'LIST_ITEM',
'nodes': [paragraph_node]
}
list_node_items.append(list_item_node)
})
ordered_list_node = {
nodes.append({
'id': node_id,
'type': 'ORDERED_LIST',
'nodes': list_node_items,
}
nodes.append(ordered_list_node)
})
continue
# Check for images
elif line.startswith('!['):
img_match = re.match(r'!\[([^\]]*)\]\(([^)]+)\)', line)
# Images
if stripped.startswith('!['):
img_match = re.match(r'!\[([^\]]*)\]\(([^)]+)\)', stripped)
if img_match:
alt_text = img_match.group(1)
img_url = img_match.group(2)
@@ -407,62 +398,52 @@ def convert_content_to_ricos(content: str, images: List[str] = None) -> Dict[str
}
})
i += 1
continue
# Regular paragraph
else:
# Collect consecutive non-empty lines as paragraph content
para_lines = [line]
para_lines = [stripped]
i += 1
while i < len(lines):
next_line = lines[i].strip()
if not next_line:
break
# Stop if next line is a special markdown element
if (next_line.startswith('#') or
next_line.startswith('- ') or
next_line.startswith('* ') or
next_line.startswith('>') or
next_line.startswith('![') or
next_line.startswith('```') or
re.match(r'^(---+|\*\*\*|___+)$', next_line) or
re.match(r'^\d+\.\s+', next_line)):
break
para_lines.append(next_line)
i += 1
while i < len(lines):
next_line = lines[i].strip()
if not next_line:
break
# Stop if next line is a special markdown element
if (next_line.startswith('#') or
next_line.startswith('- ') or
next_line.startswith('* ') or
next_line.startswith('>') or
next_line.startswith('![') or
re.match(r'^\d+\.\s+', next_line)):
break
para_lines.append(next_line)
i += 1
para_text = ' '.join(para_lines)
text_nodes = parse_markdown_inline(para_text)
# Only add paragraph if there are text nodes
if text_nodes:
paragraph_node = {
'id': node_id,
'type': 'PARAGRAPH',
'nodes': text_nodes,
}
nodes.append(paragraph_node)
para_text = ' '.join(para_lines)
text_nodes = parse_markdown_inline(para_text)
if text_nodes:
nodes.append({
'id': node_id,
'type': 'PARAGRAPH',
'nodes': text_nodes,
})
# Ensure at least one node exists
# Wix API: omit empty data objects, don't include them as {}
if not nodes:
fallback_paragraph = {
nodes.append({
'id': str(uuid.uuid4()),
'type': 'PARAGRAPH',
'nodes': [{
'id': str(uuid.uuid4()),
'type': 'TEXT',
'nodes': [], # TEXT nodes must have empty nodes array per Wix API
'nodes': [],
'textData': {
'text': content[:500] if content else "This is a post from ALwrity.",
'decorations': []
}
}],
}
nodes.append(fallback_paragraph)
})
# Per Wix Blog API documentation: richContent should ONLY contain 'nodes'
# Do NOT include 'type', 'id', 'metadata', or 'documentStyle' at root level
# These fields are for Ricos Document format, but Blog API expects just the nodes structure
return {
'nodes': nodes
}
return {'nodes': nodes}

View File

@@ -1,17 +1,33 @@
from typing import Any, Dict
from typing import Any, Dict, Optional
import requests
from loguru import logger
from .retry import wix_api_call_with_retry, WixAPIError
class WixMediaService:
"""Service for Wix Media Manager operations with retry logic and error handling."""
def __init__(self, base_url: str):
self.base_url = base_url
def import_image(self, access_token: str, image_url: str, display_name: str) -> Dict[str, Any]:
def import_image(self, access_token: str, image_url: str, display_name: str) -> Optional[Dict[str, Any]]:
"""
Import external image to Wix Media Manager.
Official endpoint: https://www.wixapis.com/site-media/v1/files/import
Reference: https://dev.wix.com/docs/rest/assets/media/media-manager/files/import-file
Args:
access_token: Valid access token
image_url: URL of the image to import
display_name: Display name for the image
Returns:
Media result dict with 'file' key, or None on failure
Raises:
WixAPIError: On non-retryable failure or after retries exhausted
"""
headers = {
'Authorization': f'Bearer {access_token}',
@@ -22,10 +38,54 @@ class WixMediaService:
'mediaType': 'IMAGE',
'displayName': display_name,
}
# Correct endpoint per Wix API documentation
endpoint = f"{self.base_url}/site-media/v1/files/import"
response = requests.post(endpoint, headers=headers, json=payload)
response.raise_for_status()
return response.json()
try:
result = wix_api_call_with_retry(
'POST', endpoint, headers, json_payload=payload, max_attempts=2
)
if result and 'file' in result and 'id' in result['file']:
logger.info(f"Image imported successfully: {result['file']['id'][:16]}...")
return result
else:
logger.warning(f"Image import returned unexpected structure: {list(result.keys()) if isinstance(result, dict) else type(result)}")
return None
except WixAPIError as e:
if e.status_code == 403:
logger.error(f"Image import forbidden (403): OAuth app may lack MEDIA.SITE_MEDIA_FILES_IMPORT scope")
elif e.status_code == 400:
logger.error(f"Image import bad request (400): {e.response_body}")
elif e.status_code == 404:
logger.error(f"Image import endpoint not found (404) — Wix Media API may not be available for this site")
else:
logger.error(f"Image import failed after retries: HTTP {e.status_code} - {e.response_body}")
raise
except Exception as e:
logger.error(f"Unexpected error importing image: {e}")
raise
def get_image_url(self, access_token: str, media_id: str) -> Optional[str]:
    """
    Look up the public URL of a Wix media item.

    Issues a GET to ``{base_url}/site-media/v1/files/{media_id}`` (at most two
    attempts) and reads ``file.url`` from the response.

    Args:
        access_token: Valid access token, sent as a Bearer token
        media_id: Wix media ID

    Returns:
        The URL string, or None when the response has no 'file' entry or
        the request fails (failures are logged as warnings, not raised)
    """
    endpoint = f"{self.base_url}/site-media/v1/files/{media_id}"
    request_headers = {
        'Authorization': f'Bearer {access_token}',
        'Content-Type': 'application/json',
    }
    try:
        response = wix_api_call_with_retry('GET', endpoint, request_headers, max_attempts=2)
        if not response or 'file' not in response:
            return None
        return response['file'].get('url')
    except Exception as e:
        # Best-effort lookup: any failure degrades to "no URL".
        logger.warning(f"Failed to get image URL for {media_id}: {e}")
        return None

View File

@@ -0,0 +1,168 @@
"""
Retry utilities for Wix API calls with exponential backoff.
Production-grade retry logic that respects Wix rate limits and handles
transient failures gracefully.
"""
import time
import random
from typing import Callable, TypeVar, Optional
from loguru import logger
T = TypeVar('T')
class WixAPIError(Exception):
    """Exception for Wix API failures that carries HTTP context.

    Attributes are set from the constructor arguments:
      status_code   -- HTTP status of the failed response, or None if not supplied
      response_body -- response body text supplied by the raiser, or None
    """

    def __init__(self, message: str, status_code: Optional[int] = None, response_body: Optional[str] = None):
        super().__init__(message)
        self.status_code = status_code
        self.response_body = response_body

    def is_retryable(self) -> bool:
        """Tell whether retrying the failed call may succeed."""
        code = self.status_code
        # A missing status code is treated as a network-level failure (retryable).
        # 429 = rate limit, 500 = internal server error (sometimes transient),
        # 502/503/504 = gateway errors.
        return code is None or code in (429, 500, 502, 503, 504)

    def is_rate_limit(self) -> bool:
        """Tell whether the failure was an HTTP 429 rate-limit response."""
        return self.status_code == 429
def with_retry(
    fn: Callable[[], T],
    max_attempts: int = 3,
    base_delay: float = 1.0,
    max_delay: float = 30.0,
    retryable_exceptions: tuple = (Exception,),
    operation_name: str = "Wix API call"
) -> T:
    """
    Execute a function with exponential backoff retry logic.

    Args:
        fn: Zero-argument function to execute (should make the API call)
        max_attempts: Maximum number of attempts (default: 3)
        base_delay: Initial delay in seconds (default: 1.0)
        max_delay: Upper bound for the backoff delay in seconds (default: 30.0)
        retryable_exceptions: Tuple of non-WixAPIError exception types to retry on
        operation_name: Name used in log messages and error text

    Returns:
        Result of fn()

    Raises:
        WixAPIError: If fn raises a non-retryable WixAPIError (re-raised
            immediately), or if all attempts are exhausted. When the last
            failure was not a WixAPIError it is wrapped, with the original
            exception attached as ``__cause__``.
        Exception: Any exception not matched by ``retryable_exceptions``
            propagates unchanged.
    """
    last_exception = None

    for attempt in range(1, max_attempts + 1):
        try:
            return fn()
        except WixAPIError as e:
            last_exception = e
            # Check retryability first so a permanent failure is always logged
            # as such, even when it happens on the final attempt.
            if not e.is_retryable():
                logger.warning(f"{operation_name}: non-retryable error (HTTP {e.status_code}), failing fast")
                raise
            if attempt >= max_attempts:
                break

            actual_delay = _backoff_delay(attempt, base_delay, max_delay)
            if e.is_rate_limit():
                # For rate limits, wait at least 2 seconds before retrying.
                actual_delay = max(actual_delay, 2.0)
                logger.warning(f"{operation_name}: rate limited (429), waiting {actual_delay:.1f}s before retry {attempt + 1}/{max_attempts}")
            else:
                logger.warning(f"{operation_name}: attempt {attempt}/{max_attempts} failed (HTTP {e.status_code}), waiting {actual_delay:.1f}s before retry")
            time.sleep(actual_delay)
        except retryable_exceptions as e:
            last_exception = e
            if attempt >= max_attempts:
                break

            actual_delay = _backoff_delay(attempt, base_delay, max_delay)
            logger.warning(f"{operation_name}: attempt {attempt}/{max_attempts} failed ({type(e).__name__}), waiting {actual_delay:.1f}s before retry")
            time.sleep(actual_delay)

    # All retries exhausted
    if isinstance(last_exception, WixAPIError):
        raise last_exception
    if last_exception is not None:
        # Chain the cause so the original traceback is not lost.
        raise WixAPIError(
            f"{operation_name}: failed after {max_attempts} attempts: {last_exception}"
        ) from last_exception
    raise WixAPIError(f"{operation_name}: failed after {max_attempts} attempts")


def _backoff_delay(attempt: int, base_delay: float, max_delay: float) -> float:
    """Return the jittered exponential backoff delay (seconds) after a failed attempt."""
    delay = min(base_delay * (2 ** (attempt - 1)), max_delay)
    # Jitter (±25%) spreads out retries from concurrent callers.
    jitter = delay * 0.25
    jittered = delay + random.uniform(-jitter, jitter)
    # Never sleep less than 100ms, and never let jitter exceed max_delay.
    return max(0.1, min(jittered, max_delay))
def wix_api_call_with_retry(
    method: str,
    url: str,
    headers: dict,
    json_payload: Optional[dict] = None,
    max_attempts: int = 3
) -> dict:
    """
    Convenience wrapper for making Wix API calls with retry logic.

    Args:
        method: HTTP method ('GET', 'POST', 'PUT' or 'DELETE', case-insensitive)
        url: Full API URL
        headers: Request headers
        json_payload: Optional JSON payload, sent only for POST/PUT
        max_attempts: Maximum number of attempts

    Returns:
        Parsed JSON response, or an empty dict when a successful response
        has no body

    Raises:
        WixAPIError: On an unsupported method (raised immediately, no request
            is made), on an HTTP status >= 400 (after retries when the status
            is retryable), or when a successful response carries a body that
            is not valid JSON (not retried)
    """
    import requests

    verb = method.upper()
    if verb not in ('GET', 'POST', 'PUT', 'DELETE'):
        # Caller error: fail right away instead of sleeping through retries.
        raise WixAPIError(f"Unsupported HTTP method: {method}")

    def _body_snippet(resp) -> str:
        # Truncated response body for error context.
        try:
            return resp.text[:500]
        except Exception:
            return str(resp.content)[:500]

    def _call():
        # Only POST and PUT carry the JSON payload.
        body_kwargs = {'json': json_payload} if verb in ('POST', 'PUT') else {}
        resp = requests.request(verb, url, headers=headers, timeout=30, **body_kwargs)

        if resp.status_code >= 400:
            raise WixAPIError(
                f"Wix API {method} {url} failed: HTTP {resp.status_code}",
                status_code=resp.status_code,
                response_body=_body_snippet(resp)
            )

        # The request has already succeeded server-side at this point. A body
        # parsing problem must not escape as a generic exception, because the
        # retry loop would then re-send a request that already took effect.
        if not resp.content:
            return {}
        try:
            return resp.json()
        except ValueError as exc:
            # Carries the 2xx/3xx status, which is_retryable() rejects, so the
            # retry loop fails fast instead of repeating the call.
            raise WixAPIError(
                f"Wix API {method} {url} returned a non-JSON body (HTTP {resp.status_code})",
                status_code=resp.status_code,
                response_body=_body_snippet(resp)
            ) from exc

    return with_retry(
        _call,
        max_attempts=max_attempts,
        operation_name=f"Wix {method} {url.split('/')[-1]}"
    )

View File

@@ -85,24 +85,45 @@ def decode_wix_token(access_token: str) -> Dict[str, Any]:
if token_str.startswith('OauthNG.JWS.'):
jwt_part = token_str[12:]
return jwt.decode(jwt_part, options={"verify_signature": False, "verify_aud": False})
if token_str.startswith('IST.'):
jwt_part = token_str[4:]
return jwt.decode(jwt_part, options={"verify_signature": False, "verify_aud": False})
return jwt.decode(token_str, options={"verify_signature": False, "verify_aud": False})
def _extract_data_payload(payload: Dict[str, Any]) -> Dict[str, Any]:
data_payload = payload.get('data', {})
if isinstance(data_payload, str):
try:
data_payload = json.loads(data_payload)
except Exception:
data_payload = {}
return data_payload if isinstance(data_payload, dict) else {}
def extract_meta_from_token(access_token: str) -> Dict[str, Optional[str]]:
try:
payload = decode_wix_token(access_token)
data_payload = payload.get('data', {})
if isinstance(data_payload, str):
try:
data_payload = json.loads(data_payload)
except Exception:
pass
instance = (data_payload or {}).get('instance', {})
return {
data_payload = _extract_data_payload(payload)
instance = (data_payload or {}).get('instance', {}) or {}
result = {
'siteMemberId': instance.get('siteMemberId'),
'metaSiteId': instance.get('metaSiteId'),
'permissions': instance.get('permissions'),
}
# Only fall back to tenant.id for OAuth tokens (not IST. API keys)
# IST. tokens have tenant.id = account_id, which is NOT the site metaSiteId
token_str = str(access_token)
if not result.get('metaSiteId') and not token_str.startswith('IST.'):
tenant = data_payload.get('tenant', {}) or {}
tenant_id = tenant.get('id')
if tenant_id:
result['metaSiteId'] = tenant_id
if not result.get('metaSiteId'):
meta_site_id = payload.get('metaSiteId') or payload.get('site_id')
if meta_site_id:
result['metaSiteId'] = meta_site_id
return result
except Exception:
return {'siteMemberId': None, 'metaSiteId': None, 'permissions': None}

View File

@@ -86,185 +86,6 @@ class StrategyArchitectAgent(SIFBaseAgent):
logger.error(f"[{self.__class__.__name__}] Full traceback: {traceback.format_exc()}")
return []
class ContentGuardianAgent(SIFBaseAgent):
    """Agent for preventing cannibalization and ensuring content originality."""

    CANNIBALIZATION_THRESHOLD = 0.85  # Similarity threshold for cannibalization warning
    ORIGINALITY_THRESHOLD = 0.75  # Minimum originality score

    def __init__(self, intelligence_service: TxtaiIntelligenceService, sif_service: Any = None):
        super().__init__(intelligence_service)
        self.sif_service = sif_service

    async def check_cannibalization(self, new_draft: str) -> Dict[str, Any]:
        """Check if a new draft competes semantically with existing pages.

        Searches the intelligence index for the single closest match and
        compares its score with ``CANNIBALIZATION_THRESHOLD``.

        Returns:
            A dict that always contains ``warning`` (bool). When a conflict
            is detected it also carries ``similar_to``, ``score``,
            ``threshold`` and ``recommendation``; otherwise it carries one of
            ``uniqueness_score``, ``reason`` or ``error``. Failures are
            reported through the ``error`` key rather than raised.
        """
        self._log_agent_operation("Checking for semantic cannibalization", draft_length=len(new_draft))

        try:
            if not self.intelligence.is_initialized():
                logger.error(f"[{self.__class__.__name__}] Intelligence service not initialized")
                return {"warning": False, "error": "Service not initialized"}

            if not new_draft or len(new_draft.strip()) < 50:
                logger.warning(f"[{self.__class__.__name__}] Draft too short for meaningful analysis")
                return {"warning": False, "reason": "Draft too short"}

            results = await self.intelligence.search(new_draft, limit=1)

            if not results:
                logger.info(f"[{self.__class__.__name__}] No similar content found - draft is unique")
                return {"warning": False, "uniqueness_score": 1.0}

            top_result = results[0]
            similarity_score = top_result.get('score', 0.0)

            logger.debug(f"[{self.__class__.__name__}] Top similarity score: {similarity_score:.4f}")

            if similarity_score > self.CANNIBALIZATION_THRESHOLD:
                warning_data = {
                    "warning": True,
                    "similar_to": top_result.get('id', 'unknown'),
                    "score": similarity_score,
                    "threshold": self.CANNIBALIZATION_THRESHOLD,
                    "recommendation": "Consider revising the draft to target a different angle or merge with existing content"
                }
                logger.warning(f"[{self.__class__.__name__}] Cannibalization detected: {warning_data}")
                return warning_data

            logger.info(f"[{self.__class__.__name__}] No cannibalization detected. Draft is sufficiently unique.")
            return {"warning": False, "uniqueness_score": 1.0 - similarity_score}

        except Exception as e:
            logger.error(f"[{self.__class__.__name__}] Failed to check cannibalization: {e}")
            logger.error(f"[{self.__class__.__name__}] Full traceback: {traceback.format_exc()}")
            return {"warning": False, "error": str(e)}

    async def verify_originality(self, text: str, competitor_index: Any) -> Dict[str, Any]:
        """Verify originality against competitor content index.

        Currently a stub: ``competitor_index`` is not consulted and the
        returned score is a fixed placeholder.
        """
        self._log_agent_operation("Verifying originality against competitors", text_length=len(text))

        try:
            if not text or len(text.strip()) < 50:
                logger.warning(f"[{self.__class__.__name__}] Text too short for meaningful originality check")
                return {"originality_score": 0.0, "reason": "Text too short"}

            # STUB: Implement cross-index search against competitor content
            # This would search the text against a competitor-specific index
            logger.info(f"[{self.__class__.__name__}] Originality verification stub completed")

            return {
                "originality_score": 0.95,  # Placeholder
                "confidence": 0.8,
                "method": "semantic_comparison",
                "notes": "Competitor index integration pending"
            }

        except Exception as e:
            logger.error(f"[{self.__class__.__name__}] Failed to verify originality: {e}")
            logger.error(f"[{self.__class__.__name__}] Full traceback: {traceback.format_exc()}")
            return {"originality_score": 0.0, "error": str(e)}

    async def style_enforcer(self, text: str, style_guidelines: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
        """
        Tool: Ensures content adheres to brand voice and style guidelines.

        When no guidelines are passed and a SIF service is configured, the
        guidelines are looked up in the intelligence index. The checks are
        simple heuristics (contractions in formal text, average sentence
        length), each costing 0.1 of the compliance score.

        Returns:
            Dict with ``compliance_score``, ``issues``, ``is_compliant`` and
            ``guidelines_source`` ("provided" when the caller supplied
            guidelines, "sif_index" when they were retrieved from the index,
            "none" when no guidelines were available), or ``{"error": ...}``
            on failure.
        """
        self._log_agent_operation("Enforcing style guidelines", text_length=len(text))

        try:
            if not text:
                return {"compliance_score": 0.0, "issues": ["No text provided"]}

            # Record where the guidelines come from before they may be
            # replaced by a SIF lookup, so the reported source is accurate.
            guidelines_source = "provided" if style_guidelines else "none"

            # 1. Fetch Style Guidelines from SIF if not provided
            if not style_guidelines and self.sif_service:
                try:
                    # Search for website analysis to get brand voice/style
                    # We assume the most relevant 'website_analysis' doc contains the guidelines
                    results = await self.intelligence.search("website analysis brand voice style", limit=1)
                    if results:
                        import json
                        res = results[0]
                        metadata_str = res.get('object')
                        metadata = json.loads(metadata_str) if isinstance(metadata_str, str) else (metadata_str or res)

                        if metadata.get('type') == 'website_analysis':
                            report = metadata.get('full_report', {})
                            style_guidelines = {
                                "tone": report.get('brand_analysis', {}).get('brand_voice', 'neutral'),
                                "style_patterns": report.get('style_patterns', {}),
                                "writing_style": report.get('writing_style', {})
                            }
                            guidelines_source = "sif_index"
                            logger.info(f"[{self.__class__.__name__}] Retrieved style guidelines from SIF: {style_guidelines.get('tone')}")
                except Exception as e:
                    logger.warning(f"[{self.__class__.__name__}] Failed to retrieve style guidelines from SIF: {e}")

            issues = []
            score = 1.0

            # Basic Heuristic Checks (Placeholder for LLM-based style analysis)

            # 1. Tone Check (e.g., formal vs casual)
            # If guidelines specify 'formal', check for contractions
            tone = style_guidelines.get('tone', '').lower() if style_guidelines else ''
            if 'formal' in tone or 'professional' in tone:
                contractions = ["can't", "won't", "don't", "it's"]
                found_contractions = [c for c in contractions if c in text.lower()]
                if found_contractions:
                    issues.append(f"Found contractions in formal text: {', '.join(found_contractions[:3])}...")
                    score -= 0.1

            # 2. Length/Sentence Structure (simple metric)
            # Average over real sentences only: the fragment after a trailing
            # '.' is empty and would otherwise drag the average down.
            sentences = [s for s in text.split('.') if s.strip()]
            avg_len = sum(len(s.split()) for s in sentences) / max(1, len(sentences))
            if avg_len > 25:
                issues.append("Average sentence length is too high (>25 words). Consider shortening.")
                score -= 0.1

            return {
                "compliance_score": max(0.0, score),
                "issues": issues,
                "is_compliant": score > 0.8,
                "guidelines_source": guidelines_source
            }

        except Exception as e:
            logger.error(f"[{self.__class__.__name__}] Style enforcement failed: {e}")
            return {"error": str(e)}

    async def safety_filter(self, text: str) -> Dict[str, Any]:
        """
        Tool: Flags potentially harmful, offensive, or sensitive content.

        Matches a fixed keyword blocklist against the lowercased text as
        whole words.

        Returns:
            Dict with ``is_safe``, ``flags`` (matched keywords),
            ``safety_score`` (1.0 or 0.0) and ``action`` ("approve" or
            "flag_for_review"), or ``{"error": ...}`` on failure.
        """
        self._log_agent_operation("Running safety filter", text_length=len(text))

        try:
            import re

            # Basic Keyword Blocklist (Placeholder for LLM/Safety Model)
            # In production, this should call a dedicated safety API (e.g., OpenAI Moderation, Llama Guard)
            unsafe_keywords = [
                "hate", "kill", "murder", "attack", "destroy",  # Violent
                "scam", "fraud", "steal",  # Illegal
                "explicit", "adult"  # NSFW
            ]

            text_lower = text.lower()
            # Regex word boundaries also catch keywords at the start or end of
            # the text and next to punctuation, which a space-padded substring
            # test misses.
            found_flags = [
                keyword for keyword in unsafe_keywords
                if re.search(rf"\b{re.escape(keyword)}\b", text_lower)
            ]

            is_safe = len(found_flags) == 0

            return {
                "is_safe": is_safe,
                "flags": found_flags,
                "safety_score": 1.0 if is_safe else 0.0,
                "action": "approve" if is_safe else "flag_for_review"
            }

        except Exception as e:
            logger.error(f"[{self.__class__.__name__}] Safety filter failed: {e}")
            return {"error": str(e)}
class LinkGraphAgent(SIFBaseAgent):
"""
Agent for internal link suggestions, graph management, and authority analysis.

View File

@@ -40,6 +40,7 @@ from .specialized_agents import (
)
from .trend_surfer_agent import TrendSurferAgent
from .content_gap_radar_agent import ContentGapRadarAgent
# Agent Orchestrator
from .agent_orchestrator import (
@@ -67,6 +68,7 @@ __all__ = [
'SEOOptimizationAgent',
'SocialAmplificationAgent',
'TrendSurferAgent',
'ContentGapRadarAgent',
'ALwrityAgentOrchestrator',
'orchestration_service'
]

View File

@@ -230,7 +230,7 @@ class ALwrityAgentOrchestrator:
# Content Guardian Agent
if enabled_by_key.get("content_guardian", True):
try:
from services.intelligence.sif_agents import ContentGuardianAgent
from services.intelligence.agents.specialized.content_guardian import ContentGuardianAgent
from services.intelligence.txtai_service import TxtaiIntelligenceService
# Initialize intelligence service if not already available
@@ -248,6 +248,19 @@ class ALwrityAgentOrchestrator:
except Exception as e:
logger.error(f"Failed to initialize ContentGuardianAgent: {e}")
# Content Gap Radar Agent
if enabled_by_key.get("content_gap_radar", True):
try:
from services.intelligence.agents import ContentGapRadarAgent
from services.intelligence.txtai_service import TxtaiIntelligenceService
intel_service = TxtaiIntelligenceService(self.user_id)
self.content_gap_radar_agent = ContentGapRadarAgent(intel_service, self.user_id)
self.agents['content_gap_radar'] = self.content_gap_radar_agent
initialized_agents.append("Content Gap Radar")
logger.info(f"Initialized ContentGapRadarAgent for user {self.user_id}")
except Exception as e:
logger.error(f"Failed to initialize ContentGapRadarAgent: {e}")
logger.info(f"Created {len(self.agents)} specialized agents for user {self.user_id}")
# Log initialization activity
@@ -449,7 +462,8 @@ class ALwrityAgentOrchestrator:
"competitor": ["Competitor monitoring", "Threat analysis", "Response generation", "Strategy execution"],
"seo": ["SEO auditing", "Issue prioritization", "Auto-fixing", "Strategy generation"],
"social": ["Social monitoring", "Content adaptation", "Engagement optimization", "Distribution management"],
"trend": ["Trend detection", "Opportunity analysis", "Content angle generation"]
"trend": ["Trend detection", "Opportunity analysis", "Content angle generation"],
"content_gap_radar": ["Content gap detection", "SERP opportunity scoring", "Competitor content deep-dive", "ROI-based topic prioritization", "Content brief generation"]
}
# Service class for agent orchestration

View File

@@ -0,0 +1,466 @@
"""
Content Gap Radar Agent
Scores and prioritizes content opportunities by combining SIF semantic gap analysis,
SERP ranking presence (Google CSE), competitor content deep-dive (Exa), and trend
momentum into a single ROI score per topic.
Phase 3 of the Content Gap Radar feature.
"""
import traceback
from typing import List, Dict, Any, Optional
from loguru import logger
from services.intelligence.agents.specialized import SIFBaseAgent
from services.intelligence.agents.specialized.strategy_architect import StrategyArchitectAgent
from services.intelligence.agents.trend_surfer_agent import TrendSurferAgent
from services.intelligence.agents.core_agent_framework import TaskProposal
from services.intelligence.txtai_service import TxtaiIntelligenceService
from services.seo_tools.serp_gap_service import SerpGapService
from services.seo_tools.competitor_content_service import CompetitorContentService
class ContentGapRadarAgent(SIFBaseAgent):
"""
Agent that scores and prioritizes content opportunities by combining
SIF semantic gap analysis, SERP ranking presence, Exa competitor content,
and trend momentum into a single ROI score.
"""
def __init__(self, intelligence_service: TxtaiIntelligenceService, user_id: str, **kwargs):
    """Initialise the radar agent and the collaborators it aggregates.

    Args:
        intelligence_service: Intelligence service handed to the base agent
            and to the nested ``StrategyArchitectAgent``.
        user_id: Identifier of the user this agent works for.
        **kwargs: Forwarded unchanged to ``SIFBaseAgent.__init__``.
    """
    super().__init__(
        intelligence_service,
        user_id,
        agent_type="content_gap_radar",
        **kwargs,
    )
    self.user_id = user_id

    # Data sources combined by analyze(): SERP presence, competitor
    # content deep-dive and SIF semantic gaps.
    self.serp_service = SerpGapService()
    self.competitor_content_service = CompetitorContentService()
    self.strategy_architect = StrategyArchitectAgent(intelligence_service, user_id)
async def analyze(
    self,
    competitor_domains: List[str],
    competitor_indices: Optional[List[Any]] = None,
    topics: Optional[List[str]] = None,
    bypass_cache: bool = False,
) -> Dict[str, Any]:
    """Run the whole content gap radar pipeline and rank the topics.

    Pipeline:
        1. Derive topics from SIF semantic gaps (first 12) unless given.
        2. Collect SERP ranking data for every topic.
        3. Collect Exa competitor content for the first 6 topics.
        4. Collect trend signals from a ``TrendSurferAgent``.
        5. Score each topic with ``_score_topic``.
        6. Sort by ROI (descending) and summarise by priority.

    Args:
        competitor_domains: Known competitor domains.
        competitor_indices: SIF index positions for competitor docs.
        topics: Optional explicit topic list (derived from SIF if omitted).
        bypass_cache: Passed through to the SERP and Exa services.

    Returns:
        ``{"gaps": [...], "summary": {...}}``; on any failure the same
        shape with empty values plus an ``"error"`` message.
    """
    tag = f"[{self.__class__.__name__}]"
    self._log_agent_operation(
        "Running content gap radar",
        competitor_count=len(competitor_domains),
        topics_provided=bool(topics),
    )

    try:
        sif_gaps = []

        # Step 1: without explicit topics, the SIF gaps define them.
        if not topics:
            sif_gaps = await self.strategy_architect.find_semantic_gaps(
                competitor_indices or []
            )
            topics = [gap["topic"] for gap in sif_gaps[:12]]
            logger.info(f"{tag} Derived {len(topics)} topics from SIF gaps")

        if not topics:
            logger.info(f"{tag} No topics to analyze")
            return {"gaps": [], "summary": {}}

        # Explicit topics: still try to load SIF data for cross-referencing,
        # but treat a failure here as non-fatal.
        if not sif_gaps:
            try:
                sif_gaps = await self.strategy_architect.find_semantic_gaps(
                    competitor_indices or []
                )
            except Exception as e:
                logger.warning(f"{tag} SIF gap fetch failed (non-fatal): {e}")
                sif_gaps = []

        sif_map = {gap["topic"]: gap for gap in sif_gaps}

        # Step 2: SERP gap analysis for all topics.
        serp_data = await self.serp_service.analyze_topic_gaps(
            topics, competitor_domains, bypass_cache=bypass_cache
        )
        serp_map = {entry["topic"]: entry for entry in serp_data.get("gaps", [])}

        # Step 3: Exa deep-dive, limited to the first 6 topics (paid API).
        exa_data = await self.competitor_content_service.deep_dive(
            topics[:6], competitor_domains, bypass_cache=bypass_cache
        )
        exa_map = {entry["topic"]: entry for entry in exa_data.get("results", [])}

        # Step 4: trend momentum signals.
        trend_surfer = TrendSurferAgent(self.intelligence, self.user_id)
        trend_signals = await trend_surfer.surf_trends()

        # Step 5: score every topic, best ROI first.
        scored = [
            self._score_topic(
                topic=topic,
                sif_map=sif_map,
                serp_map=serp_map,
                exa_map=exa_map,
                trend_signals=trend_signals,
            )
            for topic in topics
        ]
        scored.sort(key=lambda item: item["roi_score"], reverse=True)

        # Step 6: per-priority counts for the summary.
        high_count = sum(1 for item in scored if item["priority"] == "high")
        medium_count = sum(1 for item in scored if item["priority"] == "medium")
        low_count = sum(1 for item in scored if item["priority"] == "low")

        logger.info(
            f"{tag} Scored {len(scored)} gaps: "
            f"{high_count} high, {medium_count} medium, {low_count} low"
        )

        return {
            "gaps": scored,
            "summary": {
                "total_topics_analyzed": len(topics),
                "high_priority": high_count,
                "medium_priority": medium_count,
                "low_priority": low_count,
            },
        }
    except Exception as e:
        logger.error(f"{tag} Content gap radar failed: {e}")
        logger.error(f"{tag} Full traceback: {traceback.format_exc()}")
        return {"gaps": [], "summary": {}, "error": str(e)}
async def propose_daily_tasks(self, context: Dict[str, Any]) -> List[TaskProposal]:
    """Propose up to three high-ROI content tasks from the gap radar.

    Reads ``onboarding_data.competitor_focus.top_competitor_domains`` from
    ``context``, runs ``analyze`` on those domains and turns the best
    high/medium priority gaps into ``TaskProposal`` objects.

    Args:
        context: Workflow context supplied by the agent committee poll.

    Returns:
        A list of at most three proposals; empty when no competitor domains
        are available or the analysis fails.
    """
    proposals: List[TaskProposal] = []

    # Each level may be missing *or* explicitly null; ``or`` covers both,
    # whereas ``.get(key, {})`` only covers a missing key.
    onboarding = (context or {}).get("onboarding_data") or {}
    competitor_focus = onboarding.get("competitor_focus") or {}
    competitor_domains = competitor_focus.get("top_competitor_domains") or []

    if not competitor_domains:
        logger.info(f"[{self.__class__.__name__}] No competitor domains in context, skipping")
        return proposals

    try:
        result = await self.analyze(
            competitor_domains=competitor_domains,
            competitor_indices=[],
        )
    except Exception as e:
        logger.error(f"[{self.__class__.__name__}] propose_daily_tasks failed: {e}")
        return proposals

    gaps = result.get("gaps", [])
    scored = [g for g in gaps if g["priority"] in ("high", "medium")]
    scored.sort(key=lambda x: x["roi_score"], reverse=True)

    for gap in scored[:3]:
        pillar_id = self._action_to_pillar(gap["recommended_action"])
        # "generate" tasks open the writer; everything else opens the radar.
        action_url = (
            "/blog-writer"
            if pillar_id == "generate"
            else "/seo-dashboard#content-gap-radar"
        )
        proposals.append(TaskProposal(
            title=f"Write about: {gap['topic']}",
            description=gap["recommended_action"],
            pillar_id=pillar_id,
            priority=gap["priority"],
            estimated_time=60 if pillar_id == "generate" else 30,
            source_agent="ContentGapRadarAgent",
            reasoning=(
                f"Content gap with {gap['scoring']['gap_size']:.0%} gap size, "
                f"{gap['scoring']['volume']:.0%} volume, "
                f"{gap['scoring']['trend']:.0%} trend momentum, "
                f"ROI {gap['roi_score']:.0%}"
            ),
            action_type="navigate",
            action_url=action_url,
            context_data={"gap": gap},
        ))

    return proposals
@staticmethod
def _action_to_pillar(recommended_action: str) -> str:
action_lower = recommended_action.lower()
if "optimize" in action_lower:
return "analyze"
return "generate"
def _score_topic(
self,
topic: str,
sif_map: Dict[str, Any],
serp_map: Dict[str, Any],
exa_map: Dict[str, Any],
trend_signals: List[Any],
) -> Dict[str, Any]:
"""Score a single topic with the ROI formula."""
# gap_size: from SIF coverage_delta
sif = sif_map.get(topic, {})
gap_size = sif.get("coverage_delta", 0.5)
# volume: from SERP gap — competitors ranking for this topic
serp = serp_map.get(topic, {})
comp_count = serp.get("competitor_count", 0)
total_domains = serp.get("total_domains_checked", 1)
volume = min(comp_count / max(total_domains, 1), 1.0)
# trend: match topic against TrendSurfer signals
trend_score = self._match_trend_score(topic, trend_signals)
# intent: classify topic commercial value
intent = self._classify_intent(topic)
# competition: Exa content depth as penalty
exa = exa_map.get(topic, {})
content_count = exa.get("total_results", 0)
competition = min(content_count / 10.0, 1.0)
# ROI = (gap_size × volume × trend × intent) × (1 - 0.3 × competition)
base_roi = gap_size * volume * trend_score * intent
roi = base_roi * (1 - 0.3 * competition)
# Priority thresholds
if roi >= 0.6:
priority = "high"
elif roi >= 0.3:
priority = "medium"
else:
priority = "low"
# Recommended action based on scoring profile
action = self._recommend_action(gap_size, competition, intent)
return {
"topic": topic,
"roi_score": round(roi, 3),
"priority": priority,
"recommended_action": action,
"scoring": {
"gap_size": round(gap_size, 3),
"volume": round(volume, 3),
"trend": round(trend_score, 3),
"intent": round(intent, 3),
"competition": round(competition, 3),
},
"sif_gap": sif if sif else None,
"serp_evidence": {
"competitors_found": serp.get("competitors_found", []),
"competitor_count": comp_count,
"domains_with_content": serp.get("domains_with_content", []),
} if serp else None,
"competitor_content": exa if exa else None,
}
def _match_trend_score(self, topic: str, signals: List[Dict[str, Any]]) -> float:
if not signals:
return 0.5
topic_lower = topic.lower()
topic_words = set(topic_lower.split())
best_score = 0.0
for signal in signals:
impact = signal.get("impact_score", 0.5)
text_fields = " ".join(filter(None, [
signal.get("topic", ""),
signal.get("headline", ""),
signal.get("suggested_angle", ""),
]))
text_lower = text_fields.lower()
if topic_lower in text_lower:
best_score = max(best_score, impact)
text_words = set(text_lower.split())
overlap = len(topic_words & text_words)
if overlap > 0:
word_score = (overlap / max(len(topic_words), 1)) * impact
best_score = max(best_score, word_score)
return max(best_score, 0.5)
def _classify_intent(self, topic: str) -> float:
"""
Classify topic intent using LLM with keyword fallback.
Returns intent score 0.0-1.0.
"""
topic_lower = topic.lower()
# Keyword-based heuristics
commercial_words = [
"best", "top", "review", "vs", "comparison", "alternative",
"vs.", "versus", "pricing", "cost", "price", "cheap",
"affordable", "discount", "coupon", "deal", "buy",
]
transactional_words = [
"buy", "purchase", "order", "subscribe", "sign up",
"download", "get started", "free trial", "demo",
]
has_commercial = any(w in topic_lower for w in commercial_words)
has_transactional = any(w in topic_lower for w in transactional_words)
if has_transactional:
return 0.9
if has_commercial:
return 0.7
return 0.4 # Informational default
def _recommend_action(
self, gap_size: float, competition: float, intent: float
) -> str:
"""Generate a recommended action based on scoring profile."""
if gap_size > 0.7 and competition < 0.3:
return "Create comprehensive pillar page — large gap, low competition"
elif gap_size > 0.5 and intent > 0.6:
return "Create high-conversion content — significant gap, strong intent"
elif competition > 0.7:
return "Create differentiated content — high competition requires unique angle"
elif gap_size < 0.3:
return "Optimize existing content — incremental gap, update current pages"
else:
return "Create targeted blog post — moderate opportunity"
async def generate_content_brief(
self,
topic: str,
recommended_action: str,
scoring: Optional[Dict[str, float]] = None,
serp_evidence: Optional[Dict[str, Any]] = None,
sif_gap: Optional[Dict[str, Any]] = None,
) -> Dict[str, Any]:
"""
Generate a structured content brief from a gap item.
Uses LLM to produce title options, outline sections, target keywords,
and a writing angle. Falls back to template-based generation on LLM failure.
"""
gap_size = (scoring or {}).get("gap_size", 0.5)
volume = (scoring or {}).get("volume", 0.5)
trend = (scoring or {}).get("trend", 0.5)
intent = (scoring or {}).get("intent", 0.5)
competition = (scoring or {}).get("competition", 0.5)
word_count = 800 if competition > 0.7 else 1200 if gap_size > 0.5 else 600
serp_context = ""
if serp_evidence and serp_evidence.get("competitors_found"):
snippets = [
f"- {c.get('title','')}: {c.get('snippet','')[:100]}"
for c in serp_evidence["competitors_found"][:3]
]
serp_context = "Competitor content already ranking:\n" + "\n".join(snippets)
sif_context = ""
if sif_gap:
sif_context = (
f"SIF coverage delta: {sif_gap.get('coverage_delta', 0):.2%}, "
f"confidence: {sif_gap.get('confidence', 0):.2%}"
)
prompt = f"""You are a senior content strategist. Create a detailed content brief for the topic below.
TOPIC: {topic}
RECOMMENDED ACTION: {recommended_action}
{serp_context}
{sif_context}
Scoring profile:
- Gap size: {gap_size:.0%}
- Search volume: {volume:.0%}
- Trend momentum: {trend:.0%}
- Intent score: {intent:.0%}
- Competition level: {competition:.0%}
- Target word count: {word_count}
Return a JSON object with these exact keys:
{{
"titles": ["Title option 1", "Title option 2", "Title option 3"],
"outline": [
{{"heading": "Section heading", "key_points": ["point 1", "point 2", "point 3"]}}
],
"keywords": ["keyword1", "keyword2", "keyword3", "keyword4", "keyword5"],
"angle": "A single paragraph describing the strategic writing angle",
"word_count": {word_count}
}}
Generate 4-6 outline sections. Only return valid JSON, no other text."""
try:
response = await self._generate_llm_response(prompt)
import json as _json
start = response.find("{")
end = response.rfind("}") + 1
if start >= 0 and end > start:
brief = _json.loads(response[start:end])
else:
raise ValueError("No JSON found in LLM response")
except Exception as e:
logger.warning(
f"[{self.__class__.__name__}] LLM brief generation failed, using template: {e}"
)
brief = {
"titles": [
f"The Ultimate Guide to {topic}",
f"{topic}: Strategies That Actually Work",
f"Why {topic} Matters More Than Ever",
],
"outline": [
{"heading": f"Introduction to {topic}", "key_points": ["Context and importance", "What this guide covers"]},
{"heading": "Why This Matters", "key_points": ["Current landscape", "Key challenges and opportunities"]},
{"heading": "Key Strategies", "key_points": ["Strategy 1 with examples", "Strategy 2 with implementation tips", "Strategy 3 for advanced practitioners"]},
{"heading": "Common Pitfalls to Avoid", "key_points": ["Mistake 1 and how to avoid it", "Mistake 2 and how to avoid it"]},
{"heading": "Measuring Success", "key_points": ["Key metrics to track", "Tools and methods for measurement"]},
{"heading": "Conclusion & Next Steps", "key_points": ["Summary of key takeaways", "Actionable next steps"]},
],
"keywords": [topic] + [topic.split()[-1]] if len(topic.split()) > 1 else [topic, "guide", "strategy"],
"angle": f"Create comprehensive, actionable content about {topic} that fills the gap identified in competitor analysis. Focus on providing unique insights and practical implementation guidance.",
"word_count": word_count,
}
return {
"topic": topic,
"recommended_action": recommended_action,
"brief": brief,
"scoring": scoring,
}

View File

@@ -144,25 +144,25 @@ class CompetitorResponseAgent(BaseALwrityAgent):
proposals.append(TaskProposal(
title="Review Competitor Content",
description=f"SIF found {competitor_count} competitor pages. Review for gap opportunities.",
pillar_id="create",
pillar_id="analyze",
priority="high",
estimated_time=45,
source_agent="CompetitorResponseAgent",
reasoning="SIF-detected competitor activity presents content gap opportunities.",
action_type="navigate",
action_url="/content-planning-dashboard"
action_url="/seo-dashboard"
))
else:
proposals.append(TaskProposal(
title="Research Competitor Topics",
description="Search for competitor content in your niche to identify coverage gaps.",
pillar_id="create",
pillar_id="analyze",
priority="medium",
estimated_time=30,
source_agent="CompetitorResponseAgent",
reasoning="Understanding competitor positioning improves content strategy.",
action_type="navigate",
action_url="/content-planning-dashboard"
action_url="/seo-dashboard"
))
return proposals

View File

@@ -1,6 +1,11 @@
"""
Content Guardian Agent implementation.
Content Guardian Agent — ALwrity's committee watchdog.
Audits committee proposals, evaluates agent behaviour, flags coverage gaps,
and alerts the user when agents need correction.
"""
import json
import traceback
import asyncio
from typing import List, Dict, Any, Optional
from datetime import datetime
from loguru import logger
@@ -8,59 +13,414 @@ from .base import SIFBaseAgent, TXTAI_AVAILABLE, Agent
from services.intelligence.agents.core_agent_framework import TaskProposal
from services.intelligence.txtai_service import TxtaiIntelligenceService
class ContentGuardianAgent(SIFBaseAgent):
"""Agent for monitoring brand consistency and quality."""
def __init__(self, intelligence_service: TxtaiIntelligenceService, user_id: str, **kwargs):
# Pass kwargs to superclass to handle 'task' and other framework arguments
super().__init__(intelligence_service, user_id, agent_type="content_guardian", **kwargs)
# ── known committee agents for critique ──────────────────────────
# Metadata per committee agent, keyed by the agent's class name:
#   label        - full display name
#   short        - abbreviated display name
#   pillar_focus - pillar the agent is expected to propose for
# _critique_agent() falls back to a generic entry for unknown agent names.
KNOWN_AGENTS = {
"ContentStrategyAgent": {"label": "Content Strategy", "short": "Strategy", "pillar_focus": "plan"},
"StrategyArchitectAgent": {"label": "Strategy Architect", "short": "Architect", "pillar_focus": "plan"},
"SEOOptimizationAgent": {"label": "SEO Optimization", "short": "SEO", "pillar_focus": "analyze"},
"SocialAmplificationAgent":{"label": "Social Amplification","short": "Social", "pillar_focus": "engage"},
"CompetitorResponseAgent": {"label": "Competitor Response", "short": "Competitor", "pillar_focus": "analyze"},
"ContentGapRadarAgent": {"label": "Content Gap Radar", "short": "Gap Radar", "pillar_focus": "generate"},
}
# Pillar ids recognised by the coverage and over-representation checks;
# proposals carrying any other pillar_id are ignored by those checks.
PILLAR_IDS = {"plan", "generate", "publish", "analyze", "engage", "remarket"}
# NOTE(review): not referenced in the visible part of this file — presumably
# the look-back window (in days) for committee cycle history; confirm usage.
COMMITTEE_CYCLE_WINDOW_DAYS = 30
class ContentGuardianAgent(SIFBaseAgent):
"""Committee watchdog — audits proposals, critiques agents, flags faults, alerts users."""
CANNIBALIZATION_THRESHOLD = 0.85
ORIGINALITY_THRESHOLD = 0.75
def __init__(self, intelligence_service: TxtaiIntelligenceService, user_id: str, sif_service: Any = None, **kwargs):
    """Initialise the guardian agent.

    Args:
        intelligence_service: Intelligence service passed to the base agent.
        user_id: Identifier of the user this agent works for.
        sif_service: Optional SIF service; when set, ``style_enforcer``
            looks up brand guidelines in the index.
        **kwargs: Forwarded unchanged to ``SIFBaseAgent.__init__``.
    """
    super().__init__(
        intelligence_service,
        user_id,
        agent_type="content_guardian",
        **kwargs,
    )
    self.sif_service = sif_service
# ── existing utilities ────────────────────────────────────────
async def _create_txtai_agent(self):
    """Create a specialized txtai Agent for content review.

    Returns:
        A txtai ``Agent`` exposing the ``brand_voice_checker`` tool, or
        ``None`` when txtai is unavailable.

    Raises:
        Exception: Whatever ``Agent(...)`` raises, after logging it.
    """
    if not TXTAI_AVAILABLE or Agent is None:
        return None
    try:
        # Use the wrapped ``llm`` attribute when the LLM object has one.
        _llm_for_agent = getattr(self.llm, "llm", self.llm)
        return Agent(
            tools=[
                {
                    "name": "brand_voice_checker",
                    "description": "Checks content against brand voice guidelines",
                    "target": self._check_brand_voice,
                }
            ],
            llm=_llm_for_agent,
            max_iterations=3,
        )
    except Exception as e:
        logger.error(f"Failed to create txtai agent for ContentGuardian: {e}")
        # Bare raise keeps the original traceback intact.
        raise
def _check_brand_voice(self, content: str) -> Dict[str, Any]:
"""Tool to check brand voice consistency."""
# This would use semantic search to compare against brand guidelines
return {
"consistent": True,
"score": 0.95,
"notes": "Content aligns with professional/authoritative tone."
}
return {"consistent": True, "score": 0.95, "notes": "Content aligns with professional/authoritative tone."}
async def propose_daily_tasks(self, context: Dict[str, Any]) -> List[TaskProposal]:
    """Propose quality assurance tasks.

    Currently always proposes a single low-priority content freshness
    audit; ``context`` is not inspected.

    Returns:
        A one-element list of ``TaskProposal``.
    """
    return [
        TaskProposal(
            title="Audit Old Content",
            description="Review top performing posts from >6 months ago for updates.",
            # "create" is not one of PILLAR_IDS, so the proposal would be
            # ignored by the coverage checks; reviewing existing content
            # belongs to the "analyze" pillar.
            pillar_id="analyze",
            priority="low",
            estimated_time=30,
            source_agent="ContentGuardianAgent",
            reasoning="Maintains content relevance and authority.",
            action_type="navigate",
            action_url="/content-planning-dashboard",
        )
    ]
async def perform_site_audit(self, website_url: str) -> Dict[str, Any]:
self._log_agent_operation("Performing site audit", website_url=website_url)
try:
results = await self.intelligence.search(f"website content analysis {website_url}", limit=10)
audit: Dict[str, Any] = {"website_url": website_url, "audit_timestamp": datetime.utcnow().isoformat(), "total_pages_crawled": len(results), "content_quality": None, "brand_voice_consistency": None, "safety_issues": None, "cannibalization_issues": None}
if not results: return audit
quality_scores, style_scores, safety_flags = [], [], []
for result in results:
text = result.get("text", "") or result.get("id", "")
if len(text) < 50: continue
quality = await self.assess_content_quality({"description": text, "title": website_url}); quality_scores.append(quality.get("score", 0.0))
style = await self.style_enforcer(text); style_scores.append(style.get("compliance_score", 0.0))
safety = await self.safety_filter(text)
if not safety.get("is_safe", True): safety_flags.append(safety.get("flags", []))
audit["content_quality"] = {"score": round(sum(quality_scores)/max(len(quality_scores),1),4), "pages_analyzed": len(quality_scores)}
audit["brand_voice_consistency"] = {"compliance_score": round(sum(style_scores)/max(len(style_scores),1),4), "pages_checked": len(style_scores)}
audit["safety_issues"] = {"has_issues": len(safety_flags)>0, "flagged_pages": len(safety_flags)}
audit["cannibalization_issues"] = await self.check_cannibalization(website_url)
return audit
except Exception as e: logger.error(f"[{self.__class__.__name__}] Site audit failed: {e}"); return {"website_url": website_url, "error": str(e), "audit_timestamp": datetime.utcnow().isoformat()}
async def assess_content_quality(self, website_data: Dict[str, Any]) -> Dict[str, Any]:
self._log_agent_operation("Assessing content quality")
try:
text = website_data.get('description','') or website_data.get('title','')
if not text: return {"score":0.5,"reason":"No content to analyze"}
style = await self.style_enforcer(text); safety = await self.safety_filter(text)
base = style.get('compliance_score',0.8)
if safety.get('action')=='flag_for_review': base*=0.5
return {"score":base,"style_analysis":style,"safety_analysis":safety,"analyzed_text_length":len(text)}
except Exception as e: return {"score":0.0,"error":str(e)}
async def check_cannibalization(self, new_draft: str) -> Dict[str, Any]:
self._log_agent_operation("Checking for semantic cannibalization", draft_length=len(new_draft))
try:
if not await self._ensure_intelligence_ready(): return {"warning":False,"error":"Service not initialized"}
if not new_draft or len(new_draft.strip())<50: return {"warning":False,"reason":"Draft too short"}
results = await self.intelligence.search(new_draft, limit=1)
if not results: return {"warning":False,"uniqueness_score":1.0}
score = results[0].get('score',0.0)
if score > self.CANNIBALIZATION_THRESHOLD: return {"warning":True,"similar_to":results[0].get('id','unknown'),"score":score,"threshold":self.CANNIBALIZATION_THRESHOLD,"recommendation":"Consider revising the draft to target a different angle or merge with existing content"}
return {"warning":False,"uniqueness_score":1.0-score}
except Exception as e: return {"warning":False,"error":str(e)}
async def verify_originality(self, text: str, competitor_index: Any) -> Dict[str, Any]:
"""(unchanged — kept for backward compat)"""
self._log_agent_operation("Verifying originality against competitors", text_length=len(text))
try:
if not text or len(text.strip())<50: return {"originality_score":0.0,"reason":"Text too short"}
query = text.strip(); competitor_results = []; method="user_index_competitor_filter"
if competitor_index is not None and hasattr(competitor_index,"search"):
method="competitor_index_search"; raw=competitor_index.search(query,limit=5)
if asyncio.iscoroutine(raw): raw=await raw
competitor_results=raw or []
else:
raw=await self.intelligence.search(query,limit=10)
for r in raw or []:
m_raw=r.get("object"); m=m_raw if isinstance(m_raw,dict) else {}
if not m and isinstance(m_raw,str):
try: m=json.loads(m_raw)
except Exception: m={}
if "competitor" in str(m.get("type","")).lower() or "competitor" in str(m.get("source","")).lower():
competitor_results.append(r)
if not competitor_results: return {"originality_score":1.0,"confidence":0.6,"method":method,"notes":"No competitor overlap detected"}
top=max(competitor_results,key=lambda i:float(i.get("score",0.0))); s=max(0.0,min(1.0,float(top.get("score",0.0))))
os_=max(0.0,round(1.0-s,4)); c=round(min(1.0,0.55+(min(len(competitor_results),5)*0.07)),3)
return {"originality_score":os_,"confidence":c,"method":method,"warning":os_<self.ORIGINALITY_THRESHOLD,"threshold":self.ORIGINALITY_THRESHOLD,"top_competitor_match":{"id":top.get("id"),"score":round(s,4)},"matches_evaluated":len(competitor_results)}
except Exception as e: return {"originality_score":0.0,"error":str(e)}
async def style_enforcer(self, text: str, style_guidelines: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
self._log_agent_operation("Enforcing style guidelines", text_length=len(text))
try:
if not text: return {"compliance_score":0.0,"issues":["No text provided"]}
if not style_guidelines and self.sif_service:
try:
r=await self.intelligence.search("website analysis brand voice style",limit=1)
if r:
m_raw=r[0].get('object'); m=json.loads(m_raw) if isinstance(m_raw,str) else (m_raw or r[0])
if m.get('type')=='website_analysis':
rep=m.get('full_report',{}); style_guidelines={"tone":rep.get('brand_analysis',{}).get('brand_voice','neutral'),"style_patterns":rep.get('style_patterns',{}),"writing_style":rep.get('writing_style',{})}
except Exception: pass
issues=[]; score=1.0
tone=(style_guidelines or {}).get('tone','').lower()
if 'formal' in tone or 'professional' in tone:
found=[c for c in ["can't","won't","don't","it's"] if c in text.lower()]
if found: issues.append(f"Found contractions in formal text: {', '.join(found[:3])}..."); score-=0.1
sentences=text.split('.'); avg=sum(len(s.split()) for s in sentences if s)/max(1,len(sentences))
if avg>25: issues.append("Average sentence length is too high (>25 words). Consider shortening."); score-=0.1
return {"compliance_score":max(0.0,score),"issues":issues,"is_compliant":score>0.8,"guidelines_source":"sif_index" if not style_guidelines and self.sif_service else "provided"}
except Exception as e: return {"error":str(e)}
async def safety_filter(self, text: str) -> Dict[str, Any]:
self._log_agent_operation("Running safety filter", text_length=len(text))
try:
kw=["hate","kill","murder","attack","destroy","scam","fraud","steal","explicit","adult"]
found=[k for k in kw if f" {k} " in text.lower()]
ok=len(found)==0
return {"is_safe":ok,"flags":found,"safety_score":1.0 if ok else 0.0,"action":"approve" if ok else "flag_for_review"}
except Exception as e: return {"error":str(e)}
# ═══════════════════════════════════════════════════════════════
# COMMITTEE WATCHDOG — the core audit entry point
# ═══════════════════════════════════════════════════════════════
async def audit_committee(self, proposals: List[Dict[str, Any]]) -> Dict[str, Any]:
"""
Audits a batch of committee proposals and returns a structured report.
proposals: list of dicts with at minimum:
agent, title, pillar_id, priority, reasoning, accepted, valid
"""
if not proposals:
return {
"health_score": 0, "verdict": "No proposals received from any agent",
"agent_critiques": [], "coverage_gaps": [], "overlaps": [],
"alerts": []
}
by_agent: Dict[str, List[Dict]] = {}
for p in proposals:
by_agent.setdefault(p.get("agent", "unknown"), []).append(p)
# 1. Critique each agent
agent_critiques = []
for agent_name, agent_props in sorted(by_agent.items()):
critique = self._critique_agent(agent_name, agent_props)
agent_critiques.append(critique)
# 2. Coverage check
coverage_gaps = self._find_coverage_gaps(proposals)
overstuffed = self._find_overstuffed_pillars(proposals)
# 3. Overlap detection
overlaps = self._find_overlaps(proposals)
# 4. Overall health score
health_score = self._compute_health_score(agent_critiques, coverage_gaps, overlaps)
# 5. Generate actionable alerts
alerts = self._generate_alerts(agent_critiques, coverage_gaps, overlaps)
verdict = self._verdict_text(health_score, agent_critiques, coverage_gaps)
return {
"health_score": health_score,
"verdict": verdict,
"agent_critiques": agent_critiques,
"coverage_gaps": coverage_gaps,
"overstuffed_pillars": overstuffed,
"overlaps": overlaps,
"alerts": alerts,
"audit_timestamp": datetime.utcnow().isoformat(),
}
# ── agent critique ────────────────────────────────────────────
def _critique_agent(self, agent_name: str, proposals: List[Dict]) -> Dict[str, Any]:
    """Critique one agent's proposals and compute its 0-100 score.

    Each proposal is checked for weak reasoning (reasoning score below
    0.5), low priority on the agent's own focus pillar, and proposals
    outside the focus pillar. Rejected proposals add a fourth issue.

    Score: starts at 100, minus 15 per weak-reasoning proposal, minus 10
    per under-prioritised proposal, minus 20 when the acceptance rate is
    below 0.3 and a further 30 when nothing was accepted; clamped to
    0-100. Off-pillar proposals are reported but do not affect the score.

    Returns:
        Critique dict with the agent's labels, score, health band,
        acceptance counts, issue list and a summary sentence.
    """
    info = KNOWN_AGENTS.get(
        agent_name,
        {"label": agent_name, "short": agent_name[:6], "pillar_focus": None},
    )
    focus = info["pillar_focus"]

    total = len(proposals)
    accepted = sum(1 for p in proposals if p.get("accepted"))
    rejected = total - accepted
    acceptance_rate = accepted / total if total > 0 else 0

    weak_reasoning = []
    poor_priority = []
    off_pillar = []

    for p in proposals:
        # Reasoning quality
        reason = (p.get("reasoning") or "").strip()
        r_score = self._reasoning_score(reason)
        if r_score < 0.5:
            weak_reasoning.append({"title": p.get("title", ""), "reasoning": reason, "score": r_score})

        # Priority appropriateness: low priority on the agent's core pillar.
        pr = (p.get("priority") or "").lower()
        if focus and pr == "low" and p.get("pillar_id") == focus:
            poor_priority.append({
                "title": p.get("title", ""),
                "pillar": p.get("pillar_id", ""),
                "priority": pr,
                "note": f"Pillar '{info['pillar_focus']}' is {info['label']}'s core — low priority seems wrong",
            })

        # Pillar relevance: proposal targets a pillar other than the focus.
        if focus and p.get("pillar_id") and p["pillar_id"] != focus:
            off_pillar.append({
                "title": p.get("title", ""),
                "proposed_pillar": p.get("pillar_id", ""),
                "expected_pillar": info["pillar_focus"],
                "note": f"'{info['label']}' proposed for '{p['pillar_id']}' pillar but typically operates in '{info['pillar_focus']}'",
            })

    def _issue(kind: str, severity: str, count: int, summary: str, details: List[Dict], action_label: str) -> Dict[str, Any]:
        # All issue entries share this shape; action_url is always unset here.
        return {
            "type": kind,
            "severity": severity,
            "count": count,
            "summary": summary,
            "details": details,
            "action_label": action_label,
            "action_url": None,
        }

    issues = []
    if weak_reasoning:
        issues.append(_issue(
            "weak_reasoning", "warning", len(weak_reasoning),
            f"{len(weak_reasoning)} proposal(s) with vague or empty reasoning",
            weak_reasoning, "Improve reasoning",
        ))
    if poor_priority:
        issues.append(_issue(
            "poor_priority", "warning", len(poor_priority),
            f"{len(poor_priority)} proposal(s) under-prioritised for core pillar",
            poor_priority, "Review priorities",
        ))
    if off_pillar:
        issues.append(_issue(
            "off_pillar", "info", len(off_pillar),
            f"{len(off_pillar)} proposal(s) outside usual pillar",
            off_pillar, "Review pillar assignment",
        ))
    if rejected > 0:
        rejections = [
            {"title": p.get("title", ""), "reason": p.get("rejected_reason", "no reason")}
            for p in proposals
            if not p.get("accepted")
        ]
        issues.append(_issue(
            "rejected_proposals",
            "error" if acceptance_rate < 0.3 else "warning",
            rejected,
            f"{rejected} proposal(s) rejected by committee",
            rejections, "Review rejections",
        ))

    # Agent score (0-100)
    score = 100 - len(weak_reasoning) * 15 - len(poor_priority) * 10
    if acceptance_rate < 0.3:
        score -= 20
    if acceptance_rate == 0:
        score = max(0, score - 30)
    score = max(0, min(100, score))

    if score >= 80:
        health = "good"
    elif score >= 50:
        health = "warning"
    else:
        health = "failing"

    return {
        "agent": agent_name,
        "label": info["label"],
        "short": info["short"],
        "score": score,
        "health": health,
        "total_proposals": total,
        "accepted": accepted,
        "rejected": rejected,
        "acceptance_rate": round(acceptance_rate, 2),
        "issues": issues,
        "summary": self._agent_summary(health, score, accepted, total, weak_reasoning, poor_priority),
    }
# ── reasoning quality ─────────────────────────────────────────
def _reasoning_score(self, reasoning: str) -> float:
if not reasoning or len(reasoning) < 10:
return 0.0
# Short = weak
if len(reasoning) < 25:
return 0.2
if len(reasoning) < 50:
return 0.4
# Has specifics
specifics = ["because", "since", "based on", "data", "metric", "trend", "observed",
"target", "audience", "competitor", "gap", "opportunity", "improve",
"increase", "reduce", "goal", "kpi", "score", "result"]
found = sum(1 for s in specifics if s in reasoning.lower())
base = min(1.0, 0.4 + found * 0.1)
# Length bonus
if len(reasoning) > 100:
base = min(1.0, base + 0.15)
return min(1.0, base)
# ── coverage ──────────────────────────────────────────────────
def _find_coverage_gaps(self, proposals: List[Dict]) -> List[Dict]:
    """List every known pillar that no proposal targets.

    Args:
        proposals: Proposal dicts; only the ``pillar_id`` key is read.

    Returns:
        One warning entry per member of ``PILLAR_IDS`` that is absent from
        the proposals, in sorted pillar order.
    """
    proposed_ids = (item.get("pillar_id") for item in proposals)
    # Only recognised, non-empty pillar ids count as coverage.
    covered = {pid for pid in proposed_ids if pid and pid in PILLAR_IDS}
    return [
        {
            "pillar_id": pid,
            "severity": "warning",
            "summary": f"Pillar '{pid}' has no proposals from any agent",
            "action_label": "Add task",
            "action_url": None,
        }
        for pid in sorted(PILLAR_IDS)
        if pid not in covered
    ]
def _find_overstuffed_pillars(self, proposals: List[Dict]) -> List[Dict]:
    """Flag pillars that receive more than half of all proposals.

    Args:
        proposals: Proposal dicts; only the ``pillar_id`` key is read.

    Returns:
        One informational entry per recognised pillar whose share of
        ``len(proposals)`` exceeds 50%, in sorted pillar order.
    """
    tally: Dict[str, int] = {}
    for pid in (item.get("pillar_id") for item in proposals):
        if not pid or pid not in PILLAR_IDS:
            continue
        tally[pid] = tally.get(pid, 0) + 1

    # The share is taken over ALL proposals, including those without a
    # recognised pillar.
    total = len(proposals)
    flagged = []
    for pid in sorted(tally):
        count = tally[pid]
        if not (total > 0 and count / total > 0.5):
            continue
        flagged.append({
            "pillar_id": pid,
            "count": count,
            "total": total,
            "severity": "info",
            "summary": f"Pillar '{pid}' has {count}/{total} proposals ({count/total*100:.0f}%) — may be over-represented",
            "action_label": None,
            "action_url": None,
        })
    return flagged
# ── overlap detection ─────────────────────────────────────────
def _find_overlaps(self, proposals: List[Dict]) -> List[Dict]:
overlaps = []
by_title: Dict[str, List[Dict]] = {}
for p in proposals:
t = (p.get("title") or "").strip().lower()
by_title.setdefault(t, []).append(p)
for title, dups in by_title.items():
if len(dups) > 1 and title:
agents = [d.get("agent","?") for d in dups]
overlaps.append({"title": dups[0].get("title",""), "pillar": dups[0].get("pillar_id",""),
"agents": agents, "count": len(dups),
"severity": "warning",
"summary": f"{len(dups)} agents proposed '{dups[0].get('title','')}': {', '.join(agents)}",
"action_label": "Resolve conflict", "action_url": None})
return overlaps
# ── health ────────────────────────────────────────────────────
def _compute_health_score(self, critiques: List[Dict], gaps: List[Dict], overlaps: List[Dict]) -> int:
score = 100
for c in critiques:
if c["health"] == "failing": score -= 15
elif c["health"] == "warning": score -= 8
score -= len(gaps) * 10
score -= len(overlaps) * 5
return max(0, min(100, score))
def _verdict_text(self, health: int, critiques: List[Dict], gaps: List[Dict]) -> str:
if health >= 90:
return "Committee is performing well — all agents submitting quality proposals with good coverage."
failing = [c for c in critiques if c["health"] == "failing"]
warning = [c for c in critiques if c["health"] == "warning"]
parts = []
if failing:
parts.append(f"{len(failing)} agent(s) need attention: {', '.join(c['label'] for c in failing)}")
if warning:
parts.append(f"{len(warning)} agent(s) showing issues: {', '.join(c['label'] for c in warning)}")
if gaps:
parts.append(f"Missing coverage: {', '.join(g['pillar_id'] for g in gaps)}")
if not parts:
parts.append("Minor issues detected — monitoring.")
return "".join(parts)
def _agent_summary(self, health: str, score: int, accepted: int, total: int, weak: List, poor: List) -> str:
if health == "failing":
return f"Score {score}/100 — {accepted}/{total} accepted, {len(weak)} weak reasoning, {len(poor)} under-prioritised"
if health == "warning":
return f"Score {score}/100 — {accepted}/{total} accepted, {len(weak)} weak reasoning"
return f"Score {score}/100 — {accepted}/{total} accepted"
# ── alerts ────────────────────────────────────────────────────
def _generate_alerts(self, critiques: List[Dict], gaps: List[Dict], overlaps: List[Dict]) -> List[Dict]:
alerts = []
for c in critiques:
if c["health"] == "failing":
alerts.append({
"type": "agent_failing", "severity": "error",
"agent": c["agent"], "label": c["label"],
"title": f"{c['label']} needs attention",
"message": c["summary"],
"cta_path": None,
})
for issue in c.get("issues", []):
if issue["type"] == "weak_reasoning" and issue["count"] >= 3:
alerts.append({
"type": "weak_reasoning", "severity": "warning",
"agent": c["agent"], "label": c["label"],
"title": f"{c['label']}: {issue['count']} proposals with weak reasoning",
"message": issue["summary"],
"cta_path": None,
})
for g in gaps:
alerts.append({
"type": "coverage_gap", "severity": "warning",
"agent": None, "label": None,
"title": f"Coverage gap: pillar '{g['pillar_id']}'",
"message": g["summary"],
"cta_path": None,
})
for o in overlaps:
alerts.append({
"type": "proposal_overlap", "severity": "warning",
"agent": None, "label": None,
"title": f"Duplicate proposal: '{o['title']}'",
"message": o["summary"],
"cta_path": None,
})
return alerts

View File

@@ -294,21 +294,95 @@ class ContentStrategyAgent(BaseALwrityAgent):
async def propose_daily_tasks(self, context: Dict[str, Any]) -> List[TaskProposal]:
"""
Propose strategic tasks based on content analysis.
Propose strategic tasks based on user onboarding context.
Derives content pillars, industry, and competitor info to
generate personalized daily content suggestions.
"""
proposals = []
# 1. Content Refresh
onboarding = context.get("onboarding_data", {})
if not isinstance(onboarding, dict):
return proposals
# Extract user profile hints from onboarding data
industry = ""
content_pillars = []
competitor_domains = []
try:
cp = onboarding.get("core_persona") or {}
if isinstance(cp, dict):
industry = str(cp.get("industry") or cp.get("company_type") or "")
step2 = onboarding.get("step2_summary") or onboarding.get("industry_context") or {}
if isinstance(step2, dict):
content_pillars = (
step2.get("content_pillars")
or step2.get("topics")
or onboarding.get("content_pillars")
or []
)
cf = onboarding.get("competitor_focus") or {}
if isinstance(cf, dict):
competitor_domains = cf.get("top_competitor_domains") or []
except Exception:
pass
# Task 1: Create content for a key pillar (generate)
if content_pillars:
pillar_topic = content_pillars[0] if isinstance(content_pillars[0], str) else (
content_pillars[0].get("topic") or content_pillars[0].get("name") or "your audience"
)
proposals.append(TaskProposal(
title=f"Create content for '{pillar_topic}'",
description=f"Write a blog post or social content around your {pillar_topic} content pillar.",
pillar_id="generate",
priority="high",
estimated_time=45,
source_agent="ContentStrategyAgent",
reasoning=f"'{pillar_topic}' is a core content pillar in your strategy. Regular publishing keeps your topical authority growing.",
action_type="navigate",
action_url="/blog-writer",
context_data={"pillar_topic": pillar_topic, "industry": industry},
))
else:
proposals.append(TaskProposal(
title="Define your content pillars",
description="Set up your core content topics to get personalized daily suggestions.",
pillar_id="plan",
priority="high",
estimated_time=20,
source_agent="ContentStrategyAgent",
reasoning="Content pillars drive every other task in your workflow. Defining them unlocks the full agent committee.",
action_type="navigate",
action_url="/content-planning-dashboard",
))
# Task 2: Competitor content review (analyze)
if competitor_domains:
domain = competitor_domains[0]
proposals.append(TaskProposal(
title=f"Review competitor: {domain}",
description=f"Analyze recently published content from {domain} to find gaps and opportunities.",
pillar_id="analyze",
priority="medium",
estimated_time=25,
source_agent="ContentStrategyAgent",
reasoning=f"{domain} is your top tracked competitor. Regular reviews help you stay ahead of their content strategy moves.",
action_type="navigate",
action_url="/seo-dashboard",
context_data={"competitor_domain": domain},
))
# Task 3: Content audit (analyze) — always suggested
proposals.append(TaskProposal(
title="Refresh 'SEO Basics'",
description="Update your SEO basics guide with 2024 trends.",
pillar_id="create",
priority="high",
estimated_time=45,
title="Quick content performance audit",
description="Review your top 3 pieces from last month. Identify what worked and what to update.",
pillar_id="analyze",
priority="medium",
estimated_time=20,
source_agent="ContentStrategyAgent",
reasoning="Declining traffic and outdated references.",
reasoning="Regular audits surface declining pages that need refreshing and winning formats to double down on.",
action_type="navigate",
action_url="/content-planning-dashboard"
action_url="/content-planning-dashboard",
))
return proposals

View File

@@ -168,25 +168,25 @@ class SEOOptimizationAgent(BaseALwrityAgent):
proposals.append(TaskProposal(
title="Review SEO Issues",
description=f"SIF indexed content suggests {issues_found} areas that may need SEO attention.",
pillar_id="distribute",
pillar_id="analyze",
priority="high",
estimated_time=30,
source_agent="SEOOptimizationAgent",
reasoning="Addressing SEO gaps improves organic visibility.",
action_type="navigate",
action_url="/content-planning-dashboard"
action_url="/seo-dashboard"
))
else:
proposals.append(TaskProposal(
title="Run SEO Audit",
description="Perform a comprehensive SEO audit to identify optimization opportunities.",
pillar_id="distribute",
pillar_id="analyze",
priority="medium",
estimated_time=15,
source_agent="SEOOptimizationAgent",
reasoning="Regular audits prevent SEO degradation.",
action_type="navigate",
action_url="/content-planning-dashboard"
action_url="/seo-dashboard"
))
return proposals

View File

@@ -126,21 +126,85 @@ class SocialAmplificationAgent(BaseALwrityAgent):
async def propose_daily_tasks(self, context: Dict[str, Any]) -> List[TaskProposal]:
"""
Propose social media tasks.
Propose social media tasks based on user's onboarding context.
Derives platforms and content types from user data.
"""
proposals = []
# 1. Social Post Creation
onboarding = context.get("onboarding_data", {})
if not isinstance(onboarding, dict):
return proposals
# Extract selected platforms from onboarding step 5
selected_platforms = []
try:
step5 = onboarding.get("step5_summary") or onboarding.get("distribution_channels") or {}
if isinstance(step5, dict):
sp = step5.get("selected_platforms") or step5.get("platforms") or []
selected_platforms = [p for p in sp if isinstance(p, str)]
if not selected_platforms:
# Fallback: check top-level keys
for key in ("selected_platforms", "platforms", "social_platforms"):
val = onboarding.get(key)
if isinstance(val, list):
selected_platforms = [p for p in val if isinstance(p, str)]
break
except Exception:
pass
platform_urls = {
"linkedin": "/linkedin-writer",
"facebook": "/facebook-writer",
"twitter": "/linkedin-writer", # no dedicated twitter writer, use linkedin as fallback
"instagram": "/linkedin-writer",
"tiktok": "/linkedin-writer",
"youtube": "/linkedin-writer",
}
target_platforms = [p for p in selected_platforms if p.lower() in platform_urls]
if not target_platforms:
# No known platforms configured — generic engage task
proposals.append(TaskProposal(
title="Share content on social media",
description="Promote your latest published piece across your social channels.",
pillar_id="engage",
priority="medium",
estimated_time=20,
source_agent="SocialAmplificationAgent",
reasoning="Social distribution drives referral traffic and builds audience engagement.",
action_type="navigate",
action_url="/linkedin-writer",
))
return proposals
platform = target_platforms[0]
platform_label = platform.capitalize()
proposals.append(TaskProposal(
title="Create LinkedIn Thread",
description="Summarize your latest blog post into a 5-tweet thread.",
pillar_id="distribute",
title=f"Share content on {platform_label}",
description=f"Adapt and publish your latest content as a {platform_label} post to drive engagement.",
pillar_id="engage",
priority="medium",
estimated_time=20,
source_agent="SocialAmplificationAgent",
reasoning="Repurpose existing content.",
reasoning=f"Consistent {platform_label} posting maintains audience engagement and extends content reach.",
action_type="navigate",
action_url="/content-planning-dashboard"
action_url=platform_urls[platform.lower()],
context_data={"platform": platform.lower()},
))
if len(target_platforms) > 1:
platform2 = target_platforms[1]
proposals.append(TaskProposal(
title=f"Cross-post to {platform2.capitalize()}",
description=f"Repurpose your latest content for your {platform2.capitalize()} audience.",
pillar_id="engage",
priority="low",
estimated_time=15,
source_agent="SocialAmplificationAgent",
reasoning=f"Cross-posting to {platform2.capitalize()} increases reach without additional content creation cost.",
action_type="navigate",
action_url=platform_urls[platform2.lower()],
context_data={"platform": platform2.lower()},
))
return proposals

View File

@@ -587,334 +587,6 @@ class StrategyArchitectAgent(SIFBaseAgent):
return samples
class ContentGuardianAgent(SIFBaseAgent):
    """Agent for preventing cannibalization and ensuring content originality.

    Provides a site-level audit plus the individual checks it is built from:
    a quality assessment, a cannibalization check against the user's index,
    an originality check against competitor content, a heuristic style
    enforcer and a keyword-based safety filter.
    """

    CANNIBALIZATION_THRESHOLD = 0.85  # Similarity threshold for cannibalization warning
    ORIGINALITY_THRESHOLD = 0.75  # Minimum originality score

    def __init__(self, intelligence_service: TxtaiIntelligenceService, user_id: str, sif_service: Any = None):
        """Register the agent as ``content_guardian`` and keep the optional SIF service."""
        super().__init__(intelligence_service, user_id, agent_type="content_guardian")
        self.sif_service = sif_service

    async def perform_site_audit(self, website_url: str) -> Dict[str, Any]:
        """
        Perform a comprehensive content audit on the indexed website content.

        Called by the SIF indexing executor after content sync completes.

        Returns:
            A structured audit report with quality, brand voice, safety and
            cannibalization sections. The sections stay ``None`` when no
            indexed content is found. On failure, a dict with ``website_url``,
            ``error`` and ``audit_timestamp`` is returned instead of raising.
        """
        self._log_agent_operation("Performing site audit", website_url=website_url)

        try:
            # Search the user's SIF index for website content
            results = await self.intelligence.search(
                f"website content analysis {website_url}", limit=10
            ) or []

            audit: Dict[str, Any] = {
                "website_url": website_url,
                "audit_timestamp": datetime.utcnow().isoformat(),
                "total_pages_crawled": len(results),
                "content_quality": None,
                "brand_voice_consistency": None,
                "safety_issues": None,
                "cannibalization_issues": None,
            }

            if not results:
                logger.warning(f"[{self.__class__.__name__}] No indexed content found for {website_url}")
                return audit

            # Run assessments on each indexed page
            quality_scores = []
            style_scores = []
            safety_flags = []

            for result in results:
                # Coerce to str so a non-string id cannot abort the whole audit.
                text = str(result.get("text") or result.get("id") or "")
                if len(text) < 50:
                    continue

                quality = await self.assess_content_quality({"description": text, "title": website_url})
                quality_scores.append(quality.get("score", 0.0))

                # The quality assessment already ran the style and safety
                # checks on this exact text; reuse them rather than repeating
                # the work, and only re-run when the assessment failed.
                style = quality.get("style_analysis") or await self.style_enforcer(text)
                style_scores.append(style.get("compliance_score", 0.0))

                safety = quality.get("safety_analysis") or await self.safety_filter(text)
                if not safety.get("is_safe", True):
                    safety_flags.append(safety.get("flags", []))

            audit["content_quality"] = {
                "score": round(sum(quality_scores) / max(len(quality_scores), 1), 4),
                "pages_analyzed": len(quality_scores),
            }
            audit["brand_voice_consistency"] = {
                "compliance_score": round(sum(style_scores) / max(len(style_scores), 1), 4),
                "pages_checked": len(style_scores),
            }
            audit["safety_issues"] = {
                "has_issues": len(safety_flags) > 0,
                "flagged_pages": len(safety_flags),
            }

            # NOTE(review): the URL itself is passed as the "draft" here; the
            # check returns "Draft too short" for anything under 50 chars —
            # confirm this is the intended input.
            cannibalization = await self.check_cannibalization(website_url)
            audit["cannibalization_issues"] = cannibalization

            logger.info(
                f"[{self.__class__.__name__}] Site audit complete for {website_url}: "
                f"quality={audit['content_quality']['score']}, "
                f"brand_voice={audit['brand_voice_consistency']['compliance_score']}"
            )
            return audit

        except Exception as e:
            logger.error(f"[{self.__class__.__name__}] Site audit failed for {website_url}: {e}")
            return {
                "website_url": website_url,
                "error": str(e),
                "audit_timestamp": datetime.utcnow().isoformat(),
            }

    async def assess_content_quality(self, website_data: Dict[str, Any]) -> Dict[str, Any]:
        """Assess overall content quality based on website data.

        Args:
            website_data: Dict whose ``description`` (or, failing that,
                ``title``) is the text to analyse.

        Returns:
            ``score`` plus the underlying ``style_analysis`` and
            ``safety_analysis``. The score is the style compliance score,
            halved when the safety filter flags the text. Returns a score of
            0.5 when there is nothing to analyse and 0.0 with ``error`` on
            failure.
        """
        self._log_agent_operation("Assessing content quality")

        try:
            # Extract sample text or description from website_data
            text_to_analyze = website_data.get('description', '') or website_data.get('title', '')

            if not text_to_analyze:
                return {"score": 0.5, "reason": "No content to analyze"}

            # Run style check
            style_result = await self.style_enforcer(text_to_analyze)

            # Run safety check
            safety_result = await self.safety_filter(text_to_analyze)

            # Calculate aggregate score
            base_score = style_result.get('compliance_score', 0.8)
            if safety_result.get('action') == 'flag_for_review':
                base_score *= 0.5

            return {
                "score": base_score,
                "style_analysis": style_result,
                "safety_analysis": safety_result,
                "analyzed_text_length": len(text_to_analyze)
            }
        except Exception as e:
            logger.error(f"[{self.__class__.__name__}] Quality assessment failed: {e}")
            return {"score": 0.0, "error": str(e)}

    async def check_cannibalization(self, new_draft: str) -> Dict[str, Any]:
        """Check if a new draft competes semantically with existing pages.

        Returns:
            A dict whose ``warning`` flag is ``True`` only when the top match
            in the index scores above ``CANNIBALIZATION_THRESHOLD``. Errors
            are reported via an ``error`` key with ``warning`` set to ``False``.
        """
        self._log_agent_operation("Checking for semantic cannibalization", draft_length=len(new_draft or ""))

        try:
            if not await self._ensure_intelligence_ready():
                logger.error(f"[{self.__class__.__name__}] Intelligence service not initialized")
                return {"warning": False, "error": "Service not initialized"}

            if not new_draft or len(new_draft.strip()) < 50:
                logger.warning(f"[{self.__class__.__name__}] Draft too short for meaningful analysis")
                return {"warning": False, "reason": "Draft too short"}

            results = await self.intelligence.search(new_draft, limit=1)

            if not results:
                logger.info(f"[{self.__class__.__name__}] No similar content found - draft is unique")
                return {"warning": False, "uniqueness_score": 1.0}

            top_result = results[0]
            similarity_score = top_result.get('score', 0.0)

            logger.debug(f"[{self.__class__.__name__}] Top similarity score: {similarity_score:.4f}")

            if similarity_score > self.CANNIBALIZATION_THRESHOLD:
                warning_data = {
                    "warning": True,
                    "similar_to": top_result.get('id', 'unknown'),
                    "score": similarity_score,
                    "threshold": self.CANNIBALIZATION_THRESHOLD,
                    "recommendation": "Consider revising the draft to target a different angle or merge with existing content"
                }
                logger.warning(f"[{self.__class__.__name__}] Cannibalization detected: {warning_data}")
                return warning_data

            logger.info(f"[{self.__class__.__name__}] No cannibalization detected. Draft is sufficiently unique.")
            return {"warning": False, "uniqueness_score": 1.0 - similarity_score}

        except Exception as e:
            logger.error(f"[{self.__class__.__name__}] Failed to check cannibalization: {e}")
            logger.error(f"[{self.__class__.__name__}] Full traceback: {traceback.format_exc()}")
            return {"warning": False, "error": str(e)}

    async def verify_originality(self, text: str, competitor_index: Any) -> Dict[str, Any]:
        """Verify originality against competitor content index.

        Args:
            text: Draft text to check (at least 50 non-padding characters).
            competitor_index: Optional object exposing ``search(query, limit=...)``
                (sync or async). When absent, the user's own index is searched
                and filtered to documents whose metadata ``type`` or ``source``
                mentions "competitor".

        Returns:
            ``originality_score`` (1 minus the best competitor match score,
            clamped to [0, 1]) with ``confidence``, ``method`` and match
            details; ``warning`` is set when the score falls below
            ``ORIGINALITY_THRESHOLD``. Errors yield a score of 0.0 plus ``error``.
        """
        self._log_agent_operation("Verifying originality against competitors", text_length=len(text or ""))

        try:
            if not text or len(text.strip()) < 50:
                logger.warning(f"[{self.__class__.__name__}] Text too short for meaningful originality check")
                return {"originality_score": 0.0, "reason": "Text too short"}

            query = text.strip()
            competitor_results = []
            method = "user_index_competitor_filter"

            if competitor_index is not None and hasattr(competitor_index, "search"):
                method = "competitor_index_search"
                raw_results = competitor_index.search(query, limit=5)
                # Support both sync and async competitor indexes.
                if asyncio.iscoroutine(raw_results):
                    raw_results = await raw_results
                competitor_results = raw_results or []
            else:
                raw_results = await self.intelligence.search(query, limit=10)
                for result in raw_results or []:
                    metadata_raw = result.get("object")
                    metadata = metadata_raw if isinstance(metadata_raw, dict) else {}
                    if not metadata and isinstance(metadata_raw, str):
                        try:
                            metadata = json.loads(metadata_raw)
                        except Exception:
                            metadata = {}

                    doc_type = str((metadata or {}).get("type", "")).lower()
                    source = str((metadata or {}).get("source", "")).lower()
                    if "competitor" in doc_type or "competitor" in source:
                        competitor_results.append(result)

            if not competitor_results:
                return {
                    "originality_score": 1.0,
                    "confidence": 0.6,
                    "method": method,
                    "notes": "No competitor overlap detected in available index"
                }

            top_match = max(competitor_results, key=lambda item: float(item.get("score", 0.0)))
            top_score = max(0.0, min(1.0, float(top_match.get("score", 0.0))))
            originality_score = max(0.0, round(1.0 - top_score, 4))
            # Confidence grows with the number of matches evaluated (capped at 5).
            confidence = round(min(1.0, 0.55 + (min(len(competitor_results), 5) * 0.07)), 3)
            warning = originality_score < self.ORIGINALITY_THRESHOLD

            return {
                "originality_score": originality_score,
                "confidence": confidence,
                "method": method,
                "warning": warning,
                "threshold": self.ORIGINALITY_THRESHOLD,
                "top_competitor_match": {
                    "id": top_match.get("id"),
                    "score": round(top_score, 4)
                },
                "matches_evaluated": len(competitor_results)
            }

        except Exception as e:
            logger.error(f"[{self.__class__.__name__}] Failed to verify originality: {e}")
            logger.error(f"[{self.__class__.__name__}] Full traceback: {traceback.format_exc()}")
            return {"originality_score": 0.0, "error": str(e)}

    async def style_enforcer(self, text: str, style_guidelines: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
        """
        Tool: Ensures content adheres to brand voice and style guidelines.

        Args:
            text: Content to check.
            style_guidelines: Optional guidelines; only ``tone`` is read by the
                heuristics. When omitted and a SIF service is configured, the
                guidelines are looked up in the index.

        Returns:
            ``compliance_score`` (starts at 1.0, minus 0.1 per failed check),
            ``issues``, ``is_compliant`` and ``guidelines_source``, which is
            ``"provided"`` for caller-supplied guidelines, ``"sif_index"`` when
            they were retrieved from the index, and ``"none"`` when no
            guidelines were available. On failure returns ``{"error": ...}``.
        """
        self._log_agent_operation("Enforcing style guidelines", text_length=len(text or ""))

        try:
            if not text:
                return {"compliance_score": 0.0, "issues": ["No text provided"]}

            # Record where the guidelines came from at the moment they are
            # resolved; deriving it afterwards from the (by then populated)
            # variable reported the source the wrong way round.
            guidelines_source = "provided" if style_guidelines else "none"

            # 1. Fetch Style Guidelines from SIF if not provided
            if not style_guidelines and self.sif_service:
                try:
                    # Search for website analysis to get brand voice/style
                    # We assume the most relevant 'website_analysis' doc contains the guidelines
                    results = await self.intelligence.search("website analysis brand voice style", limit=1)
                    if results:
                        import json
                        res = results[0]
                        metadata_str = res.get('object')
                        metadata = json.loads(metadata_str) if isinstance(metadata_str, str) else (metadata_str or res)

                        if metadata.get('type') == 'website_analysis':
                            report = metadata.get('full_report', {})
                            style_guidelines = {
                                "tone": report.get('brand_analysis', {}).get('brand_voice', 'neutral'),
                                "style_patterns": report.get('style_patterns', {}),
                                "writing_style": report.get('writing_style', {})
                            }
                            guidelines_source = "sif_index"
                            logger.info(f"[{self.__class__.__name__}] Retrieved style guidelines from SIF: {style_guidelines.get('tone')}")
                except Exception as e:
                    logger.warning(f"[{self.__class__.__name__}] Failed to retrieve style guidelines from SIF: {e}")

            issues = []
            score = 1.0

            # Basic Heuristic Checks (Placeholder for LLM-based style analysis)

            # 1. Tone Check (e.g., formal vs casual)
            # If guidelines specify 'formal', check for contractions
            tone = str(style_guidelines.get('tone') or '').lower() if style_guidelines else ''
            if 'formal' in tone or 'professional' in tone:
                contractions = ["can't", "won't", "don't", "it's"]
                text_lower = text.lower()
                found_contractions = [c for c in contractions if c in text_lower]
                if found_contractions:
                    issues.append(f"Found contractions in formal text: {', '.join(found_contractions[:3])}...")
                    score -= 0.1

            # 2. Length/Sentence Structure (simple metric)
            # Drop empty segments (e.g. after the final period) so they do not
            # dilute the average.
            sentences = [s for s in text.split('.') if s.strip()]
            avg_len = sum(len(s.split()) for s in sentences) / max(1, len(sentences))
            if avg_len > 25:
                issues.append("Average sentence length is too high (>25 words). Consider shortening.")
                score -= 0.1

            return {
                "compliance_score": max(0.0, score),
                "issues": issues,
                "is_compliant": score > 0.8,
                "guidelines_source": guidelines_source
            }

        except Exception as e:
            logger.error(f"[{self.__class__.__name__}] Style enforcement failed: {e}")
            return {"error": str(e)}

    async def safety_filter(self, text: str) -> Dict[str, Any]:
        """
        Tool: Flags potentially harmful, offensive, or sensitive content.

        Matches a fixed keyword blocklist against whole words of the
        lowercased text.

        Returns:
            ``is_safe``, the matched ``flags`` (in blocklist order),
            ``safety_score`` (1.0 or 0.0) and ``action`` (``"approve"`` or
            ``"flag_for_review"``). On failure returns ``{"error": ...}``.
        """
        self._log_agent_operation("Running safety filter", text_length=len(text or ""))

        try:
            import re

            # Basic Keyword Blocklist (Placeholder for LLM/Safety Model)
            # In production, this should call a dedicated safety API (e.g., OpenAI Moderation, Llama Guard)
            unsafe_keywords = [
                "hate", "kill", "murder", "attack", "destroy",  # Violent
                "scam", "fraud", "steal",  # Illegal
                "explicit", "adult"  # NSFW
            ]

            text_lower = (text or "").lower()
            # Use real word boundaries: matching " keyword " with literal
            # spaces missed keywords at the start/end of the text or next to
            # punctuation ("scam.", "kill,").
            found_flags = [
                keyword for keyword in unsafe_keywords
                if re.search(r"\b" + re.escape(keyword) + r"\b", text_lower)
            ]

            is_safe = len(found_flags) == 0

            return {
                "is_safe": is_safe,
                "flags": found_flags,
                "safety_score": 1.0 if is_safe else 0.0,
                "action": "approve" if is_safe else "flag_for_review"
            }

        except Exception as e:
            logger.error(f"[{self.__class__.__name__}] Safety filter failed: {e}")
            return {"error": str(e)}
class LinkGraphAgent(SIFBaseAgent):
"""

View File

@@ -375,9 +375,13 @@ def llm_text_gen(
system_prompt=system_instructions
)
elif gpt_provider == "wavespeed":
llm_start = time.time()
t0 = time.time()
logger.warning(f"[llm_text_gen][{flow_tag}] wavespeed: Starting provider init for user {user_id}")
if json_struct:
logger.warning(f"[llm_text_gen][{flow_tag}] wavespeed: Importing wavespeed_provider module (lazy import) for user {user_id}")
from services.llm_providers.wavespeed_provider import wavespeed_structured_json_response
logger.warning(f"[llm_text_gen][{flow_tag}] wavespeed: Import done, making API call for user {user_id}, import_took={(time.time()-t0)*1000:.0f}ms")
t1 = time.time()
response_text = wavespeed_structured_json_response(
prompt=prompt,
schema=json_struct,
@@ -387,7 +391,10 @@ def llm_text_gen(
system_prompt=system_instructions
)
else:
logger.warning(f"[llm_text_gen][{flow_tag}] wavespeed: Importing wavespeed_provider module (lazy import) for user {user_id}")
from services.llm_providers.wavespeed_provider import wavespeed_text_response
logger.warning(f"[llm_text_gen][{flow_tag}] wavespeed: Import done, making API call for user {user_id}, import_took={(time.time()-t0)*1000:.0f}ms")
t1 = time.time()
response_text = wavespeed_text_response(
prompt=prompt,
model=model or "openai/gpt-oss-120b",
@@ -396,8 +403,9 @@ def llm_text_gen(
top_p=top_p,
system_prompt=system_instructions
)
llm_ms = (time.time() - llm_start) * 1000
logger.warning(f"[llm_text_gen][{flow_tag}] LLM API call took {llm_ms:.0f}ms for user {user_id} (wavespeed)")
api_took_ms = (time.time() - t1) * 1000
total_ms = (time.time() - t0) * 1000
logger.warning(f"[llm_text_gen][{flow_tag}] wavespeed: user={user_id} import_took={(t1-t0)*1000:.0f}ms api_took={api_took_ms:.0f}ms total={total_ms:.0f}ms")
else:
logger.error(f"[llm_text_gen] Unknown provider: {gpt_provider}")
raise RuntimeError(f"Unknown LLM provider: {gpt_provider}. Supported providers: google, huggingface, wavespeed")

View File

@@ -38,6 +38,7 @@ Last Updated: March 2026
import os
import sys
import time as _time
from pathlib import Path
import json
import re
@@ -46,15 +47,16 @@ from typing import Optional, Dict, Any, List
from dotenv import load_dotenv
# Fix the environment loading path - load from backend directory
_mod_start = _time.time()
current_dir = Path(__file__).parent.parent # services directory
backend_dir = current_dir.parent # backend directory
env_path = backend_dir / '.env'
if env_path.exists():
load_dotenv(env_path)
print(f"Loaded .env from: {env_path}")
_dotenv_ms = (_time.time() - _mod_start) * 1000
print(f"Loaded .env from: {env_path} (took {_dotenv_ms:.0f}ms)")
else:
# Fallback to current directory
load_dotenv()
print(f"No .env found at {env_path}, using current directory")
@@ -64,6 +66,7 @@ from utils.logger_utils import get_service_logger
# Use service-specific logger to avoid conflicts
logger = get_service_logger("wavespeed_provider")
_import_start = _time.time()
from tenacity import (
retry,
retry_if_exception,
@@ -80,6 +83,8 @@ except ImportError:
NotFoundError = Exception
logger.warn("OpenAI library not available. Install with: pip install openai")
logger.warning(f"[wavespeed_provider] Module import completed in {(_time.time()-_import_start)*1000:.0f}ms (openai_available={OPENAI_AVAILABLE})")
# Default WaveSpeed models for fallback
WAVESPEED_FALLBACK_MODELS = [
"openai/gpt-oss-120b",
@@ -276,12 +281,13 @@ def wavespeed_text_response(
if not api_key:
raise Exception("WAVESPEED_API_KEY not found in environment variables")
_t0 = _time.time()
# Initialize WaveSpeed client
client = OpenAI(
base_url="https://llm.wavespeed.ai/v1",
api_key=api_key,
)
logger.info("✅ WaveSpeed client initialized for text response")
logger.warning(f"[wavespeed_text_response] OpenAI client init took {(_time.time()-_t0)*1000:.0f}ms")
# Prepare input for the API
messages = []
@@ -311,6 +317,7 @@ def wavespeed_text_response(
logger.info("🚀 Making WaveSpeed API call (chat completion)...")
_api_t0 = _time.time()
# Call exactly the requested model; no retries, no fallbacks, no variants
response = client.chat.completions.create(
model=model,
@@ -319,6 +326,7 @@ def wavespeed_text_response(
top_p=top_p,
max_tokens=max_tokens
)
logger.warning(f"[wavespeed_text_response] API call took {(_time.time()-_api_t0)*1000:.0f}ms")
# Extract text from response
generated_text = response.choices[0].message.content
@@ -422,13 +430,15 @@ def wavespeed_structured_json_response(
if not api_key:
raise Exception("WAVESPEED_API_KEY not found in environment variables")
_fn_start = _time.time()
# Initialize OpenAI client with WaveSpeed base URL
client = OpenAI(
base_url="https://llm.wavespeed.ai/v1",
api_key=api_key,
)
logger.info("✅ WaveSpeed client initialized for structured JSON response")
_client_init_ms = (_time.time() - _fn_start) * 1000
logger.warning(f"[wavespeed_structured_json_response] OpenAI client init took {_client_init_ms:.0f}ms")
# Prepare input for the API
messages = []
@@ -463,11 +473,13 @@ def wavespeed_structured_json_response(
json_schema_str = json.dumps(schema, indent=2)
messages[-1]["content"] += f"\n\nJSON Schema:\n{json_schema_str}"
_api_start = _time.time()
try:
response = None
last_error = None
for candidate_model in _fallback_model_sequence(model, fallback_models):
try:
logger.info(f"[wavespeed_structured_json_response] Calling model={candidate_model}...")
response = client.chat.completions.create(
model=candidate_model,
messages=messages,
@@ -475,8 +487,10 @@ def wavespeed_structured_json_response(
max_tokens=max_tokens,
response_format={"type": "json_object"} # Try to enforce JSON mode if supported
)
_api_ms = (_time.time() - _api_start) * 1000
if candidate_model != model:
logger.warning("WaveSpeed structured generation switched to fallback model: {}", candidate_model)
logger.warning(f"[wavespeed_structured_json_response] First API call completed in {_api_ms:.0f}ms (model={candidate_model})")
break
except NotFoundError as nf_err:
last_error = nf_err

View File

@@ -168,3 +168,74 @@ class OnboardingProgressService:
except Exception as e:
logger.error(f"Error completing onboarding: {e}")
return False
def reset_onboarding(self, user_id: str) -> bool:
    """Reset onboarding progress and cancel/pause all scheduled tasks for the user.

    Rewinds the user's onboarding session (if one exists) to step 1 with
    zero progress, then pauses the user's scheduled tasks.

    Returns:
        ``True`` when the reset ran without raising, ``False`` (after
        logging the error) otherwise.
    """
    try:
        db = get_session_for_user(user_id)
        try:
            # Reset the onboarding session
            record = (
                db.query(OnboardingSession)
                .filter(OnboardingSession.user_id == user_id)
                .first()
            )
            if record:
                record.current_step = 1
                record.progress = 0.0
                record.updated_at = datetime.utcnow()
                db.commit()
        finally:
            db.close()

        # Cancel/pause all scheduled tasks for this user
        self._cancel_scheduled_tasks(user_id)
        logger.info(f"Reset onboarding for user {user_id}")
    except Exception as e:
        logger.error(f"Error resetting onboarding for user {user_id}: {e}")
        return False
    return True
def _cancel_scheduled_tasks(self, user_id: str):
    """Pause all DB-backed scheduled tasks for a user after onboarding reset.

    Sets ``status`` to ``"paused"`` on every ``"active"`` task row belonging
    to ``user_id`` across the known task models. This is best-effort:
    failures are logged as warnings and never raised to the caller.

    Args:
        user_id: Owner of the tasks to pause.
    """
    try:
        from models.website_analysis_monitoring_models import (
            OnboardingFullWebsiteAnalysisTask,
            DeepCompetitorAnalysisTask,
            SIFIndexingTask,
            MarketTrendsTask,
            WebsiteAnalysisTask,
        )

        task_models = [
            OnboardingFullWebsiteAnalysisTask,
            DeepCompetitorAnalysisTask,
            SIFIndexingTask,
            MarketTrendsTask,
            WebsiteAnalysisTask,
        ]
        # The Advertools model is optional. Import it inside the guard so
        # that a missing module only skips that model instead of aborting
        # the whole pause (guarding just the list append protected nothing).
        try:
            from models.advertools_monitoring_models import AdvertoolsTask
            task_models.append(AdvertoolsTask)
        except Exception as e:
            logger.debug(f"AdvertoolsTask model unavailable, skipping: {e}")

        db = get_session_for_user(user_id)
        try:
            paused_count = 0
            for model_cls in task_models:
                try:
                    active_tasks = db.query(model_cls).filter(
                        model_cls.user_id == user_id,
                        model_cls.status == "active"
                    ).all()
                    for task in active_tasks:
                        task.status = "paused"
                        paused_count += 1
                except Exception as e:
                    # One unavailable table must not stop the other models.
                    table = getattr(model_cls, "__tablename__", getattr(model_cls, "__name__", model_cls))
                    logger.warning(f"Could not pause {table} tasks for user {user_id}: {e}")

            db.commit()
            if paused_count > 0:
                logger.info(f"Paused {paused_count} scheduled tasks for user {user_id} after onboarding reset")
        finally:
            db.close()
    except Exception as e:
        logger.warning(f"Failed to cancel scheduled tasks for user {user_id}: {e}")

View File

@@ -76,7 +76,7 @@ class GoogleSearchService:
logger.info(f"Searching for: {search_query}")
# Perform the search
search_results = await self._perform_search(search_query, max_results)
search_results = await self.perform_search(search_query, max_results)
# Process and rank results
processed_results = await self._process_search_results(search_results, topic, industry)
@@ -140,13 +140,16 @@ class GoogleSearchService:
return " ".join(query_components)
async def _perform_search(self, query: str, max_results: int) -> List[Dict[str, Any]]:
async def perform_search(self, query: str, max_results: int, **overrides) -> List[Dict[str, Any]]:
"""
Perform the actual Google Custom Search API call.
Args:
query: The search query to execute
max_results: Maximum number of results to return
**overrides: Override or disable default params.
Pass `param=None` to remove a default param entirely.
Pass `param=value` to override its value.
Returns:
Raw search results from Google API
@@ -158,8 +161,15 @@ class GoogleSearchService:
"num": min(max_results, 10), # Google CSE max is 10 per request
"dateRestrict": "m1", # Last month
"sort": "date", # Sort by date for current information
"safe": "active" # Safe search for professional content
"safe": "active", # Safe search for professional content
}
# Apply overrides: None removes the key, non-None overrides the value
if overrides:
for k, v in overrides.items():
if v is None:
params.pop(k, None)
else:
params[k] = v
async with aiohttp.ClientSession() as session:
async with session.get(self.base_url, params=params) as response:
@@ -477,7 +487,7 @@ class GoogleSearchService:
try:
# Perform a simple test search
test_query = "AI technology trends 2024"
test_results = await self._perform_search(test_query, 1)
test_results = await self.perform_search(test_query, 1)
return {
"status": "success",

View File

@@ -1,3 +1,4 @@
import asyncio
import time
from datetime import datetime, timedelta
from typing import Any, Dict
@@ -16,6 +17,9 @@ from utils.logger_utils import get_service_logger
logger = get_service_logger("deep_competitor_analysis_executor")
DEEP_COMPETITOR_TIMEOUT_SECONDS = 300 # 5-minute hard timeout
DEEP_COMPETITOR_MAX_COMPETITORS = 10 # cap to reduce API pressure
class DeepCompetitorAnalysisExecutor(TaskExecutor):
def __init__(self):
@@ -82,17 +86,23 @@ class DeepCompetitorAnalysisExecutor(TaskExecutor):
retryable=False
)
max_competitors = int(payload.get("max_competitors") or 25)
max_competitors = min(int(payload.get("max_competitors") or 25), DEEP_COMPETITOR_MAX_COMPETITORS)
crawl_concurrency = int(payload.get("crawl_concurrency") or 4)
mode = payload.get("mode", "deep_analysis")
if mode == "strategic_insights":
logger.info(f"Executing weekly strategic insights for user {user_id}")
report = await self.analysis_service.generate_weekly_strategy_brief(
user_id=user_id,
website_analysis=website_analysis if isinstance(website_analysis, dict) else {},
competitors=competitors
)
try:
report = await asyncio.wait_for(
self.analysis_service.generate_weekly_strategy_brief(
user_id=user_id,
website_analysis=website_analysis if isinstance(website_analysis, dict) else {},
competitors=competitors
),
timeout=DEEP_COMPETITOR_TIMEOUT_SECONDS
)
except asyncio.TimeoutError:
raise TimeoutError(f"Strategic insights timed out after {DEEP_COMPETITOR_TIMEOUT_SECONDS}s for user {user_id}")
# Persist to WebsiteAnalysis history
analysis_id = website_analysis.get('id')
@@ -110,13 +120,19 @@ class DeepCompetitorAnalysisExecutor(TaskExecutor):
flag_modified(wa, "strategic_insights_history")
db.commit()
else:
report = await self.analysis_service.run(
user_id=user_id,
website_analysis=website_analysis if isinstance(website_analysis, dict) else {},
competitors=competitors,
max_competitors=max_competitors,
crawl_concurrency=crawl_concurrency
)
try:
report = await asyncio.wait_for(
self.analysis_service.run(
user_id=user_id,
website_analysis=website_analysis if isinstance(website_analysis, dict) else {},
competitors=competitors,
max_competitors=max_competitors,
crawl_concurrency=crawl_concurrency
),
timeout=DEEP_COMPETITOR_TIMEOUT_SECONDS
)
except asyncio.TimeoutError:
raise TimeoutError(f"Deep competitor analysis timed out after {DEEP_COMPETITOR_TIMEOUT_SECONDS}s for user {user_id}")
task.last_executed = datetime.utcnow()
task.last_success = datetime.utcnow()

View File

@@ -103,7 +103,7 @@ class SIFIndexingExecutor(TaskExecutor):
guardian_report = None
if content_synced:
try:
from services.intelligence.sif_agents import ContentGuardianAgent
from services.intelligence.agents.specialized import ContentGuardianAgent
# Re-use the intelligence service from sif_service
guardian_agent = ContentGuardianAgent(
intelligence_service=sif_service.intelligence_service,

View File

@@ -9,6 +9,8 @@ from .on_page_seo_service import OnPageSEOService
from .technical_seo_service import TechnicalSEOService
from .enterprise_seo_service import EnterpriseSEOService
from .content_strategy_service import ContentStrategyService
from .serp_gap_service import SerpGapService
from .competitor_content_service import CompetitorContentService
__all__ = [
'MetaDescriptionService',
@@ -20,4 +22,6 @@ __all__ = [
'TechnicalSEOService',
'EnterpriseSEOService',
'ContentStrategyService',
'SerpGapService',
'CompetitorContentService',
]

View File

@@ -0,0 +1,214 @@
"""
Competitor Content Service for ALwrity
Fetches full competitor content for gap topics using Exa with include_domains.
Phase 2 of the Content Gap Radar feature.
Usage:
service = CompetitorContentService()
result = await service.deep_dive(
topics=["AI content strategy"],
competitor_domains=["example.com"]
)
"""
import os
import asyncio
import hashlib
import json
import time
from typing import Dict, List, Optional, Any
from loguru import logger
class CompetitorContentService:
    """
    Fetches competitor content for gap topics using Exa neural search.

    Uses Exa's `include_domains` to scope searches to known competitor domains,
    returning full text, highlights, and summaries for deeper competitive analysis.
    Complete results are cached in memory (per instance) for CACHE_TTL seconds
    to reduce API costs; runs in which any topic search failed are not cached
    so a transient outage is not replayed until the TTL expires.

    Designed to be consumed by the future ContentGapRadarAgent.
    """

    # Cache lifetime in seconds (24h by default), overridable via environment
    CACHE_TTL = int(os.getenv("COMPETITOR_CONTENT_CACHE_TTL", "86400"))

    def __init__(self):
        self.api_key = os.getenv("EXA_API_KEY")
        if not self.api_key:
            logger.warning(
                "EXA_API_KEY not configured; CompetitorContentService disabled"
            )
        self._exa = None
        self._cache: Dict[str, Dict[str, Any]] = {}

    @property
    def exa(self):
        """Lazy-init Exa SDK to allow env injection after import."""
        if self._exa is None and self.api_key:
            from exa_py import Exa
            self._exa = Exa(self.api_key)
        return self._exa

    def _cache_key(
        self,
        topics: List[str],
        domains: List[str],
        max_results: Optional[int] = None,
    ) -> str:
        """Build a deterministic cache key.

        The key is order-insensitive for topics and domains. When
        ``max_results`` is given it becomes part of the key, so requests with
        different result limits never share a cache entry.
        """
        payload: Dict[str, Any] = {"t": sorted(topics), "d": sorted(domains)}
        if max_results is not None:
            payload["n"] = max_results
        raw = json.dumps(payload, sort_keys=True)
        # md5 is used only as a compact fingerprint, not for security
        return hashlib.md5(raw.encode()).hexdigest()

    def _get_cached(self, key: str) -> Optional[Dict[str, Any]]:
        """Return the cached payload for ``key``, or None if absent/expired."""
        entry = self._cache.get(key)
        if entry is None:
            return None
        if (time.time() - entry["ts"]) < self.CACHE_TTL:
            return entry["data"]
        # Drop the stale entry so the cache does not grow without bound
        self._cache.pop(key, None)
        return None

    def _set_cache(self, key: str, data: Dict[str, Any]):
        """Store ``data`` under ``key`` together with the current timestamp."""
        self._cache[key] = {"data": data, "ts": time.time()}

    async def deep_dive(
        self,
        topics: List[str],
        competitor_domains: List[str],
        max_total_results: int = 10,
        concurrency: int = 3,
        bypass_cache: bool = False,
    ) -> Dict[str, Any]:
        """
        Fetch competitor content for a list of gap topics.

        For each topic, searches Exa scoped to competitor domains and returns
        full text, highlights, and publishing metadata.

        Args:
            topics: Topic phrases to research (e.g. from SERP gap analysis)
            competitor_domains: Known competitor domains to scope search
            max_total_results: Max results per topic total (Exa API limit varies)
            concurrency: Max concurrent Exa API calls (values below 1 are
                treated as 1)
            bypass_cache: Force fresh API calls, ignoring cache

        Returns:
            Dict with keys:
                results: List of per-topic competitor content results
                total_topics_analyzed: int
                topics_with_content: int
                cached: bool
                error: str, only present when EXA_API_KEY is not configured
        """
        if not topics or not competitor_domains:
            return {
                "results": [],
                "total_topics_analyzed": 0,
                "topics_with_content": 0,
                "cached": False,
            }

        ck = self._cache_key(topics, competitor_domains, max_total_results)
        if not bypass_cache:
            cached = self._get_cached(ck)
            if cached is not None:
                logger.info("Returning cached competitor content results")
                return {**cached, "cached": True}

        if not self.api_key or not self.exa:
            return {
                "results": [],
                "total_topics_analyzed": len(topics),
                "topics_with_content": 0,
                "cached": False,
                "error": "EXA_API_KEY not configured",
            }

        # Semaphore(0) would block forever and a negative value raises
        semaphore = asyncio.Semaphore(max(1, concurrency))
        loop = asyncio.get_running_loop()

        async def search_topic(topic: str) -> Dict[str, Any]:
            async with semaphore:
                return await self._search_single_topic(
                    topic, competitor_domains, max_total_results, loop
                )

        results = await asyncio.gather(*(search_topic(topic) for topic in topics))

        output = {
            "results": results,
            "total_topics_analyzed": len(topics),
            "topics_with_content": sum(
                1 for r in results if r.get("total_results", 0) > 0
            ),
            "cached": False,
        }

        # Only complete runs are cached; a failed search would otherwise be
        # served as "no content" for the whole TTL.
        if not any("error" in r for r in results):
            self._set_cache(ck, output)

        # Shallow copy so callers adding/removing top-level keys cannot alter
        # the cached entry.
        return dict(output)

    async def _search_single_topic(
        self,
        topic: str,
        competitor_domains: List[str],
        max_results: int,
        loop: asyncio.AbstractEventLoop,
    ) -> Dict[str, Any]:
        """
        Search Exa for a single topic, scoped to competitor domains.

        The blocking SDK call runs in the loop's default executor. Any
        exception is logged and reported through an ``error`` key in the
        returned dict instead of being raised.
        """
        query = topic
        search_kwargs = {
            "type": "auto",
            "num_results": max_results,
            "include_domains": competitor_domains,
            "text": {"max_characters": 2000},
            "highlights": {"num_sentences": 3, "highlights_per_url": 3},
            "summary": {"query": f"Key details about {topic}"},
        }

        try:
            results = await loop.run_in_executor(
                None,
                lambda: self.exa.search_and_contents(query, **search_kwargs),
            )

            content = []
            seen_urls = set()
            for result in getattr(results, "results", []) or []:
                url = getattr(result, "url", "")
                if not url or url in seen_urls:
                    continue
                seen_urls.add(url)
                # `or` fallbacks also cover attributes that exist but are None
                content.append({
                    "domain": self._extract_domain(url),
                    "title": getattr(result, "title", None) or "Untitled",
                    "url": url,
                    "highlights": getattr(result, "highlights", None) or [],
                    "summary": getattr(result, "summary", None) or "",
                    "text": getattr(result, "text", None) or "",
                    "published_date": getattr(result, "published_date", None),
                    "author": getattr(result, "author", None),
                })

            return {
                "topic": topic,
                "competitor_content": content,
                "total_results": len(content),
                # sorted for a deterministic order
                "domains_found": sorted(
                    {c["domain"] for c in content if c["domain"]}
                ),
            }

        except Exception as e:
            logger.warning(f"Exa search failed for topic '{topic}': {e}")
            return {
                "topic": topic,
                "competitor_content": [],
                "total_results": 0,
                "domains_found": [],
                "error": str(e),
            }

    @staticmethod
    def _extract_domain(url: str) -> str:
        """Extract the lower-cased network location (host[:port]) from a URL."""
        try:
            from urllib.parse import urlparse
            return urlparse(url).netloc.lower()
        except Exception:
            return url.lower()

View File

@@ -0,0 +1,175 @@
"""
SERP Gap Service for ALwrity
Detects which competitors rank for target topics using Google Custom Search.
Phase 1 of the Content Gap Radar feature.
Usage:
service = SerpGapService()
result = await service.analyze_topic_gaps(
topics=["AI content strategy", "topic clustering"],
competitor_domains=["example.com", "competitor.org"]
)
"""
import asyncio
import hashlib
import json
import os
import time
from typing import Dict, List, Optional, Any
from loguru import logger
from services.research.google_search_service import GoogleSearchService
class SerpGapService:
    """
    SERP Gap Analysis Service.

    Uses Google Custom Search `site:` queries to detect competitor ranking presence
    for specific topics. Complete results are cached in memory (per instance) for
    CACHE_TTL seconds to stay within free-tier quotas (100 queries/day); runs in
    which any query failed are not cached, so a transient failure is not reported
    as "no competitor presence" until the TTL expires. Designed to be consumed by
    a future ContentGapRadarAgent that scores and prioritizes gaps.
    """

    CACHE_TTL = int(os.getenv("SERP_GAP_CACHE_TTL", "86400"))  # 24 hours default

    def __init__(self, google_search_service: Optional["GoogleSearchService"] = None):
        self.gcs = google_search_service or GoogleSearchService()
        self._cache: Dict[str, Dict[str, Any]] = {}
        logger.info("SerpGapService initialized")

    def _cache_key(
        self,
        topics: List[str],
        domains: List[str],
        max_results: Optional[int] = None,
    ) -> str:
        """Deterministic cache key from sorted topics + domains.

        When ``max_results`` is given it becomes part of the key, so requests
        with different per-site limits never share a cache entry.
        """
        payload: Dict[str, Any] = {"t": sorted(topics), "d": sorted(domains)}
        if max_results is not None:
            payload["n"] = max_results
        raw = json.dumps(payload, sort_keys=True)
        # md5 is used only as a compact fingerprint, not for security
        return hashlib.md5(raw.encode()).hexdigest()

    def _get_cached(self, key: str) -> Optional[Dict[str, Any]]:
        """Return the cached payload for ``key``, or None if absent/expired."""
        entry = self._cache.get(key)
        if entry is None:
            return None
        if (time.time() - entry["ts"]) < self.CACHE_TTL:
            return entry["data"]
        # Drop the stale entry so the cache does not grow without bound
        self._cache.pop(key, None)
        return None

    def _set_cache(self, key: str, data: Dict[str, Any]):
        """Store ``data`` under ``key`` together with the current timestamp."""
        self._cache[key] = {"data": data, "ts": time.time()}

    async def analyze_topic_gaps(
        self,
        topics: List[str],
        competitor_domains: List[str],
        max_results_per_site: int = 5,
        concurrency: int = 3,
        bypass_cache: bool = False,
    ) -> Dict[str, Any]:
        """
        Analyze SERP gaps for a list of topics across known competitors.

        For each topic, queries Google with `site:competitor_domain topic` for
        each known competitor to detect ranking presence.

        Args:
            topics: Topic phrases to check (e.g. from find_semantic_gaps())
            competitor_domains: Known competitor domains (e.g. ["example.com"])
            max_results_per_site: Max Google CSE results per site: query (max 10)
            concurrency: Max concurrent topics in flight to stay under rate
                limits (values below 1 are treated as 1)
            bypass_cache: Force fresh API calls, ignoring cache

        Returns:
            Dict with keys:
                gaps: List of per-topic SERP gap results
                total_topics_analyzed: int
                total_competitors: int
                cached: bool
        """
        if not topics or not competitor_domains:
            return {
                "gaps": [],
                "total_topics_analyzed": 0,
                "total_competitors": 0,
                "cached": False,
            }

        ck = self._cache_key(topics, competitor_domains, max_results_per_site)
        if not bypass_cache:
            cached = self._get_cached(ck)
            if cached is not None:
                logger.info("Returning cached SERP gap results")
                return {**cached, "cached": True}

        # Semaphore(0) would block forever and a negative value raises
        semaphore = asyncio.Semaphore(max(1, concurrency))

        async def analyze_topic(topic: str) -> Dict[str, Any]:
            async with semaphore:
                return await self._analyze_single_topic(
                    topic, competitor_domains, max_results_per_site
                )

        results = await asyncio.gather(*(analyze_topic(topic) for topic in topics))

        output = {
            "gaps": results,
            "total_topics_analyzed": len(topics),
            "total_competitors": len(competitor_domains),
            "cached": False,
        }

        # Only complete runs are cached; a failed query would otherwise look
        # like a genuine content gap for the whole TTL.
        if not any(r.get("failed_queries", 0) for r in results):
            self._set_cache(ck, output)

        # Shallow copy so callers adding/removing top-level keys cannot alter
        # the cached entry.
        return dict(output)

    async def _analyze_single_topic(
        self,
        topic: str,
        competitor_domains: List[str],
        max_results: int,
    ) -> Dict[str, Any]:
        """
        Check SERP presence for a single topic across all competitor domains.

        Removes the dateRestrict and sort=date defaults from Google CSE so we
        see all-time competitor content (not just last month). Domains are
        queried one after another; a failing query is logged, counted in
        ``failed_queries`` and skipped.
        """
        competitors_found = []
        failed_queries = 0

        for domain in competitor_domains:
            query = f"site:{domain} {topic}"
            try:
                raw_results = await self.gcs.perform_search(
                    query,
                    max_results,
                    dateRestrict=None,  # Don't limit to last month
                    sort=None,  # Use relevance sorting, not date
                )
                for result in raw_results:
                    competitors_found.append({
                        "domain": domain,
                        "title": result.get("title", ""),
                        "url": result.get("link", ""),
                        "snippet": result.get("snippet", ""),
                    })
            except Exception as e:
                logger.warning(
                    f"GCS query failed for site:{domain} topic='{topic}': {e}"
                )
                failed_queries += 1
                continue

        # Keep the first occurrence of every URL
        seen_urls = set()
        unique_competitors = []
        for entry in competitors_found:
            if entry["url"] not in seen_urls:
                seen_urls.add(entry["url"])
                unique_competitors.append(entry)

        return {
            "topic": topic,
            "competitors_found": unique_competitors,
            "competitor_count": len(unique_competitors),
            # sorted for a deterministic order
            "domains_with_content": sorted(
                {e["domain"] for e in unique_competitors}
            ),
            "failed_queries": failed_queries,
            "total_domains_checked": len(competitor_domains),
        }

View File

@@ -123,13 +123,15 @@ def _is_coverage_guardrail_enabled(grounding: Dict[str, Any]) -> bool:
return True
def _sanitize_task(task: Dict[str, Any]) -> Optional[Dict[str, Any]]:
def _sanitize_task(task: Dict[str, Any], agent_name: Optional[str] = None) -> Optional[Dict[str, Any]]:
if not isinstance(task, dict):
return None
pillar_id = str(task.get("pillarId") or "").lower().strip()
title = str(task.get("title") or "").strip()
if pillar_id not in PILLAR_IDS or not title:
reason = "empty title" if not title else f"invalid pillar_id={pillar_id!r}"
logger.warning(f"Rejected task from agent {agent_name or 'unknown'}: {reason}")
return None
sanitized = dict(task)
@@ -418,6 +420,7 @@ async def generate_agent_enhanced_plan(
orchestrator.agents.get('seo'), # SEOOptimizationAgent
orchestrator.agents.get('social'), # SocialAmplificationAgent
orchestrator.agents.get('competitor'), # CompetitorResponseAgent
orchestrator.agents.get('content_gap_radar'), # ContentGapRadarAgent
]
# Filter out None agents (disabled/failed init)
@@ -466,7 +469,118 @@ async def generate_agent_enhanced_plan(
# Phase 3: Check memory for rejections (Semantic Filter)
agent_tasks = await memory_service.filter_redundant_proposals(agent_tasks)
# Log committee meeting event for frontend transparency
try:
accepted_ids = {f"{p.pillar_id}:{p.title}" for p in agent_tasks}
proposals_log = []
for p in raw_proposals:
valid = p.pillar_id in PILLAR_IDS
key = f"{p.pillar_id}:{p.title}"
proposals_log.append({
"agent": p.source_agent,
"title": p.title,
"pillar_id": p.pillar_id,
"priority": p.priority,
"valid": valid,
"accepted": key in accepted_ids,
"rejected_reason": None if valid else f"pillar_id '{p.pillar_id}' not in {PILLAR_IDS}",
"reasoning": p.reasoning,
"estimated_time": p.estimated_time,
"action_type": p.action_type,
})
if not valid:
logger.warning(
f"Rejected proposal from agent {p.source_agent}: "
f"invalid pillar_id={p.pillar_id!r} (title={p.title!r}). "
f"Must be one of {PILLAR_IDS}"
)
activity.log_event(
event_type="committee_meeting",
message=f"Committee: {len(agent_tasks)}/{len(raw_proposals)} tasks accepted from {len(active_agents)} agents",
payload={
"agents_polled": len(active_agents),
"total_proposals": len(raw_proposals),
"accepted_count": len(agent_tasks),
"rejected_count": len(raw_proposals) - len(agent_tasks),
"proposals": proposals_log,
},
)
except Exception as e:
logger.warning(f"Failed to log committee meeting event: {e}")
# --- Committee Watchdog Audit (ContentGuardianAgent) ---
try:
guardian_agent = orchestrator.agents.get('guardian')
if guardian_agent and hasattr(guardian_agent, 'audit_committee'):
# Build proposals list from committee data (same format as proposals_log above)
accepted_ids = {f"{p.pillar_id}:{p.title}" for p in agent_tasks}
audit_input = []
for p in raw_proposals:
key = f"{p.pillar_id}:{p.title}"
audit_input.append({
"agent": p.source_agent,
"title": p.title,
"pillar_id": p.pillar_id,
"priority": p.priority,
"reasoning": p.reasoning or "",
"accepted": key in accepted_ids,
"valid": p.pillar_id in PILLAR_IDS,
"rejected_reason": None if p.pillar_id in PILLAR_IDS else f"pillar_id '{p.pillar_id}' not in {PILLAR_IDS}",
})
audit_report = await guardian_agent.audit_committee(audit_input)
activity.log_event(
event_type="quality_audit",
message=f"Committee audit: {audit_report['health_score']}/100 health — {len(audit_report['alerts'])} findings",
payload=audit_report,
)
logger.info(
f"Committee audit: health={audit_report['health_score']}, "
f"critiques={len(audit_report['agent_critiques'])}, "
f"gaps={len(audit_report['coverage_gaps'])}, "
f"overlaps={len(audit_report['overlaps'])}"
)
# Create alerts for serious watchdog findings
for alert in audit_report.get("alerts", []):
sev = alert.get("severity", "warning")
dedupe_key = f"guardian:{alert['type']}:{alert.get('agent','')}:{alert.get('title','')}"
try:
activity.create_alert(
alert_type=f"guardian_{alert['type']}",
title=alert["title"],
message=alert["message"],
severity="error" if sev == "error" else "warning",
cta_path=alert.get("cta_path"),
payload={"guardian_agent": alert.get("agent"), "type": alert["type"]},
dedupe_key=dedupe_key,
)
except Exception as ae:
logger.warning(f"Failed to create guardian alert: {ae}")
except Exception as e:
logger.warning(f"Committee watchdog audit failed: {e}")
# --- Trend Signals (TrendSurferAgent) ---
try:
trend_agent = orchestrator.agents.get('trend')
if trend_agent and hasattr(trend_agent, 'surf_trends'):
opportunities = await trend_agent.surf_trends()
if opportunities:
activity.log_event(
event_type="trend_signals",
message=f"Trend signals: {len(opportunities)} opportunities detected",
payload={
"opportunities": opportunities[:5],
"total_detected": len(opportunities),
"scan_timestamp": datetime.utcnow().isoformat(),
},
)
logger.info(f"Logged trend_signals event with {len(opportunities)} opportunities")
except Exception as e:
logger.warning(f"Trend signal phase failed: {e}")
except Exception as e:
logger.error(f"Committee proposal phase failed: {e}")
# Continue to fallback or LLM generation if committee fails
@@ -669,6 +783,12 @@ async def get_or_create_daily_workflow_plan(
for t in tasks:
pillar_id = str(t.get("pillarId") or "").lower().strip()
if pillar_id not in PILLAR_IDS:
agent = None
metadata = t.get("metadata")
if isinstance(metadata, dict):
agent = metadata.get("source_agent")
logger.warning(f"Skipping task persistence for invalid pillar_id={pillar_id!r} "
f"from agent {agent or 'unknown'}: title={t.get('title', '')}")
continue
task = DailyWorkflowTask(
plan_id=plan.id,

View File

@@ -225,9 +225,9 @@ class WixService:
'error': str(e)
}
def import_image_to_wix(self, access_token: str, image_url: str, display_name: str = None) -> str:
def import_image_to_wix(self, access_token: str, image_url: str, display_name: str = None) -> Optional[str]:
"""
Import external image to Wix Media Manager
Import external image to Wix Media Manager.
Args:
access_token: Valid access token
@@ -235,7 +235,7 @@ class WixService:
display_name: Optional display name for the image
Returns:
Wix media ID
Wix media ID string, or None if import failed
"""
try:
result = self.media_service.import_image(
@@ -243,10 +243,15 @@ class WixService:
image_url,
display_name or f'Imported Image {datetime.now().strftime("%Y%m%d_%H%M%S")}'
)
return result['file']['id']
except requests.RequestException as e:
logger.error(f"Failed to import image to Wix: {e}")
raise
if result and isinstance(result, dict) and 'file' in result:
media_id = result['file'].get('id')
if media_id:
return str(media_id)
logger.warning(f"Image import returned unexpected result structure: {type(result)}")
return None
except Exception as e:
logger.warning(f"Failed to import image to Wix (non-fatal): {e}")
return None
def convert_content_to_ricos(self, content: str, images: List[str] = None,
use_wix_api: bool = False, access_token: str = None) -> Dict[str, Any]:
@@ -276,7 +281,8 @@ class WixService:
def create_blog_post(self, access_token: str, title: str, content: str,
cover_image_url: str = None, category_ids: List[str] = None,
tag_ids: List[str] = None, publish: bool = True,
member_id: str = None, seo_metadata: Dict[str, Any] = None) -> Dict[str, Any]:
member_id: str = None, seo_metadata: Dict[str, Any] = None,
site_id: str = None) -> Dict[str, Any]:
"""
Create and optionally publish a blog post on Wix
@@ -322,6 +328,7 @@ class WixService:
tag_ids=tag_ids,
publish=publish,
seo_metadata=seo_metadata,
site_id=site_id,
import_image_func=self.import_image_to_wix,
lookup_categories_func=self.lookup_or_create_categories,
lookup_tags_func=self.lookup_or_create_tags,

View File

@@ -9,7 +9,7 @@ if str(ROOT) not in sys.path:
from services.intelligence.monitoring.semantic_dashboard import RealTimeSemanticMonitor, SemanticHealthMetric
from services.today_workflow_service import _ensure_pillar_coverage, PILLAR_IDS, validate_plan_contextuality
from services.intelligence.sif_agents import ContentGuardianAgent as SifGuardian
from services.intelligence.agents.specialized import ContentGuardianAgent as SifGuardian
from services.intelligence.agents.specialized_agents import ContentGuardianAgent as SpecializedGuardian