Save local changes (GSC/Bing integrations) before merging PR #354

This commit is contained in:
ajaysi
2026-02-13 13:11:27 +05:30
parent 43e66835ac
commit 08a1f4a1d8
144 changed files with 8310 additions and 2748 deletions

View File

@@ -17,8 +17,7 @@ from .core_agent_framework import (
# Market signal detection
from .market_signal_detector import (
MarketSignal,
MarketSignalDetector,
MarketTrendAnalyzer
MarketSignalDetector
)
# Performance monitoring

View File

@@ -105,6 +105,18 @@ class ALwrityAgentOrchestrator:
def _create_specialized_agents(self):
"""Create specialized marketing agents"""
try:
# Check if onboarding is complete before initializing heavy agents
try:
from services.onboarding.progress_service import OnboardingProgressService
onboarding_service = OnboardingProgressService()
status = onboarding_service.get_onboarding_status(self.user_id)
if not status.get("is_completed", False):
logger.info(f"Skipping agent initialization for user {self.user_id} - Onboarding incomplete")
return
except Exception as e:
logger.warning(f"Could not check onboarding status for {self.user_id}: {e}")
# Fallthrough to attempt initialization if check fails
enabled_by_key = {}
db = None
try:
@@ -159,6 +171,26 @@ class ALwrityAgentOrchestrator:
self.trend_surfer_agent = TrendSurferAgent(intel_service, self.user_id)
self.agents['trend'] = self.trend_surfer_agent
# Content Guardian Agent
if enabled_by_key.get("content_guardian", True):
try:
from services.intelligence.sif_agents import ContentGuardianAgent
from services.intelligence.txtai_service import TxtaiIntelligenceService
# Initialize intelligence service if not already available
intel_service = TxtaiIntelligenceService(self.user_id)
# Initialize Content Guardian Agent
self.content_guardian_agent = ContentGuardianAgent(
intelligence_service=intel_service,
user_id=self.user_id,
sif_service=None # SIF service is optional/circular dependency handling
)
self.agents['guardian'] = self.content_guardian_agent
logger.info(f"Initialized ContentGuardianAgent for user {self.user_id}")
except Exception as e:
logger.error(f"Failed to initialize ContentGuardianAgent: {e}")
logger.info(f"Created {len(self.agents)} specialized agents for user {self.user_id}")
except Exception as e:

View File

@@ -0,0 +1,213 @@
import logging
import time
from datetime import datetime
from sqlalchemy import text
from services.database import get_session_for_user
from models.subscription_models import APIProvider, UsageSummary
from services.subscription import PricingService
logger = logging.getLogger(__name__)
def track_agent_usage_sync(user_id: str, model_name: str, prompt: str, response_text: str, duration: float):
    """
    Synchronously track agent LLM usage.

    This mimics the logic in llm_text_gen to ensure consistency and robustness.
    One invocation records a single request: it increments the per-provider
    call and token counters plus the totals in ``usage_summaries`` (creating
    the row for the current billing period when it does not exist yet) and
    inserts one row into ``api_usage_logs``.

    Token counts are estimates (whitespace-separated words * 1.3). When the
    user's limits define a ``<provider>_tokens`` value greater than zero, the
    stored per-provider token counter is capped at that limit.

    Args:
        user_id: User whose usage is being recorded.
        model_name: Model identifier; used to pick the provider bucket.
        prompt: Prompt text sent to the model (``None`` counts as empty).
        response_text: Text returned by the model (``None`` counts as empty).
        duration: Value stored in the log row's ``response_time`` column.

    Returns:
        None. Every failure is logged and swallowed so that tracking never
        breaks the calling agent.
    """
    try:
        # Detect provider from the model name (Gemini is the default).
        provider_enum = APIProvider.GEMINI
        actual_provider_name = "gemini"
        model_lower = (model_name or "").lower()
        if "gemini" in model_lower:
            provider_enum = APIProvider.GEMINI
            actual_provider_name = "gemini"
        elif "gpt" in model_lower or "openai" in model_lower or "mistral" in model_lower:
            # HuggingFace/Mistral often mapped to gpt-oss or mistral
            provider_enum = APIProvider.MISTRAL
            actual_provider_name = "huggingface"
        elif "claude" in model_lower or "anthropic" in model_lower:
            provider_enum = APIProvider.ANTHROPIC
            actual_provider_name = "anthropic"

        logger.info(f"[AgentTracking] Tracking usage for user {user_id}, provider {actual_provider_name}, model {model_name}")

        db = get_session_for_user(user_id)
        if not db:
            logger.error(f"[AgentTracking] Could not get database session for user {user_id}")
            return

        try:
            # Estimate tokens. None is treated as empty text so a missing
            # prompt/response cannot abort the whole tracking call.
            tokens_input = int(len(str(prompt or "").split()) * 1.3)
            tokens_output = int(len(str(response_text or "").split()) * 1.3)
            tokens_total = tokens_input + tokens_output

            pricing = PricingService(db)
            current_period = pricing.get_current_billing_period(user_id) or datetime.now().strftime("%Y-%m")

            # Get limits
            limits = pricing.get_user_limits(user_id)
            token_limit = 0
            # provider_key comes from the APIProvider enum (never from user
            # input), so interpolating it into column names below is safe.
            provider_key = provider_enum.value
            if limits and limits.get('limits'):
                token_limit = limits['limits'].get(f"{provider_key}_tokens", 0) or 0

            period_params = {'user_id': user_id, 'period': current_period}

            # Read the current counters; a missing row means this is the first
            # tracked call of the billing period.
            counters_query = text(f"""
                SELECT {provider_key}_calls, {provider_key}_tokens
                FROM usage_summaries
                WHERE user_id = :user_id AND billing_period = :period
                LIMIT 1
            """)
            current_calls_before = 0
            current_tokens_before = 0
            row = db.execute(counters_query, period_params).first()
            if row is not None:
                current_calls_before = row[0] if row[0] is not None else 0
                current_tokens_before = row[1] if row[1] is not None else 0
            else:
                # Create new summary
                summary = UsageSummary(user_id=user_id, billing_period=current_period)
                db.add(summary)
                db.flush()

            # Update calls. The increment happens inside the UPDATE itself so
            # concurrent agent calls cannot overwrite each other's count.
            new_calls = current_calls_before + 1  # only used for the log line
            update_calls_query = text(f"""
                UPDATE usage_summaries
                SET {provider_key}_calls = COALESCE({provider_key}_calls, 0) + 1
                WHERE user_id = :user_id AND billing_period = :period
            """)
            db.execute(update_calls_query, period_params)

            # Update tokens with limit check
            if provider_enum in [APIProvider.GEMINI, APIProvider.OPENAI, APIProvider.ANTHROPIC, APIProvider.MISTRAL]:
                projected_new_tokens = current_tokens_before + tokens_total
                if token_limit > 0 and projected_new_tokens > token_limit:
                    # Cap at the limit and only account for the part that fit.
                    new_tokens = token_limit
                    tokens_total = max(0, token_limit - current_tokens_before)
                else:
                    new_tokens = projected_new_tokens

                update_tokens_query = text(f"""
                    UPDATE usage_summaries
                    SET {provider_key}_tokens = :new_tokens
                    WHERE user_id = :user_id AND billing_period = :period
                """)
                db.execute(update_tokens_query, {**period_params, 'new_tokens': new_tokens})
            else:
                tokens_total = 0

            # Split the (possibly capped) total back into input/output shares.
            tracked_tokens_input = min(tokens_input, tokens_total)
            tracked_tokens_output = max(0, tokens_total - tracked_tokens_input)

            # Calculate cost
            try:
                cost_info = pricing.calculate_api_cost(
                    provider=provider_enum,
                    model_name=model_name,
                    tokens_input=tracked_tokens_input,
                    tokens_output=tracked_tokens_output,
                    request_count=1
                )
                cost_total = cost_info.get('cost_total', 0.0) or 0.0
                cost_input = cost_info.get('cost_input', 0.0) or 0.0
                cost_output = cost_info.get('cost_output', 0.0) or 0.0
            except Exception as e:
                logger.error(f"[AgentTracking] Cost calculation failed: {e}")
                cost_total = 0.0
                cost_input = 0.0
                cost_output = 0.0

            # Insert into APIUsageLog
            try:
                log_query = text("""
                    INSERT INTO api_usage_logs (
                        user_id, provider, endpoint, method, model_used,
                        tokens_input, tokens_output, tokens_total,
                        cost_input, cost_output, cost_total,
                        response_time, status_code, billing_period,
                        timestamp, actual_provider_name
                    ) VALUES (
                        :user_id, :provider, :endpoint, :method, :model_used,
                        :tokens_input, :tokens_output, :tokens_total,
                        :cost_input, :cost_output, :cost_total,
                        :response_time, :status_code, :billing_period,
                        :created_at, :actual_provider_name
                    )
                """)
                db.execute(log_query, {
                    'user_id': user_id,
                    'provider': provider_enum.name,  # Use name (GEMINI) not value (gemini) for SQLAlchemy Enum
                    'endpoint': 'agent_action',
                    'method': 'GENERATE',
                    'model_used': model_name,
                    'tokens_input': tracked_tokens_input,
                    'tokens_output': tracked_tokens_output,
                    'tokens_total': tracked_tokens_input + tracked_tokens_output,
                    'cost_input': cost_input,
                    'cost_output': cost_output,
                    'cost_total': cost_total,
                    'response_time': duration,
                    'status_code': 200,
                    'billing_period': current_period,
                    'created_at': datetime.utcnow(),
                    'actual_provider_name': actual_provider_name
                })
            except Exception as log_e:
                # Best effort: a failed log row must not block the summary update.
                logger.error(f"[AgentTracking] Failed to insert usage log: {log_e}")

            if cost_total > 0:
                update_costs_query = text(f"""
                    UPDATE usage_summaries
                    SET {provider_key}_cost = COALESCE({provider_key}_cost, 0) + :cost,
                        total_cost = COALESCE(total_cost, 0) + :cost
                    WHERE user_id = :user_id AND billing_period = :period
                """)
                db.execute(update_costs_query, {**period_params, 'cost': cost_total})

            # Update totals
            update_totals_query = text("""
                UPDATE usage_summaries
                SET total_calls = COALESCE(total_calls, 0) + 1,
                    total_tokens = COALESCE(total_tokens, 0) + :tokens_total
                WHERE user_id = :user_id AND billing_period = :period
            """)
            db.execute(update_totals_query, {**period_params, 'tokens_total': tokens_total})

            db.commit()
            logger.info(f"[AgentTracking] ✅ Usage tracked: {new_calls} calls, {cost_total} cost")
        except Exception as e:
            logger.error(f"[AgentTracking] Error tracking usage: {e}", exc_info=True)
            db.rollback()
        finally:
            db.close()
    except Exception as e:
        logger.error(f"[AgentTracking] Top level error: {e}", exc_info=True)

View File

@@ -32,9 +32,64 @@ from services.database import get_session_for_user
from services.intelligence.monitoring.semantic_dashboard import RealTimeSemanticMonitor
from services.intelligence.agents.safety_framework import get_safety_framework
from services.agent_activity_service import AgentActivityService
from services.intelligence.agents.agent_usage_tracking import track_agent_usage_sync
import time
logger = get_service_logger(__name__)
class TrackingLLMWrapper:
    """
    Wrapper for LLM instances to transparently track usage.

    Intercepts calls to __call__ and generate() to log metrics; every other
    attribute is delegated to the wrapped LLM.
    """

    def __init__(self, llm: Any, user_id: str, model_name: str):
        self.llm = llm
        self.user_id = user_id
        self.model_name = model_name

    def __call__(self, prompt: str, *args, **kwargs) -> Any:
        """Calling the wrapper is equivalent to calling ``generate``."""
        return self.generate(prompt, *args, **kwargs)

    @staticmethod
    def _extract_text(response: Any) -> str:
        """
        Reduce an LLM response to the text used for usage tracking.

        A list response (some models return a list of dicts) contributes its
        first element: the ``'generated_text'`` entry when that element is a
        dict containing it, otherwise ``str(first)``. An empty list yields an
        empty string instead of raising IndexError. Anything else is ``str()``-ed.
        """
        if isinstance(response, list):
            if not response:
                return ""
            first = response[0]
            if isinstance(first, dict) and 'generated_text' in first:
                return first['generated_text']
            return str(first)
        return str(response)

    def generate(self, prompt: str, *args, **kwargs) -> Any:
        """
        Run the wrapped LLM and record the call via track_agent_usage_sync.

        Returns the wrapped LLM's response unchanged. Exceptions raised by the
        LLM are logged and re-raised; tracking failures are only logged.
        """
        start_time = time.time()
        try:
            # Delegate to the underlying LLM
            if hasattr(self.llm, "generate"):
                response = self.llm.generate(prompt, *args, **kwargs)
            else:
                response = self.llm(prompt, *args, **kwargs)
        except Exception as e:
            logger.error(f"LLM generation failed in tracking wrapper: {e}")
            raise

        # Track usage. Kept separate from the generation try-block so that a
        # tracking problem can never be reported (or raised) as an LLM failure.
        duration = time.time() - start_time
        try:
            track_agent_usage_sync(
                user_id=self.user_id,
                model_name=self.model_name,
                prompt=prompt,
                response_text=self._extract_text(response),
                duration=duration
            )
        except Exception as e:
            logger.warning(f"Failed to track agent usage in wrapper: {e}")

        return response

    def __getattr__(self, name):
        # Delegate other attribute access to the underlying LLM.
        # Look the wrapped object up in __dict__ directly: going through
        # self.llm would re-enter __getattr__ forever when 'llm' has not been
        # set yet (e.g. instance created via __new__, copy or unpickle).
        try:
            wrapped = self.__dict__["llm"]
        except KeyError:
            raise AttributeError(name) from None
        return getattr(wrapped, name)
@dataclass
class AgentAction:
"""Represents an action taken by an agent"""
@@ -114,6 +169,10 @@ class BaseALwrityAgent(ABC):
self.txtai_agent = None
self.llm = llm # Ensure llm is set if provided, regardless of txtai availability
# Wrap LLM with tracking if it exists
if self.llm:
self.llm = TrackingLLMWrapper(self.llm, self.user_id, self.model_name)
self.agent_key = self._resolve_agent_key(agent_type)
self._agent_profile = self._load_agent_profile_overrides()
self._prompt_context = self._load_prompt_context()
@@ -121,10 +180,17 @@ class BaseALwrityAgent(ABC):
if TXTAI_AVAILABLE:
try:
if not self.llm:
self.llm = LLM(model_name)
self.txtai_agent = self._create_txtai_agent()
logger.info(f"Initialized txtai agent for {agent_type} - {self.agent_id}")
# Create new LLM if not provided
raw_llm = LLM(model_name)
# Wrap it
self.llm = TrackingLLMWrapper(raw_llm, self.user_id, self.model_name)
try:
self.txtai_agent = self._create_txtai_agent()
logger.info(f"Initialized txtai agent for {agent_type} - {self.agent_id}")
except Exception as inner_e:
logger.warning(f"Could not initialize specific txtai agent for {agent_type}: {inner_e}")
self.txtai_agent = self._create_fallback_agent()
except Exception as e:
logger.error(f"Failed to initialize txtai agent for {agent_type}: {e}")
self.txtai_agent = self._create_fallback_agent()
@@ -134,6 +200,38 @@ class BaseALwrityAgent(ABC):
# Initialize safety framework
self.safety_framework = get_safety_framework(user_id)
async def _generate_llm_response(self, prompt: str) -> str:
    """
    Helper to generate text using the agent's LLM with usage tracking.

    Centralized method for all agents inheriting from BaseALwrityAgent.
    The LLM call runs in the default executor so a synchronous LLM does not
    block the event loop.

    Args:
        prompt: Prompt text passed to the LLM.

    Returns:
        The generated text. For a list response the first element is used
        (its ``'generated_text'`` entry when it is a dict containing one);
        an empty list yields ``""``. Returns ``"[LLM Unavailable]"`` when the
        agent has no LLM and ``"[Generation Failed]"`` when the call raises.
    """
    if not self.llm:
        return "[LLM Unavailable]"

    try:
        # We are inside a coroutine, so the running loop is the right one;
        # get_event_loop() is deprecated for this use.
        loop = asyncio.get_running_loop()

        # Use the wrapped LLM's generate method (which handles tracking)
        if hasattr(self.llm, "generate"):
            response = await loop.run_in_executor(None, lambda: self.llm.generate(prompt))
        else:
            response = await loop.run_in_executor(None, lambda: self.llm(prompt))

        # Handle list output (some models return list of dicts)
        if isinstance(response, list):
            if not response:
                # No generations at all: report empty text rather than letting
                # response[0] raise and be mislabelled as a failed call.
                return ""
            first = response[0]
            if isinstance(first, dict) and 'generated_text' in first:
                return first['generated_text']
            return str(first)

        return str(response)
    except Exception as e:
        logger.error(f"LLM generation failed in agent {self.agent_type}: {e}")
        return "[Generation Failed]"
def _resolve_agent_key(self, agent_type: str) -> str:
value = str(agent_type or "").strip()
if value.lower() == "strategyorchestrator".lower():

View File

@@ -758,6 +758,11 @@ async def get_agent_performance_summary(user_id: str, agent_id: str) -> Dict[str
"""Get comprehensive performance summary for an agent"""
return await performance_service.get_agent_performance_summary(user_id, agent_id)
async def get_all_agents_performance_summary(user_id: str) -> List[Dict[str, Any]]:
async def get_all_agents_performance_summary(user_id: str) -> List[Dict[str, Any]]:
"""Get performance summary for all agents for a user"""
return await performance_service.get_all_agents_performance_summary(user_id)
return await performance_service.get_all_agents_performance_summary(user_id)
# Alias for backward compatibility
PerformanceMonitor = AgentPerformanceMonitor
performance_monitor = performance_service
AgentPerformanceMetrics = AgentPerformanceSnapshot

View File

@@ -13,6 +13,7 @@ from loguru import logger
from ..txtai_service import TxtaiIntelligenceService
from services.intelligence.agents.core_agent_framework import BaseALwrityAgent, AgentAction
from services.seo_tools.content_strategy_service import ContentStrategyService
from services.intelligence.sif_agents import SharedLLMWrapper, LocalLLMWrapper
try:
from services.intelligence.sif_integration import SIFIntegrationService
SIF_AVAILABLE = True
@@ -20,14 +21,36 @@ except ImportError:
SIF_AVAILABLE = False
try:
from txtai import Agent, LLM
# Try importing from pipeline first (standard location)
from txtai.pipeline import Agent, LLM
TXTAI_AVAILABLE = True
except ImportError:
TXTAI_AVAILABLE = False
logger.warning("txtai not available, using fallback implementation")
try:
# Fallback to top-level import
from txtai import Agent, LLM
TXTAI_AVAILABLE = True
except ImportError:
TXTAI_AVAILABLE = False
Agent = None
LLM = None
logger.warning("txtai not available, using fallback implementation")
class SIFBaseAgent:
def __init__(self, intelligence_service: TxtaiIntelligenceService):
class SIFBaseAgent(BaseALwrityAgent):
def __init__(self, intelligence_service: TxtaiIntelligenceService, user_id: str, agent_type: str = "sif_agent", model_name: str = "Qwen/Qwen2.5-3B-Instruct", llm: Any = None):
    """
    Set up the agent's two LLM handles and the intelligence service.

    ``shared_llm`` always wraps the shared provider. When no ``llm`` is
    supplied, a lazily-loaded local model is used if txtai is available,
    otherwise the shared wrapper doubles as the agent's LLM.
    """
    # Shared LLM for external/high-quality generation.
    self.shared_llm = SharedLLMWrapper(user_id)

    # Local LLM for internal agent work (the default for SIF agents), with
    # the shared wrapper as fallback when txtai is missing.
    if llm is None:
        llm = LocalLLMWrapper(model_name) if TXTAI_AVAILABLE else self.shared_llm

    super().__init__(user_id, agent_type, model_name, llm)
    self.intelligence = intelligence_service
def _log_agent_operation(self, operation: str, **kwargs):
@@ -36,9 +59,27 @@ class SIFBaseAgent:
if kwargs:
logger.debug(f"[{self.__class__.__name__}] Parameters: {kwargs}")
def _create_txtai_agent(self):
    """
    Build a minimal txtai Agent around this agent's LLM.

    SIF agents talk to the intelligence service directly; this only exposes
    the LLM through the standard agent interface. Returns None when txtai
    (or its Agent class) is unavailable or when construction fails.
    """
    if TXTAI_AVAILABLE and Agent is not None:
        # A simple agent with no tools, just the LLM.
        try:
            return Agent(llm=self.llm, tools=[])
        except Exception as e:
            logger.warning(f"Failed to create txtai Agent: {e}")
    return None
class StrategyArchitectAgent(SIFBaseAgent):
"""Agent for discovering content pillars and identifying strategic gaps."""
def __init__(self, intelligence_service: TxtaiIntelligenceService, user_id: str):
super().__init__(intelligence_service, user_id, agent_type="strategy_architect")
async def discover_pillars(self) -> List[Dict[str, Any]]:
"""Identify content pillars through semantic clustering."""
self._log_agent_operation("Discovering content pillars")
@@ -108,9 +149,61 @@ class ContentGuardianAgent(SIFBaseAgent):
CANNIBALIZATION_THRESHOLD = 0.85 # Similarity threshold for cannibalization warning
ORIGINALITY_THRESHOLD = 0.75 # Minimum originality score
def __init__(self, intelligence_service: TxtaiIntelligenceService, sif_service: Any = None):
super().__init__(intelligence_service)
def __init__(self, intelligence_service: TxtaiIntelligenceService, user_id: str, sif_service: Any = None):
super().__init__(intelligence_service, user_id, agent_type="content_guardian")
self.sif_service = sif_service
# Lazy initialization of SIF service if not provided
if self.sif_service is None and SIF_AVAILABLE:
try:
self.sif_service = SIFIntegrationService(user_id)
logger.info(f"[{self.__class__.__name__}] Lazily initialized SIFIntegrationService")
except Exception as e:
logger.warning(f"[{self.__class__.__name__}] Failed to lazily initialize SIF service: {e}")
async def assess_content_quality(self, content: str) -> Dict[str, Any]:
    """
    Assess content quality based on originality, readability, and cannibalization risks.

    Args:
        content: The text to evaluate.

    Returns:
        A dict with ``quality_score`` (weighted: originality 40%, style 30%,
        readability 30%), the individual scores, the raw cannibalization and
        style results, and ``is_acceptable`` (quality above 0.7 and no
        cannibalization warning). On failure returns
        ``{"error": <message>, "quality_score": 0.0}``.
    """
    self._log_agent_operation("Assessing content quality", content_length=len(content))

    try:
        # 1. Check for cannibalization
        cannibalization_result = await self.check_cannibalization(content)

        # 2. Check originality (if not cannibalized)
        originality_score = 1.0
        if not cannibalization_result.get("warning"):
            originality_result = await self.verify_originality(content, None)
            originality_score = originality_result.get("originality_score", 1.0)

        # 3. Check Style Compliance
        style_result = await self.style_enforcer(content)
        style_score = style_result.get("compliance_score", 1.0)

        # 4. Basic readability heuristic: average words per sentence.
        words = content.split()
        # Ignore empty fragments (e.g. the one after a trailing period);
        # counting them as sentences understates the average length.
        sentences = [fragment for fragment in content.split('.') if fragment.strip()]
        avg_sentence_length = len(words) / max(1, len(sentences))
        # Full marks below 20 words per sentence, then -0.05 per extra word,
        # floored at 0.5.
        readability_score = 1.0 if avg_sentence_length < 20 else max(0.5, 1.0 - (avg_sentence_length - 20) * 0.05)

        # Weighted Score: Originality (40%) + Style (30%) + Readability (30%)
        quality_score = (originality_score * 0.4) + (style_score * 0.3) + (readability_score * 0.3)

        return {
            "quality_score": quality_score,
            "originality_score": originality_score,
            "readability_score": readability_score,
            "style_score": style_score,
            "cannibalization_risk": cannibalization_result,
            "style_compliance": style_result,
            "is_acceptable": quality_score > 0.7 and not cannibalization_result.get("warning", False)
        }
    except Exception as e:
        logger.error(f"[{self.__class__.__name__}] Failed to assess content quality: {e}")
        return {"error": str(e), "quality_score": 0.0}
async def check_cannibalization(self, new_draft: str) -> Dict[str, Any]:
"""Check if a new draft competes semantically with existing pages."""
@@ -193,25 +286,74 @@ class ContentGuardianAgent(SIFBaseAgent):
# 1. Fetch Style Guidelines from SIF if not provided
if not style_guidelines and self.sif_service:
try:
# Search for website analysis to get brand voice/style
# We assume the most relevant 'website_analysis' doc contains the guidelines
results = await self.intelligence.search("website analysis brand voice style", limit=1)
if results:
import json
res = results[0]
metadata_str = res.get('object')
metadata = json.loads(metadata_str) if isinstance(metadata_str, str) else (metadata_str or res)
# Use central SIF service to get robust context
seo_context = await self.sif_service.get_seo_context()
if seo_context and "error" not in seo_context:
# Extract brand voice/style from the context
# The context structure is normalized in get_seo_context
if metadata.get('type') == 'website_analysis':
report = metadata.get('full_report', {})
style_guidelines = {
"tone": report.get('brand_analysis', {}).get('brand_voice', 'neutral'),
"style_patterns": report.get('style_patterns', {}),
"writing_style": report.get('writing_style', {})
}
logger.info(f"[{self.__class__.__name__}] Retrieved style guidelines from SIF: {style_guidelines.get('tone')}")
# NOTE: get_seo_context() returns a filtered/flattened dict ('seo_audit',
# 'sitemap_analysis', 'crawl_result', ...). Brand voice usually lives in
# WebsiteAnalysis.brand_analysis and may not be exposed in that simplified result.
# TODO: extend SIFIntegrationService.get_seo_context() to return brand/style
# info so it remains the single point of truth, instead of working around it here.
# Until then, keep the previous manual index search
# ("website analysis brand voice style") but with better error handling,
# mirroring get_seo_context's robustness (e.g. defensive parsing).
results = await self.intelligence.search("website analysis brand voice style", limit=1)
if results:
res = results[0]
metadata_str = res.get('object')
metadata = json.loads(metadata_str) if isinstance(metadata_str, str) else (metadata_str or res)
if metadata.get('type') == 'website_analysis':
report = metadata.get('full_report', {})
# Support both flat and nested structures
brand_analysis = report.get('brand_analysis') or report.get('brand_voice', {})
if isinstance(brand_analysis, str):
# Handle case where it might be a JSON string
try: brand_analysis = json.loads(brand_analysis)
except: brand_analysis = {"brand_voice": brand_analysis}
style_guidelines = {
"tone": brand_analysis.get('brand_voice', 'neutral') if isinstance(brand_analysis, dict) else 'neutral',
"style_patterns": report.get('style_patterns', {}),
"writing_style": report.get('writing_style', {})
}
logger.info(f"[{self.__class__.__name__}] Retrieved style guidelines from SIF index")
except Exception as e:
logger.warning(f"[{self.__class__.__name__}] Failed to retrieve style guidelines from SIF: {e}")
logger.warning(f"[{self.__class__.__name__}] Failed to retrieve style guidelines: {e}")
issues = []
score = 1.0
@@ -246,6 +388,55 @@ class ContentGuardianAgent(SIFBaseAgent):
logger.error(f"[{self.__class__.__name__}] Style enforcement failed: {e}")
return {"error": str(e)}
async def perform_site_audit(self, website_url: str, limit: int = 10) -> Dict[str, Any]:
    """
    Perform a quality audit on the user's website content.

    Searches the intelligence index for ``site:<website_url>`` (at most
    ``limit`` hits), runs ``assess_content_quality`` on every hit whose text
    has at least 100 characters, and averages the resulting quality scores.

    Returns:
        A report dict (url, pages audited, average score, per-page details,
        UTC timestamp), ``{"error": "No content found"}`` when the search is
        empty, or ``{"error": <message>}`` when anything raises.
    """
    self._log_agent_operation("Performing site audit", website_url=website_url)

    try:
        # Pull candidate pages from the index.
        # NOTE(review): whether "site:" queries match depends on how the data
        # was indexed — confirm against the indexing code.
        hits = await self.intelligence.search(f"site:{website_url}", limit=limit)
        if not hits:
            logger.info(f"[{self.__class__.__name__}] No content found for site audit")
            return {"error": "No content found"}

        page_reports = []
        quality_sum = 0.0
        for hit in hits:
            body = hit.get('text', '')
            # Only pages with a meaningful amount of text are assessed.
            if body and len(body) >= 100:
                assessment = await self.assess_content_quality(body)
                page_reports.append({
                    "id": hit.get('id'),
                    "title": hit.get('title', 'Unknown'),
                    "quality": assessment
                })
                quality_sum += assessment.get('quality_score', 0.0)

        if page_reports:
            avg_quality = quality_sum / len(page_reports)
        else:
            avg_quality = 0.0

        report = {
            "website_url": website_url,
            "pages_audited": len(page_reports),
            "average_quality_score": avg_quality,
            "details": page_reports,
            "timestamp": datetime.utcnow().isoformat()
        }
        logger.info(f"[{self.__class__.__name__}] Site audit completed. Avg Quality: {avg_quality:.2f}")
        return report
    except Exception as e:
        logger.error(f"[{self.__class__.__name__}] Site audit failed: {e}")
        return {"error": str(e)}
async def safety_filter(self, text: str) -> Dict[str, Any]:
"""
Tool: Flags potentially harmful, offensive, or sensitive content.
@@ -290,8 +481,8 @@ class LinkGraphAgent(SIFBaseAgent):
RELEVANCE_THRESHOLD = 0.6 # Minimum relevance score for link suggestions
MAX_SUGGESTIONS = 10 # Maximum number of link suggestions
def __init__(self, intelligence_service: TxtaiIntelligenceService, sif_service: Any = None):
super().__init__(intelligence_service)
def __init__(self, intelligence_service: TxtaiIntelligenceService, user_id: str, sif_service: Any = None):
super().__init__(intelligence_service, user_id, agent_type="link_graph")
self.sif_service = sif_service
async def suggest_internal_links(self, draft: str) -> List[Dict[str, Any]]:
@@ -823,9 +1014,10 @@ class ContentStrategyAgent(BaseALwrityAgent):
Maintain the original meaning and tone.
"""
if hasattr(self.llm, "generate"):
if self.llm:
# We assume the LLM returns JSON-like text or we parse it
response = self.llm.generate(f"{system_prompt}\n\nText to rewrite:\n{content}")
response = await self._generate_llm_response(f"{system_prompt}\n\nText to rewrite:\n{content}")
# Simple parsing fallback if LLM returns raw text
if isinstance(response, str) and not response.strip().startswith("{"):
optimized_content = response
@@ -1456,34 +1648,7 @@ class SEOOptimizationAgent(BaseALwrityAgent):
"timestamp": datetime.utcnow().isoformat()
}
async def _generate_llm_response(self, prompt: str) -> str:
"""Helper to generate text using the agent's LLM"""
if not self.llm:
return "[LLM Unavailable]"
try:
# Run in executor to avoid blocking if LLM is synchronous
loop = asyncio.get_event_loop()
# Check if LLM is a txtai pipeline (callable) or has generate method
if hasattr(self.llm, "generate"):
# Some txtai pipelines use generate, some are just called
response = await loop.run_in_executor(None, lambda: self.llm.generate(prompt))
else:
# Assume callable (standard txtai pipeline)
response = await loop.run_in_executor(None, lambda: self.llm(prompt))
# Handle list output (some models return list of dicts)
if isinstance(response, list):
if response and isinstance(response[0], dict) and 'generated_text' in response[0]:
return response[0]['generated_text']
return str(response[0])
return str(response)
except Exception as e:
logger.error(f"LLM generation failed: {e}")
return "[Generation Failed]"
async def _strategy_generator_tool(self, context: Dict[str, Any]) -> Dict[str, Any]:
"""SEO strategy generation tool"""
audit_results = context.get("audit_results", {})
@@ -1629,8 +1794,8 @@ class SocialAmplificationAgent(BaseALwrityAgent):
Return ONLY the adapted content.
"""
if hasattr(self.llm, "generate"):
adapted_content = self.llm.generate(prompt)
if self.llm:
adapted_content = await self._generate_llm_response(prompt)
else:
adapted_content = f"[Mock {platform}]: {content[:50]}... #adapted"

View File

@@ -19,7 +19,7 @@ class TrendSurferAgent(SIFBaseAgent):
"""
def __init__(self, intelligence_service: TxtaiIntelligenceService, user_id: str):
super().__init__(intelligence_service)
super().__init__(intelligence_service, user_id, agent_type="trend_surfer")
self.user_id = user_id
self.signal_detector = MarketSignalDetector(user_id)
self.trends_service = GoogleTrendsService()
@@ -148,15 +148,41 @@ class TrendSurferAgent(SIFBaseAgent):
else:
recommendation = "Create new content"
# Use LLM to generate creative angle
headline = f"Trend: {trend.description}"
angle = f"Leverage {trend.source} trend on {trend.related_topics[0] if trend.related_topics else 'topic'}"
try:
prompt = f"""
Analyze this market trend signal and propose a content angle:
Trend: {trend.description}
Related Topics: {', '.join(trend.related_topics)}
Impact Score: {trend.impact_score}
Recommendation: {recommendation}
Provide a catchy headline and a 1-sentence strategic angle.
Format: Headline | Angle
"""
response = await self._generate_llm_response(prompt)
if response and "|" in response:
parts = response.split('|')
headline = parts[0].strip()
angle = parts[1].strip()
elif response:
angle = response.strip()
except Exception as e:
logger.warning(f"[{self.__class__.__name__}] LLM generation failed for opportunity: {e}")
return {
"trend_id": trend.signal_id,
"topic": trend.description,
"headline": headline,
"source": trend.source,
"urgency": trend.urgency_level.value,
"impact_score": trend.impact_score,
"current_coverage": coverage_score,
"recommendation": recommendation,
"suggested_angle": f"Leverage {trend.source} trend on {trend.related_topics[0] if trend.related_topics else 'topic'}",
"suggested_angle": angle,
"detected_at": trend.detected_at
}

View File

@@ -5,13 +5,76 @@ Each agent leverages TxtaiIntelligenceService for semantic operations.
"""
import traceback
import json
import asyncio
from typing import List, Dict, Any, Optional
from datetime import datetime
from loguru import logger
from .txtai_service import TxtaiIntelligenceService
from .txtai_service import TxtaiIntelligenceService, TXTAI_AVAILABLE
from services.intelligence.agents.core_agent_framework import BaseALwrityAgent
from services.llm_providers.main_text_generation import llm_text_gen
class SIFBaseAgent:
def __init__(self, intelligence_service: TxtaiIntelligenceService):
# Optional txtai imports
try:
from txtai.pipeline import Agent, LLM
except ImportError:
Agent = None
LLM = None
class SharedLLMWrapper:
    """
    Adapter that exposes the shared ALwrity text-generation service through a
    txtai-LLM-like interface (``generate`` plus direct call).
    """

    def __init__(self, user_id: str):
        # Forwarded to the shared provider with every generation request.
        self.user_id = user_id

    def __call__(self, prompt: str, **kwargs) -> str:
        """Calling the wrapper is shorthand for ``generate``."""
        return self.generate(prompt, **kwargs)

    def generate(self, prompt: str, **kwargs) -> str:
        """Generate text for ``prompt`` via the shared LLM provider."""
        # Extra options such as 'max_tokens' are accepted but not forwarded;
        # llm_text_gen applies its own defaults (they could be mapped if needed).
        generated = llm_text_gen(prompt, user_id=self.user_id)
        return generated
class LocalLLMWrapper:
    """
    Local txtai LLM that is only loaded on first use.

    Deferring the load keeps heavy model initialisation out of server startup.
    """

    def __init__(self, model_path: str):
        self.model_path = model_path
        # Populated by the ``llm`` property on first access.
        self._llm = None

    @property
    def llm(self):
        """Return the underlying txtai LLM, loading it on first access."""
        if self._llm is not None:
            return self._llm
        if LLM is None:
            raise ImportError("txtai.pipeline.LLM is not available")
        logger.info(f"Loading local LLM: {self.model_path}")
        self._llm = LLM(path=self.model_path)
        return self._llm

    def generate(self, prompt: str, **kwargs) -> str:
        """Run the prompt through the (lazily loaded) model."""
        pipeline = self.llm
        return pipeline(prompt, **kwargs)

    def __call__(self, prompt: str, **kwargs) -> str:
        """Same as ``generate``: invoke the model with the prompt."""
        pipeline = self.llm
        return pipeline(prompt, **kwargs)
class SIFBaseAgent(BaseALwrityAgent):
def __init__(self, intelligence_service: TxtaiIntelligenceService, user_id: str, agent_type: str = "sif_agent", model_name: str = "Qwen/Qwen2.5-3B-Instruct", llm: Any = None):
    """Set up a SIF agent with a hybrid LLM strategy.

    Args:
        intelligence_service: Semantic service stored on ``self.intelligence``.
        user_id: Id passed to the shared LLM wrapper and the base agent.
        agent_type: Agent type label passed to the base agent.
        model_name: Local model path/name used when a local LLM is created.
        llm: Optional pre-built LLM; when given it is used as-is.
    """
    # 1. Shared LLM for external/high-quality generation (available to all agents).
    #    Assigned before the base initializer, as in the original ordering.
    self.shared_llm = SharedLLMWrapper(user_id)

    # 2. Local LLM for internal agent work (default for SIF agents).
    if llm is None:
        # LocalLLMWrapper needs the optional ``LLM`` class as well as txtai
        # itself; if either is missing, fall back to the shared LLM instead of
        # handing the agent a wrapper that raises ImportError on first use.
        if TXTAI_AVAILABLE and LLM is not None:
            llm = LocalLLMWrapper(model_name)
        else:
            llm = self.shared_llm

    super().__init__(user_id, agent_type, model_name, llm)
    self.intelligence = intelligence_service
def _log_agent_operation(self, operation: str, **kwargs):
@@ -20,9 +83,23 @@ class SIFBaseAgent:
if kwargs:
logger.debug(f"[{self.__class__.__name__}] Parameters: {kwargs}")
def _create_txtai_agent(self):
    """
    Build a minimal txtai agent around this agent's LLM.

    SIF agents use the intelligence service directly, but capabilities can be
    exposed via a standard agent interface if needed.

    Returns:
        An ``Agent`` with no tools, or ``None`` when txtai or its ``Agent``
        class is unavailable.
    """
    # ``Agent`` comes from an optional import and may be None even when
    # TXTAI_AVAILABLE is true; calling None would raise TypeError.
    if not TXTAI_AVAILABLE or Agent is None:
        return None

    # Return a simple agent that can use the LLM
    return Agent(llm=self.llm, tools=[])
class StrategyArchitectAgent(SIFBaseAgent):
"""Agent for discovering content pillars and identifying strategic gaps."""
def __init__(self, intelligence_service: TxtaiIntelligenceService, user_id: str):
    """Initialise the agent, registering it under the "strategy_architect" type."""
    super().__init__(
        intelligence_service,
        user_id,
        agent_type="strategy_architect",
    )
async def discover_pillars(self) -> List[Dict[str, Any]]:
"""Identify content pillars through semantic clustering."""
self._log_agent_operation("Discovering content pillars")
@@ -58,6 +135,61 @@ class StrategyArchitectAgent(SIFBaseAgent):
logger.error(f"[{self.__class__.__name__}] Failed to discover pillars: {e}")
logger.error(f"[{self.__class__.__name__}] Full traceback: {traceback.format_exc()}")
return []
async def analyze_content_strategy(self, website_data: Dict[str, Any]) -> List[Dict[str, Any]]:
    """
    Analyze content strategy based on website data and semantic insights.

    Args:
        website_data: Dictionary containing website analysis data

    Returns:
        List of strategic recommendations (dicts with at least ``type``,
        ``priority``, ``title`` and ``description``). An empty list is
        returned if the analysis fails.
    """
    self._log_agent_operation("Analyzing content strategy")

    try:
        recommendations = []

        # 1. Discover existing pillars
        pillars = await self.discover_pillars()

        # 2. Analyze gaps based on pillars (simplified logic for now)
        if not pillars:
            recommendations.append({
                "type": "strategy_gap",
                "priority": "high",
                "title": "Establish Core Content Pillars",
                "description": "No clear content clusters found. Focus on defining 3-5 core topics to build authority."
            })
        else:
            # Suggest strengthening weak pillars
            for pillar in pillars:
                size = pillar.get('size')
                pillar_id = pillar.get('pillar_id')
                if size is None or pillar_id is None:
                    # A malformed entry must not discard every other recommendation.
                    logger.warning(f"[{self.__class__.__name__}] Skipping malformed pillar entry: {pillar}")
                    continue
                if size < 3:
                    recommendations.append({
                        "type": "content_depth",
                        "priority": "medium",
                        "title": f"Strengthen Pillar {pillar_id}",
                        "description": "This topic cluster has few articles. Create more content to establish authority.",
                        "pillar_id": pillar_id
                    })

        # 3. Add generic recommendations based on website data if available
        if website_data and not website_data.get('description'):
            recommendations.append({
                "type": "metadata",
                "priority": "high",
                "title": "Missing Meta Description",
                "description": "Website is missing a meta description. Add one to improve SEO CTR."
            })

        logger.info(f"[{self.__class__.__name__}] Generated {len(recommendations)} strategic recommendations")
        return recommendations

    except Exception as e:
        logger.error(f"[{self.__class__.__name__}] Failed to analyze content strategy: {e}")
        # Keep the traceback, matching the error reporting in discover_pillars.
        logger.error(f"[{self.__class__.__name__}] Full traceback: {traceback.format_exc()}")
        return []
def _calculate_cluster_confidence(self, cluster_indices: List[int]) -> float:
"""Calculate confidence score for a cluster based on its size and coherence."""
@@ -92,10 +224,40 @@ class ContentGuardianAgent(SIFBaseAgent):
CANNIBALIZATION_THRESHOLD = 0.85 # Similarity threshold for cannibalization warning
ORIGINALITY_THRESHOLD = 0.75 # Minimum originality score
def __init__(self, intelligence_service: TxtaiIntelligenceService, sif_service: Any = None):
super().__init__(intelligence_service)
def __init__(self, intelligence_service: TxtaiIntelligenceService, user_id: str, sif_service: Any = None):
    """Initialise the agent under the "content_guardian" type and keep the optional SIF service handle."""
    super().__init__(
        intelligence_service,
        user_id,
        agent_type="content_guardian",
    )
    self.sif_service = sif_service
async def assess_content_quality(self, website_data: Dict[str, Any]) -> Dict[str, Any]:
    """Score overall content quality from the website's description or title.

    Returns a dict with ``score`` plus the style and safety analyses. When
    there is no text to analyze the score is 0.5 with a ``reason``; on failure
    the score is 0.0 with an ``error`` message.
    """
    self._log_agent_operation("Assessing content quality")

    try:
        # Prefer the description; fall back to the title when it is empty.
        sample = website_data.get('description', '') or website_data.get('title', '')
        if not sample:
            return {"score": 0.5, "reason": "No content to analyze"}

        style_report = await self.style_enforcer(sample)
        safety_report = await self.safety_filter(sample)

        # Start from style compliance, then halve it if safety flagged the text.
        score = style_report.get('compliance_score', 0.8)
        flagged = safety_report.get('action') == 'flag_for_review'
        if flagged:
            score *= 0.5

        assessment = {
            "score": score,
            "style_analysis": style_report,
            "safety_analysis": safety_report,
            "analyzed_text_length": len(sample)
        }
        return assessment
    except Exception as exc:
        logger.error(f"[{self.__class__.__name__}] Quality assessment failed: {exc}")
        return {"score": 0.0, "error": str(exc)}
async def check_cannibalization(self, new_draft: str) -> Dict[str, Any]:
"""Check if a new draft competes semantically with existing pages."""
self._log_agent_operation("Checking for semantic cannibalization", draft_length=len(new_draft))
@@ -274,8 +436,8 @@ class LinkGraphAgent(SIFBaseAgent):
RELEVANCE_THRESHOLD = 0.6 # Minimum relevance score for link suggestions
MAX_SUGGESTIONS = 10 # Maximum number of link suggestions
def __init__(self, intelligence_service: TxtaiIntelligenceService, sif_service: Any = None):
super().__init__(intelligence_service)
def __init__(self, intelligence_service: TxtaiIntelligenceService, user_id: str, sif_service: Any = None):
    """Initialise the agent under the "link_graph" type and keep the optional SIF service handle."""
    super().__init__(
        intelligence_service,
        user_id,
        agent_type="link_graph",
    )
    self.sif_service = sif_service
async def suggest_internal_links(self, draft: str) -> List[Dict[str, Any]]:
@@ -479,6 +641,9 @@ class CitationExpert(SIFBaseAgent):
EVIDENCE_THRESHOLD = 0.7 # Minimum relevance score for evidence
MAX_EVIDENCE = 5 # Maximum number of evidence pieces to return
def __init__(self, intelligence_service: TxtaiIntelligenceService, user_id: str):
    """Initialise the agent, registering it under the "citation_expert" type."""
    super().__init__(
        intelligence_service,
        user_id,
        agent_type="citation_expert",
    )
async def fact_checker(self, claim: str) -> List[Dict[str, Any]]:
"""
Tool: Verifies facts against trusted research data.
@@ -542,60 +707,25 @@ class CitationExpert(SIFBaseAgent):
"claim": claim,
"status": status,
"evidence_count": len(evidence),
"top_evidence": evidence[0]['source'] if evidence else None
"top_evidence": evidence[0] if evidence else None
})
return {
"status": "verification_complete",
"total_claims": len(claims),
"status": "completed",
"verified_claims": verified_results,
"unsupported_count": len([c for c in verified_results if c['status'] == 'unsupported']),
"timestamp": datetime.utcnow().isoformat()
"verification_score": len([c for c in verified_results if c['status'] == 'supported']) / len(verified_results)
}
async def verify_facts(self, claim: str) -> List[Dict[str, Any]]:
    """Verify a single claim against intelligence data.

    Runs a semantic search for ``claim`` (at most 3 hits) and keeps the hits
    whose score is strictly greater than ``EVIDENCE_THRESHOLD``.

    Args:
        claim: The statement to look up evidence for.

    Returns:
        A list of evidence dicts with ``text``, ``source`` (the hit id) and
        ``confidence`` (the hit score). Empty when nothing qualifies or when
        the search fails.
    """
    try:
        results = await self.intelligence.search(claim, limit=3)
    except Exception as e:
        # A failed lookup is logged and reported as "no evidence" so that one
        # bad claim does not abort the caller's processing of other claims.
        logger.error(f"[{self.__class__.__name__}] Failed to verify facts: {e}")
        logger.error(f"[{self.__class__.__name__}] Full traceback: {traceback.format_exc()}")
        return []

    evidence = []
    for result in results or []:
        if result.get('score', 0) > self.EVIDENCE_THRESHOLD:
            evidence.append({
                "text": result.get('text'),
                "source": result.get('id'),
                "confidence": result.get('score')
            })
    return evidence

View File

@@ -938,14 +938,14 @@ class SIFIntegrationService:
# Strategic recommendations (lazy initialization to avoid circular imports)
if not self.strategy_agent:
from .sif_agents import StrategyArchitectAgent
self.strategy_agent = StrategyArchitectAgent(self.intelligence_service)
self.strategy_agent = StrategyArchitectAgent(self.intelligence_service, user_id=self.user_id)
recommendations = await self.strategy_agent.analyze_content_strategy(website_data)
insights["strategic_recommendations"] = recommendations
# Content quality assessment (lazy initialization to avoid circular imports)
if not self.guardian_agent:
from .sif_agents import ContentGuardianAgent
self.guardian_agent = ContentGuardianAgent(self.intelligence_service, sif_service=self)
self.guardian_agent = ContentGuardianAgent(self.intelligence_service, user_id=self.user_id, sif_service=self)
quality_score = await self.guardian_agent.assess_content_quality(website_data)
insights["content_quality"] = quality_score

View File

@@ -33,7 +33,13 @@ class TxtaiIntelligenceService:
self._initialized = False
self.enable_caching = enable_caching
self.cache_manager = semantic_cache_manager if enable_caching else None
self._initialize_embeddings()
# Lazy initialization - do not initialize embeddings on startup
# self._initialize_embeddings()
def _ensure_initialized(self):
"""Lazy initialization helper."""
if not self._initialized:
self._initialize_embeddings()
def _initialize_embeddings(self):
"""Initialize txtai embeddings with local storage support and comprehensive error handling."""
@@ -106,6 +112,7 @@ class TxtaiIntelligenceService:
Args:
items: List of (id, text, metadata) tuples.
"""
self._ensure_initialized()
if not self._initialized or not self.embeddings:
logger.error(f"Cannot index content - service not initialized for user {self.user_id}")
return
@@ -145,6 +152,7 @@ class TxtaiIntelligenceService:
async def search(self, query: str, limit: int = 5) -> List[Dict[str, Any]]:
"""Perform semantic search with intelligent caching."""
self._ensure_initialized()
if not self._initialized or not self.embeddings:
logger.error(f"Cannot perform search - service not initialized for user {self.user_id}")
return []
@@ -186,6 +194,7 @@ class TxtaiIntelligenceService:
async def get_similarity(self, text1: str, text2: str) -> float:
"""Get semantic similarity between two texts with caching."""
self._ensure_initialized()
if not self._initialized or not self.embeddings:
logger.error(f"Cannot calculate similarity - service not initialized for user {self.user_id}")
return 0.0
@@ -234,6 +243,7 @@ class TxtaiIntelligenceService:
async def cluster(self, min_score: float = 0.5) -> List[List[int]]:
"""Cluster indexed content to find semantic pillars using graph-based clustering with caching."""
self._ensure_initialized()
if not self._initialized or not self.embeddings:
logger.error(f"Cannot cluster content - service not initialized for user {self.user_id}")
return []
@@ -358,6 +368,7 @@ class TxtaiIntelligenceService:
async def classify(self, text: str, labels: List[str]) -> List[Tuple[str, float]]:
"""Classify text using zero-shot classification."""
self._ensure_initialized()
if not self._initialized or not Labels:
logger.error(f"Cannot classify text - service not initialized or Labels not available for user {self.user_id}")
return []