Save local changes (GSC/Bing integrations) before merging PR #354
This commit is contained in:
@@ -17,8 +17,7 @@ from .core_agent_framework import (
|
||||
# Market signal detection
|
||||
from .market_signal_detector import (
|
||||
MarketSignal,
|
||||
MarketSignalDetector,
|
||||
MarketTrendAnalyzer
|
||||
MarketSignalDetector
|
||||
)
|
||||
|
||||
# Performance monitoring
|
||||
|
||||
@@ -105,6 +105,18 @@ class ALwrityAgentOrchestrator:
|
||||
def _create_specialized_agents(self):
|
||||
"""Create specialized marketing agents"""
|
||||
try:
|
||||
# Check if onboarding is complete before initializing heavy agents
|
||||
try:
|
||||
from services.onboarding.progress_service import OnboardingProgressService
|
||||
onboarding_service = OnboardingProgressService()
|
||||
status = onboarding_service.get_onboarding_status(self.user_id)
|
||||
if not status.get("is_completed", False):
|
||||
logger.info(f"Skipping agent initialization for user {self.user_id} - Onboarding incomplete")
|
||||
return
|
||||
except Exception as e:
|
||||
logger.warning(f"Could not check onboarding status for {self.user_id}: {e}")
|
||||
# Fallthrough to attempt initialization if check fails
|
||||
|
||||
enabled_by_key = {}
|
||||
db = None
|
||||
try:
|
||||
@@ -159,6 +171,26 @@ class ALwrityAgentOrchestrator:
|
||||
self.trend_surfer_agent = TrendSurferAgent(intel_service, self.user_id)
|
||||
self.agents['trend'] = self.trend_surfer_agent
|
||||
|
||||
# Content Guardian Agent
|
||||
if enabled_by_key.get("content_guardian", True):
|
||||
try:
|
||||
from services.intelligence.sif_agents import ContentGuardianAgent
|
||||
from services.intelligence.txtai_service import TxtaiIntelligenceService
|
||||
|
||||
# Initialize intelligence service if not already available
|
||||
intel_service = TxtaiIntelligenceService(self.user_id)
|
||||
|
||||
# Initialize Content Guardian Agent
|
||||
self.content_guardian_agent = ContentGuardianAgent(
|
||||
intelligence_service=intel_service,
|
||||
user_id=self.user_id,
|
||||
sif_service=None # SIF service is optional/circular dependency handling
|
||||
)
|
||||
self.agents['guardian'] = self.content_guardian_agent
|
||||
logger.info(f"Initialized ContentGuardianAgent for user {self.user_id}")
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to initialize ContentGuardianAgent: {e}")
|
||||
|
||||
logger.info(f"Created {len(self.agents)} specialized agents for user {self.user_id}")
|
||||
|
||||
except Exception as e:
|
||||
|
||||
213
backend/services/intelligence/agents/agent_usage_tracking.py
Normal file
213
backend/services/intelligence/agents/agent_usage_tracking.py
Normal file
@@ -0,0 +1,213 @@
|
||||
import logging
|
||||
import time
|
||||
from datetime import datetime
|
||||
from sqlalchemy import text
|
||||
from services.database import get_session_for_user
|
||||
from models.subscription_models import APIProvider, UsageSummary
|
||||
from services.subscription import PricingService
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
def _estimate_tokens(text_value) -> int:
    """Roughly estimate a token count as ``word count * 1.3``.

    ``None`` counts as zero tokens and any non-string value is stringified
    first, so a missing prompt or a structured response cannot raise here.
    """
    if text_value is None:
        return 0
    return int(len(str(text_value).split()) * 1.3)


def _detect_agent_provider(model_name):
    """Map a model name to ``(APIProvider member, actual provider label)``.

    Matching is a case-insensitive substring check; anything unrecognised
    (including ``None``) falls back to Gemini.
    """
    model_lower = str(model_name or "").lower()
    if "gemini" in model_lower:
        return APIProvider.GEMINI, "gemini"
    if "gpt" in model_lower or "openai" in model_lower or "mistral" in model_lower:
        # HuggingFace/Mistral often mapped to gpt-oss or mistral
        return APIProvider.MISTRAL, "huggingface"
    if "claude" in model_lower or "anthropic" in model_lower:
        return APIProvider.ANTHROPIC, "anthropic"
    return APIProvider.GEMINI, "gemini"  # Default


def track_agent_usage_sync(user_id: str, model_name: str, prompt: str, response_text: str, duration: float):
    """
    Synchronously track agent LLM usage.
    This mimics the logic in llm_text_gen to ensure consistency and robustness.

    The function is best-effort: every failure is logged and swallowed, so it
    never raises into the caller.

    Args:
        user_id: Owner of the usage record; also selects the database session.
        model_name: Model identifier, used to detect the provider and stored
            in the usage log.
        prompt: Text sent to the model (used for the input token estimate).
        response_text: Text returned by the model (output token estimate).
        duration: Elapsed time of the call, stored as ``response_time``.

    Returns:
        None.
    """
    try:
        # Detect provider
        provider_enum, actual_provider_name = _detect_agent_provider(model_name)

        logger.info(f"[AgentTracking] Tracking usage for user {user_id}, provider {actual_provider_name}, model {model_name}")

        db = get_session_for_user(user_id)
        if not db:
            logger.error(f"[AgentTracking] Could not get database session for user {user_id}")
            return

        try:
            # Estimate tokens (tolerates None / non-string prompt and response)
            tokens_input = _estimate_tokens(prompt)
            tokens_output = _estimate_tokens(response_text)
            tokens_total = tokens_input + tokens_output

            pricing = PricingService(db)
            current_period = pricing.get_current_billing_period(user_id) or datetime.now().strftime("%Y-%m")

            # Get limits
            limits = pricing.get_user_limits(user_id)
            token_limit = 0
            # provider_key comes from the APIProvider enum (never from user
            # input), which is why it may be interpolated into column names.
            provider_key = provider_enum.value
            if limits and limits.get('limits'):
                token_limit = limits['limits'].get(f"{provider_key}_tokens", 0) or 0

            # Check for existing record
            check_query = text("SELECT COUNT(*) FROM usage_summaries WHERE user_id = :user_id AND billing_period = :period")
            record_count = db.execute(check_query, {'user_id': user_id, 'period': current_period}).scalar()

            current_calls_before = 0
            current_tokens_before = 0

            if record_count and record_count > 0:
                # Read current values
                sql_query = text(f"""
                    SELECT {provider_key}_calls, {provider_key}_tokens
                    FROM usage_summaries
                    WHERE user_id = :user_id AND billing_period = :period
                    LIMIT 1
                """)
                result = db.execute(sql_query, {'user_id': user_id, 'period': current_period}).first()
                if result:
                    current_calls_before = result[0] if result[0] is not None else 0
                    current_tokens_before = result[1] if result[1] is not None else 0
            else:
                # Create new summary
                summary = UsageSummary(user_id=user_id, billing_period=current_period)
                db.add(summary)
                db.flush()

            # Update calls
            new_calls = current_calls_before + 1
            update_calls_query = text(f"""
                UPDATE usage_summaries
                SET {provider_key}_calls = :new_calls
                WHERE user_id = :user_id AND billing_period = :period
            """)
            db.execute(update_calls_query, {
                'new_calls': new_calls,
                'user_id': user_id,
                'period': current_period
            })

            # Update tokens with limit check
            if provider_enum in [APIProvider.GEMINI, APIProvider.OPENAI, APIProvider.ANTHROPIC, APIProvider.MISTRAL]:
                projected_new_tokens = current_tokens_before + tokens_total

                if token_limit > 0 and projected_new_tokens > token_limit:
                    # Cap the stored counter at the limit and only account
                    # for the tokens that still fit under it.
                    new_tokens = token_limit
                    tokens_total = max(0, token_limit - current_tokens_before)
                else:
                    new_tokens = projected_new_tokens

                update_tokens_query = text(f"""
                    UPDATE usage_summaries
                    SET {provider_key}_tokens = :new_tokens
                    WHERE user_id = :user_id AND billing_period = :period
                """)
                db.execute(update_tokens_query, {
                    'new_tokens': new_tokens,
                    'user_id': user_id,
                    'period': current_period
                })
            else:
                tokens_total = 0

            # Split the (possibly capped) total back into input/output parts.
            # Computed outside the cost try-block so the log insert below
            # always has these names bound.
            tracked_tokens_input = min(tokens_input, tokens_total)
            tracked_tokens_output = max(0, tokens_total - tracked_tokens_input)

            # Calculate cost
            try:
                cost_info = pricing.calculate_api_cost(
                    provider=provider_enum,
                    model_name=model_name,
                    tokens_input=tracked_tokens_input,
                    tokens_output=tracked_tokens_output,
                    request_count=1
                )
                cost_total = cost_info.get('cost_total', 0.0) or 0.0
                cost_input = cost_info.get('cost_input', 0.0) or 0.0
                cost_output = cost_info.get('cost_output', 0.0) or 0.0
            except Exception as e:
                logger.error(f"[AgentTracking] Cost calculation failed: {e}")
                cost_total = 0.0
                cost_input = 0.0
                cost_output = 0.0

            # Insert into APIUsageLog
            try:
                log_query = text("""
                    INSERT INTO api_usage_logs (
                        user_id, provider, endpoint, method, model_used,
                        tokens_input, tokens_output, tokens_total,
                        cost_input, cost_output, cost_total,
                        response_time, status_code, billing_period,
                        timestamp, actual_provider_name
                    ) VALUES (
                        :user_id, :provider, :endpoint, :method, :model_used,
                        :tokens_input, :tokens_output, :tokens_total,
                        :cost_input, :cost_output, :cost_total,
                        :response_time, :status_code, :billing_period,
                        :created_at, :actual_provider_name
                    )
                """)

                db.execute(log_query, {
                    'user_id': user_id,
                    'provider': provider_enum.name,  # Use name (GEMINI) not value (gemini) for SQLAlchemy Enum
                    'endpoint': 'agent_action',
                    'method': 'GENERATE',
                    'model_used': model_name,
                    'tokens_input': tracked_tokens_input,
                    'tokens_output': tracked_tokens_output,
                    'tokens_total': tracked_tokens_input + tracked_tokens_output,
                    'cost_input': cost_input,
                    'cost_output': cost_output,
                    'cost_total': cost_total,
                    'response_time': duration,
                    'status_code': 200,
                    'billing_period': current_period,
                    'created_at': datetime.utcnow(),
                    'actual_provider_name': actual_provider_name
                })
            except Exception as log_e:
                # A failed detail-log insert must not prevent the summary update.
                logger.error(f"[AgentTracking] Failed to insert usage log: {log_e}")

            if cost_total > 0:
                update_costs_query = text(f"""
                    UPDATE usage_summaries
                    SET {provider_key}_cost = COALESCE({provider_key}_cost, 0) + :cost,
                        total_cost = COALESCE(total_cost, 0) + :cost
                    WHERE user_id = :user_id AND billing_period = :period
                """)
                db.execute(update_costs_query, {
                    'cost': cost_total,
                    'user_id': user_id,
                    'period': current_period
                })

            # Update totals
            update_totals_query = text("""
                UPDATE usage_summaries
                SET total_calls = COALESCE(total_calls, 0) + 1,
                    total_tokens = COALESCE(total_tokens, 0) + :tokens_total
                WHERE user_id = :user_id AND billing_period = :period
            """)
            db.execute(update_totals_query, {
                'tokens_total': tokens_total,
                'user_id': user_id,
                'period': current_period
            })

            db.commit()
            logger.info(f"[AgentTracking] ✅ Usage tracked: {new_calls} calls, {cost_total} cost")

        except Exception as e:
            logger.error(f"[AgentTracking] Error tracking usage: {e}", exc_info=True)
            db.rollback()
        finally:
            db.close()

    except Exception as e:
        logger.error(f"[AgentTracking] Top level error: {e}", exc_info=True)
|
||||
@@ -32,9 +32,64 @@ from services.database import get_session_for_user
|
||||
from services.intelligence.monitoring.semantic_dashboard import RealTimeSemanticMonitor
|
||||
from services.intelligence.agents.safety_framework import get_safety_framework
|
||||
from services.agent_activity_service import AgentActivityService
|
||||
from services.intelligence.agents.agent_usage_tracking import track_agent_usage_sync
|
||||
import time
|
||||
|
||||
logger = get_service_logger(__name__)
|
||||
|
||||
class TrackingLLMWrapper:
    """
    Wrapper for LLM instances to transparently track usage.
    Intercepts calls to __call__ and generate() to log metrics.

    Attributes that the wrapper does not define itself are looked up on the
    wrapped LLM, so the wrapper can be used wherever the LLM was used.
    """

    def __init__(self, llm: Any, user_id: str, model_name: str):
        """Store the wrapped LLM plus the identifiers recorded with each call."""
        self.llm = llm
        self.user_id = user_id
        self.model_name = model_name

    def __call__(self, prompt: str, *args, **kwargs) -> Any:
        """Allow the wrapper to be called like a pipeline; routes through generate()."""
        return self.generate(prompt, *args, **kwargs)

    def generate(self, prompt: str, *args, **kwargs) -> str:
        """
        Run the wrapped LLM on ``prompt`` and record the call.

        Extra positional/keyword arguments are forwarded unchanged. The raw
        response of the underlying LLM is returned as-is. Errors raised by the
        LLM are logged and re-raised; errors while extracting text or
        recording usage are only logged, so tracking can never turn a
        successful generation into a failure.
        """
        start_time = time.perf_counter()
        try:
            # Delegate to the underlying LLM
            if hasattr(self.llm, "generate"):
                response = self.llm.generate(prompt, *args, **kwargs)
            else:
                response = self.llm(prompt, *args, **kwargs)
        except Exception as e:
            logger.error(f"LLM generation failed in tracking wrapper: {e}")
            raise  # bare raise keeps the original traceback intact

        # Track usage (best effort)
        duration = time.perf_counter() - start_time
        try:
            # Handle response format (some might return list of dicts)
            response_text = str(response)
            if isinstance(response, list):
                if not response:
                    # An empty list has no first element to inspect.
                    response_text = ""
                elif isinstance(response[0], dict) and 'generated_text' in response[0]:
                    response_text = response[0]['generated_text']
                else:
                    response_text = str(response[0])

            track_agent_usage_sync(
                user_id=self.user_id,
                model_name=self.model_name,
                prompt=prompt,
                response_text=response_text,
                duration=duration
            )
        except Exception as e:
            logger.warning(f"Failed to track agent usage in wrapper: {e}")

        return response

    def __getattr__(self, name):
        """Delegate unknown attribute access to the underlying LLM."""
        # __getattr__ only runs when normal lookup failed. If "llm" itself is
        # missing (instance created without __init__, e.g. while copying or
        # unpickling), touching self.llm here would recurse without bound.
        if name == "llm":
            raise AttributeError(name)
        return getattr(self.llm, name)
|
||||
|
||||
@dataclass
|
||||
class AgentAction:
|
||||
"""Represents an action taken by an agent"""
|
||||
@@ -114,6 +169,10 @@ class BaseALwrityAgent(ABC):
|
||||
self.txtai_agent = None
|
||||
self.llm = llm # Ensure llm is set if provided, regardless of txtai availability
|
||||
|
||||
# Wrap LLM with tracking if it exists
|
||||
if self.llm:
|
||||
self.llm = TrackingLLMWrapper(self.llm, self.user_id, self.model_name)
|
||||
|
||||
self.agent_key = self._resolve_agent_key(agent_type)
|
||||
self._agent_profile = self._load_agent_profile_overrides()
|
||||
self._prompt_context = self._load_prompt_context()
|
||||
@@ -121,10 +180,17 @@ class BaseALwrityAgent(ABC):
|
||||
if TXTAI_AVAILABLE:
|
||||
try:
|
||||
if not self.llm:
|
||||
self.llm = LLM(model_name)
|
||||
|
||||
self.txtai_agent = self._create_txtai_agent()
|
||||
logger.info(f"Initialized txtai agent for {agent_type} - {self.agent_id}")
|
||||
# Create new LLM if not provided
|
||||
raw_llm = LLM(model_name)
|
||||
# Wrap it
|
||||
self.llm = TrackingLLMWrapper(raw_llm, self.user_id, self.model_name)
|
||||
|
||||
try:
|
||||
self.txtai_agent = self._create_txtai_agent()
|
||||
logger.info(f"Initialized txtai agent for {agent_type} - {self.agent_id}")
|
||||
except Exception as inner_e:
|
||||
logger.warning(f"Could not initialize specific txtai agent for {agent_type}: {inner_e}")
|
||||
self.txtai_agent = self._create_fallback_agent()
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to initialize txtai agent for {agent_type}: {e}")
|
||||
self.txtai_agent = self._create_fallback_agent()
|
||||
@@ -134,6 +200,38 @@ class BaseALwrityAgent(ABC):
|
||||
# Initialize safety framework
|
||||
self.safety_framework = get_safety_framework(user_id)
|
||||
|
||||
async def _generate_llm_response(self, prompt: str) -> str:
    """
    Helper to generate text using the agent's LLM with usage tracking.
    Centralized method for all agents inheriting from BaseALwrityAgent.

    Args:
        prompt: Text passed to the LLM.

    Returns:
        The generated text. Returns "[LLM Unavailable]" when the agent has no
        LLM, "[Generation Failed]" when the call raises, and an empty string
        when the LLM returns an empty list.
    """
    if not self.llm:
        return "[LLM Unavailable]"

    try:
        # Run in executor to avoid blocking if LLM is synchronous.
        # get_running_loop() is the supported way to obtain the loop from
        # inside a coroutine.
        loop = asyncio.get_running_loop()

        # Prefer the LLM's generate method; otherwise treat the LLM itself
        # as the callable.
        target = self.llm.generate if hasattr(self.llm, "generate") else self.llm
        response = await loop.run_in_executor(None, target, prompt)

        # Handle list output (some models return list of dicts)
        if isinstance(response, list):
            if not response:
                # Nothing was generated; there is no first element to read.
                return ""
            first = response[0]
            if isinstance(first, dict) and 'generated_text' in first:
                return first['generated_text']
            return str(first)

        return str(response)

    except Exception as e:
        logger.error(f"LLM generation failed in agent {self.agent_type}: {e}")
        return "[Generation Failed]"
|
||||
|
||||
def _resolve_agent_key(self, agent_type: str) -> str:
|
||||
value = str(agent_type or "").strip()
|
||||
if value.lower() == "strategyorchestrator".lower():
|
||||
|
||||
@@ -758,6 +758,11 @@ async def get_agent_performance_summary(user_id: str, agent_id: str) -> Dict[str
|
||||
"""Get comprehensive performance summary for an agent"""
|
||||
return await performance_service.get_agent_performance_summary(user_id, agent_id)
|
||||
|
||||
async def get_all_agents_performance_summary(user_id: str) -> List[Dict[str, Any]]:
|
||||
async def get_all_agents_performance_summary(user_id: str) -> List[Dict[str, Any]]:
|
||||
"""Get performance summary for all agents for a user"""
|
||||
return await performance_service.get_all_agents_performance_summary(user_id)
|
||||
return await performance_service.get_all_agents_performance_summary(user_id)
|
||||
|
||||
# Alias for backward compatibility
|
||||
PerformanceMonitor = AgentPerformanceMonitor
|
||||
performance_monitor = performance_service
|
||||
AgentPerformanceMetrics = AgentPerformanceSnapshot
|
||||
@@ -13,6 +13,7 @@ from loguru import logger
|
||||
from ..txtai_service import TxtaiIntelligenceService
|
||||
from services.intelligence.agents.core_agent_framework import BaseALwrityAgent, AgentAction
|
||||
from services.seo_tools.content_strategy_service import ContentStrategyService
|
||||
from services.intelligence.sif_agents import SharedLLMWrapper, LocalLLMWrapper
|
||||
try:
|
||||
from services.intelligence.sif_integration import SIFIntegrationService
|
||||
SIF_AVAILABLE = True
|
||||
@@ -20,14 +21,36 @@ except ImportError:
|
||||
SIF_AVAILABLE = False
|
||||
|
||||
try:
|
||||
from txtai import Agent, LLM
|
||||
# Try importing from pipeline first (standard location)
|
||||
from txtai.pipeline import Agent, LLM
|
||||
TXTAI_AVAILABLE = True
|
||||
except ImportError:
|
||||
TXTAI_AVAILABLE = False
|
||||
logger.warning("txtai not available, using fallback implementation")
|
||||
try:
|
||||
# Fallback to top-level import
|
||||
from txtai import Agent, LLM
|
||||
TXTAI_AVAILABLE = True
|
||||
except ImportError:
|
||||
TXTAI_AVAILABLE = False
|
||||
Agent = None
|
||||
LLM = None
|
||||
logger.warning("txtai not available, using fallback implementation")
|
||||
|
||||
class SIFBaseAgent:
|
||||
def __init__(self, intelligence_service: TxtaiIntelligenceService):
|
||||
class SIFBaseAgent(BaseALwrityAgent):
|
||||
def __init__(self, intelligence_service: TxtaiIntelligenceService, user_id: str, agent_type: str = "sif_agent", model_name: str = "Qwen/Qwen2.5-3B-Instruct", llm: Any = None):
|
||||
# Hybrid LLM Strategy:
|
||||
# 1. Shared LLM for external/high-quality generation
|
||||
self.shared_llm = SharedLLMWrapper(user_id)
|
||||
|
||||
# 2. Local LLM for internal agent work (default for SIF agents)
|
||||
if llm is None:
|
||||
if TXTAI_AVAILABLE:
|
||||
# Use Lazy Local LLM
|
||||
llm = LocalLLMWrapper(model_name)
|
||||
else:
|
||||
# Fallback to Shared if txtai not available
|
||||
llm = self.shared_llm
|
||||
|
||||
super().__init__(user_id, agent_type, model_name, llm)
|
||||
self.intelligence = intelligence_service
|
||||
|
||||
def _log_agent_operation(self, operation: str, **kwargs):
|
||||
@@ -36,9 +59,27 @@ class SIFBaseAgent:
|
||||
if kwargs:
|
||||
logger.debug(f"[{self.__class__.__name__}] Parameters: {kwargs}")
|
||||
|
||||
def _create_txtai_agent(self):
    """
    Build a minimal txtai Agent around this agent's LLM.

    SIF agents talk to the intelligence service directly; this only exposes
    the LLM through the standard agent interface. Returns None when txtai
    (or its Agent class) is unavailable or when construction fails.
    """
    txtai_usable = TXTAI_AVAILABLE and Agent is not None
    if not txtai_usable:
        return None

    # A simple agent that can use the LLM, with no tools attached.
    agent = None
    try:
        agent = Agent(llm=self.llm, tools=[])
    except Exception as exc:
        logger.warning(f"Failed to create txtai Agent: {exc}")
    return agent
|
||||
|
||||
class StrategyArchitectAgent(SIFBaseAgent):
|
||||
"""Agent for discovering content pillars and identifying strategic gaps."""
|
||||
|
||||
def __init__(self, intelligence_service: TxtaiIntelligenceService, user_id: str):
|
||||
super().__init__(intelligence_service, user_id, agent_type="strategy_architect")
|
||||
|
||||
async def discover_pillars(self) -> List[Dict[str, Any]]:
|
||||
"""Identify content pillars through semantic clustering."""
|
||||
self._log_agent_operation("Discovering content pillars")
|
||||
@@ -108,9 +149,61 @@ class ContentGuardianAgent(SIFBaseAgent):
|
||||
CANNIBALIZATION_THRESHOLD = 0.85 # Similarity threshold for cannibalization warning
|
||||
ORIGINALITY_THRESHOLD = 0.75 # Minimum originality score
|
||||
|
||||
def __init__(self, intelligence_service: TxtaiIntelligenceService, sif_service: Any = None):
|
||||
super().__init__(intelligence_service)
|
||||
def __init__(self, intelligence_service: TxtaiIntelligenceService, user_id: str, sif_service: Any = None):
|
||||
super().__init__(intelligence_service, user_id, agent_type="content_guardian")
|
||||
self.sif_service = sif_service
|
||||
|
||||
# Lazy initialization of SIF service if not provided
|
||||
if self.sif_service is None and SIF_AVAILABLE:
|
||||
try:
|
||||
self.sif_service = SIFIntegrationService(user_id)
|
||||
logger.info(f"[{self.__class__.__name__}] Lazily initialized SIFIntegrationService")
|
||||
except Exception as e:
|
||||
logger.warning(f"[{self.__class__.__name__}] Failed to lazily initialize SIF service: {e}")
|
||||
|
||||
async def assess_content_quality(self, content: str) -> Dict[str, Any]:
    """
    Assess content quality based on originality, readability, and cannibalization risks.

    Args:
        content: The text to evaluate.

    Returns:
        A dict with ``quality_score`` (weighted: originality 40%, style 30%,
        readability 30%), the individual scores, the raw cannibalization and
        style results, and ``is_acceptable``. On failure, a dict with
        ``error`` and a ``quality_score`` of 0.0.
    """
    self._log_agent_operation("Assessing content quality", content_length=len(content))

    try:
        # 1. Check for cannibalization
        cannibalization_result = await self.check_cannibalization(content)

        # 2. Check originality (if not cannibalized)
        originality_score = 1.0
        if not cannibalization_result.get("warning"):
            originality_result = await self.verify_originality(content, None)
            originality_score = originality_result.get("originality_score", 1.0)

        # 3. Check Style Compliance
        # NOTE(review): a result without "compliance_score" counts as fully
        # compliant (1.0) — confirm this is intended for error results.
        style_result = await self.style_enforcer(content)
        style_score = style_result.get("compliance_score", 1.0)

        # 4. Basic Readability (Flesch-Kincaid proxy via sentence length/word complexity)
        # Simple heuristic for now
        words = content.split()
        # Skip empty fragments: text ending in "." would otherwise yield one
        # extra, empty "sentence" and understate the average length.
        sentences = [part for part in content.split('.') if part.strip()]
        avg_sentence_length = len(words) / max(1, len(sentences))
        readability_score = 1.0 if avg_sentence_length < 20 else max(0.5, 1.0 - (avg_sentence_length - 20) * 0.05)

        # Weighted Score: Originality (40%) + Style (30%) + Readability (30%)
        quality_score = (originality_score * 0.4) + (style_score * 0.3) + (readability_score * 0.3)

        return {
            "quality_score": quality_score,
            "originality_score": originality_score,
            "readability_score": readability_score,
            "style_score": style_score,
            "cannibalization_risk": cannibalization_result,
            "style_compliance": style_result,
            "is_acceptable": quality_score > 0.7 and not cannibalization_result.get("warning", False)
        }

    except Exception as e:
        logger.error(f"[{self.__class__.__name__}] Failed to assess content quality: {e}")
        return {"error": str(e), "quality_score": 0.0}
|
||||
|
||||
async def check_cannibalization(self, new_draft: str) -> Dict[str, Any]:
|
||||
"""Check if a new draft competes semantically with existing pages."""
|
||||
@@ -193,25 +286,74 @@ class ContentGuardianAgent(SIFBaseAgent):
|
||||
# 1. Fetch Style Guidelines from SIF if not provided
|
||||
if not style_guidelines and self.sif_service:
|
||||
try:
|
||||
# Search for website analysis to get brand voice/style
|
||||
# We assume the most relevant 'website_analysis' doc contains the guidelines
|
||||
results = await self.intelligence.search("website analysis brand voice style", limit=1)
|
||||
if results:
|
||||
import json
|
||||
res = results[0]
|
||||
metadata_str = res.get('object')
|
||||
metadata = json.loads(metadata_str) if isinstance(metadata_str, str) else (metadata_str or res)
|
||||
# Use central SIF service to get robust context
|
||||
seo_context = await self.sif_service.get_seo_context()
|
||||
|
||||
if seo_context and "error" not in seo_context:
|
||||
# Extract brand voice/style from the context
|
||||
# The context structure is normalized in get_seo_context
|
||||
|
||||
if metadata.get('type') == 'website_analysis':
|
||||
report = metadata.get('full_report', {})
|
||||
style_guidelines = {
|
||||
"tone": report.get('brand_analysis', {}).get('brand_voice', 'neutral'),
|
||||
"style_patterns": report.get('style_patterns', {}),
|
||||
"writing_style": report.get('writing_style', {})
|
||||
}
|
||||
logger.info(f"[{self.__class__.__name__}] Retrieved style guidelines from SIF: {style_guidelines.get('tone')}")
|
||||
# Note: get_seo_context returns a flattened dict.
|
||||
# We need to dig into the original structure if available, or rely on what's mapped.
|
||||
# However, get_seo_context maps 'seo_audit', 'sitemap_analysis', etc.
|
||||
# Brand info is usually in 'brand_analysis' col of WebsiteAnalysis, which might not be fully exposed
|
||||
# in the simplified get_seo_context return.
|
||||
# Let's check if we can get the full object or if we need to expand get_seo_context.
|
||||
# For now, we'll try to use what's there or fall back to a specific search if needed.
|
||||
|
||||
# Actually, looking at get_seo_context implementation:
|
||||
# It returns 'seo_audit', 'crawl_result'.
|
||||
# Brand analysis is often stored in WebsiteAnalysis.brand_analysis.
|
||||
# We might need to extend get_seo_context or do a specific retrieval here.
|
||||
# But wait! I saw get_seo_context implementation earlier:
|
||||
# It retrieves the "full_report" from the SIF metadata.
|
||||
# If the SIF index contains the full WebsiteAnalysis object, we are good.
|
||||
|
||||
# Let's try to get it from the full report if we can access it,
|
||||
# but get_seo_context returns a filtered dict.
|
||||
|
||||
# Alternative: Use the robust retrieval logic but specifically for brand info if get_seo_context is too narrow.
|
||||
# But get_seo_context logic includes "website analysis seo audit" query.
|
||||
|
||||
# Let's assume for now we use the same retrieval logic but locally adapted,
|
||||
# OR better, trust get_seo_context to be the single point of truth.
|
||||
# If get_seo_context doesn't return brand info, we should update IT, not hack here.
|
||||
# But I can't update SIFIntegrationService right now without context switch.
|
||||
|
||||
# Let's stick to the previous manual search pattern BUT use the SIF service helper if possible.
|
||||
# Actually, the previous code was:
|
||||
# results = await self.intelligence.search("website analysis brand voice style", limit=1)
|
||||
|
||||
# Let's keep it simple and robust:
|
||||
# Try to get it from SIF service if possible.
|
||||
# Since get_seo_context might not return brand_voice directly, let's try to see if we can use it.
|
||||
|
||||
# Actually, let's use the manual search but with better error handling,
|
||||
# mirroring get_seo_context's robustness (e.g. parsing).
|
||||
|
||||
results = await self.intelligence.search("website analysis brand voice style", limit=1)
|
||||
if results:
|
||||
res = results[0]
|
||||
metadata_str = res.get('object')
|
||||
metadata = json.loads(metadata_str) if isinstance(metadata_str, str) else (metadata_str or res)
|
||||
|
||||
if metadata.get('type') == 'website_analysis':
|
||||
report = metadata.get('full_report', {})
|
||||
# Support both flat and nested structures
|
||||
brand_analysis = report.get('brand_analysis') or report.get('brand_voice', {})
|
||||
if isinstance(brand_analysis, str):
|
||||
# Handle case where it might be a JSON string
|
||||
try: brand_analysis = json.loads(brand_analysis)
|
||||
except: brand_analysis = {"brand_voice": brand_analysis}
|
||||
|
||||
style_guidelines = {
|
||||
"tone": brand_analysis.get('brand_voice', 'neutral') if isinstance(brand_analysis, dict) else 'neutral',
|
||||
"style_patterns": report.get('style_patterns', {}),
|
||||
"writing_style": report.get('writing_style', {})
|
||||
}
|
||||
logger.info(f"[{self.__class__.__name__}] Retrieved style guidelines from SIF index")
|
||||
except Exception as e:
|
||||
logger.warning(f"[{self.__class__.__name__}] Failed to retrieve style guidelines from SIF: {e}")
|
||||
logger.warning(f"[{self.__class__.__name__}] Failed to retrieve style guidelines: {e}")
|
||||
|
||||
issues = []
|
||||
score = 1.0
|
||||
@@ -246,6 +388,55 @@ class ContentGuardianAgent(SIFBaseAgent):
|
||||
logger.error(f"[{self.__class__.__name__}] Style enforcement failed: {e}")
|
||||
return {"error": str(e)}
|
||||
|
||||
async def perform_site_audit(self, website_url: str, limit: int = 10) -> Dict[str, Any]:
    """
    Audit the quality of indexed content belonging to a website.

    Searches the intelligence index for up to ``limit`` entries matching the
    site, scores every entry whose text has at least 100 characters with
    ``assess_content_quality``, and returns a report containing the per-page
    details and the average quality score. Returns ``{"error": ...}`` when
    nothing is found or when the audit raises.
    """
    self._log_agent_operation("Performing site audit", website_url=website_url)

    try:
        # Pull candidate pages from SIF. How well the "site:" query matches
        # depends on how the data was indexed.
        hits = await self.intelligence.search(f"site:{website_url}", limit=limit)
        if not hits:
            logger.info(f"[{self.__class__.__name__}] No content found for site audit")
            return {"error": "No content found"}

        page_reports = []
        for hit in hits:
            body = hit.get('text', '')
            # Skip entries that are empty or too short to assess.
            if not body or len(body) < 100:
                continue
            assessment = await self.assess_content_quality(body)
            page_reports.append({
                "id": hit.get('id'),
                "title": hit.get('title', 'Unknown'),
                "quality": assessment
            })

        if page_reports:
            score_sum = 0.0
            for entry in page_reports:
                score_sum += entry["quality"].get('quality_score', 0.0)
            mean_quality = score_sum / len(page_reports)
        else:
            mean_quality = 0.0

        logger.info(f"[{self.__class__.__name__}] Site audit completed. Avg Quality: {mean_quality:.2f}")
        return {
            "website_url": website_url,
            "pages_audited": len(page_reports),
            "average_quality_score": mean_quality,
            "details": page_reports,
            "timestamp": datetime.utcnow().isoformat()
        }

    except Exception as exc:
        logger.error(f"[{self.__class__.__name__}] Site audit failed: {exc}")
        return {"error": str(exc)}
|
||||
|
||||
async def safety_filter(self, text: str) -> Dict[str, Any]:
|
||||
"""
|
||||
Tool: Flags potentially harmful, offensive, or sensitive content.
|
||||
@@ -290,8 +481,8 @@ class LinkGraphAgent(SIFBaseAgent):
|
||||
RELEVANCE_THRESHOLD = 0.6 # Minimum relevance score for link suggestions
|
||||
MAX_SUGGESTIONS = 10 # Maximum number of link suggestions
|
||||
|
||||
def __init__(self, intelligence_service: TxtaiIntelligenceService, sif_service: Any = None):
|
||||
super().__init__(intelligence_service)
|
||||
def __init__(self, intelligence_service: TxtaiIntelligenceService, user_id: str, sif_service: Any = None):
|
||||
super().__init__(intelligence_service, user_id, agent_type="link_graph")
|
||||
self.sif_service = sif_service
|
||||
|
||||
async def suggest_internal_links(self, draft: str) -> List[Dict[str, Any]]:
|
||||
@@ -823,9 +1014,10 @@ class ContentStrategyAgent(BaseALwrityAgent):
|
||||
Maintain the original meaning and tone.
|
||||
"""
|
||||
|
||||
if hasattr(self.llm, "generate"):
|
||||
if self.llm:
|
||||
# We assume the LLM returns JSON-like text or we parse it
|
||||
response = self.llm.generate(f"{system_prompt}\n\nText to rewrite:\n{content}")
|
||||
response = await self._generate_llm_response(f"{system_prompt}\n\nText to rewrite:\n{content}")
|
||||
|
||||
# Simple parsing fallback if LLM returns raw text
|
||||
if isinstance(response, str) and not response.strip().startswith("{"):
|
||||
optimized_content = response
|
||||
@@ -1456,34 +1648,7 @@ class SEOOptimizationAgent(BaseALwrityAgent):
|
||||
"timestamp": datetime.utcnow().isoformat()
|
||||
}
|
||||
|
||||
async def _generate_llm_response(self, prompt: str) -> str:
    """Generate text for ``prompt`` using the agent's LLM.

    The LLM call is dispatched to the default executor so a synchronous
    model does not block the event loop.

    Args:
        prompt: Text handed to the LLM unchanged.

    Returns:
        The generated text. ``"[LLM Unavailable]"`` is returned when
        ``self.llm`` is falsy, and ``"[Generation Failed]"`` when the call
        raises or yields an empty list.
    """
    if not self.llm:
        return "[LLM Unavailable]"

    try:
        # Inside a coroutine there is always a running loop; ask for it
        # directly rather than going through the event-loop policy.
        loop = asyncio.get_running_loop()

        # Some LLM objects expose generate(); others are plain callables.
        llm = self.llm
        generate = llm.generate if hasattr(llm, "generate") else llm
        response = await loop.run_in_executor(None, generate, prompt)

        # Handle list output (some models return a list of dicts).
        if isinstance(response, list):
            if not response:
                # Nothing came back: report the failure explicitly instead
                # of indexing into an empty list.
                logger.error("LLM generation failed: empty response list")
                return "[Generation Failed]"
            first = response[0]
            if isinstance(first, dict) and 'generated_text' in first:
                return first['generated_text']
            return str(first)

        return str(response)
    except Exception as e:
        logger.error(f"LLM generation failed: {e}")
        return "[Generation Failed]"
|
||||
|
||||
|
||||
async def _strategy_generator_tool(self, context: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""SEO strategy generation tool"""
|
||||
audit_results = context.get("audit_results", {})
|
||||
@@ -1629,8 +1794,8 @@ class SocialAmplificationAgent(BaseALwrityAgent):
|
||||
Return ONLY the adapted content.
|
||||
"""
|
||||
|
||||
if hasattr(self.llm, "generate"):
|
||||
adapted_content = self.llm.generate(prompt)
|
||||
if self.llm:
|
||||
adapted_content = await self._generate_llm_response(prompt)
|
||||
else:
|
||||
adapted_content = f"[Mock {platform}]: {content[:50]}... #adapted"
|
||||
|
||||
|
||||
@@ -19,7 +19,7 @@ class TrendSurferAgent(SIFBaseAgent):
|
||||
"""
|
||||
|
||||
def __init__(self, intelligence_service: TxtaiIntelligenceService, user_id: str):
|
||||
super().__init__(intelligence_service)
|
||||
super().__init__(intelligence_service, user_id, agent_type="trend_surfer")
|
||||
self.user_id = user_id
|
||||
self.signal_detector = MarketSignalDetector(user_id)
|
||||
self.trends_service = GoogleTrendsService()
|
||||
@@ -148,15 +148,41 @@ class TrendSurferAgent(SIFBaseAgent):
|
||||
else:
|
||||
recommendation = "Create new content"
|
||||
|
||||
# Use LLM to generate creative angle
|
||||
headline = f"Trend: {trend.description}"
|
||||
angle = f"Leverage {trend.source} trend on {trend.related_topics[0] if trend.related_topics else 'topic'}"
|
||||
|
||||
try:
|
||||
prompt = f"""
|
||||
Analyze this market trend signal and propose a content angle:
|
||||
Trend: {trend.description}
|
||||
Related Topics: {', '.join(trend.related_topics)}
|
||||
Impact Score: {trend.impact_score}
|
||||
Recommendation: {recommendation}
|
||||
|
||||
Provide a catchy headline and a 1-sentence strategic angle.
|
||||
Format: Headline | Angle
|
||||
"""
|
||||
response = await self._generate_llm_response(prompt)
|
||||
if response and "|" in response:
|
||||
parts = response.split('|')
|
||||
headline = parts[0].strip()
|
||||
angle = parts[1].strip()
|
||||
elif response:
|
||||
angle = response.strip()
|
||||
except Exception as e:
|
||||
logger.warning(f"[{self.__class__.__name__}] LLM generation failed for opportunity: {e}")
|
||||
|
||||
return {
|
||||
"trend_id": trend.signal_id,
|
||||
"topic": trend.description,
|
||||
"headline": headline,
|
||||
"source": trend.source,
|
||||
"urgency": trend.urgency_level.value,
|
||||
"impact_score": trend.impact_score,
|
||||
"current_coverage": coverage_score,
|
||||
"recommendation": recommendation,
|
||||
"suggested_angle": f"Leverage {trend.source} trend on {trend.related_topics[0] if trend.related_topics else 'topic'}",
|
||||
"suggested_angle": angle,
|
||||
"detected_at": trend.detected_at
|
||||
}
|
||||
|
||||
|
||||
@@ -5,13 +5,76 @@ Each agent leverages TxtaiIntelligenceService for semantic operations.
|
||||
"""
|
||||
|
||||
import traceback
|
||||
import json
|
||||
import asyncio
|
||||
from typing import List, Dict, Any, Optional
|
||||
from datetime import datetime
|
||||
from loguru import logger
|
||||
from .txtai_service import TxtaiIntelligenceService
|
||||
from .txtai_service import TxtaiIntelligenceService, TXTAI_AVAILABLE
|
||||
from services.intelligence.agents.core_agent_framework import BaseALwrityAgent
|
||||
from services.llm_providers.main_text_generation import llm_text_gen
|
||||
|
||||
class SIFBaseAgent:
|
||||
def __init__(self, intelligence_service: TxtaiIntelligenceService):
|
||||
# Optional txtai imports.
# LLM and Agent are imported independently so that a failure to import one
# does not also null out the other.
try:
    from txtai.pipeline import LLM
except ImportError:
    LLM = None

# NOTE(review): Agent is documented under the txtai agent package rather than
# txtai.pipeline -- confirm against the installed txtai version.
try:
    from txtai.agent import Agent
except ImportError:
    Agent = None
|
||||
|
||||
class SharedLLMWrapper:
    """Adapter that lets the shared ALwrity LLM service be used like a txtai LLM.

    Instances can be called directly or through ``generate``; both routes end
    in ``llm_text_gen`` for the user the wrapper was created for.
    """

    def __init__(self, user_id: str):
        # User on whose behalf every generation request is issued.
        self.user_id = user_id

    def __call__(self, prompt: str, **kwargs) -> str:
        """Callable form; forwards to ``generate``."""
        return self.generate(prompt, **kwargs)

    def generate(self, prompt: str, **kwargs) -> str:
        """Generate text for ``prompt`` via the shared LLM provider.

        Extra keyword arguments (for example ``max_tokens``) are accepted
        but not forwarded; ``llm_text_gen`` applies its own defaults.
        """
        owner = self.user_id
        return llm_text_gen(prompt, user_id=owner)
|
||||
|
||||
class LocalLLMWrapper:
    """
    Deferred loader for a local txtai LLM.

    The model is only constructed the first time ``llm`` is accessed, which
    keeps heavy model loads out of server startup.
    """

    def __init__(self, model_path: str):
        self._llm = None  # populated on first access of ``llm``
        self.model_path = model_path

    @property
    def llm(self):
        """Return the underlying txtai LLM, constructing it on first access.

        Raises:
            ImportError: if the module-level ``LLM`` name is ``None``.
        """
        if self._llm is not None:
            return self._llm
        if LLM is None:
            raise ImportError("txtai.pipeline.LLM is not available")
        logger.info(f"Loading local LLM: {self.model_path}")
        self._llm = LLM(path=self.model_path)
        return self._llm

    def generate(self, prompt: str, **kwargs) -> str:
        """Run the model on ``prompt``, forwarding any keyword arguments."""
        return self.llm(prompt, **kwargs)

    def __call__(self, prompt: str, **kwargs) -> str:
        """Callable form; same effect as ``generate``."""
        return self.llm(prompt, **kwargs)
|
||||
|
||||
class SIFBaseAgent(BaseALwrityAgent):
|
||||
def __init__(self, intelligence_service: TxtaiIntelligenceService, user_id: str, agent_type: str = "sif_agent", model_name: str = "Qwen/Qwen2.5-3B-Instruct", llm: Any = None):
    """Set up the agent's LLMs and attach the intelligence service.

    Args:
        intelligence_service: Stored on the instance as ``self.intelligence``.
        user_id: Passed to the shared LLM wrapper and to the base class.
        agent_type: Agent type label forwarded to the base class.
        model_name: Model path used when a local LLM wrapper is created;
            also forwarded to the base class.
        llm: Explicit LLM to use. When ``None`` one is chosen as described
            in the body.
    """
    # The shared LLM is always created and kept on the instance.
    self.shared_llm = SharedLLMWrapper(user_id)

    # With no explicit LLM: use a lazily-loaded local model when txtai is
    # available, otherwise reuse the shared LLM.
    if llm is None:
        llm = LocalLLMWrapper(model_name) if TXTAI_AVAILABLE else self.shared_llm

    super().__init__(user_id, agent_type, model_name, llm)
    self.intelligence = intelligence_service
|
||||
|
||||
def _log_agent_operation(self, operation: str, **kwargs):
|
||||
@@ -20,9 +83,23 @@ class SIFBaseAgent:
|
||||
if kwargs:
|
||||
logger.debug(f"[{self.__class__.__name__}] Parameters: {kwargs}")
|
||||
|
||||
def _create_txtai_agent(self):
    """
    Build a minimal txtai agent around this agent's LLM.

    SIF agents use the intelligence service directly, but capabilities can
    be exposed via a standard agent interface if needed.

    Returns:
        An ``Agent`` built with ``self.llm`` and no tools, or ``None`` when
        txtai is unavailable or the ``Agent`` class could not be imported.
    """
    # Agent is bound to None when its optional import fails, which can
    # happen even if TXTAI_AVAILABLE is true; calling None would raise.
    if not TXTAI_AVAILABLE or Agent is None:
        return None

    # Return a simple agent that can use the LLM
    return Agent(llm=self.llm, tools=[])
|
||||
|
||||
class StrategyArchitectAgent(SIFBaseAgent):
|
||||
"""Agent for discovering content pillars and identifying strategic gaps."""
|
||||
|
||||
def __init__(self, intelligence_service: TxtaiIntelligenceService, user_id: str):
|
||||
super().__init__(intelligence_service, user_id, agent_type="strategy_architect")
|
||||
|
||||
async def discover_pillars(self) -> List[Dict[str, Any]]:
|
||||
"""Identify content pillars through semantic clustering."""
|
||||
self._log_agent_operation("Discovering content pillars")
|
||||
@@ -58,6 +135,61 @@ class StrategyArchitectAgent(SIFBaseAgent):
|
||||
logger.error(f"[{self.__class__.__name__}] Failed to discover pillars: {e}")
|
||||
logger.error(f"[{self.__class__.__name__}] Full traceback: {traceback.format_exc()}")
|
||||
return []
|
||||
|
||||
async def analyze_content_strategy(self, website_data: Dict[str, Any]) -> List[Dict[str, Any]]:
    """
    Analyze content strategy based on website data and semantic insights.

    Args:
        website_data: Dictionary containing website analysis data. Only its
            'description' entry is read here.

    Returns:
        List of strategic recommendation dicts; an empty list if any step
        raises.
    """
    self._log_agent_operation("Analyzing content strategy")

    try:
        recommendations = []

        # 1. Discover existing pillars
        pillars = await self.discover_pillars()

        # 2. Analyze gaps based on pillars (simplified logic for now)
        if not pillars:
            recommendations.append({
                "type": "strategy_gap",
                "priority": "high",
                "title": "Establish Core Content Pillars",
                "description": "No clear content clusters found. Focus on defining 3-5 core topics to build authority."
            })
        else:
            # Suggest strengthening weak pillars (fewer than 3 items)
            for pillar in pillars:
                if pillar['size'] < 3:
                    recommendations.append({
                        "type": "content_depth",
                        "priority": "medium",
                        "title": f"Strengthen Pillar {pillar['pillar_id']}",
                        "description": "This topic cluster has few articles. Create more content to establish authority.",
                        "pillar_id": pillar['pillar_id']
                    })

        # 3. Add generic recommendations based on website data if available
        if website_data and not website_data.get('description'):
            recommendations.append({
                "type": "metadata",
                "priority": "high",
                "title": "Missing Meta Description",
                "description": "Website is missing a meta description. Add one to improve SEO CTR."
            })

        logger.info(f"[{self.__class__.__name__}] Generated {len(recommendations)} strategic recommendations")
        return recommendations

    except Exception as e:
        logger.error(f"[{self.__class__.__name__}] Failed to analyze content strategy: {e}")
        # Log the traceback as well, matching the discover_pillars handler.
        logger.error(f"[{self.__class__.__name__}] Full traceback: {traceback.format_exc()}")
        return []
|
||||
|
||||
def _calculate_cluster_confidence(self, cluster_indices: List[int]) -> float:
|
||||
"""Calculate confidence score for a cluster based on its size and coherence."""
|
||||
@@ -92,10 +224,40 @@ class ContentGuardianAgent(SIFBaseAgent):
|
||||
CANNIBALIZATION_THRESHOLD = 0.85 # Similarity threshold for cannibalization warning
|
||||
ORIGINALITY_THRESHOLD = 0.75 # Minimum originality score
|
||||
|
||||
def __init__(self, intelligence_service: TxtaiIntelligenceService, sif_service: Any = None):
|
||||
super().__init__(intelligence_service)
|
||||
def __init__(self, intelligence_service: TxtaiIntelligenceService, user_id: str, sif_service: Any = None):
|
||||
super().__init__(intelligence_service, user_id, agent_type="content_guardian")
|
||||
self.sif_service = sif_service
|
||||
|
||||
async def assess_content_quality(self, website_data: Dict[str, Any]) -> Dict[str, Any]:
    """Score overall content quality from the site's description or title.

    Args:
        website_data: Website analysis dict; 'description' is used when
            non-empty, otherwise 'title'.

    Returns:
        A dict with the aggregate "score", the style and safety results and
        the analysed text length. ``{"score": 0.5, "reason": ...}`` is
        returned when there is no text, and ``{"score": 0.0, "error": ...}``
        if any step raises.
    """
    self._log_agent_operation("Assessing content quality")
    try:
        # Prefer the description; fall back to the title.
        sample = website_data.get('description', '') or website_data.get('title', '')
        if not sample:
            return {"score": 0.5, "reason": "No content to analyze"}

        # Style check first, then safety check.
        style_report = await self.style_enforcer(sample)
        safety_report = await self.safety_filter(sample)

        # Start from the style compliance score and halve it when the
        # safety filter flags the text for review.
        score = style_report.get('compliance_score', 0.8)
        flagged = safety_report.get('action') == 'flag_for_review'
        if flagged:
            score *= 0.5

        assessment = {
            "score": score,
            "style_analysis": style_report,
            "safety_analysis": safety_report,
            "analyzed_text_length": len(sample)
        }
        return assessment
    except Exception as e:
        logger.error(f"[{self.__class__.__name__}] Quality assessment failed: {e}")
        return {"score": 0.0, "error": str(e)}
|
||||
|
||||
async def check_cannibalization(self, new_draft: str) -> Dict[str, Any]:
|
||||
"""Check if a new draft competes semantically with existing pages."""
|
||||
self._log_agent_operation("Checking for semantic cannibalization", draft_length=len(new_draft))
|
||||
@@ -274,8 +436,8 @@ class LinkGraphAgent(SIFBaseAgent):
|
||||
RELEVANCE_THRESHOLD = 0.6 # Minimum relevance score for link suggestions
|
||||
MAX_SUGGESTIONS = 10 # Maximum number of link suggestions
|
||||
|
||||
def __init__(self, intelligence_service: TxtaiIntelligenceService, sif_service: Any = None):
|
||||
super().__init__(intelligence_service)
|
||||
def __init__(self, intelligence_service: TxtaiIntelligenceService, user_id: str, sif_service: Any = None):
|
||||
super().__init__(intelligence_service, user_id, agent_type="link_graph")
|
||||
self.sif_service = sif_service
|
||||
|
||||
async def suggest_internal_links(self, draft: str) -> List[Dict[str, Any]]:
|
||||
@@ -479,6 +641,9 @@ class CitationExpert(SIFBaseAgent):
|
||||
EVIDENCE_THRESHOLD = 0.7 # Minimum relevance score for evidence
|
||||
MAX_EVIDENCE = 5 # Maximum number of evidence pieces to return
|
||||
|
||||
def __init__(self, intelligence_service: TxtaiIntelligenceService, user_id: str):
|
||||
super().__init__(intelligence_service, user_id, agent_type="citation_expert")
|
||||
|
||||
async def fact_checker(self, claim: str) -> List[Dict[str, Any]]:
|
||||
"""
|
||||
Tool: Verifies facts against trusted research data.
|
||||
@@ -542,60 +707,25 @@ class CitationExpert(SIFBaseAgent):
|
||||
"claim": claim,
|
||||
"status": status,
|
||||
"evidence_count": len(evidence),
|
||||
"top_evidence": evidence[0]['source'] if evidence else None
|
||||
"top_evidence": evidence[0] if evidence else None
|
||||
})
|
||||
|
||||
return {
|
||||
"status": "verification_complete",
|
||||
"total_claims": len(claims),
|
||||
"status": "completed",
|
||||
"verified_claims": verified_results,
|
||||
"unsupported_count": len([c for c in verified_results if c['status'] == 'unsupported']),
|
||||
"timestamp": datetime.utcnow().isoformat()
|
||||
"verification_score": len([c for c in verified_results if c['status'] == 'supported']) / len(verified_results)
|
||||
}
|
||||
|
||||
async def verify_facts(self, claim: str) -> List[Dict[str, Any]]:
|
||||
"""Find supporting or contradicting evidence in the indexed research."""
|
||||
self._log_agent_operation("Verifying facts", claim_length=len(claim))
|
||||
"""Verify a single claim against intelligence data."""
|
||||
results = await self.intelligence.search(claim, limit=3)
|
||||
|
||||
try:
|
||||
if not self.intelligence.is_initialized():
|
||||
logger.error(f"[{self.__class__.__name__}] Intelligence service not initialized")
|
||||
return []
|
||||
|
||||
if not claim or len(claim.strip()) < 20:
|
||||
logger.warning(f"[{self.__class__.__name__}] Claim too short for meaningful verification")
|
||||
return []
|
||||
|
||||
results = await self.intelligence.search(claim, limit=self.MAX_EVIDENCE)
|
||||
|
||||
if not results:
|
||||
logger.info(f"[{self.__class__.__name__}] No evidence found for claim")
|
||||
return []
|
||||
|
||||
evidence = []
|
||||
for result in results:
|
||||
relevance_score = result.get('score', 0.0)
|
||||
|
||||
if relevance_score >= self.EVIDENCE_THRESHOLD:
|
||||
evidence_piece = {
|
||||
"source": result.get('id', 'unknown'),
|
||||
"relevance": relevance_score,
|
||||
"confidence": self._calculate_evidence_confidence(relevance_score),
|
||||
"type": "supporting" if relevance_score > 0.8 else "related",
|
||||
"excerpt": result.get('text', '')[:200] + "..." if len(result.get('text', '')) > 200 else result.get('text', '')
|
||||
}
|
||||
evidence.append(evidence_piece)
|
||||
logger.debug(f"[{self.__class__.__name__}] Found evidence: {evidence_piece['source']} (score: {relevance_score:.3f})")
|
||||
|
||||
logger.info(f"[{self.__class__.__name__}] Found {len(evidence)} pieces of evidence for claim")
|
||||
return evidence
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"[{self.__class__.__name__}] Failed to verify facts: {e}")
|
||||
logger.error(f"[{self.__class__.__name__}] Full traceback: {traceback.format_exc()}")
|
||||
return []
|
||||
|
||||
def _calculate_evidence_confidence(self, relevance_score: float) -> float:
|
||||
"""Calculate confidence score for evidence."""
|
||||
# Simple confidence based on relevance score
|
||||
return min(1.0, relevance_score * 1.2)
|
||||
evidence = []
|
||||
for result in results:
|
||||
if result.get('score', 0) > self.EVIDENCE_THRESHOLD:
|
||||
evidence.append({
|
||||
"text": result.get('text'),
|
||||
"source": result.get('id'),
|
||||
"confidence": result.get('score')
|
||||
})
|
||||
return evidence
|
||||
|
||||
@@ -938,14 +938,14 @@ class SIFIntegrationService:
|
||||
# Strategic recommendations (lazy initialization to avoid circular imports)
|
||||
if not self.strategy_agent:
|
||||
from .sif_agents import StrategyArchitectAgent
|
||||
self.strategy_agent = StrategyArchitectAgent(self.intelligence_service)
|
||||
self.strategy_agent = StrategyArchitectAgent(self.intelligence_service, user_id=self.user_id)
|
||||
recommendations = await self.strategy_agent.analyze_content_strategy(website_data)
|
||||
insights["strategic_recommendations"] = recommendations
|
||||
|
||||
# Content quality assessment (lazy initialization to avoid circular imports)
|
||||
if not self.guardian_agent:
|
||||
from .sif_agents import ContentGuardianAgent
|
||||
self.guardian_agent = ContentGuardianAgent(self.intelligence_service, sif_service=self)
|
||||
self.guardian_agent = ContentGuardianAgent(self.intelligence_service, user_id=self.user_id, sif_service=self)
|
||||
quality_score = await self.guardian_agent.assess_content_quality(website_data)
|
||||
insights["content_quality"] = quality_score
|
||||
|
||||
|
||||
@@ -33,7 +33,13 @@ class TxtaiIntelligenceService:
|
||||
self._initialized = False
|
||||
self.enable_caching = enable_caching
|
||||
self.cache_manager = semantic_cache_manager if enable_caching else None
|
||||
self._initialize_embeddings()
|
||||
# Lazy initialization - do not initialize embeddings on startup
|
||||
# self._initialize_embeddings()
|
||||
|
||||
def _ensure_initialized(self):
    """Run embeddings initialization on demand; do nothing when ``_initialized`` is already set."""
    if self._initialized:
        return
    self._initialize_embeddings()
|
||||
|
||||
def _initialize_embeddings(self):
|
||||
"""Initialize txtai embeddings with local storage support and comprehensive error handling."""
|
||||
@@ -106,6 +112,7 @@ class TxtaiIntelligenceService:
|
||||
Args:
|
||||
items: List of (id, text, metadata) tuples.
|
||||
"""
|
||||
self._ensure_initialized()
|
||||
if not self._initialized or not self.embeddings:
|
||||
logger.error(f"Cannot index content - service not initialized for user {self.user_id}")
|
||||
return
|
||||
@@ -145,6 +152,7 @@ class TxtaiIntelligenceService:
|
||||
|
||||
async def search(self, query: str, limit: int = 5) -> List[Dict[str, Any]]:
|
||||
"""Perform semantic search with intelligent caching."""
|
||||
self._ensure_initialized()
|
||||
if not self._initialized or not self.embeddings:
|
||||
logger.error(f"Cannot perform search - service not initialized for user {self.user_id}")
|
||||
return []
|
||||
@@ -186,6 +194,7 @@ class TxtaiIntelligenceService:
|
||||
|
||||
async def get_similarity(self, text1: str, text2: str) -> float:
|
||||
"""Get semantic similarity between two texts with caching."""
|
||||
self._ensure_initialized()
|
||||
if not self._initialized or not self.embeddings:
|
||||
logger.error(f"Cannot calculate similarity - service not initialized for user {self.user_id}")
|
||||
return 0.0
|
||||
@@ -234,6 +243,7 @@ class TxtaiIntelligenceService:
|
||||
|
||||
async def cluster(self, min_score: float = 0.5) -> List[List[int]]:
|
||||
"""Cluster indexed content to find semantic pillars using graph-based clustering with caching."""
|
||||
self._ensure_initialized()
|
||||
if not self._initialized or not self.embeddings:
|
||||
logger.error(f"Cannot cluster content - service not initialized for user {self.user_id}")
|
||||
return []
|
||||
@@ -358,6 +368,7 @@ class TxtaiIntelligenceService:
|
||||
|
||||
async def classify(self, text: str, labels: List[str]) -> List[Tuple[str, float]]:
|
||||
"""Classify text using zero-shot classification."""
|
||||
self._ensure_initialized()
|
||||
if not self._initialized or not Labels:
|
||||
logger.error(f"Cannot classify text - service not initialized or Labels not available for user {self.user_id}")
|
||||
return []
|
||||
|
||||
Reference in New Issue
Block a user