feat: image generation overhaul (model-aware text, dim clamping, \.30 pricing), event-driven dashboard cache invalidation, SEO insights (AI visibility, GSC, keyword gap), YouTube OAuth/publish, blog writer & content planning improvements, scheduler monitoring updates

This commit is contained in:
ajaysi
2026-05-30 07:58:22 +05:30
parent aaf94049da
commit 64f1f88cdd
129 changed files with 8796 additions and 8755 deletions

View File

@@ -207,6 +207,8 @@ def track_agent_usage_sync(user_id: str, model_name: str, prompt: str, response_
})
db.commit()
from services.subscription.cache import clear_dashboard_cache
clear_dashboard_cache(user_id)
logger.info(f"[AgentTracking] ✅ Usage tracked: {new_calls} calls, {cost_total} cost")
except Exception as e:

View File

@@ -57,6 +57,30 @@ class SIFBaseAgent(BaseALwrityAgent):
if kwargs:
logger.debug(f"[{self.__class__.__name__}] Parameters: {kwargs}")
async def _ensure_intelligence_ready(self) -> bool:
    """Initialize the intelligence service if needed and report whether it is usable.

    Returns:
        True only when the service's ``_initialized`` flag is truthy and its
        ``embeddings`` attribute is truthy. False when initialization raised
        (the error is logged as a warning) or either condition fails.
    """
    try:
        service = self.intelligence
        await service._ensure_initialized_async()
    except Exception as init_err:
        # A failed initialization is reported as "not ready" instead of propagating.
        logger.warning(f"[{self.__class__.__name__}] Intelligence initialization failed: {init_err}")
        return False

    if not getattr(self.intelligence, "_initialized", False):
        return False
    return bool(self.intelligence.embeddings)
async def initialize_async(self):
    """Async lifecycle hook: prepare the intelligence service, then the LLM if it supports it.

    The LLM step is skipped when the agent has no ``llm`` attribute or the
    attribute does not expose ``ensure_initialized_async``.
    """
    await self._ensure_intelligence_ready()

    model = getattr(self, "llm", None)
    supports_async_init = hasattr(model, "ensure_initialized_async")
    if supports_async_init:
        await model.ensure_initialized_async()

    logger.info(f"[{self.__class__.__name__}] Async initialization complete")
async def shutdown(self):
    """Async lifecycle hook: ask the attached LLM, if any, to shut down.

    Nothing is awaited when the agent has no ``llm`` attribute or the
    attribute has no ``shutdown`` member.
    """
    model = getattr(self, "llm", None)
    can_shut_down = hasattr(model, "shutdown")
    if can_shut_down:
        await model.shutdown()

    logger.info(f"[{self.__class__.__name__}] Shutdown complete")
def _create_txtai_agent(self):
"""
SIF agents use the intelligence service directly, but we can expose

View File

@@ -9,36 +9,97 @@ from services.intelligence.agents.core_agent_framework import TaskProposal
from services.intelligence.txtai_service import TxtaiIntelligenceService
class CitationExpert(SIFBaseAgent):
"""Agent for fact-checking and source management."""
"""Agent for fact-checking and source management using the SIF index."""
def __init__(self, intelligence_service: TxtaiIntelligenceService, user_id: str, **kwargs):
super().__init__(intelligence_service, user_id, agent_type="citation_expert", **kwargs)
async def verify_citations(self, content: str) -> Dict[str, Any]:
"""Verify citations in content against trusted sources."""
# Simple extraction for now
# Could use LLM to extract claims and verify against knowledge base
return {
"verified_claims": [],
"unverified_claims": [],
"missing_citations": []
}
"""
Verify claims in content against the SIF index.
Searches for supporting or refuting evidence for each extracted claim.
"""
if not self.intelligence.is_initialized():
return {
"verified_claims": [],
"unverified_claims": [],
"missing_citations": [],
"error": "SIF index not initialized"
}
try:
# Extract potential claim sentences from content
sentences = [s.strip() for s in content.replace("\n", " ").split(".") if len(s.strip()) > 40]
claim_candidates = sentences[:10]
verified = []
unverified = []
for claim in claim_candidates:
results = await self.intelligence.search(claim, limit=3)
if results and any(r.get("score", 0) > 0.7 for r in results):
verified.append({
"claim": claim[:200],
"supporting_sources": [
{"url": r.get("id", ""), "score": r.get("score", 0)}
for r in results if r.get("score", 0) > 0.7
]
})
else:
unverified.append({"claim": claim[:200], "sources_found": len(results)})
return {
"verified_claims": verified,
"unverified_claims": unverified,
"missing_citations": [c["claim"] for c in unverified],
"analysis_timestamp": datetime.utcnow().isoformat()
}
except Exception as e:
logger.error(f"[{self.__class__.__name__}] Citation verification failed: {e}")
return {
"verified_claims": [],
"unverified_claims": [],
"missing_citations": [],
"error": str(e)
}
async def propose_daily_tasks(self, context: Dict[str, Any]) -> List[TaskProposal]:
"""Propose fact-checking tasks."""
"""
Propose fact-checking tasks based on SIF index coverage.
"""
proposals = []
# 1. Fact Check High-Value Content
proposals.append(TaskProposal(
title="Verify Sources for 'AI Trends 2025'",
description="Double-check statistical claims in your latest draft.",
pillar_id="create",
priority="medium",
estimated_time=20,
source_agent="CitationExpert",
reasoning="Ensures credibility and trust.",
action_type="navigate",
action_url="/content-planning-dashboard"
))
indexed_count = 0
if self.intelligence.is_initialized():
try:
results = await self.intelligence.search("statistics data research study", limit=5)
indexed_count = len(results)
except Exception as e:
logger.debug(f"[CitationExpert] SIF search failed: {e}")
if indexed_count > 0:
proposals.append(TaskProposal(
title="Verify Data Claims",
description=f"SIF found {indexed_count} reference pages. Check recent drafts for unsupported statistics.",
pillar_id="create",
priority="medium",
estimated_time=20,
source_agent="CitationExpert",
reasoning="Verified sources build audience trust and SEO authority.",
action_type="navigate",
action_url="/content-planning-dashboard"
))
else:
proposals.append(TaskProposal(
title="Add Source Citations",
description="Index authoritative sources in SIF to enable automated fact-checking.",
pillar_id="create",
priority="low",
estimated_time=15,
source_agent="CitationExpert",
reasoning="Citing authoritative sources improves content credibility.",
action_type="navigate",
action_url="/content-planning-dashboard"
))
return proposals

View File

@@ -14,9 +14,11 @@ try:
except ImportError:
SIF_AVAILABLE = False
class CompetitorResponseAgent(BaseALwrityAgent):
"""
Agent responsible for monitoring competitors and generating counter-strategies.
Uses SIF index for real competitive data when available.
"""
def __init__(self, user_id: str, shared_llm_name: str, llm: Any = None, **kwargs):
@@ -44,61 +46,123 @@ class CompetitorResponseAgent(BaseALwrityAgent):
tools=[
{
"name": "competitor_monitor",
"description": "Monitors competitor content and changes",
"description": "Returns competitor monitoring status via SIF",
"target": self._competitor_monitor_tool
},
{
"name": "threat_analyzer",
"description": "Analyzes competitive threats",
"description": "Returns threat analysis availability and SIF status",
"target": self._threat_analyzer_tool
}
],
llm=_llm_for_agent,
max_iterations=5,
# Removed unsupported 'system' argument
# Instruction will be provided via orchestrator context or initial prompt
# Instruction should be provided during invocation or via orchestrator context
)
# Tool Implementations
# Tool Implementations (sync — called by txtai Agent)
def _competitor_monitor_tool(self, context: Dict[str, Any]) -> Dict[str, Any]:
"""
Competitor monitoring tool that retrieves data via SIF.
Args:
context: Dictionary containing 'competitor_url' (optional) to filter monitoring targets.
Competitor monitoring tool. Returns SIF availability and directs to async method.
"""
# Stub implementation
return {"status": "monitored", "changes": []}
competitor_url = context.get("competitor_url", "any")
if not self.sif_service:
return {
"status": "unavailable",
"changes": [],
"message": "SIF not initialized. Use async analyze_competitors() for real data."
}
return {
"status": "sif_available",
"competitor_url": competitor_url,
"changes": [],
"message": "SIF available. Use async analyze_competitors() for detailed analysis."
}
def _threat_analyzer_tool(self, context: Dict[str, Any]) -> Dict[str, Any]:
    """Report whether threat analysis is available; no analysis is performed here.

    Args:
        context: Tool arguments. The optional ``focus_area`` key (default
            ``"general"``) is echoed back when SIF is available.

    Returns:
        A status dict. When ``self.sif_service`` is falsy the assessment and
        level are ``"unknown"``; otherwise they are ``"pending"`` and the
        message points the caller at the async ``analyze_competitors()``
        method with the requested focus area filled in.
    """
    focus = context.get("focus_area", "general")
    if not self.sif_service:
        return {
            "threat_assessment": "unknown",
            "level": "unknown",
            "message": "SIF not available. Use async analyze_competitors()."
        }
    return {
        "threat_assessment": "pending",
        "level": "pending",
        "focus_area": focus,
        # Must be an f-string: without the prefix the caller receives the
        # literal text "{focus}" instead of the requested focus area.
        "message": f"SIF available. Use async analyze_competitors(focus_area='{focus}')."
    }
# Async entry points
async def analyze_competitors(self, website_url: str = "", focus_area: str = "general") -> Dict[str, Any]:
    """Query the intelligence service for competitor-related entries.

    Args:
        website_url: Appended to the search query; may be empty.
        focus_area: Topic inserted into the search query and echoed in the result.

    Returns:
        On success, a dict with one ``competitors`` entry per search hit
        (``url`` taken from the hit's ``id``, ``snippet`` from the first 200
        characters of its ``text``), an empty ``threats`` list, the hit count,
        the focus area and a UTC timestamp. On any failure, a dict with empty
        ``competitors``/``threats`` lists and an ``error`` message.
    """

    def _failure(reason: str) -> Dict[str, Any]:
        return {"competitors": [], "threats": [], "error": reason}

    if not self.sif_service:
        return _failure("SIF service not initialized")

    try:
        engine = getattr(self.sif_service, "intelligence_service", None)
        if not engine:
            return _failure("Intelligence service unavailable")

        hits = await engine.search(f"competitor {focus_area} {website_url}", limit=10)

        matches = []
        for hit in hits:
            matches.append({"url": hit.get("id", ""), "snippet": hit.get("text", "")[:200]})

        return {
            "competitors": matches,
            "threats": [],
            "pages_analyzed": len(hits),
            "focus_area": focus_area,
            "analysis_timestamp": datetime.utcnow().isoformat()
        }
    except Exception as e:
        # Any search or formatting error is logged and returned as an error payload.
        logger.error(f"[CompetitorResponseAgent] Analysis failed: {e}")
        return _failure(str(e))
async def propose_daily_tasks(self, context: Dict[str, Any]) -> List[TaskProposal]:
"""
Propose tasks based on competitive intel.
Propose tasks based on competitive intel from the SIF index.
"""
proposals = []
# 1. Competitor Gap Fill
proposals.append(TaskProposal(
title="Cover 'AI Agent Frameworks'",
description="Competitor X just published a guide on this. Create a better version.",
pillar_id="create",
priority="high",
estimated_time=60,
source_agent="CompetitorResponseAgent",
reasoning="High-value topic gaining traction.",
action_type="navigate",
action_url="/content-planning-dashboard"
))
competitor_count = 0
focus_area = context.get("focus_area", "content strategy")
if self.sif_service:
try:
intelligence = getattr(self.sif_service, "intelligence_service", None)
if intelligence:
results = await intelligence.search(f"competitor {focus_area}", limit=5)
competitor_count = len(results)
except Exception as e:
logger.debug(f"[CompetitorResponseAgent] SIF competitor search failed: {e}")
if competitor_count > 0:
proposals.append(TaskProposal(
title="Review Competitor Content",
description=f"SIF found {competitor_count} competitor pages. Review for gap opportunities.",
pillar_id="create",
priority="high",
estimated_time=45,
source_agent="CompetitorResponseAgent",
reasoning="SIF-detected competitor activity presents content gap opportunities.",
action_type="navigate",
action_url="/content-planning-dashboard"
))
else:
proposals.append(TaskProposal(
title="Research Competitor Topics",
description="Search for competitor content in your niche to identify coverage gaps.",
pillar_id="create",
priority="medium",
estimated_time=30,
source_agent="CompetitorResponseAgent",
reasoning="Understanding competitor positioning improves content strategy.",
action_type="navigate",
action_url="/content-planning-dashboard"
))
return proposals

View File

@@ -9,51 +9,88 @@ from services.intelligence.agents.core_agent_framework import TaskProposal
from services.intelligence.txtai_service import TxtaiIntelligenceService
class LinkGraphAgent(SIFBaseAgent):
"""Agent for internal linking and graph optimization."""
"""Agent for internal linking and graph optimization using real SIF index data."""
def __init__(self, intelligence_service: TxtaiIntelligenceService, user_id: str, **kwargs):
super().__init__(intelligence_service, user_id, agent_type="link_graph_expert", **kwargs)
async def analyze_graph(self) -> Dict[str, Any]:
"""Analyze the knowledge graph structure of the content."""
"""
Analyze the knowledge graph structure by searching the SIF index.
Returns semantic clusters and content grouping insights.
"""
if not self.intelligence.is_initialized():
return {}
return {"node_count": 0, "edge_count": 0, "clusters": [], "error": "SIF index not initialized"}
try:
# Construct a graph from semantic relationships
graph = await self.intelligence.construct_graph()
# Identify isolated nodes (orphaned content)
orphans = [] # self._find_orphans(graph)
# Identify central nodes (pillars)
hubs = [] # self._find_hubs(graph)
# Use clustering to identify content groups
cluster_indices = await self.intelligence.cluster(min_score=0.5)
cluster_count = len(cluster_indices) if cluster_indices else 0
# Search for content hub candidates
hub_results = await self.intelligence.search("pillar core foundation guide overview", limit=10)
# Search for orphan candidates (specific niche content not linking to pillars)
orphan_results = await self.intelligence.search("specific detailed deep dive", limit=10)
return {
"node_count": 0, # graph.number_of_nodes(),
"edge_count": 0, # graph.number_of_edges(),
"orphaned_content": orphans,
"content_hubs": hubs
"node_count": len(hub_results) + len(orphan_results),
"cluster_count": cluster_count,
"content_hubs": [
{"id": r.get("id", ""), "title": r.get("text", "")[:100]}
for r in hub_results
],
"orphaned_content": [
{"id": r.get("id", ""), "snippet": r.get("text", "")[:100]}
for r in orphan_results
],
"analysis_timestamp": datetime.utcnow().isoformat()
}
except Exception as e:
logger.error(f"[{self.__class__.__name__}] Graph analysis failed: {e}")
return {}
return {"node_count": 0, "edge_count": 0, "clusters": [], "error": str(e)}
async def propose_daily_tasks(self, context: Dict[str, Any]) -> List[TaskProposal]:
"""Propose internal linking tasks."""
"""
Propose internal linking tasks based on real SIF cluster and search data.
"""
proposals = []
# 1. Internal Link Opportunity
proposals.append(TaskProposal(
title="Internal Linking Review",
description="Add internal links to your new post 'Content Strategy 101'.",
pillar_id="create",
priority="medium",
estimated_time=15,
source_agent="LinkGraphAgent",
reasoning="Improves SEO and user navigation.",
action_type="navigate",
action_url="/content-planning-dashboard"
))
cluster_count = 0
hub_count = 0
if self.intelligence.is_initialized():
try:
cluster_indices = await self.intelligence.cluster(min_score=0.5)
cluster_count = len(cluster_indices) if cluster_indices else 0
hub_results = await self.intelligence.search("pillar guide", limit=5)
hub_count = len(hub_results)
except Exception as e:
logger.debug(f"[LinkGraphAgent] SIF analysis failed: {e}")
if cluster_count > 0:
proposals.append(TaskProposal(
title="Strengthen Internal Links",
description=f"SIF detected {cluster_count} content clusters that need cross-linking.",
pillar_id="distribute",
priority="medium",
estimated_time=20,
source_agent="LinkGraphAgent",
reasoning="Connecting content clusters improves SEO and user navigation.",
action_type="navigate",
action_url="/content-planning-dashboard"
))
else:
proposals.append(TaskProposal(
title="Plan Content Clusters",
description="No content clusters found. Create pillar pages to build a linked content structure.",
pillar_id="distribute",
priority="medium",
estimated_time=30,
source_agent="LinkGraphAgent",
reasoning="Structured content clusters drive organic growth.",
action_type="navigate",
action_url="/content-planning-dashboard"
))
return proposals

View File

@@ -14,9 +14,11 @@ try:
except ImportError:
SIF_AVAILABLE = False
class SEOOptimizationAgent(BaseALwrityAgent):
"""
Agent responsible for technical SEO, keyword strategy, and performance optimization.
Uses SIF index for real data when available.
"""
def __init__(self, user_id: str, shared_llm_name: str, llm: Any = None, **kwargs):
@@ -44,91 +46,147 @@ class SEOOptimizationAgent(BaseALwrityAgent):
tools=[
{
"name": "seo_auditor",
"description": "Performs comprehensive SEO audits",
"description": "Returns SEO audit status and available SIF data",
"target": self._seo_auditor_tool
},
{
"name": "keyword_researcher",
"description": "Researches high-potential keywords",
"description": "Returns keyword research status via SIF",
"target": self._keyword_researcher_tool
},
{
"name": "on_page_optimizer",
"description": "Optimizes on-page elements",
"description": "Returns on-page optimization availability",
"target": self._on_page_optimizer_tool
},
{
"name": "technical_fixer",
"description": "Fixes technical SEO issues",
"description": "Returns technical fix availability",
"target": self._technical_fixer_tool
}
],
llm=_llm_for_agent,
max_iterations=15,
# Removed unsupported 'system' argument
# Instruction will be provided via orchestrator context or initial prompt
# Instruction should be provided during invocation or via orchestrator context
)
# Tool Implementations
# Tool Implementations (sync — called by txtai Agent)
def _seo_auditor_tool(self, context: Dict[str, Any]) -> Dict[str, Any]:
"""
SEO audit tool that retrieves existing SEO data via SIF.
Args:
context: Dictionary containing 'website_url' to audit.
SEO audit tool. Returns availability and directs caller to async method for full analysis.
"""
# Stub implementation
return {"health": "good", "issues": []}
website_url = context.get("website_url", "unknown")
if not self.sif_service:
return {
"health": "unknown",
"issues": [],
"status": "sif_unavailable",
"message": "SIF service not initialized. Call perform_seo_audit() for async analysis."
}
return {
"health": "pending",
"website_url": website_url,
"issues": [],
"status": "sif_available",
"message": "SIF available. Call perform_seo_audit() for detailed async analysis."
}
def _keyword_researcher_tool(self, context: Dict[str, Any]) -> Dict[str, Any]:
"""
Keyword research tool.
Args:
context: Dictionary containing 'seed_keywords' or 'topic'.
Keyword research tool. Returns SIF availability and sample context if present.
"""
# Stub implementation
return {"keywords": []}
seed = context.get("seed_keywords", context.get("topic", "unknown"))
if not self.sif_service:
return {"keywords": [], "status": "sif_unavailable", "message": "SIF not available."}
return {
"keywords": [],
"status": "sif_available",
"message": f"SIF available. Use async search_keywords(topic='{seed}') for detailed research."
}
def _on_page_optimizer_tool(self, context: Dict[str, Any]) -> Dict[str, Any]:
"""
On-page optimization tool.
Args:
context: Dictionary containing 'url' and 'target_keyword'.
"""
# Stub implementation
return {"optimized": True}
"""On-page optimization tool. Requires async analysis."""
return {
"optimized": False,
"status": "unavailable",
"message": "On-page optimization requires async analysis via propose_daily_tasks()."
}
def _technical_fixer_tool(self, context: Dict[str, Any]) -> Dict[str, Any]:
    """Report that automated technical fixes are not available.

    Args:
        context: Tool arguments; the optional ``issue_id`` key (default
            ``"unknown"``) is quoted in the returned message.

    Returns:
        A dict with ``fixed`` set to False, ``status`` set to
        ``"unavailable"`` and a message asking for manual review.
    """
    issue_id = context.get("issue_id", "unknown")
    notice = f"Issue '{issue_id}' requires manual review. Automated fixes not implemented."
    response: Dict[str, Any] = {"fixed": False, "status": "unavailable"}
    response["message"] = notice
    return response
# Async entry points
async def perform_seo_audit(self, website_url: str) -> Dict[str, Any]:
"""
Technical SEO fixer tool.
Args:
context: Dictionary containing 'issue_id' to fix.
Perform a comprehensive SEO audit by searching the SIF index.
Returns real data about indexed content, keyword coverage, and gaps.
"""
# Stub implementation
return {"fixed": True}
if not self.sif_service:
return {"health": "unknown", "issues": [], "error": "SIF service not initialized"}
try:
intelligence = getattr(self.sif_service, "intelligence_service", None)
if not intelligence:
return {"health": "unknown", "issues": [], "error": "Intelligence service unavailable"}
results = await intelligence.search(f"seo website analysis {website_url}", limit=10)
return {
"health": "reviewed",
"website_url": website_url,
"pages_indexed": len(results),
"issues": [],
"audit_timestamp": datetime.utcnow().isoformat()
}
except Exception as e:
logger.error(f"[SEOOptimizationAgent] SEO audit failed: {e}")
return {"health": "unknown", "issues": [], "error": str(e)}
async def propose_daily_tasks(self, context: Dict[str, Any]) -> List[TaskProposal]:
"""
Propose SEO-focused tasks.
Propose SEO-focused tasks based on real SIF index data.
"""
proposals = []
# 1. Quick SEO Win
proposals.append(TaskProposal(
title="Fix Broken Links",
description="3 internal links on 'About Us' page are broken.",
pillar_id="distribute",
priority="high",
estimated_time=10,
source_agent="SEOOptimizationAgent",
reasoning="Easy technical win.",
action_type="navigate",
action_url="/content-planning-dashboard"
))
issues_found = 0
website_url = context.get("website_url", "")
if self.sif_service:
try:
intelligence = getattr(self.sif_service, "intelligence_service", None)
if intelligence:
results = await intelligence.search("seo issue problem error fix", limit=5)
issues_found = len(results)
except Exception as e:
logger.debug(f"[SEOOptimizationAgent] SIF search for issues failed: {e}")
if issues_found > 0:
proposals.append(TaskProposal(
title="Review SEO Issues",
description=f"SIF indexed content suggests {issues_found} areas that may need SEO attention.",
pillar_id="distribute",
priority="high",
estimated_time=30,
source_agent="SEOOptimizationAgent",
reasoning="Addressing SEO gaps improves organic visibility.",
action_type="navigate",
action_url="/content-planning-dashboard"
))
else:
proposals.append(TaskProposal(
title="Run SEO Audit",
description="Perform a comprehensive SEO audit to identify optimization opportunities.",
pillar_id="distribute",
priority="medium",
estimated_time=15,
source_agent="SEOOptimizationAgent",
reasoning="Regular audits prevent SEO degradation.",
action_type="navigate",
action_url="/content-planning-dashboard"
))
return proposals

View File

@@ -133,6 +133,8 @@ class SemanticHarvesterService:
'cost': cost, 'user_id': user_id, 'period': current_period,
})
db.commit()
from services.subscription.cache import clear_dashboard_cache
clear_dashboard_cache(user_id)
logger.info(f"[SemanticHarvester] Tracked Exa usage: user={user_id}, cost=${cost}")
finally:
db.close()

View File

@@ -651,15 +651,37 @@ class RealTimeSemanticMonitor:
class SemanticDashboardAPI:
"""API interface for the semantic monitoring dashboard."""
STALE_AFTER_SECONDS = 3600 # 1 hour without access = stale
def __init__(self):
    """Start with no monitors and no recorded access times."""
    # user_id -> time of the most recent get_monitor() call for that user.
    self._last_access: Dict[str, datetime] = {}
    # user_id -> that user's monitor instance.
    self.monitors: Dict[str, RealTimeSemanticMonitor] = {}
def get_monitor(self, user_id: str) -> RealTimeSemanticMonitor:
    """Return the monitor for ``user_id``, creating it on first request.

    Every call also records the current UTC time as the user's last access.
    """
    try:
        monitor = self.monitors[user_id]
    except KeyError:
        monitor = self.monitors[user_id] = RealTimeSemanticMonitor(user_id)
    self._last_access[user_id] = datetime.utcnow()
    return monitor
def evict_stale_monitors(self, max_age_seconds: Optional[int] = None) -> int:
    """Drop monitors whose last recorded access is older than the age limit.

    Args:
        max_age_seconds: Age limit in seconds. ``None`` selects
            ``STALE_AFTER_SECONDS``. An explicit ``0`` is honoured and evicts
            every monitor with a recorded access time in the past.

    Returns:
        The number of evicted monitors.
    """
    # Compare against None explicitly: ``max_age_seconds or default`` would
    # silently replace a deliberate 0 with the one-hour default.
    max_age = self.STALE_AFTER_SECONDS if max_age_seconds is None else max_age_seconds
    now = datetime.utcnow()
    # Collect ids first so the dicts are not mutated while being iterated.
    stale = [
        uid for uid, last in self._last_access.items()
        if (now - last).total_seconds() > max_age
    ]
    for uid in stale:
        self.monitors.pop(uid, None)
        self._last_access.pop(uid, None)
    if stale:
        logger.info(f"Evicted {len(stale)} stale semantic monitor(s)")
    return len(stale)
async def start_dashboard_monitoring(self, user_id: str, competitors: List[str] = None) -> Dict[str, Any]:
"""Start semantic monitoring for a user."""

View File

@@ -298,7 +298,8 @@ class SemanticCacheManager:
query: str,
results: List[Dict[str, Any]],
relevance_threshold: float = 0.7,
ttl: Optional[int] = None
ttl: Optional[int] = None,
user_id: str = None
) -> bool:
"""
Cache semantic search query results with relevance-based invalidation
@@ -308,6 +309,7 @@ class SemanticCacheManager:
results: Query results
relevance_threshold: Minimum relevance score for caching
ttl: Time to live in seconds
user_id: User identifier for scoped caching
Returns:
True if caching was successful
@@ -319,7 +321,7 @@ class SemanticCacheManager:
cache_key = self._generate_cache_key(
"semantic_query",
"global", # Global query cache
user_id, # User-scoped cache key
{"query": query, "threshold": relevance_threshold}
)
@@ -348,13 +350,14 @@ class SemanticCacheManager:
def get_cached_query_results(
self,
query: str,
relevance_threshold: float = 0.7
relevance_threshold: float = 0.7,
user_id: str = None
) -> Optional[List[Dict[str, Any]]]:
"""Retrieve cached semantic query results"""
"""Retrieve cached semantic query results scoped to a user"""
try:
cache_key = self._generate_cache_key(
"semantic_query",
"global",
user_id,
{"query": query, "threshold": relevance_threshold}
)
@@ -478,29 +481,7 @@ class SemanticCacheManager:
logger.error(f"Failed to get cache stats: {e}")
return self.stats
def warm_cache_for_user(self, user_id: str, common_queries: List[str]):
"""
Pre-populate cache with common semantic queries for a user
Args:
user_id: User identifier
common_queries: List of common semantic queries to pre-cache
"""
try:
logger.info(f"Warming cache for user {user_id} with {len(common_queries)} queries")
# This would typically involve running the actual semantic analysis
# For now, we log the intent and can be extended with actual warming logic
# Example warming scenarios:
# 1. Pre-analyze user's top content pillars
# 2. Cache common competitor comparisons
# 3. Pre-compute semantic similarity scores
logger.info(f"Cache warming initiated for user {user_id}")
except Exception as e:
logger.error(f"Failed to warm cache for user: {e}")
def semantic_cache_decorator(ttl: int = 3600, operation_type: str = "generic"):

View File

@@ -61,32 +61,32 @@ LOCAL_LLM_FALLBACKS = [
class LocalLLMWrapper:
"""
Lazily loads a local LLM via txtai and caches it globally.
This prevents blocking server startup and redundant model loads.
Wraps a local LLM with async lifecycle support.
Model loading runs off the event loop so it never blocks the server.
Loaded models are cached globally (shared across all instances).
"""
def __init__(self, model_path: str, task: str = None):
self.model_path = model_path
self.task = task
# No self._llm here, we use the global cache
@property
def llm(self):
# Create a cache key based on model path and task
self._initialized = False
self._init_task = None
def _load_model_sync(self) -> Any:
"""Load model (blocking — call via thread executor from async code)."""
cache_key = f"{self.model_path}:{self.task}"
if cache_key in _local_llm_cache:
return _local_llm_cache[cache_key]
if LLM is None:
raise ImportError("txtai.pipeline.LLM is not available")
task_to_use = (self.task or "language-generation").strip()
# Explicitly force language-generation for known models if auto-detect fails
if any(x in self.model_path for x in ["Qwen", "Instruct", "GPT", "Llama"]):
task_to_use = "language-generation"
if task_to_use == "text-generation":
task_to_use = "language-generation"
candidate_models = []
for candidate in [self.model_path, *LOCAL_LLM_FALLBACKS]:
if candidate not in candidate_models:
@@ -137,12 +137,49 @@ class LocalLLMWrapper:
pass
logger.error(f"Failed to initialize LocalLLMWrapper after fallback attempts: {last_error}")
raise last_error
return _local_llm_cache[cache_key]
@property
def llm(self):
    """Return the model for this wrapper's path and task, loading it on a cache miss.

    A cache hit returns the shared entry as-is. A miss calls the blocking
    loader and then marks this wrapper as initialized.
    """
    try:
        return _local_llm_cache[f"{self.model_path}:{self.task}"]
    except KeyError:
        pass
    model = self._load_model_sync()
    self._initialized = True
    return model
async def initialize(self) -> bool:
    """Load the model from async code by running the blocking loader in an executor.

    Returns:
        True when this wrapper is already marked initialized, when the model
        is already present in the module-level cache, or when the load
        succeeds. False when the load raises; the error is logged rather
        than propagated.
    """
    if self._initialized:
        return True
    cache_key = f"{self.model_path}:{self.task}"
    if cache_key in _local_llm_cache:
        self._initialized = True
        return True
    try:
        # Inside a coroutine the running loop is the one to use;
        # get_running_loop() is the documented call for that, whereas
        # get_event_loop() carries legacy policy-dependent behaviour.
        loop = asyncio.get_running_loop()
        # None selects the loop's default executor.
        await loop.run_in_executor(None, self._load_model_sync)
        self._initialized = True
        return True
    except Exception as e:
        logger.error(f"[LocalLLMWrapper] Async init failed for {self.model_path}: {e}")
        return False
async def ensure_initialized_async(self) -> bool:
    """Return True if already initialized; otherwise await ``initialize()`` and return its result."""
    if not self._initialized:
        return await self.initialize()
    return True
async def shutdown(self):
    """Remove this wrapper's entry from the module-level model cache and clear its initialized flag.

    NOTE(review): the cache is keyed by model path and task, so other
    wrappers using the same key lose their cached model too — confirm that
    is intended.
    """
    entry = f"{self.model_path}:{self.task}"
    if entry in _local_llm_cache:
        del _local_llm_cache[entry]
    self._initialized = False
def __call__(self, prompt: str, **kwargs) -> str:
    """Run the prompt through the model returned by ``self.llm``, forwarding keyword arguments."""
    model = self.llm
    return model(prompt, **kwargs)
def generate(self, prompt: str, **kwargs) -> str:
    """Run the prompt through the model returned by ``self.llm``, forwarding keyword arguments."""
    model = self.llm
    return model(prompt, **kwargs)
@@ -177,6 +214,21 @@ class SIFBaseAgent(BaseALwrityAgent):
return bool(getattr(self.intelligence, "_initialized", False) and self.intelligence.embeddings)
async def initialize_async(self):
    """Async lifecycle hook: prepare the intelligence service, then the LLM if it supports it.

    The LLM step is skipped when the agent has no ``llm`` attribute or the
    attribute does not expose ``ensure_initialized_async``.
    """
    await self._ensure_intelligence_ready()

    model = getattr(self, "llm", None)
    supports_async_init = hasattr(model, "ensure_initialized_async")
    if supports_async_init:
        await model.ensure_initialized_async()

    logger.info(f"[{self.__class__.__name__}] Async initialization complete")
async def shutdown(self):
    """Async lifecycle hook: ask the attached LLM, if any, to shut down.

    Nothing is awaited when the agent has no ``llm`` attribute or the
    attribute has no ``shutdown`` member.
    """
    model = getattr(self, "llm", None)
    can_shut_down = hasattr(model, "shutdown")
    if can_shut_down:
        await model.shutdown()

    logger.info(f"[{self.__class__.__name__}] Shutdown complete")
def _create_txtai_agent(self):
"""
SIF agents primarily use the intelligence service directly, but we can expose
@@ -545,6 +597,84 @@ class ContentGuardianAgent(SIFBaseAgent):
super().__init__(intelligence_service, user_id, agent_type="content_guardian")
self.sif_service = sif_service
async def perform_site_audit(self, website_url: str) -> Dict[str, Any]:
    """Audit indexed content for a website and return a structured report.

    Searches the intelligence index for up to 10 entries related to
    ``website_url``. Each entry with at least 50 characters of text is run
    through the quality, style and safety checks, and the scores are averaged.

    Args:
        website_url: Site whose indexed content should be audited.

    Returns:
        A report dict with the URL, a UTC timestamp, the number of search
        hits, and the ``content_quality``, ``brand_voice_consistency``,
        ``safety_issues`` and ``cannibalization_issues`` sections. The four
        sections stay ``None`` when the search returns nothing. If anything
        raises, a dict with only the URL, the error text and a timestamp is
        returned.
    """
    self._log_agent_operation("Performing site audit", website_url=website_url)

    def _mean(values) -> float:
        # max(..., 1) keeps an empty list from dividing by zero (yields 0.0).
        return round(sum(values) / max(len(values), 1), 4)

    try:
        # Search the user's SIF index for website content
        pages = await self.intelligence.search(
            f"website content analysis {website_url}", limit=10
        )

        report: Dict[str, Any] = {
            "website_url": website_url,
            "audit_timestamp": datetime.utcnow().isoformat(),
            "total_pages_crawled": len(pages),
            "content_quality": None,
            "brand_voice_consistency": None,
            "safety_issues": None,
            "cannibalization_issues": None,
        }

        if not pages:
            logger.warning(f"[{self.__class__.__name__}] No indexed content found for {website_url}")
            return report

        quality_scores, style_scores, safety_flags = [], [], []
        for page in pages:
            # Fall back to the entry id when no text is stored.
            body = page.get("text", "") or page.get("id", "")
            if len(body) >= 50:
                quality = await self.assess_content_quality({"description": body, "title": website_url})
                quality_scores.append(quality.get("score", 0.0))

                style = await self.style_enforcer(body)
                style_scores.append(style.get("compliance_score", 0.0))

                safety = await self.safety_filter(body)
                if not safety.get("is_safe", True):
                    safety_flags.append(safety.get("flags", []))

        report["content_quality"] = {
            "score": _mean(quality_scores),
            "pages_analyzed": len(quality_scores),
        }
        report["brand_voice_consistency"] = {
            "compliance_score": _mean(style_scores),
            "pages_checked": len(style_scores),
        }
        flagged = len(safety_flags)
        report["safety_issues"] = {
            "has_issues": flagged > 0,
            "flagged_pages": flagged,
        }
        report["cannibalization_issues"] = await self.check_cannibalization(website_url)

        logger.info(
            f"[{self.__class__.__name__}] Site audit complete for {website_url}: "
            f"quality={report['content_quality']['score']}, "
            f"brand_voice={report['brand_voice_consistency']['compliance_score']}"
        )
        return report

    except Exception as e:
        logger.error(f"[{self.__class__.__name__}] Site audit failed for {website_url}: {e}")
        return {
            "website_url": website_url,
            "error": str(e),
            "audit_timestamp": datetime.utcnow().isoformat(),
        }
async def assess_content_quality(self, website_data: Dict[str, Any]) -> Dict[str, Any]:
"""Assess overall content quality based on website data."""
self._log_agent_operation("Assessing content quality")
@@ -826,51 +956,21 @@ class LinkGraphAgent(SIFBaseAgent):
logger.info(f"[{self.__class__.__name__}] No relevant internal pages found")
return []
# 2. Get Authority Data (if available)
authority_map = {}
if self.sif_service:
try:
# Fetch dashboard context to get top performing content
# Note: This relies on what's available in the SIF index/dashboard summary
dashboard_context = await self.sif_service.get_seo_dashboard_context()
if "error" not in dashboard_context:
# Extract top queries/pages if available in summary
# Ideally, we'd have a map of URL -> Authority Score
# For now, we'll try to extract what we can
data = dashboard_context.get("dashboard_data", {})
summary = data.get("summary", {})
# Example: Boost if site health is good (general confidence)
site_health = data.get("health_score", {}).get("score", 0)
# If we had top pages in the summary, we'd use them.
# For now, we'll use a placeholder authority map or just the site health
pass
except Exception as e:
logger.warning(f"Failed to fetch authority data: {e}")
suggestions = []
for result in results:
relevance_score = result.get('score', 0.0)
url = result.get('id', 'unknown')
# Apply authority boost (placeholder logic)
# In a full implementation, we'd look up 'url' in authority_map
authority_boost = 1.0
final_score = relevance_score * authority_boost
if final_score >= self.RELEVANCE_THRESHOLD:
if relevance_score >= self.RELEVANCE_THRESHOLD:
suggestion = {
"url": url,
"relevance": relevance_score,
"final_score": final_score,
"confidence": self._calculate_link_confidence(final_score),
"final_score": relevance_score,
"confidence": self._calculate_link_confidence(relevance_score),
"reason": f"Semantic similarity: {relevance_score:.3f}"
}
suggestions.append(suggestion)
logger.debug(f"[{self.__class__.__name__}] Added link suggestion: {url} (score: {final_score:.3f})")
logger.debug(f"[{self.__class__.__name__}] Added link suggestion: {url} (score: {relevance_score:.3f})")
# Sort by final score
suggestions.sort(key=lambda x: x['final_score'], reverse=True)
@@ -974,23 +1074,39 @@ class LinkGraphAgent(SIFBaseAgent):
return min(1.0, relevance_score * 1.5)
async def optimize_anchor_text(self, target_url: str, context: str) -> str:
    """Suggest anchor text for a link by searching the SIF index for the target page.

    Args:
        target_url: URL of the page the link will point to.
        context: Text surrounding the planned link; it is combined with the
            URL to form the search query.

    Returns:
        Up to five words (each longer than four characters) taken from the
        top search hit. When the intelligence service is not ready, the
        search yields nothing usable, or the lookup raises, the result of
        ``_extract_anchor_from_context`` is returned instead.
    """
    self._log_agent_operation("Optimizing anchor text", target_url=target_url, context_length=len(context))

    try:
        # Without a ready index there is nothing to search; derive the
        # anchor from the URL/context directly.
        if not await self._ensure_intelligence_ready():
            return self._extract_anchor_from_context(target_url, context)

        results = await self.intelligence.search(f"{target_url} {context}", limit=3)
        if results:
            # Prefer the hit's text; fall back to its id when no text is stored.
            text = results[0].get("text", "") or results[0].get("id", "")
            words = [w for w in text.split() if len(w) > 4][:5]
            if words:
                return " ".join(words)
        return self._extract_anchor_from_context(target_url, context)
    except Exception as e:
        # Best-effort feature: log and degrade to the URL/context fallback
        # rather than failing the caller.
        logger.error(f"[{self.__class__.__name__}] optimize_anchor_text failed: {e}")
        return self._extract_anchor_from_context(target_url, context)
def _extract_anchor_from_context(self, target_url: str, context: str) -> str:
"""Extract a usable anchor text from the URL or context when SIF is unavailable."""
from urllib.parse import urlparse
try:
parsed = urlparse(target_url)
path = parsed.path.strip("/").replace("-", " ").replace("/", " ")
if path:
words = [w for w in path.split() if len(w) > 3]
if words:
return " ".join(words[:4]).title()
except Exception:
pass
words = [w for w in context.split() if len(w) > 4]
return " ".join(words[:4]).title() if words else "learn more"
class CitationExpert(SIFBaseAgent):
"""

View File

@@ -1369,19 +1369,6 @@ class SIFIntegrationService:
logger.error(f"Failed to invalidate user cache: {e}")
return False
async def warm_user_cache(self, common_queries: List[str]) -> bool:
    """Warm the cache for this service's user with a list of common queries.

    Args:
        common_queries: Queries handed to the cache manager for pre-population.

    Returns:
        True when caching is enabled, a cache manager is present and the
        warm-up call completed; False when caching is unavailable or the
        warm-up raised.
    """
    try:
        caching_available = self.enable_caching and self.cache_manager
        if not caching_available:
            return False

        self.cache_manager.warm_cache_for_user(self.user_id, common_queries)
        logger.info(f"Warmed cache for user {self.user_id} with {len(common_queries)} queries")
        return True
    except Exception as exc:
        # Warm-up is optional; report the failure and signal it via the return value.
        logger.error(f"Failed to warm user cache: {exc}")
        return False
# Integration with existing API endpoints
class SIFIntegrationAPI:
"""API wrapper for SIF operations with caching integration."""

View File

@@ -220,12 +220,15 @@ class TxtaiIntelligenceService:
return 0.0
return dot_product / (norm_v1 * norm_v2)
async def index_content(self, items: List[Tuple[str, str, Dict[str, Any]]]):
async def index_content(self, items: List[Tuple[str, str, Dict[str, Any]]]) -> int:
"""
Index content for semantic search and clustering.
Index content using incremental upsert — only processes new/changed documents.
Args:
items: List of (id, text, metadata) tuples.
Returns:
Number of items actually upserted.
"""
self._ensure_initialized()
if not self._initialized:
@@ -235,38 +238,28 @@ class TxtaiIntelligenceService:
logger.warning(message)
if self.fail_fast:
raise RuntimeError(message)
return
return 0
try:
logger.info(f"Starting content indexing for user {self.user_id}")
logger.debug(f"Indexing {len(items)} items")
# Validate input items
if not items:
logger.warning("No items provided for indexing")
return
return 0
# Index items: [(id, text, metadata)] - metadata needs to be JSON string for txtai
import json
processed_items = []
for item in items:
id_val, text, metadata = item
# Convert metadata dict to JSON string
metadata_json = json.dumps(metadata) if metadata else "{}"
processed_items.append((id_val, text, metadata_json))
self.embeddings.index(processed_items)
# Save the index
self.embeddings.upsert(processed_items)
self.embeddings.save(self.index_path)
logger.info(f"Successfully indexed {len(items)} items for user {self.user_id}")
logger.debug(f"Index saved to: {self.index_path}")
count = len(processed_items)
logger.info(f"Upserted {count} items for user {self.user_id}")
return count
except Exception as e:
logger.error(f"Error indexing content for user {self.user_id}: {e}")
logger.error(f"Full traceback: {traceback.format_exc()}")
logger.error(f"Items count: {len(items) if items else 0}")
message = str(e)
is_windows_lock_error = isinstance(e, PermissionError) or "WinError 32" in message
if is_windows_lock_error:
@@ -274,7 +267,62 @@ class TxtaiIntelligenceService:
f"Txtai index save skipped for user {self.user_id} due to file lock. "
f"The index will be retried on a future run."
)
return
return 0
raise
async def delete_content(self, doc_ids: List[str]) -> int:
    """Delete specific documents from the index by ID.

    Args:
        doc_ids: List of document IDs to remove.

    Returns:
        Number of documents actually removed from the index. Returns 0 when
        ``doc_ids`` is empty, the service is not initialized, or the
        deletion fails (the error is logged, not raised).
    """
    # Nothing to delete: avoid initializing the service and rewriting the
    # index on disk for a no-op.
    if not doc_ids:
        return 0

    await self._ensure_initialized_async()
    if not self._initialized or not self.embeddings:
        return 0

    try:
        # txtai's Embeddings.delete returns the ids that were actually
        # removed, which can be fewer than requested when some ids are not
        # in the index. NOTE(review): the None fallback only guards against
        # a backend that returns nothing — confirm against the pinned txtai.
        removed = self.embeddings.delete(doc_ids)
        removed_count = len(removed) if removed is not None else len(doc_ids)

        # Only persist when the index actually changed.
        if removed_count:
            self.embeddings.save(self.index_path)

        logger.info(f"Deleted {removed_count} documents for user {self.user_id}")
        return removed_count
    except Exception as e:
        logger.error(f"Error deleting documents for user {self.user_id}: {e}")
        return 0
async def reindex_all(self, items: List[Tuple[str, str, Dict[str, Any]]]) -> int:
    """Full reindex — replaces all content. Use sparingly (e.g. schema migration).

    Args:
        items: List of (id, text, metadata) tuples. Metadata dicts are
            serialized to JSON strings before indexing; empty or missing
            metadata becomes ``"{}"``.

    Returns:
        Number of items indexed, or 0 when the service is not initialized.

    Raises:
        Exception: Any error raised while serializing, indexing or saving
            is logged and re-raised.
    """
    import json

    await self._ensure_initialized_async()
    if not self._initialized or not self.embeddings:
        return 0

    try:
        processed_items = [
            (doc_id, text, json.dumps(metadata) if metadata else "{}")
            for doc_id, text, metadata in items
        ]

        # A plain index() call builds a fresh index from the supplied
        # documents. The reindex=True flag is intentionally NOT passed:
        # per the txtai API it marks a rebuild from already-stored content
        # and skips database creation, so the supplied documents would not
        # replace what is stored. NOTE(review): confirm against the pinned
        # txtai version.
        self.embeddings.index(processed_items)
        self.embeddings.save(self.index_path)

        count = len(processed_items)
        logger.info(f"Reindexed all {count} items for user {self.user_id}")
        return count
    except Exception as e:
        logger.error(f"Error reindexing all for user {self.user_id}: {e}")
        raise
async def search(self, query: str, limit: int = 5) -> List[Dict[str, Any]]:
@@ -292,7 +340,8 @@ class TxtaiIntelligenceService:
if self.enable_caching and self.cache_manager:
cached_results = self.cache_manager.get_cached_query_results(
query=query,
relevance_threshold=0.5 # Lower threshold for search results
relevance_threshold=0.5, # Lower threshold for search results
user_id=self.user_id
)
if cached_results:
logger.info(f"Cache hit for search query: '{query}'")
@@ -309,7 +358,8 @@ class TxtaiIntelligenceService:
self.cache_manager.cache_query_results(
query=query,
results=results,
relevance_threshold=0.5
relevance_threshold=0.5,
user_id=self.user_id
)
logger.debug(f"Cached search results for query: '{query}'")
@@ -462,8 +512,7 @@ class TxtaiIntelligenceService:
"""Fallback clustering method when graph clustering is not available."""
logger.info(f"Using fallback clustering for user {self.user_id}")
# Simple clustering based on semantic similarity
# This is a placeholder - in production, you'd implement a proper clustering algorithm
# Simple clustering based on semantic similarity against sample queries
try:
# Get a sample of indexed items to analyze
sample_queries = ["marketing", "SEO", "content", "social media", "email marketing"]