feat: image generation overhaul (model-aware text, dim clamping, \.30 pricing), event-driven dashboard cache invalidation, SEO insights (AI visibility, GSC, keyword gap), YouTube OAuth/publish, blog writer & content planning improvements, scheduler monitoring updates
This commit is contained in:
@@ -207,6 +207,8 @@ def track_agent_usage_sync(user_id: str, model_name: str, prompt: str, response_
|
||||
})
|
||||
|
||||
db.commit()
|
||||
from services.subscription.cache import clear_dashboard_cache
|
||||
clear_dashboard_cache(user_id)
|
||||
logger.info(f"[AgentTracking] ✅ Usage tracked: {new_calls} calls, {cost_total} cost")
|
||||
|
||||
except Exception as e:
|
||||
|
||||
@@ -57,6 +57,30 @@ class SIFBaseAgent(BaseALwrityAgent):
|
||||
if kwargs:
|
||||
logger.debug(f"[{self.__class__.__name__}] Parameters: {kwargs}")
|
||||
|
||||
async def _ensure_intelligence_ready(self) -> bool:
    """Await the intelligence service's async initializer and report readiness.

    Returns:
        False when the initializer raises (the error is logged as a warning).
        Otherwise True only if the service's ``_initialized`` attribute is
        truthy and its ``embeddings`` attribute is truthy.
    """
    try:
        await self.intelligence._ensure_initialized_async()
    except Exception as init_err:
        agent_name = self.__class__.__name__
        logger.warning(f"[{agent_name}] Intelligence initialization failed: {init_err}")
        return False

    # A service without the flag counts as "not initialized".
    if not getattr(self.intelligence, "_initialized", False):
        return False
    return bool(self.intelligence.embeddings)
|
||||
|
||||
async def initialize_async(self):
    """Async lifecycle hook: prepare the intelligence service, then the LLM.

    Awaits ``_ensure_intelligence_ready()`` (its result is ignored), then
    awaits ``self.llm.ensure_initialized_async()`` when the agent has an
    ``llm`` attribute exposing that method, and finally logs completion.
    """
    await self._ensure_intelligence_ready()

    model = getattr(self, "llm", None)
    supports_preload = hasattr(model, "ensure_initialized_async")
    if supports_preload:
        await model.ensure_initialized_async()

    agent_name = self.__class__.__name__
    logger.info(f"[{agent_name}] Async initialization complete")
|
||||
|
||||
async def shutdown(self):
    """Async lifecycle hook: shut down the attached LLM, if it supports it.

    Looks up ``self.llm`` (treated as ``None`` when absent), awaits its
    ``shutdown()`` when that attribute exists, then logs completion.
    """
    model = getattr(self, "llm", None)
    can_shut_down = hasattr(model, "shutdown")
    if can_shut_down:
        await model.shutdown()

    agent_name = self.__class__.__name__
    logger.info(f"[{agent_name}] Shutdown complete")
|
||||
|
||||
def _create_txtai_agent(self):
|
||||
"""
|
||||
SIF agents use the intelligence service directly, but we can expose
|
||||
|
||||
@@ -9,36 +9,97 @@ from services.intelligence.agents.core_agent_framework import TaskProposal
|
||||
from services.intelligence.txtai_service import TxtaiIntelligenceService
|
||||
|
||||
class CitationExpert(SIFBaseAgent):
    """Agent for fact-checking and source management using the SIF index."""

    # Search hits scoring strictly above this value count as supporting evidence.
    _SUPPORT_SCORE = 0.7
    # Sentences must be longer than this (after stripping) to be treated as claims.
    _MIN_CLAIM_LENGTH = 40
    # At most this many claims are checked per call (one index search per claim).
    _MAX_CLAIMS = 10

    def __init__(self, intelligence_service: TxtaiIntelligenceService, user_id: str, **kwargs):
        super().__init__(intelligence_service, user_id, agent_type="citation_expert", **kwargs)

    async def verify_citations(self, content: str) -> Dict[str, Any]:
        """
        Verify claims in content against the SIF index.

        The content is split on "." into sentences; the first ``_MAX_CLAIMS``
        sentences longer than ``_MIN_CLAIM_LENGTH`` characters are each searched
        in the index (limit 3). A claim is "verified" when at least one hit
        scores above ``_SUPPORT_SCORE``.

        Args:
            content: Text whose sentences are checked.

        Returns:
            A dict with 'verified_claims', 'unverified_claims' and
            'missing_citations' lists. On success it also carries
            'analysis_timestamp'; when the index is not initialized or any
            step raises it carries 'error' instead and the lists are empty.
        """
        if not self.intelligence.is_initialized():
            return {
                "verified_claims": [],
                "unverified_claims": [],
                "missing_citations": [],
                "error": "SIF index not initialized"
            }

        try:
            # Extract potential claim sentences from content.
            # NOTE(review): splitting on "." also splits decimals such as "3.5".
            sentences = [s.strip() for s in content.replace("\n", " ").split(".")]
            claim_candidates = [
                s for s in sentences if len(s) > self._MIN_CLAIM_LENGTH
            ][:self._MAX_CLAIMS]

            verified = []
            unverified = []

            for claim in claim_candidates:
                # Normalise a None/empty search result so len() below is safe;
                # previously a None result aborted the whole verification.
                results = (await self.intelligence.search(claim, limit=3)) or []
                # "or 0" guards hits whose score is present but None.
                supporting = [
                    {"url": r.get("id", ""), "score": r.get("score", 0)}
                    for r in results
                    if (r.get("score") or 0) > self._SUPPORT_SCORE
                ]
                if supporting:
                    verified.append({
                        "claim": claim[:200],
                        "supporting_sources": supporting
                    })
                else:
                    unverified.append({"claim": claim[:200], "sources_found": len(results)})

            return {
                "verified_claims": verified,
                "unverified_claims": unverified,
                "missing_citations": [c["claim"] for c in unverified],
                "analysis_timestamp": datetime.utcnow().isoformat()
            }
        except Exception as e:
            logger.error(f"[{self.__class__.__name__}] Citation verification failed: {e}")
            return {
                "verified_claims": [],
                "unverified_claims": [],
                "missing_citations": [],
                "error": str(e)
            }

    async def propose_daily_tasks(self, context: Dict[str, Any]) -> List[TaskProposal]:
        """
        Propose fact-checking tasks based on SIF index coverage.

        Runs one fixed search ("statistics data research study", limit 5) when
        the index is initialized. If it returns any hits a "Verify Data Claims"
        task is proposed; otherwise (no hits, index not initialized, or the
        search raised) an "Add Source Citations" task is proposed.

        Args:
            context: Accepted for interface compatibility; not read here.

        Returns:
            A list containing exactly one TaskProposal.
        """
        indexed_count = 0

        if self.intelligence.is_initialized():
            try:
                results = await self.intelligence.search("statistics data research study", limit=5)
                indexed_count = len(results or [])
            except Exception as e:
                # Best-effort probe: fall through to the "no sources" proposal.
                logger.debug(f"[CitationExpert] SIF search failed: {e}")

        proposals = []
        if indexed_count > 0:
            proposals.append(TaskProposal(
                title="Verify Data Claims",
                description=f"SIF found {indexed_count} reference pages. Check recent drafts for unsupported statistics.",
                pillar_id="create",
                priority="medium",
                estimated_time=20,
                source_agent="CitationExpert",
                reasoning="Verified sources build audience trust and SEO authority.",
                action_type="navigate",
                action_url="/content-planning-dashboard"
            ))
        else:
            proposals.append(TaskProposal(
                title="Add Source Citations",
                description="Index authoritative sources in SIF to enable automated fact-checking.",
                pillar_id="create",
                priority="low",
                estimated_time=15,
                source_agent="CitationExpert",
                reasoning="Citing authoritative sources improves content credibility.",
                action_type="navigate",
                action_url="/content-planning-dashboard"
            ))

        return proposals
|
||||
|
||||
@@ -14,9 +14,11 @@ try:
|
||||
except ImportError:
|
||||
SIF_AVAILABLE = False
|
||||
|
||||
|
||||
class CompetitorResponseAgent(BaseALwrityAgent):
|
||||
"""
|
||||
Agent responsible for monitoring competitors and generating counter-strategies.
|
||||
Uses SIF index for real competitive data when available.
|
||||
"""
|
||||
|
||||
def __init__(self, user_id: str, shared_llm_name: str, llm: Any = None, **kwargs):
|
||||
@@ -44,61 +46,123 @@ class CompetitorResponseAgent(BaseALwrityAgent):
|
||||
tools=[
|
||||
{
|
||||
"name": "competitor_monitor",
|
||||
"description": "Monitors competitor content and changes",
|
||||
"description": "Returns competitor monitoring status via SIF",
|
||||
"target": self._competitor_monitor_tool
|
||||
},
|
||||
{
|
||||
"name": "threat_analyzer",
|
||||
"description": "Analyzes competitive threats",
|
||||
"description": "Returns threat analysis availability and SIF status",
|
||||
"target": self._threat_analyzer_tool
|
||||
}
|
||||
],
|
||||
llm=_llm_for_agent,
|
||||
max_iterations=5,
|
||||
# Removed unsupported 'system' argument
|
||||
# Instruction will be provided via orchestrator context or initial prompt
|
||||
# Instruction should be provided during invocation or via orchestrator context
|
||||
)
|
||||
|
||||
# Tool Implementations
|
||||
# Tool Implementations (sync — called by txtai Agent)
|
||||
|
||||
def _competitor_monitor_tool(self, context: Dict[str, Any]) -> Dict[str, Any]:
    """
    Synchronous competitor-monitoring tool.

    Does no monitoring itself: it reports whether ``self.sif_service`` is set
    and points the caller at the async ``analyze_competitors()`` method.

    Args:
        context: Tool arguments; the optional 'competitor_url' is echoed back
            when SIF is available (defaults to "any").

    Returns:
        A status dict whose 'changes' list is always empty.
    """
    target_url = context.get("competitor_url", "any")

    if self.sif_service:
        return {
            "status": "sif_available",
            "competitor_url": target_url,
            "changes": [],
            "message": "SIF available. Use async analyze_competitors() for detailed analysis.",
        }

    return {
        "status": "unavailable",
        "changes": [],
        "message": "SIF not initialized. Use async analyze_competitors() for real data.",
    }
|
||||
|
||||
def _threat_analyzer_tool(self, context: Dict[str, Any]) -> Dict[str, Any]:
    """
    Synchronous threat-analysis tool. Returns SIF status only.

    No threat analysis is performed here; the result tells the caller whether
    ``self.sif_service`` is set and which async call to make next.

    Args:
        context: Tool arguments; the optional 'focus_area' (default "general")
            is echoed back and embedded in the hint message.

    Returns:
        A dict with 'threat_assessment' and 'level' set to "unknown" (no SIF)
        or "pending" (SIF available), plus a 'message'.
    """
    focus = context.get("focus_area", "general")
    if not self.sif_service:
        return {
            "threat_assessment": "unknown",
            "level": "unknown",
            "message": "SIF not available. Use async analyze_competitors()."
        }
    return {
        "threat_assessment": "pending",
        "level": "pending",
        "focus_area": focus,
        # Must be an f-string: without the prefix the literal text "{focus}"
        # was emitted instead of the requested focus area.
        "message": f"SIF available. Use async analyze_competitors(focus_area='{focus}')."
    }
|
||||
|
||||
# Async entry points
|
||||
|
||||
async def analyze_competitors(self, website_url: str = "", focus_area: str = "general") -> Dict[str, Any]:
    """
    Search the SIF index for competitor-related content and return the matches.

    Issues one search (limit 10) for ``"competitor {focus_area} {website_url}"``
    against ``self.sif_service.intelligence_service``.

    Args:
        website_url: Appended to the search query; may be empty.
        focus_area: Included in the query and echoed in the result.

    Returns:
        On success: 'competitors' (one {'url', 'snippet'} entry per hit, the
        snippet cut to 200 characters), an empty 'threats' list,
        'pages_analyzed', 'focus_area' and 'analysis_timestamp'.
        On failure: empty 'competitors' and 'threats' plus an 'error' string.
    """
    if not self.sif_service:
        return {"competitors": [], "threats": [], "error": "SIF service not initialized"}

    try:
        engine = getattr(self.sif_service, "intelligence_service", None)
        if not engine:
            return {"competitors": [], "threats": [], "error": "Intelligence service unavailable"}

        hits = await engine.search(f"competitor {focus_area} {website_url}", limit=10)

        matches = []
        for hit in hits:
            matches.append({"url": hit.get("id", ""), "snippet": hit.get("text", "")[:200]})

        return {
            "competitors": matches,
            "threats": [],
            "pages_analyzed": len(hits),
            "focus_area": focus_area,
            "analysis_timestamp": datetime.utcnow().isoformat(),
        }
    except Exception as e:
        logger.error(f"[CompetitorResponseAgent] Analysis failed: {e}")
        return {"competitors": [], "threats": [], "error": str(e)}
|
||||
|
||||
async def propose_daily_tasks(self, context: Dict[str, Any]) -> List[TaskProposal]:
    """
    Propose one task based on competitor hits found in the SIF index.

    When ``self.sif_service`` exposes an ``intelligence_service``, a single
    search for ``"competitor {focus_area}"`` (limit 5) is run. Any hits lead
    to a high-priority "Review Competitor Content" task; otherwise (no hits,
    no service, or the search raised) a "Research Competitor Topics" task.

    Args:
        context: Optional 'focus_area' key (default "content strategy").

    Returns:
        A list containing exactly one TaskProposal.
    """
    focus_area = context.get("focus_area", "content strategy")
    matches_found = 0

    if self.sif_service:
        try:
            engine = getattr(self.sif_service, "intelligence_service", None)
            if engine:
                hits = await engine.search(f"competitor {focus_area}", limit=5)
                matches_found = len(hits)
        except Exception as e:
            # Best-effort lookup: a failed search falls back to the generic task.
            logger.debug(f"[CompetitorResponseAgent] SIF competitor search failed: {e}")

    if matches_found > 0:
        task = TaskProposal(
            title="Review Competitor Content",
            description=f"SIF found {matches_found} competitor pages. Review for gap opportunities.",
            pillar_id="create",
            priority="high",
            estimated_time=45,
            source_agent="CompetitorResponseAgent",
            reasoning="SIF-detected competitor activity presents content gap opportunities.",
            action_type="navigate",
            action_url="/content-planning-dashboard",
        )
    else:
        task = TaskProposal(
            title="Research Competitor Topics",
            description="Search for competitor content in your niche to identify coverage gaps.",
            pillar_id="create",
            priority="medium",
            estimated_time=30,
            source_agent="CompetitorResponseAgent",
            reasoning="Understanding competitor positioning improves content strategy.",
            action_type="navigate",
            action_url="/content-planning-dashboard",
        )

    return [task]
|
||||
|
||||
@@ -9,51 +9,88 @@ from services.intelligence.agents.core_agent_framework import TaskProposal
|
||||
from services.intelligence.txtai_service import TxtaiIntelligenceService
|
||||
|
||||
class LinkGraphAgent(SIFBaseAgent):
    """Agent for internal linking and graph optimization using real SIF index data."""

    def __init__(self, intelligence_service: TxtaiIntelligenceService, user_id: str, **kwargs):
        super().__init__(intelligence_service, user_id, agent_type="link_graph_expert", **kwargs)

    @staticmethod
    def _empty_graph_result(error: str) -> Dict[str, Any]:
        """Build the failure payload, carrying every list/count key of the success payload.

        'edge_count' and 'clusters' are kept for callers of the earlier shape.
        """
        return {
            "node_count": 0,
            "edge_count": 0,
            "clusters": [],
            "cluster_count": 0,
            "content_hubs": [],
            "orphaned_content": [],
            "error": error,
        }

    async def analyze_graph(self) -> Dict[str, Any]:
        """
        Approximate the content graph from SIF clustering and two keyword searches.

        Calls ``intelligence.cluster(min_score=0.5)`` for the cluster count and
        runs two fixed searches (limit 10 each): one for hub-like pages and one
        for narrowly-scoped pages reported as orphan candidates.

        Returns:
            On success: 'node_count' (hub hits + orphan hits), 'cluster_count',
            'content_hubs', 'orphaned_content' and 'analysis_timestamp'.
            When the index is not initialized or a call raises: the same
            count/list keys zeroed or empty, plus 'edge_count', 'clusters'
            and an 'error' string.
        """
        if not self.intelligence.is_initialized():
            return self._empty_graph_result("SIF index not initialized")

        try:
            # Use clustering to identify content groups
            cluster_indices = await self.intelligence.cluster(min_score=0.5)
            cluster_count = len(cluster_indices) if cluster_indices else 0

            # Search for content hub candidates ("or []" tolerates a None result)
            hub_results = (await self.intelligence.search(
                "pillar core foundation guide overview", limit=10)) or []

            # Search for orphan candidates (specific niche content not linking to pillars)
            orphan_results = (await self.intelligence.search(
                "specific detailed deep dive", limit=10)) or []

            # NOTE(review): a page returned by both searches is counted twice in
            # node_count and listed as both hub and orphan — confirm intent.
            return {
                "node_count": len(hub_results) + len(orphan_results),
                "cluster_count": cluster_count,
                "content_hubs": [
                    # "or ''" guards hits whose text is present but None.
                    {"id": r.get("id", ""), "title": (r.get("text") or "")[:100]}
                    for r in hub_results
                ],
                "orphaned_content": [
                    {"id": r.get("id", ""), "snippet": (r.get("text") or "")[:100]}
                    for r in orphan_results
                ],
                "analysis_timestamp": datetime.utcnow().isoformat()
            }
        except Exception as e:
            logger.error(f"[{self.__class__.__name__}] Graph analysis failed: {e}")
            return self._empty_graph_result(str(e))

    async def propose_daily_tasks(self, context: Dict[str, Any]) -> List[TaskProposal]:
        """
        Propose one internal-linking task based on the SIF cluster count.

        When the index is initialized, ``intelligence.cluster(min_score=0.5)``
        is called. A non-empty result yields a "Strengthen Internal Links"
        task; otherwise (no clusters, index not initialized, or the call
        raised) a "Plan Content Clusters" task is proposed.

        Args:
            context: Accepted for interface compatibility; not read here.

        Returns:
            A list containing exactly one TaskProposal.
        """
        proposals = []
        cluster_count = 0

        if self.intelligence.is_initialized():
            try:
                cluster_indices = await self.intelligence.cluster(min_score=0.5)
                cluster_count = len(cluster_indices) if cluster_indices else 0
            except Exception as e:
                # Best-effort probe: fall through to the planning proposal.
                logger.debug(f"[LinkGraphAgent] SIF analysis failed: {e}")

        if cluster_count > 0:
            proposals.append(TaskProposal(
                title="Strengthen Internal Links",
                description=f"SIF detected {cluster_count} content clusters that need cross-linking.",
                pillar_id="distribute",
                priority="medium",
                estimated_time=20,
                source_agent="LinkGraphAgent",
                reasoning="Connecting content clusters improves SEO and user navigation.",
                action_type="navigate",
                action_url="/content-planning-dashboard"
            ))
        else:
            proposals.append(TaskProposal(
                title="Plan Content Clusters",
                description="No content clusters found. Create pillar pages to build a linked content structure.",
                pillar_id="distribute",
                priority="medium",
                estimated_time=30,
                source_agent="LinkGraphAgent",
                reasoning="Structured content clusters drive organic growth.",
                action_type="navigate",
                action_url="/content-planning-dashboard"
            ))

        return proposals
|
||||
|
||||
@@ -14,9 +14,11 @@ try:
|
||||
except ImportError:
|
||||
SIF_AVAILABLE = False
|
||||
|
||||
|
||||
class SEOOptimizationAgent(BaseALwrityAgent):
|
||||
"""
|
||||
Agent responsible for technical SEO, keyword strategy, and performance optimization.
|
||||
Uses SIF index for real data when available.
|
||||
"""
|
||||
|
||||
def __init__(self, user_id: str, shared_llm_name: str, llm: Any = None, **kwargs):
|
||||
@@ -44,91 +46,147 @@ class SEOOptimizationAgent(BaseALwrityAgent):
|
||||
tools=[
|
||||
{
|
||||
"name": "seo_auditor",
|
||||
"description": "Performs comprehensive SEO audits",
|
||||
"description": "Returns SEO audit status and available SIF data",
|
||||
"target": self._seo_auditor_tool
|
||||
},
|
||||
{
|
||||
"name": "keyword_researcher",
|
||||
"description": "Researches high-potential keywords",
|
||||
"description": "Returns keyword research status via SIF",
|
||||
"target": self._keyword_researcher_tool
|
||||
},
|
||||
{
|
||||
"name": "on_page_optimizer",
|
||||
"description": "Optimizes on-page elements",
|
||||
"description": "Returns on-page optimization availability",
|
||||
"target": self._on_page_optimizer_tool
|
||||
},
|
||||
{
|
||||
"name": "technical_fixer",
|
||||
"description": "Fixes technical SEO issues",
|
||||
"description": "Returns technical fix availability",
|
||||
"target": self._technical_fixer_tool
|
||||
}
|
||||
],
|
||||
llm=_llm_for_agent,
|
||||
max_iterations=15,
|
||||
# Removed unsupported 'system' argument
|
||||
# Instruction will be provided via orchestrator context or initial prompt
|
||||
# Instruction should be provided during invocation or via orchestrator context
|
||||
)
|
||||
|
||||
# Tool Implementations
|
||||
# Tool Implementations (sync — called by txtai Agent)
|
||||
|
||||
def _seo_auditor_tool(self, context: Dict[str, Any]) -> Dict[str, Any]:
    """
    Synchronous SEO-audit tool.

    Runs no audit itself: it reports whether ``self.sif_service`` is set and
    directs the caller to the async ``perform_seo_audit()`` method.

    Args:
        context: Tool arguments; the optional 'website_url' (default
            "unknown") is echoed back when SIF is available.

    Returns:
        A dict with 'health', an always-empty 'issues' list, 'status' and
        'message'.
    """
    site = context.get("website_url", "unknown")

    if self.sif_service:
        return {
            "health": "pending",
            "website_url": site,
            "issues": [],
            "status": "sif_available",
            "message": "SIF available. Call perform_seo_audit() for detailed async analysis.",
        }

    return {
        "health": "unknown",
        "issues": [],
        "status": "sif_unavailable",
        "message": "SIF service not initialized. Call perform_seo_audit() for async analysis.",
    }
|
||||
|
||||
def _keyword_researcher_tool(self, context: Dict[str, Any]) -> Dict[str, Any]:
    """
    Synchronous keyword-research tool.

    Returns no keywords itself; it only reports SIF availability and, when
    available, a hint message naming the seed taken from the context.

    Args:
        context: Tool arguments; 'seed_keywords' is preferred, then 'topic',
            then the literal "unknown".

    Returns:
        A dict with an always-empty 'keywords' list, 'status' and 'message'.
    """
    fallback_topic = context.get("topic", "unknown")
    seed = context.get("seed_keywords", fallback_topic)

    if self.sif_service:
        hint = f"SIF available. Use async search_keywords(topic='{seed}') for detailed research."
        return {"keywords": [], "status": "sif_available", "message": hint}

    return {"keywords": [], "status": "sif_unavailable", "message": "SIF not available."}
|
||||
|
||||
def _on_page_optimizer_tool(self, context: Dict[str, Any]) -> Dict[str, Any]:
    """
    Synchronous on-page optimization tool.

    Always reports that optimization is unavailable from this entry point;
    nothing is optimized and ``context`` is not read.

    Args:
        context: Tool arguments (unused).

    Returns:
        A fixed dict with 'optimized' False, 'status' "unavailable" and a
        'message' pointing at ``propose_daily_tasks()``.
    """
    response = {
        "optimized": False,
        "status": "unavailable",
        "message": "On-page optimization requires async analysis via propose_daily_tasks().",
    }
    return response
|
||||
|
||||
def _technical_fixer_tool(self, context: Dict[str, Any]) -> Dict[str, Any]:
    """
    Synchronous technical-SEO fixer tool. Automated fixing is not implemented.

    Args:
        context: Tool arguments; the optional 'issue_id' (default "unknown")
            is quoted in the returned message.

    Returns:
        A dict with 'fixed' False, 'status' "unavailable" and a 'message'
        saying the issue needs manual review.
    """
    issue = context.get("issue_id", "unknown")
    notice = f"Issue '{issue}' requires manual review. Automated fixes not implemented."
    return {"fixed": False, "status": "unavailable", "message": notice}
|
||||
|
||||
# Async entry points
|
||||
|
||||
async def perform_seo_audit(self, website_url: str) -> Dict[str, Any]:
    """
    Run a SIF-backed SEO audit lookup for a website.

    Issues one search (limit 10) for ``"seo website analysis {website_url}"``
    against ``self.sif_service.intelligence_service`` and reports how many
    hits came back. No individual issues are derived: 'issues' is always
    empty.

    Args:
        website_url: Site to look up; embedded in the query and echoed back.

    Returns:
        On success: 'health' "reviewed", 'website_url', 'pages_indexed',
        'issues' and 'audit_timestamp'.
        On failure: 'health' "unknown", empty 'issues' and an 'error' string.
    """
    if not self.sif_service:
        return {"health": "unknown", "issues": [], "error": "SIF service not initialized"}

    try:
        engine = getattr(self.sif_service, "intelligence_service", None)
        if not engine:
            return {"health": "unknown", "issues": [], "error": "Intelligence service unavailable"}

        query = f"seo website analysis {website_url}"
        hits = await engine.search(query, limit=10)
        page_total = len(hits)
        stamp = datetime.utcnow().isoformat()
        return {
            "health": "reviewed",
            "website_url": website_url,
            "pages_indexed": page_total,
            "issues": [],
            "audit_timestamp": stamp,
        }
    except Exception as e:
        logger.error(f"[SEOOptimizationAgent] SEO audit failed: {e}")
        return {"health": "unknown", "issues": [], "error": str(e)}
|
||||
|
||||
async def propose_daily_tasks(self, context: Dict[str, Any]) -> List[TaskProposal]:
    """
    Propose one SEO task based on a fixed SIF index search.

    When ``self.sif_service`` exposes an ``intelligence_service``, a single
    search for "seo issue problem error fix" (limit 5) is run. Any hits lead
    to a high-priority "Review SEO Issues" task; otherwise (no hits, no
    service, or the search raised) a "Run SEO Audit" task is proposed.

    Args:
        context: 'website_url' is read from it (default "").

    Returns:
        A list containing exactly one TaskProposal.
    """
    # NOTE(review): read from the context but not used further below.
    website_url = context.get("website_url", "")
    hit_total = 0

    if self.sif_service:
        try:
            engine = getattr(self.sif_service, "intelligence_service", None)
            if engine:
                hits = await engine.search("seo issue problem error fix", limit=5)
                hit_total = len(hits)
        except Exception as e:
            # Best-effort lookup: a failed search falls back to the audit task.
            logger.debug(f"[SEOOptimizationAgent] SIF search for issues failed: {e}")

    if hit_total > 0:
        task = TaskProposal(
            title="Review SEO Issues",
            description=f"SIF indexed content suggests {hit_total} areas that may need SEO attention.",
            pillar_id="distribute",
            priority="high",
            estimated_time=30,
            source_agent="SEOOptimizationAgent",
            reasoning="Addressing SEO gaps improves organic visibility.",
            action_type="navigate",
            action_url="/content-planning-dashboard",
        )
    else:
        task = TaskProposal(
            title="Run SEO Audit",
            description="Perform a comprehensive SEO audit to identify optimization opportunities.",
            pillar_id="distribute",
            priority="medium",
            estimated_time=15,
            source_agent="SEOOptimizationAgent",
            reasoning="Regular audits prevent SEO degradation.",
            action_type="navigate",
            action_url="/content-planning-dashboard",
        )

    return [task]
|
||||
|
||||
@@ -133,6 +133,8 @@ class SemanticHarvesterService:
|
||||
'cost': cost, 'user_id': user_id, 'period': current_period,
|
||||
})
|
||||
db.commit()
|
||||
from services.subscription.cache import clear_dashboard_cache
|
||||
clear_dashboard_cache(user_id)
|
||||
logger.info(f"[SemanticHarvester] Tracked Exa usage: user={user_id}, cost=${cost}")
|
||||
finally:
|
||||
db.close()
|
||||
|
||||
@@ -651,15 +651,37 @@ class RealTimeSemanticMonitor:
|
||||
|
||||
class SemanticDashboardAPI:
|
||||
"""API interface for the semantic monitoring dashboard."""
|
||||
|
||||
|
||||
STALE_AFTER_SECONDS = 3600 # 1 hour without access = stale
|
||||
|
||||
def __init__(self):
    """Start with no monitors and no recorded access times."""
    # user_id -> time of the most recent get_monitor() call for that user
    self._last_access: Dict[str, datetime] = {}
    # user_id -> that user's monitor instance
    self.monitors: Dict[str, RealTimeSemanticMonitor] = {}
|
||||
|
||||
def get_monitor(self, user_id: str) -> RealTimeSemanticMonitor:
    """Return the user's semantic monitor, creating it on first request.

    Every successful call also stamps ``_last_access[user_id]`` with the
    current naive UTC time.

    Args:
        user_id: Key under which the monitor is stored.

    Returns:
        The existing or newly created monitor for ``user_id``.
    """
    try:
        monitor = self.monitors[user_id]
    except KeyError:
        monitor = RealTimeSemanticMonitor(user_id)
        self.monitors[user_id] = monitor

    self._last_access[user_id] = datetime.utcnow()
    return monitor
|
||||
|
||||
def evict_stale_monitors(self, max_age_seconds: Optional[int] = None) -> int:
    """
    Remove monitors that haven't been accessed in max_age_seconds.

    Args:
        max_age_seconds: Age limit in seconds. ``None`` means "use
            ``STALE_AFTER_SECONDS``"; an explicit ``0`` is honoured and
            evicts every monitor with a strictly older access time.

    Returns:
        The number of evicted monitors.
    """
    # Compare against None explicitly: "max_age_seconds or default" silently
    # replaced a caller-supplied 0 with the default.
    max_age = self.STALE_AFTER_SECONDS if max_age_seconds is None else max_age_seconds
    # Naive UTC, matching the timestamps get_monitor() records.
    now = datetime.utcnow()
    # Collect first, then mutate: the dict must not change while iterated.
    stale = [
        uid for uid, last in self._last_access.items()
        if (now - last).total_seconds() > max_age
    ]
    for uid in stale:
        self.monitors.pop(uid, None)
        self._last_access.pop(uid, None)
    if stale:
        logger.info(f"Evicted {len(stale)} stale semantic monitor(s)")
    return len(stale)
|
||||
|
||||
async def start_dashboard_monitoring(self, user_id: str, competitors: List[str] = None) -> Dict[str, Any]:
|
||||
"""Start semantic monitoring for a user."""
|
||||
|
||||
@@ -298,7 +298,8 @@ class SemanticCacheManager:
|
||||
query: str,
|
||||
results: List[Dict[str, Any]],
|
||||
relevance_threshold: float = 0.7,
|
||||
ttl: Optional[int] = None
|
||||
ttl: Optional[int] = None,
|
||||
user_id: str = None
|
||||
) -> bool:
|
||||
"""
|
||||
Cache semantic search query results with relevance-based invalidation
|
||||
@@ -308,6 +309,7 @@ class SemanticCacheManager:
|
||||
results: Query results
|
||||
relevance_threshold: Minimum relevance score for caching
|
||||
ttl: Time to live in seconds
|
||||
user_id: User identifier for scoped caching
|
||||
|
||||
Returns:
|
||||
True if caching was successful
|
||||
@@ -319,7 +321,7 @@ class SemanticCacheManager:
|
||||
|
||||
cache_key = self._generate_cache_key(
|
||||
"semantic_query",
|
||||
"global", # Global query cache
|
||||
user_id, # User-scoped cache key
|
||||
{"query": query, "threshold": relevance_threshold}
|
||||
)
|
||||
|
||||
@@ -348,13 +350,14 @@ class SemanticCacheManager:
|
||||
def get_cached_query_results(
|
||||
self,
|
||||
query: str,
|
||||
relevance_threshold: float = 0.7
|
||||
relevance_threshold: float = 0.7,
|
||||
user_id: str = None
|
||||
) -> Optional[List[Dict[str, Any]]]:
|
||||
"""Retrieve cached semantic query results"""
|
||||
"""Retrieve cached semantic query results scoped to a user"""
|
||||
try:
|
||||
cache_key = self._generate_cache_key(
|
||||
"semantic_query",
|
||||
"global",
|
||||
user_id,
|
||||
{"query": query, "threshold": relevance_threshold}
|
||||
)
|
||||
|
||||
@@ -478,29 +481,7 @@ class SemanticCacheManager:
|
||||
logger.error(f"Failed to get cache stats: {e}")
|
||||
return self.stats
|
||||
|
||||
def warm_cache_for_user(self, user_id: str, common_queries: List[str]):
|
||||
"""
|
||||
Pre-populate cache with common semantic queries for a user
|
||||
|
||||
Args:
|
||||
user_id: User identifier
|
||||
common_queries: List of common semantic queries to pre-cache
|
||||
"""
|
||||
try:
|
||||
logger.info(f"Warming cache for user {user_id} with {len(common_queries)} queries")
|
||||
|
||||
# This would typically involve running the actual semantic analysis
|
||||
# For now, we log the intent and can be extended with actual warming logic
|
||||
|
||||
# Example warming scenarios:
|
||||
# 1. Pre-analyze user's top content pillars
|
||||
# 2. Cache common competitor comparisons
|
||||
# 3. Pre-compute semantic similarity scores
|
||||
|
||||
logger.info(f"Cache warming initiated for user {user_id}")
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to warm cache for user: {e}")
|
||||
|
||||
|
||||
|
||||
def semantic_cache_decorator(ttl: int = 3600, operation_type: str = "generic"):
|
||||
|
||||
@@ -61,32 +61,32 @@ LOCAL_LLM_FALLBACKS = [
|
||||
|
||||
class LocalLLMWrapper:
|
||||
"""
|
||||
Lazily loads a local LLM via txtai and caches it globally.
|
||||
This prevents blocking server startup and redundant model loads.
|
||||
Wraps a local LLM with async lifecycle support.
|
||||
Model loading runs off the event loop so it never blocks the server.
|
||||
Loaded models are cached globally (shared across all instances).
|
||||
"""
|
||||
|
||||
def __init__(self, model_path: str, task: str = None):
    """Record the model identity; no model is loaded here.

    Args:
        model_path: Model identifier, also used as part of the cache key.
        task: Optional task name, also used as part of the cache key.
    """
    # Lifecycle state for this wrapper instance.
    self._init_task = None
    self._initialized = False
    # Identity of the model this wrapper resolves.
    self.task = task
    self.model_path = model_path
|
||||
|
||||
def _load_model_sync(self) -> Any:
|
||||
"""Load model (blocking — call via thread executor from async code)."""
|
||||
cache_key = f"{self.model_path}:{self.task}"
|
||||
|
||||
if cache_key in _local_llm_cache:
|
||||
return _local_llm_cache[cache_key]
|
||||
|
||||
|
||||
if LLM is None:
|
||||
raise ImportError("txtai.pipeline.LLM is not available")
|
||||
|
||||
|
||||
task_to_use = (self.task or "language-generation").strip()
|
||||
# Explicitly force language-generation for known models if auto-detect fails
|
||||
if any(x in self.model_path for x in ["Qwen", "Instruct", "GPT", "Llama"]):
|
||||
task_to_use = "language-generation"
|
||||
if task_to_use == "text-generation":
|
||||
task_to_use = "language-generation"
|
||||
|
||||
|
||||
candidate_models = []
|
||||
for candidate in [self.model_path, *LOCAL_LLM_FALLBACKS]:
|
||||
if candidate not in candidate_models:
|
||||
@@ -137,12 +137,49 @@ class LocalLLMWrapper:
|
||||
pass
|
||||
logger.error(f"Failed to initialize LocalLLMWrapper after fallback attempts: {last_error}")
|
||||
raise last_error
|
||||
|
||||
return _local_llm_cache[cache_key]
|
||||
|
||||
|
||||
@property
def llm(self):
    """Return the loaded model, loading it synchronously if it is not cached.

    The shared module-level cache is consulted first; on a miss the model is
    loaded in the calling thread (which blocks) and this wrapper is marked
    as initialized.
    """
    key = f"{self.model_path}:{self.task}"
    if key not in _local_llm_cache:
        # Cache miss: pay the blocking load cost now.
        model = self._load_model_sync()
        self._initialized = True
        return model
    return _local_llm_cache[key]
|
||||
|
||||
async def initialize(self) -> bool:
    """Pre-load the model without blocking the event loop.

    Meant to be awaited ahead of the first request so that request does not
    pay the model-loading cost.

    Returns:
        True when the model is available (this wrapper was already
        initialized, the model is already in the shared cache, or loading
        succeeded). False when loading raised; the error is logged and not
        propagated.
    """
    if self._initialized:
        return True

    cache_key = f"{self.model_path}:{self.task}"
    if cache_key in _local_llm_cache:
        # The model is already present in the shared cache; nothing to load.
        self._initialized = True
        return True

    try:
        # Inside a coroutine there is always a running loop, and
        # get_running_loop() is the documented way to obtain it
        # (get_event_loop() has policy-dependent behaviour).
        loop = asyncio.get_running_loop()
        # The blocking load runs in the default executor so the loop stays free.
        await loop.run_in_executor(None, self._load_model_sync)
    except Exception as e:
        logger.error(f"[LocalLLMWrapper] Async init failed for {self.model_path}: {e}")
        return False

    self._initialized = True
    return True
|
||||
|
||||
async def ensure_initialized_async(self) -> bool:
    """Make sure the model is loaded, delegating to ``initialize`` when needed.

    Returns True immediately if this wrapper is already initialized;
    otherwise returns whatever ``initialize`` reports.
    """
    if not self._initialized:
        return await self.initialize()
    return True
|
||||
|
||||
async def shutdown(self):
    """Drop this wrapper's model from the shared cache and mark it uninitialized."""
    # pop() with a default keeps this safe when the model was never loaded.
    _local_llm_cache.pop(f"{self.model_path}:{self.task}", None)
    self._initialized = False
|
||||
|
||||
def __call__(self, prompt: str, **kwargs) -> str:
    """Run the prompt through the underlying model (callable-style entry point)."""
    model = self.llm
    return model(prompt, **kwargs)


def generate(self, prompt: str, **kwargs) -> str:
    """Run the prompt through the underlying model (named entry point)."""
    model = self.llm
    return model(prompt, **kwargs)
|
||||
|
||||
@@ -177,6 +214,21 @@ class SIFBaseAgent(BaseALwrityAgent):
|
||||
|
||||
return bool(getattr(self.intelligence, "_initialized", False) and self.intelligence.embeddings)
|
||||
|
||||
async def initialize_async(self):
    """Async lifecycle hook: warm up the SIF index and, if present, the local LLM.

    The results of both warm-up calls are ignored; completion is logged
    either way.
    """
    await self._ensure_intelligence_ready()

    # Not every agent carries an ``llm`` attribute, and not every LLM object
    # exposes the async hook, so both are probed defensively.
    local_llm = getattr(self, "llm", None)
    if hasattr(local_llm, "ensure_initialized_async"):
        await local_llm.ensure_initialized_async()

    logger.info(f"[{self.__class__.__name__}] Async initialization complete")
|
||||
|
||||
async def shutdown(self):
    """Async lifecycle hook: ask the agent's LLM (if any) to release its resources."""
    # Agents without an ``llm`` attribute, or whose LLM lacks ``shutdown``,
    # simply skip the release step.
    local_llm = getattr(self, "llm", None)
    if hasattr(local_llm, "shutdown"):
        await local_llm.shutdown()

    logger.info(f"[{self.__class__.__name__}] Shutdown complete")
|
||||
|
||||
def _create_txtai_agent(self):
|
||||
"""
|
||||
SIF agents primarily use the intelligence service directly, but we can expose
|
||||
@@ -545,6 +597,84 @@ class ContentGuardianAgent(SIFBaseAgent):
|
||||
super().__init__(intelligence_service, user_id, agent_type="content_guardian")
|
||||
self.sif_service = sif_service
|
||||
|
||||
async def perform_site_audit(self, website_url: str) -> Dict[str, Any]:
    """
    Audit the indexed content for a website and summarise the findings.

    Up to ten search hits are pulled from the intelligence index; each hit
    with at least 50 characters of text is run through the quality, style and
    safety checks, and the per-page results are averaged into the report.

    Returns:
        A report dict with ``content_quality``, ``brand_voice_consistency``,
        ``safety_issues`` and ``cannibalization_issues`` sections (left as
        ``None`` when no indexed content is found). If anything raises, a
        reduced dict with ``website_url``, ``error`` and ``audit_timestamp``
        is returned instead.
    """
    self._log_agent_operation("Performing site audit", website_url=website_url)

    def _average(values):
        # max(..., 1) keeps an empty score list from dividing by zero.
        return round(sum(values) / max(len(values), 1), 4)

    try:
        # Search the user's SIF index for website content
        hits = await self.intelligence.search(
            f"website content analysis {website_url}", limit=10
        )

        audit: Dict[str, Any] = {
            "website_url": website_url,
            "audit_timestamp": datetime.utcnow().isoformat(),
            "total_pages_crawled": len(hits),
            "content_quality": None,
            "brand_voice_consistency": None,
            "safety_issues": None,
            "cannibalization_issues": None,
        }

        if not hits:
            logger.warning(f"[{self.__class__.__name__}] No indexed content found for {website_url}")
            return audit

        quality_scores, style_scores, safety_flags = [], [], []

        for hit in hits:
            page_text = hit.get("text", "") or hit.get("id", "")
            if len(page_text) >= 50:
                # Checks run one after another, in this fixed order, per page.
                quality = await self.assess_content_quality(
                    {"description": page_text, "title": website_url}
                )
                quality_scores.append(quality.get("score", 0.0))

                style = await self.style_enforcer(page_text)
                style_scores.append(style.get("compliance_score", 0.0))

                safety = await self.safety_filter(page_text)
                if not safety.get("is_safe", True):
                    safety_flags.append(safety.get("flags", []))

        audit["content_quality"] = {
            "score": _average(quality_scores),
            "pages_analyzed": len(quality_scores),
        }
        audit["brand_voice_consistency"] = {
            "compliance_score": _average(style_scores),
            "pages_checked": len(style_scores),
        }
        audit["safety_issues"] = {
            "has_issues": bool(safety_flags),
            "flagged_pages": len(safety_flags),
        }
        audit["cannibalization_issues"] = await self.check_cannibalization(website_url)

        logger.info(
            f"[{self.__class__.__name__}] Site audit complete for {website_url}: "
            f"quality={audit['content_quality']['score']}, "
            f"brand_voice={audit['brand_voice_consistency']['compliance_score']}"
        )
        return audit

    except Exception as e:
        logger.error(f"[{self.__class__.__name__}] Site audit failed for {website_url}: {e}")
        return {
            "website_url": website_url,
            "error": str(e),
            "audit_timestamp": datetime.utcnow().isoformat(),
        }
|
||||
|
||||
async def assess_content_quality(self, website_data: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""Assess overall content quality based on website data."""
|
||||
self._log_agent_operation("Assessing content quality")
|
||||
@@ -826,51 +956,21 @@ class LinkGraphAgent(SIFBaseAgent):
|
||||
logger.info(f"[{self.__class__.__name__}] No relevant internal pages found")
|
||||
return []
|
||||
|
||||
# 2. Get Authority Data (if available)
|
||||
authority_map = {}
|
||||
if self.sif_service:
|
||||
try:
|
||||
# Fetch dashboard context to get top performing content
|
||||
# Note: This relies on what's available in the SIF index/dashboard summary
|
||||
dashboard_context = await self.sif_service.get_seo_dashboard_context()
|
||||
|
||||
if "error" not in dashboard_context:
|
||||
# Extract top queries/pages if available in summary
|
||||
# Ideally, we'd have a map of URL -> Authority Score
|
||||
# For now, we'll try to extract what we can
|
||||
data = dashboard_context.get("dashboard_data", {})
|
||||
summary = data.get("summary", {})
|
||||
|
||||
# Example: Boost if site health is good (general confidence)
|
||||
site_health = data.get("health_score", {}).get("score", 0)
|
||||
|
||||
# If we had top pages in the summary, we'd use them.
|
||||
# For now, we'll use a placeholder authority map or just the site health
|
||||
pass
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to fetch authority data: {e}")
|
||||
|
||||
suggestions = []
|
||||
for result in results:
|
||||
relevance_score = result.get('score', 0.0)
|
||||
url = result.get('id', 'unknown')
|
||||
|
||||
# Apply authority boost (placeholder logic)
|
||||
# In a full implementation, we'd look up 'url' in authority_map
|
||||
authority_boost = 1.0
|
||||
|
||||
final_score = relevance_score * authority_boost
|
||||
|
||||
if final_score >= self.RELEVANCE_THRESHOLD:
|
||||
if relevance_score >= self.RELEVANCE_THRESHOLD:
|
||||
suggestion = {
|
||||
"url": url,
|
||||
"relevance": relevance_score,
|
||||
"final_score": final_score,
|
||||
"confidence": self._calculate_link_confidence(final_score),
|
||||
"final_score": relevance_score,
|
||||
"confidence": self._calculate_link_confidence(relevance_score),
|
||||
"reason": f"Semantic similarity: {relevance_score:.3f}"
|
||||
}
|
||||
suggestions.append(suggestion)
|
||||
logger.debug(f"[{self.__class__.__name__}] Added link suggestion: {url} (score: {final_score:.3f})")
|
||||
logger.debug(f"[{self.__class__.__name__}] Added link suggestion: {url} (score: {relevance_score:.3f})")
|
||||
|
||||
# Sort by final score
|
||||
suggestions.sort(key=lambda x: x['final_score'], reverse=True)
|
||||
@@ -974,23 +1074,39 @@ class LinkGraphAgent(SIFBaseAgent):
|
||||
return min(1.0, relevance_score * 1.5)
|
||||
|
||||
async def optimize_anchor_text(self, target_url: str, context: str) -> str:
    """Suggest anchor text for a link by searching the SIF index for the target page.

    Args:
        target_url: URL the link will point to.
        context: Text surrounding the place where the link will be inserted.

    Returns:
        Up to five words (each longer than four characters) taken from the
        top search hit for ``"{target_url} {context}"``. When the
        intelligence service is not ready, the search yields nothing usable,
        or anything raises, the result of ``_extract_anchor_from_context``
        is returned instead.
    """
    self._log_agent_operation("Optimizing anchor text", target_url=target_url, context_length=len(context))

    try:
        if not await self._ensure_intelligence_ready():
            return self._extract_anchor_from_context(target_url, context)

        results = await self.intelligence.search(f"{target_url} {context}", limit=3)
        if results:
            # Only the best-ranked hit is used; fall back to its id when it has no text.
            text = results[0].get("text", "") or results[0].get("id", "")
            words = [w for w in text.split() if len(w) > 4][:5]
            if words:
                return " ".join(words)
        return self._extract_anchor_from_context(target_url, context)

    except Exception as e:
        logger.error(f"[{self.__class__.__name__}] optimize_anchor_text failed: {e}")
        return self._extract_anchor_from_context(target_url, context)
|
||||
|
||||
def _extract_anchor_from_context(self, target_url: str, context: str) -> str:
|
||||
"""Extract a usable anchor text from the URL or context when SIF is unavailable."""
|
||||
from urllib.parse import urlparse
|
||||
try:
|
||||
parsed = urlparse(target_url)
|
||||
path = parsed.path.strip("/").replace("-", " ").replace("/", " ")
|
||||
if path:
|
||||
words = [w for w in path.split() if len(w) > 3]
|
||||
if words:
|
||||
return " ".join(words[:4]).title()
|
||||
except Exception:
|
||||
pass
|
||||
words = [w for w in context.split() if len(w) > 4]
|
||||
return " ".join(words[:4]).title() if words else "learn more"
|
||||
|
||||
class CitationExpert(SIFBaseAgent):
|
||||
"""
|
||||
|
||||
@@ -1369,19 +1369,6 @@ class SIFIntegrationService:
|
||||
logger.error(f"Failed to invalidate user cache: {e}")
|
||||
return False
|
||||
|
||||
async def warm_user_cache(self, common_queries: List[str]) -> bool:
    """Ask the cache manager to pre-populate this user's cache.

    Returns:
        True when caching is enabled, a cache manager is present, and the
        warm-up call completed; False when caching is unavailable or the
        call raised (the error is logged).
    """
    try:
        if not (self.enable_caching and self.cache_manager):
            return False
        self.cache_manager.warm_cache_for_user(self.user_id, common_queries)
        logger.info(f"Warmed cache for user {self.user_id} with {len(common_queries)} queries")
        return True
    except Exception as e:
        logger.error(f"Failed to warm user cache: {e}")
        return False
|
||||
|
||||
|
||||
# Integration with existing API endpoints
|
||||
class SIFIntegrationAPI:
|
||||
"""API wrapper for SIF operations with caching integration."""
|
||||
|
||||
@@ -220,12 +220,15 @@ class TxtaiIntelligenceService:
|
||||
return 0.0
|
||||
return dot_product / (norm_v1 * norm_v2)
|
||||
|
||||
async def index_content(self, items: List[Tuple[str, str, Dict[str, Any]]]):
|
||||
async def index_content(self, items: List[Tuple[str, str, Dict[str, Any]]]) -> int:
|
||||
"""
|
||||
Index content for semantic search and clustering.
|
||||
Index content using incremental upsert — only processes new/changed documents.
|
||||
|
||||
Args:
|
||||
items: List of (id, text, metadata) tuples.
|
||||
|
||||
Returns:
|
||||
Number of items actually upserted.
|
||||
"""
|
||||
self._ensure_initialized()
|
||||
if not self._initialized:
|
||||
@@ -235,38 +238,28 @@ class TxtaiIntelligenceService:
|
||||
logger.warning(message)
|
||||
if self.fail_fast:
|
||||
raise RuntimeError(message)
|
||||
return
|
||||
return 0
|
||||
|
||||
try:
|
||||
logger.info(f"Starting content indexing for user {self.user_id}")
|
||||
logger.debug(f"Indexing {len(items)} items")
|
||||
|
||||
# Validate input items
|
||||
if not items:
|
||||
logger.warning("No items provided for indexing")
|
||||
return
|
||||
return 0
|
||||
|
||||
# Index items: [(id, text, metadata)] - metadata needs to be JSON string for txtai
|
||||
import json
|
||||
processed_items = []
|
||||
for item in items:
|
||||
id_val, text, metadata = item
|
||||
# Convert metadata dict to JSON string
|
||||
metadata_json = json.dumps(metadata) if metadata else "{}"
|
||||
processed_items.append((id_val, text, metadata_json))
|
||||
|
||||
self.embeddings.index(processed_items)
|
||||
|
||||
# Save the index
|
||||
self.embeddings.upsert(processed_items)
|
||||
self.embeddings.save(self.index_path)
|
||||
logger.info(f"Successfully indexed {len(items)} items for user {self.user_id}")
|
||||
logger.debug(f"Index saved to: {self.index_path}")
|
||||
count = len(processed_items)
|
||||
logger.info(f"Upserted {count} items for user {self.user_id}")
|
||||
return count
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error indexing content for user {self.user_id}: {e}")
|
||||
logger.error(f"Full traceback: {traceback.format_exc()}")
|
||||
logger.error(f"Items count: {len(items) if items else 0}")
|
||||
|
||||
message = str(e)
|
||||
is_windows_lock_error = isinstance(e, PermissionError) or "WinError 32" in message
|
||||
if is_windows_lock_error:
|
||||
@@ -274,7 +267,62 @@ class TxtaiIntelligenceService:
|
||||
f"Txtai index save skipped for user {self.user_id} due to file lock. "
|
||||
f"The index will be retried on a future run."
|
||||
)
|
||||
return
|
||||
return 0
|
||||
raise
|
||||
|
||||
async def delete_content(self, doc_ids: List[str]) -> int:
    """
    Delete specific documents from the index by ID.

    Args:
        doc_ids: List of document IDs to remove.

    Returns:
        Number of documents actually removed. 0 when ``doc_ids`` is empty,
        when the service is not initialized, or when the delete/save fails
        (the failure is logged, not raised).
    """
    if not doc_ids:
        # Nothing to remove: skip initialization and a pointless index save.
        return 0

    await self._ensure_initialized_async()
    if not self._initialized or not self.embeddings:
        return 0

    try:
        # txtai's Embeddings.delete returns the ids it removed; ids that were
        # never in the index are not included, so count from that list.
        deleted = self.embeddings.delete(doc_ids)
        self.embeddings.save(self.index_path)
        count = len(deleted) if deleted is not None else len(doc_ids)
        logger.info(f"Deleted {count} documents for user {self.user_id}")
        return count
    except Exception as e:
        logger.error(f"Error deleting documents for user {self.user_id}: {e}")
        return 0
|
||||
|
||||
async def reindex_all(self, items: List[Tuple[str, str, Dict[str, Any]]]) -> int:
    """
    Full reindex — replaces all content. Use sparingly (e.g. schema migration).

    Args:
        items: List of (id, text, metadata) tuples.

    Returns:
        Number of items indexed, or 0 when the service is not initialized.

    Raises:
        Exception: any error from building or saving the index is logged
            and re-raised.
    """
    await self._ensure_initialized_async()
    if not self._initialized or not self.embeddings:
        return 0

    try:
        import json

        # Metadata is serialized to a JSON string per item ("{}" when empty).
        processed_items = [
            (id_val, text, json.dumps(metadata) if metadata else "{}")
            for id_val, text, metadata in items
        ]

        # A plain index() call overwrites the existing index. The ``reindex``
        # flag is documented as skipping database creation, which would leave
        # the content store un-rebuilt, so it is not passed here.
        self.embeddings.index(processed_items)
        self.embeddings.save(self.index_path)
        count = len(processed_items)
        logger.info(f"Reindexed all {count} items for user {self.user_id}")
        return count

    except Exception as e:
        logger.error(f"Error reindexing all for user {self.user_id}: {e}")
        raise
|
||||
|
||||
async def search(self, query: str, limit: int = 5) -> List[Dict[str, Any]]:
|
||||
@@ -292,7 +340,8 @@ class TxtaiIntelligenceService:
|
||||
if self.enable_caching and self.cache_manager:
|
||||
cached_results = self.cache_manager.get_cached_query_results(
|
||||
query=query,
|
||||
relevance_threshold=0.5 # Lower threshold for search results
|
||||
relevance_threshold=0.5, # Lower threshold for search results
|
||||
user_id=self.user_id
|
||||
)
|
||||
if cached_results:
|
||||
logger.info(f"Cache hit for search query: '{query}'")
|
||||
@@ -309,7 +358,8 @@ class TxtaiIntelligenceService:
|
||||
self.cache_manager.cache_query_results(
|
||||
query=query,
|
||||
results=results,
|
||||
relevance_threshold=0.5
|
||||
relevance_threshold=0.5,
|
||||
user_id=self.user_id
|
||||
)
|
||||
logger.debug(f"Cached search results for query: '{query}'")
|
||||
|
||||
@@ -462,8 +512,7 @@ class TxtaiIntelligenceService:
|
||||
"""Fallback clustering method when graph clustering is not available."""
|
||||
logger.info(f"Using fallback clustering for user {self.user_id}")
|
||||
|
||||
# Simple clustering based on semantic similarity
|
||||
# This is a placeholder - in production, you'd implement a proper clustering algorithm
|
||||
# Simple clustering based on semantic similarity against sample queries
|
||||
try:
|
||||
# Get a sample of indexed items to analyze
|
||||
sample_queries = ["marketing", "SEO", "content", "social media", "email marketing"]
|
||||
|
||||
Reference in New Issue
Block a user