Release Candidate: Production Release with Multi-Tenant & Onboarding Enhancements

This commit is contained in:
ajaysi
2026-02-28 20:06:26 +05:30
parent 08a1f4a1d8
commit 4828274cbf
162 changed files with 19489 additions and 4300 deletions

View File

@@ -76,7 +76,8 @@ class ALwrityAgentOrchestrator:
try:
# Initialize shared LLM
if TXTAI_AVAILABLE:
self.llm = LLM(self.config.shared_llm)
# Hardening: Explicitly set task to avoid 'text2text-generation' default failures
self.llm = LLM(self.config.shared_llm, task="text-generation")
else:
self.llm = None

View File

@@ -181,7 +181,8 @@ class BaseALwrityAgent(ABC):
try:
if not self.llm:
# Create new LLM if not provided
raw_llm = LLM(model_name)
# Hardening: Explicitly set task to avoid 'text2text-generation' default failures
raw_llm = LLM(model_name, task="text-generation")
# Wrap it
self.llm = TrackingLLMWrapper(raw_llm, self.user_id, self.model_name)
@@ -906,6 +907,11 @@ class StrategyOrchestratorAgent(BaseALwrityAgent):
"name": "task_delegator",
"description": "Delegates specific tasks to specialized agents (content, competitor, seo, social)",
"target": self._delegate_task_tool
},
{
"name": "kickoff_gsc_first_pass",
"description": "Kicks off first-pass execution by invoking SEO/Content default GSC plans",
"target": self._kickoff_gsc_first_pass_tool
}
],
max_iterations=15,
@@ -924,7 +930,9 @@ class StrategyOrchestratorAgent(BaseALwrityAgent):
Do not just plan; EXECUTE by delegating.
Always prioritize user goals and maintain safety constraints.
Coordinate multi-agent responses to market changes effectively."""
Coordinate multi-agent responses to market changes effectively.
First, call 'kickoff_gsc_first_pass' to ground the plan on live GSC signals."""
)
)
@@ -1033,6 +1041,37 @@ class StrategyOrchestratorAgent(BaseALwrityAgent):
except Exception as e:
return {"error": str(e)}
async def _kickoff_gsc_first_pass_tool(self, context: Dict[str, Any]) -> Dict[str, Any]:
    """Run the SEO and Content agents' default GSC plans and merge their actions.

    Args:
        context: Tool context; the optional ``start_date`` / ``end_date``
            entries are forwarded unchanged to each sub-agent plan tool.

    Returns:
        A dict with ``status``, ``invoked`` (sub-agent keys that were called),
        ``results`` (raw plan per sub-agent), ``combined_actions``, ``errors``
        (sub-agent key -> error message) and ``timestamp``. ``status`` is
        ``"error"`` (with an ``error`` summary) when every invoked plan failed
        or when the tool itself raised; otherwise it is ``"ok"``.
    """
    try:
        context = context or {}
        payload = {
            "start_date": context.get("start_date"),
            "end_date": context.get("end_date"),
        }
        # (sub_agents key, name of that agent's default-plan coroutine)
        plan_tools = (
            ("seo", "_default_seo_gsc_plan_tool"),
            ("content", "_default_content_gsc_plan_tool"),
        )
        results = {}
        errors = {}
        combined_actions = []
        for agent_key, tool_name in plan_tools:
            agent = self.sub_agents.get(agent_key)
            if not agent or not hasattr(agent, tool_name):
                continue
            try:
                plan = await getattr(agent, tool_name)(payload)
            except Exception as exc:
                # Isolate failures so one agent cannot discard the other's plan.
                plan = {"error": str(exc)}
            results[agent_key] = plan
            if not isinstance(plan, dict):
                continue
            if plan.get("error"):
                errors[agent_key] = str(plan["error"])
            # `or []` also covers a plan that carries "actions": None.
            combined_actions.extend(plan.get("actions") or [])

        all_failed = bool(results) and len(errors) == len(results)
        response = {
            "status": "error" if all_failed else "ok",
            "invoked": list(results.keys()),
            "results": results,
            "combined_actions": combined_actions,
            "errors": errors,
            "timestamp": datetime.utcnow().isoformat()
        }
        if all_failed:
            response["error"] = "; ".join(f"{key}: {msg}" for key, msg in errors.items())
        return response
    except Exception as e:
        return {"status": "error", "error": str(e)}
async def _strategy_synthesizer_tool(self, context: Dict[str, Any]) -> Dict[str, Any]:
"""Tool for synthesizing strategies"""
return {

View File

@@ -13,6 +13,7 @@ from loguru import logger
from ..txtai_service import TxtaiIntelligenceService
from services.intelligence.agents.core_agent_framework import BaseALwrityAgent, AgentAction
from services.seo_tools.content_strategy_service import ContentStrategyService
from services.analytics import PlatformAnalyticsService
from services.intelligence.sif_agents import SharedLLMWrapper, LocalLLMWrapper
try:
from services.intelligence.sif_integration import SIFIntegrationService
@@ -888,7 +889,37 @@ class ContentStrategyAgent(BaseALwrityAgent):
"name": "sitemap_analyzer",
"description": "Analyzes website structure and publishing velocity via sitemap",
"target": self._sitemap_analyzer_tool
}
},
{
"name": "gsc_low_ctr_queries",
"description": "Returns low-CTR queries with evidence from cached GSC metrics",
"target": self._cs_gsc_low_ctr_queries_tool
},
{
"name": "gsc_striking_distance_queries",
"description": "Returns striking-distance queries (positions ~820) with evidence",
"target": self._cs_gsc_striking_distance_tool
},
{
"name": "gsc_declining_queries",
"description": "Returns period-over-period declining queries with evidence",
"target": self._cs_gsc_declining_queries_tool
},
{
"name": "gsc_low_ctr_pages",
"description": "Returns low-CTR pages with top contributing queries",
"target": self._cs_gsc_low_ctr_pages_tool
},
{
"name": "gsc_cannibalization_candidates",
"description": "Returns query→multiple-pages cannibalization candidates with target recommendation",
"target": self._cs_gsc_cannibalization_candidates_tool
},
{
"name": "default_content_gsc_plan",
"description": "Runs a default first-pass plan using GSC signals (titles/meta, consolidation, refreshes)",
"target": self._default_content_gsc_plan_tool
},
],
max_iterations=8,
system=self.get_effective_system_prompt(f"""You are the Content Strategy Agent for ALwrity user {self.user_id}.
@@ -903,12 +934,153 @@ class ContentStrategyAgent(BaseALwrityAgent):
- Performance-based content improvements
Use semantic analysis (SIF) and sitemap analysis to understand content context.
Always prioritize user goals and maintain brand consistency."""
Always prioritize user goals and maintain brand consistency.
In your first pass, call 'default_content_gsc_plan' to ground your actions on live GSC signals."""
)
)
# Tool Implementations
async def _cs_fetch_gsc_analytics(self, start_date: Optional[str] = None, end_date: Optional[str] = None) -> Dict[str, Any]:
    """Fetch this user's GSC analytics and return its metrics and date range.

    Raises:
        RuntimeError: If the service returns no ``gsc`` entry or its status
            is anything other than ``"success"``.
    """
    analytics = await PlatformAnalyticsService().get_comprehensive_analytics(
        self.user_id,
        platforms=["gsc"],
        start_date=start_date,
        end_date=end_date,
    )
    gsc_result = analytics.get("gsc")
    if gsc_result and gsc_result.status == "success":
        return {"metrics": gsc_result.metrics, "date_range": gsc_result.date_range}

    # Failure path: prefer the service's own message when a result object exists.
    if gsc_result:
        reason = getattr(gsc_result, "error_message", None)
    else:
        reason = "No data"
    raise RuntimeError(f"GSC analytics unavailable: {reason}")
async def _cs_gsc_low_ctr_queries_tool(self, context: Dict[str, Any]) -> Dict[str, Any]:
    """Return top queries whose CTR is below a threshold despite real traffic.

    Context keys (all optional): ``limit`` (10), ``min_impressions`` (100),
    ``min_clicks`` (10), ``ctr_threshold`` (1.5), ``start_date``, ``end_date``.

    Returns:
        ``{"items", "range", "source"}`` on success, ordered by impressions
        (highest first) and then CTR (lowest first); ``{"error": message}``
        if fetching or processing fails.
    """
    def _as_float(value: Any, default: float = 0.0) -> float:
        # Null or unparsable metrics count as `default` so that one bad row
        # cannot abort the whole tool.
        try:
            return float(value)
        except (TypeError, ValueError):
            return default

    limit = int(context.get("limit", 10))
    min_impr = int(context.get("min_impressions", 100))
    min_clicks = int(context.get("min_clicks", 10))
    ctr_threshold = float(context.get("ctr_threshold", 1.5))
    start_date = context.get("start_date")
    end_date = context.get("end_date")
    try:
        result = await self._cs_fetch_gsc_analytics(start_date, end_date)
        rows = result["metrics"].get("top_queries", []) or []
        ranked = []
        for row in rows:
            impressions = _as_float(row.get("impressions"))
            clicks = _as_float(row.get("clicks"))
            ctr = _as_float(row.get("ctr"))
            if impressions < min_impr or clicks < min_clicks or ctr >= ctr_threshold:
                continue
            # Sort on the coerced numbers; the emitted item keeps the raw values.
            ranked.append((impressions, -ctr, {
                "query": row.get("query"),
                "clicks": row.get("clicks", 0),
                "impressions": row.get("impressions", 0),
                "ctr": row.get("ctr", 0.0),
                "position": row.get("position"),
            }))
        ranked.sort(key=lambda entry: entry[:2], reverse=True)
        items = [entry[2] for entry in ranked]
        return {"items": items[:limit], "range": result["date_range"], "source": "gsc_cache"}
    except Exception as e:
        logger.error(f"cs low_ctr_queries failed: {e}")
        return {"error": str(e)}
async def _cs_gsc_striking_distance_tool(self, context: Dict[str, Any]) -> Dict[str, Any]:
    """Return top queries whose average position lies between 8 and 20 inclusive.

    Context keys (all optional): ``limit`` (10), ``min_impressions`` (100),
    ``start_date``, ``end_date``.

    Returns:
        ``{"items", "range", "source"}`` on success, ordered by position
        (best first) and then impressions (highest first);
        ``{"error": message}`` if fetching or processing fails.
    """
    def _as_float(value: Any, default: Any = None) -> Any:
        # Null or unparsable metrics become `default` instead of raising.
        try:
            return float(value)
        except (TypeError, ValueError):
            return default

    limit = int(context.get("limit", 10))
    min_impr = int(context.get("min_impressions", 100))
    start_date = context.get("start_date")
    end_date = context.get("end_date")
    try:
        result = await self._cs_fetch_gsc_analytics(start_date, end_date)
        rows = result["metrics"].get("top_queries", []) or []
        ranked = []
        for row in rows:
            position = _as_float(row.get("position"))
            if position is None or not (8.0 <= position <= 20.0):
                continue
            impressions = _as_float(row.get("impressions"), 0.0)
            if impressions < min_impr:
                continue
            # Sort on the coerced numbers; the emitted item keeps the raw values.
            ranked.append((position, -impressions, {
                "query": row.get("query"),
                "clicks": row.get("clicks", 0),
                "impressions": row.get("impressions", 0),
                "ctr": row.get("ctr", 0.0),
                "position": row.get("position"),
            }))
        ranked.sort(key=lambda entry: entry[:2])
        items = [entry[2] for entry in ranked]
        return {"items": items[:limit], "range": result["date_range"], "source": "gsc_cache"}
    except Exception as e:
        logger.error(f"cs striking_distance failed: {e}")
        return {"error": str(e)}
async def _cs_gsc_declining_queries_tool(self, context: Dict[str, Any]) -> Dict[str, Any]:
    """Compare the current window with the equally long preceding window and
    return queries whose clicks dropped.

    Context keys (all optional): ``limit`` (10), ``min_prev_clicks`` (10),
    ``min_drop_pct`` (30.0), ``start_date``, ``end_date``.

    Returns:
        ``{"items", "range", "previous_range", "source"}`` on success, largest
        drop first; ``{"error": message}`` if fetching or processing fails.
    """
    from datetime import datetime, timedelta

    max_items = int(context.get("limit", 10))
    min_prev_clicks = int(context.get("min_prev_clicks", 10))
    min_drop_pct = float(context.get("min_drop_pct", 30.0))
    start_date = context.get("start_date")
    end_date = context.get("end_date")
    try:
        current = await self._cs_fetch_gsc_analytics(start_date, end_date)
        current_range = current["date_range"]
        range_start = current_range.get("start")
        range_end = current_range.get("end")
        day_fmt = "%Y-%m-%d"

        # Fall back to a trailing 30-day window when the range has no bounds.
        if range_start:
            window_start = datetime.strptime(range_start, day_fmt)
        else:
            window_start = datetime.utcnow() - timedelta(days=30)
        if range_end:
            window_end = datetime.strptime(range_end, day_fmt)
        else:
            window_end = datetime.utcnow()

        # The previous window ends the day before the current one starts and
        # spans the same number of days.
        window_days = max((window_end - window_start).days + 1, 1)
        previous_end = window_start - timedelta(days=1)
        previous_start = previous_end - timedelta(days=window_days - 1)
        previous = await self._cs_fetch_gsc_analytics(
            previous_start.strftime(day_fmt), previous_end.strftime(day_fmt)
        )

        current_by_query = {
            row.get("query"): row
            for row in (current["metrics"].get("top_queries", []) or [])
        }
        previous_by_query = {
            row.get("query"): row
            for row in (previous["metrics"].get("top_queries", []) or [])
        }

        declines = []
        for query, before in previous_by_query.items():
            after = current_by_query.get(query)
            if not after:
                continue
            clicks_before = int(before.get("clicks", 0) or 0)
            clicks_after = int(after.get("clicks", 0) or 0)
            if clicks_before < min_prev_clicks or clicks_after >= clicks_before:
                continue
            pct = ((clicks_before - clicks_after) / clicks_before) * 100.0
            if pct >= min_drop_pct:
                declines.append({
                    "query": query,
                    "prev_clicks": clicks_before,
                    "curr_clicks": clicks_after,
                    "drop_pct": round(pct, 2),
                })
        declines.sort(
            key=lambda d: (d.get("drop_pct", 0), d.get("prev_clicks", 0)),
            reverse=True,
        )
        return {
            "items": declines[:max_items],
            "range": current_range,
            "previous_range": previous["date_range"],
            "source": "gsc_cache",
        }
    except Exception as e:
        logger.error(f"cs declining_queries failed: {e}")
        return {"error": str(e)}
async def _cs_gsc_low_ctr_pages_tool(self, context: Dict[str, Any]) -> Dict[str, Any]:
    """Return top pages whose CTR is below a threshold, with up to five
    contributing queries each as evidence.

    Context keys (all optional): ``limit`` (10), ``min_impressions`` (200),
    ``ctr_threshold`` (1.5), ``start_date``, ``end_date``.

    Returns:
        ``{"items", "range", "source"}`` on success, ordered by impressions
        (highest first) and then CTR (lowest first); ``{"error": message}``
        if fetching or processing fails.
    """
    def _as_float(value: Any, default: float = 0.0) -> float:
        # Null or unparsable metrics count as `default` so that one bad row
        # cannot abort the whole tool.
        try:
            return float(value)
        except (TypeError, ValueError):
            return default

    limit = int(context.get("limit", 10))
    min_impr = int(context.get("min_impressions", 200))
    ctr_threshold = float(context.get("ctr_threshold", 1.5))
    start_date = context.get("start_date")
    end_date = context.get("end_date")
    try:
        result = await self._cs_fetch_gsc_analytics(start_date, end_date)
        rows = result["metrics"].get("top_pages", []) or []
        ranked = []
        for row in rows:
            impressions = _as_float(row.get("impressions"))
            ctr = _as_float(row.get("ctr"))
            if impressions < min_impr or ctr >= ctr_threshold:
                continue
            # Sort on the coerced numbers; the emitted item keeps the raw values.
            ranked.append((impressions, -ctr, {
                "page": row.get("page"),
                "clicks": row.get("clicks", 0),
                "impressions": row.get("impressions", 0),
                "ctr": row.get("ctr", 0.0),
                "position": row.get("position"),
                # `or []` covers a row that carries "queries": None.
                "evidence_queries": (row.get("queries") or [])[:5],
            }))
        ranked.sort(key=lambda entry: entry[:2], reverse=True)
        items = [entry[2] for entry in ranked]
        return {"items": items[:limit], "range": result["date_range"], "source": "gsc_cache"}
    except Exception as e:
        logger.error(f"cs low_ctr_pages failed: {e}")
        return {"error": str(e)}
async def _cs_gsc_cannibalization_candidates_tool(self, context: Dict[str, Any]) -> Dict[str, Any]:
    """Return the first ``limit`` (default 10) entries of the ``cannibalization``
    list found in the fetched GSC metrics.

    Returns:
        ``{"items", "range", "source"}`` on success; ``{"error": message}``
        if fetching or processing fails.
    """
    max_items = int(context.get("limit", 10))
    start_date = context.get("start_date")
    end_date = context.get("end_date")
    try:
        fetched = await self._cs_fetch_gsc_analytics(start_date, end_date)
        # A missing or null entry is treated as "no candidates".
        found = fetched["metrics"].get("cannibalization", []) or []
        response = {
            "items": found[:max_items],
            "range": fetched["date_range"],
            "source": "gsc_cache",
        }
        return response
    except Exception as e:
        logger.error(f"cs cannibalization_candidates failed: {e}")
        return {"error": str(e)}
async def _default_content_gsc_plan_tool(self, context: Dict[str, Any]) -> Dict[str, Any]:
    """Build a first-pass content plan from four GSC signal tools.

    Calls the low-CTR-pages, cannibalization, striking-distance and
    declining-queries tools (each with ``limit`` 10) and turns every returned
    item into one action dict.

    Args:
        context: Tool context; optional ``start_date`` / ``end_date`` are
            forwarded to each signal tool.

    Returns:
        ``{"plan_name", "range", "actions", "source", "timestamp"}``. When one
        or more signal tools reported an error, an ``errors`` dict (signal
        name -> message) is included so that an empty ``actions`` list is not
        mistaken for "nothing to do". ``{"error": message}`` if the plan
        itself fails.
    """
    start_date = context.get("start_date")
    end_date = context.get("end_date")
    try:
        tool_args = {"start_date": start_date, "end_date": end_date, "limit": 10}
        # Each tool gets its own copy of the arguments; calls run sequentially.
        signals = {
            "low_ctr_pages": await self._cs_gsc_low_ctr_pages_tool(dict(tool_args)),
            "cannibalization": await self._cs_gsc_cannibalization_candidates_tool(dict(tool_args)),
            "striking_distance": await self._cs_gsc_striking_distance_tool(dict(tool_args)),
            "declining_queries": await self._cs_gsc_declining_queries_tool(dict(tool_args)),
        }
        errors = {
            name: str(outcome["error"])
            for name, outcome in signals.items()
            if isinstance(outcome, dict) and outcome.get("error")
        }

        def _items(name):
            # Signal tools return {"error": ...} on failure, which has no items.
            outcome = signals[name]
            if not isinstance(outcome, dict):
                return []
            return outcome.get("items") or []

        actions = []
        for p in _items("low_ctr_pages"):
            actions.append({
                "type": "improve_titles_meta",
                "target": p.get("page"),
                "reason": f"Low CTR {p.get('ctr')}% with {p.get('impressions')} impressions",
                "evidence": p.get("evidence_queries", [])
            })
        for c in _items("cannibalization"):
            actions.append({
                "type": "consolidate/internal_link",
                "target": c.get("recommended_target_page"),
                "reason": f"Cannibalization on query '{c.get('query')}'",
                "pages": c.get("pages", [])
            })
        for q in _items("striking_distance"):
            actions.append({
                "type": "refresh_content",
                "target": "query",
                "query": q.get("query"),
                "reason": f"Striking distance at position {q.get('position')} with {q.get('impressions')} impressions"
            })
        for q in _items("declining_queries"):
            actions.append({
                "type": "refresh_content",
                "target": "query",
                "query": q.get("query"),
                # Separator added: the two counts used to run together ("5020").
                "reason": f"Clicks decline {q.get('prev_clicks')} → {q.get('curr_clicks')} ({q.get('drop_pct')}%)"
            })
        plan = {
            "plan_name": "Default Content Plan from GSC",
            "range": {"current": {"start": start_date, "end": end_date}},
            "actions": actions,
            "source": "gsc_cache",
            "timestamp": datetime.utcnow().isoformat()
        }
        if errors:
            plan["errors"] = errors
        return plan
    except Exception as e:
        logger.error(f"default_content_gsc_plan failed: {e}")
        return {"error": str(e)}
async def _sitemap_analyzer_tool(self, context: Dict[str, Any]) -> Dict[str, Any]:
"""Sitemap analysis tool using ContentStrategyService"""
website_url = context.get('website_url')
@@ -1324,7 +1496,37 @@ class SEOOptimizationAgent(BaseALwrityAgent):
"name": "query_seo_knowledge_base",
"description": "Queries the SIF knowledge base for SEO dashboard data, GSC/Bing metrics, and semantic insights",
"target": self._query_seo_knowledge_base_tool
}
},
{
"name": "gsc_low_ctr_queries",
"description": "Returns low-CTR queries with evidence from cached GSC metrics",
"target": self._gsc_low_ctr_queries_tool
},
{
"name": "gsc_striking_distance_queries",
"description": "Returns striking-distance queries (positions ~820) with evidence",
"target": self._gsc_striking_distance_tool
},
{
"name": "gsc_declining_queries",
"description": "Returns period-over-period declining queries with evidence",
"target": self._gsc_declining_queries_tool
},
{
"name": "gsc_low_ctr_pages",
"description": "Returns low-CTR pages with top contributing queries",
"target": self._gsc_low_ctr_pages_tool
},
{
"name": "gsc_cannibalization_candidates",
"description": "Returns query→multiple-pages cannibalization candidates with target recommendation",
"target": self._gsc_cannibalization_candidates_tool
},
{
"name": "default_seo_gsc_plan",
"description": "Runs a default first-pass SEO plan using GSC signals (titles/meta, consolidation, refreshes)",
"target": self._default_seo_gsc_plan_tool
},
],
max_iterations=15,
system=self.get_effective_system_prompt(f"""You are the SEO Optimization Agent for ALwrity user {self.user_id}.
@@ -1340,6 +1542,7 @@ class SEOOptimizationAgent(BaseALwrityAgent):
- Deep semantic search of SEO data (GSC, Bing, Audits)
Focus on high-impact, low-effort optimizations first.
In your first pass, call 'default_seo_gsc_plan' to ground your actions on live GSC signals.
Always maintain SEO best practices and user experience."""
)
)
@@ -1666,6 +1869,223 @@ class SEOOptimizationAgent(BaseALwrityAgent):
"timestamp": datetime.utcnow().isoformat()
}
# GSC Insights Tools (Option B)
async def _fetch_gsc_analytics(self, start_date: Optional[str] = None, end_date: Optional[str] = None) -> Dict[str, Any]:
    """Fetch this user's GSC analytics and return its metrics and date range.

    Raises:
        RuntimeError: If the service returns no ``gsc`` entry or its status
            is anything other than ``"success"``.
    """
    service = PlatformAnalyticsService()
    analytics = await service.get_comprehensive_analytics(
        self.user_id,
        platforms=["gsc"],
        start_date=start_date,
        end_date=end_date,
    )
    gsc_result = analytics.get("gsc")
    succeeded = bool(gsc_result) and gsc_result.status == "success"
    if not succeeded:
        # Prefer the service's own message when a result object exists.
        reason = getattr(gsc_result, "error_message", None) if gsc_result else "No data"
        raise RuntimeError(f"GSC analytics unavailable: {reason}")
    payload = {"metrics": gsc_result.metrics}
    payload["date_range"] = gsc_result.date_range
    return payload
async def _gsc_low_ctr_queries_tool(self, context: Dict[str, Any]) -> Dict[str, Any]:
    """Return top queries whose CTR is below a threshold despite real traffic.

    Context keys (all optional): ``limit`` (10), ``min_impressions`` (100),
    ``min_clicks`` (10), ``ctr_threshold`` (1.5), ``start_date``, ``end_date``.

    Returns:
        ``{"items", "range", "source"}`` on success, ordered by impressions
        (highest first) and then CTR (lowest first); ``{"error": message}``
        if fetching or processing fails.
    """
    def _as_float(value: Any, default: float = 0.0) -> float:
        # Null or unparsable metrics count as `default` so that one bad row
        # cannot abort the whole tool.
        try:
            return float(value)
        except (TypeError, ValueError):
            return default

    limit = int(context.get("limit", 10))
    min_impr = int(context.get("min_impressions", 100))
    min_clicks = int(context.get("min_clicks", 10))
    ctr_threshold = float(context.get("ctr_threshold", 1.5))
    start_date = context.get("start_date")
    end_date = context.get("end_date")
    try:
        result = await self._fetch_gsc_analytics(start_date, end_date)
        rows = result["metrics"].get("top_queries", []) or []
        ranked = []
        for row in rows:
            impressions = _as_float(row.get("impressions"))
            clicks = _as_float(row.get("clicks"))
            ctr = _as_float(row.get("ctr"))
            if impressions < min_impr or clicks < min_clicks or ctr >= ctr_threshold:
                continue
            # Sort on the coerced numbers; the emitted item keeps the raw values.
            ranked.append((impressions, -ctr, {
                "query": row.get("query"),
                "clicks": row.get("clicks", 0),
                "impressions": row.get("impressions", 0),
                "ctr": row.get("ctr", 0.0),
                "position": row.get("position")
            }))
        ranked.sort(key=lambda entry: entry[:2], reverse=True)
        items = [entry[2] for entry in ranked]
        return {
            "items": items[:limit],
            "range": result["date_range"],
            "source": "gsc_cache"
        }
    except Exception as e:
        logger.error(f"low_ctr_queries tool failed: {e}")
        return {"error": str(e)}
async def _gsc_striking_distance_tool(self, context: Dict[str, Any]) -> Dict[str, Any]:
    """Return top queries whose average position lies between 8 and 20 inclusive.

    Context keys (all optional): ``limit`` (10), ``min_impressions`` (100),
    ``start_date``, ``end_date``.

    Returns:
        ``{"items", "range", "source"}`` on success, ordered by position
        (best first) and then impressions (highest first);
        ``{"error": message}`` if fetching or processing fails.
    """
    def _as_float(value: Any, default: Any = None) -> Any:
        # Null or unparsable metrics become `default` instead of raising.
        try:
            return float(value)
        except (TypeError, ValueError):
            return default

    limit = int(context.get("limit", 10))
    min_impr = int(context.get("min_impressions", 100))
    start_date = context.get("start_date")
    end_date = context.get("end_date")
    try:
        result = await self._fetch_gsc_analytics(start_date, end_date)
        rows = result["metrics"].get("top_queries", []) or []
        ranked = []
        for row in rows:
            position = _as_float(row.get("position"))
            if position is None or not (8.0 <= position <= 20.0):
                continue
            impressions = _as_float(row.get("impressions"), 0.0)
            if impressions < min_impr:
                continue
            # Sort on the coerced numbers; the emitted item keeps the raw values.
            ranked.append((position, -impressions, {
                "query": row.get("query"),
                "clicks": row.get("clicks", 0),
                "impressions": row.get("impressions", 0),
                "ctr": row.get("ctr", 0.0),
                "position": row.get("position")
            }))
        ranked.sort(key=lambda entry: entry[:2])
        items = [entry[2] for entry in ranked]
        return {
            "items": items[:limit],
            "range": result["date_range"],
            "source": "gsc_cache"
        }
    except Exception as e:
        logger.error(f"striking_distance tool failed: {e}")
        return {"error": str(e)}
async def _gsc_declining_queries_tool(self, context: Dict[str, Any]) -> Dict[str, Any]:
    """Compare the current window with the equally long preceding window and
    return queries whose clicks dropped.

    Context keys (all optional): ``limit`` (10), ``min_prev_clicks`` (10),
    ``min_drop_pct`` (30.0), ``start_date``, ``end_date``.

    Returns:
        ``{"items", "range", "previous_range", "source"}`` on success, largest
        drop first; ``{"error": message}`` if fetching or processing fails.
    """
    from datetime import datetime, timedelta

    max_items = int(context.get("limit", 10))
    min_prev_clicks = int(context.get("min_prev_clicks", 10))
    min_drop_pct = float(context.get("min_drop_pct", 30.0))
    start_date = context.get("start_date")
    end_date = context.get("end_date")
    try:
        current = await self._fetch_gsc_analytics(start_date, end_date)
        current_range = current["date_range"]
        range_start = current_range.get("start")
        range_end = current_range.get("end")
        day_fmt = "%Y-%m-%d"

        # Fall back to a trailing 30-day window when the range has no bounds.
        if range_start:
            window_start = datetime.strptime(range_start, day_fmt)
        else:
            window_start = datetime.utcnow() - timedelta(days=30)
        if range_end:
            window_end = datetime.strptime(range_end, day_fmt)
        else:
            window_end = datetime.utcnow()

        # The previous window ends the day before the current one starts and
        # spans the same number of days.
        window_days = max((window_end - window_start).days + 1, 1)
        previous_end = window_start - timedelta(days=1)
        previous_start = previous_end - timedelta(days=window_days - 1)
        previous = await self._fetch_gsc_analytics(
            previous_start.strftime(day_fmt), previous_end.strftime(day_fmt)
        )

        current_by_query = {
            row.get("query"): row
            for row in (current["metrics"].get("top_queries", []) or [])
        }
        previous_by_query = {
            row.get("query"): row
            for row in (previous["metrics"].get("top_queries", []) or [])
        }

        declines = []
        for query, before in previous_by_query.items():
            after = current_by_query.get(query)
            if not after:
                continue
            clicks_before = int(before.get("clicks", 0) or 0)
            clicks_after = int(after.get("clicks", 0) or 0)
            if clicks_before < min_prev_clicks or clicks_after >= clicks_before:
                continue
            pct = ((clicks_before - clicks_after) / clicks_before) * 100.0
            if pct >= min_drop_pct:
                declines.append({
                    "query": query,
                    "prev_clicks": clicks_before,
                    "curr_clicks": clicks_after,
                    "drop_pct": round(pct, 2)
                })
        declines.sort(
            key=lambda d: (d.get("drop_pct", 0), d.get("prev_clicks", 0)),
            reverse=True,
        )
        return {
            "items": declines[:max_items],
            "range": current_range,
            "previous_range": previous["date_range"],
            "source": "gsc_cache"
        }
    except Exception as e:
        logger.error(f"declining_queries tool failed: {e}")
        return {"error": str(e)}
async def _gsc_low_ctr_pages_tool(self, context: Dict[str, Any]) -> Dict[str, Any]:
    """Return top pages whose CTR is below a threshold, with up to five
    contributing queries each as evidence.

    Context keys (all optional): ``limit`` (10), ``min_impressions`` (200),
    ``ctr_threshold`` (1.5), ``start_date``, ``end_date``.

    Returns:
        ``{"items", "range", "source"}`` on success, ordered by impressions
        (highest first) and then CTR (lowest first); ``{"error": message}``
        if fetching or processing fails.
    """
    def _as_float(value: Any, default: float = 0.0) -> float:
        # Null or unparsable metrics count as `default` so that one bad row
        # cannot abort the whole tool.
        try:
            return float(value)
        except (TypeError, ValueError):
            return default

    limit = int(context.get("limit", 10))
    min_impr = int(context.get("min_impressions", 200))
    ctr_threshold = float(context.get("ctr_threshold", 1.5))
    start_date = context.get("start_date")
    end_date = context.get("end_date")
    try:
        result = await self._fetch_gsc_analytics(start_date, end_date)
        rows = result["metrics"].get("top_pages", []) or []
        ranked = []
        for row in rows:
            impressions = _as_float(row.get("impressions"))
            ctr = _as_float(row.get("ctr"))
            if impressions < min_impr or ctr >= ctr_threshold:
                continue
            # Sort on the coerced numbers; the emitted item keeps the raw values.
            ranked.append((impressions, -ctr, {
                "page": row.get("page"),
                "clicks": row.get("clicks", 0),
                "impressions": row.get("impressions", 0),
                "ctr": row.get("ctr", 0.0),
                "position": row.get("position"),
                # `or []` covers a row that carries "queries": None.
                "evidence_queries": (row.get("queries") or [])[:5]
            }))
        ranked.sort(key=lambda entry: entry[:2], reverse=True)
        items = [entry[2] for entry in ranked]
        return {
            "items": items[:limit],
            "range": result["date_range"],
            "source": "gsc_cache"
        }
    except Exception as e:
        logger.error(f"low_ctr_pages tool failed: {e}")
        return {"error": str(e)}
async def _gsc_cannibalization_candidates_tool(self, context: Dict[str, Any]) -> Dict[str, Any]:
    """Return the first ``limit`` (default 10) entries of the ``cannibalization``
    list found in the fetched GSC metrics.

    Returns:
        ``{"items", "range", "source"}`` on success; ``{"error": message}``
        if fetching or processing fails.
    """
    max_items = int(context.get("limit", 10))
    start_date, end_date = context.get("start_date"), context.get("end_date")
    try:
        fetched = await self._fetch_gsc_analytics(start_date, end_date)
        # A missing or null entry is treated as "no candidates".
        found = fetched["metrics"].get("cannibalization", []) or []
        response = {"items": found[:max_items]}
        response["range"] = fetched["date_range"]
        response["source"] = "gsc_cache"
        return response
    except Exception as e:
        logger.error(f"cannibalization_candidates tool failed: {e}")
        return {"error": str(e)}
async def _default_seo_gsc_plan_tool(self, context: Dict[str, Any]) -> Dict[str, Any]:
    """Build a first-pass SEO plan from four GSC signal tools.

    Calls the low-CTR-pages, cannibalization, striking-distance and
    declining-queries tools (each with ``limit`` 10) and turns every returned
    item into one action dict.

    Args:
        context: Tool context; optional ``start_date`` / ``end_date`` are
            forwarded to each signal tool.

    Returns:
        ``{"plan_name", "range", "actions", "source", "timestamp"}``. When one
        or more signal tools reported an error, an ``errors`` dict (signal
        name -> message) is included so that an empty ``actions`` list is not
        mistaken for "nothing to do". ``{"error": message}`` if the plan
        itself fails.
    """
    start_date = context.get("start_date")
    end_date = context.get("end_date")
    try:
        tool_args = {"start_date": start_date, "end_date": end_date, "limit": 10}
        # Each tool gets its own copy of the arguments; calls run sequentially.
        signals = {
            "low_ctr_pages": await self._gsc_low_ctr_pages_tool(dict(tool_args)),
            "cannibalization": await self._gsc_cannibalization_candidates_tool(dict(tool_args)),
            "striking_distance": await self._gsc_striking_distance_tool(dict(tool_args)),
            "declining_queries": await self._gsc_declining_queries_tool(dict(tool_args)),
        }
        errors = {
            name: str(outcome["error"])
            for name, outcome in signals.items()
            if isinstance(outcome, dict) and outcome.get("error")
        }

        def _items(name):
            # Signal tools return {"error": ...} on failure, which has no items.
            outcome = signals[name]
            if not isinstance(outcome, dict):
                return []
            return outcome.get("items") or []

        actions = []
        for p in _items("low_ctr_pages"):
            actions.append({
                "type": "update_titles_meta",
                "target_page": p.get("page"),
                "justification": f"Low CTR {p.get('ctr')}% with {p.get('impressions')} impressions",
                "evidence": p.get("evidence_queries", [])
            })
        for c in _items("cannibalization"):
            actions.append({
                "type": "consolidate/internal_link",
                "target_page": c.get("recommended_target_page"),
                "justification": f"Cannibalization on query '{c.get('query')}'",
                "pages": c.get("pages", [])
            })
        for q in _items("striking_distance"):
            actions.append({
                "type": "refresh_content",
                "target": "query",
                "query": q.get("query"),
                "justification": f"Striking distance at position {q.get('position')} with {q.get('impressions')} impressions"
            })
        for q in _items("declining_queries"):
            actions.append({
                "type": "refresh_content",
                "target": "query",
                "query": q.get("query"),
                # Separator added: the two counts used to run together ("5020").
                "justification": f"Clicks decline {q.get('prev_clicks')} → {q.get('curr_clicks')} ({q.get('drop_pct')}%)"
            })
        plan = {
            "plan_name": "Default SEO Plan from GSC",
            "range": {"current": {"start": start_date, "end": end_date}},
            "actions": actions,
            "source": "gsc_cache",
            "timestamp": datetime.utcnow().isoformat()
        }
        if errors:
            plan["errors"] = errors
        return plan
    except Exception as e:
        logger.error(f"default_seo_gsc_plan failed: {e}")
        return {"error": str(e)}
class SocialAmplificationAgent(BaseALwrityAgent):
"""

View File

@@ -14,9 +14,9 @@ from .txtai_service import TxtaiIntelligenceService, TXTAI_AVAILABLE
from services.intelligence.agents.core_agent_framework import BaseALwrityAgent
from services.llm_providers.main_text_generation import llm_text_gen
# Optional txtai imports
# Optional txtai imports (align with core agent framework)
try:
from txtai.pipeline import Agent, LLM
from txtai import Agent, LLM
except ImportError:
Agent = None
LLM = None
@@ -28,9 +28,13 @@ class SharedLLMWrapper:
def generate(self, prompt: str, **kwargs) -> str:
"""Generate text using the shared LLM provider."""
# We ignore kwargs like 'max_tokens' as llm_text_gen handles defaults,
# but we could map them if needed.
return llm_text_gen(prompt, user_id=self.user_id)
try:
# We ignore kwargs like 'max_tokens' as llm_text_gen handles defaults,
# but we could map them if needed.
return llm_text_gen(prompt, user_id=self.user_id)
except Exception as e:
logger.error(f"SharedLLMWrapper failed to generate text: {e}")
return f"[ERROR: Shared LLM generation failed for user {self.user_id}]"
def __call__(self, prompt: str, **kwargs) -> str:
return self.generate(prompt, **kwargs)
@@ -40,8 +44,9 @@ class LocalLLMWrapper:
Lazily loads a local LLM via txtai.
This prevents blocking server startup with heavy model loads.
"""
def __init__(self, model_path: str):
def __init__(self, model_path: str, task: str = "text-generation"):
self.model_path = model_path
self.task = task
self._llm = None
@property
@@ -49,8 +54,9 @@ class LocalLLMWrapper:
if self._llm is None:
if LLM is None:
raise ImportError("txtai.pipeline.LLM is not available")
logger.info(f"Loading local LLM: {self.model_path}")
self._llm = LLM(path=self.model_path)
logger.info(f"Loading local LLM: {self.model_path} with task: {self.task}")
# Explicitly set task to avoid 'text2text-generation' default failures
self._llm = LLM(path=self.model_path, task=self.task)
return self._llm
def __call__(self, prompt: str, **kwargs) -> str:
@@ -67,11 +73,12 @@ class SIFBaseAgent(BaseALwrityAgent):
# 2. Local LLM for internal agent work (default for SIF agents)
if llm is None:
if TXTAI_AVAILABLE:
# Use Lazy Local LLM
llm = LocalLLMWrapper(model_name)
if TXTAI_AVAILABLE and LLM is not None:
# Use Lazy Local LLM when txtai LLM is available
# Hardening: Specify 'text-generation' task to avoid text2text defaults
llm = LocalLLMWrapper(model_name, task="text-generation")
else:
# Fallback to Shared if txtai not available
# Fallback to Shared if txtai or LLM is not available
llm = self.shared_llm
super().__init__(user_id, agent_type, model_name, llm)
@@ -85,14 +92,18 @@ class SIFBaseAgent(BaseALwrityAgent):
def _create_txtai_agent(self):
"""
SIF agents use the intelligence service directly, but we can expose
capabilities via a standard agent interface if needed.
SIF agents primarily use the intelligence service directly, but we can expose
capabilities via a standard agent interface if available.
"""
if not TXTAI_AVAILABLE:
return None
# Return a simple agent that can use the LLM
return Agent(llm=self.llm, tools=[])
if not TXTAI_AVAILABLE or Agent is None:
logger.debug(f"[{self.__class__.__name__}] txtai Agent not available, using fallback agent")
return self._create_fallback_agent()
try:
return Agent(llm=self.llm, tools=[])
except Exception as e:
logger.warning(f"[{self.__class__.__name__}] Failed to create txtai Agent: {e}")
return self._create_fallback_agent()
class StrategyArchitectAgent(SIFBaseAgent):
"""Agent for discovering content pillars and identifying strategic gaps."""

View File

@@ -25,7 +25,18 @@ except ImportError:
TXTAI_AVAILABLE = False
class TxtaiIntelligenceService:
_instances = {}
def __new__(cls, user_id: str, *args, **kwargs):
    """Return the instance cached for ``user_id`` in ``cls._instances``,
    creating and caching a new one on first use."""
    if user_id in cls._instances:
        return cls._instances[user_id]
    instance = super(TxtaiIntelligenceService, cls).__new__(cls)
    cls._instances[user_id] = instance
    return instance
def __init__(self, user_id: str, model_path: Optional[str] = None, enable_caching: bool = True):
# Singleton: prevent re-initialization if already initialized
if getattr(self, "_singleton_initialized", False):
return
self.user_id = user_id
self.model_path = model_path or "sentence-transformers/all-MiniLM-L6-v2"
self.index_path = f"workspace/workspace_{user_id}/indices/txtai"
@@ -33,6 +44,11 @@ class TxtaiIntelligenceService:
self._initialized = False
self.enable_caching = enable_caching
self.cache_manager = semantic_cache_manager if enable_caching else None
self._backend = "faiss" # Default backend
# Mark as initialized for singleton pattern
self._singleton_initialized = True
# Lazy initialization - do not initialize embeddings on startup
# self._initialize_embeddings()
@@ -52,17 +68,26 @@ class TxtaiIntelligenceService:
logger.debug(f"Model path: {self.model_path}")
logger.debug(f"Index path: {self.index_path}")
# Close existing embeddings if any to release file locks
if self.embeddings:
try:
if hasattr(self.embeddings, 'close'):
self.embeddings.close()
self.embeddings = None
except Exception as close_err:
logger.warning(f"Error closing existing embeddings: {close_err}")
# Ensure directory exists
os.makedirs(os.path.dirname(self.index_path), exist_ok=True)
logger.debug(f"Created index directory: {os.path.dirname(self.index_path)}")
# Initialize embeddings with optimal configuration for ALwrity use case
# Hardening: Disabling quantization by default as it causes 'IndexIDMap' attribute errors with small indices on Windows
self.embeddings = Embeddings({
"path": self.model_path,
"content": True, # Enable content storage for retrieval
"objects": True, # Enable object storage for metadata
"backend": "faiss", # Use Faiss for efficient similarity search
"quantize": True, # Enable quantization for memory efficiency
"backend": self._backend, # Use Faiss for efficient similarity search
"batch": 32, # Batch size for processing
"gpu": False, # Force CPU usage for compatibility
"limit": 1000 # Maximum number of results for queries
@@ -76,7 +101,12 @@ class TxtaiIntelligenceService:
try:
self.embeddings.load(self.index_path)
logger.info(f"Successfully loaded existing txtai index for user {self.user_id}")
logger.debug(f"Index contains {len(self.embeddings)} items")
# Try to log count, handle if not supported
try:
count = self.embeddings.count() if hasattr(self.embeddings, 'count') else "unknown"
logger.debug(f"Index contains {count} items")
except:
logger.debug("Index loaded (count unavailable)")
except Exception as load_error:
logger.warning(f"Failed to load existing index: {load_error}. Creating new index.")
# Reset embeddings to create new index
@@ -84,8 +114,7 @@ class TxtaiIntelligenceService:
"path": self.model_path,
"content": True,
"objects": True,
"backend": "faiss",
"quantize": True,
"backend": self._backend,
"batch": 32,
"gpu": False,
"limit": 1000
@@ -146,8 +175,15 @@ class TxtaiIntelligenceService:
logger.error(f"Error indexing content for user {self.user_id}: {e}")
logger.error(f"Full traceback: {traceback.format_exc()}")
logger.error(f"Items count: {len(items) if items else 0}")
if items and len(items) > 0:
logger.error(f"Sample item structure: {type(items[0])}")
message = str(e)
is_windows_lock_error = isinstance(e, PermissionError) or "WinError 32" in message
if is_windows_lock_error:
logger.warning(
f"Txtai index save skipped for user {self.user_id} due to file lock. "
f"The index will be retried on a future run."
)
return
raise
async def search(self, query: str, limit: int = 5) -> List[Dict[str, Any]]:
@@ -172,7 +208,20 @@ class TxtaiIntelligenceService:
logger.debug(f"Cache miss for search query: '{query}'")
logger.debug(f"Searching for query: '{query}' with limit: {limit}")
results = self.embeddings.search(query, limit=limit)
try:
results = self.embeddings.search(query, limit=limit)
except AttributeError as ae:
if "nprobe" in str(ae):
logger.error(f"Detected known txtai/faiss IndexIDMap/nprobe incompatibility for user {self.user_id}. Attempting re-init with numpy backend fallback...")
# Switch to numpy backend which doesn't have this issue
self._backend = "numpy"
self._initialize_embeddings()
if self.embeddings:
results = self.embeddings.search(query, limit=limit)
else:
raise ae
else:
raise ae
# Cache the results if caching is enabled
if self.enable_caching and self.cache_manager and results:
@@ -216,7 +265,19 @@ class TxtaiIntelligenceService:
logger.debug(f"Cache miss for similarity calculation")
logger.debug(f"Calculating similarity between texts: '{text1[:50]}...' and '{text2[:50]}...'")
similarity = self.embeddings.similarity(text1, text2)
try:
similarity = self.embeddings.similarity(text1, text2)
except AttributeError as ae:
if "nprobe" in str(ae):
logger.error(f"Detected IndexIDMap nprobe error in similarity for user {self.user_id}. Falling back to numpy backend...")
self._backend = "numpy"
self._initialize_embeddings()
if self.embeddings:
similarity = self.embeddings.similarity(text1, text2)
else:
raise ae
else:
raise ae
# Cache the similarity result
if self.enable_caching and self.cache_manager:
@@ -272,7 +333,19 @@ class TxtaiIntelligenceService:
# Use graph-based clustering if available
# Perform a search to get graph structure
sample_query = "content marketing digital strategy"
graph_results = self.embeddings.search(sample_query, limit=10, graph=True)
try:
graph_results = self.embeddings.search(sample_query, limit=10, graph=True)
except AttributeError as ae:
if "nprobe" in str(ae):
logger.error(f"Detected IndexIDMap nprobe error in cluster for user {self.user_id}. Falling back to numpy backend...")
self._backend = "numpy"
self._initialize_embeddings()
if self.embeddings:
graph_results = self.embeddings.search(sample_query, limit=10, graph=True)
else:
raise ae
else:
raise ae
if not graph_results:
logger.warning(f"No graph results for clustering user {self.user_id}")
@@ -306,7 +379,7 @@ class TxtaiIntelligenceService:
logger.error(f"Full traceback: {traceback.format_exc()}")
return self._fallback_clustering(min_score)
def _fallback_clustering(self, min_score: float) -> List[List[int]]:
async def _fallback_clustering(self, min_score: float) -> List[List[int]]:
"""Fallback clustering method when graph clustering is not available."""
logger.info(f"Using fallback clustering for user {self.user_id}")
@@ -318,7 +391,8 @@ class TxtaiIntelligenceService:
all_clusters = []
for query in sample_queries:
results = self.embeddings.search(query, limit=5)
# Use our search wrapper for hardening
results = await self.search(query, limit=5)
if results and results[0].get("score", 0) >= min_score:
# Create a cluster from similar results
cluster = [i for i, result in enumerate(results) if result.get("score", 0) >= min_score]
@@ -393,9 +467,13 @@ class TxtaiIntelligenceService:
return {"status": "not_initialized", "user_id": self.user_id}
try:
# Get count of indexed items - txtai doesn't have a direct len() method
# We'll estimate based on available data or return a placeholder
index_size = getattr(self.embeddings, 'count', 0) or "unknown"
# Get count of indexed items
index_size = "unknown"
if hasattr(self.embeddings, 'count'):
try:
index_size = self.embeddings.count()
except:
pass
return {
"status": "active",
@@ -410,5 +488,7 @@ class TxtaiIntelligenceService:
return {"status": "error", "user_id": self.user_id, "error": str(e)}
def is_initialized(self) -> bool:
"""Check if the service is properly initialized."""
"""Check if the service is properly initialized, triggering lazy init if needed."""
if not self._initialized:
self._ensure_initialized()
return self._initialized and self.embeddings is not None