Release Candidate: Production Release with Multi-Tenant & Onboarding Enhancements
This commit is contained in:
@@ -76,7 +76,8 @@ class ALwrityAgentOrchestrator:
|
||||
try:
|
||||
# Initialize shared LLM
|
||||
if TXTAI_AVAILABLE:
|
||||
self.llm = LLM(self.config.shared_llm)
|
||||
# Hardening: Explicitly set task to avoid 'text2text-generation' default failures
|
||||
self.llm = LLM(self.config.shared_llm, task="text-generation")
|
||||
else:
|
||||
self.llm = None
|
||||
|
||||
|
||||
@@ -181,7 +181,8 @@ class BaseALwrityAgent(ABC):
|
||||
try:
|
||||
if not self.llm:
|
||||
# Create new LLM if not provided
|
||||
raw_llm = LLM(model_name)
|
||||
# Hardening: Explicitly set task to avoid 'text2text-generation' default failures
|
||||
raw_llm = LLM(model_name, task="text-generation")
|
||||
# Wrap it
|
||||
self.llm = TrackingLLMWrapper(raw_llm, self.user_id, self.model_name)
|
||||
|
||||
@@ -906,6 +907,11 @@ class StrategyOrchestratorAgent(BaseALwrityAgent):
|
||||
"name": "task_delegator",
|
||||
"description": "Delegates specific tasks to specialized agents (content, competitor, seo, social)",
|
||||
"target": self._delegate_task_tool
|
||||
},
|
||||
{
|
||||
"name": "kickoff_gsc_first_pass",
|
||||
"description": "Kicks off first-pass execution by invoking SEO/Content default GSC plans",
|
||||
"target": self._kickoff_gsc_first_pass_tool
|
||||
}
|
||||
],
|
||||
max_iterations=15,
|
||||
@@ -924,7 +930,9 @@ class StrategyOrchestratorAgent(BaseALwrityAgent):
|
||||
Do not just plan; EXECUTE by delegating.
|
||||
|
||||
Always prioritize user goals and maintain safety constraints.
|
||||
Coordinate multi-agent responses to market changes effectively."""
|
||||
Coordinate multi-agent responses to market changes effectively.
|
||||
|
||||
First, call 'kickoff_gsc_first_pass' to ground the plan on live GSC signals."""
|
||||
)
|
||||
)
|
||||
|
||||
@@ -1033,6 +1041,37 @@ class StrategyOrchestratorAgent(BaseALwrityAgent):
|
||||
except Exception as e:
|
||||
return {"error": str(e)}
|
||||
|
||||
async def _kickoff_gsc_first_pass_tool(self, context: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""Invoke SEO and Content agents' default GSC plans and combine results"""
|
||||
try:
|
||||
start_date = context.get("start_date")
|
||||
end_date = context.get("end_date")
|
||||
payload = {"start_date": start_date, "end_date": end_date}
|
||||
results = {}
|
||||
combined_actions = []
|
||||
|
||||
seo = self.sub_agents.get("seo")
|
||||
if seo and hasattr(seo, "_default_seo_gsc_plan_tool"):
|
||||
plan = await seo._default_seo_gsc_plan_tool(payload)
|
||||
results["seo"] = plan
|
||||
combined_actions.extend(plan.get("actions", []) if isinstance(plan, dict) else [])
|
||||
|
||||
content = self.sub_agents.get("content")
|
||||
if content and hasattr(content, "_default_content_gsc_plan_tool"):
|
||||
plan = await content._default_content_gsc_plan_tool(payload)
|
||||
results["content"] = plan
|
||||
combined_actions.extend(plan.get("actions", []) if isinstance(plan, dict) else [])
|
||||
|
||||
return {
|
||||
"status": "ok",
|
||||
"invoked": list(results.keys()),
|
||||
"results": results,
|
||||
"combined_actions": combined_actions,
|
||||
"timestamp": datetime.utcnow().isoformat()
|
||||
}
|
||||
except Exception as e:
|
||||
return {"status": "error", "error": str(e)}
|
||||
|
||||
async def _strategy_synthesizer_tool(self, context: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""Tool for synthesizing strategies"""
|
||||
return {
|
||||
|
||||
@@ -13,6 +13,7 @@ from loguru import logger
|
||||
from ..txtai_service import TxtaiIntelligenceService
|
||||
from services.intelligence.agents.core_agent_framework import BaseALwrityAgent, AgentAction
|
||||
from services.seo_tools.content_strategy_service import ContentStrategyService
|
||||
from services.analytics import PlatformAnalyticsService
|
||||
from services.intelligence.sif_agents import SharedLLMWrapper, LocalLLMWrapper
|
||||
try:
|
||||
from services.intelligence.sif_integration import SIFIntegrationService
|
||||
@@ -888,7 +889,37 @@ class ContentStrategyAgent(BaseALwrityAgent):
|
||||
"name": "sitemap_analyzer",
|
||||
"description": "Analyzes website structure and publishing velocity via sitemap",
|
||||
"target": self._sitemap_analyzer_tool
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "gsc_low_ctr_queries",
|
||||
"description": "Returns low-CTR queries with evidence from cached GSC metrics",
|
||||
"target": self._cs_gsc_low_ctr_queries_tool
|
||||
},
|
||||
{
|
||||
"name": "gsc_striking_distance_queries",
|
||||
"description": "Returns striking-distance queries (positions ~8–20) with evidence",
|
||||
"target": self._cs_gsc_striking_distance_tool
|
||||
},
|
||||
{
|
||||
"name": "gsc_declining_queries",
|
||||
"description": "Returns period-over-period declining queries with evidence",
|
||||
"target": self._cs_gsc_declining_queries_tool
|
||||
},
|
||||
{
|
||||
"name": "gsc_low_ctr_pages",
|
||||
"description": "Returns low-CTR pages with top contributing queries",
|
||||
"target": self._cs_gsc_low_ctr_pages_tool
|
||||
},
|
||||
{
|
||||
"name": "gsc_cannibalization_candidates",
|
||||
"description": "Returns query→multiple-pages cannibalization candidates with target recommendation",
|
||||
"target": self._cs_gsc_cannibalization_candidates_tool
|
||||
},
|
||||
{
|
||||
"name": "default_content_gsc_plan",
|
||||
"description": "Runs a default first-pass plan using GSC signals (titles/meta, consolidation, refreshes)",
|
||||
"target": self._default_content_gsc_plan_tool
|
||||
},
|
||||
],
|
||||
max_iterations=8,
|
||||
system=self.get_effective_system_prompt(f"""You are the Content Strategy Agent for ALwrity user {self.user_id}.
|
||||
@@ -903,12 +934,153 @@ class ContentStrategyAgent(BaseALwrityAgent):
|
||||
- Performance-based content improvements
|
||||
|
||||
Use semantic analysis (SIF) and sitemap analysis to understand content context.
|
||||
Always prioritize user goals and maintain brand consistency."""
|
||||
Always prioritize user goals and maintain brand consistency.
|
||||
|
||||
In your first pass, call 'default_content_gsc_plan' to ground your actions on live GSC signals."""
|
||||
)
|
||||
)
|
||||
|
||||
# Tool Implementations
|
||||
|
||||
async def _cs_fetch_gsc_analytics(self, start_date: Optional[str] = None, end_date: Optional[str] = None) -> Dict[str, Any]:
    """Fetch this user's GSC analytics and return its metrics and date range.

    Args:
        start_date: Optional window start, passed through to the analytics service.
        end_date: Optional window end, passed through to the analytics service.

    Returns:
        ``{"metrics": ..., "date_range": ...}`` taken from the GSC result.

    Raises:
        RuntimeError: If no GSC entry comes back or its status is not ``"success"``.
    """
    analytics = await PlatformAnalyticsService().get_comprehensive_analytics(
        self.user_id,
        platforms=["gsc"],
        start_date=start_date,
        end_date=end_date,
    )
    gsc_result = analytics.get("gsc")
    if gsc_result and gsc_result.status == "success":
        return {"metrics": gsc_result.metrics, "date_range": gsc_result.date_range}
    # A missing entry has no error_message to read, so report a fixed reason.
    reason = getattr(gsc_result, "error_message", None) if gsc_result else "No data"
    raise RuntimeError(f"GSC analytics unavailable: {reason}")
|
||||
|
||||
async def _cs_gsc_low_ctr_queries_tool(self, context: Dict[str, Any]) -> Dict[str, Any]:
|
||||
limit = int(context.get("limit", 10)); min_impr = int(context.get("min_impressions", 100)); min_clicks = int(context.get("min_clicks", 10)); ctr_threshold = float(context.get("ctr_threshold", 1.5))
|
||||
start_date = context.get("start_date"); end_date = context.get("end_date")
|
||||
try:
|
||||
result = await self._cs_fetch_gsc_analytics(start_date, end_date)
|
||||
tq = result["metrics"].get("top_queries", []) or []
|
||||
items = [
|
||||
{"query": r.get("query"), "clicks": r.get("clicks", 0), "impressions": r.get("impressions", 0), "ctr": r.get("ctr", 0.0), "position": r.get("position")}
|
||||
for r in tq
|
||||
if (r.get("impressions", 0) >= min_impr and r.get("clicks", 0) >= min_clicks and float(r.get("ctr", 0.0)) < ctr_threshold)
|
||||
]
|
||||
items.sort(key=lambda x: (x.get("impressions", 0), -x.get("ctr", 100.0)), reverse=True)
|
||||
return {"items": items[:limit], "range": result["date_range"], "source": "gsc_cache"}
|
||||
except Exception as e:
|
||||
logger.error(f"cs low_ctr_queries failed: {e}"); return {"error": str(e)}
|
||||
|
||||
async def _cs_gsc_striking_distance_tool(self, context: Dict[str, Any]) -> Dict[str, Any]:
|
||||
limit = int(context.get("limit", 10)); min_impr = int(context.get("min_impressions", 100)); start_date = context.get("start_date"); end_date = context.get("end_date")
|
||||
try:
|
||||
result = await self._cs_fetch_gsc_analytics(start_date, end_date)
|
||||
tq = result["metrics"].get("top_queries", []) or []
|
||||
items = [
|
||||
{"query": r.get("query"), "clicks": r.get("clicks", 0), "impressions": r.get("impressions", 0), "ctr": r.get("ctr", 0.0), "position": r.get("position")}
|
||||
for r in tq
|
||||
if (r.get("impressions", 0) >= min_impr and r.get("position") is not None and 8.0 <= float(r.get("position")) <= 20.0)
|
||||
]
|
||||
items.sort(key=lambda x: (x.get("position") if x.get("position") is not None else 999, -x.get("impressions", 0)))
|
||||
return {"items": items[:limit], "range": result["date_range"], "source": "gsc_cache"}
|
||||
except Exception as e:
|
||||
logger.error(f"cs striking_distance failed: {e}"); return {"error": str(e)}
|
||||
|
||||
async def _cs_gsc_declining_queries_tool(self, context: Dict[str, Any]) -> Dict[str, Any]:
|
||||
limit = int(context.get("limit", 10)); min_prev_clicks = int(context.get("min_prev_clicks", 10)); min_drop_pct = float(context.get("min_drop_pct", 30.0))
|
||||
start_date = context.get("start_date"); end_date = context.get("end_date")
|
||||
try:
|
||||
curr = await self._cs_fetch_gsc_analytics(start_date, end_date)
|
||||
curr_range = curr["date_range"]; s = curr_range.get("start"); e = curr_range.get("end")
|
||||
from datetime import datetime, timedelta; fmt = "%Y-%m-%d"
|
||||
sd = datetime.strptime(s, fmt) if s else datetime.utcnow() - timedelta(days=30); ed = datetime.strptime(e, fmt) if e else datetime.utcnow()
|
||||
days = max((ed - sd).days + 1, 1); prev_end = sd - timedelta(days=1); prev_start = prev_end - timedelta(days=days - 1)
|
||||
prev = await self._cs_fetch_gsc_analytics(prev_start.strftime(fmt), prev_end.strftime(fmt))
|
||||
curr_queries = {r.get("query"): r for r in (curr["metrics"].get("top_queries", []) or [])}
|
||||
prev_queries = {r.get("query"): r for r in (prev["metrics"].get("top_queries", []) or [])}
|
||||
items = []
|
||||
for q, prev_row in prev_queries.items():
|
||||
curr_row = curr_queries.get(q);
|
||||
if not curr_row: continue
|
||||
prev_clicks = int(prev_row.get("clicks", 0) or 0); curr_clicks = int(curr_row.get("clicks", 0) or 0)
|
||||
if prev_clicks >= min_prev_clicks and curr_clicks < prev_clicks:
|
||||
drop_pct = ((prev_clicks - curr_clicks) / prev_clicks) * 100.0
|
||||
if drop_pct >= min_drop_pct:
|
||||
items.append({"query": q, "prev_clicks": prev_clicks, "curr_clicks": curr_clicks, "drop_pct": round(drop_pct, 2)})
|
||||
items.sort(key=lambda x: (x.get("drop_pct", 0), x.get("prev_clicks", 0)), reverse=True)
|
||||
return {"items": items[:limit], "range": curr_range, "previous_range": prev["date_range"], "source": "gsc_cache"}
|
||||
except Exception as e:
|
||||
logger.error(f"cs declining_queries failed: {e}"); return {"error": str(e)}
|
||||
|
||||
async def _cs_gsc_low_ctr_pages_tool(self, context: Dict[str, Any]) -> Dict[str, Any]:
|
||||
limit = int(context.get("limit", 10)); min_impr = int(context.get("min_impressions", 200)); ctr_threshold = float(context.get("ctr_threshold", 1.5))
|
||||
start_date = context.get("start_date"); end_date = context.get("end_date")
|
||||
try:
|
||||
result = await self._cs_fetch_gsc_analytics(start_date, end_date)
|
||||
tp = result["metrics"].get("top_pages", []) or []
|
||||
items = []
|
||||
for r in tp:
|
||||
if (r.get("impressions", 0) >= min_impr and float(r.get("ctr", 0.0)) < ctr_threshold):
|
||||
items.append({"page": r.get("page"), "clicks": r.get("clicks", 0), "impressions": r.get("impressions", 0), "ctr": r.get("ctr", 0.0), "position": r.get("position"), "evidence_queries": r.get("queries", [])[:5]})
|
||||
items.sort(key=lambda x: (x.get("impressions", 0), -x.get("ctr", 100.0)), reverse=True)
|
||||
return {"items": items[:limit], "range": result["date_range"], "source": "gsc_cache"}
|
||||
except Exception as e:
|
||||
logger.error(f"cs low_ctr_pages failed: {e}"); return {"error": str(e)}
|
||||
|
||||
async def _cs_gsc_cannibalization_candidates_tool(self, context: Dict[str, Any]) -> Dict[str, Any]:
|
||||
limit = int(context.get("limit", 10)); start_date = context.get("start_date"); end_date = context.get("end_date")
|
||||
try:
|
||||
result = await self._cs_fetch_gsc_analytics(start_date, end_date)
|
||||
candidates = result["metrics"].get("cannibalization", []) or []
|
||||
return {"items": candidates[:limit], "range": result["date_range"], "source": "gsc_cache"}
|
||||
except Exception as e:
|
||||
logger.error(f"cs cannibalization_candidates failed: {e}"); return {"error": str(e)}
|
||||
|
||||
async def _default_content_gsc_plan_tool(self, context: Dict[str, Any]) -> Dict[str, Any]:
|
||||
start_date = context.get("start_date"); end_date = context.get("end_date")
|
||||
try:
|
||||
low_ctr_pages = await self._cs_gsc_low_ctr_pages_tool({"start_date": start_date, "end_date": end_date, "limit": 10})
|
||||
cannibals = await self._cs_gsc_cannibalization_candidates_tool({"start_date": start_date, "end_date": end_date, "limit": 10})
|
||||
striking = await self._cs_gsc_striking_distance_tool({"start_date": start_date, "end_date": end_date, "limit": 10})
|
||||
declining = await self._cs_gsc_declining_queries_tool({"start_date": start_date, "end_date": end_date, "limit": 10})
|
||||
|
||||
actions = []
|
||||
for p in low_ctr_pages.get("items", []):
|
||||
actions.append({
|
||||
"type": "improve_titles_meta",
|
||||
"target": p.get("page"),
|
||||
"reason": f"Low CTR {p.get('ctr')}% with {p.get('impressions')} impressions",
|
||||
"evidence": p.get("evidence_queries", [])
|
||||
})
|
||||
for c in cannibals.get("items", []):
|
||||
actions.append({
|
||||
"type": "consolidate/internal_link",
|
||||
"target": c.get("recommended_target_page"),
|
||||
"reason": f"Cannibalization on query '{c.get('query')}'",
|
||||
"pages": c.get("pages", [])
|
||||
})
|
||||
for q in striking.get("items", []):
|
||||
actions.append({
|
||||
"type": "refresh_content",
|
||||
"target": "query",
|
||||
"query": q.get("query"),
|
||||
"reason": f"Striking distance at position {q.get('position')} with {q.get('impressions')} impressions"
|
||||
})
|
||||
for q in declining.get("items", []):
|
||||
actions.append({
|
||||
"type": "refresh_content",
|
||||
"target": "query",
|
||||
"query": q.get("query"),
|
||||
"reason": f"Clicks decline {q.get('prev_clicks')}→{q.get('curr_clicks')} ({q.get('drop_pct')}%)"
|
||||
})
|
||||
|
||||
return {
|
||||
"plan_name": "Default Content Plan from GSC",
|
||||
"range": {"current": {"start": start_date, "end": end_date}},
|
||||
"actions": actions,
|
||||
"source": "gsc_cache",
|
||||
"timestamp": datetime.utcnow().isoformat()
|
||||
}
|
||||
except Exception as e:
|
||||
logger.error(f"default_content_gsc_plan failed: {e}")
|
||||
return {"error": str(e)}
|
||||
|
||||
async def _sitemap_analyzer_tool(self, context: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""Sitemap analysis tool using ContentStrategyService"""
|
||||
website_url = context.get('website_url')
|
||||
@@ -1324,7 +1496,37 @@ class SEOOptimizationAgent(BaseALwrityAgent):
|
||||
"name": "query_seo_knowledge_base",
|
||||
"description": "Queries the SIF knowledge base for SEO dashboard data, GSC/Bing metrics, and semantic insights",
|
||||
"target": self._query_seo_knowledge_base_tool
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "gsc_low_ctr_queries",
|
||||
"description": "Returns low-CTR queries with evidence from cached GSC metrics",
|
||||
"target": self._gsc_low_ctr_queries_tool
|
||||
},
|
||||
{
|
||||
"name": "gsc_striking_distance_queries",
|
||||
"description": "Returns striking-distance queries (positions ~8–20) with evidence",
|
||||
"target": self._gsc_striking_distance_tool
|
||||
},
|
||||
{
|
||||
"name": "gsc_declining_queries",
|
||||
"description": "Returns period-over-period declining queries with evidence",
|
||||
"target": self._gsc_declining_queries_tool
|
||||
},
|
||||
{
|
||||
"name": "gsc_low_ctr_pages",
|
||||
"description": "Returns low-CTR pages with top contributing queries",
|
||||
"target": self._gsc_low_ctr_pages_tool
|
||||
},
|
||||
{
|
||||
"name": "gsc_cannibalization_candidates",
|
||||
"description": "Returns query→multiple-pages cannibalization candidates with target recommendation",
|
||||
"target": self._gsc_cannibalization_candidates_tool
|
||||
},
|
||||
{
|
||||
"name": "default_seo_gsc_plan",
|
||||
"description": "Runs a default first-pass SEO plan using GSC signals (titles/meta, consolidation, refreshes)",
|
||||
"target": self._default_seo_gsc_plan_tool
|
||||
},
|
||||
],
|
||||
max_iterations=15,
|
||||
system=self.get_effective_system_prompt(f"""You are the SEO Optimization Agent for ALwrity user {self.user_id}.
|
||||
@@ -1340,6 +1542,7 @@ class SEOOptimizationAgent(BaseALwrityAgent):
|
||||
- Deep semantic search of SEO data (GSC, Bing, Audits)
|
||||
|
||||
Focus on high-impact, low-effort optimizations first.
|
||||
In your first pass, call 'default_seo_gsc_plan' to ground your actions on live GSC signals.
|
||||
Always maintain SEO best practices and user experience."""
|
||||
)
|
||||
)
|
||||
@@ -1666,6 +1869,223 @@ class SEOOptimizationAgent(BaseALwrityAgent):
|
||||
"timestamp": datetime.utcnow().isoformat()
|
||||
}
|
||||
|
||||
# GSC Insights Tools (Option B)
|
||||
async def _fetch_gsc_analytics(self, start_date: Optional[str] = None, end_date: Optional[str] = None) -> Dict[str, Any]:
    """Fetch this user's GSC analytics and return its metrics and date range.

    Args:
        start_date: Optional window start, passed through to the analytics service.
        end_date: Optional window end, passed through to the analytics service.

    Returns:
        ``{"metrics": ..., "date_range": ...}`` taken from the GSC result.

    Raises:
        RuntimeError: If no GSC entry comes back or its status is not ``"success"``.
    """
    service = PlatformAnalyticsService()
    by_platform = await service.get_comprehensive_analytics(
        self.user_id,
        platforms=["gsc"],
        start_date=start_date,
        end_date=end_date,
    )
    gsc_result = by_platform.get("gsc")
    if not gsc_result:
        err = "No data"
    elif gsc_result.status != "success":
        err = getattr(gsc_result, "error_message", None)
    else:
        return {"metrics": gsc_result.metrics, "date_range": gsc_result.date_range}
    raise RuntimeError(f"GSC analytics unavailable: {err}")
|
||||
|
||||
async def _gsc_low_ctr_queries_tool(self, context: Dict[str, Any]) -> Dict[str, Any]:
|
||||
limit = int(context.get("limit", 10))
|
||||
min_impr = int(context.get("min_impressions", 100))
|
||||
min_clicks = int(context.get("min_clicks", 10))
|
||||
ctr_threshold = float(context.get("ctr_threshold", 1.5))
|
||||
start_date = context.get("start_date")
|
||||
end_date = context.get("end_date")
|
||||
try:
|
||||
result = await self._fetch_gsc_analytics(start_date, end_date)
|
||||
tq = result["metrics"].get("top_queries", []) or []
|
||||
items = [
|
||||
{
|
||||
"query": r.get("query"),
|
||||
"clicks": r.get("clicks", 0),
|
||||
"impressions": r.get("impressions", 0),
|
||||
"ctr": r.get("ctr", 0.0),
|
||||
"position": r.get("position")
|
||||
}
|
||||
for r in tq
|
||||
if (r.get("impressions", 0) >= min_impr and r.get("clicks", 0) >= min_clicks and float(r.get("ctr", 0.0)) < ctr_threshold)
|
||||
]
|
||||
items.sort(key=lambda x: (x.get("impressions", 0), -x.get("ctr", 100.0)), reverse=True)
|
||||
return {
|
||||
"items": items[:limit],
|
||||
"range": result["date_range"],
|
||||
"source": "gsc_cache"
|
||||
}
|
||||
except Exception as e:
|
||||
logger.error(f"low_ctr_queries tool failed: {e}")
|
||||
return {"error": str(e)}
|
||||
|
||||
async def _gsc_striking_distance_tool(self, context: Dict[str, Any]) -> Dict[str, Any]:
|
||||
limit = int(context.get("limit", 10))
|
||||
min_impr = int(context.get("min_impressions", 100))
|
||||
start_date = context.get("start_date")
|
||||
end_date = context.get("end_date")
|
||||
try:
|
||||
result = await self._fetch_gsc_analytics(start_date, end_date)
|
||||
tq = result["metrics"].get("top_queries", []) or []
|
||||
items = [
|
||||
{
|
||||
"query": r.get("query"),
|
||||
"clicks": r.get("clicks", 0),
|
||||
"impressions": r.get("impressions", 0),
|
||||
"ctr": r.get("ctr", 0.0),
|
||||
"position": r.get("position")
|
||||
}
|
||||
for r in tq
|
||||
if (r.get("impressions", 0) >= min_impr and r.get("position") is not None and 8.0 <= float(r.get("position")) <= 20.0)
|
||||
]
|
||||
items.sort(key=lambda x: (x.get("position") if x.get("position") is not None else 999, -x.get("impressions", 0)))
|
||||
return {
|
||||
"items": items[:limit],
|
||||
"range": result["date_range"],
|
||||
"source": "gsc_cache"
|
||||
}
|
||||
except Exception as e:
|
||||
logger.error(f"striking_distance tool failed: {e}")
|
||||
return {"error": str(e)}
|
||||
|
||||
async def _gsc_declining_queries_tool(self, context: Dict[str, Any]) -> Dict[str, Any]:
|
||||
limit = int(context.get("limit", 10))
|
||||
min_prev_clicks = int(context.get("min_prev_clicks", 10))
|
||||
min_drop_pct = float(context.get("min_drop_pct", 30.0))
|
||||
start_date = context.get("start_date")
|
||||
end_date = context.get("end_date")
|
||||
try:
|
||||
curr = await self._fetch_gsc_analytics(start_date, end_date)
|
||||
curr_range = curr["date_range"]
|
||||
s = curr_range.get("start")
|
||||
e = curr_range.get("end")
|
||||
from datetime import datetime, timedelta
|
||||
fmt = "%Y-%m-%d"
|
||||
sd = datetime.strptime(s, fmt) if s else datetime.utcnow() - timedelta(days=30)
|
||||
ed = datetime.strptime(e, fmt) if e else datetime.utcnow()
|
||||
days = max((ed - sd).days + 1, 1)
|
||||
prev_end = sd - timedelta(days=1)
|
||||
prev_start = prev_end - timedelta(days=days - 1)
|
||||
prev = await self._fetch_gsc_analytics(prev_start.strftime(fmt), prev_end.strftime(fmt))
|
||||
curr_queries = {r.get("query"): r for r in (curr["metrics"].get("top_queries", []) or [])}
|
||||
prev_queries = {r.get("query"): r for r in (prev["metrics"].get("top_queries", []) or [])}
|
||||
items = []
|
||||
for q, prev_row in prev_queries.items():
|
||||
curr_row = curr_queries.get(q)
|
||||
if not curr_row:
|
||||
continue
|
||||
prev_clicks = int(prev_row.get("clicks", 0) or 0)
|
||||
curr_clicks = int(curr_row.get("clicks", 0) or 0)
|
||||
if prev_clicks >= min_prev_clicks and curr_clicks < prev_clicks:
|
||||
drop_pct = ((prev_clicks - curr_clicks) / prev_clicks) * 100.0
|
||||
if drop_pct >= min_drop_pct:
|
||||
items.append({
|
||||
"query": q,
|
||||
"prev_clicks": prev_clicks,
|
||||
"curr_clicks": curr_clicks,
|
||||
"drop_pct": round(drop_pct, 2)
|
||||
})
|
||||
items.sort(key=lambda x: (x.get("drop_pct", 0), x.get("prev_clicks", 0)), reverse=True)
|
||||
return {
|
||||
"items": items[:limit],
|
||||
"range": curr_range,
|
||||
"previous_range": prev["date_range"],
|
||||
"source": "gsc_cache"
|
||||
}
|
||||
except Exception as e:
|
||||
logger.error(f"declining_queries tool failed: {e}")
|
||||
return {"error": str(e)}
|
||||
|
||||
async def _gsc_low_ctr_pages_tool(self, context: Dict[str, Any]) -> Dict[str, Any]:
|
||||
limit = int(context.get("limit", 10))
|
||||
min_impr = int(context.get("min_impressions", 200))
|
||||
ctr_threshold = float(context.get("ctr_threshold", 1.5))
|
||||
start_date = context.get("start_date")
|
||||
end_date = context.get("end_date")
|
||||
try:
|
||||
result = await self._fetch_gsc_analytics(start_date, end_date)
|
||||
tp = result["metrics"].get("top_pages", []) or []
|
||||
items = []
|
||||
for r in tp:
|
||||
if (r.get("impressions", 0) >= min_impr and float(r.get("ctr", 0.0)) < ctr_threshold):
|
||||
items.append({
|
||||
"page": r.get("page"),
|
||||
"clicks": r.get("clicks", 0),
|
||||
"impressions": r.get("impressions", 0),
|
||||
"ctr": r.get("ctr", 0.0),
|
||||
"position": r.get("position"),
|
||||
"evidence_queries": r.get("queries", [])[:5]
|
||||
})
|
||||
items.sort(key=lambda x: (x.get("impressions", 0), -x.get("ctr", 100.0)), reverse=True)
|
||||
return {
|
||||
"items": items[:limit],
|
||||
"range": result["date_range"],
|
||||
"source": "gsc_cache"
|
||||
}
|
||||
except Exception as e:
|
||||
logger.error(f"low_ctr_pages tool failed: {e}")
|
||||
return {"error": str(e)}
|
||||
|
||||
async def _gsc_cannibalization_candidates_tool(self, context: Dict[str, Any]) -> Dict[str, Any]:
|
||||
limit = int(context.get("limit", 10))
|
||||
start_date = context.get("start_date")
|
||||
end_date = context.get("end_date")
|
||||
try:
|
||||
result = await self._fetch_gsc_analytics(start_date, end_date)
|
||||
candidates = result["metrics"].get("cannibalization", []) or []
|
||||
return {
|
||||
"items": candidates[:limit],
|
||||
"range": result["date_range"],
|
||||
"source": "gsc_cache"
|
||||
}
|
||||
except Exception as e:
|
||||
logger.error(f"cannibalization_candidates tool failed: {e}")
|
||||
return {"error": str(e)}
|
||||
|
||||
async def _default_seo_gsc_plan_tool(self, context: Dict[str, Any]) -> Dict[str, Any]:
|
||||
start_date = context.get("start_date")
|
||||
end_date = context.get("end_date")
|
||||
try:
|
||||
low_ctr_pages = await self._gsc_low_ctr_pages_tool({"start_date": start_date, "end_date": end_date, "limit": 10})
|
||||
cannibals = await self._gsc_cannibalization_candidates_tool({"start_date": start_date, "end_date": end_date, "limit": 10})
|
||||
striking = await self._gsc_striking_distance_tool({"start_date": start_date, "end_date": end_date, "limit": 10})
|
||||
declining = await self._gsc_declining_queries_tool({"start_date": start_date, "end_date": end_date, "limit": 10})
|
||||
|
||||
actions = []
|
||||
for p in low_ctr_pages.get("items", []):
|
||||
actions.append({
|
||||
"type": "update_titles_meta",
|
||||
"target_page": p.get("page"),
|
||||
"justification": f"Low CTR {p.get('ctr')}% with {p.get('impressions')} impressions",
|
||||
"evidence": p.get("evidence_queries", [])
|
||||
})
|
||||
for c in cannibals.get("items", []):
|
||||
actions.append({
|
||||
"type": "consolidate/internal_link",
|
||||
"target_page": c.get("recommended_target_page"),
|
||||
"justification": f"Cannibalization on query '{c.get('query')}'",
|
||||
"pages": c.get("pages", [])
|
||||
})
|
||||
for q in striking.get("items", []):
|
||||
actions.append({
|
||||
"type": "refresh_content",
|
||||
"target": "query",
|
||||
"query": q.get("query"),
|
||||
"justification": f"Striking distance at position {q.get('position')} with {q.get('impressions')} impressions"
|
||||
})
|
||||
for q in declining.get("items", []):
|
||||
actions.append({
|
||||
"type": "refresh_content",
|
||||
"target": "query",
|
||||
"query": q.get("query"),
|
||||
"justification": f"Clicks decline {q.get('prev_clicks')}→{q.get('curr_clicks')} ({q.get('drop_pct')}%)"
|
||||
})
|
||||
|
||||
return {
|
||||
"plan_name": "Default SEO Plan from GSC",
|
||||
"range": {"current": {"start": start_date, "end": end_date}},
|
||||
"actions": actions,
|
||||
"source": "gsc_cache",
|
||||
"timestamp": datetime.utcnow().isoformat()
|
||||
}
|
||||
except Exception as e:
|
||||
logger.error(f"default_seo_gsc_plan failed: {e}")
|
||||
return {"error": str(e)}
|
||||
|
||||
|
||||
class SocialAmplificationAgent(BaseALwrityAgent):
|
||||
"""
|
||||
|
||||
@@ -14,9 +14,9 @@ from .txtai_service import TxtaiIntelligenceService, TXTAI_AVAILABLE
|
||||
from services.intelligence.agents.core_agent_framework import BaseALwrityAgent
|
||||
from services.llm_providers.main_text_generation import llm_text_gen
|
||||
|
||||
# Optional txtai imports
|
||||
# Optional txtai imports (align with core agent framework)
|
||||
try:
|
||||
from txtai.pipeline import Agent, LLM
|
||||
from txtai import Agent, LLM
|
||||
except ImportError:
|
||||
Agent = None
|
||||
LLM = None
|
||||
@@ -28,9 +28,13 @@ class SharedLLMWrapper:
|
||||
|
||||
def generate(self, prompt: str, **kwargs) -> str:
|
||||
"""Generate text using the shared LLM provider."""
|
||||
# We ignore kwargs like 'max_tokens' as llm_text_gen handles defaults,
|
||||
# but we could map them if needed.
|
||||
return llm_text_gen(prompt, user_id=self.user_id)
|
||||
try:
|
||||
# We ignore kwargs like 'max_tokens' as llm_text_gen handles defaults,
|
||||
# but we could map them if needed.
|
||||
return llm_text_gen(prompt, user_id=self.user_id)
|
||||
except Exception as e:
|
||||
logger.error(f"SharedLLMWrapper failed to generate text: {e}")
|
||||
return f"[ERROR: Shared LLM generation failed for user {self.user_id}]"
|
||||
|
||||
def __call__(self, prompt: str, **kwargs) -> str:
|
||||
return self.generate(prompt, **kwargs)
|
||||
@@ -40,8 +44,9 @@ class LocalLLMWrapper:
|
||||
Lazily loads a local LLM via txtai.
|
||||
This prevents blocking server startup with heavy model loads.
|
||||
"""
|
||||
def __init__(self, model_path: str):
|
||||
def __init__(self, model_path: str, task: str = "text-generation"):
|
||||
self.model_path = model_path
|
||||
self.task = task
|
||||
self._llm = None
|
||||
|
||||
@property
|
||||
@@ -49,8 +54,9 @@ class LocalLLMWrapper:
|
||||
if self._llm is None:
|
||||
if LLM is None:
|
||||
raise ImportError("txtai.pipeline.LLM is not available")
|
||||
logger.info(f"Loading local LLM: {self.model_path}")
|
||||
self._llm = LLM(path=self.model_path)
|
||||
logger.info(f"Loading local LLM: {self.model_path} with task: {self.task}")
|
||||
# Explicitly set task to avoid 'text2text-generation' default failures
|
||||
self._llm = LLM(path=self.model_path, task=self.task)
|
||||
return self._llm
|
||||
|
||||
def __call__(self, prompt: str, **kwargs) -> str:
|
||||
@@ -67,11 +73,12 @@ class SIFBaseAgent(BaseALwrityAgent):
|
||||
|
||||
# 2. Local LLM for internal agent work (default for SIF agents)
|
||||
if llm is None:
|
||||
if TXTAI_AVAILABLE:
|
||||
# Use Lazy Local LLM
|
||||
llm = LocalLLMWrapper(model_name)
|
||||
if TXTAI_AVAILABLE and LLM is not None:
|
||||
# Use Lazy Local LLM when txtai LLM is available
|
||||
# Hardening: Specify 'text-generation' task to avoid text2text defaults
|
||||
llm = LocalLLMWrapper(model_name, task="text-generation")
|
||||
else:
|
||||
# Fallback to Shared if txtai not available
|
||||
# Fallback to Shared if txtai or LLM is not available
|
||||
llm = self.shared_llm
|
||||
|
||||
super().__init__(user_id, agent_type, model_name, llm)
|
||||
@@ -85,14 +92,18 @@ class SIFBaseAgent(BaseALwrityAgent):
|
||||
|
||||
def _create_txtai_agent(self):
|
||||
"""
|
||||
SIF agents use the intelligence service directly, but we can expose
|
||||
capabilities via a standard agent interface if needed.
|
||||
SIF agents primarily use the intelligence service directly, but we can expose
|
||||
capabilities via a standard agent interface if available.
|
||||
"""
|
||||
if not TXTAI_AVAILABLE:
|
||||
return None
|
||||
|
||||
# Return a simple agent that can use the LLM
|
||||
return Agent(llm=self.llm, tools=[])
|
||||
if not TXTAI_AVAILABLE or Agent is None:
|
||||
logger.debug(f"[{self.__class__.__name__}] txtai Agent not available, using fallback agent")
|
||||
return self._create_fallback_agent()
|
||||
|
||||
try:
|
||||
return Agent(llm=self.llm, tools=[])
|
||||
except Exception as e:
|
||||
logger.warning(f"[{self.__class__.__name__}] Failed to create txtai Agent: {e}")
|
||||
return self._create_fallback_agent()
|
||||
|
||||
class StrategyArchitectAgent(SIFBaseAgent):
|
||||
"""Agent for discovering content pillars and identifying strategic gaps."""
|
||||
|
||||
@@ -25,7 +25,18 @@ except ImportError:
|
||||
TXTAI_AVAILABLE = False
|
||||
|
||||
class TxtaiIntelligenceService:
|
||||
_instances = {}
|
||||
|
||||
def __new__(cls, user_id: str, *args, **kwargs):
|
||||
if user_id not in cls._instances:
|
||||
cls._instances[user_id] = super(TxtaiIntelligenceService, cls).__new__(cls)
|
||||
return cls._instances[user_id]
|
||||
|
||||
def __init__(self, user_id: str, model_path: Optional[str] = None, enable_caching: bool = True):
|
||||
# Singleton: prevent re-initialization if already initialized
|
||||
if getattr(self, "_singleton_initialized", False):
|
||||
return
|
||||
|
||||
self.user_id = user_id
|
||||
self.model_path = model_path or "sentence-transformers/all-MiniLM-L6-v2"
|
||||
self.index_path = f"workspace/workspace_{user_id}/indices/txtai"
|
||||
@@ -33,6 +44,11 @@ class TxtaiIntelligenceService:
|
||||
self._initialized = False
|
||||
self.enable_caching = enable_caching
|
||||
self.cache_manager = semantic_cache_manager if enable_caching else None
|
||||
self._backend = "faiss" # Default backend
|
||||
|
||||
# Mark as initialized for singleton pattern
|
||||
self._singleton_initialized = True
|
||||
|
||||
# Lazy initialization - do not initialize embeddings on startup
|
||||
# self._initialize_embeddings()
|
||||
|
||||
@@ -52,17 +68,26 @@ class TxtaiIntelligenceService:
|
||||
logger.debug(f"Model path: {self.model_path}")
|
||||
logger.debug(f"Index path: {self.index_path}")
|
||||
|
||||
# Close existing embeddings if any to release file locks
|
||||
if self.embeddings:
|
||||
try:
|
||||
if hasattr(self.embeddings, 'close'):
|
||||
self.embeddings.close()
|
||||
self.embeddings = None
|
||||
except Exception as close_err:
|
||||
logger.warning(f"Error closing existing embeddings: {close_err}")
|
||||
|
||||
# Ensure directory exists
|
||||
os.makedirs(os.path.dirname(self.index_path), exist_ok=True)
|
||||
logger.debug(f"Created index directory: {os.path.dirname(self.index_path)}")
|
||||
|
||||
# Initialize embeddings with optimal configuration for ALwrity use case
|
||||
# Hardening: Disabling quantization by default as it causes 'IndexIDMap' attribute errors with small indices on Windows
|
||||
self.embeddings = Embeddings({
|
||||
"path": self.model_path,
|
||||
"content": True, # Enable content storage for retrieval
|
||||
"objects": True, # Enable object storage for metadata
|
||||
"backend": "faiss", # Use Faiss for efficient similarity search
|
||||
"quantize": True, # Enable quantization for memory efficiency
|
||||
"backend": self._backend, # Use Faiss for efficient similarity search
|
||||
"batch": 32, # Batch size for processing
|
||||
"gpu": False, # Force CPU usage for compatibility
|
||||
"limit": 1000 # Maximum number of results for queries
|
||||
@@ -76,7 +101,12 @@ class TxtaiIntelligenceService:
|
||||
try:
|
||||
self.embeddings.load(self.index_path)
|
||||
logger.info(f"Successfully loaded existing txtai index for user {self.user_id}")
|
||||
logger.debug(f"Index contains {len(self.embeddings)} items")
|
||||
# Try to log count, handle if not supported
|
||||
try:
|
||||
count = self.embeddings.count() if hasattr(self.embeddings, 'count') else "unknown"
|
||||
logger.debug(f"Index contains {count} items")
|
||||
except:
|
||||
logger.debug("Index loaded (count unavailable)")
|
||||
except Exception as load_error:
|
||||
logger.warning(f"Failed to load existing index: {load_error}. Creating new index.")
|
||||
# Reset embeddings to create new index
|
||||
@@ -84,8 +114,7 @@ class TxtaiIntelligenceService:
|
||||
"path": self.model_path,
|
||||
"content": True,
|
||||
"objects": True,
|
||||
"backend": "faiss",
|
||||
"quantize": True,
|
||||
"backend": self._backend,
|
||||
"batch": 32,
|
||||
"gpu": False,
|
||||
"limit": 1000
|
||||
@@ -146,8 +175,15 @@ class TxtaiIntelligenceService:
|
||||
logger.error(f"Error indexing content for user {self.user_id}: {e}")
|
||||
logger.error(f"Full traceback: {traceback.format_exc()}")
|
||||
logger.error(f"Items count: {len(items) if items else 0}")
|
||||
if items and len(items) > 0:
|
||||
logger.error(f"Sample item structure: {type(items[0])}")
|
||||
|
||||
message = str(e)
|
||||
is_windows_lock_error = isinstance(e, PermissionError) or "WinError 32" in message
|
||||
if is_windows_lock_error:
|
||||
logger.warning(
|
||||
f"Txtai index save skipped for user {self.user_id} due to file lock. "
|
||||
f"The index will be retried on a future run."
|
||||
)
|
||||
return
|
||||
raise
|
||||
|
||||
async def search(self, query: str, limit: int = 5) -> List[Dict[str, Any]]:
|
||||
@@ -172,7 +208,20 @@ class TxtaiIntelligenceService:
|
||||
logger.debug(f"Cache miss for search query: '{query}'")
|
||||
|
||||
logger.debug(f"Searching for query: '{query}' with limit: {limit}")
|
||||
results = self.embeddings.search(query, limit=limit)
|
||||
try:
|
||||
results = self.embeddings.search(query, limit=limit)
|
||||
except AttributeError as ae:
|
||||
if "nprobe" in str(ae):
|
||||
logger.error(f"Detected known txtai/faiss IndexIDMap/nprobe incompatibility for user {self.user_id}. Attempting re-init with numpy backend fallback...")
|
||||
# Switch to numpy backend which doesn't have this issue
|
||||
self._backend = "numpy"
|
||||
self._initialize_embeddings()
|
||||
if self.embeddings:
|
||||
results = self.embeddings.search(query, limit=limit)
|
||||
else:
|
||||
raise ae
|
||||
else:
|
||||
raise ae
|
||||
|
||||
# Cache the results if caching is enabled
|
||||
if self.enable_caching and self.cache_manager and results:
|
||||
@@ -216,7 +265,19 @@ class TxtaiIntelligenceService:
|
||||
logger.debug(f"Cache miss for similarity calculation")
|
||||
|
||||
logger.debug(f"Calculating similarity between texts: '{text1[:50]}...' and '{text2[:50]}...'")
|
||||
similarity = self.embeddings.similarity(text1, text2)
|
||||
try:
|
||||
similarity = self.embeddings.similarity(text1, text2)
|
||||
except AttributeError as ae:
|
||||
if "nprobe" in str(ae):
|
||||
logger.error(f"Detected IndexIDMap nprobe error in similarity for user {self.user_id}. Falling back to numpy backend...")
|
||||
self._backend = "numpy"
|
||||
self._initialize_embeddings()
|
||||
if self.embeddings:
|
||||
similarity = self.embeddings.similarity(text1, text2)
|
||||
else:
|
||||
raise ae
|
||||
else:
|
||||
raise ae
|
||||
|
||||
# Cache the similarity result
|
||||
if self.enable_caching and self.cache_manager:
|
||||
@@ -272,7 +333,19 @@ class TxtaiIntelligenceService:
|
||||
# Use graph-based clustering if available
|
||||
# Perform a search to get graph structure
|
||||
sample_query = "content marketing digital strategy"
|
||||
graph_results = self.embeddings.search(sample_query, limit=10, graph=True)
|
||||
try:
|
||||
graph_results = self.embeddings.search(sample_query, limit=10, graph=True)
|
||||
except AttributeError as ae:
|
||||
if "nprobe" in str(ae):
|
||||
logger.error(f"Detected IndexIDMap nprobe error in cluster for user {self.user_id}. Falling back to numpy backend...")
|
||||
self._backend = "numpy"
|
||||
self._initialize_embeddings()
|
||||
if self.embeddings:
|
||||
graph_results = self.embeddings.search(sample_query, limit=10, graph=True)
|
||||
else:
|
||||
raise ae
|
||||
else:
|
||||
raise ae
|
||||
|
||||
if not graph_results:
|
||||
logger.warning(f"No graph results for clustering user {self.user_id}")
|
||||
@@ -306,7 +379,7 @@ class TxtaiIntelligenceService:
|
||||
logger.error(f"Full traceback: {traceback.format_exc()}")
|
||||
return self._fallback_clustering(min_score)
|
||||
|
||||
def _fallback_clustering(self, min_score: float) -> List[List[int]]:
|
||||
async def _fallback_clustering(self, min_score: float) -> List[List[int]]:
|
||||
"""Fallback clustering method when graph clustering is not available."""
|
||||
logger.info(f"Using fallback clustering for user {self.user_id}")
|
||||
|
||||
@@ -318,7 +391,8 @@ class TxtaiIntelligenceService:
|
||||
all_clusters = []
|
||||
|
||||
for query in sample_queries:
|
||||
results = self.embeddings.search(query, limit=5)
|
||||
# Use our search wrapper for hardening
|
||||
results = await self.search(query, limit=5)
|
||||
if results and results[0].get("score", 0) >= min_score:
|
||||
# Create a cluster from similar results
|
||||
cluster = [i for i, result in enumerate(results) if result.get("score", 0) >= min_score]
|
||||
@@ -393,9 +467,13 @@ class TxtaiIntelligenceService:
|
||||
return {"status": "not_initialized", "user_id": self.user_id}
|
||||
|
||||
try:
|
||||
# Get count of indexed items - txtai doesn't have a direct len() method
|
||||
# We'll estimate based on available data or return a placeholder
|
||||
index_size = getattr(self.embeddings, 'count', 0) or "unknown"
|
||||
# Get count of indexed items
|
||||
index_size = "unknown"
|
||||
if hasattr(self.embeddings, 'count'):
|
||||
try:
|
||||
index_size = self.embeddings.count()
|
||||
except:
|
||||
pass
|
||||
|
||||
return {
|
||||
"status": "active",
|
||||
@@ -410,5 +488,7 @@ class TxtaiIntelligenceService:
|
||||
return {"status": "error", "user_id": self.user_id, "error": str(e)}
|
||||
|
||||
def is_initialized(self) -> bool:
|
||||
"""Check if the service is properly initialized."""
|
||||
"""Check if the service is properly initialized, triggering lazy init if needed."""
|
||||
if not self._initialized:
|
||||
self._ensure_initialized()
|
||||
return self._initialized and self.embeddings is not None
|
||||
|
||||
Reference in New Issue
Block a user