Implement evidence-driven semantic gap detection
This commit is contained in:
@@ -7,6 +7,8 @@ Each agent leverages TxtaiIntelligenceService for semantic operations.
|
|||||||
import traceback
|
import traceback
|
||||||
import json
|
import json
|
||||||
import asyncio
|
import asyncio
|
||||||
|
import re
|
||||||
|
from collections import Counter
|
||||||
from typing import List, Dict, Any, Optional
|
from typing import List, Dict, Any, Optional
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from loguru import logger
|
from loguru import logger
|
||||||
@@ -177,23 +179,193 @@ class StrategyArchitectAgent(SIFBaseAgent):
|
|||||||
self._log_agent_operation("Finding semantic content gaps", competitor_count=len(competitor_indices))
|
self._log_agent_operation("Finding semantic content gaps", competitor_count=len(competitor_indices))
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# STUB: Implement cross-index comparison
|
documents = await self._fetch_index_documents()
|
||||||
# This would involve:
|
if not documents:
|
||||||
# 1. Getting user content topics/themes
|
logger.info(f"[{self.__class__.__name__}] No indexed documents available for gap detection")
|
||||||
# 2. Getting competitor content topics/themes
|
return []
|
||||||
# 3. Finding topics competitors cover but user doesn't
|
|
||||||
|
competitor_docs, user_docs = [], []
|
||||||
logger.info(f"[{self.__class__.__name__}] Found semantic gaps analysis stub")
|
allowed_competitor_ids = set(str(idx) for idx in competitor_indices) if competitor_indices else None
|
||||||
return [
|
for doc in documents:
|
||||||
{"topic": "Topic A", "priority": "high", "reason": "Competitor coverage gap"},
|
metadata = doc.get("metadata", {})
|
||||||
{"topic": "Topic B", "priority": "medium", "reason": "Emerging trend"}
|
doc_type = str(metadata.get("type", "")).lower()
|
||||||
]
|
if "competitor" in doc_type:
|
||||||
|
if allowed_competitor_ids and str(doc.get("id")) not in allowed_competitor_ids:
|
||||||
|
continue
|
||||||
|
competitor_docs.append(doc)
|
||||||
|
elif "user" in doc_type:
|
||||||
|
user_docs.append(doc)
|
||||||
|
|
||||||
|
if not competitor_docs or not user_docs:
|
||||||
|
logger.info(
|
||||||
|
f"[{self.__class__.__name__}] Insufficient split for gap analysis: "
|
||||||
|
f"user_docs={len(user_docs)}, competitor_docs={len(competitor_docs)}"
|
||||||
|
)
|
||||||
|
return []
|
||||||
|
|
||||||
|
competitor_topics = self._extract_topic_density(competitor_docs)
|
||||||
|
user_topics = self._extract_topic_density(user_docs)
|
||||||
|
|
||||||
|
gaps = []
|
||||||
|
for topic, competitor_density in competitor_topics.items():
|
||||||
|
user_density = user_topics.get(topic, 0.0)
|
||||||
|
density_gap = competitor_density - user_density
|
||||||
|
if density_gap <= 0.08:
|
||||||
|
continue
|
||||||
|
|
||||||
|
confidence = max(
|
||||||
|
0.0,
|
||||||
|
min(1.0, 0.35 + (density_gap * 1.5) + (competitor_density * 0.4))
|
||||||
|
)
|
||||||
|
priority = "high" if confidence >= 0.75 else "medium" if confidence >= 0.5 else "low"
|
||||||
|
gaps.append({
|
||||||
|
"topic": topic,
|
||||||
|
"priority": priority,
|
||||||
|
"reason": (
|
||||||
|
f"Competitors mention '{topic}' substantially more often "
|
||||||
|
f"(density {competitor_density:.2f} vs {user_density:.2f})."
|
||||||
|
),
|
||||||
|
"confidence": round(confidence, 3),
|
||||||
|
"topic_density": {
|
||||||
|
"competitor": round(competitor_density, 4),
|
||||||
|
"user": round(user_density, 4),
|
||||||
|
"gap": round(density_gap, 4)
|
||||||
|
},
|
||||||
|
"evidence": {
|
||||||
|
"competitor_sample_titles": self._sample_titles_for_topic(competitor_docs, topic),
|
||||||
|
"user_sample_titles": self._sample_titles_for_topic(user_docs, topic),
|
||||||
|
"competitor_doc_count": len(competitor_docs),
|
||||||
|
"user_doc_count": len(user_docs)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
gaps.sort(
|
||||||
|
key=lambda item: (
|
||||||
|
item.get("confidence", 0),
|
||||||
|
item.get("topic_density", {}).get("gap", 0)
|
||||||
|
),
|
||||||
|
reverse=True
|
||||||
|
)
|
||||||
|
return gaps[:12]
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"[{self.__class__.__name__}] Failed to find semantic gaps: {e}")
|
logger.error(f"[{self.__class__.__name__}] Failed to find semantic gaps: {e}")
|
||||||
logger.error(f"[{self.__class__.__name__}] Full traceback: {traceback.format_exc()}")
|
logger.error(f"[{self.__class__.__name__}] Full traceback: {traceback.format_exc()}")
|
||||||
return []
|
return []
|
||||||
|
|
||||||
|
async def _fetch_index_documents(self) -> List[Dict[str, Any]]:
    """Fetch indexed documents and normalize metadata from txtai result rows.

    SQL-style queries are tried first (only when the index reports a document
    count), followed by a few broad semantic queries as a fallback. Rows are
    de-duplicated across all queries.

    Returns:
        A list of ``{"id": str, "text": str, "metadata": dict}`` entries. The
        list is empty when the intelligence service is not initialized or no
        query returned rows.
    """
    if not self.intelligence.is_initialized() or not self.intelligence.embeddings:
        return []

    embeddings = self.intelligence.embeddings

    # The reported document count doubles as the query limit; 0 means unknown.
    limit = 0
    if hasattr(embeddings, "count"):
        try:
            limit = int(embeddings.count())
        except Exception:
            # Best effort: an index that cannot report its size is still searchable.
            limit = 0

    candidate_queries: List[str] = []
    if limit > 0:
        candidate_queries.extend([
            f"select id, text, object from txtai limit {limit}",
            f"select id, text, tags from txtai limit {limit}",
        ])
    candidate_queries.extend(["marketing", "content", "seo", "strategy", "social media"])

    documents: List[Dict[str, Any]] = []
    seen_keys = set()
    for query in candidate_queries:
        is_sql = query.startswith("select")
        query_limit = limit if is_sql and limit > 0 else max(10, limit or 50)
        try:
            rows = embeddings.search(query, limit=query_limit)
        except Exception as exc:
            # A failing candidate (e.g. unknown column) must not abort the others,
            # but leave a trace so a fully failing index is diagnosable.
            logger.debug(
                f"[{self.__class__.__name__}] Index query failed, trying next candidate: {exc}"
            )
            continue

        for raw_row in rows or []:
            if isinstance(raw_row, dict):
                row = raw_row
            elif isinstance(raw_row, (tuple, list)) and raw_row:
                # Indexes without content storage return (id, score) pairs.
                row = {"id": raw_row[0]}
                if len(raw_row) > 1:
                    row["score"] = raw_row[1]
            else:
                continue

            # str(None) would yield "None" and merge every id-less row into one.
            raw_id = row.get("id")
            doc_id = "" if raw_id is None else str(raw_id)
            text = row.get("text") or ""

            # Fall back to the text itself (not text+score): the same document
            # scores differently per query and would otherwise be duplicated.
            dedupe_key = ("id", doc_id) if doc_id else ("text", text)
            if dedupe_key in seen_keys:
                continue
            seen_keys.add(dedupe_key)

            documents.append({
                "id": doc_id,
                "text": text,
                "metadata": self._normalize_metadata(row),
            })

        # Once as many documents as the index holds were collected, stop querying.
        if limit > 0 and len(documents) >= limit:
            break

    return documents
|
||||||
|
|
||||||
|
def _normalize_metadata(self, row: Dict[str, Any]) -> Dict[str, Any]:
    """Normalize the metadata payload of a txtai search row.

    The keys ``object``, ``tags``, ``metadata`` and ``meta`` are checked in that
    order. The first value that is a dict, or a string holding a JSON object,
    is returned.

    Args:
        row: A search result row. Anything that is not a dict (for example an
            ``(id, score)`` tuple) carries no metadata and yields ``{}``.

    Returns:
        The metadata dict, or ``{}`` when none of the keys holds one.
    """
    if not isinstance(row, dict):
        return {}

    for key in ("object", "tags", "metadata", "meta"):
        payload = row.get(key)
        if isinstance(payload, dict):
            return payload
        if isinstance(payload, str):
            try:
                parsed = json.loads(payload)
            except ValueError:
                # Not JSON (e.g. a plain "a,b" tag string): try the next key.
                continue
            if isinstance(parsed, dict):
                return parsed
    return {}
|
||||||
|
|
||||||
|
def _extract_topic_density(self, documents: List[Dict[str, Any]]) -> Dict[str, float]:
    """Extract topic density from document metadata and titles.

    Each document contributes a set of lower-cased topic candidates taken from
    its topic-like metadata fields and from the words of its title (or the
    first 120 characters of its text when no title is present). A topic's
    density is the fraction of documents that mention it.

    Args:
        documents: Entries shaped like ``{"text": ..., "metadata": {...}}``.
            Missing or ``None`` text/metadata is tolerated.

    Returns:
        Mapping of topic to density in ``(0, 1]``. Topics seen in fewer than
        two documents are omitted.
    """
    # Function words pass the 4-character filter but are never real topics;
    # left in, they surface as bogus "content gaps". Applied to title words
    # only: explicit metadata topics were chosen by an author and are kept.
    title_stopwords = frozenset({
        "about", "also", "been", "could", "does", "from", "have", "here",
        "into", "just", "more", "most", "only", "over", "should", "some",
        "such", "than", "that", "their", "them", "then", "there", "these",
        "they", "this", "those", "very", "were", "what", "when", "where",
        "which", "while", "will", "with", "would", "your",
    })
    title_word = re.compile(r"[a-zA-Z][a-zA-Z\-]{3,}")
    topic_counter: Counter = Counter()

    for doc in documents:
        metadata = doc.get("metadata")
        if not isinstance(metadata, dict):
            metadata = {}

        candidates: List[str] = []
        for key in ("topics", "topic", "keywords", "keyword", "tags", "category"):
            value = metadata.get(key)
            if isinstance(value, list):
                candidates.extend(str(v) for v in value if v)
            elif isinstance(value, str) and value.strip():
                candidates.extend(re.split(r"[,|/]", value))

        title = metadata.get("title") or (doc.get("text") or "")[:120]
        if title:
            candidates.extend(
                word
                for word in title_word.findall(str(title).lower())
                if word not in title_stopwords
            )

        # A set, so a topic counts at most once per document.
        normalized = {
            item.strip().lower()
            for item in candidates
            if item and len(item.strip()) >= 4 and not item.strip().isdigit()
        }
        topic_counter.update(normalized)

    total_docs = max(1, len(documents))
    return {
        topic: count / total_docs
        for topic, count in topic_counter.items()
        if count >= 2
    }
|
||||||
|
|
||||||
|
def _sample_titles_for_topic(self, documents: List[Dict[str, Any]], topic: str, limit: int = 3) -> List[str]:
    """Return sample titles of documents that mention a topic.

    A document matches when the lower-cased topic occurs in its title (or the
    first 100 characters of its text when no title is present) or in any of
    its metadata values.

    Args:
        documents: Entries shaped like ``{"text": ..., "metadata": {...}}``.
        topic: Topic to look for; matched case-insensitively as a substring.
        limit: Maximum number of titles to return.

    Returns:
        Up to ``limit`` titles in document order; ``[]`` when ``limit <= 0``.
    """
    if limit <= 0:
        return []

    samples: List[str] = []
    topic_lower = topic.lower()
    for doc in documents:
        metadata = doc.get("metadata")
        if not isinstance(metadata, dict):
            metadata = {}

        title = metadata.get("title") or (doc.get("text") or "")[:100]
        if not title:
            continue

        # Values only: including keys would make topics such as "type" or
        # "title" match every document. ensure_ascii=False keeps non-ASCII
        # text searchable instead of turning it into \uXXXX escapes.
        values_blob = json.dumps(list(metadata.values()), default=str, ensure_ascii=False)
        haystack = f"{title} {values_blob}".lower()
        if topic_lower in haystack:
            samples.append(str(title))
            if len(samples) >= limit:
                break

    return samples
|
||||||
|
|
||||||
class ContentGuardianAgent(SIFBaseAgent):
|
class ContentGuardianAgent(SIFBaseAgent):
|
||||||
"""Agent for preventing cannibalization and ensuring content originality."""
|
"""Agent for preventing cannibalization and ensuring content originality."""
|
||||||
|
|
||||||
@@ -2436,4 +2608,3 @@ class SocialAmplificationAgent(BaseALwrityAgent):
|
|||||||
"status": "scheduled",
|
"status": "scheduled",
|
||||||
"timestamp": datetime.utcnow().isoformat()
|
"timestamp": datetime.utcnow().isoformat()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -7,6 +7,8 @@ Each agent leverages TxtaiIntelligenceService for semantic operations.
|
|||||||
import traceback
|
import traceback
|
||||||
import json
|
import json
|
||||||
import asyncio
|
import asyncio
|
||||||
|
import re
|
||||||
|
from collections import Counter
|
||||||
from typing import List, Dict, Any, Optional
|
from typing import List, Dict, Any, Optional
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from loguru import logger
|
from loguru import logger
|
||||||
@@ -212,23 +214,193 @@ class StrategyArchitectAgent(SIFBaseAgent):
|
|||||||
self._log_agent_operation("Finding semantic content gaps", competitor_count=len(competitor_indices))
|
self._log_agent_operation("Finding semantic content gaps", competitor_count=len(competitor_indices))
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# STUB: Implement cross-index comparison
|
documents = await self._fetch_index_documents()
|
||||||
# This would involve:
|
if not documents:
|
||||||
# 1. Getting user content topics/themes
|
logger.info(f"[{self.__class__.__name__}] No indexed documents available for gap detection")
|
||||||
# 2. Getting competitor content topics/themes
|
return []
|
||||||
# 3. Finding topics competitors cover but user doesn't
|
|
||||||
|
competitor_docs, user_docs = [], []
|
||||||
logger.info(f"[{self.__class__.__name__}] Found semantic gaps analysis stub")
|
allowed_competitor_ids = set(str(idx) for idx in competitor_indices) if competitor_indices else None
|
||||||
return [
|
for doc in documents:
|
||||||
{"topic": "Topic A", "priority": "high", "reason": "Competitor coverage gap"},
|
metadata = doc.get("metadata", {})
|
||||||
{"topic": "Topic B", "priority": "medium", "reason": "Emerging trend"}
|
doc_type = str(metadata.get("type", "")).lower()
|
||||||
]
|
if "competitor" in doc_type:
|
||||||
|
if allowed_competitor_ids and str(doc.get("id")) not in allowed_competitor_ids:
|
||||||
|
continue
|
||||||
|
competitor_docs.append(doc)
|
||||||
|
elif "user" in doc_type:
|
||||||
|
user_docs.append(doc)
|
||||||
|
|
||||||
|
if not competitor_docs or not user_docs:
|
||||||
|
logger.info(
|
||||||
|
f"[{self.__class__.__name__}] Insufficient split for gap analysis: "
|
||||||
|
f"user_docs={len(user_docs)}, competitor_docs={len(competitor_docs)}"
|
||||||
|
)
|
||||||
|
return []
|
||||||
|
|
||||||
|
competitor_topics = self._extract_topic_density(competitor_docs)
|
||||||
|
user_topics = self._extract_topic_density(user_docs)
|
||||||
|
|
||||||
|
gaps = []
|
||||||
|
for topic, competitor_density in competitor_topics.items():
|
||||||
|
user_density = user_topics.get(topic, 0.0)
|
||||||
|
density_gap = competitor_density - user_density
|
||||||
|
if density_gap <= 0.08:
|
||||||
|
continue
|
||||||
|
|
||||||
|
confidence = max(
|
||||||
|
0.0,
|
||||||
|
min(1.0, 0.35 + (density_gap * 1.5) + (competitor_density * 0.4))
|
||||||
|
)
|
||||||
|
priority = "high" if confidence >= 0.75 else "medium" if confidence >= 0.5 else "low"
|
||||||
|
gaps.append({
|
||||||
|
"topic": topic,
|
||||||
|
"priority": priority,
|
||||||
|
"reason": (
|
||||||
|
f"Competitors mention '{topic}' substantially more often "
|
||||||
|
f"(density {competitor_density:.2f} vs {user_density:.2f})."
|
||||||
|
),
|
||||||
|
"confidence": round(confidence, 3),
|
||||||
|
"topic_density": {
|
||||||
|
"competitor": round(competitor_density, 4),
|
||||||
|
"user": round(user_density, 4),
|
||||||
|
"gap": round(density_gap, 4)
|
||||||
|
},
|
||||||
|
"evidence": {
|
||||||
|
"competitor_sample_titles": self._sample_titles_for_topic(competitor_docs, topic),
|
||||||
|
"user_sample_titles": self._sample_titles_for_topic(user_docs, topic),
|
||||||
|
"competitor_doc_count": len(competitor_docs),
|
||||||
|
"user_doc_count": len(user_docs)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
gaps.sort(
|
||||||
|
key=lambda item: (
|
||||||
|
item.get("confidence", 0),
|
||||||
|
item.get("topic_density", {}).get("gap", 0)
|
||||||
|
),
|
||||||
|
reverse=True
|
||||||
|
)
|
||||||
|
return gaps[:12]
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"[{self.__class__.__name__}] Failed to find semantic gaps: {e}")
|
logger.error(f"[{self.__class__.__name__}] Failed to find semantic gaps: {e}")
|
||||||
logger.error(f"[{self.__class__.__name__}] Full traceback: {traceback.format_exc()}")
|
logger.error(f"[{self.__class__.__name__}] Full traceback: {traceback.format_exc()}")
|
||||||
return []
|
return []
|
||||||
|
|
||||||
|
async def _fetch_index_documents(self) -> List[Dict[str, Any]]:
    """Fetch indexed documents and normalize metadata from txtai result rows.

    SQL-style queries are tried first (only when the index reports a document
    count), followed by a few broad semantic queries as a fallback. Rows are
    de-duplicated across all queries.

    Returns:
        A list of ``{"id": str, "text": str, "metadata": dict}`` entries. The
        list is empty when the intelligence service is not initialized or no
        query returned rows.
    """
    if not self.intelligence.is_initialized() or not self.intelligence.embeddings:
        return []

    embeddings = self.intelligence.embeddings

    # The reported document count doubles as the query limit; 0 means unknown.
    limit = 0
    if hasattr(embeddings, "count"):
        try:
            limit = int(embeddings.count())
        except Exception:
            # Best effort: an index that cannot report its size is still searchable.
            limit = 0

    candidate_queries: List[str] = []
    if limit > 0:
        candidate_queries.extend([
            f"select id, text, object from txtai limit {limit}",
            f"select id, text, tags from txtai limit {limit}",
        ])
    candidate_queries.extend(["marketing", "content", "seo", "strategy", "social media"])

    documents: List[Dict[str, Any]] = []
    seen_keys = set()
    for query in candidate_queries:
        is_sql = query.startswith("select")
        query_limit = limit if is_sql and limit > 0 else max(10, limit or 50)
        try:
            rows = embeddings.search(query, limit=query_limit)
        except Exception as exc:
            # A failing candidate (e.g. unknown column) must not abort the others,
            # but leave a trace so a fully failing index is diagnosable.
            logger.debug(
                f"[{self.__class__.__name__}] Index query failed, trying next candidate: {exc}"
            )
            continue

        for raw_row in rows or []:
            if isinstance(raw_row, dict):
                row = raw_row
            elif isinstance(raw_row, (tuple, list)) and raw_row:
                # Indexes without content storage return (id, score) pairs.
                row = {"id": raw_row[0]}
                if len(raw_row) > 1:
                    row["score"] = raw_row[1]
            else:
                continue

            # str(None) would yield "None" and merge every id-less row into one.
            raw_id = row.get("id")
            doc_id = "" if raw_id is None else str(raw_id)
            text = row.get("text") or ""

            # Fall back to the text itself (not text+score): the same document
            # scores differently per query and would otherwise be duplicated.
            dedupe_key = ("id", doc_id) if doc_id else ("text", text)
            if dedupe_key in seen_keys:
                continue
            seen_keys.add(dedupe_key)

            documents.append({
                "id": doc_id,
                "text": text,
                "metadata": self._normalize_metadata(row),
            })

        # Once as many documents as the index holds were collected, stop querying.
        if limit > 0 and len(documents) >= limit:
            break

    return documents
|
||||||
|
|
||||||
|
def _normalize_metadata(self, row: Dict[str, Any]) -> Dict[str, Any]:
    """Normalize the metadata payload of a txtai search row.

    The keys ``object``, ``tags``, ``metadata`` and ``meta`` are checked in that
    order. The first value that is a dict, or a string holding a JSON object,
    is returned.

    Args:
        row: A search result row. Anything that is not a dict (for example an
            ``(id, score)`` tuple) carries no metadata and yields ``{}``.

    Returns:
        The metadata dict, or ``{}`` when none of the keys holds one.
    """
    if not isinstance(row, dict):
        return {}

    for key in ("object", "tags", "metadata", "meta"):
        payload = row.get(key)
        if isinstance(payload, dict):
            return payload
        if isinstance(payload, str):
            try:
                parsed = json.loads(payload)
            except ValueError:
                # Not JSON (e.g. a plain "a,b" tag string): try the next key.
                continue
            if isinstance(parsed, dict):
                return parsed
    return {}
|
||||||
|
|
||||||
|
def _extract_topic_density(self, documents: List[Dict[str, Any]]) -> Dict[str, float]:
    """Extract topic density from document metadata and titles.

    Each document contributes a set of lower-cased topic candidates taken from
    its topic-like metadata fields and from the words of its title (or the
    first 120 characters of its text when no title is present). A topic's
    density is the fraction of documents that mention it.

    Args:
        documents: Entries shaped like ``{"text": ..., "metadata": {...}}``.
            Missing or ``None`` text/metadata is tolerated.

    Returns:
        Mapping of topic to density in ``(0, 1]``. Topics seen in fewer than
        two documents are omitted.
    """
    # Function words pass the 4-character filter but are never real topics;
    # left in, they surface as bogus "content gaps". Applied to title words
    # only: explicit metadata topics were chosen by an author and are kept.
    title_stopwords = frozenset({
        "about", "also", "been", "could", "does", "from", "have", "here",
        "into", "just", "more", "most", "only", "over", "should", "some",
        "such", "than", "that", "their", "them", "then", "there", "these",
        "they", "this", "those", "very", "were", "what", "when", "where",
        "which", "while", "will", "with", "would", "your",
    })
    title_word = re.compile(r"[a-zA-Z][a-zA-Z\-]{3,}")
    topic_counter: Counter = Counter()

    for doc in documents:
        metadata = doc.get("metadata")
        if not isinstance(metadata, dict):
            metadata = {}

        candidates: List[str] = []
        for key in ("topics", "topic", "keywords", "keyword", "tags", "category"):
            value = metadata.get(key)
            if isinstance(value, list):
                candidates.extend(str(v) for v in value if v)
            elif isinstance(value, str) and value.strip():
                candidates.extend(re.split(r"[,|/]", value))

        title = metadata.get("title") or (doc.get("text") or "")[:120]
        if title:
            candidates.extend(
                word
                for word in title_word.findall(str(title).lower())
                if word not in title_stopwords
            )

        # A set, so a topic counts at most once per document.
        normalized = {
            item.strip().lower()
            for item in candidates
            if item and len(item.strip()) >= 4 and not item.strip().isdigit()
        }
        topic_counter.update(normalized)

    total_docs = max(1, len(documents))
    return {
        topic: count / total_docs
        for topic, count in topic_counter.items()
        if count >= 2
    }
|
||||||
|
|
||||||
|
def _sample_titles_for_topic(self, documents: List[Dict[str, Any]], topic: str, limit: int = 3) -> List[str]:
    """Return sample titles of documents that mention a topic.

    A document matches when the lower-cased topic occurs in its title (or the
    first 100 characters of its text when no title is present) or in any of
    its metadata values.

    Args:
        documents: Entries shaped like ``{"text": ..., "metadata": {...}}``.
        topic: Topic to look for; matched case-insensitively as a substring.
        limit: Maximum number of titles to return.

    Returns:
        Up to ``limit`` titles in document order; ``[]`` when ``limit <= 0``.
    """
    if limit <= 0:
        return []

    samples: List[str] = []
    topic_lower = topic.lower()
    for doc in documents:
        metadata = doc.get("metadata")
        if not isinstance(metadata, dict):
            metadata = {}

        title = metadata.get("title") or (doc.get("text") or "")[:100]
        if not title:
            continue

        # Values only: including keys would make topics such as "type" or
        # "title" match every document. ensure_ascii=False keeps non-ASCII
        # text searchable instead of turning it into \uXXXX escapes.
        values_blob = json.dumps(list(metadata.values()), default=str, ensure_ascii=False)
        haystack = f"{title} {values_blob}".lower()
        if topic_lower in haystack:
            samples.append(str(title))
            if len(samples) >= limit:
                break

    return samples
|
||||||
|
|
||||||
class ContentGuardianAgent(SIFBaseAgent):
|
class ContentGuardianAgent(SIFBaseAgent):
|
||||||
"""Agent for preventing cannibalization and ensuring content originality."""
|
"""Agent for preventing cannibalization and ensuring content originality."""
|
||||||
|
|
||||||
|
|||||||
@@ -974,60 +974,36 @@ class SIFIntegrationService:
|
|||||||
return pillars
|
return pillars
|
||||||
|
|
||||||
async def _identify_semantic_gaps(self, website_data: Dict[str, Any]) -> List[Dict[str, Any]]:
    """Identify semantic gaps using StrategyArchitectAgent evidence-driven analysis.

    Delegates detection to ``self.strategy_agent.find_semantic_gaps`` (creating
    the agent on first use) and reshapes each gap into the coverage-score
    format returned by this service.

    Args:
        website_data: Request payload; only ``competitor_indices`` is read.

    Returns:
        One dict per gap with ``topic``, ``priority``, ``reason``,
        ``confidence``, ``current_coverage_score``,
        ``competitor_coverage_score``, ``gap_severity``, ``suggested_action``,
        ``topic_density`` and ``evidence``. Returns ``[]`` on any failure.
    """
    try:
        # getattr: an instance that never had the attribute set must still get
        # an agent instead of failing into the except branch on every call.
        if not getattr(self, "strategy_agent", None):
            from .sif_agents import StrategyArchitectAgent
            self.strategy_agent = StrategyArchitectAgent(self.intelligence_service, user_id=self.user_id)

        competitor_ids = website_data.get("competitor_indices", []) or []
        gaps = await self.strategy_agent.find_semantic_gaps(competitor_indices=competitor_ids)

        normalized_gaps = []
        for gap in gaps or []:
            # "or" rather than a .get() default: a key that is present but None
            # must not crash the loop and discard every other gap.
            density = gap.get("topic_density") or {}
            topic = gap.get("topic")
            priority = gap.get("priority") or "medium"
            normalized_gaps.append({
                "topic": topic,
                "priority": priority,
                "reason": gap.get("reason") or "Competitor coverage gap",
                "confidence": gap.get("confidence") or 0.0,
                "current_coverage_score": density.get("user", 0.0),
                "competitor_coverage_score": density.get("competitor", 0.0),
                "gap_severity": priority,
                "suggested_action": f"Create dedicated content for '{topic or 'this topic'}'",
                "topic_density": density,
                "evidence": gap.get("evidence") or {},
            })

        return normalized_gaps

    except Exception as e:
        logger.error(f"Error identifying semantic gaps: {e}")
        return []
|
|
||||||
|
|
||||||
async def _analyze_competitor_semantics(self, website_data: Dict[str, Any]) -> Dict[str, Any]:
|
async def _analyze_competitor_semantics(self, website_data: Dict[str, Any]) -> Dict[str, Any]:
|
||||||
"""Analyze competitor semantic positioning."""
|
"""Analyze competitor semantic positioning."""
|
||||||
|
|||||||
@@ -52,9 +52,9 @@ class SIFOnboardingIntegration:
|
|||||||
self.harvester = SemanticHarvesterService()
|
self.harvester = SemanticHarvesterService()
|
||||||
|
|
||||||
# Initialize agents
|
# Initialize agents
|
||||||
self.strategy_agent = StrategyArchitectAgent(self.intelligence)
|
self.strategy_agent = StrategyArchitectAgent(self.intelligence, user_id)
|
||||||
self.guardian_agent = ContentGuardianAgent(self.intelligence)
|
self.guardian_agent = ContentGuardianAgent(self.intelligence, user_id)
|
||||||
self.link_agent = LinkGraphAgent(self.intelligence)
|
self.link_agent = LinkGraphAgent(self.intelligence, user_id)
|
||||||
|
|
||||||
logger.info(f"[SIFOnboarding] Initialized for user {user_id}")
|
logger.info(f"[SIFOnboarding] Initialized for user {user_id}")
|
||||||
|
|
||||||
@@ -254,7 +254,23 @@ class SIFOnboardingIntegration:
|
|||||||
"priority": "high",
|
"priority": "high",
|
||||||
"title": "Fill Content Gaps",
|
"title": "Fill Content Gaps",
|
||||||
"description": f"Competitors are covering {len(semantic_gaps)} topics you haven't addressed.",
|
"description": f"Competitors are covering {len(semantic_gaps)} topics you haven't addressed.",
|
||||||
"action_items": [f"Create content about: {gap.get('topic', 'Unknown topic')}" for gap in semantic_gaps[:5]]
|
"action_items": [
|
||||||
|
(
|
||||||
|
f"Create content about: {gap.get('topic', 'Unknown topic')} "
|
||||||
|
f"({gap.get('priority', 'medium')} priority) - {gap.get('reason', 'Coverage gap identified')}"
|
||||||
|
)
|
||||||
|
for gap in semantic_gaps[:5]
|
||||||
|
],
|
||||||
|
"evidence": [
|
||||||
|
{
|
||||||
|
"topic": gap.get("topic"),
|
||||||
|
"priority": gap.get("priority"),
|
||||||
|
"confidence": gap.get("confidence"),
|
||||||
|
"topic_density": gap.get("topic_density"),
|
||||||
|
"competitor_sample_titles": gap.get("evidence", {}).get("competitor_sample_titles", [])
|
||||||
|
}
|
||||||
|
for gap in semantic_gaps[:5]
|
||||||
|
]
|
||||||
})
|
})
|
||||||
|
|
||||||
# Theme-based recommendations
|
# Theme-based recommendations
|
||||||
@@ -448,4 +464,4 @@ async def discover_competitors(request: CompetitorDiscoveryRequest, user=Depends
|
|||||||
"content_analysis": enhanced_results["content_analysis"],
|
"content_analysis": enhanced_results["content_analysis"],
|
||||||
"strategic_recommendations": enhanced_results["semantic_insights"]["strategic_recommendations"]
|
"strategic_recommendations": enhanced_results["semantic_insights"]["strategic_recommendations"]
|
||||||
}
|
}
|
||||||
"""
|
"""
|
||||||
|
|||||||
Reference in New Issue
Block a user