Merge PR #364: Add competitor-aware originality checks and fix agent initialization

This commit is contained in:
ajaysi
2026-03-03 18:57:46 +05:30
4 changed files with 219 additions and 16 deletions

View File

@@ -422,15 +422,57 @@ class ContentGuardianAgent(SIFBaseAgent):
logger.warning(f"[{self.__class__.__name__}] Text too short for meaningful originality check")
return {"originality_score": 0.0, "reason": "Text too short"}
# STUB: Implement cross-index search against competitor content
# This would search the text against a competitor-specific index
logger.info(f"[{self.__class__.__name__}] Originality verification stub completed")
query = text.strip()
competitor_results = []
method = "user_index_competitor_filter"
if competitor_index is not None and hasattr(competitor_index, "search"):
method = "competitor_index_search"
raw_results = competitor_index.search(query, limit=5)
if asyncio.iscoroutine(raw_results):
raw_results = await raw_results
competitor_results = raw_results or []
else:
raw_results = await self.intelligence.search(query, limit=10)
for result in raw_results or []:
metadata_raw = result.get("object")
metadata = metadata_raw if isinstance(metadata_raw, dict) else {}
if not metadata and isinstance(metadata_raw, str):
try:
metadata = json.loads(metadata_raw)
except Exception:
metadata = {}
doc_type = str((metadata or {}).get("type", "")).lower()
source = str((metadata or {}).get("source", "")).lower()
if "competitor" in doc_type or "competitor" in source:
competitor_results.append(result)
if not competitor_results:
return {
"originality_score": 1.0,
"confidence": 0.6,
"method": method,
"notes": "No competitor overlap detected in available index"
}
top_match = max(competitor_results, key=lambda item: float(item.get("score", 0.0)))
top_score = max(0.0, min(1.0, float(top_match.get("score", 0.0))))
originality_score = max(0.0, round(1.0 - top_score, 4))
confidence = round(min(1.0, 0.55 + (min(len(competitor_results), 5) * 0.07)), 3)
warning = originality_score < self.ORIGINALITY_THRESHOLD
return {
"originality_score": 0.95, # Placeholder
"confidence": 0.8,
"method": "semantic_comparison",
"notes": "Competitor index integration pending"
"originality_score": originality_score,
"confidence": confidence,
"method": method,
"warning": warning,
"threshold": self.ORIGINALITY_THRESHOLD,
"top_competitor_match": {
"id": top_match.get("id"),
"score": round(top_score, 4)
},
"matches_evaluated": len(competitor_results)
}
except Exception as e:

View File

@@ -572,15 +572,57 @@ class ContentGuardianAgent(SIFBaseAgent):
logger.warning(f"[{self.__class__.__name__}] Text too short for meaningful originality check")
return {"originality_score": 0.0, "reason": "Text too short"}
# STUB: Implement cross-index search against competitor content
# This would search the text against a competitor-specific index
logger.info(f"[{self.__class__.__name__}] Originality verification stub completed")
query = text.strip()
competitor_results = []
method = "user_index_competitor_filter"
if competitor_index is not None and hasattr(competitor_index, "search"):
method = "competitor_index_search"
raw_results = competitor_index.search(query, limit=5)
if asyncio.iscoroutine(raw_results):
raw_results = await raw_results
competitor_results = raw_results or []
else:
raw_results = await self.intelligence.search(query, limit=10)
for result in raw_results or []:
metadata_raw = result.get("object")
metadata = metadata_raw if isinstance(metadata_raw, dict) else {}
if not metadata and isinstance(metadata_raw, str):
try:
metadata = json.loads(metadata_raw)
except Exception:
metadata = {}
doc_type = str((metadata or {}).get("type", "")).lower()
source = str((metadata or {}).get("source", "")).lower()
if "competitor" in doc_type or "competitor" in source:
competitor_results.append(result)
if not competitor_results:
return {
"originality_score": 1.0,
"confidence": 0.6,
"method": method,
"notes": "No competitor overlap detected in available index"
}
top_match = max(competitor_results, key=lambda item: float(item.get("score", 0.0)))
top_score = max(0.0, min(1.0, float(top_match.get("score", 0.0))))
originality_score = max(0.0, round(1.0 - top_score, 4))
confidence = round(min(1.0, 0.55 + (min(len(competitor_results), 5) * 0.07)), 3)
warning = originality_score < self.ORIGINALITY_THRESHOLD
return {
"originality_score": 0.95, # Placeholder
"confidence": 0.8,
"method": "semantic_comparison",
"notes": "Competitor index integration pending"
"originality_score": originality_score,
"confidence": confidence,
"method": method,
"warning": warning,
"threshold": self.ORIGINALITY_THRESHOLD,
"top_competitor_match": {
"id": top_match.get("id"),
"score": round(top_score, 4)
},
"matches_evaluated": len(competitor_results)
}
except Exception as e:

View File

@@ -0,0 +1,89 @@
import asyncio
import unittest
import sys
from pathlib import Path
from unittest.mock import patch
ROOT = Path(__file__).resolve().parents[1]
if str(ROOT) not in sys.path:
sys.path.insert(0, str(ROOT))
from services.intelligence.monitoring.semantic_dashboard import RealTimeSemanticMonitor, SemanticHealthMetric
from services.today_workflow_service import _ensure_pillar_coverage, PILLAR_IDS
from services.intelligence.sif_agents import ContentGuardianAgent as SifGuardian
from services.intelligence.agents.specialized_agents import ContentGuardianAgent as SpecializedGuardian
class _FakeIntelligence:
    """Test double for the agent's intelligence service.

    ``search`` is a coroutine that hands back the canned result list given
    at construction time, regardless of the query or limit passed in.
    """

    def __init__(self, results=None):
        # A missing (or empty) argument falls back to a fresh empty list.
        self._results = results if results else []

    async def search(self, query: str, limit: int = 10):
        """Return the canned results; ``query`` and ``limit`` are ignored."""
        return self._results
class _FakeCompetitorIndex:
    """Test double for a competitor index exposing an async ``search``.

    Every call yields the same two competitor hits, highest score first.
    """

    async def search(self, query: str, limit: int = 5):
        """Return two fixed competitor matches; arguments are ignored."""
        canned_hits = (("comp-1", 0.82), ("comp-2", 0.65))
        # Build a new list of new dicts on every call so callers cannot
        # leak mutations into later searches.
        return [{"id": hit_id, "score": hit_score} for hit_id, hit_score in canned_hits]
class SIFReleaseReadinessTests(unittest.IsolatedAsyncioTestCase):
    """Release-gate regression tests for the SIF Today workflow.

    Covers four gates: the strategy architect agent is initialised in a
    single place, the semantic health check returns one aggregated
    ``SemanticHealthMetric``, ``verify_originality`` reports scores taken
    from a competitor index, and pillar coverage backfills missing pillars.
    """

    # Sample input intended to be long enough to get past the
    # "Text too short" early return in ``verify_originality``.
    _ORIGINALITY_SAMPLE_TEXT = "This is sufficiently long text for originality analysis."

    async def _assert_originality_uses_competitor_index(self, guardian_cls):
        """Run ``verify_originality`` on ``guardian_cls`` against the fake index.

        The agent is created with ``__new__`` so ``__init__`` does not run;
        only the attributes this test sets are provided on the instance.
        """
        agent = guardian_cls.__new__(guardian_cls)
        agent.ORIGINALITY_THRESHOLD = 0.75
        agent.intelligence = _FakeIntelligence()
        agent._log_agent_operation = lambda *args, **kwargs: None

        result = await guardian_cls.verify_originality(
            agent,
            self._ORIGINALITY_SAMPLE_TEXT,
            _FakeCompetitorIndex(),
        )

        self.assertIn("originality_score", result)
        self.assertLess(result["originality_score"], 1.0)
        self.assertIn("warning", result)
        self.assertEqual(result["method"], "competitor_index_search")

    def test_single_strategy_architect_init_block(self):
        """The orchestrator source contains exactly one strategy-architect init guard."""
        # Anchor on ROOT (the directory this module puts on sys.path for the
        # ``services`` imports) instead of a CWD-relative "backend/..." path,
        # so the test does not depend on where the runner was started.
        orchestrator_path = ROOT / "services" / "intelligence" / "agents" / "agent_orchestrator.py"
        source = orchestrator_path.read_text(encoding="utf-8")
        self.assertEqual(source.count('if enabled_by_key.get("strategy_architect", True):'), 1)

    async def test_semantic_health_returns_canonical_metric(self):
        """``check_semantic_health`` folds the raw metrics into one canonical metric."""
        monitor = RealTimeSemanticMonitor.__new__(RealTimeSemanticMonitor)
        monitor.user_id = "u1"
        metric_list = [
            SemanticHealthMetric("semantic_diversity", 0.8, 0.6, "healthy", "t", "d", []),
            SemanticHealthMetric("authority_score", 0.3, 0.4, "critical", "t", "d", ["Improve authority"]),
        ]

        async def _fake_metrics():
            return metric_list

        # Replace the underlying metric collection with the canned list above.
        monitor._check_semantic_health = _fake_metrics

        result = await RealTimeSemanticMonitor.check_semantic_health(monitor)

        self.assertIsInstance(result, SemanticHealthMetric)
        self.assertEqual(result.metric_name, "semantic_health")
        self.assertEqual(result.status, "critical")

    async def test_verify_originality_uses_real_scores_sif_guardian(self):
        """The SIF guardian scores originality from the competitor index."""
        await self._assert_originality_uses_competitor_index(SifGuardian)

    async def test_verify_originality_uses_real_scores_specialized_guardian(self):
        """The specialized guardian scores originality from the competitor index."""
        await self._assert_originality_uses_competitor_index(SpecializedGuardian)

    def test_pillar_coverage_guardrail_backfills_missing(self):
        """Every pillar is covered even when the per-pillar task builder returns None."""
        tasks = [
            {
                "pillarId": "plan",
                "title": "Plan",
                "description": "d",
                "priority": "high",
                "estimatedTime": 10,
                "actionType": "navigate",
                "enabled": True,
            },
        ]
        grounding = {"workflow_config": {"enforce_pillar_coverage": True}}

        with patch("services.today_workflow_service._build_single_task_for_missing_pillar", return_value=None):
            covered = _ensure_pillar_coverage(tasks, "u1", "2026-01-01", grounding)

        pillars = {t["pillarId"] for t in covered}
        self.assertEqual(pillars, set(PILLAR_IDS))
# Allow running this test module directly as a script.
if __name__ == "__main__":
    unittest.main()

View File

@@ -271,3 +271,33 @@ The Today's Tasks Workflow System is designed to transform ALwrity's complex dig
---
*This document serves as the foundation for implementing the Today's Tasks Workflow System. It should be reviewed and updated regularly as the project progresses and new insights are gained.*
## ✅ **Production Release Gates (SIF Today Workflow)**
Before enabling broad production rollout, verify the following release gates:
1. **Committee completeness**
   - `StrategyArchitectAgent` is initialized once and included in committee polling.
   - Daily workflow generation includes all major agent lanes (`strategy`, `content`, `seo`, `social`, `competitor`).
2. **Semantic intelligence quality gates**
   - `find_semantic_gaps` returns evidence-backed topic gaps (no placeholder outputs).
   - `verify_originality` uses real competitor similarity evidence (no synthetic placeholder score).
3. **Response contract gates**
   - Semantic health API returns the canonical `SemanticHealthMetric` structure in success and fallback paths.
   - Workflow task payloads always pass pillar coverage guardrails unless explicitly disabled by config.
4. **Observability gates**
   - Error paths emit structured logs with `user_id` and operation context.
   - Semantic filter degraded-path counters are incremented and logged.
5. **Test and validation gates**
   - Automated tests cover:
     - single strategy agent initialization path,
     - semantic health aggregation contract,
     - originality score computation path,
     - pillar coverage backfill behavior.
   - Static compile check passes on modified backend modules.
**Release recommendation**: proceed only when all gates pass in CI and staging smoke tests.