Merge PR #364: Add competitor-aware originality checks and fix agent initialization

2026-03-03 18:57:46 +05:30
parent 05dd4f1efb 60e6cbd34b
commit 460e1f398d
4 changed files with 219 additions and 16 deletions
--- a/backend/services/intelligence/agents/specialized_agents.py
+++ b/backend/services/intelligence/agents/specialized_agents.py
@@ -422,15 +422,57 @@ class ContentGuardianAgent(SIFBaseAgent):
                logger.warning(f"[{self.__class__.__name__}] Text too short for meaningful originality check")
                return {"originality_score": 0.0, "reason": "Text too short"}
-            # STUB: Implement cross-index search against competitor content
+            query = text.strip()
-            # This would search the text against a competitor-specific index
+            competitor_results = []
-            
+            method = "user_index_competitor_filter"
-            logger.info(f"[{self.__class__.__name__}] Originality verification stub completed")
+
            if competitor_index is not None and hasattr(competitor_index, "search"):
                method = "competitor_index_search"
                raw_results = competitor_index.search(query, limit=5)
                if asyncio.iscoroutine(raw_results):
                    raw_results = await raw_results
                competitor_results = raw_results or []
            else:
                raw_results = await self.intelligence.search(query, limit=10)
                for result in raw_results or []:
                    metadata_raw = result.get("object")
                    metadata = metadata_raw if isinstance(metadata_raw, dict) else {}
                    if not metadata and isinstance(metadata_raw, str):
                        try:
                            metadata = json.loads(metadata_raw)
                        except Exception:
                            metadata = {}
                    doc_type = str((metadata or {}).get("type", "")).lower()
                    source = str((metadata or {}).get("source", "")).lower()
                    if "competitor" in doc_type or "competitor" in source:
                        competitor_results.append(result)
            if not competitor_results:
                return {
                    "originality_score": 1.0,
                    "confidence": 0.6,
                    "method": method,
                    "notes": "No competitor overlap detected in available index"
                }
            top_match = max(competitor_results, key=lambda item: float(item.get("score", 0.0)))
            top_score = max(0.0, min(1.0, float(top_match.get("score", 0.0))))
            originality_score = max(0.0, round(1.0 - top_score, 4))
            confidence = round(min(1.0, 0.55 + (min(len(competitor_results), 5) * 0.07)), 3)
            warning = originality_score < self.ORIGINALITY_THRESHOLD
            return {
-                "originality_score": 0.95,  # Placeholder
+                "originality_score": originality_score,
-                "confidence": 0.8,
+                "confidence": confidence,
-                "method": "semantic_comparison",
+                "method": method,
-                "notes": "Competitor index integration pending"
+                "warning": warning,
                "threshold": self.ORIGINALITY_THRESHOLD,
                "top_competitor_match": {
                    "id": top_match.get("id"),
                    "score": round(top_score, 4)
                },
                "matches_evaluated": len(competitor_results)
            }
        except Exception as e:
--- a/backend/services/intelligence/sif_agents.py
+++ b/backend/services/intelligence/sif_agents.py
@@ -572,15 +572,57 @@ class ContentGuardianAgent(SIFBaseAgent):
                logger.warning(f"[{self.__class__.__name__}] Text too short for meaningful originality check")
                return {"originality_score": 0.0, "reason": "Text too short"}
-            # STUB: Implement cross-index search against competitor content
+            query = text.strip()
-            # This would search the text against a competitor-specific index
+            competitor_results = []
-            
+            method = "user_index_competitor_filter"
-            logger.info(f"[{self.__class__.__name__}] Originality verification stub completed")
+
            if competitor_index is not None and hasattr(competitor_index, "search"):
                method = "competitor_index_search"
                raw_results = competitor_index.search(query, limit=5)
                if asyncio.iscoroutine(raw_results):
                    raw_results = await raw_results
                competitor_results = raw_results or []
            else:
                raw_results = await self.intelligence.search(query, limit=10)
                for result in raw_results or []:
                    metadata_raw = result.get("object")
                    metadata = metadata_raw if isinstance(metadata_raw, dict) else {}
                    if not metadata and isinstance(metadata_raw, str):
                        try:
                            metadata = json.loads(metadata_raw)
                        except Exception:
                            metadata = {}
                    doc_type = str((metadata or {}).get("type", "")).lower()
                    source = str((metadata or {}).get("source", "")).lower()
                    if "competitor" in doc_type or "competitor" in source:
                        competitor_results.append(result)
            if not competitor_results:
                return {
                    "originality_score": 1.0,
                    "confidence": 0.6,
                    "method": method,
                    "notes": "No competitor overlap detected in available index"
                }
            top_match = max(competitor_results, key=lambda item: float(item.get("score", 0.0)))
            top_score = max(0.0, min(1.0, float(top_match.get("score", 0.0))))
            originality_score = max(0.0, round(1.0 - top_score, 4))
            confidence = round(min(1.0, 0.55 + (min(len(competitor_results), 5) * 0.07)), 3)
            warning = originality_score < self.ORIGINALITY_THRESHOLD
            return {
-                "originality_score": 0.95,  # Placeholder
+                "originality_score": originality_score,
-                "confidence": 0.8,
+                "confidence": confidence,
-                "method": "semantic_comparison",
+                "method": method,
-                "notes": "Competitor index integration pending"
+                "warning": warning,
                "threshold": self.ORIGINALITY_THRESHOLD,
                "top_competitor_match": {
                    "id": top_match.get("id"),
                    "score": round(top_score, 4)
                },
                "matches_evaluated": len(competitor_results)
            }
        except Exception as e:
--- a/backend/sif_release_readiness_checks.py
+++ b/backend/sif_release_readiness_checks.py
@@ -0,0 +1,89 @@
 import asyncio
 import unittest
 import sys
 from pathlib import Path
 from unittest.mock import patch
 # Make the parent of this file's directory importable (presumably the
 # repository root -- verify against the actual layout) before the project
 # imports below are resolved. The path is only inserted once.
 ROOT = Path(__file__).resolve().parents[1]
 if str(ROOT) not in sys.path:
    sys.path.insert(0, str(ROOT))
 from services.intelligence.monitoring.semantic_dashboard import RealTimeSemanticMonitor, SemanticHealthMetric
 from services.today_workflow_service import _ensure_pillar_coverage, PILLAR_IDS
 from services.intelligence.sif_agents import ContentGuardianAgent as SifGuardian
 from services.intelligence.agents.specialized_agents import ContentGuardianAgent as SpecializedGuardian
 class _FakeIntelligence:
    """Async test double for the intelligence service used by the guardian agents.

    ``search`` always hands back the result list given at construction time,
    whatever query or limit it is called with.
    """

    def __init__(self, results=None):
        # A missing (or otherwise falsy) argument collapses to a fresh empty list.
        self._results = results if results else []

    async def search(self, query: str, limit: int = 10):
        """Return the canned results; ``query`` and ``limit`` are ignored."""
        canned = self._results
        return canned
 class _FakeCompetitorIndex:
    """Async competitor-index test double that always reports two fixed matches."""

    async def search(self, query: str, limit: int = 5):
        """Return two hard-coded hits; ``query`` and ``limit`` are ignored."""
        # A new list of new dicts is built on every call, so callers can
        # mutate what they receive without affecting later calls.
        canned_hits = []
        for match_id, similarity in (("comp-1", 0.82), ("comp-2", 0.65)):
            canned_hits.append({"id": match_id, "score": similarity})
        return canned_hits
 class SIFReleaseReadinessTests(unittest.IsolatedAsyncioTestCase):
    """Release-readiness checks for the SIF Today workflow.

    Covers four things: the orchestrator's strategy-architect init guard,
    the semantic-health aggregation result, the originality computation of
    both ``ContentGuardianAgent`` variants, and the pillar-coverage guardrail.
    Agents and the monitor are created with ``__new__`` so their ``__init__``
    is never run; only the attributes set in each test are available.
    """

    def test_single_strategy_architect_init_block(self):
        """The orchestrator source has exactly one strategy-architect init guard."""
        # Resolve the orchestrator relative to this file (which lives in
        # backend/) instead of the current working directory, so the check
        # does not depend on where the test runner was launched from.
        orchestrator_path = (
            Path(__file__).resolve().parent
            / "services" / "intelligence" / "agents" / "agent_orchestrator.py"
        )
        source = orchestrator_path.read_text(encoding="utf-8")
        self.assertEqual(source.count('if enabled_by_key.get("strategy_architect", True):'), 1)

    async def test_semantic_health_returns_canonical_metric(self):
        """check_semantic_health returns one SemanticHealthMetric named "semantic_health".

        The per-metric check is stubbed with one healthy and one critical
        metric; the returned metric is expected to carry status "critical".
        """
        monitor = RealTimeSemanticMonitor.__new__(RealTimeSemanticMonitor)
        monitor.user_id = "u1"
        metric_list = [
            SemanticHealthMetric("semantic_diversity", 0.8, 0.6, "healthy", "t", "d", []),
            SemanticHealthMetric("authority_score", 0.3, 0.4, "critical", "t", "d", ["Improve authority"]),
        ]

        async def _fake_metrics():
            return metric_list

        monitor._check_semantic_health = _fake_metrics
        result = await RealTimeSemanticMonitor.check_semantic_health(monitor)
        self.assertIsInstance(result, SemanticHealthMetric)
        self.assertEqual(result.metric_name, "semantic_health")
        self.assertEqual(result.status, "critical")

    async def test_verify_originality_uses_real_scores_sif_guardian(self):
        """sif_agents guardian: a competitor index yields a computed score, not a placeholder."""
        agent = SifGuardian.__new__(SifGuardian)
        agent.ORIGINALITY_THRESHOLD = 0.75
        agent.intelligence = _FakeIntelligence()
        agent._log_agent_operation = lambda *args, **kwargs: None
        result = await SifGuardian.verify_originality(agent, "This is sufficiently long text for originality analysis.", _FakeCompetitorIndex())
        self.assertIn("originality_score", result)
        # The fake index reports non-zero similarity, so the score must drop below 1.0.
        self.assertLess(result["originality_score"], 1.0)
        self.assertIn("warning", result)
        self.assertEqual(result["method"], "competitor_index_search")

    async def test_verify_originality_uses_real_scores_specialized_guardian(self):
        """specialized_agents guardian: same originality contract as the sif_agents one."""
        agent = SpecializedGuardian.__new__(SpecializedGuardian)
        agent.ORIGINALITY_THRESHOLD = 0.75
        agent.intelligence = _FakeIntelligence()
        agent._log_agent_operation = lambda *args, **kwargs: None
        result = await SpecializedGuardian.verify_originality(agent, "This is sufficiently long text for originality analysis.", _FakeCompetitorIndex())
        self.assertIn("originality_score", result)
        # The fake index reports non-zero similarity, so the score must drop below 1.0.
        self.assertLess(result["originality_score"], 1.0)
        self.assertIn("warning", result)
        self.assertEqual(result["method"], "competitor_index_search")

    def test_pillar_coverage_guardrail_backfills_missing(self):
        """_ensure_pillar_coverage covers every id in PILLAR_IDS from a single "plan" task.

        The single-task builder is patched to return None, so the expected
        coverage has to come from whatever the guardrail does in that case.
        """
        tasks = [{"pillarId": "plan", "title": "Plan", "description": "d", "priority": "high", "estimatedTime": 10, "actionType": "navigate", "enabled": True}]
        grounding = {"workflow_config": {"enforce_pillar_coverage": True}}
        with patch("services.today_workflow_service._build_single_task_for_missing_pillar", return_value=None):
            covered = _ensure_pillar_coverage(tasks, "u1", "2026-01-01", grounding)
        pillars = {t["pillarId"] for t in covered}
        self.assertEqual(pillars, set(PILLAR_IDS))
 # Run the test suite when this module is executed directly as a script.
 if __name__ == "__main__":
    unittest.main()
--- a/docs/SIF/TODAYS_TASKS_WORKFLOW_IMPLEMENTATION_PLAN.md
+++ b/docs/SIF/TODAYS_TASKS_WORKFLOW_IMPLEMENTATION_PLAN.md
@@ -271,3 +271,33 @@ The Today's Tasks Workflow System is designed to transform ALwrity's complex dig
 ---
 *This document serves as the foundation for implementing the Today's Tasks Workflow System. It should be reviewed and updated regularly as the project progresses and new insights are gained.*
 ## ✅ **Production Release Gates (SIF Today Workflow)**
 Before enabling broad production rollout, verify the following release gates:
 1. **Committee completeness**
   - `StrategyArchitectAgent` is initialized once and included in committee polling.
   - Daily workflow generation includes all major agent lanes (`strategy`, `content`, `seo`, `social`, `competitor`).
 2. **Semantic intelligence quality gates**
   - `find_semantic_gaps` returns evidence-backed topic gaps (no placeholder outputs).
   - `verify_originality` uses real competitor similarity evidence (no synthetic placeholder score).
 3. **Response contract gates**
   - Semantic health API returns the canonical `SemanticHealthMetric` structure in success and fallback paths.
   - Workflow task payloads always pass pillar coverage guardrails unless explicitly disabled by config.
 4. **Observability gates**
   - Error paths emit structured logs with `user_id` and operation context.
   - Semantic filter degraded-path counters are incremented and logged.
 5. **Test and validation gates**
   - Automated tests cover:
     - single strategy agent initialization path,
     - semantic health aggregation contract,
     - originality score computation path,
     - pillar coverage backfill behavior.
   - Static compile check passes on modified backend modules.
 **Release recommendation**: proceed only when all gates pass in CI and staging smoke tests.