Harden SIF release readiness gaps and add regression checks
This commit is contained in:
@@ -168,12 +168,6 @@ class ALwrityAgentOrchestrator:
|
||||
self.social_agent = SocialAmplificationAgent(self.user_id, self.config.shared_llm, llm=self.llm)
|
||||
self.agents['social'] = self.social_agent
|
||||
|
||||
# Strategy Architect Agent
|
||||
if enabled_by_key.get("strategy_architect", True):
|
||||
from services.intelligence.txtai_service import TxtaiIntelligenceService
|
||||
intel_service = TxtaiIntelligenceService(self.user_id)
|
||||
self.strategy_agent = StrategyArchitectAgent(intel_service, self.user_id)
|
||||
self.agents['strategy'] = self.strategy_agent
|
||||
|
||||
# Trend Surfer Agent
|
||||
if enabled_by_key.get("trend_surfer", True):
|
||||
|
||||
@@ -531,15 +531,57 @@ class ContentGuardianAgent(SIFBaseAgent):
|
||||
logger.warning(f"[{self.__class__.__name__}] Text too short for meaningful originality check")
|
||||
return {"originality_score": 0.0, "reason": "Text too short"}
|
||||
|
||||
# STUB: Implement cross-index search against competitor content
|
||||
# This would search the text against a competitor-specific index
|
||||
|
||||
logger.info(f"[{self.__class__.__name__}] Originality verification stub completed")
|
||||
query = text.strip()
|
||||
competitor_results = []
|
||||
method = "user_index_competitor_filter"
|
||||
|
||||
if competitor_index is not None and hasattr(competitor_index, "search"):
|
||||
method = "competitor_index_search"
|
||||
raw_results = competitor_index.search(query, limit=5)
|
||||
if asyncio.iscoroutine(raw_results):
|
||||
raw_results = await raw_results
|
||||
competitor_results = raw_results or []
|
||||
else:
|
||||
raw_results = await self.intelligence.search(query, limit=10)
|
||||
for result in raw_results or []:
|
||||
metadata_raw = result.get("object")
|
||||
metadata = metadata_raw if isinstance(metadata_raw, dict) else {}
|
||||
if not metadata and isinstance(metadata_raw, str):
|
||||
try:
|
||||
metadata = json.loads(metadata_raw)
|
||||
except Exception:
|
||||
metadata = {}
|
||||
|
||||
doc_type = str((metadata or {}).get("type", "")).lower()
|
||||
source = str((metadata or {}).get("source", "")).lower()
|
||||
if "competitor" in doc_type or "competitor" in source:
|
||||
competitor_results.append(result)
|
||||
|
||||
if not competitor_results:
|
||||
return {
|
||||
"originality_score": 1.0,
|
||||
"confidence": 0.6,
|
||||
"method": method,
|
||||
"notes": "No competitor overlap detected in available index"
|
||||
}
|
||||
|
||||
top_match = max(competitor_results, key=lambda item: float(item.get("score", 0.0)))
|
||||
top_score = max(0.0, min(1.0, float(top_match.get("score", 0.0))))
|
||||
originality_score = max(0.0, round(1.0 - top_score, 4))
|
||||
confidence = round(min(1.0, 0.55 + (min(len(competitor_results), 5) * 0.07)), 3)
|
||||
warning = originality_score < self.ORIGINALITY_THRESHOLD
|
||||
|
||||
return {
|
||||
"originality_score": 0.95, # Placeholder
|
||||
"confidence": 0.8,
|
||||
"method": "semantic_comparison",
|
||||
"notes": "Competitor index integration pending"
|
||||
"originality_score": originality_score,
|
||||
"confidence": confidence,
|
||||
"method": method,
|
||||
"warning": warning,
|
||||
"threshold": self.ORIGINALITY_THRESHOLD,
|
||||
"top_competitor_match": {
|
||||
"id": top_match.get("id"),
|
||||
"score": round(top_score, 4)
|
||||
},
|
||||
"matches_evaluated": len(competitor_results)
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
|
||||
@@ -544,15 +544,57 @@ class ContentGuardianAgent(SIFBaseAgent):
|
||||
logger.warning(f"[{self.__class__.__name__}] Text too short for meaningful originality check")
|
||||
return {"originality_score": 0.0, "reason": "Text too short"}
|
||||
|
||||
# STUB: Implement cross-index search against competitor content
|
||||
# This would search the text against a competitor-specific index
|
||||
|
||||
logger.info(f"[{self.__class__.__name__}] Originality verification stub completed")
|
||||
query = text.strip()
|
||||
competitor_results = []
|
||||
method = "user_index_competitor_filter"
|
||||
|
||||
if competitor_index is not None and hasattr(competitor_index, "search"):
|
||||
method = "competitor_index_search"
|
||||
raw_results = competitor_index.search(query, limit=5)
|
||||
if asyncio.iscoroutine(raw_results):
|
||||
raw_results = await raw_results
|
||||
competitor_results = raw_results or []
|
||||
else:
|
||||
raw_results = await self.intelligence.search(query, limit=10)
|
||||
for result in raw_results or []:
|
||||
metadata_raw = result.get("object")
|
||||
metadata = metadata_raw if isinstance(metadata_raw, dict) else {}
|
||||
if not metadata and isinstance(metadata_raw, str):
|
||||
try:
|
||||
metadata = json.loads(metadata_raw)
|
||||
except Exception:
|
||||
metadata = {}
|
||||
|
||||
doc_type = str((metadata or {}).get("type", "")).lower()
|
||||
source = str((metadata or {}).get("source", "")).lower()
|
||||
if "competitor" in doc_type or "competitor" in source:
|
||||
competitor_results.append(result)
|
||||
|
||||
if not competitor_results:
|
||||
return {
|
||||
"originality_score": 1.0,
|
||||
"confidence": 0.6,
|
||||
"method": method,
|
||||
"notes": "No competitor overlap detected in available index"
|
||||
}
|
||||
|
||||
top_match = max(competitor_results, key=lambda item: float(item.get("score", 0.0)))
|
||||
top_score = max(0.0, min(1.0, float(top_match.get("score", 0.0))))
|
||||
originality_score = max(0.0, round(1.0 - top_score, 4))
|
||||
confidence = round(min(1.0, 0.55 + (min(len(competitor_results), 5) * 0.07)), 3)
|
||||
warning = originality_score < self.ORIGINALITY_THRESHOLD
|
||||
|
||||
return {
|
||||
"originality_score": 0.95, # Placeholder
|
||||
"confidence": 0.8,
|
||||
"method": "semantic_comparison",
|
||||
"notes": "Competitor index integration pending"
|
||||
"originality_score": originality_score,
|
||||
"confidence": confidence,
|
||||
"method": method,
|
||||
"warning": warning,
|
||||
"threshold": self.ORIGINALITY_THRESHOLD,
|
||||
"top_competitor_match": {
|
||||
"id": top_match.get("id"),
|
||||
"score": round(top_score, 4)
|
||||
},
|
||||
"matches_evaluated": len(competitor_results)
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
|
||||
89
backend/sif_release_readiness_checks.py
Normal file
89
backend/sif_release_readiness_checks.py
Normal file
@@ -0,0 +1,89 @@
|
||||
import asyncio
|
||||
import unittest
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from unittest.mock import patch
|
||||
# This module lives in ``backend/``; the ``services`` package imported below is
# a sibling of this file, so ``backend/`` itself must be importable. Putting
# only the repository root on ``sys.path`` works when the file is executed as
# a script (the script directory is added implicitly) but not under
# ``python -m unittest`` or pytest started from another directory.
BACKEND_DIR = Path(__file__).resolve().parent
# Repository root (parent of ``backend/``); kept for backward compatibility.
ROOT = BACKEND_DIR.parent
for _path_entry in (ROOT, BACKEND_DIR):
    if str(_path_entry) not in sys.path:
        sys.path.insert(0, str(_path_entry))
|
||||
|
||||
from services.intelligence.monitoring.semantic_dashboard import RealTimeSemanticMonitor, SemanticHealthMetric
|
||||
from services.today_workflow_service import _ensure_pillar_coverage, PILLAR_IDS
|
||||
from services.intelligence.sif_agents import ContentGuardianAgent as SifGuardian
|
||||
from services.intelligence.agents.specialized_agents import ContentGuardianAgent as SpecializedGuardian
|
||||
|
||||
|
||||
class _FakeIntelligence:
    """Test double for the intelligence service: replays canned search hits."""

    def __init__(self, results=None):
        # Any falsy value (None, empty list) collapses to a fresh empty list.
        self._results = results if results else []

    async def search(self, query: str, limit: int = 10):
        """Return the canned results; ``query`` and ``limit`` are ignored."""
        canned = self._results
        return canned
|
||||
|
||||
|
||||
class _FakeCompetitorIndex:
    """Test double for a competitor index with two fixed similarity hits."""

    async def search(self, query: str, limit: int = 5):
        """Return the same two scored matches regardless of the arguments."""
        scored_ids = (("comp-1", 0.82), ("comp-2", 0.65))
        return [{"id": doc_id, "score": score} for doc_id, score in scored_ids]
|
||||
|
||||
|
||||
class SIFReleaseReadinessTests(unittest.IsolatedAsyncioTestCase):
    """Regression checks for the SIF release-readiness gaps.

    Covers four contracts: a single Strategy Architect init block in the
    orchestrator, the canonical semantic-health metric, originality scoring
    driven by competitor search results, and pillar-coverage backfilling.
    """

    _ORIGINALITY_SAMPLE = "This is sufficiently long text for originality analysis."

    def test_single_strategy_architect_init_block(self):
        """The orchestrator source contains exactly one strategy-architect guard."""
        # Anchor to this file's directory (``backend/``) instead of the current
        # working directory so the check passes wherever the runner is started.
        orchestrator_path = (
            Path(__file__).resolve().parent
            / "services"
            / "intelligence"
            / "agents"
            / "agent_orchestrator.py"
        )
        source = orchestrator_path.read_text(encoding="utf-8")
        self.assertEqual(source.count('if enabled_by_key.get("strategy_architect", True):'), 1)

    async def test_semantic_health_returns_canonical_metric(self):
        """``check_semantic_health`` folds per-metric results into one metric."""
        # Bypass __init__ so no real services are constructed.
        monitor = RealTimeSemanticMonitor.__new__(RealTimeSemanticMonitor)
        monitor.user_id = "u1"
        metric_list = [
            SemanticHealthMetric("semantic_diversity", 0.8, 0.6, "healthy", "t", "d", []),
            SemanticHealthMetric("authority_score", 0.3, 0.4, "critical", "t", "d", ["Improve authority"]),
        ]

        async def _fake_metrics():
            return metric_list

        # Assigned on the instance, so it is called without ``self``.
        monitor._check_semantic_health = _fake_metrics

        result = await RealTimeSemanticMonitor.check_semantic_health(monitor)
        self.assertIsInstance(result, SemanticHealthMetric)
        self.assertEqual(result.metric_name, "semantic_health")
        self.assertEqual(result.status, "critical")

    async def _assert_originality_uses_competitor_scores(self, guardian_cls):
        """Run ``verify_originality`` on a bare ``guardian_cls`` and check the result.

        The agent is built with ``__new__`` and given only the attributes the
        test sets up: threshold, fake intelligence service and a no-op logger.
        """
        agent = guardian_cls.__new__(guardian_cls)
        agent.ORIGINALITY_THRESHOLD = 0.75
        agent.intelligence = _FakeIntelligence()
        agent._log_agent_operation = lambda *args, **kwargs: None

        result = await guardian_cls.verify_originality(
            agent, self._ORIGINALITY_SAMPLE, _FakeCompetitorIndex()
        )
        self.assertIn("originality_score", result)
        self.assertLess(result["originality_score"], 1.0)
        # The fake index's best hit scores 0.82, so the score derived from it
        # is 1 - 0.82 = 0.18. Pinning the value also rejects the former 0.95
        # placeholder, which a bare "< 1.0" check would let through.
        self.assertAlmostEqual(result["originality_score"], 0.18, places=4)
        self.assertIn("warning", result)
        self.assertEqual(result["method"], "competitor_index_search")

    async def test_verify_originality_uses_real_scores_sif_guardian(self):
        """The SIF guardian scores originality from competitor index hits."""
        await self._assert_originality_uses_competitor_scores(SifGuardian)

    async def test_verify_originality_uses_real_scores_specialized_guardian(self):
        """The specialized guardian scores originality from competitor index hits."""
        await self._assert_originality_uses_competitor_scores(SpecializedGuardian)

    def test_pillar_coverage_guardrail_backfills_missing(self):
        """Every pillar id is present after the coverage guardrail runs."""
        tasks = [
            {
                "pillarId": "plan",
                "title": "Plan",
                "description": "d",
                "priority": "high",
                "estimatedTime": 10,
                "actionType": "navigate",
                "enabled": True,
            }
        ]
        grounding = {"workflow_config": {"enforce_pillar_coverage": True}}

        # The per-pillar task builder is stubbed to return None for the call.
        with patch("services.today_workflow_service._build_single_task_for_missing_pillar", return_value=None):
            covered = _ensure_pillar_coverage(tasks, "u1", "2026-01-01", grounding)

        pillars = {t["pillarId"] for t in covered}
        self.assertEqual(pillars, set(PILLAR_IDS))
|
||||
|
||||
|
||||
# Allow running the readiness checks directly as a script.
if __name__ == "__main__":
    unittest.main()
|
||||
@@ -271,3 +271,33 @@ The Today's Tasks Workflow System is designed to transform ALwrity's complex dig
|
||||
---
|
||||
|
||||
*This document serves as the foundation for implementing the Today's Tasks Workflow System. It should be reviewed and updated regularly as the project progresses and new insights are gained.*
|
||||
|
||||
## ✅ **Production Release Gates (SIF Today Workflow)**
|
||||
|
||||
Before enabling broad production rollout, verify the following release gates:
|
||||
|
||||
1. **Committee completeness**
|
||||
- `StrategyArchitectAgent` is initialized once and included in committee polling.
|
||||
- Daily workflow generation includes all major agent lanes (`strategy`, `content`, `seo`, `social`, `competitor`).
|
||||
|
||||
2. **Semantic intelligence quality gates**
|
||||
- `find_semantic_gaps` returns evidence-backed topic gaps (no placeholder outputs).
|
||||
- `verify_originality` uses real competitor similarity evidence (no synthetic placeholder score).
|
||||
|
||||
3. **Response contract gates**
|
||||
- Semantic health API returns the canonical `SemanticHealthMetric` structure in success and fallback paths.
|
||||
- Workflow task payloads always pass pillar coverage guardrails unless explicitly disabled by config.
|
||||
|
||||
4. **Observability gates**
|
||||
- Error paths emit structured logs with `user_id` and operation context.
|
||||
- Semantic filter degraded-path counters are incremented and logged.
|
||||
|
||||
5. **Test and validation gates**
|
||||
- Automated tests cover:
|
||||
- single strategy agent initialization path,
|
||||
- semantic health aggregation contract,
|
||||
- originality score computation path,
|
||||
- pillar coverage backfill behavior.
|
||||
- Static compile check passes on modified backend modules.
|
||||
|
||||
**Release recommendation**: proceed only when every gate above passes both in CI and in staging smoke tests.
|
||||
|
||||
Reference in New Issue
Block a user