fix: Resolve conflicts in PR #386 for per-agent timeouts and partial committee handling

2026-03-07 12:05:51 +05:30
parent f0f73eb003 198143e6ca
commit 4f2a3d6e2d
1 changed files with 141 additions and 51 deletions
--- a/backend/services/today_workflow_service.py
+++ b/backend/services/today_workflow_service.py
@@ -54,6 +54,14 @@ def _proposal_priority_rank(priority: str) -> int:
 def _proposal_order_key(proposal: Any) -> tuple:
    # Handle both object and dict access for compatibility
    if isinstance(proposal, dict):
        return (
            str(proposal.get("source_agent") or "").lower(),
            str(proposal.get("title") or "").lower(),
            str(proposal.get("description") or "").lower(),
            str(proposal.get("action_url") or "").lower(),
        )
    return (
        str(getattr(proposal, "source_agent", "") or "").lower(),
        str(getattr(proposal, "title", "") or "").lower(),
@@ -62,6 +70,19 @@ def _proposal_order_key(proposal: Any) -> tuple:
    )
 def _get_agent_proposal_timeout_seconds(grounding: Dict[str, Any]) -> float:
    """Return the per-agent proposal timeout, in seconds, read from grounding.

    The value is looked up at
    ``grounding["workflow_config"]["agent_proposal_timeout_seconds"]``.

    Args:
        grounding: Grounding payload. Anything that is not a dict, or whose
            ``workflow_config`` entry is not a dict, yields the default.

    Returns:
        The configured timeout as a float, clamped to a minimum of 1.0.
        Falls back to 4.0 when the setting is missing, cannot be converted
        to a float, or converts to a non-finite value (NaN / +-inf).
    """
    # Function-scope import: the module's import block is not visible here.
    import math

    default_timeout_seconds = 4.0
    min_timeout_seconds = 1.0

    workflow_config = grounding.get("workflow_config", {}) if isinstance(grounding, dict) else {}
    if not isinstance(workflow_config, dict):
        return default_timeout_seconds

    raw_timeout = workflow_config.get("agent_proposal_timeout_seconds", default_timeout_seconds)
    try:
        timeout_seconds = float(raw_timeout)
    except (TypeError, ValueError, OverflowError):
        # OverflowError covers integers too large to be represented as a float.
        return default_timeout_seconds

    # float() happily parses "inf" and "nan". An infinite timeout would leave
    # the caller's wait without any effective deadline, and NaN would silently
    # collapse to the minimum, so treat both as invalid configuration.
    if not math.isfinite(timeout_seconds):
        return default_timeout_seconds

    return max(min_timeout_seconds, timeout_seconds)
 def _fallback_tasks(date: str) -> List[Dict[str, Any]]:
    return [
        {
@@ -308,39 +329,82 @@ async def generate_agent_enhanced_plan(db: Session, user_id: str, date: str) ->
    logger.info(f"Gathering daily task proposals from agent committee for user {user_id}")
    agent_tasks = []
    committee_total_failure = False
    try:
-        # Define agents to poll
+        agent_timeout_seconds = _get_agent_proposal_timeout_seconds(grounding)
-        agents_to_poll = [
+
-            orchestrator.agents.get('content'),      # ContentStrategyAgent
+        # Define agents to poll (keyed for logging/metrics)
-            orchestrator.agents.get('strategy'),     # StrategyArchitectAgent
+        agents_to_poll = {
-            orchestrator.agents.get('seo'),          # SEOOptimizationAgent
+            "content": orchestrator.agents.get('content'),
-            orchestrator.agents.get('social'),       # SocialAmplificationAgent
+            "strategy": orchestrator.agents.get('strategy'),
-            orchestrator.agents.get('competitor'),   # CompetitorResponseAgent
+            "seo": orchestrator.agents.get('seo'),
-        ]
+            "social": orchestrator.agents.get('social'),
-        
+            "competitor": orchestrator.agents.get('competitor'),
        }
        # Filter out None agents (disabled/failed init)
-        active_agents = [a for a in agents_to_poll if a]
+        active_agents = {key: agent for key, agent in agents_to_poll.items() if agent}
-        
+
-        # Execute propose_daily_tasks in parallel
+        async def _collect_agent_proposals(agent_key: str, agent: Any) -> Dict[str, Any]:
-        results = await asyncio.gather(
+            started_at = datetime.now(timezone.utc)
-            *[a.propose_daily_tasks(grounding) for a in active_agents],
+            try:
-            return_exceptions=True
+                proposals = await asyncio.wait_for(agent.propose_daily_tasks(grounding), timeout=agent_timeout_seconds)
                elapsed_ms = (datetime.now(timezone.utc) - started_at).total_seconds() * 1000
                return {
                    "agent_key": agent_key,
                    "status": "ok",
                    "elapsed_ms": elapsed_ms,
                    "proposals": proposals if isinstance(proposals, list) else [],
                }
            except asyncio.TimeoutError:
                elapsed_ms = (datetime.now(timezone.utc) - started_at).total_seconds() * 1000
                return {
                    "agent_key": agent_key,
                    "status": "timeout",
                    "elapsed_ms": elapsed_ms,
                    "proposals": [],
                    "error": f"Timed out after {agent_timeout_seconds:.2f}s",
                }
            except Exception as agent_error:
                elapsed_ms = (datetime.now(timezone.utc) - started_at).total_seconds() * 1000
                return {
                    "agent_key": agent_key,
                    "status": "error",
                    "elapsed_ms": elapsed_ms,
                    "proposals": [],
                    "error": str(agent_error),
                }
        # Execute propose_daily_tasks in parallel with per-agent timeout
        committee_results = await asyncio.gather(
            *[_collect_agent_proposals(agent_key, agent) for agent_key, agent in active_agents.items()]
        )
-        
+
-        # Collect successful proposals
+        successful_agent_count = 0
        raw_proposals = []
-        for res in results:
+        for res in committee_results:
-            if isinstance(res, list):
+            agent_key = res.get("agent_key")
-                # Normalize pillar IDs and filter invalid proposals
+            status = res.get("status")
-                for proposal in res:
+            elapsed_ms = res.get("elapsed_ms")
-                    pillar_id = str(proposal.get("pillarId") or "").lower().strip()
+
-                    if pillar_id not in PILLAR_IDS:
+            logger.info(
-                        logger.warning(f"Skipping proposal with invalid pillarId: {pillar_id}. Proposal: {proposal}")
+                "Agent committee proposal metric | agent={} status={} elapsed_ms={:.2f} timeout_s={:.2f} proposal_count={}",
-                        continue
+                agent_key,
-                    proposal["pillarId"] = pillar_id
+                status,
-                    raw_proposals.append(proposal)
+                elapsed_ms,
-            elif isinstance(res, Exception):
+                agent_timeout_seconds,
-                logger.warning(f"Agent proposal failed: {res}")
+                len(res.get("proposals") or []),
            )
            if status == "ok":
                successful_agent_count += 1
                raw_proposals.extend(res.get("proposals") or [])
            elif status == "timeout":
                logger.warning(f"Agent proposal timed out for {agent_key}: {res.get('error')}")
            else:
                logger.warning(f"Agent proposal failed for {agent_key}: {res.get('error')}")
        committee_total_failure = successful_agent_count == 0
        # 3. Filter Redundant Proposals (Self-Learning)
        # Note: We need to ensure we don't filter out essential recurring tasks if they were completed long ago
@@ -350,19 +414,26 @@ async def generate_agent_enhanced_plan(db: Session, user_id: str, date: str) ->
        # Simple deduplication based on title+pillar
        unique_map = {}
        for p in raw_proposals:
-            key = f"{p.pillar_id}:{p.title}"
+            # Normalize pillar IDs and filter invalid proposals (re-apply from PR #383)
            pillar_id = str(p.get("pillarId") or "").lower().strip()
            if pillar_id not in PILLAR_IDS:
                logger.warning(f"Skipping proposal with invalid pillarId: {pillar_id}. Proposal: {p}")
                continue
            p["pillarId"] = pillar_id
            key = f"{p.get('pillarId')}:{p.get('title')}"
            if key not in unique_map:
                unique_map[key] = p
                continue
            existing = unique_map[key]
-            if _proposal_priority_rank(p.priority) > _proposal_priority_rank(existing.priority):
+            if _proposal_priority_rank(p.get('priority')) > _proposal_priority_rank(existing.get('priority')):
                unique_map[key] = p
                continue
            # Deterministic tie-breaker for equal priority proposals.
            if (
-                _proposal_priority_rank(p.priority) == _proposal_priority_rank(existing.priority)
+                _proposal_priority_rank(p.get('priority')) == _proposal_priority_rank(existing.get('priority'))
                and _proposal_order_key(p) < _proposal_order_key(existing)
            ):
                unique_map[key] = p
@@ -374,32 +445,51 @@ async def generate_agent_enhanced_plan(db: Session, user_id: str, date: str) ->
    except Exception as e:
        logger.error(f"Committee proposal phase failed: {e}")
        committee_total_failure = True
        # Continue to fallback or LLM generation if committee fails
    # 4. Final Selection
-    # If we have agent tasks, use them. Otherwise fall back to LLM generation.
+    # Use committee outcomes whenever committee partially succeeds, even with sparse proposals.
-    if agent_tasks:
+    if not committee_total_failure:
        logger.info(f"Generated {len(agent_tasks)} tasks via Agent Committee")
-        
+
        # Convert TaskProposal objects to dicts for frontend
        final_tasks = []
        for prop in agent_tasks:
-            final_tasks.append({
+            # Handle both object and dict types
-                "pillarId": prop.pillar_id,
+            if isinstance(prop, dict):
-                "title": prop.title,
+                final_tasks.append({
-                "description": prop.description,
+                    "pillarId": prop.get("pillarId"),
-                "priority": prop.priority,
+                    "title": prop.get("title"),
-                "estimatedTime": prop.estimated_time,
+                    "description": prop.get("description"),
-                "actionType": prop.action_type,
+                    "priority": prop.get("priority"),
-                "actionUrl": prop.action_url,
+                    "estimatedTime": prop.get("estimatedTime"),
-                "enabled": True,
+                    "actionType": prop.get("actionType"),
-                "metadata": {
+                    "actionUrl": prop.get("actionUrl"),
-                    "source_agent": prop.source_agent,
+                    "enabled": True,
-                    "reasoning": prop.reasoning,
+                    "metadata": {
-                    "context_data": prop.context_data
+                        "source_agent": prop.get("source_agent"),
-                }
+                        "reasoning": prop.get("reasoning"),
-            })
+                        "context_data": prop.get("context_data")
-            
+                    }
                })
            else:
                final_tasks.append({
                    "pillarId": prop.pillar_id,
                    "title": prop.title,
                    "description": prop.description,
                    "priority": prop.priority,
                    "estimatedTime": prop.estimated_time,
                    "actionType": prop.action_type,
                    "actionUrl": prop.action_url,
                    "enabled": True,
                    "metadata": {
                        "source_agent": prop.source_agent,
                        "reasoning": prop.reasoning,
                        "context_data": prop.context_data
                    }
                })
        final_tasks = _ensure_pillar_coverage(final_tasks, user_id, date, grounding)
        return {
            "date": date,