Enable phase-3 task memory filtering and add coverage

This commit is contained in:
ي
2026-03-01 21:38:24 +05:30
parent f8f7ddeb2a
commit e2718e3b79
3 changed files with 147 additions and 14 deletions

View File

@@ -13,6 +13,10 @@ from sqlalchemy.orm import Session
from models.daily_workflow_models import TaskHistory, DailyWorkflowTask
from services.intelligence.txtai_service import TxtaiIntelligenceService
EXACT_DUPLICATE_LOOKBACK_DAYS = 7
SEMANTIC_SUPPRESSION_SCORE_THRESHOLD = 0.85
SUPPRESSED_STATUSES = {"dismissed", "rejected"}
class TaskMemoryService:
"""
Manages the long-term memory of user tasks.
@@ -96,7 +100,7 @@ class TaskMemoryService:
filtered = []
# Get recent history hashes (last 7 days)
cutoff = datetime.utcnow() - timedelta(days=7)
cutoff = datetime.utcnow() - timedelta(days=EXACT_DUPLICATE_LOOKBACK_DAYS)
recent_hashes = {
row.task_hash for row in
self.db.query(TaskHistory.task_hash)
@@ -117,23 +121,39 @@ class TaskMemoryService:
is_semantic_duplicate = False
try:
# Check if similar tasks were REJECTED recently
results = self.intelligence.search(
results = await self.intelligence.search(
f"{p.title} {p.description}",
limit=1
)
if results:
top = results[0]
# If very similar (>0.85) and was REJECTED/DISMISSED
# We might need to fetch the metadata from the result if txtai returns it
# For now, this is a heuristic stub. Txtai search returns dict with 'id', 'score', 'text', etc.
# If we stored 'status' in metadata, we check it.
if top['score'] > 0.85:
# Retrieve status from DB using vector_id if needed, or if metadata is returned
# Assuming we want to avoid repeating REJECTED ideas
# This requires storing 'status' in the index metadata
pass
top_score = float(top.get("score", 0))
if top_score >= SEMANTIC_SUPPRESSION_SCORE_THRESHOLD:
indexed_status = self._extract_indexed_status(top)
if indexed_status in SUPPRESSED_STATUSES:
logger.info(
f"Filtering redundant task (semantic {top_score:.2f}, indexed status={indexed_status}): {p.title}"
)
is_semantic_duplicate = True
else:
vector_id = top.get("id") or top.get("vector_id")
if vector_id:
history = (
self.db.query(TaskHistory.status)
.filter(
TaskHistory.user_id == self.user_id,
TaskHistory.vector_id == str(vector_id),
)
.order_by(TaskHistory.created_at.desc())
.first()
)
history_status = getattr(history, "status", None)
if history_status in SUPPRESSED_STATUSES:
logger.info(
f"Filtering redundant task (semantic {top_score:.2f}, history status={history_status}): {p.title}"
)
is_semantic_duplicate = True
except Exception:
pass
@@ -141,3 +161,16 @@ class TaskMemoryService:
filtered.append(p)
return filtered
def _extract_indexed_status(self, search_result: Dict[str, Any]) -> Optional[str]:
"""Extract indexed status from txtai result metadata if available."""
status = search_result.get("status")
if status:
return str(status).lower()
obj = search_result.get("object")
if isinstance(obj, dict):
obj_status = obj.get("status")
return str(obj_status).lower() if obj_status else None
return None