Compare commits

..

1 Commit

Author SHA1 Message Date
ي
23489fdc12 Add flat-context synthesis and mnemonic prompt injection 2026-05-18 16:00:56 +05:30
7 changed files with 150 additions and 192 deletions

View File

@@ -26,7 +26,6 @@ from services.database import get_db
from middleware.auth_middleware import get_current_user, get_current_user_with_query_token
from api.story_writer.utils.auth import require_authenticated_user
from utils.asset_tracker import save_asset_to_library
from models.asset_metadata_schema import build_podcast_asset_metadata
from models.story_models import StoryAudioResult
from loguru import logger
from ..constants import get_podcast_audio_service, get_podcast_media_dir
@@ -218,11 +217,11 @@ async def upload_podcast_audio(
title=f"Uploaded Audio - {project_id}",
description="Uploaded podcast audio/voice sample",
tags=["podcast", "audio", "upload", project_id],
asset_metadata=build_podcast_asset_metadata(
asset_role="uploaded_audio",
project_id=project_id,
origin="podcast.audio.upload",
),
asset_metadata={
"project_id": project_id,
"type": "uploaded_audio",
"status": "completed",
},
)
except Exception as e:
logger.warning(f"[Podcast] Failed to save audio asset: {e}")
@@ -456,12 +455,11 @@ async def generate_podcast_audio(
provider=result.get("provider"),
model=result.get("model"),
cost=result.get("cost"),
asset_metadata=build_podcast_asset_metadata(
asset_role="podcast_audio",
project_id=request.project_id,
origin="podcast.audio.generate",
extras={"scene_id": request.scene_id, "scene_title": request.scene_title},
),
asset_metadata={
"scene_id": request.scene_id,
"scene_title": request.scene_title,
"status": "completed",
},
)
except Exception as e:
logger.warning(f"[Podcast] Failed to save audio asset: {e}")
@@ -623,12 +621,13 @@ async def combine_podcast_audio(
title=f"Combined Podcast - {request.project_id}",
description=f"Combined podcast audio from {len(request.scene_ids)} scenes",
tags=["podcast", "audio", "combined", request.project_id],
asset_metadata=build_podcast_asset_metadata(
asset_role="combined_podcast_audio",
project_id=request.project_id,
origin="podcast.audio.combine",
extras={"scene_ids": request.scene_ids, "scene_count": len(request.scene_ids), "total_duration": total_duration},
),
asset_metadata={
"project_id": request.project_id,
"scene_ids": request.scene_ids,
"scene_count": len(request.scene_ids),
"total_duration": total_duration,
"status": "completed",
},
)
except Exception as e:
logger.warning(f"[Podcast] Failed to save combined audio asset: {e}")

View File

@@ -18,7 +18,6 @@ from api.story_writer.utils.auth import require_authenticated_user
from services.llm_providers.main_image_generation import generate_image
from services.llm_providers.main_image_editing import edit_image
from utils.asset_tracker import save_asset_to_library
from models.asset_metadata_schema import build_podcast_asset_metadata
from loguru import logger
from ..constants import get_podcast_media_dir, PODCAST_AVATARS_SUBDIR
from ..presenter_personas import choose_persona_id, get_persona
@@ -112,11 +111,11 @@ async def upload_podcast_avatar(
title=f"Podcast Presenter Avatar - {project_id}",
description="Podcast presenter avatar image",
tags=["podcast", "avatar", project_id],
asset_metadata=build_podcast_asset_metadata(
asset_role="presenter_avatar",
project_id=project_id,
origin="podcast.avatar.upload",
),
asset_metadata={
"project_id": project_id,
"type": "presenter_avatar",
"status": "completed",
},
)
except Exception as e:
logger.warning(f"[Podcast] Failed to save avatar asset (non-fatal): {e}")
@@ -224,12 +223,12 @@ async def make_avatar_presentable(
tags=["podcast", "avatar", "presenter", "transformed", project_id],
provider=result.provider,
model=result.model,
asset_metadata=build_podcast_asset_metadata(
asset_role="transformed_presenter",
project_id=project_id,
origin="podcast.avatar.make_presentable",
extras={"original_avatar_url": avatar_url},
),
asset_metadata={
"project_id": project_id,
"type": "transformed_presenter",
"original_avatar_url": avatar_url,
"status": "completed",
},
)
except Exception as e:
logger.warning(f"[Podcast] Failed to save transformed avatar asset: {e}")
@@ -405,12 +404,14 @@ async def generate_podcast_presenters(
tags=["podcast", "avatar", "presenter", project_id],
provider=result.provider,
model=result.model,
asset_metadata=build_podcast_asset_metadata(
asset_role="generated_presenter",
project_id=project_id,
origin="podcast.avatar.generate",
extras={"speaker_number": i + 1, "persona_id": selected_persona_id, "seed": seed},
),
asset_metadata={
"project_id": project_id,
"speaker_number": i + 1,
"type": "generated_presenter",
"status": "completed",
"persona_id": selected_persona_id,
"seed": seed,
},
)
except Exception as e:
logger.warning(f"[Podcast] Failed to save presenter asset: {e}")

View File

@@ -16,7 +16,6 @@ from middleware.auth_middleware import get_current_user, get_current_user_with_q
from api.story_writer.utils.auth import require_authenticated_user
from services.llm_providers.main_image_generation import generate_image, generate_character_image
from utils.asset_tracker import save_asset_to_library
from models.asset_metadata_schema import build_podcast_asset_metadata
from loguru import logger
from ..constants import get_podcast_media_dir
from ..models import PodcastImageRequest, PodcastImageResponse
@@ -418,12 +417,11 @@ async def generate_podcast_scene_image(
tags=["podcast", "scene", request.scene_id],
provider=result.provider,
model=result.model,
asset_metadata=build_podcast_asset_metadata(
asset_role="podcast_scene_image",
project_id=request.project_id,
origin="podcast.images.generate",
extras={"scene_id": request.scene_id, "scene_title": request.scene_title},
),
asset_metadata={
"scene_id": request.scene_id,
"scene_title": request.scene_title,
"status": "completed",
},
)
except Exception as e:
logger.warning(f"[Podcast] Failed to save image asset: {e}")

View File

@@ -1,76 +0,0 @@
"""Shared schema/builders for content asset metadata."""
from __future__ import annotations
from typing import Any, Dict, Optional, Tuple
# Version tag written into every payload and checked by the validator.
SCHEMA_VERSION = "1.0"

# Value stored under "feature" for assets produced by the podcast maker.
PODCAST_FEATURE = "podcast_maker"

# Keys the validator requires to be present with a truthy value.
REQUIRED_KEYS = (
    "schema_version", "feature", "asset_role",
    "project_id", "status", "origin",
)
def build_asset_metadata(
    *,
    feature: str,
    asset_role: str,
    project_id: Optional[str],
    status: str,
    origin: str,
    extras: Optional[Dict[str, Any]] = None,
) -> Dict[str, Any]:
    """Build a normalized, versioned asset metadata payload.

    Args:
        feature: Name of the product feature that produced the asset.
        asset_role: Role of the asset within that feature.
        project_id: Owning project; a falsy value is stored as ``"unknown"``.
        status: Status string stored under ``"status"``.
        origin: Identifier of the code path that created the asset.
        extras: Optional additional entries. Entries whose value is ``None``
            are dropped, and keys that collide with a core field are ignored.

    Returns:
        A new dict containing the six core fields plus the accepted extras.
    """
    metadata: Dict[str, Any] = {
        "schema_version": SCHEMA_VERSION,
        "feature": feature,
        "asset_role": asset_role,
        "project_id": project_id or "unknown",
        "status": status,
        "origin": origin,
    }
    if extras:
        # Core fields win: callers that forward a legacy metadata dict as
        # extras must not be able to clobber the normalized values (a stale
        # "schema_version" or an empty "status" would fail validation).
        metadata.update(
            {
                key: value
                for key, value in extras.items()
                if value is not None and key not in metadata
            }
        )
    return metadata
def build_podcast_asset_metadata(
    *,
    asset_role: str,
    project_id: Optional[str],
    status: str = "completed",
    origin: str,
    extras: Optional[Dict[str, Any]] = None,
) -> Dict[str, Any]:
    """Build asset metadata with the feature fixed to ``PODCAST_FEATURE``.

    All arguments are forwarded unchanged to ``build_asset_metadata``;
    ``status`` defaults to ``"completed"``.
    """
    payload = build_asset_metadata(
        feature=PODCAST_FEATURE,
        origin=origin,
        status=status,
        project_id=project_id,
        asset_role=asset_role,
        extras=extras,
    )
    return payload
def validate_asset_metadata(metadata: Optional[Dict[str, Any]]) -> Tuple[bool, str]:
    """Check that ``metadata`` meets the minimum schema requirements.

    Returns:
        ``(True, "ok")`` when valid, otherwise ``(False, <reason>)``. The
        checks run in order: presence, type, required keys (each must hold a
        truthy value), then schema version.
    """
    if metadata is None:
        return False, "asset_metadata is required"

    if not isinstance(metadata, dict):
        return False, "asset_metadata must be a dictionary"

    # A key counts as missing when it is absent OR holds a falsy value.
    absent = [name for name in REQUIRED_KEYS if not metadata.get(name)]
    if absent:
        listing = ", ".join(absent)
        return False, f"asset_metadata missing required keys: {listing}"

    declared_version = metadata.get("schema_version")
    if str(declared_version) != SCHEMA_VERSION:
        return False, f"Unsupported schema_version: {declared_version}"

    return True, "ok"

View File

@@ -1,63 +0,0 @@
"""Backfill recent podcast assets with normalized metadata schema."""
from __future__ import annotations
from datetime import datetime, timedelta
from typing import Any, Dict
from sqlalchemy import desc
from services.database import SessionLocal
from models.content_asset_models import ContentAsset, AssetSource
from models.asset_metadata_schema import build_podcast_asset_metadata, validate_asset_metadata
def infer_role(meta: Dict[str, Any], filename: str) -> str:
    """Pick an asset role for a legacy metadata dict.

    Prefers a truthy ``"asset_role"``, then a truthy ``"type"``. Otherwise
    the role is guessed from the filename: ``"podcast_audio"`` for
    .mp3/.wav/.m4a (case-insensitive), else ``"podcast_asset"``.
    """
    explicit = meta.get("asset_role") or meta.get("type")
    if explicit:
        return explicit

    audio_suffixes = (".mp3", ".wav", ".m4a")
    if filename.lower().endswith(audio_suffixes):
        return "podcast_audio"
    return "podcast_asset"
def main(days: int = 90) -> None:
    """Rewrite invalid metadata on recent podcast assets into the normalized schema.

    Scans ``ContentAsset`` rows with source module ``PODCAST_MAKER`` created
    within the last ``days`` days, newest first. Rows whose metadata already
    validates are left alone; the rest get a freshly built payload. All
    changes are committed in one transaction, then a summary line is printed.

    Args:
        days: Size of the look-back window in days.
    """
    # Local import: only this function needs the core-key list.
    from models.asset_metadata_schema import REQUIRED_KEYS

    db = SessionLocal()
    updated = 0
    scanned = 0
    # NOTE(review): naive UTC timestamp; presumably created_at is stored as
    # naive UTC too - confirm against the model.
    since = datetime.utcnow() - timedelta(days=days)
    try:
        assets = (
            db.query(ContentAsset)
            .filter(ContentAsset.source_module == AssetSource.PODCAST_MAKER)
            .filter(ContentAsset.created_at >= since)
            .order_by(desc(ContentAsset.created_at))
            .all()
        )
        for asset in assets:
            scanned += 1
            meta = asset.asset_metadata or {}
            is_valid, _ = validate_asset_metadata(meta)
            if is_valid:
                continue

            role = infer_role(meta, asset.filename or "")
            # Legacy keys ride along as extras, but the core fields are held
            # back: they are re-derived below, and forwarding the stale values
            # (e.g. an empty "status" or an old "schema_version") would
            # overwrite the normalized ones and leave the row invalid.
            legacy_extras = {
                key: value for key, value in meta.items() if key not in REQUIRED_KEYS
            }
            normalized = build_podcast_asset_metadata(
                asset_role=role,
                project_id=meta.get("project_id"),
                # `or` rather than a .get() default: the validator treats a
                # present-but-falsy value as missing, so None/"" must also
                # fall back.
                status=meta.get("status") or "completed",
                origin=meta.get("origin") or "migration.backfill_podcast_asset_metadata",
                extras=legacy_extras,
            )
            asset.asset_metadata = normalized
            db.add(asset)
            updated += 1
        db.commit()
        print(f"Scanned={scanned} Updated={updated} Since={since.isoformat()}")
    finally:
        db.close()


if __name__ == "__main__":
    main()

View File

@@ -101,6 +101,7 @@ class AgentContextVFS:
"/steps/integrations": AgentFlatContextStore.STEP5_FILENAME,
}
HIGH_SIGNAL_MARKERS = ("agent_summary", "high_signal_terms", "quick_facts", "context_type")
LOW_CONFIDENCE_MARKER = "low_confidence"
def __init__(self, user_id: str, project_id: Optional[str] = None):
self.user_id = user_id
@@ -294,6 +295,101 @@ class AgentContextVFS:
)
return ranked[: max(1, top_k)]
@staticmethod
def _mnemonic_token(result: Dict[str, Any], rank: int) -> str:
    """Render one result as a ``M<rank>:<src>|<hint>|c<conf>[!]`` token.

    ``src`` is the result path with ".json" removed and "_" turned into "-",
    cut to 28 characters. ``hint`` is the reason with spaces turned into
    "-", cut to 20 characters. A trailing "!" marks a low-confidence result.
    Missing or falsy fields fall back to "unknown", "match" and 0.0.
    """
    raw_path = result.get("path") or "unknown"
    raw_reason = result.get("reason") or "match"
    score = float(result.get("confidence") or 0.0)

    source_ref = str(raw_path).replace(".json", "").replace("_", "-")[:28]
    reason_hint = str(raw_reason).replace(" ", "-")[:20]
    suffix = "!" if result.get(AgentContextVFS.LOW_CONFIDENCE_MARKER) else ""

    return f"M{rank}:{source_ref}|{reason_hint}|c{score:.2f}{suffix}"
@staticmethod
def _detect_contradictions(results: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """Flag source paths whose results disagree on both reason and relevance.

    Results are grouped by ``"path"``. A path is reported when its rows carry
    more than one distinct non-empty ``"reason"`` AND more than one distinct
    non-empty ``"relevance"`` (compared after strip + lowercase).

    Results without a path are ignored: they cannot be attributed to a
    source, so grouping them together would compare unrelated rows. Blank
    reason/relevance values are not counted as a variant, so a row that
    merely lacks a field does not create a contradiction on its own.

    Args:
        results: Result dicts; only "path", "reason" and "relevance" are read.

    Returns:
        One dict per contradictory path with keys ``path``,
        ``reason_variants``, ``relevance_variants`` (both sorted) and
        ``count`` (number of rows seen for that path).
    """
    by_path: Dict[str, List[Dict[str, Any]]] = {}
    for item in results:
        path = str(item.get("path") or "")
        if not path:
            continue
        by_path.setdefault(path, []).append(item)

    def variants(rows: List[Dict[str, Any]], key: str) -> set:
        # Normalized, non-empty values of `key` across one path's rows.
        cleaned = (str(row.get(key) or "").strip().lower() for row in rows)
        return {value for value in cleaned if value}

    contradictions: List[Dict[str, Any]] = []
    for path, rows in by_path.items():
        reasons = variants(rows, "reason")
        relevance = variants(rows, "relevance")
        if len(reasons) > 1 and len(relevance) > 1:
            contradictions.append(
                {
                    "path": path,
                    "reason_variants": sorted(reasons),
                    "relevance_variants": sorted(relevance),
                    "count": len(rows),
                }
            )
    return contradictions
def _run_synthesis_pipeline(
    self, ranked_results: List[Dict[str, Any]], *, char_budget: int = 1200, top_k: int = 5
) -> Dict[str, Any]:
    """
    Flat-context synthesis pipeline:
    1) Compress telemetry into mnemonic tokens with source references
    2) Detect contradictions and mark low-confidence heuristics
    3) Select top-ranked, budget-fitting tokens for prompt injection
    4) Persist synthesis + source lineage for explainability

    Args:
        ranked_results: Result dicts in rank order. Each is copied before
            being annotated; the inputs are not mutated.
        char_budget: Character allowance for the selected tokens. Each token
            is charged its length plus 8.
        top_k: Maximum number of tokens to select. Only the first
            ``max(1, top_k * 3)`` rows are considered.

    Returns:
        ``{"ranked_results": <annotated copies>, "synthesis": <summary>}``.
        The summary is also passed to ``self.append_activity_log``.
    """
    contradictions = self._detect_contradictions(ranked_results)
    contradiction_paths = {c["path"] for c in contradictions}

    normalized: List[Dict[str, Any]] = []
    for idx, item in enumerate(ranked_results, start=1):
        row = dict(item)
        low_conf = bool(row.get("low_probability")) or (
            str(row.get("path") or "") in contradiction_paths
        )
        row[self.LOW_CONFIDENCE_MARKER] = low_conf
        if low_conf:
            # `or 0.0` (not a .get() default) so an explicit None confidence
            # is treated as 0.0 instead of raising TypeError in float().
            base_confidence = float(row.get("confidence") or 0.0)
            # Discount by 30%, but never below a 0.05 floor.
            row["confidence"] = round(max(0.05, base_confidence * 0.7), 3)
        row["mnemonic_token"] = self._mnemonic_token(row, idx)
        normalized.append(row)

    chosen: List[Dict[str, Any]] = []
    used = 0
    for row in normalized[: max(1, top_k * 3)]:
        token = str(row.get("mnemonic_token") or "")
        cost = len(token) + 8
        # The first candidate is always admitted, even when it alone exceeds
        # the budget. Later oversize tokens are skipped (not a hard stop), so
        # a shorter token further down can still fit.
        if chosen and used + cost > char_budget:
            continue
        chosen.append(row)
        used += cost
        if len(chosen) >= top_k:
            break

    synthesis = {
        "created_at": datetime.now(timezone.utc).isoformat(),
        "top_k": top_k,
        "char_budget": char_budget,
        "char_budget_used": used,
        "selected_mnemonics": [c.get("mnemonic_token") for c in chosen],
        "source_lineage": [
            {
                "mnemonic_token": c.get("mnemonic_token"),
                "path": c.get("path"),
                "reason": c.get("reason"),
                "confidence": c.get("confidence"),
                "low_confidence": c.get(self.LOW_CONFIDENCE_MARKER, False),
            }
            for c in chosen
        ],
        "contradictions": contradictions,
    }
    self.append_activity_log(
        event_type="flat_context_synthesis",
        actor="agent_context_vfs",
        details=synthesis,
    )
    return {"ranked_results": normalized, "synthesis": synthesis}
@staticmethod
def _resolve_json_path(data: Any, path_query: str) -> Any:
"""Resolve dot/bracket JSON path such as 'data.seo_audit.recommendations[0]'."""
@@ -518,15 +614,26 @@ class AgentContextVFS:
bounded_results.append(r)
used += cost
synthesis_bundle = self._run_synthesis_pipeline(
self._static_triage(bounded_results, normalized),
char_budget=1200,
top_k=5,
)
triaged_results = synthesis_bundle["ranked_results"]
synthesis = synthesis_bundle["synthesis"]
result = {
"query": normalized,
"attempted_queries": attempted_queries,
"matched_files_count": len(matched_files),
"results": self._static_triage(bounded_results, normalized),
"results": triaged_results,
"notice": notice,
"char_budget_used": used,
"can_answer": bool(bounded_results),
"synthesis": synthesis,
"prompt_context_mnemonics": synthesis.get("selected_mnemonics", []),
}
# Top-ranked, budget-fitting mnemonic tokens are the only ones intended for prompt context injection.
result["triage_top5"] = self._llm_router_stub(result["results"], top_k=5)
logger.info(
f"[vfs_audit] user={self.store.safe_user_id} action=search_context query={normalized!r} results={len(result['results'])}"

View File

@@ -11,8 +11,6 @@ import logging
import re
from urllib.parse import urlparse
from models.asset_metadata_schema import validate_asset_metadata
logger = logging.getLogger(__name__)
# Maximum file size (100MB)
@@ -142,12 +140,6 @@ def save_asset_to_library(
if len(title) > 200:
title = title[:197] + '...'
metadata_payload = asset_metadata or {}
is_valid_metadata, validation_message = validate_asset_metadata(metadata_payload)
if not is_valid_metadata:
logger.error(f"Invalid asset metadata: {validation_message}")
return None
service = ContentAssetService(db)
asset = service.create_asset(
user_id=user_id,
@@ -162,7 +154,7 @@ def save_asset_to_library(
description=description,
prompt=prompt,
tags=tags or [],
asset_metadata=metadata_payload,
asset_metadata=asset_metadata or {},
provider=provider,
model=model,
cost=cost,