From 761740de126dfb6b4f53159b84466a72e0a8c58c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D9=8A?= Date: Mon, 18 May 2026 14:36:36 +0530 Subject: [PATCH] Add versioned podcast asset metadata schema and backfill script --- backend/api/podcast/handlers/audio.py | 35 ++++----- backend/api/podcast/handlers/avatar.py | 37 +++++---- backend/api/podcast/handlers/images.py | 12 +-- backend/models/asset_metadata_schema.py | 76 +++++++++++++++++++ .../backfill_podcast_asset_metadata.py | 63 +++++++++++++++ backend/utils/asset_tracker.py | 10 ++- 6 files changed, 191 insertions(+), 42 deletions(-) create mode 100644 backend/models/asset_metadata_schema.py create mode 100644 backend/scripts/backfill_podcast_asset_metadata.py diff --git a/backend/api/podcast/handlers/audio.py b/backend/api/podcast/handlers/audio.py index 4f869867..0b08e95b 100644 --- a/backend/api/podcast/handlers/audio.py +++ b/backend/api/podcast/handlers/audio.py @@ -26,6 +26,7 @@ from services.database import get_db from middleware.auth_middleware import get_current_user, get_current_user_with_query_token from api.story_writer.utils.auth import require_authenticated_user from utils.asset_tracker import save_asset_to_library +from models.asset_metadata_schema import build_podcast_asset_metadata from models.story_models import StoryAudioResult from loguru import logger from ..constants import get_podcast_audio_service, get_podcast_media_dir @@ -217,11 +218,11 @@ async def upload_podcast_audio( title=f"Uploaded Audio - {project_id}", description="Uploaded podcast audio/voice sample", tags=["podcast", "audio", "upload", project_id], - asset_metadata={ - "project_id": project_id, - "type": "uploaded_audio", - "status": "completed", - }, + asset_metadata=build_podcast_asset_metadata( + asset_role="uploaded_audio", + project_id=project_id, + origin="podcast.audio.upload", + ), ) except Exception as e: logger.warning(f"[Podcast] Failed to save audio asset: {e}") @@ -455,11 +456,12 @@ async def generate_podcast_audio( 
provider=result.get("provider"), model=result.get("model"), cost=result.get("cost"), - asset_metadata={ - "scene_id": request.scene_id, - "scene_title": request.scene_title, - "status": "completed", - }, + asset_metadata=build_podcast_asset_metadata( + asset_role="podcast_audio", + project_id=request.project_id, + origin="podcast.audio.generate", + extras={"scene_id": request.scene_id, "scene_title": request.scene_title}, + ), ) except Exception as e: logger.warning(f"[Podcast] Failed to save audio asset: {e}") @@ -621,13 +623,12 @@ async def combine_podcast_audio( title=f"Combined Podcast - {request.project_id}", description=f"Combined podcast audio from {len(request.scene_ids)} scenes", tags=["podcast", "audio", "combined", request.project_id], - asset_metadata={ - "project_id": request.project_id, - "scene_ids": request.scene_ids, - "scene_count": len(request.scene_ids), - "total_duration": total_duration, - "status": "completed", - }, + asset_metadata=build_podcast_asset_metadata( + asset_role="combined_podcast_audio", + project_id=request.project_id, + origin="podcast.audio.combine", + extras={"scene_ids": request.scene_ids, "scene_count": len(request.scene_ids), "total_duration": total_duration}, + ), ) except Exception as e: logger.warning(f"[Podcast] Failed to save combined audio asset: {e}") diff --git a/backend/api/podcast/handlers/avatar.py b/backend/api/podcast/handlers/avatar.py index 261cbd23..3f49c35e 100644 --- a/backend/api/podcast/handlers/avatar.py +++ b/backend/api/podcast/handlers/avatar.py @@ -18,6 +18,7 @@ from api.story_writer.utils.auth import require_authenticated_user from services.llm_providers.main_image_generation import generate_image from services.llm_providers.main_image_editing import edit_image from utils.asset_tracker import save_asset_to_library +from models.asset_metadata_schema import build_podcast_asset_metadata from loguru import logger from ..constants import get_podcast_media_dir, PODCAST_AVATARS_SUBDIR from 
..presenter_personas import choose_persona_id, get_persona @@ -111,11 +112,11 @@ async def upload_podcast_avatar( title=f"Podcast Presenter Avatar - {project_id}", description="Podcast presenter avatar image", tags=["podcast", "avatar", project_id], - asset_metadata={ - "project_id": project_id, - "type": "presenter_avatar", - "status": "completed", - }, + asset_metadata=build_podcast_asset_metadata( + asset_role="presenter_avatar", + project_id=project_id, + origin="podcast.avatar.upload", + ), ) except Exception as e: logger.warning(f"[Podcast] Failed to save avatar asset (non-fatal): {e}") @@ -223,12 +224,12 @@ async def make_avatar_presentable( tags=["podcast", "avatar", "presenter", "transformed", project_id], provider=result.provider, model=result.model, - asset_metadata={ - "project_id": project_id, - "type": "transformed_presenter", - "original_avatar_url": avatar_url, - "status": "completed", - }, + asset_metadata=build_podcast_asset_metadata( + asset_role="transformed_presenter", + project_id=project_id, + origin="podcast.avatar.make_presentable", + extras={"original_avatar_url": avatar_url}, + ), ) except Exception as e: logger.warning(f"[Podcast] Failed to save transformed avatar asset: {e}") @@ -404,14 +405,12 @@ async def generate_podcast_presenters( tags=["podcast", "avatar", "presenter", project_id], provider=result.provider, model=result.model, - asset_metadata={ - "project_id": project_id, - "speaker_number": i + 1, - "type": "generated_presenter", - "status": "completed", - "persona_id": selected_persona_id, - "seed": seed, - }, + asset_metadata=build_podcast_asset_metadata( + asset_role="generated_presenter", + project_id=project_id, + origin="podcast.avatar.generate", + extras={"speaker_number": i + 1, "persona_id": selected_persona_id, "seed": seed}, + ), ) except Exception as e: logger.warning(f"[Podcast] Failed to save presenter asset: {e}") diff --git a/backend/api/podcast/handlers/images.py b/backend/api/podcast/handlers/images.py index 
"""Shared schema/builders for content asset metadata."""

from typing import Any, Dict, Optional, Tuple

# Version stamped into every payload built here; validation rejects any other value.
SCHEMA_VERSION = "1.0"
# Value written to the "feature" key by build_podcast_asset_metadata.
PODCAST_FEATURE = "podcast_maker"

# Core keys that every payload must carry with a truthy value.
# They are owned by the builder: caller-supplied extras may never replace them.
REQUIRED_KEYS = (
    "schema_version",
    "feature",
    "asset_role",
    "project_id",
    "status",
    "origin",
)


def build_asset_metadata(
    *,
    feature: str,
    asset_role: str,
    project_id: Optional[str],
    status: str,
    origin: str,
    extras: Optional[Dict[str, Any]] = None,
) -> Dict[str, Any]:
    """Build a normalized, versioned asset metadata payload.

    Args:
        feature: Name of the product feature that produced the asset.
        asset_role: Role of the asset within that feature.
        project_id: Owning project; a falsy value is stored as ``"unknown"``.
        status: Lifecycle status recorded for the asset.
        origin: Identifier of the code path that created the payload.
        extras: Optional additional keys. Entries whose value is ``None`` and
            entries whose key is one of ``REQUIRED_KEYS`` are ignored.

    Returns:
        A new dict containing the core keys plus the accepted extras. The
        ``extras`` mapping itself is not modified.
    """
    metadata: Dict[str, Any] = {
        "schema_version": SCHEMA_VERSION,
        "feature": feature,
        "asset_role": asset_role,
        "project_id": project_id or "unknown",
        "status": status,
        "origin": origin,
    }
    if extras:
        # Reserved keys are skipped so a legacy dict passed wholesale as extras
        # (e.g. with an empty project_id or an old schema_version) cannot undo
        # the normalization performed above.
        metadata.update(
            {
                key: value
                for key, value in extras.items()
                if value is not None and key not in REQUIRED_KEYS
            }
        )
    return metadata


def build_podcast_asset_metadata(
    *,
    asset_role: str,
    project_id: Optional[str],
    status: str = "completed",
    origin: str,
    extras: Optional[Dict[str, Any]] = None,
) -> Dict[str, Any]:
    """Podcast-specific metadata builder.

    Delegates to ``build_asset_metadata`` with ``feature`` fixed to
    ``PODCAST_FEATURE``; ``status`` defaults to ``"completed"``.
    """
    return build_asset_metadata(
        feature=PODCAST_FEATURE,
        asset_role=asset_role,
        project_id=project_id,
        status=status,
        origin=origin,
        extras=extras,
    )


def validate_asset_metadata(metadata: Optional[Dict[str, Any]]) -> Tuple[bool, str]:
    """Validate minimum schema requirements.

    Returns:
        ``(True, "ok")`` when ``metadata`` is a dict that has a truthy value
        for every key in ``REQUIRED_KEYS`` and whose ``schema_version``
        stringifies to ``SCHEMA_VERSION``; otherwise ``(False, reason)``.
    """
    if metadata is None:
        return False, "asset_metadata is required"
    if not isinstance(metadata, dict):
        return False, "asset_metadata must be a dictionary"

    # A key counts as missing when it is absent or holds a falsy value.
    missing = [key for key in REQUIRED_KEYS if not metadata.get(key)]
    if missing:
        return False, f"asset_metadata missing required keys: {', '.join(missing)}"

    if str(metadata.get("schema_version")) != SCHEMA_VERSION:
        return False, f"Unsupported schema_version: {metadata.get('schema_version')}"

    return True, "ok"
def infer_role(meta: Dict[str, Any], filename: str) -> str:
    """Choose an ``asset_role`` for an asset whose metadata predates the schema.

    Preference order: an existing ``asset_role`` value, then the legacy
    ``type`` value, then ``"podcast_audio"`` when the filename ends in
    ``.mp3``/``.wav``/``.m4a`` (case-insensitive), else ``"podcast_asset"``.
    """
    explicit = meta.get("asset_role") or meta.get("type")
    if explicit:
        return explicit
    # Tolerate a missing filename instead of raising on ``None.lower()``.
    if (filename or "").lower().endswith((".mp3", ".wav", ".m4a")):
        return "podcast_audio"
    return "podcast_asset"


def main(days: int = 90) -> None:
    """Rewrite metadata of recent podcast assets into the versioned schema.

    Loads ``ContentAsset`` rows whose ``source_module`` is
    ``AssetSource.PODCAST_MAKER`` and whose ``created_at`` falls within the
    last ``days`` days, newest first. Rows whose metadata already validates
    are left untouched; the rest are rebuilt with
    ``build_podcast_asset_metadata``. All changes are committed once at the
    end; on any error the session is rolled back and the error re-raised.

    Args:
        days: Size of the look-back window in days.
    """
    # Function-scope imports: names this block needs beyond the script's
    # existing top-of-file imports.
    from datetime import timezone

    from models.asset_metadata_schema import REQUIRED_KEYS

    db = SessionLocal()
    updated = 0
    scanned = 0
    skipped = 0
    # Same naive-UTC value datetime.utcnow() produced, without the deprecated call.
    since = datetime.now(timezone.utc).replace(tzinfo=None) - timedelta(days=days)
    try:
        assets = (
            db.query(ContentAsset)
            .filter(ContentAsset.source_module == AssetSource.PODCAST_MAKER)
            .filter(ContentAsset.created_at >= since)
            .order_by(desc(ContentAsset.created_at))
            .all()
        )

        for asset in assets:
            scanned += 1
            stored = asset.asset_metadata
            if stored is not None and not isinstance(stored, dict):
                # Unknown shape: leave the row alone rather than crash the
                # whole run or overwrite data that cannot be interpreted.
                skipped += 1
                continue
            meta = stored or {}
            is_valid, _ = validate_asset_metadata(meta)
            if is_valid:
                continue

            # Legacy values ride along as extras, minus the schema's own keys
            # so stale or empty legacy values cannot replace normalized ones.
            legacy_extras = {
                key: value for key, value in meta.items() if key not in REQUIRED_KEYS
            }
            normalized = build_podcast_asset_metadata(
                asset_role=infer_role(meta, asset.filename or ""),
                project_id=meta.get("project_id"),
                # ``or`` (not a .get default) so a stored None/"" also falls back.
                status=meta.get("status") or "completed",
                origin=meta.get("origin") or "migration.backfill_podcast_asset_metadata",
                extras=legacy_extras,
            )
            asset.asset_metadata = normalized
            db.add(asset)
            updated += 1

        db.commit()
        print(f"Scanned={scanned} Updated={updated} Since={since.isoformat()}")
        if skipped:
            print(f"Skipped={skipped} (asset_metadata is not a dictionary)")
    except Exception:
        # Discard partial changes explicitly before surfacing the failure.
        db.rollback()
        raise
    finally:
        db.close()


if __name__ == "__main__":
    main()
+ metadata_payload = asset_metadata or {} + is_valid_metadata, validation_message = validate_asset_metadata(metadata_payload) + if not is_valid_metadata: + logger.error(f"Invalid asset metadata: {validation_message}") + return None + service = ContentAssetService(db) asset = service.create_asset( user_id=user_id, @@ -154,7 +162,7 @@ def save_asset_to_library( description=description, prompt=prompt, tags=tags or [], - asset_metadata=asset_metadata or {}, + asset_metadata=metadata_payload, provider=provider, model=model, cost=cost,