Add versioned podcast asset metadata schema and backfill script
This commit is contained in:
63
backend/scripts/backfill_podcast_asset_metadata.py
Normal file
63
backend/scripts/backfill_podcast_asset_metadata.py
Normal file
@@ -0,0 +1,63 @@
|
||||
"""Backfill recent podcast assets with normalized metadata schema."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import datetime, timedelta
|
||||
from typing import Any, Dict
|
||||
|
||||
from sqlalchemy import desc
|
||||
|
||||
from services.database import SessionLocal
|
||||
from models.content_asset_models import ContentAsset, AssetSource
|
||||
from models.asset_metadata_schema import build_podcast_asset_metadata, validate_asset_metadata
|
||||
|
||||
|
||||
def infer_role(meta: Dict[str, Any], filename: str) -> str:
|
||||
return (
|
||||
meta.get("asset_role")
|
||||
or meta.get("type")
|
||||
or ("podcast_audio" if filename.lower().endswith((".mp3", ".wav", ".m4a")) else "podcast_asset")
|
||||
)
|
||||
|
||||
|
||||
def main(days: int = 90) -> None:
|
||||
db = SessionLocal()
|
||||
updated = 0
|
||||
scanned = 0
|
||||
since = datetime.utcnow() - timedelta(days=days)
|
||||
try:
|
||||
assets = (
|
||||
db.query(ContentAsset)
|
||||
.filter(ContentAsset.source_module == AssetSource.PODCAST_MAKER)
|
||||
.filter(ContentAsset.created_at >= since)
|
||||
.order_by(desc(ContentAsset.created_at))
|
||||
.all()
|
||||
)
|
||||
|
||||
for asset in assets:
|
||||
scanned += 1
|
||||
meta = asset.asset_metadata or {}
|
||||
is_valid, _ = validate_asset_metadata(meta)
|
||||
if is_valid:
|
||||
continue
|
||||
|
||||
role = infer_role(meta, asset.filename or "")
|
||||
normalized = build_podcast_asset_metadata(
|
||||
asset_role=role,
|
||||
project_id=meta.get("project_id"),
|
||||
status=meta.get("status", "completed"),
|
||||
origin=meta.get("origin", "migration.backfill_podcast_asset_metadata"),
|
||||
extras=meta,
|
||||
)
|
||||
asset.asset_metadata = normalized
|
||||
db.add(asset)
|
||||
updated += 1
|
||||
|
||||
db.commit()
|
||||
print(f"Scanned={scanned} Updated={updated} Since={since.isoformat()}")
|
||||
finally:
|
||||
db.close()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
Reference in New Issue
Block a user