Files
ALwrity/backend/scripts/backfill_podcast_asset_metadata.py

64 lines
1.9 KiB
Python

"""Backfill recent podcast assets with normalized metadata schema."""
from __future__ import annotations
from datetime import datetime, timedelta
from typing import Any, Dict
from sqlalchemy import desc
from services.database import SessionLocal
from models.content_asset_models import ContentAsset, AssetSource
from models.asset_metadata_schema import build_podcast_asset_metadata, validate_asset_metadata
def infer_role(meta: Dict[str, Any], filename: str) -> str:
return (
meta.get("asset_role")
or meta.get("type")
or ("podcast_audio" if filename.lower().endswith((".mp3", ".wav", ".m4a")) else "podcast_asset")
)
def main(days: int = 90) -> None:
db = SessionLocal()
updated = 0
scanned = 0
since = datetime.utcnow() - timedelta(days=days)
try:
assets = (
db.query(ContentAsset)
.filter(ContentAsset.source_module == AssetSource.PODCAST_MAKER)
.filter(ContentAsset.created_at >= since)
.order_by(desc(ContentAsset.created_at))
.all()
)
for asset in assets:
scanned += 1
meta = asset.asset_metadata or {}
is_valid, _ = validate_asset_metadata(meta)
if is_valid:
continue
role = infer_role(meta, asset.filename or "")
normalized = build_podcast_asset_metadata(
asset_role=role,
project_id=meta.get("project_id"),
status=meta.get("status", "completed"),
origin=meta.get("origin", "migration.backfill_podcast_asset_metadata"),
extras=meta,
)
asset.asset_metadata = normalized
db.add(asset)
updated += 1
db.commit()
print(f"Scanned={scanned} Updated={updated} Since={since.isoformat()}")
finally:
db.close()
if __name__ == "__main__":
main()