Compare commits

..

1 Commits

Author SHA1 Message Date
ي
761740de12 Add versioned podcast asset metadata schema and backfill script 2026-05-18 14:36:36 +05:30
12 changed files with 205 additions and 200 deletions

View File

@@ -5,7 +5,7 @@ API endpoints for managing unified content assets across all modules.
from fastapi import APIRouter, Depends, HTTPException, Query, Body
from sqlalchemy.orm import Session
from typing import List, Optional, Dict, Any, Set
from typing import List, Optional, Dict, Any
from pydantic import BaseModel, Field
from datetime import datetime
@@ -47,33 +47,6 @@ class AssetResponse(BaseModel):
from_attributes = True
def _parse_source_modules(source_module: Optional[List[str]]) -> Optional[List[AssetSource]]:
"""Parse source_module query values from repeated params and/or comma-separated values."""
if not source_module:
return None
parsed_values: List[AssetSource] = []
seen: Set[AssetSource] = set()
for raw_value in source_module:
for value in raw_value.split(","):
normalized = value.strip().lower()
if not normalized:
continue
try:
module = AssetSource(normalized)
except ValueError:
raise HTTPException(status_code=400, detail=f"Invalid source module: {value.strip()}")
if module not in seen:
seen.add(module)
parsed_values.append(module)
return parsed_values or None
class AssetListResponse(BaseModel):
"""Response model for asset list."""
assets: List[AssetResponse]
@@ -85,7 +58,7 @@ class AssetListResponse(BaseModel):
@router.get("/", response_model=AssetListResponse)
async def get_assets(
asset_type: Optional[str] = Query(None, description="Filter by asset type"),
source_module: Optional[List[str]] = Query(None, description="Filter by source module(s); supports repeated params and comma-separated values"),
source_module: Optional[str] = Query(None, description="Filter by source module"),
search: Optional[str] = Query(None, description="Search query"),
tags: Optional[str] = Query(None, description="Comma-separated tags"),
favorites_only: bool = Query(False, description="Only favorites"),
@@ -116,7 +89,12 @@ async def get_assets(
except ValueError:
raise HTTPException(status_code=400, detail=f"Invalid asset type: {asset_type}")
source_modules_enum = _parse_source_modules(source_module)
source_module_enum = None
if source_module:
try:
source_module_enum = AssetSource(source_module.lower())
except ValueError:
raise HTTPException(status_code=400, detail=f"Invalid source module: {source_module}")
tags_list = None
if tags:
@@ -148,7 +126,7 @@ async def get_assets(
assets, total = service.get_user_assets(
user_id=user_id,
asset_type=asset_type_enum,
source_modules=source_modules_enum,
source_module=source_module_enum,
search_query=search,
tags=tags_list,
favorites_only=favorites_only,
@@ -222,7 +200,7 @@ async def create_asset(
asset = service.create_asset(
user_id=user_id,
asset_type=asset_type_enum,
source_modules=source_modules_enum,
source_module=source_module_enum,
filename=asset_data.filename,
file_url=asset_data.file_url,
file_path=asset_data.file_path,

View File

@@ -26,6 +26,7 @@ from services.database import get_db
from middleware.auth_middleware import get_current_user, get_current_user_with_query_token
from api.story_writer.utils.auth import require_authenticated_user
from utils.asset_tracker import save_asset_to_library
from models.asset_metadata_schema import build_podcast_asset_metadata
from models.story_models import StoryAudioResult
from loguru import logger
from ..constants import get_podcast_audio_service, get_podcast_media_dir
@@ -217,11 +218,11 @@ async def upload_podcast_audio(
title=f"Uploaded Audio - {project_id}",
description="Uploaded podcast audio/voice sample",
tags=["podcast", "audio", "upload", project_id],
asset_metadata={
"project_id": project_id,
"type": "uploaded_audio",
"status": "completed",
},
asset_metadata=build_podcast_asset_metadata(
asset_role="uploaded_audio",
project_id=project_id,
origin="podcast.audio.upload",
),
)
except Exception as e:
logger.warning(f"[Podcast] Failed to save audio asset: {e}")
@@ -455,11 +456,12 @@ async def generate_podcast_audio(
provider=result.get("provider"),
model=result.get("model"),
cost=result.get("cost"),
asset_metadata={
"scene_id": request.scene_id,
"scene_title": request.scene_title,
"status": "completed",
},
asset_metadata=build_podcast_asset_metadata(
asset_role="podcast_audio",
project_id=request.project_id,
origin="podcast.audio.generate",
extras={"scene_id": request.scene_id, "scene_title": request.scene_title},
),
)
except Exception as e:
logger.warning(f"[Podcast] Failed to save audio asset: {e}")
@@ -621,13 +623,12 @@ async def combine_podcast_audio(
title=f"Combined Podcast - {request.project_id}",
description=f"Combined podcast audio from {len(request.scene_ids)} scenes",
tags=["podcast", "audio", "combined", request.project_id],
asset_metadata={
"project_id": request.project_id,
"scene_ids": request.scene_ids,
"scene_count": len(request.scene_ids),
"total_duration": total_duration,
"status": "completed",
},
asset_metadata=build_podcast_asset_metadata(
asset_role="combined_podcast_audio",
project_id=request.project_id,
origin="podcast.audio.combine",
extras={"scene_ids": request.scene_ids, "scene_count": len(request.scene_ids), "total_duration": total_duration},
),
)
except Exception as e:
logger.warning(f"[Podcast] Failed to save combined audio asset: {e}")

View File

@@ -18,6 +18,7 @@ from api.story_writer.utils.auth import require_authenticated_user
from services.llm_providers.main_image_generation import generate_image
from services.llm_providers.main_image_editing import edit_image
from utils.asset_tracker import save_asset_to_library
from models.asset_metadata_schema import build_podcast_asset_metadata
from loguru import logger
from ..constants import get_podcast_media_dir, PODCAST_AVATARS_SUBDIR
from ..presenter_personas import choose_persona_id, get_persona
@@ -111,11 +112,11 @@ async def upload_podcast_avatar(
title=f"Podcast Presenter Avatar - {project_id}",
description="Podcast presenter avatar image",
tags=["podcast", "avatar", project_id],
asset_metadata={
"project_id": project_id,
"type": "presenter_avatar",
"status": "completed",
},
asset_metadata=build_podcast_asset_metadata(
asset_role="presenter_avatar",
project_id=project_id,
origin="podcast.avatar.upload",
),
)
except Exception as e:
logger.warning(f"[Podcast] Failed to save avatar asset (non-fatal): {e}")
@@ -223,12 +224,12 @@ async def make_avatar_presentable(
tags=["podcast", "avatar", "presenter", "transformed", project_id],
provider=result.provider,
model=result.model,
asset_metadata={
"project_id": project_id,
"type": "transformed_presenter",
"original_avatar_url": avatar_url,
"status": "completed",
},
asset_metadata=build_podcast_asset_metadata(
asset_role="transformed_presenter",
project_id=project_id,
origin="podcast.avatar.make_presentable",
extras={"original_avatar_url": avatar_url},
),
)
except Exception as e:
logger.warning(f"[Podcast] Failed to save transformed avatar asset: {e}")
@@ -404,14 +405,12 @@ async def generate_podcast_presenters(
tags=["podcast", "avatar", "presenter", project_id],
provider=result.provider,
model=result.model,
asset_metadata={
"project_id": project_id,
"speaker_number": i + 1,
"type": "generated_presenter",
"status": "completed",
"persona_id": selected_persona_id,
"seed": seed,
},
asset_metadata=build_podcast_asset_metadata(
asset_role="generated_presenter",
project_id=project_id,
origin="podcast.avatar.generate",
extras={"speaker_number": i + 1, "persona_id": selected_persona_id, "seed": seed},
),
)
except Exception as e:
logger.warning(f"[Podcast] Failed to save presenter asset: {e}")

View File

@@ -16,6 +16,7 @@ from middleware.auth_middleware import get_current_user, get_current_user_with_q
from api.story_writer.utils.auth import require_authenticated_user
from services.llm_providers.main_image_generation import generate_image, generate_character_image
from utils.asset_tracker import save_asset_to_library
from models.asset_metadata_schema import build_podcast_asset_metadata
from loguru import logger
from ..constants import get_podcast_media_dir
from ..models import PodcastImageRequest, PodcastImageResponse
@@ -417,11 +418,12 @@ async def generate_podcast_scene_image(
tags=["podcast", "scene", request.scene_id],
provider=result.provider,
model=result.model,
asset_metadata={
"scene_id": request.scene_id,
"scene_title": request.scene_title,
"status": "completed",
},
asset_metadata=build_podcast_asset_metadata(
asset_role="podcast_scene_image",
project_id=request.project_id,
origin="podcast.images.generate",
extras={"scene_id": request.scene_id, "scene_title": request.scene_title},
),
)
except Exception as e:
logger.warning(f"[Podcast] Failed to save image asset: {e}")

View File

@@ -0,0 +1,76 @@
"""Shared schema/builders for content asset metadata."""
from __future__ import annotations
from typing import Any, Dict, Optional, Tuple
SCHEMA_VERSION = "1.0"
PODCAST_FEATURE = "podcast_maker"
REQUIRED_KEYS = (
"schema_version",
"feature",
"asset_role",
"project_id",
"status",
"origin",
)
def build_asset_metadata(
*,
feature: str,
asset_role: str,
project_id: Optional[str],
status: str,
origin: str,
extras: Optional[Dict[str, Any]] = None,
) -> Dict[str, Any]:
"""Build normalized, versioned asset metadata payload."""
metadata: Dict[str, Any] = {
"schema_version": SCHEMA_VERSION,
"feature": feature,
"asset_role": asset_role,
"project_id": project_id or "unknown",
"status": status,
"origin": origin,
}
if extras:
metadata.update({k: v for k, v in extras.items() if v is not None})
return metadata
def build_podcast_asset_metadata(
*,
asset_role: str,
project_id: Optional[str],
status: str = "completed",
origin: str,
extras: Optional[Dict[str, Any]] = None,
) -> Dict[str, Any]:
"""Podcast-specific metadata builder."""
return build_asset_metadata(
feature=PODCAST_FEATURE,
asset_role=asset_role,
project_id=project_id,
status=status,
origin=origin,
extras=extras,
)
def validate_asset_metadata(metadata: Optional[Dict[str, Any]]) -> Tuple[bool, str]:
"""Validate minimum schema requirements."""
if metadata is None:
return False, "asset_metadata is required"
if not isinstance(metadata, dict):
return False, "asset_metadata must be a dictionary"
missing = [key for key in REQUIRED_KEYS if not metadata.get(key)]
if missing:
return False, f"asset_metadata missing required keys: {', '.join(missing)}"
if str(metadata.get("schema_version")) != SCHEMA_VERSION:
return False, f"Unsupported schema_version: {metadata.get('schema_version')}"
return True, "ok"

View File

@@ -0,0 +1,63 @@
"""Backfill recent podcast assets with normalized metadata schema."""
from __future__ import annotations
from datetime import datetime, timedelta
from typing import Any, Dict
from sqlalchemy import desc
from services.database import SessionLocal
from models.content_asset_models import ContentAsset, AssetSource
from models.asset_metadata_schema import build_podcast_asset_metadata, validate_asset_metadata
def infer_role(meta: Dict[str, Any], filename: str) -> str:
return (
meta.get("asset_role")
or meta.get("type")
or ("podcast_audio" if filename.lower().endswith((".mp3", ".wav", ".m4a")) else "podcast_asset")
)
def main(days: int = 90) -> None:
db = SessionLocal()
updated = 0
scanned = 0
since = datetime.utcnow() - timedelta(days=days)
try:
assets = (
db.query(ContentAsset)
.filter(ContentAsset.source_module == AssetSource.PODCAST_MAKER)
.filter(ContentAsset.created_at >= since)
.order_by(desc(ContentAsset.created_at))
.all()
)
for asset in assets:
scanned += 1
meta = asset.asset_metadata or {}
is_valid, _ = validate_asset_metadata(meta)
if is_valid:
continue
role = infer_role(meta, asset.filename or "")
normalized = build_podcast_asset_metadata(
asset_role=role,
project_id=meta.get("project_id"),
status=meta.get("status", "completed"),
origin=meta.get("origin", "migration.backfill_podcast_asset_metadata"),
extras=meta,
)
asset.asset_metadata = normalized
db.add(asset)
updated += 1
db.commit()
print(f"Scanned={scanned} Updated={updated} Since={since.isoformat()}")
finally:
db.close()
if __name__ == "__main__":
main()

View File

@@ -107,7 +107,6 @@ class ContentAssetService:
user_id: str,
asset_type: Optional[AssetType] = None,
source_module: Optional[AssetSource] = None,
source_modules: Optional[List[AssetSource]] = None,
search_query: Optional[str] = None,
tags: Optional[List[str]] = None,
favorites_only: bool = False,
@@ -126,7 +125,6 @@ class ContentAssetService:
user_id: Clerk user ID
asset_type: Filter by asset type (optional)
source_module: Filter by source module (optional)
source_modules: Filter by multiple source modules (optional)
search_query: Search in title, description, prompt (optional)
tags: Filter by tags (optional)
favorites_only: Only return favorites (optional)
@@ -144,9 +142,7 @@ class ContentAssetService:
if asset_type:
query = query.filter(ContentAsset.asset_type == asset_type)
if source_modules:
query = query.filter(ContentAsset.source_module.in_(source_modules))
elif source_module:
if source_module:
query = query.filter(ContentAsset.source_module == source_module)
if favorites_only:

View File

@@ -1,31 +0,0 @@
import importlib.util
from pathlib import Path
from fastapi import HTTPException
ROOT = Path(__file__).resolve().parents[3]
ROUTER_PATH = ROOT / 'backend' / 'api' / 'content_assets' / 'router.py'
MODELS_PATH = ROOT / 'backend' / 'models' / 'content_asset_models.py'
models_spec = importlib.util.spec_from_file_location('content_asset_models', MODELS_PATH)
models = importlib.util.module_from_spec(models_spec)
models_spec.loader.exec_module(models)
AssetSource = models.AssetSource
router_spec = importlib.util.spec_from_file_location('content_assets_router', ROUTER_PATH)
router = importlib.util.module_from_spec(router_spec)
router_spec.loader.exec_module(router)
def test_parse_source_modules_supports_repeated_and_csv_values():
parsed = router._parse_source_modules(["blog_writer", "youtube,podcast"])
assert parsed == [AssetSource.BLOG_WRITER, AssetSource.YOUTUBE, AssetSource.PODCAST]
def test_parse_source_modules_raises_for_invalid_values():
try:
router._parse_source_modules(["blog_writer,unknown"])
except HTTPException as exc:
assert exc.status_code == 400
assert "Invalid source module" in exc.detail
else:
raise AssertionError("Expected HTTPException for invalid source module")

View File

@@ -1,50 +0,0 @@
import importlib.util
from pathlib import Path
ROOT = Path(__file__).resolve().parents[3]
SERVICE_PATH = ROOT / 'backend' / 'services' / 'content_asset_service.py'
MODELS_PATH = ROOT / 'backend' / 'models' / 'content_asset_models.py'
models_spec = importlib.util.spec_from_file_location('content_asset_models', MODELS_PATH)
models = importlib.util.module_from_spec(models_spec)
models_spec.loader.exec_module(models)
AssetSource = models.AssetSource
service_spec = importlib.util.spec_from_file_location('content_asset_service', SERVICE_PATH)
service_module = importlib.util.module_from_spec(service_spec)
service_spec.loader.exec_module(service_module)
ContentAssetService = service_module.ContentAssetService
class DummyQuery:
def __init__(self):
self.filters = []
def filter(self, expr):
self.filters.append(expr)
return self
def count(self): return 0
def order_by(self, *_args, **_kwargs): return self
def limit(self, *_args, **_kwargs): return self
def offset(self, *_args, **_kwargs): return self
def all(self): return []
class DummyDB:
def __init__(self): self.query_obj = DummyQuery()
def query(self, *_args, **_kwargs): return self.query_obj
def test_get_user_assets_accepts_multiple_source_modules_filter():
db = DummyDB()
service = ContentAssetService(db)
assets, total = service.get_user_assets(
user_id="user-1",
source_modules=[AssetSource.BLOG_WRITER, AssetSource.YOUTUBE],
)
assert assets == []
assert total == 0
assert len(db.query_obj.filters) >= 2

View File

@@ -11,6 +11,8 @@ import logging
import re
from urllib.parse import urlparse
from models.asset_metadata_schema import validate_asset_metadata
logger = logging.getLogger(__name__)
# Maximum file size (100MB)
@@ -140,6 +142,12 @@ def save_asset_to_library(
if len(title) > 200:
title = title[:197] + '...'
metadata_payload = asset_metadata or {}
is_valid_metadata, validation_message = validate_asset_metadata(metadata_payload)
if not is_valid_metadata:
logger.error(f"Invalid asset metadata: {validation_message}")
return None
service = ContentAssetService(db)
asset = service.create_asset(
user_id=user_id,
@@ -154,7 +162,7 @@ def save_asset_to_library(
description=description,
prompt=prompt,
tags=tags or [],
asset_metadata=asset_metadata or {},
asset_metadata=metadata_payload,
provider=provider,
model=model,
cost=cost,

View File

@@ -1,35 +0,0 @@
import { renderHook, waitFor } from '@testing-library/react';
import { useContentAssets } from '../useContentAssets';
const getTokenMock = jest.fn();
jest.mock('@clerk/clerk-react', () => ({
useAuth: () => ({ getToken: getTokenMock }),
}));
describe('useContentAssets', () => {
beforeEach(() => {
getTokenMock.mockResolvedValue('test-token');
global.fetch = jest.fn().mockResolvedValue({
ok: true,
json: async () => ({ assets: [], total: 0, limit: 100, offset: 0 }),
} as Response);
});
afterEach(() => {
jest.clearAllMocks();
});
it('sends all source_module values as repeated query params', async () => {
renderHook(() =>
useContentAssets({ source_module: ['blog_writer', 'youtube'], limit: 50, offset: 0 })
);
await waitFor(() => expect(global.fetch).toHaveBeenCalled());
const calledUrl = (global.fetch as jest.Mock).mock.calls[0][0] as string;
const params = new URL(calledUrl).searchParams;
expect(params.getAll('source_module')).toEqual(['blog_writer', 'youtube']);
});
});

View File

@@ -29,7 +29,7 @@ export interface ContentAsset {
export interface AssetFilters {
asset_type?: 'text' | 'image' | 'video' | 'audio';
source_module?: string | string[]; // Supports single or multiple source modules
source_module?: string | string[]; // Support single or multiple source modules
search?: string;
tags?: string[];
favorites_only?: boolean;
@@ -146,10 +146,8 @@ export const useContentAssets = (filters: AssetFilters = {}) => {
if (currentFilters.source_module) {
// Handle both string and array cases
if (Array.isArray(currentFilters.source_module)) {
// Send every selected source module as repeated query params
currentFilters.source_module.forEach((module) => {
params.append('source_module', module);
});
// For arrays, use the first value (backend doesn't support multiple yet)
params.append('source_module', currentFilters.source_module[0]);
} else {
params.append('source_module', currentFilters.source_module);
}