Compare commits

..

1 Commits

Author SHA1 Message Date
ي
761740de12 Add versioned podcast asset metadata schema and backfill script 2026-05-18 14:36:36 +05:30
10 changed files with 202 additions and 128 deletions

View File

@@ -26,6 +26,7 @@ from services.database import get_db
from middleware.auth_middleware import get_current_user, get_current_user_with_query_token
from api.story_writer.utils.auth import require_authenticated_user
from utils.asset_tracker import save_asset_to_library
from models.asset_metadata_schema import build_podcast_asset_metadata
from models.story_models import StoryAudioResult
from loguru import logger
from ..constants import get_podcast_audio_service, get_podcast_media_dir
@@ -217,11 +218,11 @@ async def upload_podcast_audio(
title=f"Uploaded Audio - {project_id}",
description="Uploaded podcast audio/voice sample",
tags=["podcast", "audio", "upload", project_id],
asset_metadata={
"project_id": project_id,
"type": "uploaded_audio",
"status": "completed",
},
asset_metadata=build_podcast_asset_metadata(
asset_role="uploaded_audio",
project_id=project_id,
origin="podcast.audio.upload",
),
)
except Exception as e:
logger.warning(f"[Podcast] Failed to save audio asset: {e}")
@@ -455,11 +456,12 @@ async def generate_podcast_audio(
provider=result.get("provider"),
model=result.get("model"),
cost=result.get("cost"),
asset_metadata={
"scene_id": request.scene_id,
"scene_title": request.scene_title,
"status": "completed",
},
asset_metadata=build_podcast_asset_metadata(
asset_role="podcast_audio",
project_id=request.project_id,
origin="podcast.audio.generate",
extras={"scene_id": request.scene_id, "scene_title": request.scene_title},
),
)
except Exception as e:
logger.warning(f"[Podcast] Failed to save audio asset: {e}")
@@ -621,13 +623,12 @@ async def combine_podcast_audio(
title=f"Combined Podcast - {request.project_id}",
description=f"Combined podcast audio from {len(request.scene_ids)} scenes",
tags=["podcast", "audio", "combined", request.project_id],
asset_metadata={
"project_id": request.project_id,
"scene_ids": request.scene_ids,
"scene_count": len(request.scene_ids),
"total_duration": total_duration,
"status": "completed",
},
asset_metadata=build_podcast_asset_metadata(
asset_role="combined_podcast_audio",
project_id=request.project_id,
origin="podcast.audio.combine",
extras={"scene_ids": request.scene_ids, "scene_count": len(request.scene_ids), "total_duration": total_duration},
),
)
except Exception as e:
logger.warning(f"[Podcast] Failed to save combined audio asset: {e}")

View File

@@ -18,6 +18,7 @@ from api.story_writer.utils.auth import require_authenticated_user
from services.llm_providers.main_image_generation import generate_image
from services.llm_providers.main_image_editing import edit_image
from utils.asset_tracker import save_asset_to_library
from models.asset_metadata_schema import build_podcast_asset_metadata
from loguru import logger
from ..constants import get_podcast_media_dir, PODCAST_AVATARS_SUBDIR
from ..presenter_personas import choose_persona_id, get_persona
@@ -111,11 +112,11 @@ async def upload_podcast_avatar(
title=f"Podcast Presenter Avatar - {project_id}",
description="Podcast presenter avatar image",
tags=["podcast", "avatar", project_id],
asset_metadata={
"project_id": project_id,
"type": "presenter_avatar",
"status": "completed",
},
asset_metadata=build_podcast_asset_metadata(
asset_role="presenter_avatar",
project_id=project_id,
origin="podcast.avatar.upload",
),
)
except Exception as e:
logger.warning(f"[Podcast] Failed to save avatar asset (non-fatal): {e}")
@@ -223,12 +224,12 @@ async def make_avatar_presentable(
tags=["podcast", "avatar", "presenter", "transformed", project_id],
provider=result.provider,
model=result.model,
asset_metadata={
"project_id": project_id,
"type": "transformed_presenter",
"original_avatar_url": avatar_url,
"status": "completed",
},
asset_metadata=build_podcast_asset_metadata(
asset_role="transformed_presenter",
project_id=project_id,
origin="podcast.avatar.make_presentable",
extras={"original_avatar_url": avatar_url},
),
)
except Exception as e:
logger.warning(f"[Podcast] Failed to save transformed avatar asset: {e}")
@@ -404,14 +405,12 @@ async def generate_podcast_presenters(
tags=["podcast", "avatar", "presenter", project_id],
provider=result.provider,
model=result.model,
asset_metadata={
"project_id": project_id,
"speaker_number": i + 1,
"type": "generated_presenter",
"status": "completed",
"persona_id": selected_persona_id,
"seed": seed,
},
asset_metadata=build_podcast_asset_metadata(
asset_role="generated_presenter",
project_id=project_id,
origin="podcast.avatar.generate",
extras={"speaker_number": i + 1, "persona_id": selected_persona_id, "seed": seed},
),
)
except Exception as e:
logger.warning(f"[Podcast] Failed to save presenter asset: {e}")

View File

@@ -16,6 +16,7 @@ from middleware.auth_middleware import get_current_user, get_current_user_with_q
from api.story_writer.utils.auth import require_authenticated_user
from services.llm_providers.main_image_generation import generate_image, generate_character_image
from utils.asset_tracker import save_asset_to_library
from models.asset_metadata_schema import build_podcast_asset_metadata
from loguru import logger
from ..constants import get_podcast_media_dir
from ..models import PodcastImageRequest, PodcastImageResponse
@@ -417,11 +418,12 @@ async def generate_podcast_scene_image(
tags=["podcast", "scene", request.scene_id],
provider=result.provider,
model=result.model,
asset_metadata={
"scene_id": request.scene_id,
"scene_title": request.scene_title,
"status": "completed",
},
asset_metadata=build_podcast_asset_metadata(
asset_role="podcast_scene_image",
project_id=request.project_id,
origin="podcast.images.generate",
extras={"scene_id": request.scene_id, "scene_title": request.scene_title},
),
)
except Exception as e:
logger.warning(f"[Podcast] Failed to save image asset: {e}")

View File

@@ -0,0 +1,76 @@
"""Shared schema/builders for content asset metadata."""
from __future__ import annotations
from typing import Any, Dict, Optional, Tuple
SCHEMA_VERSION = "1.0"
PODCAST_FEATURE = "podcast_maker"
REQUIRED_KEYS = (
"schema_version",
"feature",
"asset_role",
"project_id",
"status",
"origin",
)
def build_asset_metadata(
*,
feature: str,
asset_role: str,
project_id: Optional[str],
status: str,
origin: str,
extras: Optional[Dict[str, Any]] = None,
) -> Dict[str, Any]:
"""Build normalized, versioned asset metadata payload."""
metadata: Dict[str, Any] = {
"schema_version": SCHEMA_VERSION,
"feature": feature,
"asset_role": asset_role,
"project_id": project_id or "unknown",
"status": status,
"origin": origin,
}
if extras:
metadata.update({k: v for k, v in extras.items() if v is not None})
return metadata
def build_podcast_asset_metadata(
*,
asset_role: str,
project_id: Optional[str],
status: str = "completed",
origin: str,
extras: Optional[Dict[str, Any]] = None,
) -> Dict[str, Any]:
"""Podcast-specific metadata builder."""
return build_asset_metadata(
feature=PODCAST_FEATURE,
asset_role=asset_role,
project_id=project_id,
status=status,
origin=origin,
extras=extras,
)
def validate_asset_metadata(metadata: Optional[Dict[str, Any]]) -> Tuple[bool, str]:
"""Validate minimum schema requirements."""
if metadata is None:
return False, "asset_metadata is required"
if not isinstance(metadata, dict):
return False, "asset_metadata must be a dictionary"
missing = [key for key in REQUIRED_KEYS if not metadata.get(key)]
if missing:
return False, f"asset_metadata missing required keys: {', '.join(missing)}"
if str(metadata.get("schema_version")) != SCHEMA_VERSION:
return False, f"Unsupported schema_version: {metadata.get('schema_version')}"
return True, "ok"

View File

@@ -40,10 +40,6 @@ class OAuthTokenMonitoringTask(Base):
# Scheduling
next_check = Column(DateTime, nullable=True, index=True) # Next scheduled check time
next_retry_at = Column(DateTime, nullable=True, index=True) # Backoff retry schedule for refresh failures
refresh_attempts = Column(Integer, default=0) # Current retry attempt count for refresh workflow
terminal_failure_reason = Column(Text, nullable=True) # Permanent failure reason requiring user action
channel_status = Column(String(32), default='connected') # connected, degraded, disconnected
# Metadata
created_at = Column(DateTime, default=datetime.utcnow)
@@ -101,3 +97,4 @@ class OAuthTokenExecutionLog(Base):
def __repr__(self):
return f"<OAuthTokenExecutionLog(id={self.id}, task_id={self.task_id}, status={self.status}, execution_date={self.execution_date})>"

View File

@@ -0,0 +1,63 @@
"""Backfill recent podcast assets with normalized metadata schema."""
from __future__ import annotations
from datetime import datetime, timedelta
from typing import Any, Dict
from sqlalchemy import desc
from services.database import SessionLocal
from models.content_asset_models import ContentAsset, AssetSource
from models.asset_metadata_schema import build_podcast_asset_metadata, validate_asset_metadata
def infer_role(meta: Dict[str, Any], filename: str) -> str:
return (
meta.get("asset_role")
or meta.get("type")
or ("podcast_audio" if filename.lower().endswith((".mp3", ".wav", ".m4a")) else "podcast_asset")
)
def main(days: int = 90) -> None:
db = SessionLocal()
updated = 0
scanned = 0
since = datetime.utcnow() - timedelta(days=days)
try:
assets = (
db.query(ContentAsset)
.filter(ContentAsset.source_module == AssetSource.PODCAST_MAKER)
.filter(ContentAsset.created_at >= since)
.order_by(desc(ContentAsset.created_at))
.all()
)
for asset in assets:
scanned += 1
meta = asset.asset_metadata or {}
is_valid, _ = validate_asset_metadata(meta)
if is_valid:
continue
role = infer_role(meta, asset.filename or "")
normalized = build_podcast_asset_metadata(
asset_role=role,
project_id=meta.get("project_id"),
status=meta.get("status", "completed"),
origin=meta.get("origin", "migration.backfill_podcast_asset_metadata"),
extras=meta,
)
asset.asset_metadata = normalized
db.add(asset)
updated += 1
db.commit()
print(f"Scanned={scanned} Updated={updated} Since={since.isoformat()}")
finally:
db.close()
if __name__ == "__main__":
main()

View File

@@ -26,10 +26,7 @@ from .executors.advertools_executor import AdvertoolsExecutor
from .executors.sif_indexing_executor import SIFIndexingExecutor
from .executors.market_trends_executor import MarketTrendsExecutor
from .utils.task_loader import load_due_monitoring_tasks
from .utils.oauth_token_task_loader import (
load_due_oauth_token_monitoring_tasks,
load_near_expiry_oauth_token_tasks
)
from .utils.oauth_token_task_loader import load_due_oauth_token_monitoring_tasks
from .utils.website_analysis_task_loader import load_due_website_analysis_tasks
from .utils.onboarding_full_website_analysis_task_loader import load_due_onboarding_full_website_analysis_tasks
from .utils.deep_competitor_analysis_task_loader import load_due_deep_competitor_analysis_tasks
@@ -73,11 +70,6 @@ def get_scheduler() -> TaskScheduler:
oauth_token_executor,
load_due_oauth_token_monitoring_tasks
)
_scheduler_instance.register_executor(
'oauth_token_refresh',
oauth_token_executor,
load_near_expiry_oauth_token_tasks
)
# Register website analysis executor
website_analysis_executor = WebsiteAnalysisExecutor()

View File

@@ -42,8 +42,6 @@ class OAuthTokenMonitoringExecutor(TaskExecutor):
self.exception_handler = SchedulerExceptionHandler()
# Expiration warning window (7 days before expiration)
self.expiration_warning_days = 7
self.max_refresh_retries = 3
self.base_retry_backoff_minutes = 15
async def execute_task(self, task: OAuthTokenMonitoringTask, db: Session) -> TaskExecutionResult:
"""
@@ -95,10 +93,6 @@ class OAuthTokenMonitoringExecutor(TaskExecutor):
task.last_success = datetime.utcnow()
task.status = 'active'
task.failure_reason = None
task.terminal_failure_reason = None
task.channel_status = 'connected'
task.refresh_attempts = 0
task.next_retry_at = None
# Reset failure tracking on success
task.consecutive_failures = 0
task.failure_pattern = None
@@ -118,7 +112,6 @@ class OAuthTokenMonitoringExecutor(TaskExecutor):
task.last_failure = datetime.utcnow()
task.failure_reason = result.error_message
task.refresh_attempts = (task.refresh_attempts or 0) + 1
if pattern and pattern.should_cool_off:
# Mark task for human intervention
@@ -133,9 +126,6 @@ class OAuthTokenMonitoringExecutor(TaskExecutor):
}
# Clear next_check - task won't run automatically
task.next_check = None
task.next_retry_at = None
task.channel_status = "disconnected"
task.terminal_failure_reason = result.error_message
self.logger.warning(
f"Task {task.id} marked for human intervention: "
@@ -143,17 +133,10 @@ class OAuthTokenMonitoringExecutor(TaskExecutor):
f"reason: {pattern.failure_reason.value}"
)
else:
# Normal failure handling
task.status = 'failed'
task.consecutive_failures = (task.consecutive_failures or 0) + 1
if task.refresh_attempts >= self.max_refresh_retries:
task.status = 'failed'
task.channel_status = 'disconnected'
task.terminal_failure_reason = result.error_message
task.next_retry_at = None
else:
task.status = 'degraded'
task.channel_status = 'degraded'
delay_minutes = self.base_retry_backoff_minutes * (2 ** (task.refresh_attempts - 1))
task.next_retry_at = datetime.utcnow() + timedelta(minutes=delay_minutes)
# Do NOT update next_check - wait for manual trigger
self.logger.warning(
f"OAuth token refresh failed for user {user_id}, platform {platform}. "
@@ -161,7 +144,7 @@ class OAuthTokenMonitoringExecutor(TaskExecutor):
)
# Create UsageAlert notification for the user
self._create_failure_alert(user_id, platform, result.error_message, result.result_data, db, task)
self._create_failure_alert(user_id, platform, result.error_message, result.result_data, db)
task.updated_at = datetime.utcnow()
db.commit()
@@ -210,14 +193,12 @@ class OAuthTokenMonitoringExecutor(TaskExecutor):
task.last_failure = datetime.utcnow()
task.failure_reason = str(e)
task.status = 'failed'
task.channel_status = 'disconnected'
task.terminal_failure_reason = str(e)
task.last_check = datetime.utcnow()
task.updated_at = datetime.utcnow()
task.next_retry_at = None
# Do NOT update next_check - wait for manual trigger
# Create UsageAlert notification for the user
self._create_failure_alert(user_id, task.platform, str(e), None, db, task)
self._create_failure_alert(user_id, task.platform, str(e), None, db)
db.commit()
except Exception as commit_error:
@@ -670,8 +651,7 @@ class OAuthTokenMonitoringExecutor(TaskExecutor):
platform: str,
error_message: str,
result_data: Optional[Dict[str, Any]],
db: Session,
task: Optional[OAuthTokenMonitoringTask] = None
db: Session
):
"""
Create a UsageAlert notification when OAuth token refresh fails.
@@ -743,20 +723,6 @@ class OAuthTokenMonitoringExecutor(TaskExecutor):
# Get current billing period (YYYY-MM format)
from datetime import datetime
billing_period = datetime.utcnow().strftime("%Y-%m")
alert_payload = {
"requires_user_action": True,
"platform": platform,
"channel_status": getattr(task, "channel_status", "disconnected"),
"terminal_failure_reason": getattr(task, "terminal_failure_reason", error_message),
"next_retry_at": (
task.next_retry_at.isoformat() if task and task.next_retry_at else None
),
"refresh_attempts": getattr(task, "refresh_attempts", 0),
"max_refresh_retries": self.max_refresh_retries,
}
message = f"{message} [ALERT_PAYLOAD] {alert_payload}"
# Create UsageAlert
alert = UsageAlert(
@@ -820,3 +786,4 @@ class OAuthTokenMonitoringExecutor(TaskExecutor):
f"Defaulting to Weekly (7 days)."
)
return last_execution + timedelta(days=7)

View File

@@ -3,7 +3,7 @@ OAuth Token Monitoring Task Loader
Functions to load due OAuth token monitoring tasks from database.
"""
from datetime import datetime, timedelta
from datetime import datetime
from typing import List, Optional, Union
from sqlalchemy.orm import Session
from sqlalchemy import and_, or_
@@ -52,34 +52,3 @@ def load_due_oauth_token_monitoring_tasks(
return query.all()
def load_near_expiry_oauth_token_tasks(
db: Session,
refresh_horizon_hours: int = 24,
user_id: Optional[Union[str, int]] = None
) -> List[OAuthTokenMonitoringTask]:
"""
Load OAuth tasks that should run token refresh logic soon.
Includes:
- tasks with a scheduled retry now due (next_retry_at <= now)
- tasks whose routine check is inside the near-expiry horizon window
"""
now = datetime.utcnow()
horizon = now + timedelta(hours=max(refresh_horizon_hours, 1))
query = db.query(OAuthTokenMonitoringTask).filter(
and_(
OAuthTokenMonitoringTask.status.in_(['active', 'failed', 'degraded']),
or_(
OAuthTokenMonitoringTask.next_retry_at <= now,
OAuthTokenMonitoringTask.next_check <= horizon,
OAuthTokenMonitoringTask.next_check.is_(None)
)
)
)
if user_id is not None:
query = query.filter(OAuthTokenMonitoringTask.user_id == str(user_id))
return query.all()

View File

@@ -11,6 +11,8 @@ import logging
import re
from urllib.parse import urlparse
from models.asset_metadata_schema import validate_asset_metadata
logger = logging.getLogger(__name__)
# Maximum file size (100MB)
@@ -140,6 +142,12 @@ def save_asset_to_library(
if len(title) > 200:
title = title[:197] + '...'
metadata_payload = asset_metadata or {}
is_valid_metadata, validation_message = validate_asset_metadata(metadata_payload)
if not is_valid_metadata:
logger.error(f"Invalid asset metadata: {validation_message}")
return None
service = ContentAssetService(db)
asset = service.create_asset(
user_id=user_id,
@@ -154,7 +162,7 @@ def save_asset_to_library(
description=description,
prompt=prompt,
tags=tags or [],
asset_metadata=asset_metadata or {},
asset_metadata=metadata_payload,
provider=provider,
model=model,
cost=cost,