382 lines
16 KiB
Python
382 lines
16 KiB
Python
"""
Podcast Avatar Handlers

Avatar upload and presenter generation endpoints.
"""
|
|
|
|
from fastapi import APIRouter, Depends, HTTPException, UploadFile, File, Form
|
|
from fastapi.responses import FileResponse
|
|
from sqlalchemy.orm import Session
|
|
from typing import Dict, Any, List, Optional
|
|
from pathlib import Path
|
|
import uuid
|
|
import hashlib
|
|
|
|
from services.database import get_db
|
|
from middleware.auth_middleware import get_current_user, get_current_user_with_query_token
|
|
from api.story_writer.utils.auth import require_authenticated_user
|
|
from services.llm_providers.main_image_generation import generate_image
|
|
from services.llm_providers.main_image_editing import edit_image
|
|
from utils.asset_tracker import save_asset_to_library
|
|
from loguru import logger
|
|
from ..constants import PODCAST_IMAGES_DIR
|
|
from ..presenter_personas import choose_persona_id, get_persona
|
|
|
|
# Router on which the avatar endpoints in this module are registered.
router = APIRouter()

# Avatars are stored in a dedicated subdirectory of the podcast images root.
# The same name is reused as the URL path segment when avatar URLs are built.
AVATAR_SUBDIR = "avatars"
PODCAST_AVATARS_DIR = PODCAST_IMAGES_DIR.joinpath(AVATAR_SUBDIR)

# Create the directory at import time so the handlers can write into it directly.
PODCAST_AVATARS_DIR.mkdir(exist_ok=True, parents=True)
|
|
|
|
|
|
@router.post("/avatar/upload")
async def upload_podcast_avatar(
    file: UploadFile = File(...),
    project_id: Optional[str] = Form(None),
    current_user: Dict[str, Any] = Depends(get_current_user),
    db: Session = Depends(get_db),
):
    """
    Upload a presenter avatar image for a podcast project.

    The image is written under ``PODCAST_AVATARS_DIR``. When ``project_id`` is
    given, the file is also registered in the asset library; that step is best
    effort, so a failure there is logged and does not fail the upload.

    Args:
        file: Uploaded image. Its content type must start with ``image/`` and
            it must not exceed 5MB.
        project_id: Optional project identifier, used in the stored filename
            and in the asset-library metadata.
        current_user: Authenticated user payload injected by FastAPI.
        db: Database session injected by FastAPI.

    Returns:
        Dict with ``avatar_url`` (for use in scene image generation),
        ``avatar_filename`` and a human-readable ``message``.

    Raises:
        HTTPException: 400 if the file is not an image or is larger than 5MB;
            500 if storing the file fails.
    """
    user_id = require_authenticated_user(current_user)

    # Validate file type
    if not file.content_type or not file.content_type.startswith("image/"):
        raise HTTPException(status_code=400, detail="File must be an image")

    # Validate file size (max 5MB). Reading one byte past the limit is enough
    # to detect an oversized upload without loading all of it into memory.
    max_bytes = 5 * 1024 * 1024
    file_content = await file.read(max_bytes + 1)
    if len(file_content) > max_bytes:
        raise HTTPException(status_code=400, detail="Image file size must be less than 5MB")

    try:
        # Generate filename. Both the extension and the project id come from
        # the client, so only filename-safe characters are kept.
        # ``file.filename`` may be missing, hence the ``or ""``.
        raw_ext = Path(file.filename or "").suffix
        ext_body = raw_ext[1:]
        if ext_body.isascii() and ext_body.isalnum() and len(ext_body) <= 10:
            file_ext = raw_ext
        else:
            file_ext = ".png"
        safe_project = "".join(
            ch if ch.isalnum() or ch in "-_" else "_"
            for ch in (project_id or "temp")
        )
        unique_id = str(uuid.uuid4())[:8]
        avatar_filename = f"avatar_{safe_project}_{unique_id}{file_ext}"
        avatar_path = PODCAST_AVATARS_DIR / avatar_filename

        # Save file
        with open(avatar_path, "wb") as f:
            f.write(file_content)

        logger.info(f"[Podcast] Avatar uploaded: {avatar_path}")

        # Create avatar URL
        avatar_url = f"/api/podcast/images/{AVATAR_SUBDIR}/{avatar_filename}"

        # Save to asset library if project_id provided
        if project_id:
            try:
                save_asset_to_library(
                    db=db,
                    user_id=user_id,
                    asset_type="image",
                    source_module="podcast_maker",
                    filename=avatar_filename,
                    file_url=avatar_url,
                    file_path=str(avatar_path),
                    file_size=len(file_content),
                    mime_type=file.content_type,
                    title=f"Podcast Presenter Avatar - {project_id}",
                    description="Podcast presenter avatar image",
                    tags=["podcast", "avatar", project_id],
                    asset_metadata={
                        "project_id": project_id,
                        "type": "presenter_avatar",
                        "status": "completed",
                    },
                )
            except Exception as e:
                # Deliberately best effort: the avatar itself is already stored.
                logger.warning(f"[Podcast] Failed to save avatar asset: {e}")

        return {
            "avatar_url": avatar_url,
            "avatar_filename": avatar_filename,
            "message": "Avatar uploaded successfully",
        }
    except Exception as exc:
        # loguru has no ``exc_info`` flag; ``logger.exception`` records the
        # traceback. Passing ``exc`` as a format argument keeps braces in the
        # error text from being interpreted as format fields.
        logger.exception("[Podcast] Avatar upload failed: {}", exc)
        raise HTTPException(status_code=500, detail=f"Avatar upload failed: {str(exc)}") from exc
|
|
|
|
|
|
@router.post("/avatar/make-presentable")
async def make_avatar_presentable(
    avatar_url: str = Form(...),
    project_id: Optional[str] = Form(None),
    current_user: Dict[str, Any] = Depends(get_current_user),
    db: Session = Depends(get_db),
):
    """
    Transform an uploaded avatar image into a podcast-appropriate presenter.

    Loads the image referenced by ``avatar_url``, runs it through AI image
    editing with a fixed "professional podcast presenter" prompt, stores the
    result as a PNG under ``PODCAST_AVATARS_DIR`` and, when ``project_id`` is
    given, registers it in the asset library (best effort).

    Args:
        avatar_url: URL of the previously uploaded avatar to transform.
        project_id: Optional project identifier, used in the stored filename
            and in the asset-library metadata.
        current_user: Authenticated user payload injected by FastAPI.
        db: Database session injected by FastAPI.

    Returns:
        Dict with ``avatar_url`` and ``avatar_filename`` of the transformed
        image plus a human-readable ``message``.

    Raises:
        HTTPException: Propagated unchanged if raised by a helper; otherwise
            500 when loading, editing or storing the image fails.
    """
    user_id = require_authenticated_user(current_user)

    try:
        # Load the uploaded avatar image
        from ..utils import load_podcast_image_bytes

        avatar_bytes = load_podcast_image_bytes(avatar_url)

        logger.info(f"[Podcast] Transforming avatar to podcast presenter for project {project_id}")

        # Create transformation prompt based on WaveSpeed AI recommendations
        # Transform the uploaded image into a professional podcast presenter
        transformation_prompt = """Transform this image into a professional podcast presenter:
- Half-length portrait format, looking at camera
- Professional attire (white shirt and light gray blazer or business casual)
- Confident, friendly, engaging expression
- Soft studio lighting, plain light-gray or neutral background
- Professional podcast host appearance, suitable for video generation
- Clean composition, center-focused for avatar overlay
- Maintain the person's appearance and identity while making it podcast-appropriate
- Ultra realistic, 4k quality, professional photography style"""

        # Transform the image using image editing
        image_options = {
            "provider": None,  # Auto-select provider
            "model": None,  # Use default model
        }

        result = edit_image(
            input_image_bytes=avatar_bytes,
            prompt=transformation_prompt,
            options=image_options,
            user_id=user_id,
        )

        # Save transformed avatar. The project id comes from the client, so
        # only filename-safe characters are kept in the stored filename.
        safe_project = "".join(
            ch if ch.isalnum() or ch in "-_" else "_"
            for ch in (project_id or "temp")
        )
        unique_id = str(uuid.uuid4())[:8]
        transformed_filename = f"presenter_transformed_{safe_project}_{unique_id}.png"
        transformed_path = PODCAST_AVATARS_DIR / transformed_filename

        with open(transformed_path, "wb") as f:
            f.write(result.image_bytes)

        transformed_url = f"/api/podcast/images/{AVATAR_SUBDIR}/{transformed_filename}"

        logger.info(f"[Podcast] Transformed avatar saved to: {transformed_path}")

        # Save to asset library
        if project_id:
            try:
                save_asset_to_library(
                    db=db,
                    user_id=user_id,
                    asset_type="image",
                    source_module="podcast_maker",
                    filename=transformed_filename,
                    file_url=transformed_url,
                    file_path=str(transformed_path),
                    file_size=len(result.image_bytes),
                    mime_type="image/png",
                    title=f"Podcast Presenter (Transformed) - {project_id}",
                    description="AI-transformed podcast presenter avatar from uploaded photo",
                    prompt=transformation_prompt,
                    tags=["podcast", "avatar", "presenter", "transformed", project_id],
                    provider=result.provider,
                    model=result.model,
                    asset_metadata={
                        "project_id": project_id,
                        "type": "transformed_presenter",
                        "original_avatar_url": avatar_url,
                        "status": "completed",
                    },
                )
            except Exception as e:
                # Deliberately best effort: the transformed image is already stored.
                logger.warning(f"[Podcast] Failed to save transformed avatar asset: {e}")

        return {
            "avatar_url": transformed_url,
            "avatar_filename": transformed_filename,
            "message": "Avatar transformed into podcast presenter successfully",
        }
    except HTTPException:
        # Keep the status code chosen by whichever helper raised it instead of
        # masking it as a generic 500 below.
        raise
    except Exception as exc:
        # loguru has no ``exc_info`` flag; ``logger.exception`` records the
        # traceback. Passing ``exc`` as a format argument keeps braces in the
        # error text from being interpreted as format fields.
        logger.exception("[Podcast] Avatar transformation failed: {}", exc)
        raise HTTPException(status_code=500, detail=f"Avatar transformation failed: {str(exc)}") from exc
|
|
|
|
|
|
def _parse_top_keywords(top_keywords: Any) -> List[str]:
    """Decode the ``top_keywords`` form value into a list of keyword strings.

    Accepts a JSON string array (or an already-decoded list). Anything that
    is missing, malformed, or not a list yields an empty list; non-string
    items are dropped so later ``.lower()`` calls cannot fail.
    """
    if not top_keywords:
        return []

    import json  # only needed for this optional field

    try:
        parsed = json.loads(top_keywords) if isinstance(top_keywords, str) else top_keywords
    except (TypeError, ValueError):  # json.JSONDecodeError subclasses ValueError
        return []

    if not isinstance(parsed, list):
        return []
    return [item for item in parsed if isinstance(item, str)]


def _presenter_style_parts(
    content_type: Optional[str],
    audience: Optional[str],
    keywords: List[str],
) -> List[str]:
    """Build the speaker-independent prompt fragments (attire, expression, context)."""
    parts: List[str] = []

    # Use content_type to influence attire/style
    content_lower = (content_type or "").lower()
    if "business" in content_lower or "corporate" in content_lower:
        parts.append("business professional attire (white shirt and light gray blazer)")
    elif "casual" in content_lower or "conversational" in content_lower:
        parts.append("business casual attire (smart casual, approachable)")
    elif "tech" in content_lower:  # also matches "technology"
        parts.append("modern professional attire (tech-forward, contemporary style)")
    else:
        parts.append("professional attire (white shirt and light gray blazer or business casual)")

    # Use audience to influence expression and style
    if audience:
        audience_lower = audience.lower()
        if any(term in audience_lower for term in ("young", "millennial", "gen z")):
            parts.append("modern, energetic, approachable expression")
        elif any(term in audience_lower for term in ("executive", "professional", "business")):
            parts.append("confident, authoritative, professional expression")
        else:
            parts.append("confident, friendly, engaging expression")
    else:
        parts.append("confident, friendly expression")

    # Add keywords context if available (for visual style hints): look at the
    # first three keywords and keep at most two visually relevant ones.
    visual_hints = ("tech", "business", "creative", "modern", "professional")
    visual_keywords = [
        keyword
        for keyword in keywords[:3]
        if any(hint in keyword.lower() for hint in visual_hints)
    ]
    if visual_keywords:
        parts.append(f"context: {', '.join(visual_keywords[:2])}")

    return parts


@router.post("/avatar/generate")
async def generate_podcast_presenters(
    speakers: int = Form(...),
    project_id: Optional[str] = Form(None),
    audience: Optional[str] = Form(None),
    content_type: Optional[str] = Form(None),
    top_keywords: Optional[str] = Form(None),  # JSON string array
    persona_id: Optional[str] = Form(None),
    current_user: Dict[str, Any] = Depends(get_current_user),
    db: Session = Depends(get_db),
):
    """
    Generate presenter avatar images based on number of speakers and AI analysis insights.

    Uses analysis data (audience, content_type, keywords) to create more relevant presenters.
    Based on WaveSpeed AI recommendations for professional podcast presenters.

    Args:
        speakers: Number of presenters to generate (1 or 2).
        project_id: Optional project identifier, used in filenames, in the
            seed and in the asset-library metadata.
        audience: Optional audience description; influences the expression.
        content_type: Optional content type; influences the attire.
        top_keywords: Optional JSON string array of keywords; malformed
            values are ignored.
        persona_id: Optional explicit persona; when omitted one is chosen
            from the analysis data.
        current_user: Authenticated user payload injected by FastAPI.
        db: Database session injected by FastAPI.

    Returns:
        Dict with ``avatars`` (one entry per speaker holding ``avatar_url``,
        ``avatar_filename``, ``speaker_number``, ``prompt``, ``persona_id``
        and ``seed``), a ``message`` and the selected ``persona_id``.

    Raises:
        HTTPException: 400 if ``speakers`` is not 1 or 2; propagated
            unchanged if raised by a helper; otherwise 500 on failure.
    """
    user_id = require_authenticated_user(current_user)

    if speakers < 1 or speakers > 2:
        raise HTTPException(status_code=400, detail="Speakers must be between 1 and 2")

    try:
        # Parse keywords if provided
        keywords_list = _parse_top_keywords(top_keywords)

        # Choose persona (market-fit + style) using analysis if not explicitly provided.
        # Do not infer sensitive traits (like ethnicity); personas represent market + style only.
        selected_persona_id = persona_id or choose_persona_id(
            audience=audience,
            content_type=content_type,
            top_keywords=keywords_list,
        )
        persona = get_persona(selected_persona_id)

        # These fragments do not depend on the speaker, so build them once.
        style_parts = _presenter_style_parts(content_type, audience, keywords_list)
        technical_parts = [
            "looking at camera",
            "soft studio lighting, plain light-gray or neutral background",
            "ultra realistic, 4k quality, 85mm lens, f/2.8",
            "professional podcast host appearance, suitable for video generation",
            "clean composition, center-focused for avatar overlay",
        ]

        # The project id comes from the client, so only filename-safe
        # characters are kept in stored filenames. The seed below keeps using
        # the raw value so existing seeds stay stable.
        safe_project = "".join(
            ch if ch.isalnum() or ch in "-_" else "_"
            for ch in (project_id or "temp")
        )

        generated_avatars = []

        for i in range(speakers):
            # Generate presenter-specific prompt based on WaveSpeed AI recommendations
            # Enhanced with analysis insights for more relevant presenter appearance
            gender = "female" if i == 0 else "male"  # First speaker female, second male

            # Build context-aware prompt using analysis insights + persona preset
            prompt_parts = [
                f"Half-length portrait of a professional podcast presenter ({gender}, 25-35 years old)",
                "photo-realistic, professional photography",
            ]
            if persona:
                prompt_parts.append(persona.prompt)
            prompt_parts.extend(style_parts)
            prompt_parts.extend(technical_parts)

            prompt = ", ".join(prompt_parts)

            logger.info(f"[Podcast] Generating presenter {i+1}/{speakers} for project {project_id}")

            # Generate image
            # Use a deterministic seed per (project_id, speaker_number, persona_id) to keep presenter identity stable.
            # Note: determinism may vary by provider/model, but seed improves consistency substantially.
            seed_source = f"{project_id or 'temp'}|speaker={i+1}|persona={selected_persona_id}"
            seed = int(hashlib.sha256(seed_source.encode("utf-8")).hexdigest()[:8], 16)
            image_options = {
                "provider": None,  # Auto-select provider
                "width": 1024,
                "height": 1024,
                "seed": seed,
            }

            result = generate_image(
                prompt=prompt,
                options=image_options,
                user_id=user_id,
            )

            # Save avatar
            unique_id = str(uuid.uuid4())[:8]
            avatar_filename = f"presenter_{safe_project}_{i+1}_{unique_id}.png"
            avatar_path = PODCAST_AVATARS_DIR / avatar_filename

            with open(avatar_path, "wb") as f:
                f.write(result.image_bytes)

            avatar_url = f"/api/podcast/images/{AVATAR_SUBDIR}/{avatar_filename}"

            # Save to asset library
            if project_id:
                try:
                    save_asset_to_library(
                        db=db,
                        user_id=user_id,
                        asset_type="image",
                        source_module="podcast_maker",
                        filename=avatar_filename,
                        file_url=avatar_url,
                        file_path=str(avatar_path),
                        file_size=len(result.image_bytes),
                        mime_type="image/png",
                        title=f"Podcast Presenter {i+1} - {project_id}",
                        description=f"Generated podcast presenter avatar for speaker {i+1}",
                        prompt=prompt,
                        tags=["podcast", "avatar", "presenter", project_id],
                        provider=result.provider,
                        model=result.model,
                        asset_metadata={
                            "project_id": project_id,
                            "speaker_number": i + 1,
                            "type": "generated_presenter",
                            "status": "completed",
                            "persona_id": selected_persona_id,
                            "seed": seed,
                        },
                    )
                except Exception as e:
                    # Deliberately best effort: the avatar itself is already stored.
                    logger.warning(f"[Podcast] Failed to save presenter asset: {e}")

            generated_avatars.append({
                "avatar_url": avatar_url,
                "avatar_filename": avatar_filename,
                "speaker_number": i + 1,
                "prompt": prompt,  # Include the prompt used for generation
                "persona_id": selected_persona_id,
                "seed": seed,
            })

        return {
            "avatars": generated_avatars,
            "message": f"Generated {speakers} presenter avatar(s)",
            "persona_id": selected_persona_id,
        }
    except HTTPException:
        # Keep the status code chosen by whichever helper raised it instead of
        # masking it as a generic 500 below.
        raise
    except Exception as exc:
        # loguru has no ``exc_info`` flag; ``logger.exception`` records the
        # traceback. Passing ``exc`` as a format argument keeps braces in the
        # error text from being interpreted as format fields.
        logger.exception("[Podcast] Presenter generation failed: {}", exc)
        raise HTTPException(status_code=500, detail=f"Presenter generation failed: {str(exc)}") from exc
|
|
|