Files
moreminimore-marketing/backend/api/podcast/handlers/avatar.py
Kunthawat Greethong c35fa52117 Base code
2026-01-08 22:39:53 +07:00

382 lines
16 KiB
Python

"""
Podcast Avatar Handlers
Avatar upload and presenter generation endpoints.
"""
from fastapi import APIRouter, Depends, HTTPException, UploadFile, File, Form
from fastapi.responses import FileResponse
from sqlalchemy.orm import Session
from typing import Dict, Any, List, Optional
from pathlib import Path
import uuid
import hashlib
from services.database import get_db
from middleware.auth_middleware import get_current_user, get_current_user_with_query_token
from api.story_writer.utils.auth import require_authenticated_user
from services.llm_providers.main_image_generation import generate_image
from services.llm_providers.main_image_editing import edit_image
from utils.asset_tracker import save_asset_to_library
from loguru import logger
from ..constants import PODCAST_IMAGES_DIR
from ..presenter_personas import choose_persona_id, get_persona
# Router that the avatar endpoints in this module register themselves on.
router = APIRouter()
# Avatar subdirectory
AVATAR_SUBDIR = "avatars"
# Avatar files are stored in a subdirectory of the shared podcast images directory.
PODCAST_AVATARS_DIR = PODCAST_IMAGES_DIR / AVATAR_SUBDIR
# Created at import time so the handlers below can write files without checking first.
PODCAST_AVATARS_DIR.mkdir(parents=True, exist_ok=True)
@router.post("/avatar/upload")
async def upload_podcast_avatar(
    file: UploadFile = File(...),
    project_id: Optional[str] = Form(None),
    current_user: Dict[str, Any] = Depends(get_current_user),
    db: Session = Depends(get_db),
):
    """
    Upload a presenter avatar image for a podcast project.

    Returns the avatar URL for use in scene image generation.

    Args:
        file: Uploaded image. Its declared content type must start with
            ``image/`` and its size must not exceed 5MB.
        project_id: Optional project identifier. When provided it is embedded
            (sanitised) in the stored filename and the avatar is also
            registered in the asset library.
        current_user: Authenticated user payload resolved by the auth dependency.
        db: Database session used for the asset library entry.

    Returns:
        A dict with ``avatar_url``, ``avatar_filename`` and ``message``.

    Raises:
        HTTPException: 400 when the upload is not an image, is empty, or is
            larger than 5MB; 500 when storing the file fails.
    """
    user_id = require_authenticated_user(current_user)

    # Validate file type
    if not file.content_type or not file.content_type.startswith("image/"):
        raise HTTPException(status_code=400, detail="File must be an image")

    # Validate file size (max 5MB). Read at most one byte past the limit: that is
    # enough to detect an oversized upload without pulling all of it into memory.
    max_bytes = 5 * 1024 * 1024
    file_content = await file.read(max_bytes + 1)
    if len(file_content) > max_bytes:
        raise HTTPException(status_code=400, detail="Image file size must be less than 5MB")
    if not file_content:
        raise HTTPException(status_code=400, detail="Image file is empty")

    try:
        # Generate filename. The client controls both the original filename and
        # project_id, so neither is trusted as a path component.
        file_ext = Path(file.filename or "").suffix
        ext_body = file_ext[1:]
        if not (ext_body.isascii() and ext_body.isalnum()):
            file_ext = ".png"
        # Replace anything outside [A-Za-z0-9_-] so the name cannot escape the avatars directory.
        safe_project = "".join(
            ch if (ch.isascii() and ch.isalnum()) or ch in "-_" else "_"
            for ch in (project_id or "temp")
        )
        unique_id = str(uuid.uuid4())[:8]
        avatar_filename = f"avatar_{safe_project}_{unique_id}{file_ext}"
        avatar_path = PODCAST_AVATARS_DIR / avatar_filename

        # Save file
        avatar_path.write_bytes(file_content)
        logger.info(f"[Podcast] Avatar uploaded: {avatar_path}")

        # Create avatar URL
        avatar_url = f"/api/podcast/images/{AVATAR_SUBDIR}/{avatar_filename}"

        # Save to asset library if project_id provided
        if project_id:
            try:
                save_asset_to_library(
                    db=db,
                    user_id=user_id,
                    asset_type="image",
                    source_module="podcast_maker",
                    filename=avatar_filename,
                    file_url=avatar_url,
                    file_path=str(avatar_path),
                    file_size=len(file_content),
                    mime_type=file.content_type,
                    title=f"Podcast Presenter Avatar - {project_id}",
                    description="Podcast presenter avatar image",
                    tags=["podcast", "avatar", project_id],
                    asset_metadata={
                        "project_id": project_id,
                        "type": "presenter_avatar",
                        "status": "completed",
                    },
                )
            except Exception as e:
                # Best effort: the upload itself succeeded, so a library failure is only logged.
                logger.warning(f"[Podcast] Failed to save avatar asset: {e}")

        return {
            "avatar_url": avatar_url,
            "avatar_filename": avatar_filename,
            "message": "Avatar uploaded successfully",
        }
    except Exception as exc:
        # logger.exception attaches the traceback. Passing the error as a format
        # argument keeps braces in its text from being parsed as placeholders.
        logger.exception("[Podcast] Avatar upload failed: {}", exc)
        raise HTTPException(status_code=500, detail=f"Avatar upload failed: {str(exc)}") from exc
@router.post("/avatar/make-presentable")
async def make_avatar_presentable(
    avatar_url: str = Form(...),
    project_id: Optional[str] = Form(None),
    current_user: Dict[str, Any] = Depends(get_current_user),
    db: Session = Depends(get_db),
):
    """
    Transform an uploaded avatar image into a podcast-appropriate presenter.

    Uses AI image editing to convert the uploaded photo into a professional podcast presenter.

    Args:
        avatar_url: URL of a previously uploaded avatar; its bytes are loaded
            through ``load_podcast_image_bytes``.
        project_id: Optional project identifier. When provided it is embedded
            (sanitised) in the stored filename and the result is also
            registered in the asset library.
        current_user: Authenticated user payload resolved by the auth dependency.
        db: Database session used for the asset library entry.

    Returns:
        A dict with ``avatar_url`` (the transformed image URL),
        ``avatar_filename`` and ``message``.

    Raises:
        HTTPException: Re-raised unchanged when a helper raises one; otherwise
            500 when loading, editing or saving the image fails.
    """
    user_id = require_authenticated_user(current_user)
    try:
        # Load the uploaded avatar image
        from ..utils import load_podcast_image_bytes

        # NOTE(review): avatar_url is client supplied; presumably the loader confines
        # resolution to the podcast images directory - confirm in ..utils.
        avatar_bytes = load_podcast_image_bytes(avatar_url)
        logger.info(f"[Podcast] Transforming avatar to podcast presenter for project {project_id}")

        # Create transformation prompt based on WaveSpeed AI recommendations
        # Transform the uploaded image into a professional podcast presenter
        transformation_prompt = """Transform this image into a professional podcast presenter:
- Half-length portrait format, looking at camera
- Professional attire (white shirt and light gray blazer or business casual)
- Confident, friendly, engaging expression
- Soft studio lighting, plain light-gray or neutral background
- Professional podcast host appearance, suitable for video generation
- Clean composition, center-focused for avatar overlay
- Maintain the person's appearance and identity while making it podcast-appropriate
- Ultra realistic, 4k quality, professional photography style"""

        # Transform the image using image editing
        image_options = {
            "provider": None,  # Auto-select provider
            "model": None,  # Use default model
        }
        # NOTE(review): edit_image is called synchronously inside an async handler; if it
        # blocks on network I/O it will stall the event loop - consider offloading.
        result = edit_image(
            input_image_bytes=avatar_bytes,
            prompt=transformation_prompt,
            options=image_options,
            user_id=user_id,
        )

        # Save transformed avatar. project_id is client supplied, so anything outside
        # [A-Za-z0-9_-] is replaced to keep the file inside the avatars directory.
        safe_project = "".join(
            ch if (ch.isascii() and ch.isalnum()) or ch in "-_" else "_"
            for ch in (project_id or "temp")
        )
        unique_id = str(uuid.uuid4())[:8]
        transformed_filename = f"presenter_transformed_{safe_project}_{unique_id}.png"
        transformed_path = PODCAST_AVATARS_DIR / transformed_filename
        transformed_path.write_bytes(result.image_bytes)
        transformed_url = f"/api/podcast/images/{AVATAR_SUBDIR}/{transformed_filename}"
        logger.info(f"[Podcast] Transformed avatar saved to: {transformed_path}")

        # Save to asset library
        if project_id:
            try:
                save_asset_to_library(
                    db=db,
                    user_id=user_id,
                    asset_type="image",
                    source_module="podcast_maker",
                    filename=transformed_filename,
                    file_url=transformed_url,
                    file_path=str(transformed_path),
                    file_size=len(result.image_bytes),
                    mime_type="image/png",
                    title=f"Podcast Presenter (Transformed) - {project_id}",
                    description="AI-transformed podcast presenter avatar from uploaded photo",
                    prompt=transformation_prompt,
                    tags=["podcast", "avatar", "presenter", "transformed", project_id],
                    provider=result.provider,
                    model=result.model,
                    asset_metadata={
                        "project_id": project_id,
                        "type": "transformed_presenter",
                        "original_avatar_url": avatar_url,
                        "status": "completed",
                    },
                )
            except Exception as e:
                # Best effort: the transformation succeeded, so a library failure is only logged.
                logger.warning(f"[Podcast] Failed to save transformed avatar asset: {e}")

        return {
            "avatar_url": transformed_url,
            "avatar_filename": transformed_filename,
            "message": "Avatar transformed into podcast presenter successfully",
        }
    except HTTPException:
        # Keep the status code chosen by whichever helper raised it instead of masking it as a 500.
        raise
    except Exception as exc:
        # logger.exception attaches the traceback. Passing the error as a format
        # argument keeps braces in its text from being parsed as placeholders.
        logger.exception("[Podcast] Avatar transformation failed: {}", exc)
        raise HTTPException(status_code=500, detail=f"Avatar transformation failed: {str(exc)}") from exc
def _parse_top_keywords(top_keywords: Any) -> List[str]:
    """Return the keywords from a JSON array string (or a sequence) as a list of strings.

    Anything that is missing, is not valid JSON, or does not decode to a
    list/tuple yields an empty list; non-string items are dropped.
    """
    if not top_keywords:
        return []
    parsed = top_keywords
    if isinstance(top_keywords, str):
        import json

        try:
            parsed = json.loads(top_keywords)
        except (TypeError, ValueError):  # json.JSONDecodeError is a ValueError
            return []
    if not isinstance(parsed, (list, tuple)):
        return []
    return [item for item in parsed if isinstance(item, str)]


def _build_presenter_prompt(
    gender: str,
    persona: Any,
    content_type: Optional[str],
    audience: Optional[str],
    keywords_list: List[str],
) -> str:
    """Assemble the comma-separated image prompt for one presenter from the analysis inputs."""
    default_attire = "professional attire (white shirt and light gray blazer or business casual)"
    prompt_parts = [
        f"Half-length portrait of a professional podcast presenter ({gender}, 25-35 years old)",
        "photo-realistic, professional photography",
    ]
    if persona:
        prompt_parts.append(persona.prompt)

    # Use content_type to influence attire/style
    content_lower = (content_type or "").lower()
    if "business" in content_lower or "corporate" in content_lower:
        prompt_parts.append("business professional attire (white shirt and light gray blazer)")
    elif "casual" in content_lower or "conversational" in content_lower:
        prompt_parts.append("business casual attire (smart casual, approachable)")
    elif "tech" in content_lower or "technology" in content_lower:
        prompt_parts.append("modern professional attire (tech-forward, contemporary style)")
    else:
        prompt_parts.append(default_attire)

    # Use audience to influence expression and style
    if audience:
        audience_lower = audience.lower()
        if "young" in audience_lower or "millennial" in audience_lower or "gen z" in audience_lower:
            prompt_parts.append("modern, energetic, approachable expression")
        elif "executive" in audience_lower or "professional" in audience_lower or "business" in audience_lower:
            prompt_parts.append("confident, authoritative, professional expression")
        else:
            prompt_parts.append("confident, friendly, engaging expression")
    else:
        prompt_parts.append("confident, friendly expression")

    # Add keywords context if available (for visual style hints): only the first
    # three keywords are considered and at most two are used.
    visual_hints = ("tech", "business", "creative", "modern", "professional")
    visual_keywords = [
        keyword
        for keyword in keywords_list[:3]
        if any(hint in keyword.lower() for hint in visual_hints)
    ]
    if visual_keywords:
        prompt_parts.append(f"context: {', '.join(visual_keywords[:2])}")

    # Technical requirements
    prompt_parts.extend([
        "looking at camera",
        "soft studio lighting, plain light-gray or neutral background",
        "ultra realistic, 4k quality, 85mm lens, f/2.8",
        "professional podcast host appearance, suitable for video generation",
        "clean composition, center-focused for avatar overlay",
    ])
    return ", ".join(prompt_parts)


@router.post("/avatar/generate")
async def generate_podcast_presenters(
    speakers: int = Form(...),
    project_id: Optional[str] = Form(None),
    audience: Optional[str] = Form(None),
    content_type: Optional[str] = Form(None),
    top_keywords: Optional[str] = Form(None),  # JSON string array
    persona_id: Optional[str] = Form(None),
    current_user: Dict[str, Any] = Depends(get_current_user),
    db: Session = Depends(get_db),
):
    """
    Generate presenter avatar images based on number of speakers and AI analysis insights.

    Uses analysis data (audience, content_type, keywords) to create more relevant presenters.
    Based on WaveSpeed AI recommendations for professional podcast presenters.

    Args:
        speakers: Number of presenters to generate; must be 1 or 2.
        project_id: Optional project identifier. When provided it is embedded
            (sanitised) in the stored filenames and each avatar is also
            registered in the asset library.
        audience: Optional audience description; influences the expression wording.
        content_type: Optional content type; influences the attire wording.
        top_keywords: Optional JSON array string of keywords; invalid or
            non-list values are ignored.
        persona_id: Optional explicit persona; when omitted one is chosen from
            the analysis inputs.
        current_user: Authenticated user payload resolved by the auth dependency.
        db: Database session used for the asset library entries.

    Returns:
        A dict with ``avatars`` (one entry per speaker holding the URL,
        filename, speaker number, prompt, persona id and seed), ``message``
        and ``persona_id``.

    Raises:
        HTTPException: 400 when ``speakers`` is outside 1-2; re-raised
            unchanged when a helper raises one; otherwise 500 on failure.
    """
    user_id = require_authenticated_user(current_user)
    if speakers < 1 or speakers > 2:
        raise HTTPException(status_code=400, detail="Speakers must be between 1 and 2")
    try:
        # Parse keywords if provided
        keywords_list = _parse_top_keywords(top_keywords)

        # Choose persona (market-fit + style) using analysis if not explicitly provided.
        # Do not infer sensitive traits (like ethnicity); personas represent market + style only.
        selected_persona_id = persona_id or choose_persona_id(
            audience=audience,
            content_type=content_type,
            top_keywords=keywords_list,
        )
        persona = get_persona(selected_persona_id)

        # project_id is client supplied, so anything outside [A-Za-z0-9_-] is
        # replaced to keep generated files inside the avatars directory.
        safe_project = "".join(
            ch if (ch.isascii() and ch.isalnum()) or ch in "-_" else "_"
            for ch in (project_id or "temp")
        )

        generated_avatars = []
        for speaker_number in range(1, speakers + 1):
            # Generate presenter-specific prompt based on WaveSpeed AI recommendations
            # Enhanced with analysis insights for more relevant presenter appearance
            gender = "female" if speaker_number == 1 else "male"  # First speaker female, second male
            prompt = _build_presenter_prompt(gender, persona, content_type, audience, keywords_list)
            logger.info(f"[Podcast] Generating presenter {speaker_number}/{speakers} for project {project_id}")

            # Generate image
            # Use a deterministic seed per (project_id, speaker_number, persona_id) to keep presenter identity stable.
            # Note: determinism may vary by provider/model, but seed improves consistency substantially.
            seed_source = f"{project_id or 'temp'}|speaker={speaker_number}|persona={selected_persona_id}"
            seed = int(hashlib.sha256(seed_source.encode("utf-8")).hexdigest()[:8], 16)
            image_options = {
                "provider": None,  # Auto-select provider
                "width": 1024,
                "height": 1024,
                "seed": seed,
            }
            # NOTE(review): generate_image is called synchronously inside an async handler; if it
            # blocks on network I/O it will stall the event loop - consider offloading.
            result = generate_image(
                prompt=prompt,
                options=image_options,
                user_id=user_id,
            )

            # Save avatar
            unique_id = str(uuid.uuid4())[:8]
            avatar_filename = f"presenter_{safe_project}_{speaker_number}_{unique_id}.png"
            avatar_path = PODCAST_AVATARS_DIR / avatar_filename
            avatar_path.write_bytes(result.image_bytes)
            avatar_url = f"/api/podcast/images/{AVATAR_SUBDIR}/{avatar_filename}"

            # Save to asset library
            if project_id:
                try:
                    save_asset_to_library(
                        db=db,
                        user_id=user_id,
                        asset_type="image",
                        source_module="podcast_maker",
                        filename=avatar_filename,
                        file_url=avatar_url,
                        file_path=str(avatar_path),
                        file_size=len(result.image_bytes),
                        mime_type="image/png",
                        title=f"Podcast Presenter {speaker_number} - {project_id}",
                        description=f"Generated podcast presenter avatar for speaker {speaker_number}",
                        prompt=prompt,
                        tags=["podcast", "avatar", "presenter", project_id],
                        provider=result.provider,
                        model=result.model,
                        asset_metadata={
                            "project_id": project_id,
                            "speaker_number": speaker_number,
                            "type": "generated_presenter",
                            "status": "completed",
                            "persona_id": selected_persona_id,
                            "seed": seed,
                        },
                    )
                except Exception as e:
                    # Best effort: the image was generated, so a library failure is only logged.
                    logger.warning(f"[Podcast] Failed to save presenter asset: {e}")

            generated_avatars.append({
                "avatar_url": avatar_url,
                "avatar_filename": avatar_filename,
                "speaker_number": speaker_number,
                "prompt": prompt,  # Include the prompt used for generation
                "persona_id": selected_persona_id,
                "seed": seed,
            })

        return {
            "avatars": generated_avatars,
            "message": f"Generated {speakers} presenter avatar(s)",
            "persona_id": selected_persona_id,
        }
    except HTTPException:
        # Keep the status code chosen by whichever helper raised it instead of masking it as a 500.
        raise
    except Exception as exc:
        # logger.exception attaches the traceback. Passing the error as a format
        # argument keeps braces in its text from being parsed as placeholders.
        logger.exception("[Podcast] Presenter generation failed: {}", exc)
        raise HTTPException(status_code=500, detail=f"Presenter generation failed: {str(exc)}") from exc