Base code
This commit is contained in:
381
backend/api/podcast/handlers/avatar.py
Normal file
381
backend/api/podcast/handlers/avatar.py
Normal file
@@ -0,0 +1,381 @@
|
||||
"""
|
||||
Podcast Avatar Handlers
|
||||
|
||||
Avatar upload and presenter generation endpoints.
|
||||
"""
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException, UploadFile, File, Form
|
||||
from fastapi.responses import FileResponse
|
||||
from sqlalchemy.orm import Session
|
||||
from typing import Dict, Any, List, Optional
|
||||
from pathlib import Path
|
||||
import uuid
|
||||
import hashlib
|
||||
|
||||
from services.database import get_db
|
||||
from middleware.auth_middleware import get_current_user, get_current_user_with_query_token
|
||||
from api.story_writer.utils.auth import require_authenticated_user
|
||||
from services.llm_providers.main_image_generation import generate_image
|
||||
from services.llm_providers.main_image_editing import edit_image
|
||||
from utils.asset_tracker import save_asset_to_library
|
||||
from loguru import logger
|
||||
from ..constants import PODCAST_IMAGES_DIR
|
||||
from ..presenter_personas import choose_persona_id, get_persona
|
||||
|
||||
# Router that collects the podcast avatar endpoints defined in this module.
router = APIRouter()

# Avatars are stored in a dedicated sub-folder of the podcast images
# directory. The folder is created when the module is imported so the
# handlers below can write into it without checking for its existence.
AVATAR_SUBDIR = "avatars"
PODCAST_AVATARS_DIR = PODCAST_IMAGES_DIR.joinpath(AVATAR_SUBDIR)
PODCAST_AVATARS_DIR.mkdir(exist_ok=True, parents=True)
|
||||
|
||||
|
||||
@router.post("/avatar/upload")
async def upload_podcast_avatar(
    file: UploadFile = File(...),
    project_id: Optional[str] = Form(None),
    current_user: Dict[str, Any] = Depends(get_current_user),
    db: Session = Depends(get_db),
):
    """
    Upload a presenter avatar image for a podcast project.
    Returns the avatar URL for use in scene image generation.

    Args:
        file: Uploaded image; its declared content type must start with
            ``image/`` and its size must not exceed 5MB.
        project_id: Optional project identifier. When given, the avatar is
            also registered through ``save_asset_to_library``.
        current_user: Authenticated user payload from ``get_current_user``.
        db: Database session handed to ``save_asset_to_library``.

    Returns:
        Dict with ``avatar_url``, ``avatar_filename`` and ``message``.

    Raises:
        HTTPException: 400 for a non-image or oversized upload, 500 when
            storing the file fails.
    """
    user_id = require_authenticated_user(current_user)

    # Validate file type
    if not file.content_type or not file.content_type.startswith('image/'):
        raise HTTPException(status_code=400, detail="File must be an image")

    # Validate file size (max 5MB). Read at most one byte past the limit so
    # an oversized upload is rejected without loading all of it into memory.
    max_bytes = 5 * 1024 * 1024
    file_content = await file.read(max_bytes + 1)
    if len(file_content) > max_bytes:
        raise HTTPException(status_code=400, detail="Image file size must be less than 5MB")

    try:
        # Generate filename. Both the extension and the project id come from
        # the client, so only filename-safe characters are kept: nothing the
        # caller sends can introduce path separators into the stored name.
        file_ext = Path(file.filename or "").suffix
        if not file_ext[1:].isalnum():
            file_ext = '.png'
        project_slug = "".join(
            ch if ch.isalnum() or ch in "-_" else "_"
            for ch in (project_id or 'temp')
        )
        unique_id = str(uuid.uuid4())[:8]
        avatar_filename = f"avatar_{project_slug}_{unique_id}{file_ext}"
        avatar_path = PODCAST_AVATARS_DIR / avatar_filename

        # Save file
        with open(avatar_path, "wb") as f:
            f.write(file_content)

        logger.info(f"[Podcast] Avatar uploaded: {avatar_path}")

        # Create avatar URL
        avatar_url = f"/api/podcast/images/{AVATAR_SUBDIR}/{avatar_filename}"

        # Save to asset library if project_id provided
        if project_id:
            try:
                save_asset_to_library(
                    db=db,
                    user_id=user_id,
                    asset_type="image",
                    source_module="podcast_maker",
                    filename=avatar_filename,
                    file_url=avatar_url,
                    file_path=str(avatar_path),
                    file_size=len(file_content),
                    mime_type=file.content_type,
                    title=f"Podcast Presenter Avatar - {project_id}",
                    description="Podcast presenter avatar image",
                    tags=["podcast", "avatar", project_id],
                    asset_metadata={
                        "project_id": project_id,
                        "type": "presenter_avatar",
                        "status": "completed",
                    },
                )
            except Exception as e:
                # Best effort: the upload itself succeeded, so a failure to
                # register the asset is logged and not propagated.
                logger.warning(f"[Podcast] Failed to save avatar asset: {e}")

        return {
            "avatar_url": avatar_url,
            "avatar_filename": avatar_filename,
            "message": "Avatar uploaded successfully"
        }
    except Exception as exc:
        # loguru applies str.format() to the message whenever extra arguments
        # are passed, so the exception is supplied as a "{}" argument instead
        # of being pre-interpolated (its text may contain braces).
        # logger.exception() also records the traceback.
        logger.exception("[Podcast] Avatar upload failed: {}", exc)
        raise HTTPException(status_code=500, detail=f"Avatar upload failed: {str(exc)}") from exc
|
||||
|
||||
|
||||
@router.post("/avatar/make-presentable")
async def make_avatar_presentable(
    avatar_url: str = Form(...),
    project_id: Optional[str] = Form(None),
    current_user: Dict[str, Any] = Depends(get_current_user),
    db: Session = Depends(get_db),
):
    """
    Transform an uploaded avatar image into a podcast-appropriate presenter.
    Uses AI image editing to convert the uploaded photo into a professional podcast presenter.

    Args:
        avatar_url: URL of a previously uploaded avatar; its bytes are loaded
            with ``load_podcast_image_bytes``.
        project_id: Optional project identifier. When given, the transformed
            image is also registered through ``save_asset_to_library``.
        current_user: Authenticated user payload from ``get_current_user``.
        db: Database session handed to ``save_asset_to_library``.

    Returns:
        Dict with ``avatar_url``, ``avatar_filename`` and ``message`` for the
        transformed image.

    Raises:
        HTTPException: Propagated unchanged when raised inside the handler;
            any other failure is reported as a 500.
    """
    user_id = require_authenticated_user(current_user)

    try:
        # Load the uploaded avatar image
        from ..utils import load_podcast_image_bytes
        avatar_bytes = load_podcast_image_bytes(avatar_url)

        logger.info(f"[Podcast] Transforming avatar to podcast presenter for project {project_id}")

        # Create transformation prompt based on WaveSpeed AI recommendations
        # Transform the uploaded image into a professional podcast presenter.
        # The prompt is assembled from explicit lines so that the indentation
        # of this source file can never leak into the text sent to the model.
        transformation_prompt = (
            "Transform this image into a professional podcast presenter:\n"
            "- Half-length portrait format, looking at camera\n"
            "- Professional attire (white shirt and light gray blazer or business casual)\n"
            "- Confident, friendly, engaging expression\n"
            "- Soft studio lighting, plain light-gray or neutral background\n"
            "- Professional podcast host appearance, suitable for video generation\n"
            "- Clean composition, center-focused for avatar overlay\n"
            "- Maintain the person's appearance and identity while making it podcast-appropriate\n"
            "- Ultra realistic, 4k quality, professional photography style"
        )

        # Transform the image using image editing
        image_options = {
            "provider": None,  # Auto-select provider
            "model": None,  # Use default model
        }

        # NOTE(review): edit_image is called synchronously inside an async
        # handler; if it performs network I/O it will hold the event loop
        # for the duration of the call - confirm and offload if needed.
        result = edit_image(
            input_image_bytes=avatar_bytes,
            prompt=transformation_prompt,
            options=image_options,
            user_id=user_id
        )

        # Save transformed avatar. project_id is client-supplied, so only
        # filename-safe characters are kept in the on-disk name.
        project_slug = "".join(
            ch if ch.isalnum() or ch in "-_" else "_"
            for ch in (project_id or 'temp')
        )
        unique_id = str(uuid.uuid4())[:8]
        transformed_filename = f"presenter_transformed_{project_slug}_{unique_id}.png"
        transformed_path = PODCAST_AVATARS_DIR / transformed_filename

        with open(transformed_path, "wb") as f:
            f.write(result.image_bytes)

        transformed_url = f"/api/podcast/images/{AVATAR_SUBDIR}/{transformed_filename}"

        logger.info(f"[Podcast] Transformed avatar saved to: {transformed_path}")

        # Save to asset library
        if project_id:
            try:
                save_asset_to_library(
                    db=db,
                    user_id=user_id,
                    asset_type="image",
                    source_module="podcast_maker",
                    filename=transformed_filename,
                    file_url=transformed_url,
                    file_path=str(transformed_path),
                    file_size=len(result.image_bytes),
                    mime_type="image/png",
                    title=f"Podcast Presenter (Transformed) - {project_id}",
                    description="AI-transformed podcast presenter avatar from uploaded photo",
                    prompt=transformation_prompt,
                    tags=["podcast", "avatar", "presenter", "transformed", project_id],
                    provider=result.provider,
                    model=result.model,
                    asset_metadata={
                        "project_id": project_id,
                        "type": "transformed_presenter",
                        "original_avatar_url": avatar_url,
                        "status": "completed",
                    },
                )
            except Exception as e:
                # Best effort: the image was produced and stored, so a failure
                # to register the asset is logged and not propagated.
                logger.warning(f"[Podcast] Failed to save transformed avatar asset: {e}")

        return {
            "avatar_url": transformed_url,
            "avatar_filename": transformed_filename,
            "message": "Avatar transformed into podcast presenter successfully"
        }
    except HTTPException:
        # Keep the status code chosen by whoever raised it (presumably the
        # image loader or the editing service - not visible here) instead of
        # flattening every error into a 500.
        raise
    except Exception as exc:
        # loguru applies str.format() to the message whenever extra arguments
        # are passed, so the exception is supplied as a "{}" argument instead
        # of being pre-interpolated (its text may contain braces).
        # logger.exception() also records the traceback.
        logger.exception("[Podcast] Avatar transformation failed: {}", exc)
        raise HTTPException(status_code=500, detail=f"Avatar transformation failed: {str(exc)}") from exc
|
||||
|
||||
|
||||
def _build_presenter_prompt(
    gender: str,
    persona: Any,
    content_type: Optional[str],
    audience: Optional[str],
    keywords_list: List[str],
) -> str:
    """Assemble the comma-separated image prompt for a single presenter."""
    prompt_parts = [
        f"Half-length portrait of a professional podcast presenter ({gender}, 25-35 years old)",
        "photo-realistic, professional photography",
    ]

    if persona:
        prompt_parts.append(persona.prompt)

    # Use content_type to influence attire/style. A missing content_type and
    # an unrecognised one both fall through to the same default attire.
    content_lower = (content_type or "").lower()
    if "business" in content_lower or "corporate" in content_lower:
        prompt_parts.append("business professional attire (white shirt and light gray blazer)")
    elif "casual" in content_lower or "conversational" in content_lower:
        prompt_parts.append("business casual attire (smart casual, approachable)")
    elif "tech" in content_lower:  # also matches "technology"
        prompt_parts.append("modern professional attire (tech-forward, contemporary style)")
    else:
        prompt_parts.append("professional attire (white shirt and light gray blazer or business casual)")

    # Use audience to influence expression and style
    if audience:
        audience_lower = audience.lower()
        if any(term in audience_lower for term in ("young", "millennial", "gen z")):
            prompt_parts.append("modern, energetic, approachable expression")
        elif any(term in audience_lower for term in ("executive", "professional", "business")):
            prompt_parts.append("confident, authoritative, professional expression")
        else:
            prompt_parts.append("confident, friendly, engaging expression")
    else:
        prompt_parts.append("confident, friendly expression")

    # Add keywords context if available (for visual style hints): only the
    # first three keywords are inspected and at most two are used.
    style_terms = ("tech", "business", "creative", "modern", "professional")
    visual_keywords = [
        k for k in keywords_list[:3]
        if any(term in k.lower() for term in style_terms)
    ]
    if visual_keywords:
        prompt_parts.append(f"context: {', '.join(visual_keywords[:2])}")

    # Technical requirements
    prompt_parts.extend([
        "looking at camera",
        "soft studio lighting, plain light-gray or neutral background",
        "ultra realistic, 4k quality, 85mm lens, f/2.8",
        "professional podcast host appearance, suitable for video generation",
        "clean composition, center-focused for avatar overlay"
    ])

    return ", ".join(prompt_parts)


@router.post("/avatar/generate")
async def generate_podcast_presenters(
    speakers: int = Form(...),
    project_id: Optional[str] = Form(None),
    audience: Optional[str] = Form(None),
    content_type: Optional[str] = Form(None),
    top_keywords: Optional[str] = Form(None),  # JSON string array
    persona_id: Optional[str] = Form(None),
    current_user: Dict[str, Any] = Depends(get_current_user),
    db: Session = Depends(get_db),
):
    """
    Generate presenter avatar images based on number of speakers and AI analysis insights.
    Uses analysis data (audience, content_type, keywords) to create more relevant presenters.
    Returns list of avatar URLs.
    Based on WaveSpeed AI recommendations for professional podcast presenters.

    Args:
        speakers: Number of presenters to generate; must be 1 or 2.
        project_id: Optional project identifier. When given, every generated
            image is also registered through ``save_asset_to_library``.
        audience: Optional audience description; steers the expression text.
        content_type: Optional content type; steers the attire text.
        top_keywords: Optional JSON array of keyword strings. Anything that
            does not parse to a list is ignored, as are non-string entries.
        persona_id: Optional explicit persona; otherwise one is picked with
            ``choose_persona_id``.
        current_user: Authenticated user payload from ``get_current_user``.
        db: Database session handed to ``save_asset_to_library``.

    Returns:
        Dict with ``avatars`` (one entry per speaker), ``message`` and
        ``persona_id``.

    Raises:
        HTTPException: 400 when ``speakers`` is out of range; exceptions of
            this type raised inside the handler are propagated unchanged;
            any other failure is reported as a 500.
    """
    user_id = require_authenticated_user(current_user)

    if not 1 <= speakers <= 2:
        raise HTTPException(status_code=400, detail="Speakers must be between 1 and 2")

    try:
        # Parse keywords if provided. Malformed JSON is treated as "no
        # keywords", and so is any JSON value that is not a list - a bare
        # string or object would otherwise break the slicing and .lower()
        # calls made while building the prompt.
        keywords_list: List[str] = []
        if top_keywords:
            import json
            try:
                parsed_keywords = json.loads(top_keywords)
            except (ValueError, TypeError):
                parsed_keywords = None
            if isinstance(parsed_keywords, list):
                keywords_list = [k for k in parsed_keywords if isinstance(k, str)]

        # Choose persona (market-fit + style) using analysis if not explicitly provided.
        # Do not infer sensitive traits (like ethnicity); personas represent market + style only.
        selected_persona_id = persona_id or choose_persona_id(
            audience=audience,
            content_type=content_type,
            top_keywords=keywords_list,
        )
        persona = get_persona(selected_persona_id)

        # project_id is client-supplied, so only filename-safe characters are
        # kept in on-disk names. The raw value is still used for the seed and
        # the asset metadata so existing seeds stay stable.
        project_slug = "".join(
            ch if ch.isalnum() or ch in "-_" else "_"
            for ch in (project_id or 'temp')
        )

        generated_avatars = []

        for speaker_number in range(1, speakers + 1):
            # Generate presenter-specific prompt based on WaveSpeed AI recommendations
            # Enhanced with analysis insights for more relevant presenter appearance
            gender = "female" if speaker_number == 1 else "male"  # First speaker female, second male

            # Build context-aware prompt using analysis insights + persona preset
            prompt = _build_presenter_prompt(
                gender, persona, content_type, audience, keywords_list
            )

            logger.info(f"[Podcast] Generating presenter {speaker_number}/{speakers} for project {project_id}")

            # Generate image
            # Use a deterministic seed per (project_id, speaker_number, persona_id) to keep presenter identity stable.
            # Note: determinism may vary by provider/model, but seed improves consistency substantially.
            seed_source = f"{project_id or 'temp'}|speaker={speaker_number}|persona={selected_persona_id}"
            seed = int(hashlib.sha256(seed_source.encode("utf-8")).hexdigest()[:8], 16)
            image_options = {
                "provider": None,  # Auto-select provider
                "width": 1024,
                "height": 1024,
                "seed": seed,
            }

            # NOTE(review): generate_image is called synchronously inside an
            # async handler; if it performs network I/O it will hold the
            # event loop for the duration of the call - confirm.
            result = generate_image(
                prompt=prompt,
                options=image_options,
                user_id=user_id
            )

            # Save avatar
            unique_id = str(uuid.uuid4())[:8]
            avatar_filename = f"presenter_{project_slug}_{speaker_number}_{unique_id}.png"
            avatar_path = PODCAST_AVATARS_DIR / avatar_filename

            with open(avatar_path, "wb") as f:
                f.write(result.image_bytes)

            avatar_url = f"/api/podcast/images/{AVATAR_SUBDIR}/{avatar_filename}"

            # Save to asset library
            if project_id:
                try:
                    save_asset_to_library(
                        db=db,
                        user_id=user_id,
                        asset_type="image",
                        source_module="podcast_maker",
                        filename=avatar_filename,
                        file_url=avatar_url,
                        file_path=str(avatar_path),
                        file_size=len(result.image_bytes),
                        mime_type="image/png",
                        title=f"Podcast Presenter {speaker_number} - {project_id}",
                        description=f"Generated podcast presenter avatar for speaker {speaker_number}",
                        prompt=prompt,
                        tags=["podcast", "avatar", "presenter", project_id],
                        provider=result.provider,
                        model=result.model,
                        asset_metadata={
                            "project_id": project_id,
                            "speaker_number": speaker_number,
                            "type": "generated_presenter",
                            "status": "completed",
                            "persona_id": selected_persona_id,
                            "seed": seed,
                        },
                    )
                except Exception as e:
                    # Best effort: the image was generated and stored, so a
                    # failure to register the asset is logged, not propagated.
                    logger.warning(f"[Podcast] Failed to save presenter asset: {e}")

            generated_avatars.append({
                "avatar_url": avatar_url,
                "avatar_filename": avatar_filename,
                "speaker_number": speaker_number,
                "prompt": prompt,  # Include the prompt used for generation
                "persona_id": selected_persona_id,
                "seed": seed,
            })

        return {
            "avatars": generated_avatars,
            "message": f"Generated {speakers} presenter avatar(s)",
            "persona_id": selected_persona_id,
        }
    except HTTPException:
        # Keep the status code chosen by whoever raised it (presumably the
        # image generation service - not visible here) instead of flattening
        # every error into a 500.
        raise
    except Exception as exc:
        # loguru applies str.format() to the message whenever extra arguments
        # are passed, so the exception is supplied as a "{}" argument instead
        # of being pre-interpolated (its text may contain braces).
        # logger.exception() also records the traceback.
        logger.exception("[Podcast] Presenter generation failed: {}", exc)
        raise HTTPException(status_code=500, detail=f"Presenter generation failed: {str(exc)}") from exc
|
||||
|
||||
Reference in New Issue
Block a user