AI Video Generation Implementation

This commit is contained in:
ajaysi
2025-11-17 17:38:23 +05:30
parent 4901b7eb72
commit bf7493c366
132 changed files with 6200 additions and 19475 deletions

View File

@@ -0,0 +1,8 @@
"""
Utility helpers for Story Writer API routes.
Grouped here to keep the main router lean while reusing common logic
such as authentication guards, media resolution, and HD video helpers.
"""

View File

@@ -0,0 +1,23 @@
from typing import Any, Dict
from fastapi import HTTPException, status
def require_authenticated_user(current_user: Dict[str, Any] | None) -> str:
"""
Validates the current user dictionary provided by Clerk middleware and
returns the normalized user_id. Raises HTTP 401 if authentication fails.
"""
if not current_user:
raise HTTPException(status_code=status.HTTP_401_UNAUTHORIZED, detail="Authentication required")
user_id = str(current_user.get("id", "")).strip()
if not user_id:
raise HTTPException(
status_code=status.HTTP_401_UNAUTHORIZED,
detail="Invalid user ID in authentication token",
)
return user_id

View File

@@ -0,0 +1,154 @@
from __future__ import annotations
from typing import Any, Dict, Optional
from fastapi import HTTPException
from loguru import logger
from uuid import uuid4
from .media_utils import load_story_image_bytes
def generate_hd_video_payload(request: Any, user_id: str) -> Dict[str, Any]:
    """
    Generate an HD video synchronously and save it in the story video directory.

    Args:
        request: Object exposing ``prompt`` and, optionally, ``provider``,
            ``model``, ``num_frames``, ``guidance_scale``,
            ``num_inference_steps``, ``negative_prompt`` and ``seed``.
        user_id: Identifier forwarded to the video generator.

    Returns:
        A dict with ``success``, ``video_filename``, ``video_url``,
        ``provider`` and ``model``. When the request names no model, the
        reported ``model`` is the label ``"tencent/HunyuanVideo"``; no model
        argument is forwarded to the generator in that case.
    """
    from services.llm_providers.main_video_generation import ai_video_generate
    from services.story_writer.video_generation_service import StoryVideoGenerationService

    video_service = StoryVideoGenerationService()
    output_dir = video_service.output_dir
    output_dir.mkdir(parents=True, exist_ok=True)

    # Resolve the provider once so the log line, the generator call and the
    # response all agree. getattr's default only covers a missing attribute,
    # so a provider that is present but None needs the explicit fallback.
    provider = getattr(request, "provider", None) or "huggingface"

    # (attribute name, forward falsy-but-not-None values?). guidance_scale and
    # seed may legitimately be 0; the others are only forwarded when truthy.
    optional_params = (
        ("model", False),
        ("num_frames", False),
        ("guidance_scale", True),
        ("num_inference_steps", False),
        ("negative_prompt", False),
        ("seed", True),
    )
    kwargs: Dict[str, Any] = {}
    for name, keep_falsy in optional_params:
        value = getattr(request, name, None)
        if value is None:
            continue
        if keep_falsy or value:
            kwargs[name] = value

    logger.info(f"[StoryWriter] Generating HD video via {provider} for user {user_id}")
    raw_bytes = ai_video_generate(
        prompt=request.prompt,
        provider=provider,
        user_id=user_id,
        **kwargs,
    )

    # A random hex name avoids collisions between generated files.
    filename = f"hd_{uuid4().hex}.mp4"
    file_path = output_dir / filename
    with open(file_path, "wb") as fh:
        fh.write(raw_bytes)
    logger.info(f"[StoryWriter] HD video saved to {file_path}")

    return {
        "success": True,
        "video_filename": filename,
        "video_url": f"/api/story/videos/{filename}",
        "provider": provider,
        "model": kwargs.get("model") or "tencent/HunyuanVideo",
    }
def generate_hd_video_scene_payload(request: Any, user_id: str) -> Dict[str, Any]:
    """
    Build the response payload for a single scene's HD video.

    Runs, in order: the Hugging Face token check, the subscription pre-flight
    validation, prompt enhancement, optional reference-image loading, video
    generation, and saving through the story video service.

    Raises:
        HTTPException: 400 when no Hugging Face token is configured.
    """
    from services.database import get_db as get_db_validation
    from services.onboarding.api_key_manager import APIKeyManager
    from services.subscription import PricingService
    from services.subscription.preflight_validator import validate_video_generation_operations
    from services.story_writer.prompt_enhancer_service import enhance_scene_prompt_for_video
    from services.llm_providers.main_video_generation import ai_video_generate
    from services.story_writer.video_generation_service import StoryVideoGenerationService

    scene_no = request.scene_number
    logger.info(f"[StoryWriter] Generating HD video for scene {scene_no} for user {user_id}")

    # Pre-flight 1: a Hugging Face token must be configured.
    if not APIKeyManager().get_api_key("hf_token"):
        logger.error("[StoryWriter] Pre-flight: HF token not configured - blocking video generation")
        missing_token_text = (
            "Hugging Face API token is not configured. Please configure your HF token in settings."
        )
        raise HTTPException(
            status_code=400,
            detail={"error": missing_token_text, "message": missing_token_text},
        )

    # Pre-flight 2: subscription limits, using a session that is always closed.
    session = next(get_db_validation())
    try:
        pricing = PricingService(session)
        logger.info(f"[StoryWriter] Pre-flight: Checking video generation limits for user {user_id}...")
        validate_video_generation_operations(pricing_service=pricing, user_id=user_id)
        logger.info("[StoryWriter] Pre-flight: ✅ Video generation limits validated - proceeding")
    finally:
        session.close()

    # Stage 1: turn the scene data into a video prompt.
    video_prompt = enhance_scene_prompt_for_video(
        current_scene=request.scene_data,
        story_context=request.story_context,
        all_scenes=request.all_scenes,
        user_id=user_id,
    )
    logger.info(f"[StoryWriter] Generated enhanced prompt ({len(video_prompt)} chars) for scene {scene_no}")

    # Stage 2: optional image conditioning; only attempted when a URL is given.
    image_url = getattr(request, "scene_image_url", None)
    reference_image: Optional[bytes] = load_story_image_bytes(image_url) if image_url else None
    if image_url:
        if reference_image:
            logger.info(f"[StoryWriter] Using scene image reference for scene {scene_no}")
        else:
            logger.warning(f"[StoryWriter] Scene image could not be loaded for scene {scene_no}, falling back to text-only video")

    # True means "forward any non-None value"; False means "forward only if truthy".
    option_rules = {
        "model": False,
        "num_frames": False,
        "guidance_scale": True,
        "num_inference_steps": False,
        "negative_prompt": False,
        "seed": True,
    }
    requested = {name: getattr(request, name, None) for name in option_rules}
    generation_options: Dict[str, Any] = {
        name: value
        for name, value in requested.items()
        if value is not None and (option_rules[name] or value)
    }

    provider = getattr(request, "provider", None) or "huggingface"
    video_bytes = ai_video_generate(
        prompt=video_prompt,
        provider=provider,
        user_id=user_id,
        input_image_bytes=reference_image,
        **generation_options,
    )

    stored = StoryVideoGenerationService().save_scene_video(
        video_bytes=video_bytes,
        scene_number=scene_no,
        user_id=user_id,
    )
    logger.info(f"[StoryWriter] HD video saved for scene {scene_no}: {stored.get('video_filename')}")

    return {
        "success": True,
        "scene_number": scene_no,
        "video_filename": stored.get("video_filename"),
        "video_url": stored.get("video_url"),
        "prompt_used": video_prompt,
        "provider": provider,
        "model": requested["model"] or "tencent/HunyuanVideo",
    }

View File

@@ -0,0 +1,69 @@
from __future__ import annotations
from pathlib import Path
from typing import Optional
from urllib.parse import urlparse
from fastapi import HTTPException, status
from loguru import logger
# Directory three levels above this module's own directory (parents[0] is the
# containing directory); per the inline note this is expected to be backend/.
BASE_DIR = Path(__file__).resolve().parents[3] # backend/
# Absolute, resolved directory that story image files are read from.
STORY_IMAGES_DIR = (BASE_DIR / "story_images").resolve()
# Import-time side effect: the directory is created if it does not exist yet.
STORY_IMAGES_DIR.mkdir(parents=True, exist_ok=True)
def load_story_image_bytes(image_url: str) -> Optional[bytes]:
    """
    Resolve a story image URL (e.g., /api/story/images/<file>) to raw bytes.

    Args:
        image_url: Absolute URL or bare path containing the
            ``/api/story/images/`` prefix. Any query string is ignored.

    Returns:
        The file's bytes, or None when the URL is empty or unsupported, the
        resolved path falls outside ``STORY_IMAGES_DIR``, the file does not
        exist, or reading fails. Failures are logged rather than raised, so
        callers can fall back to generating without a reference image.
    """
    if not image_url:
        return None
    try:
        parsed = urlparse(image_url)
        # With a scheme, use only the URL's path; otherwise treat the whole
        # input as a path and strip the query string below.
        path = parsed.path if parsed.scheme else image_url
        prefix = "/api/story/images/"
        if prefix not in path:
            logger.warning(f"[StoryWriter] Unsupported image URL for video reference: {image_url}")
            return None
        filename = path.split(prefix, 1)[1].split("?", 1)[0].strip()
        if not filename:
            return None
        file_path = (STORY_IMAGES_DIR / filename).resolve()
        # Compare path components, not string prefixes: a plain startswith()
        # check would also accept sibling directories such as
        # "<...>/story_images_backup/".
        try:
            file_path.relative_to(STORY_IMAGES_DIR)
        except ValueError:
            logger.error(f"[StoryWriter] Attempted path traversal when resolving image: {image_url}")
            return None
        if not file_path.exists():
            logger.warning(f"[StoryWriter] Referenced scene image not found on disk: {file_path}")
            return None
        return file_path.read_bytes()
    except Exception as exc:
        # Deliberately best-effort: any failure degrades to "no reference image".
        logger.error(f"[StoryWriter] Failed to load reference image for video gen: {exc}")
        return None
def resolve_media_file(base_dir: Path, filename: str) -> Path:
    """
    Return a safe, resolved path for a media file stored under base_dir.

    Args:
        base_dir: Directory the file must live in.
        filename: Requested file name; anything from the first ``?`` onward is
            dropped and surrounding whitespace is stripped.

    Returns:
        The resolved path of an existing regular file inside ``base_dir``.

    Raises:
        HTTPException: 403 when the resolved path escapes ``base_dir``; 404
            when it does not point at an existing regular file.
    """
    filename = filename.split("?")[0].strip()
    resolved = (base_dir / filename).resolve()
    try:
        resolved.relative_to(base_dir.resolve())
    except ValueError:
        # The ValueError is an implementation detail; do not chain it.
        raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail="Access denied") from None
    # is_file() rather than exists(): an empty filename resolves to base_dir
    # itself, and a directory must not be returned as a media file.
    if not resolved.is_file():
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=f"File not found: {filename}")
    return resolved