Refactor podcast media storage to lazy tenant resolver
This commit is contained in:
@@ -5,6 +5,7 @@ Centralized constants and directory configuration for podcast module.
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
from typing import Literal
|
||||||
from services.story_writer.audio_generation_service import StoryAudioGenerationService
|
from services.story_writer.audio_generation_service import StoryAudioGenerationService
|
||||||
|
|
||||||
# Directory paths
|
# Directory paths
|
||||||
@@ -17,15 +18,54 @@ ROOT_DIR = Path(__file__).resolve().parents[3] # root/
|
|||||||
DATA_MEDIA_DIR = ROOT_DIR / "data" / "media"
|
DATA_MEDIA_DIR = ROOT_DIR / "data" / "media"
|
||||||
|
|
||||||
PODCAST_AUDIO_DIR = (DATA_MEDIA_DIR / "podcast_audio").resolve()
|
PODCAST_AUDIO_DIR = (DATA_MEDIA_DIR / "podcast_audio").resolve()
|
||||||
PODCAST_AUDIO_DIR.mkdir(parents=True, exist_ok=True)
|
|
||||||
PODCAST_IMAGES_DIR = (DATA_MEDIA_DIR / "podcast_images").resolve()
|
PODCAST_IMAGES_DIR = (DATA_MEDIA_DIR / "podcast_images").resolve()
|
||||||
PODCAST_IMAGES_DIR.mkdir(parents=True, exist_ok=True)
|
|
||||||
PODCAST_VIDEOS_DIR = (DATA_MEDIA_DIR / "podcast_videos").resolve()
|
PODCAST_VIDEOS_DIR = (DATA_MEDIA_DIR / "podcast_videos").resolve()
|
||||||
PODCAST_VIDEOS_DIR.mkdir(parents=True, exist_ok=True)
|
|
||||||
|
|
||||||
# Video subdirectory
|
# Video subdirectory
|
||||||
AI_VIDEO_SUBDIR = Path("AI_Videos")
|
AI_VIDEO_SUBDIR = Path("AI_Videos")
|
||||||
|
|
||||||
# Initialize audio service
|
MediaType = Literal["audio", "image", "video"]
|
||||||
audio_service = StoryAudioGenerationService(output_dir=str(PODCAST_AUDIO_DIR))
|
|
||||||
|
|
||||||
|
|
||||||
|
def _sanitize_user_id(user_id: str) -> str:
|
||||||
|
return "".join(c for c in user_id if c.isalnum() or c in ("-", "_"))
|
||||||
|
|
||||||
|
|
||||||
|
def get_podcast_media_dir(
    media_type: MediaType,
    user_id: str | None = None,
    *,
    ensure_exists: bool = False,
) -> Path:
    """Return the resolved storage directory for one kind of podcast media.

    Args:
        media_type: One of "audio", "image" or "video"; selects the
            ``podcast_*`` folder name.
        user_id: When truthy, the directory is placed under that user's
            ``workspace/workspace_<sanitized id>/media`` tree. When falsy, the
            global ``DATA_MEDIA_DIR`` tree is used instead.
        ensure_exists: Create the directory (and any missing parents) before
            returning it.

    Returns:
        The absolute, resolved directory path.

    Raises:
        KeyError: If ``media_type`` is not one of the three known kinds.
    """
    folder_by_type = {
        "audio": "podcast_audio",
        "image": "podcast_images",
        "video": "podcast_videos",
    }
    folder_name = folder_by_type[media_type]

    if not user_id:
        # No tenant given: use the global media location.
        base = DATA_MEDIA_DIR / folder_name
    else:
        workspace_name = f"workspace_{_sanitize_user_id(user_id)}"
        base = ROOT_DIR / "workspace" / workspace_name / "media" / folder_name

    target = base.resolve()
    if ensure_exists:
        target.mkdir(parents=True, exist_ok=True)
    return target
|
||||||
|
|
||||||
|
|
||||||
|
def get_podcast_media_read_dirs(media_type: MediaType, user_id: str | None = None) -> list[Path]:
    """List the directories to search for a media file, in lookup order.

    With a truthy ``user_id`` the user's own directory comes first and the
    global directory second; otherwise only the global directory is returned.
    No directory is created by this call.
    """
    # ``None`` selects the global directory in get_podcast_media_dir.
    search_order = [user_id, None] if user_id else [None]
    return [get_podcast_media_dir(media_type, owner) for owner in search_order]
|
||||||
|
|
||||||
|
|
||||||
|
def get_podcast_audio_service(user_id: str | None = None) -> StoryAudioGenerationService:
    """Create an audio generation service that writes into the caller's audio directory.

    The output directory is resolved (and created if missing) at call time,
    so nothing is touched on disk until a service is actually requested.
    """
    audio_dir = get_podcast_media_dir(
        media_type="audio",
        user_id=user_id,
        ensure_exists=True,
    )
    service = StoryAudioGenerationService(output_dir=str(audio_dir))
    return service
|
||||||
|
|||||||
@@ -20,7 +20,8 @@ from api.story_writer.utils.auth import require_authenticated_user
|
|||||||
from utils.asset_tracker import save_asset_to_library
|
from utils.asset_tracker import save_asset_to_library
|
||||||
from models.story_models import StoryAudioResult
|
from models.story_models import StoryAudioResult
|
||||||
from loguru import logger
|
from loguru import logger
|
||||||
from ..constants import PODCAST_AUDIO_DIR, audio_service
|
from ..constants import get_podcast_audio_service, get_podcast_media_dir
|
||||||
|
from ..utils import _resolve_podcast_media_file
|
||||||
from ..models import (
|
from ..models import (
|
||||||
PodcastAudioRequest,
|
PodcastAudioRequest,
|
||||||
PodcastAudioResponse,
|
PodcastAudioResponse,
|
||||||
@@ -62,7 +63,8 @@ async def upload_podcast_audio(
|
|||||||
file_ext = Path(file.filename).suffix or '.mp3'
|
file_ext = Path(file.filename).suffix or '.mp3'
|
||||||
unique_id = str(uuid.uuid4())[:8]
|
unique_id = str(uuid.uuid4())[:8]
|
||||||
audio_filename = f"audio_{project_id or 'temp'}_{unique_id}{file_ext}"
|
audio_filename = f"audio_{project_id or 'temp'}_{unique_id}{file_ext}"
|
||||||
audio_path = PODCAST_AUDIO_DIR / audio_filename
|
audio_base_dir = get_podcast_media_dir("audio", user_id, ensure_exists=True)
|
||||||
|
audio_path = audio_base_dir / audio_filename
|
||||||
|
|
||||||
# Save file
|
# Save file
|
||||||
with open(audio_path, "wb") as f:
|
with open(audio_path, "wb") as f:
|
||||||
@@ -123,6 +125,7 @@ async def generate_podcast_audio(
|
|||||||
raise HTTPException(status_code=400, detail="Text is required")
|
raise HTTPException(status_code=400, detail="Text is required")
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
audio_service = get_podcast_audio_service(user_id)
|
||||||
result: StoryAudioResult = audio_service.generate_ai_audio(
|
result: StoryAudioResult = audio_service.generate_ai_audio(
|
||||||
scene_number=0,
|
scene_number=0,
|
||||||
scene_title=request.scene_title,
|
scene_title=request.scene_title,
|
||||||
@@ -267,12 +270,7 @@ async def combine_podcast_audio(
|
|||||||
continue
|
continue
|
||||||
|
|
||||||
# Podcast audio files are stored in podcast_audio directory
|
# Podcast audio files are stored in podcast_audio directory
|
||||||
audio_path = (PODCAST_AUDIO_DIR / filename).resolve()
|
audio_path = _resolve_podcast_media_file(filename, "audio", user_id)
|
||||||
|
|
||||||
# Security check: ensure path is within PODCAST_AUDIO_DIR
|
|
||||||
if not str(audio_path).startswith(str(PODCAST_AUDIO_DIR)):
|
|
||||||
logger.error(f"[Podcast] Attempted path traversal when resolving audio: {audio_url}")
|
|
||||||
continue
|
|
||||||
else:
|
else:
|
||||||
logger.warning(f"[Podcast] Non-API URL format, treating as direct path: {audio_url}")
|
logger.warning(f"[Podcast] Non-API URL format, treating as direct path: {audio_url}")
|
||||||
audio_path = Path(audio_url)
|
audio_path = Path(audio_url)
|
||||||
@@ -303,7 +301,8 @@ async def combine_podcast_audio(
|
|||||||
|
|
||||||
# Generate output filename
|
# Generate output filename
|
||||||
output_filename = f"podcast_combined_{request.project_id}_{uuid.uuid4().hex[:8]}.mp3"
|
output_filename = f"podcast_combined_{request.project_id}_{uuid.uuid4().hex[:8]}.mp3"
|
||||||
output_path = PODCAST_AUDIO_DIR / output_filename
|
audio_base_dir = get_podcast_media_dir("audio", user_id, ensure_exists=True)
|
||||||
|
output_path = audio_base_dir / output_filename
|
||||||
|
|
||||||
# Write combined audio file
|
# Write combined audio file
|
||||||
combined_audio.write_audiofile(
|
combined_audio.write_audiofile(
|
||||||
@@ -382,20 +381,13 @@ async def serve_podcast_audio(
|
|||||||
Supports authentication via Authorization header or token query parameter.
|
Supports authentication via Authorization header or token query parameter.
|
||||||
Query parameter is useful for HTML elements like <audio> that cannot send custom headers.
|
Query parameter is useful for HTML elements like <audio> that cannot send custom headers.
|
||||||
"""
|
"""
|
||||||
require_authenticated_user(current_user)
|
|
||||||
|
|
||||||
# Security check: ensure filename doesn't contain path traversal
|
# Security check: ensure filename doesn't contain path traversal
|
||||||
if ".." in filename or "/" in filename or "\\" in filename:
|
if ".." in filename or "/" in filename or "\\" in filename:
|
||||||
raise HTTPException(status_code=400, detail="Invalid filename")
|
raise HTTPException(status_code=400, detail="Invalid filename")
|
||||||
|
|
||||||
audio_path = (PODCAST_AUDIO_DIR / filename).resolve()
|
user_id = require_authenticated_user(current_user)
|
||||||
|
audio_path = _resolve_podcast_media_file(filename, "audio", user_id)
|
||||||
# Security check: ensure path is within PODCAST_AUDIO_DIR
|
|
||||||
if not str(audio_path).startswith(str(PODCAST_AUDIO_DIR)):
|
|
||||||
raise HTTPException(status_code=403, detail="Access denied")
|
|
||||||
|
|
||||||
if not audio_path.exists():
|
|
||||||
raise HTTPException(status_code=404, detail="Audio file not found")
|
|
||||||
|
|
||||||
return FileResponse(audio_path, media_type="audio/mpeg")
|
return FileResponse(audio_path, media_type="audio/mpeg")
|
||||||
|
|
||||||
|
|||||||
@@ -24,8 +24,8 @@ from services.subscription import PricingService
|
|||||||
from services.subscription.preflight_validator import validate_scene_animation_operation
|
from services.subscription.preflight_validator import validate_scene_animation_operation
|
||||||
from api.story_writer.task_manager import task_manager
|
from api.story_writer.task_manager import task_manager
|
||||||
from loguru import logger
|
from loguru import logger
|
||||||
from ..constants import AI_VIDEO_SUBDIR, PODCAST_VIDEOS_DIR
|
from ..constants import AI_VIDEO_SUBDIR, get_podcast_media_dir, get_podcast_media_read_dirs
|
||||||
from ..utils import load_podcast_audio_bytes, load_podcast_image_bytes
|
from ..utils import _resolve_podcast_media_file, load_podcast_audio_bytes, load_podcast_image_bytes
|
||||||
from services.podcast_service import PodcastService
|
from services.podcast_service import PodcastService
|
||||||
from ..models import (
|
from ..models import (
|
||||||
PodcastVideoGenerationRequest,
|
PodcastVideoGenerationRequest,
|
||||||
@@ -164,10 +164,11 @@ def _execute_podcast_video_task(
|
|||||||
task_id, "processing", progress=80.0, message="Saving video file..."
|
task_id, "processing", progress=80.0, message="Saving video file..."
|
||||||
)
|
)
|
||||||
|
|
||||||
# Use podcast-specific video directory
|
# Use podcast-specific tenant workspace video directory
|
||||||
ai_video_dir = PODCAST_VIDEOS_DIR / AI_VIDEO_SUBDIR
|
videos_base_dir = get_podcast_media_dir("video", user_id, ensure_exists=True)
|
||||||
|
ai_video_dir = videos_base_dir / AI_VIDEO_SUBDIR
|
||||||
ai_video_dir.mkdir(parents=True, exist_ok=True)
|
ai_video_dir.mkdir(parents=True, exist_ok=True)
|
||||||
video_service = PodcastVideoCombinationService(output_dir=str(PODCAST_VIDEOS_DIR / "Final_Videos"))
|
video_service = PodcastVideoCombinationService(output_dir=str(videos_base_dir / "Final_Videos"))
|
||||||
|
|
||||||
save_result = video_service.save_scene_video(
|
save_result = video_service.save_scene_video(
|
||||||
video_bytes=animation_result["video_bytes"],
|
video_bytes=animation_result["video_bytes"],
|
||||||
@@ -277,7 +278,7 @@ async def generate_podcast_video(
|
|||||||
)
|
)
|
||||||
|
|
||||||
# Load audio bytes
|
# Load audio bytes
|
||||||
audio_bytes = load_podcast_audio_bytes(body.audio_url)
|
audio_bytes = load_podcast_audio_bytes(body.audio_url, user_id)
|
||||||
|
|
||||||
# Validate resolution
|
# Validate resolution
|
||||||
if body.resolution not in {"480p", "720p"}:
|
if body.resolution not in {"480p", "720p"}:
|
||||||
@@ -354,24 +355,25 @@ async def serve_podcast_video(
|
|||||||
Supports authentication via Authorization header or token query parameter.
|
Supports authentication via Authorization header or token query parameter.
|
||||||
Query parameter is useful for HTML elements like <video> that cannot send custom headers.
|
Query parameter is useful for HTML elements like <video> that cannot send custom headers.
|
||||||
"""
|
"""
|
||||||
require_authenticated_user(current_user)
|
|
||||||
|
|
||||||
# Security check: ensure filename doesn't contain path traversal
|
# Security check: ensure filename doesn't contain path traversal
|
||||||
if ".." in filename or "/" in filename or "\\" in filename:
|
if ".." in filename or "/" in filename or "\\" in filename:
|
||||||
raise HTTPException(status_code=400, detail="Invalid filename")
|
raise HTTPException(status_code=400, detail="Invalid filename")
|
||||||
|
|
||||||
# Look for video in podcast_videos directory (including AI_Videos subdirectory)
|
user_id = require_authenticated_user(current_user)
|
||||||
video_path = None
|
|
||||||
possible_paths = [
|
|
||||||
PODCAST_VIDEOS_DIR / filename,
|
|
||||||
PODCAST_VIDEOS_DIR / AI_VIDEO_SUBDIR / filename,
|
|
||||||
]
|
|
||||||
|
|
||||||
for path in possible_paths:
|
# Look for video in tenant workspace first, then legacy podcast_videos directory
|
||||||
resolved_path = path.resolve()
|
video_path = None
|
||||||
# Security check: ensure path is within PODCAST_VIDEOS_DIR
|
for base_dir in get_podcast_media_read_dirs("video", user_id):
|
||||||
if str(resolved_path).startswith(str(PODCAST_VIDEOS_DIR)) and resolved_path.exists():
|
possible_paths = [
|
||||||
video_path = resolved_path
|
base_dir / filename,
|
||||||
|
base_dir / AI_VIDEO_SUBDIR / filename,
|
||||||
|
]
|
||||||
|
for path in possible_paths:
|
||||||
|
resolved_path = path.resolve()
|
||||||
|
if str(resolved_path).startswith(str(base_dir.resolve())) and resolved_path.exists():
|
||||||
|
video_path = resolved_path
|
||||||
|
break
|
||||||
|
if video_path:
|
||||||
break
|
break
|
||||||
|
|
||||||
if not video_path:
|
if not video_path:
|
||||||
@@ -394,39 +396,29 @@ async def list_podcast_videos(
|
|||||||
|
|
||||||
logger.info(f"[Podcast] Listing videos for user_id={user_id}, project_id={project_id}")
|
logger.info(f"[Podcast] Listing videos for user_id={user_id}, project_id={project_id}")
|
||||||
|
|
||||||
# Look in podcast_videos/AI_Videos directory
|
|
||||||
ai_video_dir = PODCAST_VIDEOS_DIR / AI_VIDEO_SUBDIR
|
|
||||||
ai_video_dir.mkdir(parents=True, exist_ok=True)
|
|
||||||
|
|
||||||
videos = []
|
videos = []
|
||||||
if ai_video_dir.exists():
|
# Pattern: scene_{scene_number}_{user_id}_{timestamp}.mp4
|
||||||
# Pattern: scene_{scene_number}_{user_id}_{timestamp}.mp4
|
clean_user_id = "".join(c if c.isalnum() or c in ('-', '_') else '_' for c in user_id[:16])
|
||||||
# Extract user_id from current user (same logic as save_scene_video)
|
scene_video_map: Dict[int, Dict[str, Any]] = {}
|
||||||
clean_user_id = "".join(c if c.isalnum() or c in ('-', '_') else '_' for c in user_id[:16])
|
|
||||||
|
for base_dir in get_podcast_media_read_dirs("video", user_id):
|
||||||
|
ai_video_dir = base_dir / AI_VIDEO_SUBDIR
|
||||||
|
if not ai_video_dir.exists():
|
||||||
|
continue
|
||||||
|
|
||||||
logger.info(f"[Podcast] Looking for videos with clean_user_id={clean_user_id} in {ai_video_dir}")
|
logger.info(f"[Podcast] Looking for videos with clean_user_id={clean_user_id} in {ai_video_dir}")
|
||||||
|
|
||||||
# Map scene_number -> (most recent video info)
|
|
||||||
scene_video_map: Dict[int, Dict[str, Any]] = {}
|
|
||||||
|
|
||||||
all_files = list(ai_video_dir.glob("*.mp4"))
|
all_files = list(ai_video_dir.glob("*.mp4"))
|
||||||
logger.info(f"[Podcast] Found {len(all_files)} MP4 files in directory")
|
logger.info(f"[Podcast] Found {len(all_files)} MP4 files in directory")
|
||||||
|
|
||||||
for video_file in all_files:
|
for video_file in all_files:
|
||||||
filename = video_file.name
|
filename = video_file.name
|
||||||
# Match pattern: scene_{number}_{user_id}_{hash}.mp4
|
|
||||||
# Use greedy match for user_id and match hash as "anything except underscore before .mp4"
|
|
||||||
match = re.match(r"scene_(\d+)_(.+)_([^_]+)\.mp4", filename)
|
match = re.match(r"scene_(\d+)_(.+)_([^_]+)\.mp4", filename)
|
||||||
if match:
|
if match:
|
||||||
scene_number = int(match.group(1))
|
scene_number = int(match.group(1))
|
||||||
file_user_id = match.group(2)
|
file_user_id = match.group(2)
|
||||||
hash_part = match.group(3)
|
|
||||||
# Only include videos for this user
|
|
||||||
if file_user_id == clean_user_id:
|
if file_user_id == clean_user_id:
|
||||||
video_url = f"/api/podcast/videos/{filename}"
|
video_url = f"/api/podcast/videos/{filename}"
|
||||||
file_mtime = video_file.stat().st_mtime
|
file_mtime = video_file.stat().st_mtime
|
||||||
|
|
||||||
# Keep the most recent video for each scene
|
|
||||||
if scene_number not in scene_video_map or file_mtime > scene_video_map[scene_number]["mtime"]:
|
if scene_number not in scene_video_map or file_mtime > scene_video_map[scene_number]["mtime"]:
|
||||||
scene_video_map[scene_number] = {
|
scene_video_map[scene_number] = {
|
||||||
"scene_number": scene_number,
|
"scene_number": scene_number,
|
||||||
@@ -436,14 +428,9 @@ async def list_podcast_videos(
|
|||||||
"mtime": file_mtime,
|
"mtime": file_mtime,
|
||||||
}
|
}
|
||||||
|
|
||||||
# Convert map to list and sort by scene number
|
videos = list(scene_video_map.values())
|
||||||
videos = list(scene_video_map.values())
|
videos.sort(key=lambda v: v["scene_number"])
|
||||||
videos.sort(key=lambda v: v["scene_number"])
|
logger.info(f"[Podcast] Returning {len(videos)} videos for user: {[v['scene_number'] for v in videos]}")
|
||||||
|
|
||||||
logger.info(f"[Podcast] Returning {len(videos)} videos for user: {[v['scene_number'] for v in videos]}")
|
|
||||||
else:
|
|
||||||
logger.warning(f"[Podcast] Video directory does not exist: {ai_video_dir}")
|
|
||||||
|
|
||||||
return {"videos": videos}
|
return {"videos": videos}
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
@@ -526,10 +513,10 @@ def _execute_combine_videos_task(
|
|||||||
for video_url in scene_video_urls:
|
for video_url in scene_video_urls:
|
||||||
# Extract filename from URL (e.g., /api/podcast/videos/scene_1_user_xxx.mp4)
|
# Extract filename from URL (e.g., /api/podcast/videos/scene_1_user_xxx.mp4)
|
||||||
filename = video_url.split("/")[-1].split("?")[0] # Remove query params
|
filename = video_url.split("/")[-1].split("?")[0] # Remove query params
|
||||||
video_path = PODCAST_VIDEOS_DIR / AI_VIDEO_SUBDIR / filename
|
try:
|
||||||
|
video_path = _resolve_podcast_media_file(filename, "video", user_id, subdir=AI_VIDEO_SUBDIR)
|
||||||
if not video_path.exists():
|
except HTTPException:
|
||||||
logger.warning(f"[Podcast] Scene video not found: {video_path}")
|
logger.warning(f"[Podcast] Scene video not found: {filename}")
|
||||||
continue
|
continue
|
||||||
|
|
||||||
scene_video_paths.append(str(video_path))
|
scene_video_paths.append(str(video_path))
|
||||||
@@ -544,7 +531,8 @@ def _execute_combine_videos_task(
|
|||||||
)
|
)
|
||||||
|
|
||||||
# Use dedicated PodcastVideoCombinationService
|
# Use dedicated PodcastVideoCombinationService
|
||||||
final_videos_dir = PODCAST_VIDEOS_DIR / "Final_Videos"
|
videos_base_dir = get_podcast_media_dir("video", user_id, ensure_exists=True)
|
||||||
|
final_videos_dir = videos_base_dir / "Final_Videos"
|
||||||
final_videos_dir.mkdir(parents=True, exist_ok=True)
|
final_videos_dir.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
video_service = PodcastVideoCombinationService(output_dir=str(final_videos_dir))
|
video_service = PodcastVideoCombinationService(output_dir=str(final_videos_dir))
|
||||||
@@ -628,10 +616,14 @@ async def serve_final_podcast_video(
|
|||||||
"""Serve the final combined podcast video with authentication."""
|
"""Serve the final combined podcast video with authentication."""
|
||||||
user_id = require_authenticated_user(current_user)
|
user_id = require_authenticated_user(current_user)
|
||||||
|
|
||||||
final_videos_dir = PODCAST_VIDEOS_DIR / "Final_Videos"
|
video_path = None
|
||||||
video_path = final_videos_dir / filename
|
for base_dir in get_podcast_media_read_dirs("video", user_id):
|
||||||
|
candidate = (base_dir / "Final_Videos" / filename).resolve()
|
||||||
|
if str(candidate).startswith(str(base_dir.resolve())) and candidate.exists():
|
||||||
|
video_path = candidate
|
||||||
|
break
|
||||||
|
|
||||||
if not video_path.exists():
|
if not video_path:
|
||||||
raise HTTPException(status_code=404, detail="Video not found")
|
raise HTTPException(status_code=404, detail="Video not found")
|
||||||
|
|
||||||
# Basic security: ensure filename doesn't contain path traversal
|
# Basic security: ensure filename doesn't contain path traversal
|
||||||
|
|||||||
@@ -9,11 +9,35 @@ from urllib.parse import urlparse
|
|||||||
from fastapi import HTTPException
|
from fastapi import HTTPException
|
||||||
from loguru import logger
|
from loguru import logger
|
||||||
|
|
||||||
from .constants import PODCAST_AUDIO_DIR, PODCAST_IMAGES_DIR
|
from .constants import get_podcast_media_read_dirs
|
||||||
from utils.media_utils import load_media_bytes
|
from utils.media_utils import load_media_bytes
|
||||||
|
|
||||||
|
|
||||||
def load_podcast_audio_bytes(audio_url: str) -> bytes:
|
def _resolve_podcast_media_file(
    filename: str,
    media_type: str,
    user_id: str | None = None,
    *,
    subdir: Path | None = None,
) -> Path:
    """Find an existing podcast media file, searching each read directory in order.

    The search order comes from ``get_podcast_media_read_dirs`` (tenant
    directory first when ``user_id`` is given, then the global directory).

    Args:
        filename: File name to look up. Anything from the first "?" onwards
            is discarded and surrounding whitespace is stripped.
        media_type: Media kind passed through to ``get_podcast_media_read_dirs``.
        user_id: Optional owner whose directory is searched first.
        subdir: Optional sub-directory, relative to each base directory, in
            which the file is expected.

    Returns:
        The resolved path of the first matching regular file.

    Raises:
        HTTPException: 400 if the cleaned filename is empty, 403 if the
            resolved path escapes the directory being searched, 404 if no
            directory contains the file.
    """
    clean_filename = filename.split("?", 1)[0].strip()
    if not clean_filename:
        raise HTTPException(status_code=400, detail="Invalid filename")

    for base_dir in get_podcast_media_read_dirs(media_type, user_id):
        target_dir = (base_dir / subdir).resolve() if subdir else base_dir.resolve()
        candidate = (target_dir / clean_filename).resolve()

        # Compare path components rather than string prefixes: a plain
        # startswith() check would accept a sibling directory such as
        # ".../podcast_audio_backup" when searching ".../podcast_audio".
        if not candidate.is_relative_to(target_dir):
            logger.error(f"[Podcast] Attempted path traversal for {media_type}: {filename}")
            raise HTTPException(status_code=403, detail="Invalid media path")

        # Only regular files count as media; a name that resolves to a
        # directory (e.g. ".") must not be returned as a file.
        if candidate.is_file():
            return candidate

    raise HTTPException(status_code=404, detail=f"{media_type.capitalize()} file not found: {clean_filename}")
|
||||||
|
|
||||||
|
|
||||||
|
def load_podcast_audio_bytes(audio_url: str, user_id: str | None = None) -> bytes:
|
||||||
"""Load podcast audio bytes from URL. Only handles /api/podcast/audio/ URLs."""
|
"""Load podcast audio bytes from URL. Only handles /api/podcast/audio/ URLs."""
|
||||||
if not audio_url:
|
if not audio_url:
|
||||||
raise HTTPException(status_code=400, detail="Audio URL is required")
|
raise HTTPException(status_code=400, detail="Audio URL is required")
|
||||||
@@ -34,18 +58,7 @@ def load_podcast_audio_bytes(audio_url: str) -> bytes:
|
|||||||
if not filename:
|
if not filename:
|
||||||
raise HTTPException(status_code=400, detail=f"Could not extract filename from URL: {audio_url}")
|
raise HTTPException(status_code=400, detail=f"Could not extract filename from URL: {audio_url}")
|
||||||
|
|
||||||
# Podcast audio files are stored in podcast_audio directory
|
audio_path = _resolve_podcast_media_file(filename, "audio", user_id)
|
||||||
audio_path = (PODCAST_AUDIO_DIR / filename).resolve()
|
|
||||||
|
|
||||||
# Security check: ensure path is within PODCAST_AUDIO_DIR
|
|
||||||
if not str(audio_path).startswith(str(PODCAST_AUDIO_DIR)):
|
|
||||||
logger.error(f"[Podcast] Attempted path traversal when resolving audio: {audio_url}")
|
|
||||||
raise HTTPException(status_code=403, detail="Invalid audio path")
|
|
||||||
|
|
||||||
if not audio_path.exists():
|
|
||||||
logger.warning(f"[Podcast] Audio file not found: {audio_path}")
|
|
||||||
raise HTTPException(status_code=404, detail=f"Audio file not found: {filename}")
|
|
||||||
|
|
||||||
return audio_path.read_bytes()
|
return audio_path.read_bytes()
|
||||||
except HTTPException:
|
except HTTPException:
|
||||||
raise
|
raise
|
||||||
@@ -77,4 +90,3 @@ def load_podcast_image_bytes(image_url: str) -> bytes:
|
|||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
logger.error(f"[Podcast] Failed to load image: {exc}")
|
logger.error(f"[Podcast] Failed to load image: {exc}")
|
||||||
raise HTTPException(status_code=500, detail=f"Failed to load image: {str(exc)}")
|
raise HTTPException(status_code=500, detail=f"Failed to load image: {str(exc)}")
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user