Files
ALwrity/backend/services/translation/wavespeed_translator.py
ajaysi f503a24b3b feat: Add Auto-Dubbing feature for Podcast Maker
This commit adds the Auto-Dubbing feature for Podcast Maker with support
for translating podcast audio to different languages with optional voice
cloning to preserve the original speaker's voice.

New Features:
- Translation Service (common module): DeepL integration for low-cost
  translation, WaveSpeed integration for high-quality translation
- Audio Dubbing Service: STT -> Translate -> TTS pipeline with
  voice cloning support
- 9 new API endpoints for dubbing and voice cloning
- Support for 34+ languages
- Cost estimation utilities
- Comprehensive documentation

Files Added:
- services/translation/ (5 files): Translation service module
- services/dubbing/: Audio dubbing service
- api/podcast/handlers/dubbing.py: API endpoints
- docs/AUTO_DUBBING.md: Feature documentation
- CHANGELOG.md: Change log

Files Modified:
- api/podcast/models.py: Added dubbing request/response models
- api/podcast/router.py: Added dubbing routes
- services/__init__.py: Export translation and dubbing services
- scene_animation.py: Fixed missing Path import
2026-03-24 15:45:51 +05:30

139 lines
3.7 KiB
Python

"""
WaveSpeed Translation Provider.
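High-quality video/text translation using the WaveSpeed API.
Only video translation is implemented so far; the text translation
methods raise NotImplementedError.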
This will be used for Phase 3 (High-Quality Dubbing).
API: Uses existing WaveSpeed video translation API.
"""
from typing import Dict, List, Optional
from utils.logger_utils import get_service_logger
from .base_translation import (
BaseTranslationProvider,
TranslationQuality,
TranslationResult,
)
# Module-level logger for this provider, obtained from the project's
# logging helper under the "translation.wavespeed" name.
logger = get_service_logger("translation.wavespeed")
# Language code -> English display name for every language this provider
# reports as supported. Keys are lowercase; lookups elsewhere in this module
# lowercase the candidate code before testing membership. Insertion order is
# preserved for callers that list the languages.
WAVESPEED_SUPPORTED_LANGUAGES: Dict[str, str] = {
    "en": "English",
    "es": "Spanish",
    "fr": "French",
    "de": "German",
    "it": "Italian",
    "pt": "Portuguese",
    "ja": "Japanese",
    "ko": "Korean",
    "zh": "Chinese",
    "ar": "Arabic",
    "hi": "Hindi",
    "ru": "Russian",
    "nl": "Dutch",
    "pl": "Polish",
    "tr": "Turkish",
    "vi": "Vietnamese",
    "th": "Thai",
    "id": "Indonesian",
    "ms": "Malay",
    "fil": "Filipino",
    "he": "Hebrew",
    "cs": "Czech",
    "da": "Danish",
    "fi": "Finnish",
    "el": "Greek",
    "hu": "Hungarian",
    "nb": "Norwegian",
    "ro": "Romanian",
    "sk": "Slovak",
    "sv": "Swedish",
    "uk": "Ukrainian",
}
class WaveSpeedTranslator(BaseTranslationProvider):
    """Translation provider for the WaveSpeed (high-quality) tier.

    The text entry points (``translate`` and ``translate_batch``) are
    placeholders that raise ``NotImplementedError``. ``translate_video`` is
    the only method here that delegates to the WaveSpeed video translation
    generator.
    """

    # Per-character rate multiplied in by ``calculate_cost``.
    COST_PER_CHARACTER = 0.0001

    def __init__(self):
        """Run the base-class initializer and log that the provider is ready."""
        super().__init__()
        logger.info("[WaveSpeedTranslator] Initialized (high-quality mode)")

    @property
    def provider_name(self) -> str:
        """Return the provider's name, ``"WaveSpeed"``."""
        return "WaveSpeed"

    @property
    def quality(self) -> TranslationQuality:
        """Return the provider's quality tier, ``TranslationQuality.HIGH``."""
        return TranslationQuality.HIGH

    def translate(
        self,
        text: str,
        target_language: str,
        source_language: Optional[str] = None,
    ) -> TranslationResult:
        """Placeholder for single-text translation.

        The inherited ``validate_text`` check runs first, so any error it
        raises takes precedence over the "not implemented" error.

        Args:
            text: Text to translate.
            target_language: Target language (unused for now).
            source_language: Source language (unused for now).

        Raises:
            NotImplementedError: Always, once ``validate_text`` has returned.
        """
        self.validate_text(text)
        message = (
            "WaveSpeed text translation not yet implemented. "
            "For high-quality translation, use the video translation API "
            "or fall back to DeepL for text translation."
        )
        raise NotImplementedError(message)

    def translate_batch(
        self,
        texts: List[str],
        target_language: str,
        source_language: Optional[str] = None,
    ) -> List[TranslationResult]:
        """Placeholder for batch translation; no argument is inspected.

        Raises:
            NotImplementedError: Always.
        """
        message = "WaveSpeed batch translation not yet implemented."
        raise NotImplementedError(message)

    def get_supported_languages(self) -> Dict[str, str]:
        """Return a fresh copy of the supported-language table.

        A new dict is returned on every call, so callers may mutate the
        result without affecting ``WAVESPEED_SUPPORTED_LANGUAGES``.
        """
        return dict(WAVESPEED_SUPPORTED_LANGUAGES)

    def is_language_supported(self, language: str) -> bool:
        """Tell whether ``language`` is in the supported-language table.

        The code is passed through the inherited ``normalize_language_code``
        and lowercased before the membership test.
        """
        code = self.normalize_language_code(language)
        return code.lower() in WAVESPEED_SUPPORTED_LANGUAGES

    def calculate_cost(self, text_length: int, char_count: int = 0) -> float:
        """Estimate the cost for a number of characters.

        Args:
            text_length: Character count used when ``char_count`` is falsy.
            char_count: Character count that takes priority when non-zero.

        Returns:
            The chosen count multiplied by ``COST_PER_CHARACTER``.
        """
        billable_chars = char_count if char_count else text_length
        return billable_chars * self.COST_PER_CHARACTER

    def translate_video(
        self,
        video_path: str,
        target_language: str,
        source_language: Optional[str] = None,
    ) -> bytes:
        """Translate a video file through the WaveSpeed video translation API.

        The whole file at ``video_path`` is read into memory and handed to
        ``VideoTranslation.video_translate`` together with the normalized
        target language.

        NOTE(review): ``source_language`` is accepted but never forwarded to
        the API call - confirm whether ``video_translate`` can take it.

        Args:
            video_path: Path to the video file to read.
            target_language: Target language; normalized with the inherited
                ``normalize_language_code`` before use.
            source_language: Source language (currently unused).

        Returns:
            Whatever ``VideoTranslation.video_translate`` returns; annotated
            as the translated video bytes.
        """
        # Imported lazily, inside the method, exactly as the original did.
        from ..wavespeed.generators.video.translation import VideoTranslation

        client = VideoTranslation()
        output_language = self.normalize_language_code(target_language)

        with open(video_path, "rb") as video_file:
            payload = video_file.read()

        # enable_sync_mode=True presumably makes the call wait for the
        # finished result - TODO confirm against VideoTranslation.
        result = client.video_translate(
            video=payload,
            output_language=output_language,
            enable_sync_mode=True,
        )
        return result