feat: Add Auto-Dubbing feature for Podcast Maker
This commit adds the Auto-Dubbing feature for Podcast Maker. It translates podcast audio into other languages, with optional voice cloning to preserve the original speaker's voice.

New Features:
- Translation Service (common module): DeepL integration for low-cost translation, WaveSpeed integration for high-quality translation
- Audio Dubbing Service: STT -> Translate -> TTS pipeline with voice cloning support
- 9 new API endpoints for dubbing and voice cloning
- Support for 34+ languages
- Cost estimation utilities
- Comprehensive documentation

Files Added:
- services/translation/ (5 files): Translation service module
- services/dubbing/: Audio dubbing service
- api/podcast/handlers/dubbing.py: API endpoints
- docs/AUTO_DUBBING.md: Feature documentation
- CHANGELOG.md: Change log

Files Modified:
- api/podcast/models.py: Added dubbing request/response models
- api/podcast/router.py: Added dubbing routes
- services/__init__.py: Export translation and dubbing services
- scene_animation.py: Fixed missing Path import
This commit is contained in:
210
backend/services/translation/base_translation.py
Normal file
210
backend/services/translation/base_translation.py
Normal file
@@ -0,0 +1,210 @@
|
||||
"""
|
||||
Base Translation Provider abstract class.
|
||||
|
||||
Defines the interface for all translation providers in ALwrity.
|
||||
"""
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
from dataclasses import dataclass, field
|
||||
from enum import Enum
|
||||
from typing import Dict, List, Optional, Any
|
||||
|
||||
|
||||
class TranslationQuality(str, Enum):
    """Quality tier reported by a translation provider.

    Members are also ``str`` instances, so each one compares equal to its
    raw value (``TranslationQuality.LOW == "low"``).
    """

    LOW = "low"
    HIGH = "high"
|
||||
|
||||
|
||||
@dataclass
class TranslationResult:
    """Outcome of a single translation request.

    Holds the translated text together with the language pair, the provider
    that produced it, the provider's quality tier, and optional extras
    (confidence score, alternative translations, free-form metadata).
    """

    translated_text: str
    source_language: str
    target_language: str
    provider: str
    quality: TranslationQuality
    confidence: float = 1.0
    alternative_translations: List[str] = field(default_factory=list)
    metadata: Dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> Dict[str, Any]:
        """Return the result as a plain dictionary.

        Keys follow field declaration order.  ``quality`` is emitted as the
        enum member's ``.value``; every other field is passed through as-is
        (the list and dict fields are not copied).
        """
        ordered_fields = (
            "translated_text",
            "source_language",
            "target_language",
            "provider",
            "quality",
            "confidence",
            "alternative_translations",
            "metadata",
        )
        payload: Dict[str, Any] = {}
        for attr in ordered_fields:
            payload[attr] = getattr(self, attr)
        # Re-assigning an existing key keeps its position in the dict.
        payload["quality"] = self.quality.value
        return payload
|
||||
|
||||
|
||||
class BaseTranslationProvider(ABC):
    """Abstract base class that every translation provider implements.

    Subclasses supply the provider identity (``provider_name``, ``quality``),
    the translation calls, language-support checks and cost estimation.
    The base class contributes language-name normalisation, input
    validation and a text-chunking helper.
    """

    # Language code -> English display name.
    SUPPORTED_LANGUAGES: Dict[str, str] = {
        "en": "English",
        "es": "Spanish",
        "fr": "French",
        "de": "German",
        "it": "Italian",
        "pt": "Portuguese",
        "nl": "Dutch",
        "pl": "Polish",
        "ru": "Russian",
        "ja": "Japanese",
        "zh": "Chinese",
        "ko": "Korean",
        "ar": "Arabic",
        "hi": "Hindi",
        "tr": "Turkish",
        "vi": "Vietnamese",
        "th": "Thai",
        "id": "Indonesian",
        "ms": "Malay",
        "fil": "Filipino",
        "he": "Hebrew",
        "cs": "Czech",
        "da": "Danish",
        "fi": "Finnish",
        "el": "Greek",
        "hu": "Hungarian",
        "nb": "Norwegian",
        "ro": "Romanian",
        "sk": "Slovak",
        "sv": "Swedish",
        "uk": "Ukrainian",
        "bg": "Bulgarian",
        "hr": "Croatian",
        "lt": "Lithuanian",
        "lv": "Latvian",
        "et": "Estonian",
        "sl": "Slovenian",
    }

    # Upper bound, in characters, enforced by ``validate_text``.
    MAX_TEXT_LENGTH: int = 50000

    # Empty class-level default.  ``_build_language_mapping`` shadows it with
    # a per-instance dict, so this shared object is never mutated.
    LANGUAGE_CODE_MAPPING: Dict[str, str] = {}

    def __init__(self):
        """Build the per-instance language lookup table."""
        self._build_language_mapping()

    def _build_language_mapping(self) -> None:
        """Populate ``self.LANGUAGE_CODE_MAPPING`` from ``SUPPORTED_LANGUAGES``.

        A fresh dict is built and bound on the instance.  Writing into the
        class-level dict would share one table across every provider class,
        letting providers with different ``SUPPORTED_LANGUAGES`` leak entries
        into each other.
        """
        mapping: Dict[str, str] = {}
        for code, name in self.SUPPORTED_LANGUAGES.items():
            mapping[code.lower()] = code
            mapping[name.lower()] = code
            # ``normalize_language_code`` lowercases its input, so this key
            # only matters to code that indexes the mapping directly.
            mapping[name.upper()] = code
        self.LANGUAGE_CODE_MAPPING = mapping

    def normalize_language_code(self, language: str) -> str:
        """Resolve a language code or name to a canonical code.

        Args:
            language: Language code or English name; surrounding whitespace
                and letter case are ignored.

        Returns:
            The matching key of ``SUPPORTED_LANGUAGES`` when the input is a
            known code or name; otherwise the stripped input upper-cased.
        """
        normalized = language.strip().lower()

        mapped = self.LANGUAGE_CODE_MAPPING.get(normalized)
        if mapped is not None:
            return mapped

        # Fallback for instances whose mapping was never built (for example
        # a subclass that skipped ``super().__init__()``).  It runs before
        # the "unknown" branch so a supported code resolves the same way
        # whether or not the mapping exists.
        for code, name in self.SUPPORTED_LANGUAGES.items():
            if normalized in (code.lower(), name.lower()):
                return code

        # Unknown language: hand it back upper-cased.
        return normalized.upper()

    @property
    @abstractmethod
    def provider_name(self) -> str:
        """Return the name of the translation provider."""

    @property
    @abstractmethod
    def quality(self) -> "TranslationQuality":
        """Return the quality tier of this provider."""

    @abstractmethod
    def translate(
        self,
        text: str,
        target_language: str,
        source_language: Optional[str] = None,
    ) -> "TranslationResult":
        """
        Translate text to target language.

        Args:
            text: The text to translate
            target_language: Target language code or name
            source_language: Source language code or name (auto-detect if None)

        Returns:
            TranslationResult with translated text and metadata
        """

    @abstractmethod
    def translate_batch(
        self,
        texts: List[str],
        target_language: str,
        source_language: Optional[str] = None,
    ) -> List["TranslationResult"]:
        """
        Translate multiple texts in batch.

        Args:
            texts: List of texts to translate
            target_language: Target language code or name
            source_language: Source language code or name (auto-detect if None)

        Returns:
            List of TranslationResults
        """

    @abstractmethod
    def get_supported_languages(self) -> Dict[str, str]:
        """Return dictionary of supported language codes and names."""

    @abstractmethod
    def is_language_supported(self, language: str) -> bool:
        """Check if a language is supported."""

    @abstractmethod
    def calculate_cost(self, text_length: int, char_count: int = 0) -> float:
        """
        Calculate the cost for translation.

        Args:
            text_length: Number of characters to translate
            char_count: Optional explicit character count

        Returns:
            Estimated cost in USD
        """

    def validate_text(self, text: str) -> bool:
        """Validate that text is suitable for translation.

        Args:
            text: Candidate text.

        Returns:
            ``False`` when the text is empty or whitespace-only, ``True``
            otherwise.

        Raises:
            ValueError: If the text is longer than ``MAX_TEXT_LENGTH``.
        """
        if not text or not text.strip():
            return False
        if len(text) > self.MAX_TEXT_LENGTH:
            raise ValueError(
                f"Text too long: {len(text)} chars. "
                f"Maximum is {self.MAX_TEXT_LENGTH}."
            )
        return True

    def split_long_text(self, text: str, max_chars: int = 5000) -> List[str]:
        """Split long text into chunks of at most ``max_chars`` characters.

        Text that already fits is returned unchanged as a single chunk.
        Otherwise it is cut at sentence boundaries: whitespace following
        ``.``, ``!`` or ``?``, directly after the full-width marks
        ``。！？``, and at line breaks.  Punctuation is kept.  Consecutive
        sentences are joined with one space while they fit.  A sentence
        longer than ``max_chars`` is cut at the last space that fits, or
        mid-word when it has none.

        Args:
            text: Text to split.
            max_chars: Maximum length of each returned chunk; must be >= 1.

        Returns:
            The chunks in original order, none longer than ``max_chars``
            (except the single-chunk case where the text already fits).

        Raises:
            ValueError: If ``max_chars`` is smaller than 1.
        """
        import re  # local import: the module header does not import ``re``

        if max_chars < 1:
            raise ValueError(f"max_chars must be positive, got {max_chars}")
        if len(text) <= max_chars:
            return [text]

        boundary = r"(?<=[.!?])\s+|(?<=[。！？])\s*|\n+"
        chunks: List[str] = []
        current = ""

        for piece in re.split(boundary, text):
            sentence = piece.strip()
            if not sentence:
                continue

            # A sentence that cannot fit in any chunk is cut up on its own.
            while len(sentence) > max_chars:
                if current:
                    chunks.append(current)
                    current = ""
                cut = sentence.rfind(" ", 0, max_chars + 1)
                if cut <= 0:
                    cut = max_chars  # no usable space: cut mid-word
                chunks.append(sentence[:cut].rstrip())
                sentence = sentence[cut:].lstrip()
            if not sentence:
                continue

            candidate = f"{current} {sentence}" if current else sentence
            if len(candidate) <= max_chars:
                current = candidate
            else:
                chunks.append(current)
                current = sentence

        if current:
            chunks.append(current)

        return chunks
|
||||
Reference in New Issue
Block a user