feat: Add Auto-Dubbing feature for Podcast Maker

This commit adds the Auto-Dubbing feature for Podcast Maker with support
for translating podcast audio to different languages with optional voice
cloning to preserve the original speaker's voice.

New Features:
- Translation Service (common module): DeepL integration for low-cost
  translation, WaveSpeed integration for high-quality translation
- Audio Dubbing Service: STT -> Translate -> TTS pipeline with
  voice cloning support
- 9 new API endpoints for dubbing and voice cloning
- Support for 34+ languages
- Cost estimation utilities
- Comprehensive documentation

Files Added:
- services/translation/ (5 files): Translation service module
- services/dubbing/: Audio dubbing service
- api/podcast/handlers/dubbing.py: API endpoints
- docs/AUTO_DUBBING.md: Feature documentation
- CHANGELOG.md: Change log

Files Modified:
- api/podcast/models.py: Added dubbing request/response models
- api/podcast/router.py: Added dubbing routes
- services/__init__.py: Export translation and dubbing services
- scene_animation.py: Fixed missing Path import
This commit is contained in:
ajaysi
2026-03-24 15:45:51 +05:30
parent 3c58fd555b
commit f503a24b3b
13 changed files with 2448 additions and 3 deletions

View File

@@ -9,11 +9,42 @@ from .onboarding.api_key_manager import (
)
from .validation import check_all_api_keys
from .translation import (
translate_text,
translate_batch,
get_translator,
list_supported_languages,
is_language_supported,
TranslationQuality,
TranslationResult,
DeepLTranslator,
)
from .dubbing import (
AudioDubbingService,
DubbingResult,
VoiceCloneInfo,
)
# Public API of the services package. Every name listed here is imported
# above; the list is closed exactly once so that the translation and dubbing
# exports are part of it.
__all__ = [
    # Onboarding
    'APIKeyManager',
    'OnboardingProgress',
    'get_onboarding_progress',
    'StepStatus',
    'StepData',
    'check_all_api_keys',
    # Translation (common module)
    'translate_text',
    'translate_batch',
    'get_translator',
    'list_supported_languages',
    'is_language_supported',
    'TranslationQuality',
    'TranslationResult',
    'DeepLTranslator',
    # Dubbing
    'AudioDubbingService',
    'DubbingResult',
    'VoiceCloneInfo',
]

View File

@@ -0,0 +1,559 @@
"""
Audio Dubbing Service for ALwrity.
Provides audio dubbing functionality:
- STT: Speech-to-text using Whisper/Gemini
- Translate: Text translation using DeepL (low quality tier) or WaveSpeed (high quality tier)
- TTS: Text-to-speech using WaveSpeed
This is a COMMON module that can be used across the application:
- Podcast Maker: Dub podcast audio to different languages
- Video Studio: Add translated voiceovers
- Content Creation: Multilingual audio content
Usage:
from services.dubbing import AudioDubbingService
service = AudioDubbingService()
result = service.dub_audio(
source_audio="/path/to/audio.mp3",
target_language="Spanish",
voice_id="Wise_Woman"
)
"""
import os
import uuid
from dataclasses import dataclass
from pathlib import Path
from typing import Optional, Dict, Any, List, Callable
from loguru import logger
from utils.logger_utils import get_service_logger
from services.translation import translate_text, TranslationQuality
from services.llm_providers.main_audio_generation import generate_audio, AudioGenerationResult
# Rebinds the module-level name `logger` (first bound by the loguru import
# above) to the project service logger for the "dubbing.audio" channel.
logger = get_service_logger("dubbing.audio")
# Audio file extensions known to this module.
# NOTE(review): not referenced anywhere in the visible part of this file.
AUDIO_EXTENSIONS = {".mp3", ".wav", ".m4a", ".aac", ".ogg", ".flac"}
@dataclass
class DubbingResult:
    """Everything produced by one dubbing run.

    Instances are built by ``AudioDubbingService.dub_audio`` and
    ``AudioDubbingService.dub_audio_with_voice_clone``.
    """

    # Location of the generated file on disk and the URL it is exposed under.
    dubbed_audio_path: str
    dubbed_audio_url: str

    # Text recovered from the source audio and its translation.
    original_transcript: str
    translated_transcript: str

    # Languages involved in the run.
    source_language: str
    target_language: str

    # Voice used for text-to-speech.
    voice_id: str

    # Length of the dubbed audio in seconds and its size in bytes.
    duration_seconds: int
    file_size: int

    # Cost reported for the run and the requested translation quality tier.
    cost: float
    quality: str

    # Voice-clone details; only filled in when cloning was used.
    voice_clone_used: bool = False
    cloned_voice_id: Optional[str] = None
@dataclass
class VoiceCloneInfo:
    """Description of a cloned voice as returned by ``clone_voice_from_audio``."""

    # Identifier of the cloned voice and the URL of its saved preview file.
    voice_id: str
    voice_url: str

    # Language the clone was boosted for, or "auto" when none was given.
    source_language: str

    # Accuracy setting that was passed to the cloning call.
    accuracy: float

    # Size in bytes reported by the cloning result.
    file_size: int
class AudioDubbingService:
    """Dub spoken audio into another language.

    The pipeline is speech-to-text -> translation -> text-to-speech, with an
    optional voice-cloning step so the dubbed track is spoken in a clone of
    the source voice. Generated files are written below ``output_dir``.
    """

    # Extension -> MIME type for the audio formats this service recognises.
    _MIME_TYPES = {
        ".mp3": "audio/mpeg",
        ".wav": "audio/wav",
        ".m4a": "audio/mp4",
        ".aac": "audio/aac",
        ".ogg": "audio/ogg",
        ".flac": "audio/flac",
    }
    _DEFAULT_MIME_TYPE = "audio/mpeg"

    # Pricing constants (USD) used for cost reporting and estimation.
    _CHARS_PER_SECOND = 15
    _LOW_TRANSLATION_RATE = 0.00001
    _HIGH_TRANSLATION_RATE = 0.0001
    _TTS_RATE = 0.001
    _VOICE_CLONE_COST = 0.05

    def __init__(
        self,
        output_dir: Optional[Path] = None,
        default_voice_id: str = "Wise_Woman",
    ):
        """Create the service and make sure its output directory exists.

        Args:
            output_dir: Directory for generated audio. A plain string is
                accepted and converted to ``Path``. Defaults to the
                project's ``data/media/dubbed_audio`` directory.
            default_voice_id: Voice used by ``dub_audio`` when none is given.
        """
        # Coerce to Path so a string argument does not break ``mkdir`` and
        # the ``/`` joins used when saving files.
        self.output_dir = Path(output_dir) if output_dir else self._get_default_output_dir()
        self.default_voice_id = default_voice_id
        self._ensure_output_dir()
        logger.info(f"[AudioDubbingService] Initialized with output dir: {self.output_dir}")

    def _get_default_output_dir(self) -> Path:
        """Return ``<three levels above this file>/data/media/dubbed_audio``."""
        return Path(__file__).resolve().parents[3] / "data" / "media" / "dubbed_audio"

    def _ensure_output_dir(self) -> None:
        """Create the output directory (and parents) if it is missing."""
        self.output_dir.mkdir(parents=True, exist_ok=True)

    @staticmethod
    def _report(
        progress_callback: Optional[Callable[[float, str], None]],
        fraction: float,
        message: str,
    ) -> None:
        """Invoke ``progress_callback`` with a progress update, if one was given."""
        if progress_callback:
            progress_callback(fraction, message)

    def _download_audio(self, source: str) -> tuple[bytes, str]:
        """Load audio from an HTTP(S) URL or a local path.

        Returns:
            ``(audio_bytes, content_type)``.

        Raises:
            FileNotFoundError: If ``source`` is a local path that does not exist.
            httpx.HTTPStatusError: If the URL responds with an error status.
        """
        if source.startswith(("http://", "https://")):
            import httpx

            # NOTE(review): the URL is fetched as given, with no host
            # validation or size limit; confirm callers only pass trusted URLs.
            with httpx.Client(timeout=60.0) as client:
                response = client.get(source)
                response.raise_for_status()
                content_type = response.headers.get("content-type", "audio/mpeg")
                return response.content, content_type

        path = Path(source)
        if not path.exists():
            raise FileNotFoundError(f"Audio file not found: {source}")
        return path.read_bytes(), self._get_mime_type(path)

    def _get_mime_type(self, path: Path) -> str:
        """Map a file extension to its MIME type, defaulting to ``audio/mpeg``."""
        return self._MIME_TYPES.get(path.suffix.lower(), self._DEFAULT_MIME_TYPE)

    @classmethod
    def _guess_audio_suffix(cls, source: str, default: str = ".mp3") -> str:
        """Return the known audio extension of a path or URL, else ``default``.

        Query strings and fragments of URLs are ignored.
        """
        from urllib.parse import urlparse

        suffix = Path(urlparse(source).path).suffix.lower()
        return suffix if suffix in cls._MIME_TYPES else default

    def _transcribe_audio(self, audio_path: str, audio_bytes: Optional[bytes] = None) -> str:
        """Transcribe audio to text.

        When ``audio_bytes`` is given it is written to a temporary file that
        is transcribed instead of ``audio_path``; the temporary file is
        always removed afterwards.

        Raises:
            RuntimeError: If the transcriber returns no text.
        """
        from services.llm_providers.audio_to_text_generation.gemini_audio_text import transcribe_audio

        temp_path = None
        try:
            if audio_bytes:
                import tempfile

                # Keep the source's real extension so the temp file is not
                # mislabelled as MP3 when the input is, e.g., WAV.
                suffix = self._guess_audio_suffix(audio_path)
                with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as f:
                    f.write(audio_bytes)
                    temp_path = f.name
                audio_path = temp_path

            transcript = transcribe_audio(audio_path)
            if not transcript:
                raise RuntimeError("Failed to transcribe audio")
            logger.info(f"[AudioDubbing] Transcribed {len(transcript)} characters")
            return transcript
        finally:
            if temp_path and os.path.exists(temp_path):
                os.unlink(temp_path)

    def _save_audio(self, audio_bytes: bytes, suffix: str = ".mp3") -> tuple[Path, str, int]:
        """Write dubbed audio under a random name.

        Returns:
            ``(file_path, api_url, size_in_bytes)``.
        """
        unique_id = str(uuid.uuid4())[:8]
        filename = f"dubbed_{unique_id}{suffix}"
        filepath = self.output_dir / filename
        filepath.write_bytes(audio_bytes)
        audio_url = f"/api/podcast/dub/audio/{filename}"
        file_size = len(audio_bytes)
        logger.info(f"[AudioDubbing] Saved dubbed audio: {filepath} ({file_size} bytes)")
        return filepath, audio_url, file_size

    def _detect_source_language(self, transcript: str) -> str:
        """Return the assumed source language of ``transcript``.

        NOTE(review): this is a placeholder. No detection is performed and
        the transcript is not inspected; the result is always ``"en"``.
        Pass ``source_language`` explicitly for non-English audio.
        """
        return "en"

    def _clone_voice_from_bytes(
        self,
        audio_bytes: bytes,
        custom_voice_id: Optional[str] = None,
        accuracy: float = 0.7,
        language_boost: Optional[str] = None,
        user_id: Optional[str] = None,
    ) -> VoiceCloneInfo:
        """Clone a voice from already-loaded audio and save its preview file."""
        import re

        from services.llm_providers.main_audio_generation import clone_voice

        if not custom_voice_id:
            unique_suffix = str(uuid.uuid4())[:8]
            custom_voice_id = f"cloned_voice_{unique_suffix}"

        result = clone_voice(
            audio_bytes=audio_bytes,
            custom_voice_id=custom_voice_id,
            accuracy=accuracy,
            language_boost=language_boost,
            user_id=user_id,
        )

        self._ensure_output_dir()
        # The voice id may come from a request; strip anything that could
        # act as a path separator before using it in a file name.
        safe_name = re.sub(r"[^A-Za-z0-9_-]", "_", custom_voice_id)
        voice_filename = f"voice_{safe_name}.mp3"
        voice_path = self.output_dir / voice_filename
        voice_path.write_bytes(result.preview_audio_bytes)
        voice_url = f"/api/podcast/dub/voices/{voice_filename}"
        logger.info(f"[AudioDubbing] Voice cloned: {custom_voice_id}")

        return VoiceCloneInfo(
            voice_id=custom_voice_id,
            voice_url=voice_url,
            source_language=language_boost or "auto",
            accuracy=accuracy,
            file_size=result.file_size,
        )

    def clone_voice_from_audio(
        self,
        source_audio: str,
        custom_voice_id: Optional[str] = None,
        accuracy: float = 0.7,
        language_boost: Optional[str] = None,
        user_id: Optional[str] = None,
    ) -> VoiceCloneInfo:
        """
        Clone voice from source audio file.

        Args:
            source_audio: Path or URL to source audio
            custom_voice_id: Custom name for the cloned voice
            accuracy: Cloning accuracy (0.1-1.0, default: 0.7)
            language_boost: Language to boost (e.g., "Spanish")
            user_id: User ID for tracking

        Returns:
            VoiceCloneInfo with cloned voice details
        """
        audio_bytes, _content_type = self._download_audio(source_audio)
        return self._clone_voice_from_bytes(
            audio_bytes,
            custom_voice_id=custom_voice_id,
            accuracy=accuracy,
            language_boost=language_boost,
            user_id=user_id,
        )

    def dub_audio_with_voice_clone(
        self,
        source_audio: str,
        target_language: str,
        source_language: Optional[str] = None,
        custom_voice_id: Optional[str] = None,
        accuracy: float = 0.7,
        speed: float = 1.0,
        emotion: str = "happy",
        quality: str = "high",
        user_id: Optional[str] = None,
        progress_callback: Optional[Callable[[float, str], None]] = None,
    ) -> DubbingResult:
        """
        Dub audio to target language while preserving original voice.

        Pipeline: Source Audio → Voice Clone → STT → Translate → TTS (cloned voice) → Dubbed Audio

        Args:
            source_audio: Path or URL to source audio file
            target_language: Target language for dubbing
            source_language: Source language (see ``_detect_source_language``
                for what is assumed when None)
            custom_voice_id: Custom name for the cloned voice
            accuracy: Voice cloning accuracy (0.1-1.0)
            speed: Speech speed (0.5-2.0)
            emotion: Emotion for TTS voice
            quality: Quality label copied into the result. NOTE(review): the
                translation itself always runs at the HIGH tier here.
            user_id: User ID for tracking
            progress_callback: Optional callback for progress updates

        Returns:
            DubbingResult with dubbed audio details
        """
        try:
            self._report(progress_callback, 0.05, "Cloning source voice...")
            # Fetch the source once and reuse the bytes for both cloning and
            # transcription instead of downloading it twice.
            audio_bytes, _content_type = self._download_audio(source_audio)
            voice_info = self._clone_voice_from_bytes(
                audio_bytes,
                custom_voice_id=custom_voice_id,
                accuracy=accuracy,
                language_boost=target_language,
                user_id=user_id,
            )
            self._report(progress_callback, 0.15, "Voice cloned. Downloading audio...")

            self._report(progress_callback, 0.20, "Transcribing audio...")
            transcript = self._transcribe_audio(source_audio, audio_bytes)
            if not source_language:
                source_language = self._detect_source_language(transcript)
            logger.info(f"[AudioDubbing] Transcript: {transcript[:100]}...")

            self._report(progress_callback, 0.40, "Translating text...")
            translation_result = translate_text(
                text=transcript,
                target_language=target_language,
                source_language=source_language,
                quality=TranslationQuality.HIGH,
            )
            translated_text = translation_result.translated_text
            logger.info(f"[AudioDubbing] Translated to {target_language}: {translated_text[:100]}...")

            self._report(progress_callback, 0.65, "Generating dubbed audio with cloned voice...")
            audio_result = generate_audio(
                text=translated_text,
                voice_id=voice_info.voice_id,
                speed=speed,
                emotion=emotion,
                user_id=user_id,
                language_boost=target_language,
            )

            self._report(progress_callback, 0.90, "Saving dubbed audio...")
            filepath, audio_url, file_size = self._save_audio(audio_result.audio_bytes, ".mp3")
            self._report(progress_callback, 1.0, "Dubbing with voice clone complete!")

            # Reported cost covers cloning plus whatever the translator
            # recorded in its metadata; TTS cost is not included.
            total_cost = self._VOICE_CLONE_COST + translation_result.metadata.get("estimated_cost", 0.0)
            logger.info(f"[AudioDubbing] Voice clone dubbing complete! Output: {filepath}")

            return DubbingResult(
                dubbed_audio_path=str(filepath),
                dubbed_audio_url=audio_url,
                original_transcript=transcript,
                translated_transcript=translated_text,
                source_language=source_language or "auto",
                target_language=target_language,
                voice_id=voice_info.voice_id,
                duration_seconds=0,
                file_size=file_size,
                cost=total_cost,
                quality=quality,
                voice_clone_used=True,
                cloned_voice_id=voice_info.voice_id,
            )
        except Exception as e:
            # Log at the service boundary, then let the caller handle it.
            logger.error(f"[AudioDubbing] Voice clone dubbing error: {str(e)}")
            raise

    def dub_audio(
        self,
        source_audio: str,
        target_language: str,
        source_language: Optional[str] = None,
        voice_id: Optional[str] = None,
        speed: float = 1.0,
        emotion: str = "happy",
        quality: str = "low",
        use_voice_clone: bool = False,
        custom_voice_id: Optional[str] = None,
        accuracy: float = 0.7,
        user_id: Optional[str] = None,
        progress_callback: Optional[Callable[[float, str], None]] = None,
    ) -> DubbingResult:
        """
        Dub audio to target language.

        Pipeline: Source Audio → STT → Translate → TTS → Dubbed Audio

        If use_voice_clone=True:
        Pipeline: Source Audio → Voice Clone → STT → Translate → TTS (cloned voice) → Dubbed Audio

        Args:
            source_audio: Path or URL to source audio file
            target_language: Target language for dubbing
            source_language: Source language (see ``_detect_source_language``
                for what is assumed when None)
            voice_id: Voice ID for TTS (default: the service's default voice)
            speed: Speech speed (0.5-2.0)
            emotion: Emotion for TTS voice
            quality: Translation quality ("high" selects the HIGH tier, any
                other value the LOW tier)
            use_voice_clone: Delegate to ``dub_audio_with_voice_clone``
            custom_voice_id: Custom name for the cloned voice
            accuracy: Voice cloning accuracy (0.1-1.0) when use_voice_clone=True
            user_id: User ID for tracking
            progress_callback: Optional callback for progress updates

        Returns:
            DubbingResult with dubbed audio details
        """
        if use_voice_clone:
            return self.dub_audio_with_voice_clone(
                source_audio=source_audio,
                target_language=target_language,
                source_language=source_language,
                custom_voice_id=custom_voice_id,
                accuracy=accuracy,
                speed=speed,
                emotion=emotion,
                quality=quality,
                user_id=user_id,
                progress_callback=progress_callback,
            )

        voice_id = voice_id or self.default_voice_id
        translation_quality = TranslationQuality.HIGH if quality == "high" else TranslationQuality.LOW

        try:
            self._report(progress_callback, 0.1, "Downloading source audio...")
            audio_bytes, _content_type = self._download_audio(source_audio)
            logger.info(f"[AudioDubbing] Downloaded audio: {len(audio_bytes)} bytes")

            self._report(progress_callback, 0.2, "Transcribing audio...")
            transcript = self._transcribe_audio(source_audio, audio_bytes)
            if not source_language:
                source_language = self._detect_source_language(transcript)
            logger.info(f"[AudioDubbing] Transcript: {transcript[:100]}...")

            self._report(progress_callback, 0.4, "Translating text...")
            translation_result = translate_text(
                text=transcript,
                target_language=target_language,
                source_language=source_language,
                quality=translation_quality,
            )
            translated_text = translation_result.translated_text
            logger.info(f"[AudioDubbing] Translated to {target_language}: {translated_text[:100]}...")

            self._report(progress_callback, 0.6, "Generating dubbed audio...")
            audio_result = generate_audio(
                text=translated_text,
                voice_id=voice_id,
                speed=speed,
                emotion=emotion,
                user_id=user_id,
            )

            self._report(progress_callback, 0.9, "Saving dubbed audio...")
            filepath, audio_url, file_size = self._save_audio(audio_result.audio_bytes, ".mp3")
            self._report(progress_callback, 1.0, "Dubbing complete!")

            cost = translation_result.metadata.get("estimated_cost", 0.0)
            logger.info(f"[AudioDubbing] Complete! Output: {filepath}")

            return DubbingResult(
                dubbed_audio_path=str(filepath),
                dubbed_audio_url=audio_url,
                original_transcript=transcript,
                translated_transcript=translated_text,
                source_language=source_language or "auto",
                target_language=target_language,
                voice_id=voice_id,
                duration_seconds=0,
                file_size=file_size,
                cost=cost,
                quality=quality,
                voice_clone_used=False,
            )
        except Exception as e:
            # Log at the service boundary, then let the caller handle it.
            logger.error(f"[AudioDubbing] Error: {str(e)}")
            raise

    def dub_audio_batch(
        self,
        source_audios: List[str],
        target_language: str,
        source_language: Optional[str] = None,
        voice_id: Optional[str] = None,
        speed: float = 1.0,
        quality: str = "low",
        user_id: Optional[str] = None,
    ) -> List[DubbingResult]:
        """
        Dub multiple audio files to target language, one after another.

        The first failure propagates and aborts the remaining items.

        Args:
            source_audios: List of audio paths/URLs
            target_language: Target language
            source_language: Source language
            voice_id: Voice ID for TTS
            speed: Speech speed
            quality: Translation quality
            user_id: User ID

        Returns:
            List of DubbingResult, in input order
        """
        results = []
        total = len(source_audios)
        for position, audio in enumerate(source_audios, start=1):
            logger.info(f"[AudioDubbing] Processing {position}/{total}: {audio}")
            results.append(
                self.dub_audio(
                    source_audio=audio,
                    target_language=target_language,
                    source_language=source_language,
                    voice_id=voice_id,
                    speed=speed,
                    quality=quality,
                    user_id=user_id,
                )
            )
        return results

    def estimate_cost(
        self,
        audio_duration_seconds: float,
        target_language: str,
        quality: str = "low",
        use_voice_clone: bool = False,
    ) -> Dict[str, Any]:
        """
        Estimate the cost for dubbing.

        The character count is approximated as 15 characters per second of
        audio. ``target_language`` does not influence the estimate.

        Args:
            audio_duration_seconds: Duration of source audio
            target_language: Target language
            quality: Translation quality ("low" or anything else for high)
            use_voice_clone: Whether voice cloning is used

        Returns:
            Dictionary with the totals for the requested tier and a
            per-tier ``breakdown`` of formatted amounts
        """
        estimated_chars = int(audio_duration_seconds * self._CHARS_PER_SECOND)

        # Price both tiers up front so each breakdown row shows its own
        # tier's amount regardless of which tier was requested.
        low_translation_cost = estimated_chars * self._LOW_TRANSLATION_RATE
        high_translation_cost = estimated_chars * self._HIGH_TRANSLATION_RATE
        translation_cost = low_translation_cost if quality == "low" else high_translation_cost

        tts_cost = estimated_chars * self._TTS_RATE
        voice_clone_cost = self._VOICE_CLONE_COST if use_voice_clone else 0.0
        voice_clone_label = f"${voice_clone_cost:.2f}" if voice_clone_cost else "N/A"

        return {
            "estimated_characters": estimated_chars,
            "translation_cost": translation_cost,
            "tts_cost": tts_cost,
            "voice_clone_cost": voice_clone_cost,
            "total_cost": translation_cost + tts_cost + voice_clone_cost,
            "currency": "USD",
            "breakdown": {
                "low_quality": {
                    "translation": f"${low_translation_cost:.4f} ({estimated_chars} chars @ $0.00001/char)",
                    "tts": f"${tts_cost:.4f} ({estimated_chars} chars @ $0.001/char)",
                    "voice_clone": voice_clone_label,
                },
                "high_quality": {
                    "translation": f"${high_translation_cost:.4f}",
                    "tts": f"${tts_cost:.4f}",
                    "voice_clone": voice_clone_label,
                },
            },
        }

View File

@@ -0,0 +1,79 @@
"""
Translation Service for ALwrity.
Provides text translation capabilities using multiple providers:
- DeepL (low-cost, high-quality text translation)
- WaveSpeed (high-quality video/audio dubbing)
This is a COMMON module that can be used across the entire application:
- Podcast Maker: Audio/video dubbing
- Content Creation: Translate blog posts, marketing copy
- AI Writer: Multilingual content generation
- Video Studio: Video translation and subtitles
Usage:
# Simple usage
from services.translation import translate_text, TranslationQuality
result = translate_text("Hello world", target_language="Spanish")
print(result.translated_text)
# Advanced usage
from services.translation import get_translator, TranslationQuality
translator = get_translator(TranslationQuality.LOW)
result = translator.translate(
text="Your text here",
target_language="fr",
source_language="en"
)
Environment Variables:
DEEPL_API_KEY - DeepL API key for text translation (free tier: 500k chars/month)
DEEPL_USE_PRO - Set to "true" for DeepL Pro account
Examples:
# Translate a single text
>>> from services.translation import translate_text
>>> result = translate_text("Hello", target_language="es")
>>> print(result.translated_text)
Hola
# Batch translation
>>> from services.translation import translate_batch
>>> results = translate_batch(
... texts=["Hello", "Goodbye"],
... target_language="fr"
... )
# Check supported languages
>>> from services.translation import list_supported_languages
>>> langs = list_supported_languages()
>>> print(f"Supports {len(langs)} languages")
"""
from .base_translation import BaseTranslationProvider, TranslationQuality, TranslationResult
from .deepl_translator import DeepLTranslator
from .translation_factory import (
get_translator,
list_supported_languages,
translate_text,
translate_batch,
is_language_supported,
clear_translator_cache,
)
__all__ = [
# Enums and dataclasses
"TranslationQuality",
"TranslationResult",
# Classes
"BaseTranslationProvider",
"DeepLTranslator",
# Factory functions
"get_translator",
"list_supported_languages",
"is_language_supported",
"clear_translator_cache",
# Convenience functions
"translate_text",
"translate_batch",
]

View File

@@ -0,0 +1,210 @@
"""
Base Translation Provider abstract class.
Defines the interface for all translation providers in ALwrity.
"""
from abc import ABC, abstractmethod
from dataclasses import dataclass, field
from enum import Enum
from typing import Dict, List, Optional, Any
class TranslationQuality(str, Enum):
    """Quality tier used to choose a translation provider.

    Members are also plain strings, so they compare equal to their values.
    """

    LOW = "low"
    HIGH = "high"
@dataclass
class TranslationResult:
    """One translated text together with information about how it was produced."""

    # The translation and the languages involved.
    translated_text: str
    source_language: str
    target_language: str

    # Name of the provider that produced the result and its quality tier.
    provider: str
    quality: TranslationQuality

    # Optional extras; the containers default to fresh empty objects.
    confidence: float = 1.0
    alternative_translations: List[str] = field(default_factory=list)
    metadata: Dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> Dict[str, Any]:
        """Return the result as a plain dict, with ``quality`` as its string value."""
        field_names = (
            "translated_text",
            "source_language",
            "target_language",
            "provider",
            "quality",
            "confidence",
            "alternative_translations",
            "metadata",
        )
        payload: Dict[str, Any] = {name: getattr(self, name) for name in field_names}
        # Replace the enum member by its value; key order is unaffected.
        payload["quality"] = self.quality.value
        return payload
class BaseTranslationProvider(ABC):
    """Common interface and helpers for translation providers.

    Subclasses implement the abstract members; this base class supplies
    language-name normalisation, input validation and text chunking.
    """

    # Language code -> English display name.
    SUPPORTED_LANGUAGES: Dict[str, str] = {
        "en": "English",
        "es": "Spanish",
        "fr": "French",
        "de": "German",
        "it": "Italian",
        "pt": "Portuguese",
        "nl": "Dutch",
        "pl": "Polish",
        "ru": "Russian",
        "ja": "Japanese",
        "zh": "Chinese",
        "ko": "Korean",
        "ar": "Arabic",
        "hi": "Hindi",
        "tr": "Turkish",
        "vi": "Vietnamese",
        "th": "Thai",
        "id": "Indonesian",
        "ms": "Malay",
        "fil": "Filipino",
        "he": "Hebrew",
        "cs": "Czech",
        "da": "Danish",
        "fi": "Finnish",
        "el": "Greek",
        "hu": "Hungarian",
        "nb": "Norwegian",
        "ro": "Romanian",
        "sk": "Slovak",
        "sv": "Swedish",
        "uk": "Ukrainian",
        "bg": "Bulgarian",
        "hr": "Croatian",
        "lt": "Lithuanian",
        "lv": "Latvian",
        "et": "Estonian",
        "sl": "Slovenian",
    }

    # Class-level default only. Each instance gets its own mapping so that a
    # subclass overriding SUPPORTED_LANGUAGES cannot leak entries into other
    # providers through a shared dict.
    LANGUAGE_CODE_MAPPING: Dict[str, str] = {}

    def __init__(self):
        """Build the per-instance language lookup table."""
        self._build_language_mapping()

    def _build_language_mapping(self) -> None:
        """Populate ``self.LANGUAGE_CODE_MAPPING`` from ``SUPPORTED_LANGUAGES``.

        Keys are the lower-cased code, the lower-cased name and the
        upper-cased name; values are the codes as written in
        ``SUPPORTED_LANGUAGES``.
        """
        mapping: Dict[str, str] = {}
        for code, name in self.SUPPORTED_LANGUAGES.items():
            mapping[code.lower()] = code
            mapping[name.lower()] = code
            mapping[name.upper()] = code
        self.LANGUAGE_CODE_MAPPING = mapping

    def normalize_language_code(self, language: str) -> str:
        """Resolve a language code or name to a language code.

        Known codes and names (case-insensitive, surrounding whitespace
        ignored) return the code as written in ``SUPPORTED_LANGUAGES``.
        Anything else is returned stripped and upper-cased.
        """
        normalized = language.strip().lower()
        # Subclasses that skipped ``__init__`` still get a usable table.
        if not self.LANGUAGE_CODE_MAPPING:
            self._build_language_mapping()
        return self.LANGUAGE_CODE_MAPPING.get(normalized, normalized.upper())

    @property
    @abstractmethod
    def provider_name(self) -> str:
        """Return the name of the translation provider."""
        pass

    @property
    @abstractmethod
    def quality(self) -> TranslationQuality:
        """Return the quality tier of this provider."""
        pass

    @abstractmethod
    def translate(
        self,
        text: str,
        target_language: str,
        source_language: Optional[str] = None,
    ) -> TranslationResult:
        """
        Translate text to target language.

        Args:
            text: The text to translate
            target_language: Target language code or name
            source_language: Source language code or name (auto-detect if None)

        Returns:
            TranslationResult with translated text and metadata
        """
        pass

    @abstractmethod
    def translate_batch(
        self,
        texts: List[str],
        target_language: str,
        source_language: Optional[str] = None,
    ) -> List[TranslationResult]:
        """
        Translate multiple texts in batch.

        Args:
            texts: List of texts to translate
            target_language: Target language code or name
            source_language: Source language code or name (auto-detect if None)

        Returns:
            List of TranslationResults
        """
        pass

    @abstractmethod
    def get_supported_languages(self) -> Dict[str, str]:
        """Return dictionary of supported language codes and names."""
        pass

    @abstractmethod
    def is_language_supported(self, language: str) -> bool:
        """Check if a language is supported."""
        pass

    @abstractmethod
    def calculate_cost(self, text_length: int, char_count: int = 0) -> float:
        """
        Calculate the cost for translation.

        Args:
            text_length: Number of characters to translate
            char_count: Optional explicit character count

        Returns:
            Estimated cost in USD
        """
        pass

    def validate_text(self, text: str) -> bool:
        """Validate that text is suitable for translation.

        Returns:
            False for empty or whitespace-only text, True otherwise.

        Raises:
            ValueError: If the text is longer than 50000 characters.
        """
        if not text or not text.strip():
            return False
        if len(text) > 50000:
            raise ValueError(f"Text too long: {len(text)} chars. Maximum is 50000.")
        return True

    def split_long_text(self, text: str, max_chars: int = 5000) -> List[str]:
        """Split long text into chunks of at most ``max_chars`` characters.

        Text is split after sentence-ending punctuation (``.``, ``!``, ``?``)
        followed by whitespace; the punctuation is kept. Sentences are packed
        greedily into chunks, joined by a single space. A single sentence
        longer than ``max_chars`` is cut into fixed-size pieces.

        Raises:
            ValueError: If ``max_chars`` is not positive.
        """
        if max_chars <= 0:
            raise ValueError(f"max_chars must be positive, got {max_chars}")
        if len(text) <= max_chars:
            return [text]

        import re

        chunks: List[str] = []
        current = ""
        for sentence in re.split(r"(?<=[.!?])\s+", text):
            if not sentence:
                continue
            # A sentence that cannot fit in any chunk is hard-wrapped; the
            # remainder (always non-empty) is packed like a normal sentence.
            while len(sentence) > max_chars:
                if current:
                    chunks.append(current)
                    current = ""
                chunks.append(sentence[:max_chars])
                sentence = sentence[max_chars:]

            candidate = f"{current} {sentence}" if current else sentence
            if len(candidate) <= max_chars:
                current = candidate
            else:
                chunks.append(current)
                current = sentence
        if current:
            chunks.append(current)
        return chunks

View File

@@ -0,0 +1,307 @@
"""
DeepL Translation Provider.
Low-cost, high-quality text translation using DeepL API.
Free tier: 500,000 characters/month
API Documentation: https://www.deepl.com/docs-api
"""
import os
from typing import Dict, List, Optional
import httpx
from utils.logger_utils import get_service_logger
from .base_translation import (
BaseTranslationProvider,
TranslationQuality,
TranslationResult,
)
# Service logger for the DeepL provider.
logger = get_service_logger("translation.deepl")
# Translate endpoints: the free-tier host and the Pro host.
DEEPL_API_URL = "https://api-free.deepl.com/v2/translate"
DEEPL_API_URL_PRO = "https://api.deepl.com/v2/translate"
# Upper-cased language code -> code sent to DeepL. Most entries map to
# themselves; the bare codes EN and PT resolve to the regional variants
# EN-US and PT-PT.
DEEPL_LANGUAGE_MAPPING: Dict[str, str] = {
"BG": "BG",
"CS": "CS",
"DA": "DA",
"DE": "DE",
"EL": "EL",
"EN": "EN-US",
"EN-GB": "EN-GB",
"EN-US": "EN-US",
"ES": "ES",
"ET": "ET",
"FI": "FI",
"FR": "FR",
"HU": "HU",
"ID": "ID",
"IT": "IT",
"JA": "JA",
"KO": "KO",
"LT": "LT",
"LV": "LV",
"NB": "NB",
"NL": "NL",
"PL": "PL",
"PT": "PT-PT",
"PT-BR": "PT-BR",
"PT-PT": "PT-PT",
"RO": "RO",
"RU": "RU",
"SK": "SK",
"SL": "SL",
"SV": "SV",
"TR": "TR",
"UK": "UK",
"ZH": "ZH",
"ZH-HANS": "ZH-HANS",
"ZH-HANT": "ZH-HANT",
}
# Lower-cased language code -> display name. This is the table returned by
# DeepLTranslator.get_supported_languages and consulted by
# DeepLTranslator.is_language_supported.
DEEPL_SUPPORTED_LANGUAGES: Dict[str, str] = {
"bg": "Bulgarian",
"cs": "Czech",
"da": "Danish",
"de": "German",
"el": "Greek",
"en": "English (American)",
"en-gb": "English (British)",
"es": "Spanish",
"et": "Estonian",
"fi": "Finnish",
"fr": "French",
"hu": "Hungarian",
"id": "Indonesian",
"it": "Italian",
"ja": "Japanese",
"ko": "Korean",
"lt": "Lithuanian",
"lv": "Latvian",
"nb": "Norwegian",
"nl": "Dutch",
"pl": "Polish",
"pt": "Portuguese",
"pt-br": "Portuguese (Brazilian)",
"pt-pt": "Portuguese (European)",
"ro": "Romanian",
"ru": "Russian",
"sk": "Slovak",
"sl": "Slovenian",
"sv": "Swedish",
"tr": "Turkish",
"uk": "Ukrainian",
"zh": "Chinese",
"zh-hans": "Chinese (Simplified)",
"zh-hant": "Chinese (Traditional)",
}
class DeepLTranslator(BaseTranslationProvider):
    """Translation provider backed by the DeepL REST API (LOW quality tier)."""

    # Estimated price per translated character, in USD.
    COST_PER_CHARACTER = 0.00001

    def __init__(self, api_key: Optional[str] = None, use_pro: bool = False):
        """Configure the translator.

        Args:
            api_key: DeepL API key; falls back to the ``DEEPL_API_KEY``
                environment variable.
            use_pro: Use the Pro endpoint; also enabled when the
                ``DEEPL_USE_PRO`` environment variable is "true".
        """
        super().__init__()
        self._api_key = api_key or os.getenv("DEEPL_API_KEY", "")
        self._use_pro = use_pro or os.getenv("DEEPL_USE_PRO", "false").lower() == "true"
        if not self._api_key:
            # Construction still succeeds; translate calls raise later.
            logger.warning("DeepL API key not configured. Set DEEPL_API_KEY in environment.")
        self._api_url = DEEPL_API_URL_PRO if self._use_pro else DEEPL_API_URL

    @property
    def provider_name(self) -> str:
        """Return the provider name, "DeepL"."""
        return "DeepL"

    @property
    def quality(self) -> TranslationQuality:
        """Return the quality tier served by this provider (LOW)."""
        return TranslationQuality.LOW

    def _get_deepl_lang_code(self, language: str, for_source: bool = False) -> str:
        """Convert a language code or name to the code DeepL expects.

        Args:
            language: Language code or name.
            for_source: When True, return a code valid for ``source_lang``:
                the regional suffix is removed (``EN-US`` -> ``EN``), because
                regional variants are only valid as target languages.

        Returns:
            The DeepL code, or the upper-cased input if it is unknown.
        """
        normalized = self.normalize_language_code(language)
        upper = normalized.upper()
        code = DEEPL_LANGUAGE_MAPPING.get(upper)
        if code is None:
            # Not a known code: try the DeepL display names and lower-case
            # codes, then map the match the same way as a direct code hit.
            wanted = normalized.lower()
            code = upper
            for deepl_code, lang_name in DEEPL_SUPPORTED_LANGUAGES.items():
                if wanted in (lang_name.lower(), deepl_code.lower()):
                    matched = deepl_code.upper()
                    code = DEEPL_LANGUAGE_MAPPING.get(matched, matched)
                    break
        if for_source:
            code = code.split("-", 1)[0]
        return code

    def _get_source_code(self, source_language: Optional[str]) -> Optional[str]:
        """Return the ``source_lang`` value, or None to let DeepL auto-detect.

        ``None``, empty strings and the sentinel "auto" all mean auto-detect.
        """
        if not source_language or source_language.strip().lower() == "auto":
            return None
        return self._get_deepl_lang_code(source_language, for_source=True)

    def _require_api_key(self) -> None:
        """Raise ValueError when no API key is configured."""
        if not self._api_key:
            raise ValueError("DeepL API key not configured. Set DEEPL_API_KEY environment variable.")

    def _request_translations(
        self,
        texts: List[str],
        target_code: str,
        source_code: Optional[str],
        timeout: float,
    ) -> List[dict]:
        """POST ``texts`` to DeepL and return the raw ``translations`` list.

        Raises:
            RuntimeError: On an HTTP error status or a transport failure;
                the original httpx exception is chained as the cause.
        """
        headers = {
            "Authorization": f"DeepL-Auth-Key {self._api_key}",
            "Content-Type": "application/json",
        }
        payload: Dict[str, object] = {
            "text": texts,
            "target_lang": target_code,
        }
        if source_code:
            payload["source_lang"] = source_code

        try:
            with httpx.Client(timeout=timeout) as client:
                response = client.post(self._api_url, headers=headers, json=payload)
                response.raise_for_status()
                data = response.json()
        except httpx.HTTPStatusError as e:
            logger.error(f"DeepL API HTTP error: {e.response.status_code} - {e.response.text}")
            raise RuntimeError(f"DeepL API error: {e.response.status_code}") from e
        except httpx.RequestError as e:
            logger.error(f"DeepL API request error: {str(e)}")
            raise RuntimeError(f"DeepL API request failed: {str(e)}") from e
        return data.get("translations", [])

    def translate(
        self,
        text: str,
        target_language: str,
        source_language: Optional[str] = None,
    ) -> TranslationResult:
        """Translate one text.

        Args:
            text: Text to translate.
            target_language: Target language code or name.
            source_language: Source language code or name; None or "auto"
                lets DeepL detect it.

        Returns:
            TranslationResult whose metadata includes ``estimated_cost``.

        Raises:
            ValueError: If the text is too long, no API key is configured,
                or DeepL returns no translation.
            RuntimeError: If the API request fails.
        """
        self.validate_text(text)
        self._require_api_key()

        target_code = self._get_deepl_lang_code(target_language)
        source_code = self._get_source_code(source_language)
        translations = self._request_translations([text], target_code, source_code, timeout=30.0)
        if not translations:
            raise ValueError("No translation returned from DeepL API")

        primary = translations[0]
        alternatives = [t["text"] for t in translations[1:] if t.get("text")]
        detected_lang = primary.get("detected_source_language", "")

        return TranslationResult(
            translated_text=primary["text"],
            # Report the caller's language when one was given, otherwise
            # whatever DeepL detected.
            source_language=source_language if source_code else detected_lang,
            target_language=target_language,
            provider=self.provider_name,
            quality=self.quality,
            confidence=0.95,
            alternative_translations=alternatives,
            metadata={
                "deepl_target_lang": target_code,
                "character_count": len(text),
                "translations_count": len(translations),
                "estimated_cost": self.calculate_cost(len(text)),
            },
        )

    def translate_batch(
        self,
        texts: List[str],
        target_language: str,
        source_language: Optional[str] = None,
    ) -> List[TranslationResult]:
        """Translate several texts in a single request.

        Args:
            texts: Texts to translate; an empty list returns ``[]``.
            target_language: Target language code or name.
            source_language: Source language code or name; None or "auto"
                lets DeepL detect it per text.

        Returns:
            One TranslationResult per translation returned, in order.

        Raises:
            ValueError: If the combined text is too long or no API key is
                configured.
            RuntimeError: If the API request fails.
        """
        if not texts:
            return []

        # The length limit applies to the batch as a whole.
        self.validate_text("\n".join(texts))
        self._require_api_key()

        target_code = self._get_deepl_lang_code(target_language)
        source_code = self._get_source_code(source_language)
        translations = self._request_translations(texts, target_code, source_code, timeout=60.0)

        results = []
        for original, translation in zip(texts, translations):
            # Each item carries its own detected language.
            detected_source = translation.get("detected_source_language", "")
            results.append(
                TranslationResult(
                    translated_text=translation["text"],
                    source_language=detected_source or source_language or "auto",
                    target_language=target_language,
                    provider=self.provider_name,
                    quality=self.quality,
                    confidence=0.95,
                    metadata={
                        "deepl_target_lang": target_code,
                        "batch_size": len(texts),
                        "estimated_cost": self.calculate_cost(len(original)),
                    },
                )
            )
        return results

    def get_supported_languages(self) -> Dict[str, str]:
        """Return a copy of the DeepL language table (code -> name)."""
        return DEEPL_SUPPORTED_LANGUAGES.copy()

    def is_language_supported(self, language: str) -> bool:
        """Return True if ``language`` resolves to a code DeepL supports."""
        normalized = self.normalize_language_code(language).lower()
        return normalized in DEEPL_SUPPORTED_LANGUAGES

    def calculate_cost(self, text_length: int, char_count: int = 0) -> float:
        """Estimate the cost in USD; ``char_count`` wins over ``text_length`` when non-zero."""
        chars = char_count or text_length
        return chars * self.COST_PER_CHARACTER

    def get_usage_info(self) -> Dict[str, object]:
        """Fetch usage figures from the DeepL usage endpoint.

        Best effort: failures are logged and reported in the returned dict
        instead of being raised.

        Returns:
            ``{"configured": False, ...}`` without an API key; otherwise the
            character count, limit and percentage used, or an ``error`` entry.
        """
        if not self._api_key:
            return {"configured": False, "message": "API key not set"}

        usage_url = "https://api-free.deepl.com/v2/usage" if not self._use_pro else "https://api.deepl.com/v2/usage"
        headers = {
            "Authorization": f"DeepL-Auth-Key {self._api_key}",
        }
        try:
            with httpx.Client(timeout=10.0) as client:
                response = client.get(usage_url, headers=headers)
                response.raise_for_status()
                data = response.json()
            character_count = data.get("character_count", 0)
            character_limit = data.get("character_limit", 0)
            # A missing or zero limit must not cause a division by zero.
            usage_percent = (character_count / character_limit) * 100 if character_limit else 0.0
            return {
                "configured": True,
                "character_count": character_count,
                "character_limit": character_limit,
                "usage_percent": usage_percent,
            }
        except Exception as e:
            logger.error(f"Failed to get DeepL usage info: {str(e)}")
            return {"configured": True, "error": str(e)}

View File

@@ -0,0 +1,172 @@
"""
Translation Factory.
Factory pattern for getting translation providers based on quality tier.
"""
from typing import Dict, Optional
from utils.logger_utils import get_service_logger
from .base_translation import (
BaseTranslationProvider,
TranslationQuality,
TranslationResult,
)
from .deepl_translator import DeepLTranslator
# Service logger for the translation factory.
logger = get_service_logger("translation.factory")
# Process-wide cache of provider instances, keyed by the cache-key string
# that get_translator builds.
_TRANSLATOR_CACHE: Dict[str, BaseTranslationProvider] = {}
def get_translator(
    quality: TranslationQuality = TranslationQuality.LOW,
    force_new: bool = False,
    **kwargs,
) -> BaseTranslationProvider:
    """
    Get a translation provider instance based on quality tier.

    Instances are cached per (quality, provider arguments) so repeated calls
    with equal arguments reuse the same provider.

    Args:
        quality: The quality tier (LOW or HIGH)
        force_new: Force creation of new instance instead of cached; the new
            instance replaces the cached one for the same key
        **kwargs: Additional arguments for the provider

    Returns:
        Translation provider instance

    Raises:
        ValueError: If quality tier is not supported
    """
    # Key on the *content* of kwargs. The identity of the kwargs dict is
    # different on every call (and may be recycled after garbage collection),
    # so it can neither produce reliable cache hits nor rule out returning a
    # provider that was built with different arguments. Sorting makes the key
    # independent of keyword order; keyword names are unique, so the sort
    # never has to compare the values themselves.
    cache_key = f"{quality.value}_{sorted(kwargs.items())!r}"

    if not force_new and cache_key in _TRANSLATOR_CACHE:
        return _TRANSLATOR_CACHE[cache_key]

    if quality == TranslationQuality.LOW:
        translator = DeepLTranslator(**kwargs)
        logger.info("Created DeepL translator (LOW quality)")
    elif quality == TranslationQuality.HIGH:
        # Imported lazily so the WaveSpeed module is only loaded when the
        # HIGH tier is actually requested.
        from .wavespeed_translator import WaveSpeedTranslator

        translator = WaveSpeedTranslator(**kwargs)
        logger.info("Created WaveSpeed translator (HIGH quality)")
    else:
        raise ValueError(f"Unsupported translation quality: {quality}")

    _TRANSLATOR_CACHE[cache_key] = translator
    return translator
def translate_text(
    text: str,
    target_language: str,
    source_language: Optional[str] = None,
    quality: TranslationQuality = TranslationQuality.LOW,
) -> TranslationResult:
    """
    Translate a single text with the provider for the given quality tier.

    Args:
        text: Text to translate
        target_language: Target language code or name
        source_language: Source language (auto-detect if None)
        quality: Quality tier used to pick the provider via get_translator()

    Returns:
        The TranslationResult produced by the provider's translate()
    """
    return get_translator(quality).translate(
        text,
        target_language,
        source_language,
    )
def translate_batch(
    texts: list[str],
    target_language: str,
    source_language: Optional[str] = None,
    quality: TranslationQuality = TranslationQuality.LOW,
) -> list[TranslationResult]:
    """
    Translate several texts with the provider for the given quality tier.

    Args:
        texts: List of texts to translate
        target_language: Target language code or name
        source_language: Source language (auto-detect if None)
        quality: Quality tier used to pick the provider via get_translator()

    Returns:
        The list of TranslationResults produced by the provider's
        translate_batch()
    """
    return get_translator(quality).translate_batch(
        texts,
        target_language,
        source_language,
    )
def list_supported_languages(
    quality: Optional[TranslationQuality] = None,
) -> Dict[str, str]:
    """
    List supported languages.

    Args:
        quality: Optional quality filter. With LOW only DeepL languages are
            returned, with HIGH only WaveSpeed languages. Any other value
            (including None) returns the union of both, with WaveSpeed names
            taking precedence for codes present in both tables.

    Returns:
        A new dictionary of language codes to names (safe for the caller to
        modify).
    """
    if quality == TranslationQuality.LOW:
        return DeepLTranslator().get_supported_languages()

    if quality == TranslationQuality.HIGH:
        from .wavespeed_translator import WaveSpeedTranslator

        return WaveSpeedTranslator().get_supported_languages()

    # Unfiltered: go through the providers' public accessor, exactly like the
    # filtered branches above, instead of reaching for a class attribute that
    # a provider may not define. dict() guarantees we own the mapping we are
    # about to extend.
    languages = dict(DeepLTranslator().get_supported_languages())
    try:
        from .wavespeed_translator import WaveSpeedTranslator

        languages.update(WaveSpeedTranslator().get_supported_languages())
    except Exception as exc:
        # Best effort: WaveSpeed is optional here, so fall back to the DeepL
        # table - but leave a trace instead of hiding the failure.
        logger.warning(f"WaveSpeed languages unavailable: {exc}")
    return languages
def is_language_supported(
    language: str,
    quality: Optional[TranslationQuality] = None,
) -> bool:
    """
    Report whether a language can be translated.

    Args:
        language: Language code or name
        quality: Optional quality filter. LOW consults only DeepL, HIGH only
            WaveSpeed; any other value accepts a language supported by either.

    Returns:
        True if supported
    """
    if quality == TranslationQuality.LOW:
        return DeepLTranslator().is_language_supported(language)

    if quality == TranslationQuality.HIGH:
        from .wavespeed_translator import WaveSpeedTranslator

        return WaveSpeedTranslator().is_language_supported(language)

    # No filter: DeepL is asked first; WaveSpeed is only consulted when DeepL
    # does not support the language.
    deepl_supported = DeepLTranslator().is_language_supported(language)
    return deepl_supported or _check_wavespeed_support(language)
def _check_wavespeed_support(language: str) -> bool:
try:
from .wavespeed_translator import WaveSpeedTranslator
return WaveSpeedTranslator().is_language_supported(language)
except (ImportError, Exception):
return False
def clear_translator_cache() -> None:
    """Remove every cached provider instance and log that it happened."""
    # In-place clear keeps the same dict object, so no rebinding is involved.
    _TRANSLATOR_CACHE.clear()
    logger.info("Translation provider cache cleared")

View File

@@ -0,0 +1,138 @@
"""
WaveSpeed Translation Provider.
High-quality video/text translation using WaveSpeed API.
This will be used for Phase 3 (High-Quality Dubbing).
API: Uses existing WaveSpeed video translation API.
"""
from typing import Dict, List, Optional
from utils.logger_utils import get_service_logger
from .base_translation import (
BaseTranslationProvider,
TranslationQuality,
TranslationResult,
)
# Logger for the WaveSpeed translation provider.
logger = get_service_logger("translation.wavespeed")
# Language table for the WaveSpeed provider: lowercase language code ->
# English display name. WaveSpeedTranslator.get_supported_languages() returns
# a copy of it and WaveSpeedTranslator.is_language_supported() checks
# membership of the normalized, lower-cased code in its keys.
WAVESPEED_SUPPORTED_LANGUAGES: Dict[str, str] = {
"en": "English",
"es": "Spanish",
"fr": "French",
"de": "German",
"it": "Italian",
"pt": "Portuguese",
"ja": "Japanese",
"ko": "Korean",
"zh": "Chinese",
"ar": "Arabic",
"hi": "Hindi",
"ru": "Russian",
"nl": "Dutch",
"pl": "Polish",
"tr": "Turkish",
"vi": "Vietnamese",
"th": "Thai",
"id": "Indonesian",
"ms": "Malay",
"fil": "Filipino",
"he": "Hebrew",
"cs": "Czech",
"da": "Danish",
"fi": "Finnish",
"el": "Greek",
"hu": "Hungarian",
"nb": "Norwegian",
"ro": "Romanian",
"sk": "Slovak",
"sv": "Swedish",
"uk": "Ukrainian",
}
class WaveSpeedTranslator(BaseTranslationProvider):
    """High-quality translation provider backed by WaveSpeed.

    Text translation (``translate`` / ``translate_batch``) is not implemented
    and raises ``NotImplementedError``; ``translate_video`` is the only
    operation that performs actual work.
    """

    # Per-character rate applied by calculate_cost(); the currency unit is not
    # specified in this module.
    COST_PER_CHARACTER = 0.0001

    # Class-level view of the language table, so callers that read
    # ``WaveSpeedTranslator.SUPPORTED_LANGUAGES`` without creating an instance
    # find it. It is the same dict object as WAVESPEED_SUPPORTED_LANGUAGES;
    # treat it as read-only and use get_supported_languages() for a copy.
    SUPPORTED_LANGUAGES: Dict[str, str] = WAVESPEED_SUPPORTED_LANGUAGES

    def __init__(self):
        """Initialize the base provider and log that the instance is ready."""
        super().__init__()
        logger.info("[WaveSpeedTranslator] Initialized (high-quality mode)")

    @property
    def provider_name(self) -> str:
        """Name identifying this provider: ``"WaveSpeed"``."""
        return "WaveSpeed"

    @property
    def quality(self) -> TranslationQuality:
        """Quality tier of this provider: ``TranslationQuality.HIGH``."""
        return TranslationQuality.HIGH

    def translate(
        self,
        text: str,
        target_language: str,
        source_language: Optional[str] = None,
    ) -> TranslationResult:
        """Placeholder for text translation; always raises.

        The text is validated first, so whatever ``validate_text`` raises for
        invalid input takes precedence over the NotImplementedError.

        Args:
            text: Text to translate
            target_language: Target language code or name (unused)
            source_language: Source language (unused)

        Raises:
            NotImplementedError: Always, once the text passed validation.
        """
        self.validate_text(text)
        raise NotImplementedError(
            "WaveSpeed text translation not yet implemented. "
            "For high-quality translation, use the video translation API "
            "or fall back to DeepL for text translation."
        )

    def translate_batch(
        self,
        texts: List[str],
        target_language: str,
        source_language: Optional[str] = None,
    ) -> List[TranslationResult]:
        """Placeholder for batch translation; always raises.

        Unlike ``translate``, no validation is performed on the inputs.

        Raises:
            NotImplementedError: Always.
        """
        raise NotImplementedError(
            "WaveSpeed batch translation not yet implemented."
        )

    def get_supported_languages(self) -> Dict[str, str]:
        """Return a copy of the language-code -> name table."""
        return WAVESPEED_SUPPORTED_LANGUAGES.copy()

    def is_language_supported(self, language: str) -> bool:
        """Tell whether ``language`` appears in the WaveSpeed language table.

        Args:
            language: Language identifier; it is passed through
                ``normalize_language_code`` and lower-cased before the lookup.

        Returns:
            True if the normalized value is a key of the language table.
        """
        normalized = self.normalize_language_code(language).lower()
        return normalized in WAVESPEED_SUPPORTED_LANGUAGES

    def calculate_cost(self, text_length: int, char_count: int = 0) -> float:
        """Estimate the cost of translating a piece of text.

        Args:
            text_length: Length of the text; used when ``char_count`` is falsy.
            char_count: Explicit character count; takes precedence when
                non-zero.

        Returns:
            The chosen character count multiplied by ``COST_PER_CHARACTER``.
        """
        chars = char_count or text_length
        return chars * self.COST_PER_CHARACTER

    def translate_video(
        self,
        video_path: str,
        target_language: str,
        source_language: Optional[str] = None,
    ) -> bytes:
        """
        Translate video using WaveSpeed video translation API.

        This is the primary use case for high-quality dubbing.

        Args:
            video_path: Path to video file; the whole file is read into memory
            target_language: Target language; normalized with
                ``normalize_language_code`` before being sent
            source_language: Source language. NOTE: accepted for interface
                symmetry but not forwarded to the translation call.

        Returns:
            Whatever ``VideoTranslation.video_translate`` returns in sync
            mode (annotated as the translated video bytes).

        Raises:
            OSError: If the video file cannot be opened or read.
        """
        # Imported lazily so this module can be loaded without pulling in the
        # video generator package.
        from ..wavespeed.generators.video.translation import VideoTranslation

        translator = VideoTranslation()
        target_lang = self.normalize_language_code(target_language)
        with open(video_path, "rb") as f:
            video_bytes = f.read()
        return translator.video_translate(
            video=video_bytes,
            output_language=target_lang,
            enable_sync_mode=True,
        )