/**
 * AudioSettingsModal - modal for choosing the voice and audio parameters used
 * to generate (or regenerate) the audio narration for a scene.
 *
 * Module contents:
 * - getVoicesForLanguage: optional helper obtained with `require` inside a
 *   try/catch from the YouTube Creator constants module. When the require
 *   throws it is left `undefined` and the built-in English voice list is used.
 * - AudioGenerationSettings: the settings object edited by the modal.
 * - AudioSettingsModalProps: the props accepted by the component.
 * - EMOTION_OPTIONS, SAMPLE_RATE_OPTIONS, BITRATE_OPTIONS,
 *   LANGUAGE_BOOST_OPTIONS: static option lists.
 * - AudioSettingsModal: keeps an editable copy of the settings in local state
 *   and passes that copy to `onApplySettings` from `handleApply`.
 *
 * NOTE(review): this copy of the file appears to have had its line breaks
 * collapsed and most of its angle-bracket markup (JSX tags and generic type
 * arguments) removed. As written, every `//` comment runs to the end of its
 * physical line and swallows the code after it. Restore the file from the
 * original source before relying on it - TODO confirm.
 * NOTE(review): `useEffect` is imported but not called in the visible code, and
 * `settings` is seeded from `initialSettings` only through `useState`, so later
 * changes to `initialSettings` do not appear to be picked up - confirm whether
 * that is intended.
 * NOTE(review): ENGLISH_VOICES_FALLBACK is declared inside the component (a new
 * array on every render) while VOICE_OPTIONS is memoized on `[language]` only;
 * consider hoisting the list to module scope.
 * NOTE(review): getVoicesForLanguage is typed as returning `any[]`; the
 * fallback entries have the shape `{ id, name, personality }` - verify that the
 * language-aware helper returns the same shape.
 * NOTE(review): bundlers may resolve the `require` at build time, in which case
 * the try/catch would not make it optional - verify against the build setup.
 */
import React, { useEffect, useState, useMemo } from "react"; import { Dialog, DialogTitle, DialogContent, DialogActions, Stack, Box, Typography, Slider, Select, MenuItem, FormControl, InputLabel, FormControlLabel, Checkbox, Tooltip, IconButton, alpha, TextField, } from "@mui/material"; import { HelpOutline as HelpOutlineIcon, Close as CloseIcon, VolumeUp } from "@mui/icons-material"; import { Button } from "@mui/material"; // Import language-aware voice mapping (optional - only used in YouTube Creator context) let getVoicesForLanguage: ((language?: string) => any[]) | undefined; try { const youtubeConstants = require('../../components/YouTubeCreator/constants'); getVoicesForLanguage = youtubeConstants.getVoicesForLanguage; } catch { // Not in YouTube Creator context - will use fallback English voices getVoicesForLanguage = undefined; } /** Voice and audio-encoding options edited by the modal and passed to onApplySettings. */ export type AudioGenerationSettings = { voiceId: string; speed: number; volume: number; pitch: number; emotion: string; englishNormalization: boolean; sampleRate?: number; bitrate: number; channel: "1" | "2"; format: "mp3" | "wav" | "pcm" | "flac"; languageBoost?: string; enableSyncMode: boolean; }; /** Props accepted by AudioSettingsModal. */ interface AudioSettingsModalProps { open: boolean; onClose: () => void; onApplySettings: (settings: AudioGenerationSettings) => void; initialSettings: AudioGenerationSettings; isGenerating?: boolean; sceneTitle?: string; isRegenerating?: boolean; language?: string; // Language code (e.g., 'en', 'es', 'fr') - used to filter voice options } // Voice options are resolved per language inside the component; the lists below are static option sets. const EMOTION_OPTIONS = ["happy", "sad", "angry", "fearful", "disgusted", "surprised", "neutral"]; const SAMPLE_RATE_OPTIONS = [8000, 16000, 22050, 24000, 32000, 44100]; const BITRATE_OPTIONS = [32000, 64000, 128000, 256000]; const LANGUAGE_BOOST_OPTIONS = [ "auto", "English", "Chinese", "Chinese,Yue", "Arabic", "Russian", "Spanish", "French", 
"Portuguese", "German", "Turkish", "Dutch", "Ukrainian", "Vietnamese", "Indonesian", "Japanese", "Italian", "Korean", "Thai", "Polish", "Romanian", "Greek", "Czech", "Finnish", "Hindi", ]; /** Modal component: holds an editable copy of initialSettings in local state, resolves the voice list for the given language (falling back to the English list), and reports the edited settings through onApplySettings. */ export const AudioSettingsModal: React.FC = ({ open, onClose, onApplySettings, initialSettings, isGenerating = false, sceneTitle, isRegenerating = false, language, }) => { const [settings, setSettings] = useState(initialSettings); // Fallback English voices (used when language-aware mapping is not available) const ENGLISH_VOICES_FALLBACK = [ { id: "Wise_Woman", name: "Wise Woman", personality: "Authoritative, trustworthy female voice - perfect for educational content and expert narration" }, { id: "Friendly_Person", name: "Friendly Person", personality: "Warm, approachable voice - great for welcoming introductions and customer-facing content" }, { id: "Inspirational_girl", name: "Inspirational Girl", personality: "Motivational, uplifting female voice - ideal for inspirational and motivational content" }, { id: "Deep_Voice_Man", name: "Deep Voice Man", personality: "Powerful, commanding male voice - excellent for serious topics and authoritative delivery" }, { id: "Calm_Woman", name: "Calm Woman", personality: "Soothing, composed female voice - perfect for meditation, relaxation, or sensitive topics" }, { id: "Casual_Guy", name: "Casual Guy", personality: "Relaxed, conversational male voice - great for vlogs, tutorials, and informal content" }, { id: "Lively_Girl", name: "Lively Girl", personality: "Energetic, enthusiastic female voice - ideal for exciting announcements and upbeat content" }, { id: "Patient_Man", name: "Patient Man", personality: "Gentle, understanding male voice - perfect for explanations and patient guidance" }, { id: "Young_Knight", name: "Young Knight", personality: "Brave, confident male voice - great for adventure, gaming, and heroic narratives" }, { id: "Determined_Man", name: "Determined Man", personality: "Strong, resolute male voice - excellent for 
motivational speeches and determined delivery" }, { id: "Lovely_Girl", name: "Lovely Girl", personality: "Sweet, charming female voice - ideal for storytelling and gentle narratives" }, { id: "Decent_Boy", name: "Decent Boy", personality: "Honest, sincere male voice - perfect for testimonials and personal stories" }, { id: "Imposing_Manner", name: "Imposing Manner", personality: "Formal, dignified male voice - great for corporate content and official announcements" }, { id: "Elegant_Man", name: "Elegant Man", personality: "Refined, sophisticated male voice - ideal for luxury, premium content" }, { id: "Abbess", name: "Abbess", personality: "Spiritual, serene female voice - perfect for meditation, philosophy, or contemplative content" }, { id: "Sweet_Girl_2", name: "Sweet Girl 2", personality: "Gentle, melodic female voice - excellent for children's content and soft storytelling" }, { id: "Exuberant_Girl", name: "Exuberant Girl", personality: "Joyful, expressive female voice - ideal for celebrations and happy announcements" }, ]; // Get language-specific voices (use language-aware mapping if available, fallback to English) const VOICE_OPTIONS = useMemo(() => { if (getVoicesForLanguage && language) { return getVoicesForLanguage(language); } return ENGLISH_VOICES_FALLBACK; }, [language]); const handleApply = () => { onApplySettings(settings); }; return ( {isRegenerating ? 'Regenerate Audio' : 'Generate Audio'} - Voice Settings {sceneTitle && ( Configure voice settings for "{sceneTitle}" )} {isRegenerating ? 'Customize voice settings to regenerate your audio narration with different characteristics.' : 'Choose voice settings to generate high-quality audio narration for your scene.' } {/* Voice Selection */} Voice Selection Voice Selection Guide {language && language !== 'en' && ( 🌍 Language-specific voices are shown for {language.toUpperCase()} content. These voices provide native pronunciation and accent. 
)} Choose a voice that matches your content's personality and target audience. {(!language || language === 'en') && ( <> YouTube/Vlogging: Casual Guy (default), Friendly Person - conversational and engaging Educational/Tutorials: Wise Woman, Deep Voice Man - authoritative and trustworthy Motivational: Inspirational Girl, Determined Man - energetic and inspiring Relaxing/Storytelling: Calm Woman, Lovely Girl - soothing and gentle Default: Casual Guy - optimized for engaging YouTube narration. )} } arrow placement="right"> {language && language !== 'en' && ( 🌍 Showing {language.toUpperCase()} language-specific voices for native pronunciation )} {/* Speed / Volume / Pitch */} Speaking Speed ({settings.speed.toFixed(2)}) Natural Speaking Pace 0.8-1.0: Slow, deliberate (educational, complex topics) 1.1-1.2: Natural, engaging (recommended for YouTube) 1.3-1.5: Fast, energetic (exciting, promotional content) Default: 1.15 - Optimized for engaging YouTube narration. } arrow placement="right"> setSettings({ ...settings, speed: v as number })} sx={{ color: "#4ade80" }} /> 0.5 = Slower (narrative) • 1.0 = Normal • 2.0 = Faster (energetic) Volume Level ({settings.volume.toFixed(1)}) Audio Loudness 0.1-0.5: Very soft, intimate whisper 0.8-1.2: Normal speaking volume 1.5-10.0: Loud, commanding presence Note: Very high volumes may cause distortion. } arrow placement="right"> setSettings({ ...settings, volume: v as number })} sx={{ color: "#fbbf24" }} /> 0.1 = Very soft • 1.0 = Normal • 10.0 = Very loud Voice Pitch ({settings.pitch}) Voice Tone & Character -12 to -6: Deep, authoritative (male voices, serious content) -2 to +2: Natural, conversational range +3 to +12: Bright, energetic (female voices, upbeat content) Tip: Small adjustments (±2) sound most natural. 
} arrow placement="right"> setSettings({ ...settings, pitch: v as number })} sx={{ color: "#f87171" }} /> -12 = Very deep • 0 = Normal • +12 = Very high {/* Emotion */} Emotional Delivery Voice Emotional Expression Choose the emotional tone that matches your content: Happy: Warm, enthusiastic delivery Neutral: Professional, straightforward tone Sad: Somber, reflective delivery Angry: Forceful, urgent tone (use sparingly) Recommendation: Happy/Neutral for most educational content. } arrow placement="right"> Select the emotional tone. "Happy" provides natural, engaging delivery for most YouTube content. {/* Language & Normalization */} setSettings({ ...settings, englishNormalization: e.target.checked })} sx={{ color: "rgba(255,255,255,0.7)" }} /> } label={ English text normalization } /> Improves pronunciation of numbers (42 → "forty-two"), dates, currencies, and technical terms. Recommended for most English content. setSettings({ ...settings, languageBoost: e.target.value })} InputLabelProps={{ sx: { color: "rgba(255,255,255,0.7)" } }} sx={{ "& .MuiOutlinedInput-root": { backgroundColor: alpha("#ffffff", 0.1), color: "white", "& fieldset": { borderColor: "rgba(255,255,255,0.3)" }, "&:hover fieldset": { borderColor: "rgba(255,255,255,0.4)" }, "&.Mui-focused fieldset": { borderColor: "#ffffff" }, "& .MuiSvgIcon-root": { color: "rgba(255,255,255,0.7)" }, }, }} > {LANGUAGE_BOOST_OPTIONS.map((option) => ( {option} ))} Improves pronunciation accuracy for content in specific languages or regional dialects. Use "auto" for automatic detection. 
{/* Quality Settings */} setSettings({ ...settings, sampleRate: Number(e.target.value) })} InputLabelProps={{ sx: { color: "rgba(255,255,255,0.7)" } }} sx={{ "& .MuiOutlinedInput-root": { backgroundColor: alpha("#ffffff", 0.1), color: "white", "& fieldset": { borderColor: "rgba(255,255,255,0.3)" }, "&:hover fieldset": { borderColor: "rgba(255,255,255,0.4)" }, "&.Mui-focused fieldset": { borderColor: "#ffffff" }, "& .MuiSvgIcon-root": { color: "rgba(255,255,255,0.7)" }, }, }} > {SAMPLE_RATE_OPTIONS.map((rate) => ( {rate} Hz ))} Sample rate affects audio clarity. 24kHz is optimal for voice content - higher values increase file size without noticeable improvement. setSettings({ ...settings, bitrate: Number(e.target.value) })} InputLabelProps={{ sx: { color: "rgba(255,255,255,0.7)" } }} sx={{ "& .MuiOutlinedInput-root": { backgroundColor: alpha("#ffffff", 0.1), color: "white", "& fieldset": { borderColor: "rgba(255,255,255,0.3)" }, "&:hover fieldset": { borderColor: "rgba(255,255,255,0.4)" }, "&.Mui-focused fieldset": { borderColor: "#ffffff" }, "& .MuiSvgIcon-root": { color: "rgba(255,255,255,0.7)" }, }, }} > {BITRATE_OPTIONS.map((bitrate) => ( {bitrate / 1000} kbps ))} Audio quality vs file size trade-off. 128kbps provides excellent voice quality with reasonable file sizes. 
{/* Format & Channel */} setSettings({ ...settings, channel: e.target.value as "1" | "2" })} InputLabelProps={{ sx: { color: "rgba(255,255,255,0.7)" } }} sx={{ "& .MuiOutlinedInput-root": { backgroundColor: alpha("#ffffff", 0.1), color: "white", "& fieldset": { borderColor: "rgba(255,255,255,0.3)" }, "&:hover fieldset": { borderColor: "rgba(255,255,255,0.4)" }, "&.Mui-focused fieldset": { borderColor: "#ffffff" }, "& .MuiSvgIcon-root": { color: "rgba(255,255,255,0.7)" }, }, }} > Mono (smaller files, standard for voice) Stereo (wider sound, larger files) setSettings({ ...settings, format: e.target.value as "mp3" | "wav" | "pcm" | "flac" })} InputLabelProps={{ sx: { color: "rgba(255,255,255,0.7)" } }} sx={{ "& .MuiOutlinedInput-root": { backgroundColor: alpha("#ffffff", 0.1), color: "white", "& fieldset": { borderColor: "rgba(255,255,255,0.3)" }, "&:hover fieldset": { borderColor: "rgba(255,255,255,0.4)" }, "&.Mui-focused fieldset": { borderColor: "#ffffff" }, "& .MuiSvgIcon-root": { color: "rgba(255,255,255,0.7)" }, }, }} > MP3 (compressed, widely supported) WAV (uncompressed, highest quality) PCM (raw data, specialized use) FLAC (lossless, large files) {/* Sync Mode */} setSettings({ ...settings, enableSyncMode: e.target.checked })} sx={{ color: "rgba(255,255,255,0.7)" }} /> } label={ Enable sync mode (recommended) } /> When enabled, waits for generation to complete before proceeding. Recommended for reliable audio delivery. {/* Pro Tips */} 💡 Human-Like Audio Tips Voice Choice: "Casual_Guy" provides natural, conversational delivery perfect for YouTube Speed: 1.15 provides engaging pace - not too slow, not too fast, just right for viewers Emotion: "Happy" creates natural, positive delivery that keeps viewers engaged Quality: 128kbps MP3 provides professional quality with optimal file sizes Enhancement: English normalization improves pronunciation of numbers, dates, and technical terms ); };