// ALwrity/frontend/src/components/StoryWriter/Phases/StoryOutlineParts/AudioScriptModal.tsx

import React from 'react';
import {
  Box, Button, Dialog, DialogActions, DialogContent, DialogTitle,
  TextField, Divider, CircularProgress, Typography, Tooltip, IconButton,
  Slider, FormControl, InputLabel, Select, MenuItem, FormHelperText,
  ToggleButtonGroup, ToggleButton
} from '@mui/material';
import VolumeUpIcon from '@mui/icons-material/VolumeUp';
import SmartToyIcon from '@mui/icons-material/SmartToy';
import InfoOutlinedIcon from '@mui/icons-material/InfoOutlined';
import { OperationButton } from '../../../shared/OperationButton';

/**
 * Payload passed to `onGenerateAI` when the user requests AI narration.
 * Only `text` is required by the type; the modal fills in every field.
 */
interface AIAudioGenerationParams {
  /** Narration text, trimmed by the modal before it is sent. */
  text: string;
  /** A voice from the modal's voice list, or a user-entered custom voice ID. */
  voice_id?: string;
  /** Speech speed multiplier; the modal's slider offers 0.5 to 2.0. */
  speed?: number;
  /** Volume level; the modal's slider offers 0.1 to 10.0. */
  volume?: number;
  /** Pitch offset; the modal's slider offers -12 to +12 in whole steps. */
  pitch?: number;
  /** Emotion value picked from the modal's emotion list. */
  emotion?: string;
}

/** Props accepted by `AudioScriptModal`. */
interface AudioScriptModalProps {
  /** Whether the dialog is shown. */
  open: boolean;
  /** Scene number displayed in the dialog title. */
  sceneNumber: number;
  /** Current narration text (controlled by the parent). */
  value: string;
  /** Called with the new text whenever the narration field changes. */
  onChange: (v: string) => void;
  /** Called by the Cancel button and when the dialog itself asks to close. */
  onClose: () => void;
  /** Called by the Save button. */
  onSave: () => void;

  // --- Free (gTTS / pyttsx3) audio settings, all controlled by the parent ---

  /** Selected provider option value (the modal offers `gtts` and `pyttsx3`). */
  audioProvider: string;
  /** Language code typed into the Language field (e.g. `en`). */
  audioLang: string;
  /** Backs the "Speech Speed (gTTS)" selector: `true` = slow, `false` = normal. */
  audioSlow: boolean;
  /** Backs the "Speech Rate (pyttsx3)" number field. */
  audioRate: number;
  onChangeProvider: (v: string) => void;
  onChangeLang: (v: string) => void;
  onChangeSlow: (v: boolean) => void;
  onChangeRate: (v: number) => void;

  /** When truthy, an audio player for this URL is rendered above the script. */
  audioUrl?: string | null;

  // --- Audio generation callbacks ---

  /** AI generation handler; the AI generate button is only rendered when set. */
  onGenerateAI?: (params: AIAudioGenerationParams) => Promise<void>;
  /** Free generation handler; receives the trimmed narration text. */
  onGenerateFree?: (text: string) => Promise<void>;
}

// System voices offered in the Voice dropdown (WaveSpeed Minimax voice IDs).
// Each row is [voice ID sent as `voice_id`, display label, short description].
const VOICE_ROWS: Array<[string, string, string]> = [
  ['Wise_Woman', 'Wise Woman', 'Warm, authoritative female voice'],
  ['Friendly_Person', 'Friendly Person', 'Approachable and conversational'],
  ['Inspirational_girl', 'Inspirational Girl', 'Energetic and motivating'],
  ['Deep_Voice_Man', 'Deep Voice Man', 'Rich, deep male voice'],
  ['Calm_Woman', 'Calm Woman', 'Peaceful and soothing'],
  ['Casual_Guy', 'Casual Guy', 'Relaxed and informal'],
  ['Lively_Girl', 'Lively Girl', 'Vibrant and enthusiastic'],
  ['Patient_Man', 'Patient Man', 'Steady and reassuring'],
  ['Young_Knight', 'Young Knight', 'Brave and confident'],
  ['Determined_Man', 'Determined Man', 'Strong and resolute'],
  ['Lovely_Girl', 'Lovely Girl', 'Sweet and charming'],
  ['Decent_Boy', 'Decent Boy', 'Polite and well-mannered'],
  ['Imposing_Manner', 'Imposing Manner', 'Commanding and powerful'],
  ['Elegant_Man', 'Elegant Man', 'Sophisticated and refined'],
  ['Abbess', 'Abbess', 'Dignified and wise'],
  ['Sweet_Girl_2', 'Sweet Girl 2', 'Gentle and kind'],
  ['Exuberant_Girl', 'Exuberant Girl', 'Joyful and energetic'],
];

const AVAILABLE_VOICES = VOICE_ROWS.map(([value, label, description]) => ({
  value,
  label,
  description,
}));

// Emotion choices offered in the Emotion dropdown.
// Each row is [value sent as `emotion`, display label, short description].
const EMOTION_ROWS: Array<[string, string, string]> = [
  ['happy', 'Happy', 'Cheerful and upbeat tone'],
  ['sad', 'Sad', 'Melancholic and somber tone'],
  ['angry', 'Angry', 'Intense and forceful tone'],
  ['fear', 'Fear', 'Anxious and nervous tone'],
  ['surprised', 'Surprised', 'Astonished and amazed tone'],
  ['neutral', 'Neutral', 'Calm and balanced tone (default)'],
];

const EMOTIONS = EMOTION_ROWS.map(([value, label, description]) => ({
  value,
  label,
  description,
}));

/**
 * Extracts a renderable message from a value thrown by a generation callback.
 *
 * Prefers `err.response.data.detail`, then `err.message`, then `fallback`.
 * Only non-empty strings are accepted: a structured `detail` (object or
 * array) must never reach state, because it is rendered as a React child and
 * React cannot render plain objects.
 */
const getGenerationErrorMessage = (err: unknown, fallback: string): string => {
  if (typeof err === 'object' && err !== null) {
    const { response, message } = err as {
      response?: { data?: { detail?: unknown } | null } | null;
      message?: unknown;
    };
    const detail = response?.data?.detail;
    if (typeof detail === 'string' && detail) {
      return detail;
    }
    if (typeof message === 'string' && message) {
      return message;
    }
  }
  return fallback;
};

/**
 * Modal dialog for editing a scene's narration script and generating audio
 * for it.
 *
 * The script text and the free-audio settings (provider, language, slow flag,
 * rate) are controlled by the parent through props. The audio type toggle and
 * the AI settings (voice, emotion, speed, volume, pitch) are local state and
 * are handed to `onGenerateAI` when the user requests AI audio.
 *
 * Generation errors are shown inline below the script field. The modal is
 * left open after a generation call resolves.
 */
const AudioScriptModal: React.FC<AudioScriptModalProps> = ({
  open, sceneNumber, value, onChange, onClose, onSave,
  audioProvider, audioLang, audioSlow, audioRate,
  onChangeProvider, onChangeLang, onChangeSlow, onChangeRate,
  audioUrl,
  onGenerateAI,
  onGenerateFree,
}) => {
  const [isGeneratingAI, setIsGeneratingAI] = React.useState(false);
  const [isGeneratingFree, setIsGeneratingFree] = React.useState(false);
  const [generateError, setGenerateError] = React.useState<string | null>(null);

  // Audio type toggle - defaults to 'free'
  const [audioType, setAudioType] = React.useState<'free' | 'ai'>('free');

  // AI audio generation parameters
  const [voiceId, setVoiceId] = React.useState<string>('Wise_Woman');
  const [customVoiceId, setCustomVoiceId] = React.useState<string>('');
  const [useCustomVoice, setUseCustomVoice] = React.useState<boolean>(false);
  const [emotion, setEmotion] = React.useState<string>('happy');
  const [speed, setSpeed] = React.useState<number>(1.0);
  const [volume, setVolume] = React.useState<number>(1.0);
  const [pitch, setPitch] = React.useState<number>(0.0);

  // Derived values shared by the handlers and the JSX below.
  const trimmedScript = value.trim();
  const hasScript = trimmedScript.length > 0;
  const isGenerating = isGeneratingAI || isGeneratingFree;
  // The voice that will actually be sent; empty when "custom" is selected but
  // no ID has been typed yet.
  const effectiveVoiceId = useCustomVoice ? customVoiceId.trim() : voiceId;

  /** Runs `onGenerateAI` with the current script and AI settings. */
  const handleGenerateAI = async () => {
    if (!onGenerateAI || !hasScript) {
      return;
    }
    // Never send an empty voice_id: surface the problem instead.
    if (!effectiveVoiceId) {
      setGenerateError('Please enter a custom voice ID or choose one of the listed voices');
      return;
    }

    setIsGeneratingAI(true);
    setGenerateError(null);
    try {
      await onGenerateAI({
        text: trimmedScript,
        voice_id: effectiveVoiceId,
        emotion,
        speed,
        volume,
        pitch,
      });
    } catch (err) {
      setGenerateError(getGenerationErrorMessage(err, 'Failed to generate AI audio'));
    } finally {
      setIsGeneratingAI(false);
    }
  };

  /** Runs `onGenerateFree` with the current (trimmed) script. */
  const handleGenerateFree = async () => {
    if (!onGenerateFree || !hasScript) {
      return;
    }

    setIsGeneratingFree(true);
    setGenerateError(null);
    try {
      await onGenerateFree(trimmedScript);
    } catch (err) {
      setGenerateError(getGenerationErrorMessage(err, 'Failed to generate free audio'));
    } finally {
      setIsGeneratingFree(false);
    }
  };

  return (
    <Dialog
      open={open}
      onClose={onClose}
      maxWidth="md"
      fullWidth
      PaperProps={{
        sx: {
          backgroundColor: '#fff',
          borderRadius: 2,
          boxShadow: '0 24px 64px rgba(0,0,0,0.18)',
          border: '1px solid rgba(0,0,0,0.06)',
        },
      }}
    >
      <DialogTitle>Edit Audio Narration Script (Scene {sceneNumber})</DialogTitle>
      <DialogContent dividers sx={{ color: '#2C2416', bgcolor: '#fff' }}>
        <Box
          sx={{
            display: 'flex',
            flexDirection: 'column',
            gap: 3,
            pt: 1,
            '& .MuiFormLabel-root': { color: '#5D4037', fontWeight: 500 },
            '& .MuiInputBase-root': {
              color: '#2C2416',
              bgcolor: '#fff',
              '& .MuiOutlinedInput-notchedOutline': {
                borderColor: 'rgba(0, 0, 0, 0.23)',
              },
              '&:hover .MuiOutlinedInput-notchedOutline': {
                borderColor: 'rgba(0, 0, 0, 0.87)',
              },
              '&.Mui-focused .MuiOutlinedInput-notchedOutline': {
                borderColor: 'primary.main',
                borderWidth: '2px',
              },
            },
            '& .MuiInputBase-input': {
              color: '#2C2416',
            },
            '& textarea': {
              color: '#2C2416',
            },
            '& .MuiSelect-select': {
              color: '#2C2416',
            },
            '& .MuiFormHelperText-root': {
              color: 'rgba(0, 0, 0, 0.6)',
            },
            '& .MuiMenuItem-root': {
              color: '#2C2416',
            },
          }}
        >
          {/* Player for already-generated audio */}
          {audioUrl ? (
            <Box
              sx={{
                p: 1,
                backgroundColor: 'rgba(0,0,0,0.03)',
                borderRadius: 1,
                border: '1px solid rgba(0,0,0,0.06)',
              }}
            >
              <audio controls src={audioUrl} style={{ width: '100%' }}>
                Your browser does not support the audio element.
              </audio>
            </Box>
          ) : null}

          <TextField
            label="Audio Narration"
            value={value}
            onChange={(e) => onChange(e.target.value)}
            multiline
            minRows={6}
            fullWidth
            placeholder="Enter the narration text for this scene..."
            sx={{
              '& .MuiInputBase-input': {
                color: '#2C2416',
              },
            }}
          />

          {generateError && (
            <Box sx={{ color: 'error.main', fontSize: '0.875rem', mt: -1 }}>
              {generateError}
            </Box>
          )}

          <Divider sx={{ my: 1 }} />

          <Box sx={{ display: 'flex', flexDirection: 'column', gap: 2 }}>
            {/* Audio Type Toggle */}
            <Box>
              <Typography variant="subtitle2" sx={{ mb: 1.5, fontWeight: 600, color: '#5D4037' }}>
                Audio Type
              </Typography>
              <ToggleButtonGroup
                value={audioType}
                exclusive
                onChange={(_event, nextType: 'free' | 'ai' | null) => {
                  // MUI reports null when the active button is clicked again;
                  // ignore it so one type always stays selected.
                  if (nextType !== null) {
                    setAudioType(nextType);
                    setGenerateError(null);
                  }
                }}
                aria-label="audio type"
                fullWidth
                sx={{
                  '& .MuiToggleButton-root': {
                    textTransform: 'none',
                    borderColor: 'rgba(0, 0, 0, 0.23)',
                    color: '#5D4037',
                    '&.Mui-selected': {
                      backgroundColor: 'primary.main',
                      color: '#fff',
                      '&:hover': {
                        backgroundColor: 'primary.dark',
                      },
                    },
                    '&:hover': {
                      backgroundColor: 'rgba(0, 0, 0, 0.04)',
                    },
                  },
                }}
              >
                <ToggleButton value="free" aria-label="free audio">
                  <VolumeUpIcon sx={{ mr: 1 }} />
                  Free Audio (gTTS)
                </ToggleButton>
                <ToggleButton value="ai" aria-label="ai audio">
                  <SmartToyIcon sx={{ mr: 1 }} />
                  AI Audio (Minimax)
                </ToggleButton>
              </ToggleButtonGroup>
            </Box>

            {/* Generate button - depends on the selected audio type */}
            <Box sx={{ display: 'flex', gap: 2, flexWrap: 'wrap' }}>
              {audioType === 'ai' && onGenerateAI && (
                <OperationButton
                  operation={{
                    provider: 'audio',
                    model: 'minimax/speech-02-hd',
                    tokens_requested: trimmedScript.length, // Every character is 1 token
                    operation_type: 'audio_generation',
                    actual_provider_name: 'wavespeed',
                  }}
                  label="Generate AI Audio"
                  variant="contained"
                  size="medium"
                  startIcon={<SmartToyIcon />}
                  showCost={true}
                  checkOnHover={true}
                  checkOnMount={false}
                  onClick={handleGenerateAI}
                  disabled={isGenerating || !hasScript}
                  loading={isGeneratingAI}
                  sx={{ flex: 1, minWidth: '200px' }}
                />
              )}

              {audioType === 'free' && onGenerateFree && (
                <Button
                  variant="contained"
                  size="medium"
                  startIcon={isGeneratingFree ? <CircularProgress size={16} /> : <VolumeUpIcon />}
                  onClick={handleGenerateFree}
                  disabled={isGenerating || !hasScript}
                  sx={{ flex: 1, minWidth: '200px' }}
                >
                  {isGeneratingFree ? 'Generating...' : 'Generate Free Audio (gTTS)'}
                </Button>
              )}
            </Box>

            <Divider sx={{ my: 1 }} />

            {/* Settings - shown for the selected audio type only */}
            {audioType === 'ai' && (
              <Box>
                <Typography variant="subtitle2" sx={{ mb: 2, fontWeight: 600, color: '#5D4037' }}>
                  AI Audio Generation Settings
                </Typography>
                <Box sx={{ display: 'grid', gridTemplateColumns: { xs: '1fr', md: '1fr 1fr' }, gap: 2 }}>
                  {/* Voice Selection */}
                  <FormControl fullWidth>
                    <InputLabel>Voice</InputLabel>
                    <Select
                      value={useCustomVoice ? 'custom' : voiceId}
                      onChange={(e) => {
                        if (e.target.value === 'custom') {
                          setUseCustomVoice(true);
                        } else {
                          setUseCustomVoice(false);
                          setVoiceId(e.target.value);
                        }
                      }}
                      label="Voice"
                      renderValue={(selected) => {
                        if (selected === 'custom') {
                          return customVoiceId || 'Custom Voice ID';
                        }
                        const voice = AVAILABLE_VOICES.find((v) => v.value === selected);
                        return voice ? voice.label : selected;
                      }}
                    >
                      {AVAILABLE_VOICES.map((voice) => (
                        <MenuItem key={voice.value} value={voice.value}>
                          <Box>
                            <Typography variant="body2" sx={{ fontWeight: 500 }}>
                              {voice.label}
                            </Typography>
                            <Typography variant="caption" sx={{ color: 'text.secondary' }}>
                              {voice.description}
                            </Typography>
                          </Box>
                        </MenuItem>
                      ))}
                      <MenuItem value="custom">
                        <Box>
                          <Typography variant="body2" sx={{ fontWeight: 500, fontStyle: 'italic' }}>
                            Custom Voice ID...
                          </Typography>
                          <Typography variant="caption" sx={{ color: 'text.secondary' }}>
                            Use a voice ID from voice cloning
                          </Typography>
                        </Box>
                      </MenuItem>
                    </Select>
                    {/* Rendered as a div: the default <p> may not contain the Box (<div>) below. */}
                    <FormHelperText component="div">
                      <Box sx={{ display: 'flex', alignItems: 'center', gap: 0.5 }}>
                        Choose a voice that matches your story's tone
                        <Tooltip
                          title={
                            <Box sx={{ p: 0.5 }}>
                              <Typography variant="caption" sx={{ display: 'block', mb: 0.5 }}>
                                Current Voice ID: {effectiveVoiceId || '(none entered)'}
                              </Typography>
                              <Typography variant="caption" sx={{ display: 'block', mb: 0.5 }}>
                                You can use system voices above or enter a custom voice ID from voice cloning.
                              </Typography>
                              <Typography variant="caption" sx={{ display: 'block' }}>
                                Learn more:{' '}
                                <a
                                  href="https://wavespeed.ai/models/minimax/voice-clone"
                                  target="_blank"
                                  rel="noopener noreferrer"
                                  style={{ color: '#90caf9' }}
                                >
                                  Voice Cloning Guide
                                </a>
                              </Typography>
                            </Box>
                          }
                          arrow
                          placement="top"
                        >
                          <InfoOutlinedIcon sx={{ fontSize: '0.875rem', color: 'text.secondary', cursor: 'help' }} />
                        </Tooltip>
                      </Box>
                    </FormHelperText>
                  </FormControl>

                  {/* Custom Voice ID Input (shown when custom voice is selected) */}
                  {useCustomVoice && (
                    <TextField
                      fullWidth
                      label="Custom Voice ID"
                      value={customVoiceId}
                      onChange={(e) => setCustomVoiceId(e.target.value)}
                      helperText="Enter your custom voice ID from voice cloning"
                      placeholder="your-custom-voice-id"
                    />
                  )}

                  {/* Emotion Selection */}
                  <FormControl fullWidth>
                    <InputLabel>Emotion</InputLabel>
                    <Select
                      value={emotion}
                      onChange={(e) => setEmotion(e.target.value)}
                      label="Emotion"
                    >
                      {EMOTIONS.map((em) => (
                        <MenuItem key={em.value} value={em.value}>
                          <Box>
                            <Typography variant="body2">{em.label}</Typography>
                            <Typography variant="caption" sx={{ color: 'text.secondary' }}>
                              {em.description}
                            </Typography>
                          </Box>
                        </MenuItem>
                      ))}
                    </Select>
                    <FormHelperText>
                      Select the emotional tone for the narration
                    </FormHelperText>
                  </FormControl>

                  {/* Speed Slider */}
                  <Box>
                    <Box sx={{ display: 'flex', alignItems: 'center', gap: 1, mb: 1 }}>
                      <Typography variant="body2" sx={{ minWidth: '60px' }}>
                        Speed
                      </Typography>
                      <Slider
                        value={speed}
                        onChange={(_, newValue) => setSpeed(newValue as number)}
                        min={0.5}
                        max={2.0}
                        step={0.1}
                        valueLabelDisplay="auto"
                        valueLabelFormat={(current) => `${current}x`}
                        sx={{ flex: 1 }}
                      />
                      <Typography variant="body2" sx={{ minWidth: '40px', textAlign: 'right' }}>
                        {speed.toFixed(1)}x
                      </Typography>
                    </Box>
                    <FormHelperText component="div">
                      <Box sx={{ display: 'flex', alignItems: 'center', gap: 0.5 }}>
                        Speech speed (0.5x = slow, 1.0x = normal, 2.0x = fast)
                        <Tooltip
                          title="Adjust how fast the narration speaks. 1.0 is normal speed, suitable for most content."
                          arrow
                          placement="top"
                        >
                          <InfoOutlinedIcon sx={{ fontSize: '0.875rem', color: 'text.secondary', cursor: 'help' }} />
                        </Tooltip>
                      </Box>
                    </FormHelperText>
                  </Box>

                  {/* Volume Slider */}
                  <Box>
                    <Box sx={{ display: 'flex', alignItems: 'center', gap: 1, mb: 1 }}>
                      <Typography variant="body2" sx={{ minWidth: '60px' }}>
                        Volume
                      </Typography>
                      <Slider
                        value={volume}
                        onChange={(_, newValue) => setVolume(newValue as number)}
                        min={0.1}
                        max={10.0}
                        step={0.1}
                        valueLabelDisplay="auto"
                        valueLabelFormat={(current) => `${current.toFixed(1)}`}
                        sx={{ flex: 1 }}
                      />
                      <Typography variant="body2" sx={{ minWidth: '40px', textAlign: 'right' }}>
                        {volume.toFixed(1)}
                      </Typography>
                    </Box>
                    <FormHelperText component="div">
                      <Box sx={{ display: 'flex', alignItems: 'center', gap: 0.5 }}>
                        Audio volume level (0.1 = quiet, 1.0 = normal, 10.0 = loud)
                        <Tooltip
                          title="Control the loudness of the audio. 1.0 is standard volume. Increase for emphasis, decrease for subtlety."
                          arrow
                          placement="top"
                        >
                          <InfoOutlinedIcon sx={{ fontSize: '0.875rem', color: 'text.secondary', cursor: 'help' }} />
                        </Tooltip>
                      </Box>
                    </FormHelperText>
                  </Box>

                  {/* Pitch Slider (spans both columns on md+) */}
                  <Box sx={{ gridColumn: { xs: '1', md: '1 / -1' } }}>
                    <Box sx={{ display: 'flex', alignItems: 'center', gap: 1, mb: 1 }}>
                      <Typography variant="body2" sx={{ minWidth: '60px' }}>
                        Pitch
                      </Typography>
                      <Slider
                        value={pitch}
                        onChange={(_, newValue) => setPitch(newValue as number)}
                        min={-12}
                        max={12}
                        step={1}
                        valueLabelDisplay="auto"
                        valueLabelFormat={(current) => `${current > 0 ? '+' : ''}${current}`}
                        marks={[
                          { value: -12, label: '-12' },
                          { value: 0, label: '0' },
                          { value: 12, label: '+12' },
                        ]}
                        sx={{ flex: 1 }}
                      />
                      <Typography variant="body2" sx={{ minWidth: '50px', textAlign: 'right' }}>
                        {pitch > 0 ? '+' : ''}{pitch}
                      </Typography>
                    </Box>
                    <FormHelperText component="div">
                      <Box sx={{ display: 'flex', alignItems: 'center', gap: 0.5 }}>
                        Voice pitch adjustment (-12 = lower, 0 = normal, +12 = higher)
                        <Tooltip
                          title="Adjust the pitch of the voice. Negative values make the voice deeper, positive values make it higher. 0 keeps the natural voice pitch."
                          arrow
                          placement="top"
                        >
                          <InfoOutlinedIcon sx={{ fontSize: '0.875rem', color: 'text.secondary', cursor: 'help' }} />
                        </Tooltip>
                      </Box>
                    </FormHelperText>
                  </Box>
                </Box>
              </Box>
            )}

            {audioType === 'free' && (
              <Box>
                <Typography variant="subtitle2" sx={{ mb: 2, fontWeight: 600, color: '#5D4037' }}>
                  Free Audio (gTTS) Settings
                </Typography>
                <Box sx={{ display: 'grid', gridTemplateColumns: { xs: '1fr', md: '1fr 1fr' }, gap: 2 }}>
                  <TextField
                    select
                    label="Audio Provider"
                    value={audioProvider}
                    onChange={(e) => onChangeProvider(e.target.value)}
                    SelectProps={{ native: true }}
                    helperText="Text-to-speech engine for free audio generation"
                  >
                    <option value="gtts">gTTS (Google Text-to-Speech)</option>
                    <option value="pyttsx3">pyttsx3 (Offline)</option>
                  </TextField>
                  <TextField
                    label="Language"
                    value={audioLang}
                    onChange={(e) => onChangeLang(e.target.value)}
                    helperText="Language code (e.g., en for English, hi for Hindi)"
                    placeholder="en"
                  />
                  {/* The native select works with strings, so the boolean is mapped to 'true'/'false'. */}
                  <TextField
                    select
                    label="Speech Speed (gTTS)"
                    value={audioSlow ? 'true' : 'false'}
                    onChange={(e) => onChangeSlow(e.target.value === 'true')}
                    SelectProps={{ native: true }}
                    helperText="Whether to speak slowly (useful for clarity)"
                  >
                    <option value="false">Normal Speed</option>
                    <option value="true">Slow Speed</option>
                  </TextField>
                  <TextField
                    type="number"
                    label="Speech Rate (pyttsx3)"
                    value={audioRate}
                    onChange={(e) => onChangeRate(Number(e.target.value))}
                    inputProps={{ min: 50, max: 300, step: 10 }}
                    helperText="Words per minute (50-300, default: 150)"
                  />
                </Box>
              </Box>
            )}
          </Box>
        </Box>
      </DialogContent>
      <DialogActions>
        <Button onClick={onClose}>Cancel</Button>
        <Button variant="contained" onClick={onSave}>Save</Button>
      </DialogActions>
    </Dialog>
  );
};

export default AudioScriptModal;