AI story writer enhancements, text to video and voice generation, subscription management, and more.

2025-11-19 09:55:32 +05:30
parent bf7493c366
commit e96525347b
64 changed files with 10367 additions and 400 deletions
--- a/frontend/src/components/StoryWriter/Phases/StoryOutlineParts/AudioScriptModal.tsx
+++ b/frontend/src/components/StoryWriter/Phases/StoryOutlineParts/AudioScriptModal.tsx
@@ -1,5 +1,14 @@
 import React from 'react';
-import { Box, Button, Dialog, DialogActions, DialogContent, DialogTitle, TextField } from '@mui/material';
+import { 
+  Box, Button, Dialog, DialogActions, DialogContent, DialogTitle, 
+  TextField, Divider, CircularProgress, Typography, Tooltip, IconButton,
+  Slider, FormControl, InputLabel, Select, MenuItem, FormHelperText,
+  ToggleButtonGroup, ToggleButton 
+} from '@mui/material';
+import VolumeUpIcon from '@mui/icons-material/VolumeUp';
+import SmartToyIcon from '@mui/icons-material/SmartToy';
+import InfoOutlinedIcon from '@mui/icons-material/InfoOutlined';
+import { OperationButton } from '../../../shared/OperationButton';

 interface AudioScriptModalProps {
  open: boolean;
@@ -18,14 +27,114 @@ interface AudioScriptModalProps {
  onChangeSlow: (v: boolean) => void;
  onChangeRate: (v: number) => void;
  audioUrl?: string | null;
+  // audio generation callbacks - now with full parameters
+  onGenerateAI?: (params: {
+    text: string;
+    voice_id?: string;
+    speed?: number;
+    volume?: number;
+    pitch?: number;
+    emotion?: string;
+  }) => Promise<void>;
+  onGenerateFree?: (text: string) => Promise<void>;
 }

+// Preset WaveSpeed Minimax voices for the Voice select: `value` is sent as voice_id, `label`/`description` are shown in the menu.
+const AVAILABLE_VOICES = [
+  { value: 'Wise_Woman', label: 'Wise Woman', description: 'Warm, authoritative female voice' },
+  { value: 'Friendly_Person', label: 'Friendly Person', description: 'Approachable and conversational' },
+  { value: 'Inspirational_girl', label: 'Inspirational Girl', description: 'Energetic and motivating' },
+  { value: 'Deep_Voice_Man', label: 'Deep Voice Man', description: 'Rich, deep male voice' },
+  { value: 'Calm_Woman', label: 'Calm Woman', description: 'Peaceful and soothing' },
+  { value: 'Casual_Guy', label: 'Casual Guy', description: 'Relaxed and informal' },
+  { value: 'Lively_Girl', label: 'Lively Girl', description: 'Vibrant and enthusiastic' },
+  { value: 'Patient_Man', label: 'Patient Man', description: 'Steady and reassuring' },
+  { value: 'Young_Knight', label: 'Young Knight', description: 'Brave and confident' },
+  { value: 'Determined_Man', label: 'Determined Man', description: 'Strong and resolute' },
+  { value: 'Lovely_Girl', label: 'Lovely Girl', description: 'Sweet and charming' },
+  { value: 'Decent_Boy', label: 'Decent Boy', description: 'Polite and well-mannered' },
+  { value: 'Imposing_Manner', label: 'Imposing Manner', description: 'Commanding and powerful' },
+  { value: 'Elegant_Man', label: 'Elegant Man', description: 'Sophisticated and refined' },
+  { value: 'Abbess', label: 'Abbess', description: 'Dignified and wise' },
+  { value: 'Sweet_Girl_2', label: 'Sweet Girl 2', description: 'Gentle and kind' },
+  { value: 'Exuberant_Girl', label: 'Exuberant Girl', description: 'Joyful and energetic' },
+];
+
+const EMOTIONS = [ // Options for the Emotion select; the chosen `value` is passed as `emotion` to onGenerateAI.
+  { value: 'happy', label: 'Happy', description: 'Cheerful and upbeat tone' },
+  { value: 'sad', label: 'Sad', description: 'Melancholic and somber tone' },
+  { value: 'angry', label: 'Angry', description: 'Intense and forceful tone' },
+  { value: 'fear', label: 'Fear', description: 'Anxious and nervous tone' },
+  { value: 'surprised', label: 'Surprised', description: 'Astonished and amazed tone' },
+  { value: 'neutral', label: 'Neutral', description: 'Calm and balanced tone (default)' },
+];
+
 const AudioScriptModal: React.FC<AudioScriptModalProps> = ({
  open, sceneNumber, value, onChange, onClose, onSave,
  audioProvider, audioLang, audioSlow, audioRate,
  onChangeProvider, onChangeLang, onChangeSlow, onChangeRate,
  audioUrl,
+  onGenerateAI,
+  onGenerateFree,
 }) => {
+  const [isGeneratingAI, setIsGeneratingAI] = React.useState(false);
+  const [isGeneratingFree, setIsGeneratingFree] = React.useState(false);
+  const [generateError, setGenerateError] = React.useState<string | null>(null);
+  
+  // Audio type toggle - default to 'free'
+  const [audioType, setAudioType] = React.useState<'free' | 'ai'>('free');
+  
+  // AI Audio generation parameters with intelligent defaults
+  const [voiceId, setVoiceId] = React.useState<string>('Wise_Woman');
+  const [customVoiceId, setCustomVoiceId] = React.useState<string>('');
+  const [useCustomVoice, setUseCustomVoice] = React.useState<boolean>(false);
+  const [emotion, setEmotion] = React.useState<string>('happy');
+  const [speed, setSpeed] = React.useState<number>(1.0);
+  const [volume, setVolume] = React.useState<number>(1.0);
+  const [pitch, setPitch] = React.useState<number>(0.0);
+
+  // Sends the trimmed script plus the chosen voice settings to onGenerateAI and shows any failure inline.
+  const handleGenerateAI = async () => {
+    const text = value.trim();
+    if (!onGenerateAI || !text) return;
+    // Custom mode with a blank ID would otherwise send voice_id: '' to the parent callback.
+    const selectedVoice = useCustomVoice ? customVoiceId.trim() : voiceId;
+    if (!selectedVoice) {
+      setGenerateError('Enter a custom voice ID or choose one of the listed voices');
+      return;
+    }
+    setIsGeneratingAI(true);
+    setGenerateError(null);
+    try {
+      await onGenerateAI({ text, voice_id: selectedVoice, emotion, speed, volume, pitch });
+    } catch (err: unknown) {
+      // `detail` is untyped; keep only string messages because generateError is rendered as text.
+      const failure = err as { response?: { data?: { detail?: unknown } }; message?: unknown } | null;
+      const detail = failure?.response?.data?.detail;
+      const message = typeof detail === 'string' && detail ? detail : failure?.message;
+      setGenerateError(typeof message === 'string' && message ? message : 'Failed to generate AI audio');
+    } finally {
+      setIsGeneratingAI(false);
+    }
+  };
+
+  // Sends the trimmed script to onGenerateFree and shows any failure inline.
+  const handleGenerateFree = async () => {
+    if (!onGenerateFree || !value.trim()) return;
+    setIsGeneratingFree(true);
+    setGenerateError(null);
+    try {
+      await onGenerateFree(value.trim());
+    } catch (err: unknown) {
+      // `detail` is untyped; keep only string messages because generateError is rendered as text.
+      const failure = err as { response?: { data?: { detail?: unknown } }; message?: unknown } | null;
+      const detail = failure?.response?.data?.detail;
+      const message = typeof detail === 'string' && detail ? detail : failure?.message;
+      setGenerateError(typeof message === 'string' && message ? message : 'Failed to generate free audio');
+    } finally {
+      setIsGeneratingFree(false);
+    }
+  };
  return (
    <Dialog
      open={open}
@@ -42,14 +151,43 @@ const AudioScriptModal: React.FC<AudioScriptModalProps> = ({
      }}
    >
      <DialogTitle>Edit Audio Narration Script (Scene {sceneNumber})</DialogTitle>
-      <DialogContent dividers sx={{ color: '#2C2416' }}>
+      <DialogContent dividers sx={{ color: '#2C2416', bgcolor: '#fff' }}>
        <Box
          sx={{
            display: 'flex',
            flexDirection: 'column',
-            gap: 2,
-            '& .MuiFormLabel-root': { color: '#6b5846' },
-            '& .MuiInputBase-root': { color: '#2C2416' },
+            gap: 3,
+            pt: 1,
+            '& .MuiFormLabel-root': { color: '#5D4037', fontWeight: 500 },
+            '& .MuiInputBase-root': { 
+              color: '#2C2416',
+              bgcolor: '#fff',
+              '& .MuiOutlinedInput-notchedOutline': {
+                borderColor: 'rgba(0, 0, 0, 0.23)',
+              },
+              '&:hover .MuiOutlinedInput-notchedOutline': {
+                borderColor: 'rgba(0, 0, 0, 0.87)',
+              },
+              '&.Mui-focused .MuiOutlinedInput-notchedOutline': {
+                borderColor: 'primary.main',
+                borderWidth: '2px',
+              },
+            },
+            '& .MuiInputBase-input': {
+              color: '#2C2416',
+            },
+            '& textarea': {
+              color: '#2C2416',
+            },
+            '& .MuiSelect-select': {
+              color: '#2C2416',
+            },
+            '& .MuiFormHelperText-root': {
+              color: 'rgba(0, 0, 0, 0.6)',
+            },
+            '& .MuiMenuItem-root': {
+              color: '#2C2416',
+            },
          }}
        >
          {audioUrl ? (
@@ -73,40 +211,387 @@ const AudioScriptModal: React.FC<AudioScriptModalProps> = ({
            multiline
            minRows={6}
            fullWidth
+            placeholder="Enter the narration text for this scene..."
+            sx={{
+              '& .MuiInputBase-input': {
+                color: '#2C2416',
+              },
+            }}
          />
-          <Box sx={{ display: 'grid', gridTemplateColumns: { xs: '1fr', md: '1fr 1fr' }, gap: 2 }}>
-            <TextField
-              select
-              label="Audio Provider"
-              value={audioProvider}
-              onChange={(e) => onChangeProvider(e.target.value)}
-              SelectProps={{ native: true }}
-            >
-              <option value="gtts">gTTS</option>
-              <option value="pyttsx3">pyttsx3</option>
-            </TextField>
-            <TextField
-              label="Language (e.g., en, hi)"
-              value={audioLang}
-              onChange={(e) => onChangeLang(e.target.value)}
-            />
-            <TextField
-              select
-              label="Slow (gTTS)"
-              value={audioSlow ? 'true' : 'false'}
-              onChange={(e) => onChangeSlow(e.target.value === 'true')}
-              SelectProps={{ native: true }}
-            >
-              <option value="false">Normal</option>
-              <option value="true">Slow</option>
-            </TextField>
-            <TextField
-              type="number"
-              label="Rate (pyttsx3)"
-              value={audioRate}
-              onChange={(e) => onChangeRate(Number(e.target.value))}
-              inputProps={{ min: 50, max: 300, step: 10 }}
-            />
+          
+          {generateError && (
+            <Box sx={{ color: 'error.main', fontSize: '0.875rem', mt: -1 }}>
+              {generateError}
+            </Box>
+          )}
+
+          <Divider sx={{ my: 1 }} />
+
+          {/* Audio Type Toggle */}
+          <Box sx={{ display: 'flex', flexDirection: 'column', gap: 2 }}>
+            <Box>
+              <Typography variant="subtitle2" sx={{ mb: 1.5, fontWeight: 600, color: '#5D4037' }}>
+                Audio Type
+              </Typography>
+              <ToggleButtonGroup
+                value={audioType}
+                exclusive
+                onChange={(_, newValue) => {
+                  if (newValue !== null) {
+                    setAudioType(newValue);
+                    setGenerateError(null);
+                  }
+                }}
+                aria-label="audio type"
+                fullWidth
+                sx={{
+                  '& .MuiToggleButton-root': {
+                    textTransform: 'none',
+                    borderColor: 'rgba(0, 0, 0, 0.23)',
+                    color: '#5D4037',
+                    '&.Mui-selected': {
+                      backgroundColor: 'primary.main',
+                      color: '#fff',
+                      '&:hover': {
+                        backgroundColor: 'primary.dark',
+                      },
+                    },
+                    '&:hover': {
+                      backgroundColor: 'rgba(0, 0, 0, 0.04)',
+                    },
+                  },
+                }}
+              >
+                <ToggleButton value="free" aria-label="free audio">
+                  <VolumeUpIcon sx={{ mr: 1 }} />
+                  Free Audio (gTTS)
+                </ToggleButton>
+                <ToggleButton value="ai" aria-label="ai audio">
+                  <SmartToyIcon sx={{ mr: 1 }} />
+                  AI Audio (Minimax)
+                </ToggleButton>
+              </ToggleButtonGroup>
+            </Box>
+
+            {/* Generate Button - Context aware based on audio type */}
+            <Box sx={{ display: 'flex', gap: 2, flexWrap: 'wrap' }}>
+              {audioType === 'ai' && onGenerateAI && (
+                <OperationButton
+                  operation={{
+                    provider: 'audio',
+                    model: 'minimax/speech-02-hd',
+                    tokens_requested: value.trim().length, // Every character is 1 token
+                    operation_type: 'audio_generation',
+                    actual_provider_name: 'wavespeed',
+                  }}
+                  label="Generate AI Audio"
+                  variant="contained"
+                  size="medium"
+                  startIcon={<SmartToyIcon />}
+                  showCost={true}
+                  checkOnHover={true}
+                  checkOnMount={false}
+                  onClick={handleGenerateAI}
+                  disabled={isGeneratingAI || isGeneratingFree || !value.trim()}
+                  loading={isGeneratingAI}
+                  sx={{ flex: 1, minWidth: '200px' }}
+                />
+              )}
+
+              {audioType === 'free' && onGenerateFree && (
+                <Button
+                  variant="contained"
+                  size="medium"
+                  startIcon={isGeneratingFree ? <CircularProgress size={16} /> : <VolumeUpIcon />}
+                  onClick={handleGenerateFree}
+                  disabled={isGeneratingAI || isGeneratingFree || !value.trim()}
+                  sx={{ flex: 1, minWidth: '200px' }}
+                >
+                  {isGeneratingFree ? 'Generating...' : 'Generate Free Audio (gTTS)'}
+                </Button>
+              )}
+            </Box>
+
+            <Divider sx={{ my: 1 }} />
+
+            {/* Settings - Conditionally shown based on audio type */}
+            {audioType === 'ai' && (
+              <Box>
+                <Typography variant="subtitle2" sx={{ mb: 2, fontWeight: 600, color: '#5D4037' }}>
+                  AI Audio Generation Settings
+                </Typography>
+            <Box sx={{ display: 'grid', gridTemplateColumns: { xs: '1fr', md: '1fr 1fr' }, gap: 2 }}>
+              {/* Voice Selection */}
+              <FormControl fullWidth>
+                <InputLabel>Voice</InputLabel>
+                <Select
+                  value={useCustomVoice ? 'custom' : voiceId}
+                  onChange={(e) => {
+                    if (e.target.value === 'custom') {
+                      setUseCustomVoice(true);
+                    } else {
+                      setUseCustomVoice(false);
+                      setVoiceId(e.target.value);
+                    }
+                  }}
+                  label="Voice"
+                  renderValue={(value) => {
+                    if (value === 'custom') {
+                      return customVoiceId || 'Custom Voice ID';
+                    }
+                    const voice = AVAILABLE_VOICES.find(v => v.value === value);
+                    return voice ? voice.label : value;
+                  }}
+                >
+                  {AVAILABLE_VOICES.map((voice) => (
+                    <MenuItem key={voice.value} value={voice.value}>
+                      <Box>
+                        <Typography variant="body2" sx={{ fontWeight: 500 }}>
+                          {voice.label}
+                        </Typography>
+                        <Typography variant="caption" sx={{ color: 'text.secondary' }}>
+                          {voice.description}
+                        </Typography>
+                      </Box>
+                    </MenuItem>
+                  ))}
+                  <MenuItem value="custom">
+                    <Box>
+                      <Typography variant="body2" sx={{ fontWeight: 500, fontStyle: 'italic' }}>
+                        Custom Voice ID...
+                      </Typography>
+                      <Typography variant="caption" sx={{ color: 'text.secondary' }}>
+                        Use a voice ID from voice cloning
+                      </Typography>
+                    </Box>
+                  </MenuItem>
+                </Select>
+                <FormHelperText>
+                  <Box sx={{ display: 'flex', alignItems: 'center', gap: 0.5 }}>
+                    Choose a voice that matches your story's tone
+                    <Tooltip
+                      title={
+                        <Box sx={{ p: 0.5 }}>
+                          <Typography variant="caption" sx={{ display: 'block', mb: 0.5 }}>
+                            Current Voice ID: {voiceId}
+                          </Typography>
+                          <Typography variant="caption" sx={{ display: 'block', mb: 0.5 }}>
+                            You can use system voices above or enter a custom voice ID from voice cloning.
+                          </Typography>
+                          <Typography variant="caption" sx={{ display: 'block' }}>
+                            Learn more:{' '}
+                            <a
+                              href="https://wavespeed.ai/models/minimax/voice-clone"
+                              target="_blank"
+                              rel="noopener noreferrer"
+                              style={{ color: '#90caf9' }}
+                            >
+                              Voice Cloning Guide
+                            </a>
+                          </Typography>
+                        </Box>
+                      }
+                      arrow
+                      placement="top"
+                    >
+                      <InfoOutlinedIcon sx={{ fontSize: '0.875rem', color: 'text.secondary', cursor: 'help' }} />
+                    </Tooltip>
+                  </Box>
+                </FormHelperText>
+              </FormControl>
+              
+              {/* Custom Voice ID Input (shown when custom voice is selected) */}
+              {useCustomVoice && (
+                <TextField
+                  fullWidth
+                  label="Custom Voice ID"
+                  value={customVoiceId}
+                  onChange={(e) => setCustomVoiceId(e.target.value)}
+                  helperText="Enter your custom voice ID from voice cloning"
+                  placeholder="your-custom-voice-id"
+                />
+              )}
+
+              {/* Emotion Selection */}
+              <FormControl fullWidth>
+                <InputLabel>Emotion</InputLabel>
+                <Select
+                  value={emotion}
+                  onChange={(e) => setEmotion(e.target.value)}
+                  label="Emotion"
+                >
+                  {EMOTIONS.map((em) => (
+                    <MenuItem key={em.value} value={em.value}>
+                      <Box>
+                        <Typography variant="body2">{em.label}</Typography>
+                        <Typography variant="caption" sx={{ color: 'text.secondary' }}>
+                          {em.description}
+                        </Typography>
+                      </Box>
+                    </MenuItem>
+                  ))}
+                </Select>
+                <FormHelperText>
+                  Select the emotional tone for the narration
+                </FormHelperText>
+              </FormControl>
+
+              {/* Speed Slider */}
+              <Box>
+                <Box sx={{ display: 'flex', alignItems: 'center', gap: 1, mb: 1 }}>
+                  <Typography variant="body2" sx={{ minWidth: '60px' }}>
+                    Speed
+                  </Typography>
+                  <Slider
+                    value={speed}
+                    onChange={(_, newValue) => setSpeed(newValue as number)}
+                    min={0.5}
+                    max={2.0}
+                    step={0.1}
+                    valueLabelDisplay="auto"
+                    valueLabelFormat={(value) => `${value}x`}
+                    sx={{ flex: 1 }}
+                  />
+                  <Typography variant="body2" sx={{ minWidth: '40px', textAlign: 'right' }}>
+                    {speed.toFixed(1)}x
+                  </Typography>
+                </Box>
+                <FormHelperText>
+                  <Box sx={{ display: 'flex', alignItems: 'center', gap: 0.5 }}>
+                    Speech speed (0.5x = slow, 1.0x = normal, 2.0x = fast)
+                    <Tooltip
+                      title="Adjust how fast the narration speaks. 1.0 is normal speed, suitable for most content."
+                      arrow
+                      placement="top"
+                    >
+                      <InfoOutlinedIcon sx={{ fontSize: '0.875rem', color: 'text.secondary', cursor: 'help' }} />
+                    </Tooltip>
+                  </Box>
+                </FormHelperText>
+              </Box>
+
+              {/* Volume Slider */}
+              <Box>
+                <Box sx={{ display: 'flex', alignItems: 'center', gap: 1, mb: 1 }}>
+                  <Typography variant="body2" sx={{ minWidth: '60px' }}>
+                    Volume
+                  </Typography>
+                  <Slider
+                    value={volume}
+                    onChange={(_, newValue) => setVolume(newValue as number)}
+                    min={0.1}
+                    max={10.0}
+                    step={0.1}
+                    valueLabelDisplay="auto"
+                    valueLabelFormat={(value) => `${value.toFixed(1)}`}
+                    sx={{ flex: 1 }}
+                  />
+                  <Typography variant="body2" sx={{ minWidth: '40px', textAlign: 'right' }}>
+                    {volume.toFixed(1)}
+                  </Typography>
+                </Box>
+                <FormHelperText>
+                  <Box sx={{ display: 'flex', alignItems: 'center', gap: 0.5 }}>
+                    Audio volume level (0.1 = quiet, 1.0 = normal, 10.0 = loud)
+                    <Tooltip
+                      title="Control the loudness of the audio. 1.0 is standard volume. Increase for emphasis, decrease for subtlety."
+                      arrow
+                      placement="top"
+                    >
+                      <InfoOutlinedIcon sx={{ fontSize: '0.875rem', color: 'text.secondary', cursor: 'help' }} />
+                    </Tooltip>
+                  </Box>
+                </FormHelperText>
+              </Box>
+
+              {/* Pitch Slider */}
+              <Box sx={{ gridColumn: { xs: '1', md: '1 / -1' } }}>
+                <Box sx={{ display: 'flex', alignItems: 'center', gap: 1, mb: 1 }}>
+                  <Typography variant="body2" sx={{ minWidth: '60px' }}>
+                    Pitch
+                  </Typography>
+                  <Slider
+                    value={pitch}
+                    onChange={(_, newValue) => setPitch(newValue as number)}
+                    min={-12}
+                    max={12}
+                    step={1}
+                    valueLabelDisplay="auto"
+                    valueLabelFormat={(value) => `${value > 0 ? '+' : ''}${value}`}
+                    marks={[
+                      { value: -12, label: '-12' },
+                      { value: 0, label: '0' },
+                      { value: 12, label: '+12' },
+                    ]}
+                    sx={{ flex: 1 }}
+                  />
+                  <Typography variant="body2" sx={{ minWidth: '50px', textAlign: 'right' }}>
+                    {pitch > 0 ? '+' : ''}{pitch}
+                  </Typography>
+                </Box>
+                <FormHelperText>
+                  <Box sx={{ display: 'flex', alignItems: 'center', gap: 0.5 }}>
+                    Voice pitch adjustment (-12 = lower, 0 = normal, +12 = higher)
+                    <Tooltip
+                      title="Adjust the pitch of the voice. Negative values make the voice deeper, positive values make it higher. 0 keeps the natural voice pitch."
+                      arrow
+                      placement="top"
+                    >
+                      <InfoOutlinedIcon sx={{ fontSize: '0.875rem', color: 'text.secondary', cursor: 'help' }} />
+                    </Tooltip>
+                  </Box>
+                </FormHelperText>
+              </Box>
+            </Box>
+              </Box>
+            )}
+
+            {audioType === 'free' && (
+              <Box>
+                <Typography variant="subtitle2" sx={{ mb: 2, fontWeight: 600, color: '#5D4037' }}>
+                  Free Audio (gTTS) Settings
+                </Typography>
+                <Box sx={{ display: 'grid', gridTemplateColumns: { xs: '1fr', md: '1fr 1fr' }, gap: 2 }}>
+                  <TextField
+                    select
+                    label="Audio Provider"
+                    value={audioProvider}
+                    onChange={(e) => onChangeProvider(e.target.value)}
+                    SelectProps={{ native: true }}
+                    helperText="Text-to-speech engine for free audio generation"
+                  >
+                    <option value="gtts">gTTS (Google Text-to-Speech)</option>
+                    <option value="pyttsx3">pyttsx3 (Offline)</option>
+                  </TextField>
+                  <TextField
+                    label="Language"
+                    value={audioLang}
+                    onChange={(e) => onChangeLang(e.target.value)}
+                    helperText="Language code (e.g., en for English, hi for Hindi)"
+                    placeholder="en"
+                  />
+                  <TextField
+                    select
+                    label="Speech Speed (gTTS)"
+                    value={audioSlow ? 'true' : 'false'}
+                    onChange={(e) => onChangeSlow(e.target.value === 'true')}
+                    SelectProps={{ native: true }}
+                    helperText="Whether to speak slowly (useful for clarity)"
+                  >
+                    <option value="false">Normal Speed</option>
+                    <option value="true">Slow Speed</option>
+                  </TextField>
+                  <TextField
+                    type="number"
+                    label="Speech Rate (pyttsx3)"
+                    value={audioRate}
+                    onChange={(e) => onChangeRate(Number(e.target.value))}
+                    inputProps={{ min: 50, max: 300, step: 10 }}
+                    helperText="Words per minute (50-300, default: 150)"
+                  />
+                </Box>
+              </Box>
+            )}
          </Box>
        </Box>
      </DialogContent>