AI Image and Audio Generation Improvements.

AI Video Generation Pre-Flight Checklist. Cost Estimate Improvements.
This commit is contained in:
ajaysi
2025-12-25 16:26:08 +05:30
parent 59913bffa9
commit 7512933c65
163 changed files with 8938 additions and 37401 deletions

View File

@@ -24,12 +24,16 @@ import { youtubeApi, type VideoPlan, type Scene } from '../../services/youtubeAp
import { STEPS, YT_RED, YT_BG, YT_BORDER, YT_TEXT, type Resolution, type DurationType, type VideoType } from './constants';
import { PlanStep } from './components/PlanStep';
import { ScenesStep } from './components/ScenesStep';
import { SceneGenerationStep } from './components/SceneGenerationStep';
import { RenderStep } from './components/RenderStep';
import { useRenderPolling } from './hooks/useRenderPolling';
import { useCostEstimate } from './hooks/useCostEstimate';
import { useImageGenerationPolling } from './hooks/useImageGenerationPolling';
import HeaderControls from '../shared/HeaderControls';
import { useYouTubeCreatorState } from '../../hooks/useYouTubeCreatorState';
import { ContentAsset } from '../../hooks/useContentAssets';
import { AudioGenerationSettings } from '../../components/shared/AudioSettingsModal';
import type { YouTubeImageGenerationSettings } from './shared';
const YouTubeCreator: React.FC = () => {
const navigate = useNavigate();
@@ -65,6 +69,11 @@ const YouTubeCreator: React.FC = () => {
const [uploadingAvatar, setUploadingAvatar] = useState(false);
const [makingPresentable, setMakingPresentable] = useState(false);
const [regeneratingAvatar, setRegeneratingAvatar] = useState(false);
const [generatingImageSceneId, setGeneratingImageSceneId] = useState<number | null>(null);
const [generatingAudioSceneId, setGeneratingAudioSceneId] = useState<number | null>(null);
// Robust polling hook for image generation
const { startPolling: startImagePolling, stopPolling: stopImagePolling } = useImageGenerationPolling();
// Sync activeStep with persisted state on mount
useEffect(() => {
@@ -105,6 +114,7 @@ const YouTubeCreator: React.FC = () => {
scenes,
resolution,
renderTaskId,
imageModel: 'ideogram-v3-turbo', // Default for now, can be made configurable later
});
// Memoized computed values
@@ -312,7 +322,7 @@ const YouTubeCreator: React.FC = () => {
updateState({ scenes: updatedScenes });
setSuccess(successMessage);
// Navigate immediately to Render step so user can see scenes and cost estimates
// Navigate to Scene Generation step (step 2) to generate assets
setActiveStep(2);
// Clear success message after a brief moment
setTimeout(() => {
@@ -391,6 +401,246 @@ const YouTubeCreator: React.FC = () => {
updateState({ scenes: updatedScenes });
}, [scenes, updateState]);
// Always-current view of `scenes` for the asynchronous polling callback below.
// The image task can finish long after the handler was created; mapping over the
// `scenes` value captured at that time would discard anything stored in the
// meantime (for example an audioUrl generated while the image was still polling).
const latestScenesForImageRef = React.useRef(scenes);
useEffect(() => {
  latestScenesForImageRef.current = scenes;
}, [scenes]);

/**
 * Starts an image-generation task for a single scene and polls it until it finishes.
 *
 * On completion the scene's `imageUrl` is stored via `updateState`; on failure the
 * error banner is set. While a task is running, `generatingImageSceneId` holds the
 * scene number so that a second request for the same scene is ignored.
 *
 * @param scene - Scene to generate an image for.
 * @param imageSettings - Optional per-scene overrides (prompt, style, model, ...).
 * @throws Re-throws any error raised while starting the task so the caller
 *         (the scene card) can react to it as well.
 */
const handleGenerateSceneImage = useCallback(async (scene: Scene, imageSettings?: YouTubeImageGenerationSettings) => {
  console.log('[YouTubeCreator] handleGenerateSceneImage called for scene', scene.scene_number);
  console.log('[YouTubeCreator] This should ONLY be called for image generation, NOT audio generation');

  // Guard: prevent if already generating image for this scene
  if (generatingImageSceneId === scene.scene_number) {
    console.warn('[YouTubeCreator] Image generation already in progress for this scene');
    return;
  }

  setGeneratingImageSceneId(scene.scene_number);
  setError(null);

  try {
    console.log('[YouTubeCreator] Starting image generation task for scene', scene.scene_number);
    const taskResponse = await youtubeApi.generateSceneImage({
      sceneId: `scene_${scene.scene_number}`,
      sceneTitle: scene.title,
      sceneContent: scene.narration,
      baseAvatarUrl: avatarUrl || undefined,
      idea: videoPlan?.video_summary || userIdea,
      width: 1024,
      height: 576,
      customPrompt: imageSettings?.prompt,
      style: imageSettings?.style,
      renderingSpeed: imageSettings?.renderingSpeed,
      aspectRatio: imageSettings?.aspectRatio,
      model: imageSettings?.model,
    });
    console.log('[YouTubeCreator] Image generation task started:', taskResponse);

    if (!taskResponse.success) {
      throw new Error(taskResponse.message || 'Failed to start image generation task');
    }
    const taskId = taskResponse.task_id;

    // Start robust polling
    startImagePolling({
      taskId,
      sceneNumber: scene.scene_number,
      getStatus: youtubeApi.getImageGenerationStatus,
      onComplete: (imageUrl) => {
        console.log('[YouTubeCreator] Image generation completed!', {
          sceneNumber: scene.scene_number,
          imageUrl,
        });
        // Merge into the LATEST scenes (not the ones captured when the task
        // started) so concurrent updates to other fields/scenes are preserved.
        const updatedScenes = latestScenesForImageRef.current.map(s =>
          s.scene_number === scene.scene_number
            ? { ...s, imageUrl }
            : s
        );
        updateState({ scenes: updatedScenes });
        setSuccess(`Image generated for Scene ${scene.scene_number}!`);
        setTimeout(() => setSuccess(null), 3000);
        setGeneratingImageSceneId(null);
      },
      onError: (errorMsg) => {
        setError(errorMsg);
        setGeneratingImageSceneId(null);
      },
      onProgress: (progress, message) => {
        console.log(`[YouTubeCreator] Image generation in progress: ${progress}% - ${message}`);
      },
    });
  } catch (err: any) {
    // `detail` may be a plain string or a structured object; only use it
    // directly when it is a string so the banner never shows "[object Object]".
    const detail = err?.response?.data?.detail;
    const errorMessage = detail?.message
      || detail?.error
      || (typeof detail === 'string' ? detail : undefined)
      || err?.message
      || 'Failed to start image generation';
    setError(`Scene ${scene.scene_number}: ${errorMessage}`);
    setGeneratingImageSceneId(null);
    throw err; // Re-throw so SceneCard can handle it
  }
}, [avatarUrl, videoPlan, userIdea, updateState, generatingImageSceneId, startImagePolling]);
/**
 * Builds the text sent to the audio-generation endpoint for a scene.
 *
 * The result is the scene narration (prefixed with the title when the narration
 * does not already start with it) followed by bracketed delivery directives:
 *  - a delivery hint derived from the first emphasis tag,
 *  - a pacing hint listing visual cues that mention tempo/intensity keywords,
 *  - a pace correction when the words-per-minute implied by `duration_estimate`
 *    is above 200 or below 120.
 *
 * The words-per-minute figure is computed from the narration/title only, not from
 * the appended directives. When the text is too long, the narration part is
 * shortened (ending in "...") so the directives are kept intact.
 *
 * @param scene - Scene whose narration should be enriched; a missing narration
 *                is treated as an empty string.
 * @returns The enriched text, never longer than 10,000 characters.
 */
const buildEnrichedSceneText = (scene: Scene): string => {
  // Soft budget below WaveSpeed's 10,000 character limit, and the hard limit itself.
  const SOFT_LIMIT = 9500;
  const HARD_LIMIT = 10000;
  const PACING_KEYWORDS = ['slow', 'fast', 'energetic', 'calm', 'dramatic', 'intense'];

  const narration = scene.narration ?? '';

  // Add scene title for context unless the narration already opens with it.
  let spokenText = narration;
  if (scene.title && !narration.startsWith(scene.title)) {
    spokenText = `${scene.title}. ${narration}`;
  }

  const directives: string[] = [];

  // Delivery style hint based on the primary (first) emphasis tag.
  if (scene.emphasis_tags && scene.emphasis_tags.length > 0) {
    switch (scene.emphasis_tags[0]) {
      case 'hook':
        directives.push('speak with energy and excitement');
        break;
      case 'cta':
        directives.push('speak persuasively and confidently');
        break;
      case 'transition':
        directives.push('speak smoothly and clearly');
        break;
      default:
        directives.push('speak professionally and clearly');
    }
  }

  // Visual cues that also affect how the audio should be delivered.
  if (scene.visual_cues && scene.visual_cues.length > 0) {
    const audioRelevantCues = scene.visual_cues.filter(cue => {
      const lowered = cue.toLowerCase();
      return PACING_KEYWORDS.some(keyword => lowered.includes(keyword));
    });
    if (audioRelevantCues.length > 0) {
      directives.push(`Pacing: ${audioRelevantCues.join(', ')}`);
    }
  }

  // Pace correction from the duration estimate. Only real words count: the
  // bracketed directives are excluded and any whitespace run separates words.
  if (scene.duration_estimate && scene.duration_estimate > 0) {
    const wordCount = spokenText.split(/\s+/).filter(Boolean).length;
    if (wordCount > 0) {
      const wordsPerMinute = wordCount / (scene.duration_estimate / 60);
      if (wordsPerMinute > 200) {
        directives.push('Speak at a natural, conversational pace');
      } else if (wordsPerMinute < 120) {
        directives.push('Take time to articulate clearly');
      }
    }
  }

  const directiveSuffix = directives.map(directive => ` [${directive}]`).join('');

  // Shorten the narration, not the directives, when the text is too long.
  const spokenBudget = Math.max(0, SOFT_LIMIT - directiveSuffix.length);
  if (spokenText.length > spokenBudget) {
    spokenText = spokenText.substring(0, spokenBudget) + '...';
  }

  const enrichedText = spokenText + directiveSuffix;
  // Final safety net for pathologically long directive lists.
  return enrichedText.length > HARD_LIMIT ? enrichedText.substring(0, HARD_LIMIT) : enrichedText;
};
// Always-current view of `scenes` for use after the awaited audio request.
// The request can take a while; mapping over the `scenes` value captured when
// the handler was created would discard anything stored in the meantime
// (for example an imageUrl whose generation finished during the await).
const latestScenesForAudioRef = React.useRef(scenes);
useEffect(() => {
  latestScenesForAudioRef.current = scenes;
}, [scenes]);

/**
 * Generates narration audio for a single scene and stores the resulting
 * `audioUrl` on that scene via `updateState`.
 *
 * While the request is running, `generatingAudioSceneId` holds the scene number
 * so that a second request for the same scene is ignored.
 *
 * @param scene - Scene to generate audio for.
 * @param audioSettings - Optional voice/quality settings; defaults are used when omitted.
 * @throws Re-throws any request error so the caller (the scene card) can react to it.
 */
const handleGenerateSceneAudio = useCallback(async (scene: Scene, audioSettings?: AudioGenerationSettings) => {
  console.log('[YouTubeCreator] handleGenerateSceneAudio called for scene', scene.scene_number);
  console.log('[YouTubeCreator] This should ONLY be called for audio generation, NOT image generation');

  // Guard: prevent if already generating audio for this scene
  if (generatingAudioSceneId === scene.scene_number) {
    console.warn('[YouTubeCreator] Audio generation already in progress for this scene');
    return;
  }

  setGeneratingAudioSceneId(scene.scene_number);
  setError(null);

  try {
    // Enhanced audio defaults optimized for YouTube content
    // Speed 1.08: Natural conversational pace (engaging but not rushed)
    // Voice: Auto-selected based on content analysis
    // Emotion: Auto-selected based on scene content
    // High quality settings for professional YouTube audio
    const settings: AudioGenerationSettings = audioSettings ?? {
      voiceId: "", // Empty string triggers auto-selection by backend
      speed: 1.08, // Natural conversational pace - engaging but comfortable
      volume: 1.0, // Standard volume
      pitch: 0.0, // Neutral pitch for natural sound
      emotion: "happy", // Default emotion (backend will auto-select based on content)
      englishNormalization: true, // Better handling of numbers, dates, and technical terms
      sampleRate: 44100, // CD quality audio
      bitrate: 256000, // Highest quality: 256kbps for professional audio
      channel: "2" as const, // Stereo for richer audio experience
      format: "mp3" as const, // Universal format
      languageBoost: "English", // Optimize for English content
      enableSyncMode: true, // Reliable delivery
    };

    // Build enriched text for better audio generation.
    // (buildEnrichedSceneText only reads its argument, so it is safe to call
    // from this memoized callback without listing it as a dependency.)
    const enrichedText = buildEnrichedSceneText(scene);

    console.log('[YouTubeCreator] Calling youtubeApi.generateSceneAudio with enriched text:', {
      sceneId: `scene_${scene.scene_number}`,
      sceneTitle: scene.title,
      originalTextLength: scene.narration?.length,
      enrichedTextLength: enrichedText.length,
      voiceId: settings.voiceId || undefined, // Will auto-select if empty
      endpoint: '/api/youtube/audio',
      settings: settings,
      video_plan_context: {
        video_type: videoType,
        target_audience: targetAudience,
        tone: videoPlan?.tone,
        visual_style: videoPlan?.visual_style,
        video_goal: videoPlan?.video_goal,
      },
    });

    const result = await youtubeApi.generateSceneAudio({
      sceneId: `scene_${scene.scene_number}`,
      sceneTitle: scene.title,
      text: enrichedText, // Send enriched text instead of just narration
      voiceId: settings.voiceId || undefined, // Will auto-select if empty
      speed: settings.speed,
      volume: settings.volume,
      pitch: settings.pitch,
      emotion: settings.emotion,
      englishNormalization: settings.englishNormalization,
      sampleRate: settings.sampleRate,
      bitrate: settings.bitrate,
      channel: settings.channel,
      format: settings.format,
      languageBoost: settings.languageBoost,
      enableSyncMode: settings.enableSyncMode,
    });
    console.log('[YouTubeCreator] Audio generation result:', result);

    // Merge into the LATEST scenes (not the ones captured before the await)
    // so updates made while the request was in flight are preserved.
    const updatedScenes = latestScenesForAudioRef.current.map(s =>
      s.scene_number === scene.scene_number
        ? { ...s, audioUrl: result.audio_url }
        : s
    );
    updateState({ scenes: updatedScenes });
    setSuccess(`Audio generated for Scene ${scene.scene_number}!`);
    // Clear the banner after a moment, matching the image-generation handler.
    setTimeout(() => setSuccess(null), 3000);
  } catch (err: any) {
    // `detail` may be a plain string or a structured object; only use it
    // directly when it is a string so a non-string never reaches setError.
    const detail = err?.response?.data?.detail;
    const errorMessage = detail?.message
      || detail?.error
      || (typeof detail === 'string' ? detail : undefined)
      || err?.message
      || 'Failed to generate audio';
    setError(errorMessage);
    throw err; // Re-throw so SceneCard can handle it
  } finally {
    setGeneratingAudioSceneId(null);
  }
  // generatingAudioSceneId must be listed, otherwise the in-progress guard above
  // compares against a stale value.
}, [updateState, generatingAudioSceneId, videoType, targetAudience, videoPlan]);
const handleStartRender = useCallback(async () => {
if (scenes.length === 0) {
setError('Please build scenes first');
@@ -408,6 +658,19 @@ const YouTubeCreator: React.FC = () => {
return;
}
// VALIDATION: Check that all enabled scenes have both image and audio
const scenesMissingAssets = enabledScenes.filter(s => !s.imageUrl || !s.audioUrl);
if (scenesMissingAssets.length > 0) {
const missingList = scenesMissingAssets.map(s => {
const missing = [];
if (!s.imageUrl) missing.push('image');
if (!s.audioUrl) missing.push('audio');
return `Scene ${s.scene_number} (missing: ${missing.join(', ')})`;
}).join(', ');
setError(`Please generate images and audio for all enabled scenes before rendering. Missing: ${missingList}`);
return;
}
setLoading(true);
setError(null);
setSuccess(null);
@@ -472,17 +735,37 @@ const YouTubeCreator: React.FC = () => {
return;
}
if (scenes.length === 0) {
setError('Please build scenes before rendering.');
setError('Please build scenes first.');
return;
}
setActiveStep(2);
return;
}
if (targetStep === 3) {
if (!videoPlan) {
setError('Please generate a plan first.');
return;
}
if (scenes.length === 0) {
setError('Please build scenes first.');
return;
}
if (enabledScenesCount === 0) {
setError('Enable at least one scene to render.');
return;
}
setActiveStep(2);
// Check if all enabled scenes have assets
const enabledScenes = scenes.filter(s => s.enabled !== false);
const allReady = enabledScenes.every(s => s.imageUrl && s.audioUrl);
if (!allReady) {
setError('Please generate images and audio for all enabled scenes first.');
return;
}
setActiveStep(3);
return;
}
}, [activeStep, videoPlan, scenes.length, enabledScenesCount]);
}, [activeStep, videoPlan, scenes, enabledScenesCount]);
const handleResetRender = useCallback(() => {
updateState({
@@ -637,6 +920,29 @@ const YouTubeCreator: React.FC = () => {
)}
{activeStep === 2 && (
<SceneGenerationStep
scenes={scenes}
videoPlan={videoPlan}
editingSceneId={editingSceneId}
editedScene={editedScene}
onEditScene={handleEditScene}
onSaveScene={handleSaveScene}
onCancelEdit={handleCancelEdit}
onEditChange={handleEditChange}
onToggleScene={handleToggleScene}
onGenerateImage={handleGenerateSceneImage}
generatingImageSceneId={generatingImageSceneId}
onGenerateAudio={handleGenerateSceneAudio}
generatingAudioSceneId={generatingAudioSceneId}
loading={loading}
avatarUrl={avatarUrl}
videoPlanIdea={videoPlan?.video_summary || userIdea}
onBack={() => setActiveStep(1)}
onNext={() => setActiveStep(3)}
/>
)}
{activeStep === 3 && (
<RenderStep
renderTaskId={renderTaskId}
renderStatus={renderStatus}
@@ -649,19 +955,13 @@ const YouTubeCreator: React.FC = () => {
loading={loading}
scenes={scenes}
videoPlan={videoPlan}
editingSceneId={editingSceneId}
editedScene={editedScene}
onResolutionChange={(value) => updateState({ resolution: value })}
onCombineScenesChange={(value) => updateState({ combineScenes: value })}
onStartRender={handleStartRender}
onBack={() => setActiveStep(1)}
onBack={() => setActiveStep(2)}
onReset={handleResetRender}
onRetryFailedScenes={handleRetryFailedScenes}
onEditScene={handleEditScene}
onSaveScene={handleSaveScene}
onCancelEdit={handleCancelEdit}
onEditChange={handleEditChange}
onToggleScene={handleToggleScene}
onScenesUpdate={(updated) => updateState({ scenes: updated })}
getVideoUrl={getVideoUrl}
/>
)}

View File

@@ -0,0 +1,363 @@
/**
* Asset Generation Cost Card Component
*
* Displays cost estimate for generating images and audio for scenes.
* Optimized for Step 3 (Generate Assets).
*/
import React from 'react';
import {
Box,
Typography,
Stack,
Chip,
Alert,
Divider,
} from '@mui/material';
import {
MonetizationOn as MoneyIcon,
Image as ImageIcon,
VolumeUp as AudioIcon,
Info as InfoIcon,
} from '@mui/icons-material';
import { Scene } from '../../../services/youtubeApi';
/** Props for {@link AssetGenerationCostCard}. */
interface AssetGenerationCostCardProps {
/**
 * All scenes of the video. Scenes with `enabled === false` are ignored by the card;
 * a scene counts as needing an asset when its `imageUrl` / `audioUrl` is falsy.
 */
scenes: Scene[];
}
/** Estimated price in USD of one generated scene image (Ideogram V3 Turbo default). */
const COST_PER_IMAGE_USD = 0.10;
/** Estimated price in USD of one generated scene narration (Minimax TTS). */
const COST_PER_AUDIO_USD = 0.05;

/** Formats a dollar amount with two decimals, e.g. `$0.30`. */
const formatUsd = (amount: number): string => `$${amount.toFixed(2)}`;

/** Props for one asset line (images or audio) inside the cost card. */
interface AssetCostRowProps {
  /** Fully styled icon element shown before the title. */
  icon: React.ReactNode;
  /** Row heading, e.g. "Scene Images". */
  title: string;
  /** Chip colour used while assets of this kind are still missing. */
  accentColor: string;
  /** Number of enabled scenes still missing this asset. */
  neededCount: number;
  /** Estimated cost of generating the missing assets. */
  cost: number;
  /** Description shown when nothing is missing. */
  readyText: React.ReactNode;
  /** Description shown when at least one asset is missing. */
  pendingText: React.ReactNode;
  /** Per-item rate line shown when at least one asset is missing. */
  rateText: string;
}

/** Renders the status chip, optional cost chip, description and rate for one asset kind. */
const AssetCostRow: React.FC<AssetCostRowProps> = ({
  icon,
  title,
  accentColor,
  neededCount,
  cost,
  readyText,
  pendingText,
  rateText,
}) => {
  const allReady = neededCount === 0;
  const chipTextSx = { color: 'white', fontWeight: 600, fontSize: '0.75rem' };

  return (
    <Box>
      <Box sx={{ display: 'flex', alignItems: 'center', gap: 1, mb: 0.5 }}>
        {icon}
        <Typography
          variant="body2"
          sx={{ color: '#1e293b', fontWeight: 600, fontSize: '0.875rem' }}
        >
          {title}
        </Typography>
        <Chip
          label={allReady ? 'All Ready' : `${neededCount} needed`}
          size="small"
          sx={{ ml: 'auto', bgcolor: allReady ? '#10b981' : accentColor, ...chipTextSx }}
        />
        {!allReady && (
          <Chip
            label={formatUsd(cost)}
            size="small"
            sx={{ bgcolor: accentColor, ...chipTextSx }}
          />
        )}
      </Box>
      <Typography
        variant="body2"
        sx={{ color: '#64748b', fontSize: '0.8rem', lineHeight: 1.5, ml: 3.5 }}
      >
        {allReady ? readyText : pendingText}
      </Typography>
      {!allReady && (
        <Typography
          variant="caption"
          sx={{ color: '#94a3b8', fontSize: '0.7rem', display: 'block', ml: 3.5, mt: 0.5 }}
        >
          {rateText}
        </Typography>
      )}
    </Box>
  );
};

/**
 * Cost estimate card for the asset-generation step.
 *
 * Counts, among the enabled scenes, how many still lack an image and how many
 * still lack audio, multiplies each count by a fixed per-item rate and shows the
 * total plus a per-asset breakdown. Renders a warning instead when no scene is
 * enabled. Both the image row and the audio row are always shown, so a fully
 * prepared video displays "All Ready" for both asset kinds.
 */
export const AssetGenerationCostCard: React.FC<AssetGenerationCostCardProps> = React.memo(({
  scenes,
}) => {
  const enabledScenes = scenes.filter(s => s.enabled !== false);
  const numScenes = enabledScenes.length;

  // Calculate what's needed
  const scenesNeedingImages = enabledScenes.filter(s => !s.imageUrl).length;
  const scenesNeedingAudio = enabledScenes.filter(s => !s.audioUrl).length;

  // Calculate costs
  const imageCost = scenesNeedingImages * COST_PER_IMAGE_USD;
  const audioCost = scenesNeedingAudio * COST_PER_AUDIO_USD;
  const totalCost = imageCost + audioCost;
  const nothingToGenerate = totalCost === 0;

  if (numScenes === 0) {
    return (
      <Alert severity="warning" sx={{ mt: 2 }}>
        No enabled scenes to generate assets for.
      </Alert>
    );
  }

  // Grammatically correct subject for the "everything is ready" descriptions.
  const readySubject = numScenes === 1
    ? 'Your scene already has'
    : `All ${numScenes} scenes already have`;

  return (
    <Box
      sx={{
        mt: 3,
        p: 3,
        background: 'linear-gradient(135deg, #f5f7fa 0%, #c3cfe2 100%)',
        borderRadius: 3,
        border: '2px solid #667eea',
        boxShadow: '0 4px 6px -1px rgba(0, 0, 0, 0.1), 0 2px 4px -1px rgba(0, 0, 0, 0.06)',
      }}
    >
      {/* Header */}
      <Box sx={{ display: 'flex', alignItems: 'center', gap: 1.5, mb: 3 }}>
        <MoneyIcon sx={{ color: '#667eea', fontSize: 28 }} />
        <Box>
          <Typography
            variant="h6"
            sx={{
              fontWeight: 700,
              fontSize: '1.1rem',
              color: '#1e293b',
              letterSpacing: '-0.01em',
            }}
          >
            💰 Asset Generation Cost
          </Typography>
          <Typography variant="caption" sx={{ color: '#64748b', fontSize: '0.75rem' }}>
            Cost to generate images and audio for your scenes
          </Typography>
        </Box>
      </Box>

      {/* Total Cost Display */}
      <Box
        sx={{
          mb: 3,
          p: 2.5,
          bgcolor: 'white',
          borderRadius: 2,
          boxShadow: '0 2px 4px rgba(0, 0, 0, 0.08)',
        }}
      >
        <Typography
          variant="h3"
          sx={{
            fontWeight: 800,
            fontSize: '2.5rem',
            color: nothingToGenerate ? '#10b981' : '#667eea',
            lineHeight: 1.2,
            mb: 0.5,
          }}
        >
          {nothingToGenerate ? 'FREE!' : formatUsd(totalCost)}
        </Typography>
        {nothingToGenerate ? (
          <Typography
            variant="body2"
            sx={{ color: '#10b981', fontSize: '0.875rem', fontWeight: 600 }}
          >
            All scenes already have their assets!
          </Typography>
        ) : (
          <Typography
            variant="body2"
            sx={{ color: '#64748b', fontSize: '0.875rem', fontWeight: 500 }}
          >
            To generate missing assets for {scenesNeedingImages + scenesNeedingAudio} item(s)
          </Typography>
        )}
      </Box>

      {/* What's Included Section */}
      <Box
        sx={{
          p: 2.5,
          bgcolor: 'white',
          borderRadius: 2,
          mb: 2.5,
          boxShadow: '0 1px 3px rgba(0, 0, 0, 0.08)',
        }}
      >
        <Typography
          variant="subtitle2"
          sx={{
            color: '#1e293b',
            fontWeight: 700,
            mb: 2,
            fontSize: '0.95rem',
            display: 'flex',
            alignItems: 'center',
            gap: 1,
          }}
        >
          <InfoIcon sx={{ fontSize: 18, color: '#667eea' }} />
          What You'll Generate
        </Typography>
        <Stack spacing={2}>
          {/* Scene Images */}
          <AssetCostRow
            icon={<ImageIcon sx={{ fontSize: 20, color: '#10b981' }} />}
            title="Scene Images"
            accentColor="#667eea"
            neededCount={scenesNeedingImages}
            cost={imageCost}
            readyText={<>{readySubject} {numScenes === 1 ? 'a custom image' : 'custom images'}</>}
            pendingText={
              <>
                Creating{' '}
                <strong>
                  {scenesNeedingImages} AI-generated {scenesNeedingImages === 1 ? 'image' : 'images'}
                </strong>{' '}
                tailored to your scene content
              </>
            }
            rateText={`Rate: ${formatUsd(COST_PER_IMAGE_USD)}/image • High-quality visuals using Ideogram V3 Turbo`}
          />

          {/* Scene Audio — always shown so the ready state is visible too */}
          <Divider sx={{ my: 0.5 }} />
          <AssetCostRow
            icon={<AudioIcon sx={{ fontSize: 20, color: '#f59e0b' }} />}
            title="Audio Narration"
            accentColor="#f59e0b"
            neededCount={scenesNeedingAudio}
            cost={audioCost}
            readyText={<>{readySubject} professional voice narration</>}
            pendingText={
              <>
                Generating{' '}
                <strong>
                  {scenesNeedingAudio} AI voice {scenesNeedingAudio === 1 ? 'narration' : 'narrations'}
                </strong>{' '}
                from your scene scripts
              </>
            }
            rateText={`Rate: ${formatUsd(COST_PER_AUDIO_USD)}/audio • Natural-sounding voice using Minimax TTS`}
          />
        </Stack>

        {/* Summary Box */}
        <Box
          sx={{
            mt: 2,
            p: 1.5,
            bgcolor: '#f1f5f9',
            borderRadius: 1.5,
            border: '1px solid #cbd5e1',
          }}
        >
          <Typography
            variant="caption"
            sx={{
              color: '#475569',
              fontSize: '0.75rem',
              lineHeight: 1.6,
              display: 'block',
            }}
          >
            💡 <strong>Smart Generation:</strong> Generate only what you need! If you already have an image or audio for a scene,
            we won't charge you to regenerate it unless you explicitly click the regenerate button.
          </Typography>
        </Box>
      </Box>

      {/* Help Section */}
      <Alert
        severity="info"
        icon={<InfoIcon />}
        sx={{
          bgcolor: '#eff6ff',
          border: '1px solid #bfdbfe',
          '& .MuiAlert-icon': {
            color: '#3b82f6',
          },
        }}
      >
        <Typography variant="body2" sx={{ fontWeight: 600, fontSize: '0.8rem', mb: 0.5 }}>
          How does this work?
        </Typography>
        <Typography variant="caption" sx={{ fontSize: '0.75rem', lineHeight: 1.5, display: 'block' }}>
          Click "Generate Image" and "Generate Audio" buttons on each scene card. Images use AI to create custom
          visuals matching your content, and audio uses text-to-speech to narrate your script naturally.
          You only pay for what you generate!
        </Typography>
      </Alert>
    </Box>
  );
});
AssetGenerationCostCard.displayName = 'AssetGenerationCostCard';

View File

@@ -0,0 +1,512 @@
import React, { useEffect, useState } from "react";
import {
Dialog,
DialogTitle,
DialogContent,
DialogActions,
Stack,
Box,
Typography,
Slider,
Select,
MenuItem,
FormControl,
InputLabel,
FormControlLabel,
Checkbox,
Tooltip,
IconButton,
alpha,
TextField,
} from "@mui/material";
import { HelpOutline as HelpOutlineIcon, Close as CloseIcon, VolumeUp } from "@mui/icons-material";
import { Button } from "@mui/material";
/**
 * Voice and output-quality settings edited by the audio settings modal and
 * handed to `onApplySettings` when the user confirms.
 */
export type YouTubeAudioGenerationSettings = {
/** Identifier of the voice to use; the modal offers the entries of VOICE_OPTIONS. */
voiceId: string;
/** Speaking speed; the modal's slider allows 0.5 to 2.0 in steps of 0.05. */
speed: number;
/** Loudness; the modal's slider allows 0.1 to 10.0 in steps of 0.1. */
volume: number;
/** Pitch offset; the modal's slider allows -12 to 12 in steps of 0.5. */
pitch: number;
/** Emotional delivery style; the modal offers the entries of EMOTION_OPTIONS. */
emotion: string;
/** Whether English text normalization is enabled. */
englishNormalization: boolean;
/** Output sample rate in Hz; the modal displays 24000 when this is unset. */
sampleRate?: number;
/** Output bitrate in bits per second (the modal labels it as value / 1000 kbps). */
bitrate: number;
/** Channel count as a string: "1" is labelled mono and "2" stereo in the modal. */
channel: "1" | "2";
/** Output audio container/encoding. */
format: "mp3" | "wav" | "pcm" | "flac";
/** Language to boost; the modal displays "auto" when this is unset. */
languageBoost?: string;
/** Sync-mode flag; per the modal's help text, the request waits for the audio before returning. */
enableSyncMode: boolean;
};
/** Props accepted by the AudioSettingsModal component. */
interface AudioSettingsModalProps {
/** Whether the dialog is shown. */
open: boolean;
/** Called when the dialog is dismissed (backdrop, close icon, or Cancel). */
onClose: () => void;
/** Called with the edited settings when the user presses the apply button. */
onApplySettings: (settings: YouTubeAudioGenerationSettings) => void;
/** Settings the form starts from; the form is reset whenever this value changes. */
initialSettings: YouTubeAudioGenerationSettings;
/** When true, both action buttons are disabled and the apply button reads "Generating...". */
isGenerating?: boolean;
/** Optional scene title shown in the dialog header. */
sceneTitle?: string;
}
// Voice identifiers offered in the voice dropdown (minimax/speech-02-hd voices).
const VOICE_OPTIONS = [
  "Wise_Woman", "Friendly_Person", "Inspirational_girl", "Deep_Voice_Man",
  "Calm_Woman", "Casual_Guy", "Lively_Girl", "Patient_Man",
  "Young_Knight", "Determined_Man", "Lovely_Girl", "Decent_Boy",
  "Imposing_Manner", "Elegant_Man", "Abbess", "Sweet_Girl_2",
  "Exuberant_Girl",
];

// Emotion values offered in the emotion dropdown.
const EMOTION_OPTIONS = [
  "happy", "sad", "angry", "fearful",
  "disgusted", "surprised", "neutral",
];

// Selectable output sample rates, in Hz.
const SAMPLE_RATE_OPTIONS = [8000, 16000, 22050, 24000, 32000, 44100];

// Selectable output bitrates, in bits per second (32, 64, 128 and 256 kbps).
const BITRATE_OPTIONS = [32, 64, 128, 256].map((kbps) => kbps * 1000);

// Values offered in the "Language boost" dropdown; "auto" is shown when no value is set.
const LANGUAGE_BOOST_OPTIONS = [
  "auto",
  "English", "Chinese", "Chinese,Yue", "Arabic",
  "Russian", "Spanish", "French", "Portuguese",
  "German", "Turkish", "Dutch", "Ukrainian",
  "Vietnamese", "Indonesian", "Japanese", "Italian",
  "Korean", "Thai", "Polish", "Romanian",
  "Greek", "Czech", "Finnish", "Hindi",
];
// Shared styling for controls rendered on the dialog's dark gradient background.
const MODAL_HELP_ICON_COLOR = "rgba(255,255,255,0.5)";
const MODAL_MUTED_TEXT_COLOR = "rgba(255,255,255,0.7)";

/** Styling for bare `Select` controls (voice, emotion). */
const MODAL_SELECT_SX = {
  backgroundColor: alpha("#ffffff", 0.1),
  color: "white",
  "& .MuiOutlinedInput-notchedOutline": { borderColor: "rgba(255,255,255,0.3)" },
  "&:hover .MuiOutlinedInput-notchedOutline": { borderColor: "rgba(255,255,255,0.4)" },
  "&.Mui-focused .MuiOutlinedInput-notchedOutline": { borderColor: "#ffffff" },
  "& .MuiSvgIcon-root": { color: MODAL_MUTED_TEXT_COLOR },
};

/** Styling for `TextField select` controls (language, quality, channel, format). */
const MODAL_TEXT_FIELD_SX = {
  "& .MuiOutlinedInput-root": {
    backgroundColor: alpha("#ffffff", 0.1),
    color: "white",
    "& fieldset": { borderColor: "rgba(255,255,255,0.3)" },
    "&:hover fieldset": { borderColor: "rgba(255,255,255,0.4)" },
    "&.Mui-focused fieldset": { borderColor: "#ffffff" },
    "& .MuiSvgIcon-root": { color: MODAL_MUTED_TEXT_COLOR },
  },
};

/** Label styling for `TextField select` controls. */
const MODAL_INPUT_LABEL_PROPS = { sx: { color: MODAL_MUTED_TEXT_COLOR } };

/** Styling for the small explanatory captions under controls. */
const MODAL_CAPTION_SX = { opacity: 0.7, mt: 0.5, display: "block" };

/**
 * Dialog for editing the audio-generation settings of a single scene.
 *
 * The form starts from `initialSettings` (and is reset whenever that prop
 * changes); edits stay local until the user presses the apply button, at which
 * point the current values are passed to `onApplySettings`. The dialog does not
 * close itself after applying — the parent controls `open`.
 */
export const AudioSettingsModal: React.FC<AudioSettingsModalProps> = ({
  open,
  onClose,
  onApplySettings,
  initialSettings,
  isGenerating = false,
  sceneTitle,
}) => {
  const [settings, setSettings] = useState<YouTubeAudioGenerationSettings>(initialSettings);

  // Re-seed the form when the parent supplies different initial settings.
  useEffect(() => {
    setSettings(initialSettings);
  }, [initialSettings]);

  /** Updates one field, based on the latest state rather than the render-time snapshot. */
  const updateSetting = <K extends keyof YouTubeAudioGenerationSettings>(
    key: K,
    value: YouTubeAudioGenerationSettings[K],
  ) => {
    setSettings((previous) => ({ ...previous, [key]: value }));
  };

  const handleApply = () => {
    onApplySettings(settings);
  };

  return (
    <Dialog
      open={open}
      onClose={onClose}
      maxWidth="md"
      fullWidth
      PaperProps={{
        sx: {
          background: "linear-gradient(135deg, #667eea 0%, #764ba2 100%)",
          color: "white",
        },
      }}
    >
      <DialogTitle>
        <Stack direction="row" justifyContent="space-between" alignItems="center">
          <Box>
            <Typography variant="h6" sx={{ fontWeight: 600, mb: 0.5 }}>
              Audio Generation Settings
            </Typography>
            {sceneTitle && (
              <Typography variant="body2" sx={{ opacity: 0.8 }}>
                Configure voice settings for "{sceneTitle}"
              </Typography>
            )}
          </Box>
          <IconButton onClick={onClose} size="small" sx={{ color: MODAL_MUTED_TEXT_COLOR }}>
            <CloseIcon />
          </IconButton>
        </Stack>
        <Typography variant="body2" sx={{ opacity: 0.7, mt: 1 }}>
          Customize voice, tone, and quality for better audio results. Changes apply only to this scene.
        </Typography>
      </DialogTitle>

      <DialogContent>
        <Stack spacing={3} sx={{ mt: 1 }}>
          {/* Voice Selection */}
          <Box>
            <Stack direction="row" spacing={1} alignItems="center" sx={{ mb: 1 }}>
              <Typography variant="subtitle1" sx={{ fontWeight: 600 }}>
                Voice
              </Typography>
              <Tooltip title="Choose from professional voice options. Each voice has unique characteristics for different content types." arrow>
                <IconButton size="small" sx={{ color: MODAL_HELP_ICON_COLOR }}>
                  <HelpOutlineIcon fontSize="small" />
                </IconButton>
              </Tooltip>
            </Stack>
            <FormControl fullWidth>
              <Select
                value={settings.voiceId}
                onChange={(e) => updateSetting("voiceId", e.target.value)}
                sx={MODAL_SELECT_SX}
              >
                {VOICE_OPTIONS.map((voice) => (
                  <MenuItem key={voice} value={voice}>
                    {/* Replace EVERY underscore: "Sweet_Girl_2" -> "Sweet Girl 2" */}
                    {voice.replace(/_/g, " ")}
                  </MenuItem>
                ))}
              </Select>
            </FormControl>
          </Box>

          {/* Speed / Volume / Pitch */}
          <Stack direction={{ xs: "column", sm: "row" }} spacing={2}>
            <Box sx={{ flex: 1 }}>
              <Stack direction="row" spacing={1} alignItems="center" sx={{ mb: 0.5 }}>
                <Typography variant="subtitle2" sx={{ fontWeight: 600 }}>
                  Speed ({settings.speed.toFixed(2)})
                </Typography>
                <Tooltip title="How fast the voice speaks. 1.0 = normal speed. Lower for narration, higher for conversational." arrow>
                  <HelpOutlineIcon fontSize="small" sx={{ color: MODAL_HELP_ICON_COLOR }} />
                </Tooltip>
              </Stack>
              <Slider
                value={settings.speed}
                min={0.5}
                max={2.0}
                step={0.05}
                onChange={(_, v) => updateSetting("speed", v as number)}
                sx={{ color: "#4ade80" }}
              />
              <Typography variant="caption" sx={{ opacity: 0.7 }}>
                0.5 = Slower (narrative) 1.0 = Normal 2.0 = Faster (energetic)
              </Typography>
            </Box>
            <Box sx={{ flex: 1 }}>
              <Stack direction="row" spacing={1} alignItems="center" sx={{ mb: 0.5 }}>
                <Typography variant="subtitle2" sx={{ fontWeight: 600 }}>
                  Volume ({settings.volume.toFixed(1)})
                </Typography>
                <Tooltip title="Loudness of the voice. 1.0 = normal volume." arrow>
                  <HelpOutlineIcon fontSize="small" sx={{ color: MODAL_HELP_ICON_COLOR }} />
                </Tooltip>
              </Stack>
              <Slider
                value={settings.volume}
                min={0.1}
                max={10.0}
                step={0.1}
                onChange={(_, v) => updateSetting("volume", v as number)}
                sx={{ color: "#fbbf24" }}
              />
              <Typography variant="caption" sx={{ opacity: 0.7 }}>
                0.1 = Very soft 1.0 = Normal 10.0 = Very loud
              </Typography>
            </Box>
            <Box sx={{ flex: 1 }}>
              <Stack direction="row" spacing={1} alignItems="center" sx={{ mb: 0.5 }}>
                <Typography variant="subtitle2" sx={{ fontWeight: 600 }}>
                  Pitch ({settings.pitch})
                </Typography>
                <Tooltip title="Tone of the voice. 0 = neutral. Negative = deeper, positive = higher pitched." arrow>
                  <HelpOutlineIcon fontSize="small" sx={{ color: MODAL_HELP_ICON_COLOR }} />
                </Tooltip>
              </Stack>
              <Slider
                value={settings.pitch}
                min={-12}
                max={12}
                step={0.5}
                onChange={(_, v) => updateSetting("pitch", v as number)}
                sx={{ color: "#f87171" }}
              />
              <Typography variant="caption" sx={{ opacity: 0.7 }}>
                -12 = Very deep 0 = Normal +12 = Very high
              </Typography>
            </Box>
          </Stack>

          {/* Emotion */}
          <Box>
            <Stack direction="row" spacing={1} alignItems="center" sx={{ mb: 1 }}>
              <Typography variant="subtitle1" sx={{ fontWeight: 600 }}>
                Emotion
              </Typography>
              <Tooltip title="Sets the vocal mood and emotional delivery style." arrow>
                <IconButton size="small" sx={{ color: MODAL_HELP_ICON_COLOR }}>
                  <HelpOutlineIcon fontSize="small" />
                </IconButton>
              </Tooltip>
            </Stack>
            <FormControl fullWidth>
              <Select
                value={settings.emotion}
                onChange={(e) => updateSetting("emotion", e.target.value)}
                sx={MODAL_SELECT_SX}
              >
                {EMOTION_OPTIONS.map((emotion) => (
                  <MenuItem key={emotion} value={emotion}>
                    {emotion.charAt(0).toUpperCase() + emotion.slice(1)}
                  </MenuItem>
                ))}
              </Select>
            </FormControl>
            <Typography variant="caption" sx={MODAL_CAPTION_SX}>
              Choose emotion that matches your content: happy for upbeat, neutral for professional, sad for serious topics.
            </Typography>
          </Box>

          {/* Language & Normalization */}
          <Stack direction={{ xs: "column", sm: "row" }} spacing={2}>
            <Box sx={{ flex: 1 }}>
              <FormControlLabel
                control={
                  <Checkbox
                    checked={settings.englishNormalization}
                    onChange={(e) => updateSetting("englishNormalization", e.target.checked)}
                    sx={{ color: MODAL_MUTED_TEXT_COLOR }}
                  />
                }
                label={
                  <Typography variant="body2" sx={{ color: "white" }}>
                    English normalization
                  </Typography>
                }
              />
              <Typography variant="caption" sx={{ opacity: 0.7 }}>
                Improves pronunciation of numbers, dates, and technical terms.
              </Typography>
            </Box>
            <Box sx={{ flex: 1 }}>
              <TextField
                select
                fullWidth
                label="Language boost"
                value={settings.languageBoost || "auto"}
                onChange={(e) => updateSetting("languageBoost", e.target.value)}
                InputLabelProps={MODAL_INPUT_LABEL_PROPS}
                sx={MODAL_TEXT_FIELD_SX}
              >
                {LANGUAGE_BOOST_OPTIONS.map((option) => (
                  <MenuItem key={option} value={option}>
                    {option}
                  </MenuItem>
                ))}
              </TextField>
              <Typography variant="caption" sx={MODAL_CAPTION_SX}>
                Enhances pronunciation for specific languages.
              </Typography>
            </Box>
          </Stack>

          {/* Quality Settings */}
          <Stack direction={{ xs: "column", sm: "row" }} spacing={2}>
            <Box sx={{ flex: 1 }}>
              <TextField
                select
                fullWidth
                label="Sample rate"
                value={settings.sampleRate || 24000}
                onChange={(e) => updateSetting("sampleRate", Number(e.target.value))}
                InputLabelProps={MODAL_INPUT_LABEL_PROPS}
                sx={MODAL_TEXT_FIELD_SX}
              >
                {SAMPLE_RATE_OPTIONS.map((rate) => (
                  <MenuItem key={rate} value={rate}>
                    {rate} Hz
                  </MenuItem>
                ))}
              </TextField>
              <Typography variant="caption" sx={MODAL_CAPTION_SX}>
                Higher = better quality, larger files. 24kHz recommended for voice.
              </Typography>
            </Box>
            <Box sx={{ flex: 1 }}>
              <TextField
                select
                fullWidth
                label="Bitrate"
                value={settings.bitrate}
                onChange={(e) => updateSetting("bitrate", Number(e.target.value))}
                InputLabelProps={MODAL_INPUT_LABEL_PROPS}
                sx={MODAL_TEXT_FIELD_SX}
              >
                {BITRATE_OPTIONS.map((bitrate) => (
                  <MenuItem key={bitrate} value={bitrate}>
                    {bitrate / 1000} kbps
                  </MenuItem>
                ))}
              </TextField>
              <Typography variant="caption" sx={MODAL_CAPTION_SX}>
                Higher = clearer audio, larger files. 128kbps recommended.
              </Typography>
            </Box>
          </Stack>

          {/* Format & Channel */}
          <Stack direction={{ xs: "column", sm: "row" }} spacing={2}>
            <Box sx={{ flex: 1 }}>
              <TextField
                select
                fullWidth
                label="Channel"
                value={settings.channel}
                onChange={(e) => updateSetting("channel", e.target.value as "1" | "2")}
                InputLabelProps={MODAL_INPUT_LABEL_PROPS}
                sx={MODAL_TEXT_FIELD_SX}
              >
                <MenuItem value="1">Mono (smaller files, voice-focused)</MenuItem>
                <MenuItem value="2">Stereo (better spatial audio)</MenuItem>
              </TextField>
            </Box>
            <Box sx={{ flex: 1 }}>
              <TextField
                select
                fullWidth
                label="Format"
                value={settings.format}
                onChange={(e) => updateSetting("format", e.target.value as "mp3" | "wav" | "pcm" | "flac")}
                InputLabelProps={MODAL_INPUT_LABEL_PROPS}
                sx={MODAL_TEXT_FIELD_SX}
              >
                <MenuItem value="mp3">MP3 (compressed, universal)</MenuItem>
                <MenuItem value="wav">WAV (uncompressed, high quality)</MenuItem>
                <MenuItem value="pcm">PCM (raw audio data)</MenuItem>
                <MenuItem value="flac">FLAC (lossless compression)</MenuItem>
              </TextField>
            </Box>
          </Stack>

          {/* Sync Mode */}
          <Box>
            <FormControlLabel
              control={
                <Checkbox
                  checked={settings.enableSyncMode}
                  onChange={(e) => updateSetting("enableSyncMode", e.target.checked)}
                  sx={{ color: MODAL_MUTED_TEXT_COLOR }}
                />
              }
              label={
                <Typography variant="body2" sx={{ color: "white" }}>
                  Enable sync mode (faster, recommended)
                </Typography>
              }
            />
            <Typography variant="caption" sx={{ opacity: 0.7 }}>
              Waits for audio completion before returning. Recommended for most use cases.
            </Typography>
          </Box>
        </Stack>
      </DialogContent>

      <DialogActions sx={{ p: 3, pt: 2 }}>
        <Button
          onClick={onClose}
          disabled={isGenerating}
          sx={{ color: MODAL_MUTED_TEXT_COLOR }}
        >
          Cancel
        </Button>
        <Button
          onClick={handleApply}
          variant="contained"
          disabled={isGenerating}
          startIcon={isGenerating ? undefined : <VolumeUp />}
          sx={{
            backgroundColor: "#4ade80",
            "&:hover": { backgroundColor: "#22c55e" },
            "&:disabled": { backgroundColor: "rgba(255,255,255,0.2)" },
          }}
        >
          {isGenerating ? "Generating..." : "Apply Settings & Generate"}
        </Button>
      </DialogActions>
    </Dialog>
  );
};

View File

@@ -18,7 +18,7 @@ import {
IconButton,
Alert,
} from '@mui/material';
import { HelpOutline, Timeline, BarChart, AccessTime, Movie, Info } from '@mui/icons-material';
import { HelpOutline, Timeline, BarChart, AccessTime, Movie, Info, Image as ImageIcon, VolumeUp, CheckCircle } from '@mui/icons-material';
import { Scene } from '../../../services/youtubeApi';
import { getSceneIcon, getSceneColor, getSceneTypeLabel, formatDuration } from '../utils/sceneHelpers';
@@ -40,6 +40,12 @@ export const CombinedSceneOverview: React.FC<CombinedSceneOverviewProps> = React
return acc;
}, {} as Record<string, number>);
// Asset readiness stats
const scenesWithImages = enabledScenes.filter(s => s.imageUrl).length;
const scenesWithAudio = enabledScenes.filter(s => s.audioUrl).length;
const scenesWithBoth = enabledScenes.filter(s => s.imageUrl && s.audioUrl).length;
const allReady = enabledScenes.length > 0 && scenesWithBoth === enabledScenes.length;
return {
totalScenes: scenes.length,
enabledScenes: enabledScenes.length,
@@ -47,6 +53,10 @@ export const CombinedSceneOverview: React.FC<CombinedSceneOverviewProps> = React
averageDuration,
sceneBreakdown,
enabledScenesList: enabledScenes,
scenesWithImages,
scenesWithAudio,
scenesWithBoth,
allReady,
};
}, [scenes]);
@@ -191,6 +201,79 @@ export const CombinedSceneOverview: React.FC<CombinedSceneOverviewProps> = React
<Divider sx={{ my: 0.5 }} />
{/* Asset Readiness */}
<Box>
<Typography
variant="caption"
sx={{
fontWeight: 600,
color: '#6b7280',
fontSize: '0.75rem',
textTransform: 'uppercase',
letterSpacing: '0.05em',
display: 'block',
mb: 1,
}}
>
Asset Status
</Typography>
<Stack direction="row" spacing={1} flexWrap="wrap" useFlexGap>
<Tooltip
title="Number of scenes with AI-generated images ready"
arrow
>
<Chip
icon={<ImageIcon sx={{ fontSize: 14 }} />}
label={`${stats.scenesWithImages}/${stats.enabledScenes} Images`}
size="small"
sx={{
fontWeight: 500,
fontSize: '0.75rem',
bgcolor: stats.scenesWithImages === stats.enabledScenes ? '#d1fae5' : '#fef3c7',
color: stats.scenesWithImages === stats.enabledScenes ? '#065f46' : '#92400e',
border: `1px solid ${stats.scenesWithImages === stats.enabledScenes ? '#10b981' : '#f59e0b'}`,
}}
/>
</Tooltip>
<Tooltip
title="Number of scenes with audio narration ready"
arrow
>
<Chip
icon={<VolumeUp sx={{ fontSize: 14 }} />}
label={`${stats.scenesWithAudio}/${stats.enabledScenes} Audio`}
size="small"
sx={{
fontWeight: 500,
fontSize: '0.75rem',
bgcolor: stats.scenesWithAudio === stats.enabledScenes ? '#d1fae5' : '#fef3c7',
color: stats.scenesWithAudio === stats.enabledScenes ? '#065f46' : '#92400e',
border: `1px solid ${stats.scenesWithAudio === stats.enabledScenes ? '#10b981' : '#f59e0b'}`,
}}
/>
</Tooltip>
{stats.allReady && (
<Tooltip
title="All scenes are ready for video generation!"
arrow
>
<Chip
icon={<CheckCircle sx={{ fontSize: 14 }} />}
label="All Ready"
size="small"
color="success"
sx={{
fontWeight: 600,
fontSize: '0.75rem',
}}
/>
</Tooltip>
)}
</Stack>
</Box>
<Divider sx={{ my: 0.5 }} />
{/* Scene Type Breakdown */}
<Box>
<Typography

View File

@@ -1,43 +1,91 @@
/**
* Cost Estimate Card Component
*
* Displays professional cost estimate with breakdown and per-scene costs.
* Displays user-friendly cost estimate with clear breakdown and explanations.
*/
import React from 'react';
import React, { useMemo } from 'react';
import {
Box,
Typography,
Stack,
CircularProgress,
Alert,
Chip,
Divider,
} from '@mui/material';
import { CostEstimate } from '../../../services/youtubeApi';
import {
MonetizationOn as MoneyIcon,
VideoLibrary as VideoIcon,
Image as ImageIcon,
Info as InfoIcon,
} from '@mui/icons-material';
import { CostEstimate, Scene } from '../../../services/youtubeApi';
interface CostEstimateCardProps {
costEstimate: CostEstimate | null;
loadingCostEstimate: boolean;
scenes?: Scene[];
}
export const CostEstimateCard: React.FC<CostEstimateCardProps> = React.memo(({
costEstimate,
loadingCostEstimate,
scenes = [],
}) => {
// Total image cost for the estimate. Uses `total_image_cost` when it is truthy;
// otherwise falls back to `num_scenes * image_cost_per_scene`, or 0 when no
// per-scene image rate is present. Yields 0 while there is no estimate.
const totalImageCost = useMemo(() => {
if (!costEstimate) return 0;
// `||` (not `??`): a reported total of 0 also takes the per-scene fallback.
return costEstimate.total_image_cost ||
(costEstimate.image_cost_per_scene ? costEstimate.num_scenes * costEstimate.image_cost_per_scene : 0);
}, [costEstimate]);
// Video rendering cost: what remains of `total_cost` after subtracting the image cost.
// NOTE(review): assumes `total_cost` already includes the image cost; if it does
// not, this value is under-reported and could go negative — confirm against the API.
const videoRenderCost = useMemo(() => {
if (!costEstimate) return 0;
return costEstimate.total_cost - totalImageCost;
}, [costEstimate, totalImageCost]);
if (loadingCostEstimate) {
return (
<Box sx={{ display: 'flex', alignItems: 'center', gap: 1, mt: 2 }}>
<CircularProgress size={16} />
<Typography variant="body2" color="text.secondary">
Calculating cost estimate...
Calculating your video cost...
</Typography>
</Box>
);
}
if (!costEstimate) {
// Work out which enabled scenes still lack assets so the warning can name
// exactly what is missing. A scene counts as enabled unless `enabled` is
// explicitly false.
const enabledScenes = scenes.filter((s) => s.enabled !== false);
const missingImageCount = enabledScenes.filter((s) => !s.imageUrl).length;
const missingAudioCount = enabledScenes.filter((s) => !s.audioUrl).length;

// One human-readable fragment per missing asset type. A scene missing both
// assets is already counted by each of the two checks, so no separate
// "missing both" pass is needed.
const missingDetails: string[] = [];
if (missingImageCount > 0) {
  const plural = missingImageCount !== 1 ? 's' : '';
  missingDetails.push(`${missingImageCount} scene${plural} missing image${plural}`);
}
if (missingAudioCount > 0) {
  missingDetails.push(`${missingAudioCount} scene${missingAudioCount !== 1 ? 's' : ''} missing audio`);
}

// Fall back to the generic hint when nothing specific could be identified
// (e.g. no scenes were passed in, or every enabled scene has both assets).
const errorMessage = missingDetails.length > 0
  ? `Unable to calculate cost: ${missingDetails.join(', ')}. Go back to "Generate Assets" step to create missing assets.`
  : 'Please ensure all enabled scenes have images and audio.';
return (
<Alert severity="warning" sx={{ mt: 2 }}>
Unable to calculate cost estimate. Please check your scenes and try again.
<Typography variant="body2" sx={{ fontWeight: 500 }}>
Unable to calculate cost estimate
</Typography>
<Typography variant="caption" sx={{ display: 'block', mt: 0.5 }}>
{errorMessage}
</Typography>
</Alert>
);
}
@@ -47,33 +95,56 @@ export const CostEstimateCard: React.FC<CostEstimateCardProps> = React.memo(({
sx={{
mt: 3,
p: 3,
bgcolor: '#ffffff',
borderRadius: 2,
border: '2px solid #e5e7eb',
boxShadow: '0 1px 3px 0 rgba(0, 0, 0, 0.1)',
// Gradients are only valid on `background`; `bgcolor` maps to background-color,
// which cannot take a gradient, so that declaration was dead and is dropped.
background: 'linear-gradient(135deg, #f5f7fa 0%, #c3cfe2 100%)',
borderRadius: 3,
border: '2px solid #667eea',
boxShadow: '0 4px 6px -1px rgba(0, 0, 0, 0.1), 0 2px 4px -1px rgba(0, 0, 0, 0.06)',
}}
>
<Box sx={{ display: 'flex', alignItems: 'center', gap: 1, mb: 2 }}>
<Typography
variant="h6"
sx={{
fontWeight: 700,
fontSize: '1rem',
color: '#111827',
letterSpacing: '-0.01em',
}}
>
Estimated Cost
</Typography>
{/* Header */}
<Box sx={{ display: 'flex', alignItems: 'center', gap: 1.5, mb: 3 }}>
<MoneyIcon sx={{ color: '#667eea', fontSize: 28 }} />
<Box>
<Typography
variant="h6"
sx={{
fontWeight: 700,
fontSize: '1.1rem',
color: '#1e293b',
letterSpacing: '-0.01em',
}}
>
💰 Total Cost Estimate
</Typography>
<Typography
variant="caption"
sx={{
color: '#64748b',
fontSize: '0.75rem',
}}
>
What you'll pay to create this video
</Typography>
</Box>
</Box>
<Box sx={{ mb: 2.5 }}>
{/* Total Cost Display */}
<Box
sx={{
mb: 3,
p: 2.5,
bgcolor: 'white',
borderRadius: 2,
boxShadow: '0 2px 4px rgba(0, 0, 0, 0.08)',
}}
>
<Typography
variant="h4"
variant="h3"
sx={{
fontWeight: 700,
fontSize: '2rem',
color: '#111827',
fontWeight: 800,
fontSize: '2.5rem',
color: '#667eea',
lineHeight: 1.2,
mb: 0.5,
}}
@@ -83,131 +154,339 @@ export const CostEstimateCard: React.FC<CostEstimateCardProps> = React.memo(({
<Typography
variant="body2"
sx={{
color: '#6b7280',
color: '#64748b',
fontSize: '0.875rem',
fontWeight: 500,
}}
>
Range: ${costEstimate.estimated_cost_range.min.toFixed(2)} - ${costEstimate.estimated_cost_range.max.toFixed(2)}
Estimated range: ${costEstimate.estimated_cost_range.min.toFixed(2)} - ${costEstimate.estimated_cost_range.max.toFixed(2)}
</Typography>
<Typography
variant="caption"
sx={{
color: '#94a3b8',
fontSize: '0.75rem',
display: 'block',
mt: 0.5,
}}
>
Final cost may vary by ±10% based on actual processing
</Typography>
</Box>
{/* What's Included Section */}
<Box
sx={{
p: 2,
bgcolor: '#f9fafb',
borderRadius: 1.5,
border: '1px solid #e5e7eb',
mb: 2,
p: 2.5,
bgcolor: 'white',
borderRadius: 2,
mb: 2.5,
boxShadow: '0 1px 3px rgba(0, 0, 0, 0.08)',
}}
>
<Typography
variant="body2"
variant="subtitle2"
sx={{
color: '#374151',
fontSize: '0.875rem',
lineHeight: 1.6,
mb: 0.5,
color: '#1e293b',
fontWeight: 700,
mb: 2,
fontSize: '0.95rem',
display: 'flex',
alignItems: 'center',
gap: 1,
}}
>
<strong>{costEstimate.num_scenes} scenes</strong> × <strong>${costEstimate.price_per_second.toFixed(2)}/second</strong>
<InfoIcon sx={{ fontSize: 18, color: '#667eea' }} />
What's Included in This Price
</Typography>
<Typography
variant="body2"
<Stack spacing={2}>
{/* Video Rendering */}
<Box>
<Box sx={{ display: 'flex', alignItems: 'center', gap: 1, mb: 0.5 }}>
<VideoIcon sx={{ fontSize: 20, color: '#667eea' }} />
<Typography
variant="body2"
sx={{
color: '#1e293b',
fontWeight: 600,
fontSize: '0.875rem',
}}
>
AI Video Generation
</Typography>
<Chip
label={`$${videoRenderCost.toFixed(2)}`}
size="small"
sx={{
ml: 'auto',
bgcolor: '#667eea',
color: 'white',
fontWeight: 600,
fontSize: '0.75rem',
}}
/>
</Box>
<Typography
variant="body2"
sx={{
color: '#64748b',
fontSize: '0.8rem',
lineHeight: 1.5,
ml: 3.5,
}}
>
Creating <strong>{costEstimate.num_scenes} video scenes</strong> ({Math.round(costEstimate.total_duration_seconds)} seconds total) at <strong>{costEstimate.resolution}</strong> quality
</Typography>
<Typography
variant="caption"
sx={{
color: '#94a3b8',
fontSize: '0.7rem',
display: 'block',
ml: 3.5,
mt: 0.5,
}}
>
Rate: ${costEstimate.price_per_second.toFixed(2)}/second • Using advanced AI to transform your narration into engaging video scenes
</Typography>
</Box>
{/* Image Generation (if applicable) */}
{totalImageCost > 0 && (
<>
<Divider sx={{ my: 0.5 }} />
<Box>
<Box sx={{ display: 'flex', alignItems: 'center', gap: 1, mb: 0.5 }}>
<ImageIcon sx={{ fontSize: 20, color: '#10b981' }} />
<Typography
variant="body2"
sx={{
color: '#1e293b',
fontWeight: 600,
fontSize: '0.875rem',
}}
>
Scene Images
</Typography>
<Chip
label={`$${totalImageCost.toFixed(2)}`}
size="small"
sx={{
ml: 'auto',
bgcolor: '#10b981',
color: 'white',
fontWeight: 600,
fontSize: '0.75rem',
}}
/>
</Box>
<Typography
variant="body2"
sx={{
color: '#64748b',
fontSize: '0.8rem',
lineHeight: 1.5,
ml: 3.5,
}}
>
Generating <strong>{costEstimate.num_scenes} custom images</strong> for your video scenes
{costEstimate.image_model && ` using ${costEstimate.image_model}`}
</Typography>
{costEstimate.image_cost_per_scene && (
<Typography
variant="caption"
sx={{
color: '#94a3b8',
fontSize: '0.7rem',
display: 'block',
ml: 3.5,
mt: 0.5,
}}
>
Rate: ${costEstimate.image_cost_per_scene.toFixed(2)}/image • High-quality AI-generated visuals tailored to your content
</Typography>
)}
</Box>
</>
)}
</Stack>
{/* Summary Box */}
<Box
sx={{
color: '#374151',
fontSize: '0.875rem',
lineHeight: 1.6,
mb: 0.5,
mt: 2,
p: 1.5,
bgcolor: '#f1f5f9',
borderRadius: 1.5,
border: '1px solid #cbd5e1',
}}
>
Total duration: <strong>~{Math.round(costEstimate.total_duration_seconds)} seconds</strong>
</Typography>
<Typography
variant="body2"
sx={{
color: '#374151',
fontSize: '0.875rem',
lineHeight: 1.6,
}}
>
Price per second: <strong>${costEstimate.price_per_second.toFixed(2)}</strong> ({costEstimate.resolution})
</Typography>
<Typography
variant="caption"
sx={{
color: '#475569',
fontSize: '0.75rem',
lineHeight: 1.6,
display: 'block',
}}
>
💡 <strong>Good to know:</strong> You only pay for the AI processing to create your video.
There are no hidden fees, subscription requirements, or storage charges.
Once created, your video is yours to download and use forever!
</Typography>
</Box>
</Box>
{/* Per Scene Breakdown (Optional, collapsible) */}
{costEstimate.scene_costs.length > 0 && (
<Box
sx={{
pt: 2,
borderTop: '2px solid #e5e7eb',
p: 2.5,
bgcolor: 'white',
borderRadius: 2,
boxShadow: '0 1px 3px rgba(0, 0, 0, 0.08)',
}}
>
<Typography
variant="subtitle2"
sx={{
fontWeight: 600,
fontWeight: 700,
fontSize: '0.875rem',
color: '#111827',
color: '#1e293b',
mb: 1.5,
textTransform: 'uppercase',
letterSpacing: '0.05em',
display: 'flex',
alignItems: 'center',
gap: 1,
}}
>
Per Scene Breakdown
📊 Cost Per Scene
<Typography
component="span"
variant="caption"
sx={{
ml: 'auto',
color: '#64748b',
fontWeight: 500,
}}
>
{costEstimate.scene_costs.length} scenes
</Typography>
</Typography>
<Stack spacing={0.75}>
{costEstimate.scene_costs.slice(0, 5).map((sceneCost) => (
<Stack spacing={1}>
{costEstimate.scene_costs.slice(0, 5).map((sceneCost, idx) => (
<Box
key={sceneCost.scene_number}
sx={{
display: 'flex',
justifyContent: 'space-between',
alignItems: 'center',
py: 0.75,
py: 1,
px: 1.5,
bgcolor: '#ffffff',
bgcolor: idx % 2 === 0 ? '#f8fafc' : '#ffffff',
borderRadius: 1,
border: '1px solid #e5e7eb',
border: '1px solid #e2e8f0',
transition: 'all 0.2s',
'&:hover': {
bgcolor: '#f1f5f9',
borderColor: '#cbd5e1',
},
}}
>
<Box>
<Typography
variant="body2"
sx={{
color: '#1e293b',
fontSize: '0.875rem',
fontWeight: 600,
}}
>
Scene {sceneCost.scene_number}
</Typography>
<Typography
variant="caption"
sx={{
color: '#64748b',
fontSize: '0.7rem',
}}
>
{sceneCost.actual_duration}s video
{sceneCost.duration_estimate !== sceneCost.actual_duration &&
` (optimized from ${sceneCost.duration_estimate}s)`}
</Typography>
</Box>
<Chip
label={`$${sceneCost.cost.toFixed(2)}`}
size="small"
sx={{
bgcolor: '#667eea',
color: 'white',
fontWeight: 600,
fontSize: '0.75rem',
}}
/>
</Box>
))}
{costEstimate.scene_costs.length > 5 && (
<Box
sx={{
py: 1,
textAlign: 'center',
bgcolor: '#f8fafc',
borderRadius: 1,
border: '1px dashed #cbd5e1',
}}
>
<Typography
variant="body2"
sx={{
color: '#374151',
fontSize: '0.875rem',
color: '#64748b',
fontSize: '0.8rem',
fontWeight: 500,
}}
>
Scene {sceneCost.scene_number}: {sceneCost.actual_duration}s
+ {costEstimate.scene_costs.length - 5} more scenes
</Typography>
<Typography
variant="body2"
variant="caption"
sx={{
color: '#111827',
fontSize: '0.875rem',
fontWeight: 600,
color: '#94a3b8',
fontSize: '0.7rem',
}}
>
${sceneCost.cost.toFixed(2)}
(scroll down after rendering to see all scenes)
</Typography>
</Box>
))}
{costEstimate.scene_costs.length > 5 && (
<Typography
variant="body2"
sx={{
color: '#6b7280',
fontSize: '0.875rem',
textAlign: 'center',
py: 0.5,
}}
>
... and {costEstimate.scene_costs.length - 5} more scenes
</Typography>
)}
</Stack>
</Box>
)}
{/* Help Section */}
<Alert
severity="info"
icon={<InfoIcon />}
sx={{
mt: 2.5,
bgcolor: '#eff6ff',
border: '1px solid #bfdbfe',
'& .MuiAlert-icon': {
color: '#3b82f6',
},
}}
>
<Typography variant="body2" sx={{ fontWeight: 600, fontSize: '0.8rem', mb: 0.5 }}>
Why does video creation cost money?
</Typography>
<Typography variant="caption" sx={{ fontSize: '0.75rem', lineHeight: 1.5, display: 'block' }}>
Creating videos with AI requires powerful computing resources. Each second of video is generated by
advanced AI models that analyze your script, create visuals, and synchronize everything perfectly.
The cost covers the actual AI processing time needed to bring your content to life.
</Typography>
</Alert>
</Box>
);
});

View File

@@ -5,7 +5,7 @@
* Orchestrates scene overview, settings, cost estimation, and render status.
*/
import React from 'react';
import React, { useMemo, useState } from 'react';
import {
Paper,
Typography,
@@ -14,16 +14,22 @@ import {
Box,
Alert,
CircularProgress,
Chip,
IconButton,
Tooltip,
} from '@mui/material';
import { PlayArrow } from '@mui/icons-material';
import { PlayArrow, CheckCircle, Warning, ArrowBack, Visibility, Image as ImageIcon, VolumeUp } from '@mui/icons-material';
import { motion } from 'framer-motion';
import { TaskStatus, CostEstimate, VideoPlan, Scene } from '../../../services/youtubeApi';
import { YT_BORDER, type Resolution } from '../constants';
import { SceneCard } from './SceneCard';
import { CombinedSceneOverview } from './CombinedSceneOverview';
import { CostEstimateCard } from './CostEstimateCard';
import { RenderSettings } from './RenderSettings';
import { RenderStatusDisplay } from './RenderStatusDisplay';
import { ScenePreviewModal } from './ScenePreviewModal';
import { useYouTubeRenderQueue } from '../hooks/useYouTubeRenderQueue';
import Snackbar from '@mui/material/Snackbar';
import MuiAlert, { AlertColor } from '@mui/material/Alert';
interface RenderStepProps {
renderTaskId: string | null;
@@ -37,19 +43,13 @@ interface RenderStepProps {
loading: boolean;
scenes: Scene[];
videoPlan: VideoPlan | null;
editingSceneId: number | null;
editedScene: Partial<Scene> | null;
onResolutionChange: (resolution: Resolution) => void;
onCombineScenesChange: (combine: boolean) => void;
onStartRender: () => void;
onBack: () => void;
onReset: () => void;
onRetryFailedScenes: (failedScenes: any[]) => void;
onEditScene: (scene: Scene) => void;
onSaveScene: () => void;
onCancelEdit: () => void;
onEditChange: (updates: Partial<Scene>) => void;
onToggleScene: (sceneNumber: number) => void;
onScenesUpdate: (updatedScenes: Scene[]) => void;
getVideoUrl: () => string | null;
}
@@ -64,21 +64,53 @@ export const RenderStep: React.FC<RenderStepProps> = React.memo(({
loadingCostEstimate,
loading,
scenes,
editingSceneId,
editedScene,
videoPlan,
onResolutionChange,
onCombineScenesChange,
onStartRender,
onBack,
onReset,
onRetryFailedScenes,
onEditScene,
onSaveScene,
onCancelEdit,
onEditChange,
onToggleScene,
getVideoUrl,
onScenesUpdate,
}) => {
const [snackbar, setSnackbar] = React.useState<{ open: boolean; message: string; severity: AlertColor }>({
open: false,
message: '',
severity: 'info',
});
const [previewModalOpen, setPreviewModalOpen] = useState(false);
const [previewScene, setPreviewScene] = useState<Scene | null>(null);
// Opens the snackbar with the given message; severity defaults to 'info'.
const showSnackbar = (message: string, severity: AlertColor = 'info') => {
setSnackbar({ open: true, message, severity });
};
// Remembers which scene to preview and opens the preview modal for it.
const handlePreviewScene = (scene: Scene) => {
setPreviewScene(scene);
setPreviewModalOpen(true);
};
const {
sceneStatuses,
finalVideoUrl,
combining,
combiningProgress,
combiningMessage,
runSceneVideo,
combineVideos,
} = useYouTubeRenderQueue({
scenes,
videoPlan,
resolution,
onScenesUpdate,
onError: (msg) => showSnackbar(msg, 'error'),
onSuccess: (msg) => showSnackbar(msg, 'success'),
onInfo: (msg) => showSnackbar(msg, 'info'),
});
const canStartRender = enabledScenesCount > 0 && !loading;
return (
<motion.div
initial={{ opacity: 0, y: 20 }}
@@ -91,42 +123,259 @@ export const RenderStep: React.FC<RenderStepProps> = React.memo(({
border: `1px solid ${YT_BORDER}`,
}}
>
<Typography variant="h5" sx={{ mb: 3, fontWeight: 600 }}>
3 Render Video
</Typography>
<Typography variant="h5" sx={{ mb: 3, fontWeight: 600 }}>
4 Render Final Video
</Typography>
{!renderTaskId ? (
<Stack spacing={3}>
<Alert severity="info">
Review your scenes, configure render settings, and start generating your video. This may take several minutes.
</Alert>
{!renderTaskId ? (
<Stack spacing={3}>
<Alert severity="info" icon={<CheckCircle />}>
<Typography variant="body2" sx={{ fontWeight: 600, mb: 0.5 }}>
Ready to create your video!
</Typography>
<Typography variant="caption" sx={{ fontSize: '0.75rem' }}>
All scenes have their images and audio. Configure your render settings below and start the video generation process.
</Typography>
</Alert>
{/* Combined Scene Statistics & Timeline */}
{scenes.length > 0 && (
<CombinedSceneOverview scenes={scenes} />
)}
{/* Scene Details - Full descriptions */}
{/* Scene-wise Video Generation */}
{scenes.length > 0 && (
<Box sx={{ mb: 3 }}>
<Typography variant="h6" sx={{ mb: 2, fontWeight: 600, color: '#111827' }}>
Scene Details
{/* Section header row; 'space-between' is the valid justify-content keyword ('between' is not). */}
<Box sx={{ display: 'flex', alignItems: 'center', justifyContent: 'space-between', mb: 2 }}>
  <Typography variant="h6" sx={{ fontWeight: 700, color: '#1e293b', fontSize: '1.1rem' }}>
    🎬 Scene Video Generation
  </Typography>
</Box>
<Typography variant="body2" color="text.secondary" sx={{ mb: 2.5 }}>
Generate video for each scene individually. Videos are created using your scene images and audio narration.
You can preview assets and retry failed scenes.
</Typography>
<Stack spacing={2}>
{scenes.map((scene) => (
<SceneCard
key={scene.scene_number}
scene={scene}
isEditing={editingSceneId === scene.scene_number}
editedScene={editedScene}
onToggle={onToggleScene}
onEdit={onEditScene}
onSave={onSaveScene}
onCancel={onCancelEdit}
onEditChange={onEditChange}
loading={loading}
{scenes.filter(s => s.enabled !== false).map((scene) => {
// Per-scene render status from the render queue; scenes with no entry yet are treated as idle.
const st = sceneStatuses[scene.scene_number] || { status: 'idle', progress: 0 };
// Both an image and an audio URL must be present before a scene video can be generated.
const hasAssets = !!scene.imageUrl && !!scene.audioUrl;
// Convenience flags derived from the status string, used for styling and button labels below.
const running = st.status === 'running';
const failed = st.status === 'failed';
const completed = st.status === 'completed';
return (
<Paper
key={scene.scene_number}
elevation={0}
sx={{
p: 3,
border: completed ? '2px solid #10b981' : failed ? '2px solid #ef4444' : '2px solid #e2e8f0',
borderRadius: 2,
bgcolor: completed ? '#f0fdf4' : failed ? '#fef2f2' : 'white',
transition: 'all 0.2s ease-in-out',
'&:hover': {
boxShadow: '0 4px 6px -1px rgba(0, 0, 0, 0.1)',
},
}}
>
<Stack spacing={2}>
{/* Header Row */}
<Box sx={{ display: 'flex', alignItems: 'flex-start', justifyContent: 'space-between', gap: 2 }}>
<Box sx={{ flex: 1, minWidth: 0 }}>
<Typography variant="subtitle1" sx={{ fontWeight: 700, color: '#1e293b', mb: 0.5 }}>
Scene {scene.scene_number}: {scene.title}
</Typography>
<Stack direction="row" spacing={1.5} alignItems="center" flexWrap="wrap" useFlexGap>
<Chip
label={`${scene.duration_estimate}s`}
size="small"
sx={{
fontSize: '0.75rem',
fontWeight: 600,
bgcolor: '#eff6ff',
color: '#1e40af',
}}
/>
{/* Asset Status Chips */}
<Tooltip
title={scene.imageUrl ? "Image ready - click to preview" : "Image not generated yet"}
arrow
>
<Chip
icon={<ImageIcon sx={{ fontSize: 14 }} />}
label="Image"
size="small"
onClick={scene.imageUrl ? () => handlePreviewScene(scene) : undefined}
sx={{
fontSize: '0.75rem',
fontWeight: 500,
bgcolor: scene.imageUrl ? '#d1fae5' : '#fee2e2',
color: scene.imageUrl ? '#065f46' : '#991b1b',
cursor: scene.imageUrl ? 'pointer' : 'default',
'&:hover': scene.imageUrl ? {
bgcolor: '#a7f3d0',
} : {},
}}
/>
</Tooltip>
<Tooltip
title={scene.audioUrl ? "Audio ready - click to preview" : "Audio not generated yet"}
arrow
>
<Chip
icon={<VolumeUp sx={{ fontSize: 14 }} />}
label="Audio"
size="small"
onClick={scene.audioUrl ? () => handlePreviewScene(scene) : undefined}
sx={{
fontSize: '0.75rem',
fontWeight: 500,
bgcolor: scene.audioUrl ? '#d1fae5' : '#fee2e2',
color: scene.audioUrl ? '#065f46' : '#991b1b',
cursor: scene.audioUrl ? 'pointer' : 'default',
'&:hover': scene.audioUrl ? {
bgcolor: '#a7f3d0',
} : {},
}}
/>
</Tooltip>
{/* Status Indicator */}
{completed && (
<Chip
icon={<CheckCircle sx={{ fontSize: 14 }} />}
label="Video Ready"
size="small"
color="success"
sx={{ fontWeight: 600, fontSize: '0.75rem' }}
/>
)}
{failed && (
<Chip
label="Failed"
size="small"
color="error"
sx={{ fontWeight: 600, fontSize: '0.75rem' }}
/>
)}
</Stack>
</Box>
{/* Action Buttons */}
<Stack direction="row" spacing={1} alignItems="center">
{running && st.progress > 0 && st.progress < 100 && (
<Box sx={{ display: 'flex', alignItems: 'center', gap: 1 }}>
<CircularProgress
size={32}
variant="determinate"
value={Math.min(100, st.progress)}
sx={{ color: '#667eea' }}
/>
<Typography variant="caption" sx={{ color: '#64748b', fontWeight: 600 }}>
{Math.round(st.progress)}%
</Typography>
</Box>
)}
{hasAssets && (
<Tooltip title="Preview scene assets" arrow>
<IconButton
size="small"
onClick={() => handlePreviewScene(scene)}
sx={{
color: '#667eea',
'&:hover': {
bgcolor: '#eff6ff',
},
}}
>
<Visibility />
</IconButton>
</Tooltip>
)}
<Button
variant={completed ? "outlined" : "contained"}
color={completed ? "success" : "primary"}
onClick={() => runSceneVideo(scene)}
disabled={!hasAssets || running}
startIcon={running ? <CircularProgress size={16} sx={{ color: 'white' }} /> : undefined}
sx={{
textTransform: 'none',
fontWeight: 700,
minWidth: 120,
px: 2.5,
}}
>
{running ? 'Generating' : failed ? 'Retry Video' : completed ? 'Regenerate' : 'Generate Video'}
</Button>
</Stack>
</Box>
{/* Progress/Error Message */}
{st.status !== 'idle' && st.status !== 'completed' && (
<Box
sx={{
px: 2,
py: 1,
bgcolor: failed ? '#fef2f2' : '#f8fafc',
borderRadius: 1,
border: `1px solid ${failed ? '#fecaca' : '#e2e8f0'}`,
}}
>
<Typography
variant="body2"
sx={{
color: failed ? '#991b1b' : '#475569',
fontSize: '0.875rem',
fontWeight: 500,
}}
>
{running
? `Generating video... This may take 1-2 minutes.`
: failed
? `${st.error || 'Generation failed. Please retry.'}`
: 'Processing...'}
</Typography>
</Box>
)}
</Stack>
</Paper>
);
})}
</Stack>
</Box>
)}
{/* Combine Scene Videos (Optional) */}
{combineScenes && scenes.filter(s => s.enabled !== false && s.videoUrl).length >= 2 && (
<Box sx={{ mb: 3, p: 2.5, bgcolor: '#f0fdf4', borderRadius: 2, border: '2px solid #10b981' }}>
<Typography variant="h6" sx={{ mb: 1, fontWeight: 600, color: '#065f46' }}>
🎞 Combine Scene Videos
</Typography>
<Typography variant="body2" color="text.secondary" sx={{ mb: 2 }}>
All scene videos are ready! Combine them into one final video.
</Typography>
<Stack direction="row" spacing={2} alignItems="center">
<Button
variant="contained"
color="success"
onClick={combineVideos}
disabled={combining}
startIcon={combining ? <CircularProgress size={20} sx={{ color: 'white' }} /> : undefined}
sx={{ textTransform: 'none', fontWeight: 700 }}
>
{combining ? 'Combining Videos...' : 'Combine Into Final Video'}
</Button>
{combining && (
<Typography variant="body2" color="text.secondary">
{combiningMessage} ({combiningProgress.toFixed(0)}%)
</Typography>
)}
{finalVideoUrl && (
<Chip
label="✅ Final video ready"
color="success"
sx={{ fontWeight: 600 }}
/>
))}
)}
</Stack>
</Box>
)}
@@ -140,42 +389,60 @@ export const RenderStep: React.FC<RenderStepProps> = React.memo(({
onCombineScenesChange={onCombineScenesChange}
/>
{/* Render Summary and Cost Estimate */}
<Box sx={{ p: 2, bgcolor: '#f4f4f4', borderRadius: 1, border: `1px solid ${YT_BORDER}` }}>
<Typography variant="subtitle2" sx={{ mb: 1, fontWeight: 600 }}>
Render Summary
</Typography>
<Typography variant="body2" color="text.secondary" sx={{ mb: 2 }}>
{enabledScenesCount} scenes will be rendered
<br />
Resolution: {resolution}
<br />
{combineScenes ? 'Scenes will be combined into one video' : 'Each scene will be a separate video'}
<br />
</Typography>
<CostEstimateCard
costEstimate={costEstimate}
loadingCostEstimate={loadingCostEstimate}
/>
</Box>
{/* Cost Estimate */}
<CostEstimateCard
costEstimate={costEstimate}
loadingCostEstimate={loadingCostEstimate}
scenes={scenes}
/>
{/* Action Buttons */}
<Box sx={{ display: 'flex', gap: 2 }}>
<Button variant="outlined" onClick={onBack}>
Back to Scenes
</Button>
<Button
variant="contained"
color="error"
size="large"
onClick={onStartRender}
disabled={loading || enabledScenesCount === 0}
startIcon={loading ? <CircularProgress size={20} /> : <PlayArrow />}
sx={{ px: 4 }}
<Button
variant="outlined"
startIcon={<ArrowBack />}
onClick={onBack}
>
{loading ? 'Starting Render...' : 'Start Video Render'}
Back to Assets
</Button>
<Tooltip
title={
enabledScenesCount === 0
? "Please enable at least one scene"
: loading
? "Video render in progress"
: `Generate videos for ${enabledScenesCount} scene${enabledScenesCount !== 1 ? 's' : ''}. Estimated cost includes video generation and processing.`
}
arrow
placement="top"
>
<span>
<Button
variant="contained"
color="error"
size="large"
startIcon={<PlayArrow />}
onClick={onStartRender}
disabled={loading || enabledScenesCount === 0}
sx={{
px: 4,
fontWeight: 600,
'&:disabled': {
opacity: 0.6,
},
}}
>
{loading ? (
<>
Rendering...
<CircularProgress size={16} sx={{ ml: 1 }} color="inherit" />
</>
) : (
`Start Video Render ${costEstimate?.total_cost ? `($${costEstimate.total_cost.toFixed(2)})` : ''}`
)}
</Button>
</span>
</Tooltip>
</Box>
</Stack>
) : (
@@ -188,6 +455,37 @@ export const RenderStep: React.FC<RenderStepProps> = React.memo(({
/>
)}
</Paper>
<Snackbar
open={snackbar.open}
autoHideDuration={4000}
onClose={() => setSnackbar((s) => ({ ...s, open: false }))}
anchorOrigin={{ vertical: 'bottom', horizontal: 'center' }}
>
<MuiAlert
onClose={() => setSnackbar((s) => ({ ...s, open: false }))}
severity={snackbar.severity}
elevation={6}
variant="filled"
sx={{ width: '100%' }}
>
{snackbar.message}
</MuiAlert>
</Snackbar>
{/* Scene Preview Modal */}
{previewScene && (
<ScenePreviewModal
open={previewModalOpen}
onClose={() => {
setPreviewModalOpen(false);
setPreviewScene(null);
}}
sceneTitle={previewScene.title}
sceneNumber={previewScene.scene_number}
imageUrl={previewScene.imageUrl}
audioUrl={previewScene.audioUrl}
/>
)}
</motion.div>
);
});

View File

@@ -1,26 +1,51 @@
/**
* Scene Card Component
*
* Displays a YouTube scene with editing, generation, and media display capabilities.
* Refactored for reusability and maintainability following React best practices.
*/
import React from 'react';
import React, { useCallback, useEffect } from 'react';
import {
Card,
CardContent,
Typography,
Stack,
Chip,
Box,
FormControlLabel,
Switch,
IconButton,
TextField,
Button,
Tooltip,
Alert,
} from '@mui/material';
import { Edit, Check, Close, Movie, Shuffle, CallMade, ArrowForward, HelpOutline, Info, RecordVoiceOver, Videocam, AutoAwesome } from '@mui/icons-material';
import { Scene } from '../../../services/youtubeApi';
import { inputSx, labelSx } from '../styles';
import { AudioGenerationSettings } from '../../../components/shared/AudioSettingsModal';
import { YouTubeImageGenerationSettings } from '../shared/YouTubeImageGenerationModal';
// Custom hooks
import { useSceneMedia } from '../hooks/useSceneMedia';
import { useGenerationState } from '../hooks/useGenerationState';
// Sub-components
import { SceneHeader } from './SceneCard/SceneHeader';
import { SceneContent } from './SceneCard/SceneContent';
import { SceneEditForm } from './SceneCard/SceneEditForm';
import { GenerationButtons } from './SceneCard/GenerationButtons';
import { GenerationModals } from './SceneCard/GenerationModals';
import { InfoAlert } from './SceneCard/InfoAlert';
// Types
/**
 * Props accepted by `SceneCard`.
 *
 * NOTE(review): a second `interface SceneCardProps` declaration starts directly
 * below this one. TypeScript merges same-named interfaces, so this compiles as
 * long as the member types agree, but one of the two looks redundant — confirm
 * and remove the duplicate.
 */
interface SceneCardProps {
// The scene to display.
scene: Scene;
// Whether this card is currently in edit mode.
isEditing: boolean;
// Pending edits; merged over `scene` while editing.
editedScene: Partial<Scene> | null;
onToggle: (sceneNumber: number) => void;
onEdit: (scene: Scene) => void;
onSave: () => void;
onCancel: () => void;
onEditChange: (updates: Partial<Scene>) => void;
// Optional image generation callback, with optional generation settings.
onGenerateImage?: (scene: Scene, imageSettings?: YouTubeImageGenerationSettings) => Promise<void>;
// Presumably true while an image is being generated for this scene — confirm against caller.
generatingImage?: boolean;
// Optional audio generation callback, with optional generation settings.
onGenerateAudio?: (scene: Scene, audioSettings?: AudioGenerationSettings) => Promise<void>;
// Presumably true while audio is being generated for this scene — confirm against caller.
generatingAudio?: boolean;
loading: boolean;
avatarUrl?: string | null;
videoPlanIdea?: string;
}
interface SceneCardProps {
scene: Scene;
@@ -31,7 +56,13 @@ interface SceneCardProps {
onSave: () => void;
onCancel: () => void;
onEditChange: (updates: Partial<Scene>) => void;
onGenerateImage?: (scene: Scene, imageSettings?: YouTubeImageGenerationSettings) => Promise<void>;
generatingImage?: boolean;
onGenerateAudio?: (scene: Scene, audioSettings?: AudioGenerationSettings) => Promise<void>;
generatingAudio?: boolean;
loading: boolean;
avatarUrl?: string | null; // Base avatar URL for character consistency
videoPlanIdea?: string; // Video plan idea for context
}
// Helper function to get border color based on scene emphasis
@@ -52,33 +83,6 @@ const getSceneBorderColor = (emphasisTags?: string[]): string => {
}
};
// Helper function to get icon for scene emphasis
const getSceneIcon = (emphasisTag: string) => {
switch (emphasisTag) {
case 'hook':
return <Movie fontSize="small" />;
case 'cta':
return <CallMade fontSize="small" />;
case 'transition':
return <Shuffle fontSize="small" />;
case 'main_content':
return <ArrowForward fontSize="small" />;
default:
return <ArrowForward fontSize="small" />;
}
};
/**
 * Maps a scene-emphasis tag to the chip color used for it.
 *
 * @param emphasisTag - Tag taken from the scene's emphasis list.
 * @returns `'primary'` for hooks, `'secondary'` for calls to action,
 *          `'default'` for everything else.
 */
const getSceneChipColor = (emphasisTag: string): 'primary' | 'secondary' | 'default' => {
  if (emphasisTag === 'hook') {
    return 'primary';
  }
  if (emphasisTag === 'cta') {
    return 'secondary';
  }
  return 'default';
};
export const SceneCard: React.FC<SceneCardProps> = React.memo(({
scene,
@@ -89,416 +93,350 @@ export const SceneCard: React.FC<SceneCardProps> = React.memo(({
onSave,
onCancel,
onEditChange,
onGenerateImage,
generatingImage = false,
onGenerateAudio,
generatingAudio = false,
loading,
avatarUrl,
videoPlanIdea,
}) => {
const sceneData = isEditing && editedScene ? { ...scene, ...editedScene } : scene;
// Custom hooks
const { imageBlobUrl, imageLoading, audioBlobUrl, audioLoading } = useSceneMedia({
imageUrl: sceneData.imageUrl,
audioUrl: sceneData.audioUrl,
});
// Debug logging
React.useEffect(() => {
console.log('[SceneCard] Render', {
sceneNumber: scene.scene_number,
imageUrl: scene.imageUrl,
hasImageBlobUrl: !!imageBlobUrl,
imageLoading,
generatingImage,
});
});
const {
showAudioSettingsModal,
setShowAudioSettingsModal,
showImageSettingsModal,
setShowImageSettingsModal,
currentAudioSettings,
setCurrentAudioSettings,
imageGenerationProgress,
setImageGenerationProgress,
imageGenerationStatus,
setImageGenerationStatus,
audioGenerationProgress,
setAudioGenerationProgress,
audioGenerationStatus,
setAudioGenerationStatus,
resetImageGeneration,
resetAudioGeneration,
} = useGenerationState();
// Sync local status with parent's generating state
useEffect(() => {
if (generatingImage && imageGenerationStatus === '') {
setImageGenerationStatus('Generating image...');
setImageGenerationProgress(50);
} else if (!generatingImage && imageGenerationStatus.includes('Generating')) {
// Generation process finished (either success or failure)
if (sceneData.imageUrl) {
// Generation completed successfully
setImageGenerationStatus('Image generated successfully!');
setImageGenerationProgress(100);
setTimeout(() => resetImageGeneration(), 3000);
} else {
// Check if this is a new imageUrl that just arrived (race condition)
const checkForImageUrl = () => {
if (sceneData.imageUrl) {
setImageGenerationStatus('Image generated successfully!');
setImageGenerationProgress(100);
setTimeout(() => resetImageGeneration(), 3000);
} else {
// Still no imageUrl, assume failure
setImageGenerationStatus('Failed to generate image');
setImageGenerationProgress(0);
setTimeout(() => resetImageGeneration(), 3000);
}
};
// Wait a moment for potential race condition resolution
setTimeout(checkForImageUrl, 500);
}
}
}, [generatingImage, imageGenerationStatus, sceneData.imageUrl, setImageGenerationStatus, setImageGenerationProgress, resetImageGeneration]);
useEffect(() => {
if (generatingAudio && audioGenerationStatus === '') {
setAudioGenerationStatus('Generating audio...');
setAudioGenerationProgress(50);
} else if (!generatingAudio && audioGenerationStatus.includes('Generating')) {
// Generation process finished (either success or failure)
if (sceneData.audioUrl) {
// Generation completed successfully
setAudioGenerationStatus('Audio generated successfully!');
setAudioGenerationProgress(100);
setTimeout(() => resetAudioGeneration(), 2000);
} else {
// Check if this is a new audioUrl that just arrived (race condition)
const checkForAudioUrl = () => {
if (sceneData.audioUrl) {
setAudioGenerationStatus('Audio generated successfully!');
setAudioGenerationProgress(100);
setTimeout(() => resetAudioGeneration(), 2000);
} else {
// Still no audioUrl, assume failure
setAudioGenerationStatus('Failed to generate audio');
setAudioGenerationProgress(0);
setTimeout(() => resetAudioGeneration(), 2000);
}
};
// Wait a moment for potential race condition resolution
setTimeout(checkForAudioUrl, 500);
}
}
}, [generatingAudio, audioGenerationStatus, sceneData.audioUrl, setAudioGenerationStatus, setAudioGenerationProgress, resetAudioGeneration]);
console.log('[SceneCard] Render', {
sceneNumber: scene.scene_number,
imageUrl: scene.imageUrl,
generatingImage,
hasImageBlobUrl: !!imageBlobUrl,
imageLoading
});
// Reset local generation state when parent indicates generation is complete
useEffect(() => {
if (!generatingImage) {
resetImageGeneration();
}
}, [generatingImage, resetImageGeneration]);
useEffect(() => {
if (!generatingAudio) {
resetAudioGeneration();
}
}, [generatingAudio, resetAudioGeneration]);
/**
 * Resolves the card border color from the first emphasis tag.
 *
 * Only the leading tag is considered; a missing or empty tag list, or any
 * tag without a dedicated color, yields the neutral grey border.
 */
const getSceneBorderColor = (emphasisTags?: string[]): string => {
  const neutralBorder = '#e5e7eb';
  const leadTag = emphasisTags?.[0];

  if (leadTag === 'hook') {
    return '#3b82f6';
  }
  if (leadTag === 'cta') {
    return '#8b5cf6';
  }
  if (leadTag === 'transition') {
    return '#10b981';
  }
  return neutralBorder;
};
const borderColor = getSceneBorderColor(sceneData.emphasis_tags);
// Event handlers
// Opens the audio settings modal, unless no audio handler was supplied or
// the card is busy (audio already generating, or the parent is loading).
const handleAudioModalOpen = useCallback(() => {
  const canOpen = Boolean(onGenerateAudio) && !generatingAudio && !loading;
  if (canOpen) {
    console.log('[SceneCard] Opening audio settings modal for scene', scene.scene_number);
    setShowAudioSettingsModal(true);
  }
}, [onGenerateAudio, generatingAudio, loading, scene.scene_number, setShowAudioSettingsModal]);
// Opens the image settings modal, unless no image handler was supplied or
// the card is busy (image already generating, or the parent is loading).
const handleImageModalOpen = useCallback(() => {
  const canOpen = Boolean(onGenerateImage) && !generatingImage && !loading;
  if (canOpen) {
    console.log('[SceneCard] Opening image settings modal for scene', scene.scene_number);
    setShowImageSettingsModal(true);
  }
}, [onGenerateImage, generatingImage, loading, scene.scene_number, setShowImageSettingsModal]);
// Kicks off image generation with the settings chosen in the modal.
// Awaiting `onGenerateImage` only confirms the task was started: completion
// is reported by the parent through the `generatingImage` prop, so this
// handler never marks the image as finished itself.
const handleImageSettingsApply = useCallback(async (settings: YouTubeImageGenerationSettings) => {
  console.log('[SceneCard] Applying image settings for scene', scene.scene_number, 'with settings:', settings);

  if (!onGenerateImage) {
    console.error('[SceneCard] onGenerateImage handler is not provided');
    return;
  }

  const busy = generatingImage || loading;
  if (busy) {
    console.warn('[SceneCard] Image generation already in progress, ignoring click');
    return;
  }

  setShowImageSettingsModal(false);

  try {
    setImageGenerationStatus('Starting image generation...');
    setImageGenerationProgress(5);

    console.log('[SceneCard] Calling onGenerateImage for scene', scene.scene_number, 'with settings');
    await onGenerateImage(scene, settings);
    console.log('[SceneCard] onGenerateImage task started for scene', scene.scene_number);

    // The task is only queued at this point; keep the bar in an
    // intermediate state until the parent flips `generatingImage`.
    setImageGenerationStatus('Image generation in progress...');
    setImageGenerationProgress(25);
  } catch (error: any) {
    // Prefer the most specific message the API response carries.
    const detail = error?.response?.data?.detail;
    const errorMessage = detail?.message
      || detail?.error
      || detail
      || error?.message
      || 'Failed to start image generation. Please try again.';

    setImageGenerationStatus(`Error: ${errorMessage}`);
    setImageGenerationProgress(0);
    setTimeout(() => resetImageGeneration(), 3000);
  }
}, [onGenerateImage, generatingImage, loading, scene, setShowImageSettingsModal, setImageGenerationStatus, setImageGenerationProgress, resetImageGeneration]);
// Generates audio for this scene with the settings chosen in the modal.
//
// While `onGenerateAudio` is pending, a one-second ticker advances a purely
// time-based status/progress estimate (it does not reflect real backend
// progress). On success the status is cleared after 2s; on failure the error
// is shown and cleared after 3s, mirroring the image handler.
const handleAudioSettingsApply = useCallback(async (settings: AudioGenerationSettings) => {
  console.log('[SceneCard] Applying audio settings for scene', scene.scene_number, 'with settings:', settings);

  // Explicit guard instead of a non-null assertion: a missing handler would
  // otherwise surface as a confusing TypeError in the status line.
  if (!onGenerateAudio) {
    console.error('[SceneCard] onGenerateAudio handler is not provided');
    return;
  }

  setCurrentAudioSettings(settings);
  setShowAudioSettingsModal(false);

  const startTime = Date.now();
  const elapsedSeconds = (): number => Math.floor((Date.now() - startTime) / 1000);

  // ReturnType<typeof setInterval> is correct for both browser and Node typings.
  let progressInterval: ReturnType<typeof setInterval> | null = null;

  try {
    setAudioGenerationStatus('Submitting audio generation request...');
    setAudioGenerationProgress(10);

    progressInterval = setInterval(() => {
      const seconds = elapsedSeconds();

      if (seconds < 3) {
        setAudioGenerationStatus('Submitting request to AI service...');
        setAudioGenerationProgress(15);
      } else if (seconds < 10) {
        setAudioGenerationStatus('AI is generating your audio...');
        setAudioGenerationProgress(40);
      } else if (seconds < 20) {
        setAudioGenerationStatus('Synthesizing narration...');
        setAudioGenerationProgress(70);
      } else {
        setAudioGenerationStatus(`Processing... (${seconds}s elapsed)`);
        // Creep from 70 towards a 90% ceiling so the bar never claims completion early.
        setAudioGenerationProgress(Math.min(90, 70 + (seconds - 20) / 2));
      }
    }, 1000);

    await onGenerateAudio(scene, settings);
    console.log('[SceneCard] Audio generation completed for scene', scene.scene_number);

    setAudioGenerationStatus('Finalizing audio...');
    setAudioGenerationProgress(95);

    setAudioGenerationStatus(`Audio generated successfully in ${elapsedSeconds()}s`);
    setAudioGenerationProgress(100);

    setTimeout(() => resetAudioGeneration(), 2000);
  } catch (error: any) {
    // Prefer the most specific message the API response carries.
    const detail = error?.response?.data?.detail;
    const errorMessage = detail?.message
      || detail?.error
      || detail
      || error?.message
      || 'Failed to generate audio. Please try again.';

    setAudioGenerationStatus(`Error: ${errorMessage}`);
    setAudioGenerationProgress(0);
    // Clear the error after a short delay, as the image handler does.
    setTimeout(() => resetAudioGeneration(), 3000);
  } finally {
    // Single cleanup point: the ticker must stop on both success and failure.
    // No tick can run between the final status update above and this line,
    // because nothing in between yields to the event loop.
    if (progressInterval !== null) {
      clearInterval(progressInterval);
    }
  }
}, [scene, setCurrentAudioSettings, setShowAudioSettingsModal, setAudioGenerationStatus, setAudioGenerationProgress, onGenerateAudio, resetAudioGeneration]);
return (
<Card
variant="outlined"
sx={{
opacity: sceneData.enabled === false ? 0.6 : 1,
border: sceneData.enabled === false ? '1px dashed #e5e7eb' : `2px solid ${borderColor}`,
borderRadius: 2,
bgcolor: '#ffffff',
transition: 'all 0.2s ease-in-out',
'&:hover': {
boxShadow: sceneData.enabled !== false ? '0 4px 12px rgba(0, 0, 0, 0.1)' : 'none',
},
}}
>
<CardContent sx={{ p: 3 }}>
{/* Header Section */}
<Box sx={{ display: 'flex', justifyContent: 'space-between', alignItems: 'flex-start', mb: 2.5 }}>
<Box sx={{ flexGrow: 1 }}>
<Box sx={{ display: 'flex', alignItems: 'center', gap: 1, mb: 1.5 }}>
<Typography
variant="h6"
sx={{
mb: 0,
fontWeight: 700,
fontSize: '1.125rem',
color: '#111827',
letterSpacing: '-0.01em',
}}
>
Scene {scene.scene_number}: {sceneData.title}
</Typography>
<Tooltip
title={
<Box>
<Typography variant="body2" sx={{ fontWeight: 600, mb: 0.5 }}>
Scene Type: {sceneData.emphasis_tags?.[0]?.replace('_', ' ') || 'Main Content'}
</Typography>
<Typography variant="caption" sx={{ display: 'block', mb: 0.5 }}>
{sceneData.emphasis_tags?.[0] === 'hook'
? 'Hook scenes capture attention in the first few seconds with compelling visuals or statements.'
: sceneData.emphasis_tags?.[0] === 'cta'
? 'Call-to-action scenes encourage viewers to like, subscribe, or take a specific action.'
: sceneData.emphasis_tags?.[0] === 'transition'
? 'Transition scenes smoothly connect different topics or segments.'
: 'Main content scenes deliver the core message and information.'}
</Typography>
<Typography variant="caption" sx={{ display: 'block' }}>
Duration: {sceneData.duration_estimate}s This affects rendering cost.
</Typography>
</Box>
}
arrow
placement="top"
>
<IconButton size="small" sx={{ color: '#6b7280', p: 0.5 }}>
<HelpOutline fontSize="small" />
</IconButton>
</Tooltip>
</Box>
<Stack direction="row" spacing={1} sx={{ mb: 0 }} flexWrap="wrap" useFlexGap>
{sceneData.emphasis_tags?.map((tag) => (
<Tooltip
key={tag}
title={
tag === 'hook'
? 'Hook: Grabs viewer attention immediately'
: tag === 'cta'
? 'CTA: Encourages viewer action'
: tag === 'transition'
? 'Transition: Connects segments smoothly'
: 'Main Content: Core message delivery'
}
arrow
>
<Chip
label={tag.replace('_', ' ')}
size="small"
color={getSceneChipColor(tag)}
icon={getSceneIcon(tag)}
sx={{
textTransform: 'capitalize',
fontWeight: 600,
fontSize: '0.75rem',
}}
/>
</Tooltip>
))}
<Tooltip
title="Estimated duration in seconds. Longer scenes cost more to render but provide more detail."
arrow
>
<Chip
label={`~${sceneData.duration_estimate}s`}
size="small"
variant="outlined"
sx={{
ml: 'auto',
fontWeight: 600,
fontSize: '0.75rem',
borderColor: '#d1d5db',
color: '#374151',
}}
/>
</Tooltip>
</Stack>
</Box>
<Box sx={{ display: 'flex', alignItems: 'center', gap: 1 }}>
<Tooltip
title={
sceneData.enabled !== false
? 'Disable this scene to exclude it from rendering and reduce cost'
: 'Enable this scene to include it in the final video'
}
arrow
>
<FormControlLabel
control={
<Switch
checked={sceneData.enabled !== false}
onChange={() => onToggle(scene.scene_number)}
size="small"
/>
}
label="Enable"
sx={{ mr: 0 }}
<>
<Card
variant="outlined"
sx={{
opacity: sceneData.enabled === false ? 0.6 : 1,
border: sceneData.enabled === false ? '1px dashed #e5e7eb' : `2px solid ${borderColor}`,
borderRadius: 2,
bgcolor: '#ffffff',
transition: 'all 0.2s ease-in-out',
'&:hover': {
boxShadow: sceneData.enabled !== false ? '0 4px 12px rgba(0, 0, 0, 0.1)' : 'none',
},
}}
>
<CardContent sx={{ p: 3 }}>
<SceneHeader
scene={scene}
isEditing={isEditing}
onToggle={onToggle}
onEdit={onEdit}
/>
{isEditing ? (
<SceneEditForm
scene={scene}
editedScene={editedScene || {}}
onEditChange={onEditChange}
onSave={onSave}
onCancel={onCancel}
loading={loading}
/>
) : (
<>
<SceneContent
scene={scene}
imageBlobUrl={imageBlobUrl}
imageLoading={imageLoading}
audioBlobUrl={audioBlobUrl}
audioLoading={audioLoading}
/>
</Tooltip>
{!isEditing && (
<Tooltip title="Edit scene narration, visual prompt, or duration" arrow>
<IconButton
size="small"
onClick={() => onEdit(scene)}
color="primary"
sx={{
border: '1px solid #e5e7eb',
'&:hover': {
bgcolor: '#f9fafb',
},
}}
>
<Edit fontSize="small" />
</IconButton>
</Tooltip>
)}
</Box>
</Box>
{isEditing ? (
<Stack spacing={2}>
<TextField
label="Narration"
value={sceneData.narration}
onChange={(e) => onEditChange({ narration: e.target.value })}
multiline
rows={3}
fullWidth
sx={inputSx}
InputLabelProps={{ sx: labelSx }}
/>
<TextField
label="Visual Prompt"
value={sceneData.visual_prompt}
onChange={(e) => onEditChange({ visual_prompt: e.target.value })}
multiline
rows={2}
fullWidth
sx={inputSx}
InputLabelProps={{ sx: labelSx }}
/>
<TextField
label="Duration (seconds)"
type="number"
value={sceneData.duration_estimate}
onChange={(e) => onEditChange({ duration_estimate: parseFloat(e.target.value) || 5 })}
inputProps={{ min: 1, max: 10, step: 0.5 }}
fullWidth
sx={inputSx}
InputLabelProps={{ sx: labelSx }}
/>
<Box sx={{ display: 'flex', gap: 1 }}>
<Button
size="small"
variant="contained"
startIcon={<Check />}
onClick={onSave}
disabled={loading}
>
Save
</Button>
<Button
size="small"
variant="outlined"
startIcon={<Close />}
onClick={onCancel}
>
Cancel
</Button>
</Box>
</Stack>
) : (
<Stack spacing={2.5}>
{/* Narration Section */}
<Box
sx={{
p: 2,
bgcolor: '#f9fafb',
borderRadius: 1.5,
border: '1px solid #e5e7eb',
}}
>
<Box sx={{ display: 'flex', alignItems: 'center', gap: 1, mb: 1 }}>
<RecordVoiceOver sx={{ color: '#6366f1', fontSize: 18 }} />
<Typography
variant="subtitle2"
sx={{
fontWeight: 600,
fontSize: '0.875rem',
color: '#111827',
textTransform: 'uppercase',
letterSpacing: '0.05em',
}}
>
Narration
</Typography>
<Tooltip
title="The spoken text or voiceover for this scene. This is what will be narrated in the final video."
arrow
>
<IconButton size="small" sx={{ color: '#6b7280', p: 0.25, ml: 0.5 }}>
<HelpOutline fontSize="small" />
</IconButton>
</Tooltip>
</Box>
<Typography
variant="body1"
sx={{
fontStyle: 'italic',
color: '#374151',
fontSize: '0.9375rem',
lineHeight: 1.7,
fontWeight: 400,
pl: 0.5,
}}
>
"{sceneData.narration}"
</Typography>
</Box>
<GenerationButtons
scene={scene}
isEditing={isEditing}
loading={loading}
onGenerateImage={onGenerateImage}
generatingImage={generatingImage}
onGenerateAudio={onGenerateAudio}
generatingAudio={generatingAudio}
imageGenerationStatus={imageGenerationStatus}
imageGenerationProgress={imageGenerationProgress}
audioGenerationStatus={audioGenerationStatus}
audioGenerationProgress={audioGenerationProgress}
onAudioModalOpen={handleAudioModalOpen}
onImageModalOpen={handleImageModalOpen}
/>
{/* Visual Prompt Section */}
<Box
sx={{
p: 2,
bgcolor: '#fef3c7',
borderRadius: 1.5,
border: '1px solid #fde68a',
}}
>
<Box sx={{ display: 'flex', alignItems: 'center', gap: 1, mb: 1 }}>
<Videocam sx={{ color: '#d97706', fontSize: 18 }} />
<Typography
variant="subtitle2"
sx={{
fontWeight: 600,
fontSize: '0.875rem',
color: '#92400e',
textTransform: 'uppercase',
letterSpacing: '0.05em',
}}
>
Visual Prompt
</Typography>
<Tooltip
title={
<Box>
<Typography variant="body2" sx={{ fontWeight: 600, mb: 0.5 }}>
Visual Prompt Explained
</Typography>
<Typography variant="caption" sx={{ display: 'block', mb: 0.5 }}>
This describes the visual content that will be generated for this scene. The AI uses this to create appropriate images or video clips.
</Typography>
<Typography variant="caption" sx={{ display: 'block' }}>
<strong>Tip:</strong> More detailed prompts lead to better visual results. Include camera angles, lighting, and composition details.
</Typography>
</Box>
}
arrow
>
<IconButton size="small" sx={{ color: '#d97706', p: 0.25, ml: 0.5 }}>
<HelpOutline fontSize="small" />
</IconButton>
</Tooltip>
</Box>
<Typography
variant="body2"
sx={{
color: '#78350f',
fontSize: '0.875rem',
lineHeight: 1.7,
pl: 0.5,
fontWeight: 400,
}}
>
{sceneData.visual_prompt}
</Typography>
</Box>
<InfoAlert
scene={scene}
isEditing={isEditing}
onGenerateImage={!!onGenerateImage}
onGenerateAudio={!!onGenerateAudio}
/>
</>
)}
</CardContent>
</Card>
{/* Visual Cues Section */}
{sceneData.visual_cues && sceneData.visual_cues.length > 0 && (
<Box
sx={{
p: 2,
bgcolor: '#f0f9ff',
borderRadius: 1.5,
border: '1px solid #bae6fd',
}}
>
<Box sx={{ display: 'flex', alignItems: 'center', gap: 1, mb: 1.5 }}>
<AutoAwesome sx={{ color: '#0284c7', fontSize: 18 }} />
<Typography
variant="subtitle2"
sx={{
fontWeight: 600,
fontSize: '0.875rem',
color: '#0c4a6e',
textTransform: 'uppercase',
letterSpacing: '0.05em',
}}
>
Visual Cues
</Typography>
<Tooltip
title={
<Box>
<Typography variant="body2" sx={{ fontWeight: 600, mb: 0.5 }}>
Visual Cues Explained
</Typography>
<Typography variant="caption" sx={{ display: 'block', mb: 0.5 }}>
These are specific visual effects, camera techniques, or stylistic elements that will be applied to enhance the scene.
</Typography>
<Typography variant="caption" sx={{ display: 'block' }}>
Examples: Quick Zoom, Sunlight Flare, Energetic Cut, Steady Cam Walk, etc.
</Typography>
</Box>
}
arrow
>
<IconButton size="small" sx={{ color: '#0284c7', p: 0.25, ml: 0.5 }}>
<HelpOutline fontSize="small" />
</IconButton>
</Tooltip>
</Box>
<Stack direction="row" spacing={0.75} flexWrap="wrap" useFlexGap>
{sceneData.visual_cues.map((cue, idx) => (
<Tooltip
key={`${cue}-${idx}`}
title={`Visual effect: ${cue}`}
arrow
>
<Chip
label={cue}
size="small"
sx={{
fontSize: '0.75rem',
height: 28,
textTransform: 'capitalize',
borderColor: '#7dd3fc',
bgcolor: '#ffffff',
color: '#0c4a6e',
fontWeight: 500,
'&:hover': {
bgcolor: '#e0f2fe',
borderColor: '#0284c7',
},
}}
/>
</Tooltip>
))}
</Stack>
</Box>
)}
{/* Info Alert for Editing */}
<Alert
severity="info"
icon={<Info fontSize="small" />}
sx={{
bgcolor: '#eff6ff',
border: '1px solid #bfdbfe',
'& .MuiAlert-icon': {
color: '#3b82f6',
},
'& .MuiAlert-message': {
color: '#1e40af',
},
}}
>
<Typography variant="caption" sx={{ fontSize: '0.75rem', lineHeight: 1.5 }}>
<strong>Tip:</strong> Click the edit icon above to modify narration, visual prompt, or duration.
Disable scenes you don't need to reduce rendering cost.
</Typography>
</Alert>
</Stack>
)}
</CardContent>
</Card>
<GenerationModals
scene={scene}
showAudioSettingsModal={showAudioSettingsModal}
setShowAudioSettingsModal={setShowAudioSettingsModal}
showImageSettingsModal={showImageSettingsModal}
setShowImageSettingsModal={setShowImageSettingsModal}
currentAudioSettings={currentAudioSettings}
onAudioSettingsApply={handleAudioSettingsApply}
onImageSettingsApply={handleImageSettingsApply}
generatingAudio={generatingAudio}
/>
</>
);
});

View File

@@ -0,0 +1,164 @@
import React from 'react';
import {
Box,
Button,
Typography,
LinearProgress,
CircularProgress,
} from '@mui/material';
import { Image as ImageIcon, VolumeUp } from '@mui/icons-material';
import { Scene } from '../../../../services/youtubeApi';
import { AudioGenerationSettings } from '../../../../components/shared/AudioSettingsModal';
import { YouTubeImageGenerationSettings } from '../../shared/YouTubeImageGenerationModal';
/** Props for the audio/image generation buttons shown on a scene card. */
interface GenerationButtonsProps {
/** Scene whose `audioUrl` / `imageUrl` decide between "Generate" and "Regenerate" labels. */
scene: Scene;
/** When true the component renders nothing. */
isEditing: boolean;
/** Disables both buttons while true. */
loading: boolean;
/** Only its presence is checked here: the image button is rendered when it is provided. */
onGenerateImage?: (scene: Scene, imageSettings?: YouTubeImageGenerationSettings) => Promise<void>;
/** Shows a spinner on, and disables, the image button while true. */
generatingImage?: boolean;
/** Only its presence is checked here: the audio button is rendered when it is provided. */
onGenerateAudio?: (scene: Scene, audioSettings?: AudioGenerationSettings) => Promise<void>;
/** Shows a spinner on, and disables, the audio button while true. */
generatingAudio?: boolean;
/** Status line shown under the image button; hidden when empty. */
imageGenerationStatus?: string;
/** Image progress value; the bar is shown only for values strictly between 0 and 100. */
imageGenerationProgress?: number;
/** Status line shown under the audio button; hidden when empty. */
audioGenerationStatus?: string;
/** Audio progress value; the bar is shown only for values strictly between 0 and 100. */
audioGenerationProgress?: number;
/** Click handler of the audio button. */
onAudioModalOpen: () => void;
/** Click handler of the image button. */
onImageModalOpen: () => void;
}
/**
 * True when a status line reports a failure and should be shown in the
 * error color. Both the `Error…` and the `Failed…` prefixes are recognised.
 */
const isFailureStatus = (status: string): boolean =>
  status.startsWith('Error') || status.startsWith('Failed');

/**
 * Status caption plus determinate progress bar shared by both buttons.
 * Renders nothing for an empty status; the bar is only shown while progress
 * is strictly between 0 and 100.
 */
const GenerationStatus: React.FC<{ status: string; progress: number }> = ({ status, progress }) => {
  if (!status) return null;

  return (
    <Box sx={{ mt: 1.5 }}>
      <Typography
        variant="caption"
        sx={{
          display: 'block',
          mb: 0.5,
          color: isFailureStatus(status) ? 'error.main' : 'text.secondary',
          fontSize: '0.75rem',
        }}
      >
        {status}
      </Typography>
      {progress > 0 && progress < 100 && (
        <LinearProgress
          variant="determinate"
          value={progress}
          sx={{
            height: 4,
            borderRadius: 2,
            bgcolor: '#e5e7eb',
          }}
        />
      )}
    </Box>
  );
};

/**
 * Audio and image generation buttons for a scene, each followed by its
 * status line and progress bar.
 *
 * A button is rendered only when the matching `onGenerate*` handler is
 * provided; clicking it calls the corresponding `on*ModalOpen` callback
 * rather than starting generation directly. Nothing is rendered while the
 * scene is being edited.
 */
export const GenerationButtons: React.FC<GenerationButtonsProps> = ({
  scene,
  isEditing,
  loading,
  onGenerateImage,
  generatingImage = false,
  onGenerateAudio,
  generatingAudio = false,
  imageGenerationStatus = '',
  imageGenerationProgress = 0,
  audioGenerationStatus = '',
  audioGenerationProgress = 0,
  onAudioModalOpen,
  onImageModalOpen,
}) => {
  if (isEditing) return null;

  return (
    <>
      {/* Audio Generation Button */}
      {onGenerateAudio && (
        <Box sx={{ mt: 2 }}>
          <Button
            variant={scene.audioUrl ? 'outlined' : 'contained'}
            color="primary"
            startIcon={
              generatingAudio ? (
                <CircularProgress size={16} sx={{ color: 'inherit' }} />
              ) : (
                <VolumeUp />
              )
            }
            onClick={onAudioModalOpen}
            disabled={generatingAudio || loading}
            fullWidth
            sx={{
              textTransform: 'none',
              fontWeight: 600,
              py: 1.5,
            }}
          >
            {generatingAudio
              ? 'Generating Audio...'
              : scene.audioUrl
                ? 'Regenerate Audio'
                : 'Generate Audio'}
          </Button>
          <GenerationStatus status={audioGenerationStatus} progress={audioGenerationProgress} />
        </Box>
      )}

      {/* Image Generation Button */}
      {onGenerateImage && (
        <Box sx={{ mt: 2 }}>
          <Button
            variant={scene.imageUrl ? 'outlined' : 'contained'}
            color="primary"
            startIcon={
              generatingImage ? (
                <CircularProgress size={16} sx={{ color: 'inherit' }} />
              ) : (
                <ImageIcon />
              )
            }
            onClick={onImageModalOpen}
            disabled={generatingImage || loading}
            fullWidth
            sx={{
              textTransform: 'none',
              fontWeight: 600,
              py: 1.5,
            }}
          >
            {generatingImage
              ? 'Generating Image...'
              : scene.imageUrl
                ? 'Regenerate Image'
                : 'Generate Image'}
          </Button>
          <GenerationStatus status={imageGenerationStatus} progress={imageGenerationProgress} />
        </Box>
      )}
    </>
  );
};

View File

@@ -0,0 +1,53 @@
import React from 'react';
import { AudioSettingsModal, AudioGenerationSettings } from '../../../../components/shared/AudioSettingsModal';
import { YouTubeImageGenerationModal, YouTubeImageGenerationSettings } from '../../shared/YouTubeImageGenerationModal';
import { Scene } from '../../../../services/youtubeApi';
/** Props for the pair of settings modals attached to a scene card. */
interface GenerationModalsProps {
  /** Scene whose title and visual prompts seed the modals. */
  scene: Scene;
  /** Whether the audio settings modal is open. */
  showAudioSettingsModal: boolean;
  /** Setter used to close the audio settings modal. */
  setShowAudioSettingsModal: (show: boolean) => void;
  /** Whether the image settings modal is open. */
  showImageSettingsModal: boolean;
  /** Setter used to close the image settings modal. */
  setShowImageSettingsModal: (show: boolean) => void;
  /** Settings passed to the audio modal as its initial values. */
  currentAudioSettings: AudioGenerationSettings;
  /** Called with the settings confirmed in the audio modal. */
  onAudioSettingsApply: (settings: AudioGenerationSettings) => void;
  /** Called with the settings confirmed in the image modal. */
  onImageSettingsApply: (settings: YouTubeImageGenerationSettings) => void;
  /** Forwarded to the audio modal as `isGenerating`. */
  generatingAudio?: boolean;
  /**
   * Forwarded to the image modal as `isGenerating`. Optional and defaults to
   * `false`, so callers that do not pass it keep the previous behaviour.
   */
  generatingImage?: boolean;
}

/**
 * Renders the audio and image settings modals for one scene.
 *
 * The image modal's initial prompt is the scene's `visual_prompt` and
 * `enhanced_visual_prompt` joined by a newline and trimmed; when both are
 * empty a generic prompt built from the scene title is used instead.
 */
export const GenerationModals: React.FC<GenerationModalsProps> = ({
  scene,
  showAudioSettingsModal,
  setShowAudioSettingsModal,
  showImageSettingsModal,
  setShowImageSettingsModal,
  currentAudioSettings,
  onAudioSettingsApply,
  onImageSettingsApply,
  generatingAudio = false,
  generatingImage = false,
}) => {
  const combinedPrompt = `${scene.visual_prompt || ''}\n${scene.enhanced_visual_prompt || ''}`.trim();
  const initialImagePrompt = combinedPrompt || `Create a YouTube scene image for: ${scene.title}`;

  return (
    <>
      <AudioSettingsModal
        open={showAudioSettingsModal}
        onClose={() => setShowAudioSettingsModal(false)}
        onApplySettings={onAudioSettingsApply}
        initialSettings={currentAudioSettings}
        isGenerating={generatingAudio}
        sceneTitle={scene.title}
      />
      <YouTubeImageGenerationModal
        open={showImageSettingsModal}
        onClose={() => setShowImageSettingsModal(false)}
        onGenerate={onImageSettingsApply}
        initialPrompt={initialImagePrompt}
        initialStyle="Realistic"
        initialRenderingSpeed="Quality"
        initialAspectRatio="16:9"
        initialModel="ideogram-v3-turbo"
        // Previously hard-coded to false; now reflects the parent's state when provided.
        isGenerating={generatingImage}
        sceneTitle={scene.title}
      />
    </>
  );
};

View File

@@ -0,0 +1,48 @@
import React from 'react';
import {
Alert,
Typography,
} from '@mui/material';
import { Info } from '@mui/icons-material';
import { Scene } from '../../../../services/youtubeApi';
/** Props for the informational tip shown at the bottom of a scene card. */
interface InfoAlertProps {
/** Scene whose `imageUrl` / `audioUrl` decide which extra hints are shown. */
scene: Scene;
/** When true the alert renders nothing. */
isEditing: boolean;
/** Boolean flag (not a callback): when true and the scene has no image, an image hint is added. */
onGenerateImage?: boolean;
/** Boolean flag (not a callback): when true and the scene has no audio, an audio hint is added. */
onGenerateAudio?: boolean;
}
/**
 * Informational tip shown under a scene's content.
 *
 * Always explains how to edit the scene and that disabling scenes lowers
 * rendering cost. It adds an image and/or audio hint when the matching flag
 * is set and the scene has no such asset yet. Renders nothing in edit mode.
 */
export const InfoAlert: React.FC<InfoAlertProps> = ({
  scene,
  isEditing,
  onGenerateImage = false,
  onGenerateAudio = false,
}) => {
  if (isEditing) return null;

  return (
    <Alert
      severity="info"
      icon={<Info fontSize="small" />}
      sx={{
        mt: 2,
        bgcolor: '#eff6ff',
        border: '1px solid #bfdbfe',
        '& .MuiAlert-icon': {
          color: '#3b82f6',
        },
        '& .MuiAlert-message': {
          color: '#1e40af',
        },
      }}
    >
      <Typography variant="caption" sx={{ fontSize: '0.75rem', lineHeight: 1.5 }}>
        <strong>Tip:</strong> Click the edit icon above to modify narration, visual prompt, or duration.
        {onGenerateImage && !scene.imageUrl && ' Generate an image for this scene before rendering the video.'}
        {onGenerateAudio && !scene.audioUrl && ' Generate audio narration for this scene before rendering the video.'}
        {/* JSX drops newline-adjacent whitespace around expressions, so the
            separating space must be explicit or the sentences run together. */}
        {' '}
        Disable scenes you don't need to reduce rendering cost.
      </Typography>
    </Alert>
  );
};

View File

@@ -0,0 +1,333 @@
import React from 'react';
import {
Typography,
Stack,
Box,
Chip,
Tooltip,
IconButton,
CircularProgress,
} from '@mui/material';
import { RecordVoiceOver, Videocam, AutoAwesome, Image as ImageIcon, VolumeUp, HelpOutline } from '@mui/icons-material';
import { Scene } from '../../../../services/youtubeApi';
/** Props for the read-only body of a scene card. */
interface SceneContentProps {
/** Scene whose narration, visual prompt, visual cues and media URLs are displayed. */
scene: Scene;
/** URL used as the `src` of the generated image (presumably an object URL from the media hook — confirm with caller). */
imageBlobUrl?: string | null;
/** While true, a "Loading image..." spinner replaces the image. */
imageLoading?: boolean;
/** URL used as the `src` of the audio player (presumably an object URL from the media hook — confirm with caller). */
audioBlobUrl?: string | null;
/** While true, a "Loading audio..." spinner replaces the audio player. */
audioLoading?: boolean;
}
/**
 * Grey panel that shows a scene's narration, wrapped in quotation marks,
 * under an uppercase "Narration" heading with a help tooltip.
 */
const NarrationSection: React.FC<{ narration: string }> = (props) => {
  const { narration } = props;
  const helpText =
    'The spoken text or voiceover for this scene. This is what will be narrated in the final video.';

  return (
    <Box sx={{ p: 2, bgcolor: '#f9fafb', borderRadius: 1.5, border: '1px solid #e5e7eb' }}>
      <Box sx={{ display: 'flex', alignItems: 'center', gap: 1, mb: 1 }}>
        <RecordVoiceOver sx={{ color: '#6366f1', fontSize: 18 }} />
        <Typography
          variant="subtitle2"
          sx={{
            fontWeight: 600,
            fontSize: '0.875rem',
            color: '#111827',
            textTransform: 'uppercase',
            letterSpacing: '0.05em',
          }}
        >
          Narration
        </Typography>
        <Tooltip title={helpText} arrow>
          <IconButton size="small" sx={{ color: '#6b7280', p: 0.25, ml: 0.5 }}>
            <HelpOutline fontSize="small" />
          </IconButton>
        </Tooltip>
      </Box>
      <Typography
        variant="body1"
        sx={{
          fontStyle: 'italic',
          color: '#374151',
          fontSize: '0.9375rem',
          lineHeight: 1.7,
          fontWeight: 400,
          pl: 0.5,
        }}
      >
        "{narration}"
      </Typography>
    </Box>
  );
};
/**
 * Amber panel that shows a scene's visual prompt under an uppercase
 * "Visual Prompt" heading, with a tooltip explaining how the prompt is used.
 */
const VisualPromptSection: React.FC<{ visualPrompt: string }> = (props) => {
  const { visualPrompt } = props;
  const accentColor = '#d97706';

  // Rich tooltip content, built once per render and passed as the Tooltip title.
  const helpContent = (
    <Box>
      <Typography variant="body2" sx={{ fontWeight: 600, mb: 0.5 }}>
        Visual Prompt Explained
      </Typography>
      <Typography variant="caption" sx={{ display: 'block', mb: 0.5 }}>
        This describes the visual content that will be generated for this scene. The AI uses this to create appropriate images or video clips.
      </Typography>
      <Typography variant="caption" sx={{ display: 'block' }}>
        <strong>Tip:</strong> More detailed prompts lead to better visual results. Include camera angles, lighting, and composition details.
      </Typography>
    </Box>
  );

  return (
    <Box sx={{ p: 2, bgcolor: '#fef3c7', borderRadius: 1.5, border: '1px solid #fde68a' }}>
      <Box sx={{ display: 'flex', alignItems: 'center', gap: 1, mb: 1 }}>
        <Videocam sx={{ color: accentColor, fontSize: 18 }} />
        <Typography
          variant="subtitle2"
          sx={{
            fontWeight: 600,
            fontSize: '0.875rem',
            color: '#92400e',
            textTransform: 'uppercase',
            letterSpacing: '0.05em',
          }}
        >
          Visual Prompt
        </Typography>
        <Tooltip title={helpContent} arrow>
          <IconButton size="small" sx={{ color: accentColor, p: 0.25, ml: 0.5 }}>
            <HelpOutline fontSize="small" />
          </IconButton>
        </Tooltip>
      </Box>
      <Typography
        variant="body2"
        sx={{
          color: '#78350f',
          fontSize: '0.875rem',
          lineHeight: 1.7,
          pl: 0.5,
          fontWeight: 400,
        }}
      >
        {visualPrompt}
      </Typography>
    </Box>
  );
};
/**
 * Blue panel that lists a scene's visual cues as chips under an uppercase
 * "Visual Cues" heading. Each chip carries its own "Visual effect: …" tooltip.
 */
const VisualCuesSection: React.FC<{ visualCues: string[] }> = (props) => {
  const { visualCues } = props;
  const accentColor = '#0284c7';

  // Rich tooltip content for the heading's help icon.
  const helpContent = (
    <Box>
      <Typography variant="body2" sx={{ fontWeight: 600, mb: 0.5 }}>
        Visual Cues Explained
      </Typography>
      <Typography variant="caption" sx={{ display: 'block', mb: 0.5 }}>
        These are specific visual effects, camera techniques, or stylistic elements that will be applied to enhance the scene.
      </Typography>
      <Typography variant="caption" sx={{ display: 'block' }}>
        Examples: Quick Zoom, Sunlight Flare, Energetic Cut, Steady Cam Walk, etc.
      </Typography>
    </Box>
  );

  // The index is part of the key because the same cue text may appear twice.
  const cueChips = visualCues.map((cueLabel, position) => (
    <Tooltip key={`${cueLabel}-${position}`} title={`Visual effect: ${cueLabel}`} arrow>
      <Chip
        label={cueLabel}
        size="small"
        sx={{
          fontSize: '0.75rem',
          height: 28,
          textTransform: 'capitalize',
          borderColor: '#7dd3fc',
          bgcolor: '#ffffff',
          color: '#0c4a6e',
          fontWeight: 500,
          '&:hover': {
            bgcolor: '#e0f2fe',
            borderColor: '#0284c7',
          },
        }}
      />
    </Tooltip>
  ));

  return (
    <Box sx={{ p: 2, bgcolor: '#f0f9ff', borderRadius: 1.5, border: '1px solid #bae6fd' }}>
      <Box sx={{ display: 'flex', alignItems: 'center', gap: 1, mb: 1.5 }}>
        <AutoAwesome sx={{ color: accentColor, fontSize: 18 }} />
        <Typography
          variant="subtitle2"
          sx={{
            fontWeight: 600,
            fontSize: '0.875rem',
            color: '#0c4a6e',
            textTransform: 'uppercase',
            letterSpacing: '0.05em',
          }}
        >
          Visual Cues
        </Typography>
        <Tooltip title={helpContent} arrow>
          <IconButton size="small" sx={{ color: accentColor, p: 0.25, ml: 0.5 }}>
            <HelpOutline fontSize="small" />
          </IconButton>
        </Tooltip>
      </Box>
      <Stack direction="row" spacing={0.75} flexWrap="wrap" useFlexGap>
        {cueChips}
      </Stack>
    </Box>
  );
};
/**
 * Green panel used as the frame for generated media (image or audio).
 *
 * Shows `icon` and an uppercase `title`, a help tooltip whose text embeds the
 * lower-cased title, and then the supplied `children`.
 */
const GeneratedMediaSection: React.FC<{
  title: string;
  icon: React.ReactNode;
  children: React.ReactNode;
}> = (props) => {
  const { title, icon, children } = props;
  const helpText = `This is the AI-generated ${title.toLowerCase()} for this scene. It will be used when rendering the video.`;

  return (
    <Box sx={{ mt: 2, p: 2, bgcolor: '#f0fdf4', borderRadius: 1.5, border: '1px solid #86efac' }}>
      <Box sx={{ display: 'flex', alignItems: 'center', gap: 1, mb: 1.5 }}>
        {icon}
        <Typography
          variant="subtitle2"
          sx={{
            fontWeight: 600,
            fontSize: '0.875rem',
            color: '#166534',
            textTransform: 'uppercase',
            letterSpacing: '0.05em',
          }}
        >
          {title}
        </Typography>
        <Tooltip title={helpText} arrow>
          <IconButton size="small" sx={{ color: '#16a34a', p: 0.25, ml: 0.5 }}>
            <HelpOutline fontSize="small" />
          </IconButton>
        </Tooltip>
      </Box>
      {children}
    </Box>
  );
};
/**
 * Read-only body of a scene card.
 *
 * Always renders the narration and visual prompt. Visual cues are shown only
 * when the scene has at least one. The generated-image card appears whenever
 * the scene has an `imageUrl` (spinner while loading, the image once a blob URL
 * is available, otherwise a hint). The generated-audio card appears only when
 * the scene has an `audioUrl` and the audio is either loading or loaded.
 */
export const SceneContent: React.FC<SceneContentProps> = (props) => {
  const { scene, imageBlobUrl, imageLoading, audioBlobUrl, audioLoading } = props;

  // Spinner row shared by the image and audio cards.
  const renderLoadingRow = (message: string) => (
    <Box sx={{ display: 'flex', alignItems: 'center', gap: 1, py: 2 }}>
      <CircularProgress size={20} />
      <Typography variant="body2" color="text.secondary">
        {message}
      </Typography>
    </Box>
  );

  // Image card content: loading -> spinner, blob ready -> image, else hint text.
  const renderImageBody = () => {
    if (imageLoading) {
      return renderLoadingRow('Loading image...');
    }
    if (!imageBlobUrl) {
      return (
        <Typography variant="body2" color="text.secondary">
          Image not available yet. If this persists, try regenerating or refresh the page.
        </Typography>
      );
    }
    return (
      <Box
        component="img"
        src={imageBlobUrl}
        alt={scene.title}
        sx={{
          width: '100%',
          maxHeight: 300,
          borderRadius: 1,
          objectFit: 'contain',
          border: '1px solid #86efac',
        }}
        onError={(e) => {
          console.error('[SceneContent] Image failed to load:', {
            src: e.currentTarget.src,
            imageUrl: scene.imageUrl,
          });
        }}
      />
    );
  };

  // Audio card content: loading -> spinner, blob ready -> player, else nothing.
  const renderAudioBody = () => {
    if (audioLoading) {
      return renderLoadingRow('Loading audio...');
    }
    if (!audioBlobUrl) {
      return null;
    }
    return (
      <Box
        component="audio"
        controls
        src={audioBlobUrl}
        sx={{
          width: '100%',
          borderRadius: 1,
          border: '1px solid #86efac',
        }}
      />
    );
  };

  const cues = scene.visual_cues;
  const showAudioCard = Boolean(scene.audioUrl) && (Boolean(audioBlobUrl) || audioLoading);

  return (
    <Stack spacing={2.5}>
      {/* Narration */}
      <NarrationSection narration={scene.narration} />

      {/* Visual prompt */}
      <VisualPromptSection visualPrompt={scene.visual_prompt} />

      {/* Visual cues (only when present) */}
      {cues && cues.length > 0 ? <VisualCuesSection visualCues={cues} /> : null}

      {/* Generated image */}
      {scene.imageUrl ? (
        <GeneratedMediaSection
          title="Generated Image"
          icon={<ImageIcon sx={{ color: '#16a34a', fontSize: 18 }} />}
        >
          {renderImageBody()}
        </GeneratedMediaSection>
      ) : null}

      {/* Generated audio */}
      {showAudioCard ? (
        <GeneratedMediaSection
          title="Generated Audio"
          icon={<VolumeUp sx={{ color: '#16a34a', fontSize: 18 }} />}
        >
          {renderAudioBody()}
        </GeneratedMediaSection>
      ) : null}
    </Stack>
  );
};

View File

@@ -0,0 +1,82 @@
import React from 'react';
import {
Stack,
TextField,
Button,
Box,
} from '@mui/material';
import { Check, Close } from '@mui/icons-material';
import { Scene } from '../../../../services/youtubeApi';
import { inputSx, labelSx } from '../../styles';
interface SceneEditFormProps {
  /** Saved scene; supplies a field's value when no pending edit exists for it. */
  scene: Scene;
  /** Pending edits; a field set here takes precedence over the saved scene. */
  editedScene: Partial<Scene>;
  /** Receives a partial update each time a field changes. */
  onEditChange: (updates: Partial<Scene>) => void;
  /** Invoked by the Save button. */
  onSave: () => void;
  /** Invoked by the Cancel button. */
  onCancel: () => void;
  /** Disables the Save button while true. */
  loading: boolean;
}

/**
 * Inline editor for a scene's narration, visual prompt and duration.
 *
 * The form is fully controlled: it holds no state of its own and reports every
 * keystroke through `onEditChange`. A duration that does not parse to a truthy
 * number (empty, non-numeric or zero) is reported as 5.
 */
export const SceneEditForm: React.FC<SceneEditFormProps> = (props) => {
  const { scene, editedScene, onEditChange, onSave, onCancel, loading } = props;

  type FieldChangeEvent = React.ChangeEvent<HTMLInputElement | HTMLTextAreaElement>;

  // Pending edit wins; otherwise fall back to the saved value.
  const narrationValue = editedScene.narration ?? scene.narration;
  const visualPromptValue = editedScene.visual_prompt ?? scene.visual_prompt;
  const durationValue = editedScene.duration_estimate ?? scene.duration_estimate;

  const handleNarrationChange = (event: FieldChangeEvent) => {
    onEditChange({ narration: event.target.value });
  };

  const handleVisualPromptChange = (event: FieldChangeEvent) => {
    onEditChange({ visual_prompt: event.target.value });
  };

  const handleDurationChange = (event: FieldChangeEvent) => {
    const parsedSeconds = parseFloat(event.target.value);
    onEditChange({ duration_estimate: parsedSeconds || 5 });
  };

  return (
    <Stack spacing={2}>
      <TextField
        label="Narration"
        value={narrationValue}
        onChange={handleNarrationChange}
        multiline
        rows={3}
        fullWidth
        sx={inputSx}
        InputLabelProps={{ sx: labelSx }}
      />
      <TextField
        label="Visual Prompt"
        value={visualPromptValue}
        onChange={handleVisualPromptChange}
        multiline
        rows={2}
        fullWidth
        sx={inputSx}
        InputLabelProps={{ sx: labelSx }}
      />
      <TextField
        label="Duration (seconds)"
        type="number"
        value={durationValue}
        onChange={handleDurationChange}
        inputProps={{ min: 1, max: 10, step: 0.5 }}
        fullWidth
        sx={inputSx}
        InputLabelProps={{ sx: labelSx }}
      />
      <Box sx={{ display: 'flex', gap: 1 }}>
        <Button
          size="small"
          variant="contained"
          startIcon={<Check />}
          onClick={onSave}
          disabled={loading}
        >
          Save
        </Button>
        <Button size="small" variant="outlined" startIcon={<Close />} onClick={onCancel}>
          Cancel
        </Button>
      </Box>
    </Stack>
  );
};

View File

@@ -0,0 +1,203 @@
import React from 'react';
import {
Typography,
Stack,
Chip,
Box,
FormControlLabel,
Switch,
IconButton,
Tooltip,
} from '@mui/material';
import { Edit, HelpOutline } from '@mui/icons-material';
import { Scene } from '../../../../services/youtubeApi';
// Helper functions
/**
 * Picks a border colour (hex string) from a scene's first emphasis tag.
 *
 * Only the first tag is inspected. A missing or empty tag list, `main_content`
 * and any unrecognised tag all resolve to the neutral gray.
 */
const getSceneBorderColor = (emphasisTags?: string[]): string => {
  const neutralGray = '#e5e7eb';
  const leadTag = emphasisTags?.[0];

  if (leadTag === 'hook') return '#3b82f6'; // blue
  if (leadTag === 'cta') return '#8b5cf6'; // purple
  if (leadTag === 'transition') return '#10b981'; // green
  return neutralGray;
};
/**
 * Returns the emoji shown on a scene's emphasis chip.
 *
 * `main_content` and every unrecognised tag share the right-arrow emoji.
 */
const getSceneIcon = (emphasisTag: string) => {
  if (emphasisTag === 'hook') {
    return '🎬'; // clapper board
  }
  if (emphasisTag === 'cta') {
    return '📣'; // megaphone
  }
  if (emphasisTag === 'transition') {
    return '🔄'; // circular arrows
  }
  return '➡️'; // right arrow
};
/**
 * Maps an emphasis tag to a chip colour name: `hook` is `primary`, `cta` is
 * `secondary`, and everything else is `default`.
 */
const getSceneChipColor = (emphasisTag: string): 'primary' | 'secondary' | 'default' => {
  if (emphasisTag === 'hook') return 'primary';
  return emphasisTag === 'cta' ? 'secondary' : 'default';
};
interface SceneHeaderProps {
  /** Scene whose number, title, emphasis tags, duration and enabled flag are shown. */
  scene: Scene;
  /** While true the edit button is hidden. */
  isEditing: boolean;
  /** Called with the scene number when the enable switch changes. */
  onToggle: (sceneNumber: number) => void;
  /** Called with the scene when the edit button is clicked. */
  onEdit: (scene: Scene) => void;
}

/** Display text for a tag identifier: every underscore becomes a space. */
const formatEmphasisTag = (tag: string): string => tag.replace(/_/g, ' ');

/** Long explanation of a scene type for the help tooltip; unknown tags read as main content. */
const describeSceneType = (tag?: string): string => {
  switch (tag) {
    case 'hook':
      return 'Hook scenes capture attention in the first few seconds with compelling visuals or statements.';
    case 'cta':
      return 'Call-to-action scenes encourage viewers to like, subscribe, or take a specific action.';
    case 'transition':
      return 'Transition scenes smoothly connect different topics or segments.';
    default:
      return 'Main content scenes deliver the core message and information.';
  }
};

/** One-line tooltip for an emphasis chip; unknown tags read as main content. */
const describeEmphasisTag = (tag: string): string => {
  switch (tag) {
    case 'hook':
      return 'Hook: Grabs viewer attention immediately';
    case 'cta':
      return 'CTA: Encourages viewer action';
    case 'transition':
      return 'Transition: Connects segments smoothly';
    default:
      return 'Main Content: Core message delivery';
  }
};

/**
 * Header row of a scene card: title with a help tooltip, emphasis chips, a
 * duration chip, the enable switch and, when the scene is not being edited,
 * an edit button.
 *
 * A scene counts as enabled unless `scene.enabled` is exactly `false`.
 */
export const SceneHeader: React.FC<SceneHeaderProps> = ({
  scene,
  isEditing,
  onToggle,
  onEdit,
}) => {
  // The first emphasis tag determines the "scene type" shown in the help tooltip.
  const primaryTag = scene.emphasis_tags?.[0];
  const sceneEnabled = scene.enabled !== false;

  return (
    <Box sx={{ display: 'flex', justifyContent: 'space-between', alignItems: 'flex-start', mb: 2.5 }}>
      <Box sx={{ flexGrow: 1 }}>
        <Box sx={{ display: 'flex', alignItems: 'center', gap: 1, mb: 1.5 }}>
          <Typography
            variant="h6"
            sx={{
              mb: 0,
              fontWeight: 700,
              fontSize: '1.125rem',
              color: '#111827',
              letterSpacing: '-0.01em',
            }}
          >
            Scene {scene.scene_number}: {scene.title}
          </Typography>
          <Tooltip
            title={
              <Box>
                <Typography variant="body2" sx={{ fontWeight: 600, mb: 0.5 }}>
                  Scene Type: {(primaryTag && formatEmphasisTag(primaryTag)) || 'Main Content'}
                </Typography>
                <Typography variant="caption" sx={{ display: 'block', mb: 0.5 }}>
                  {describeSceneType(primaryTag)}
                </Typography>
                <Typography variant="caption" sx={{ display: 'block' }}>
                  Duration: {scene.duration_estimate}s. This affects rendering cost.
                </Typography>
              </Box>
            }
            arrow
            placement="top"
          >
            <IconButton size="small" sx={{ color: '#6b7280', p: 0.5 }}>
              <HelpOutline fontSize="small" />
            </IconButton>
          </Tooltip>
        </Box>
        <Stack direction="row" spacing={1} sx={{ mb: 0 }} flexWrap="wrap" useFlexGap>
          {scene.emphasis_tags?.map((tag, index) => (
            // The index is part of the key so a repeated tag cannot produce duplicate keys.
            <Tooltip key={`${tag}-${index}`} title={describeEmphasisTag(tag)} arrow>
              <Chip
                label={formatEmphasisTag(tag)}
                size="small"
                color={getSceneChipColor(tag)}
                icon={<span>{getSceneIcon(tag)}</span>}
                sx={{
                  textTransform: 'capitalize',
                  fontWeight: 600,
                  fontSize: '0.75rem',
                }}
              />
            </Tooltip>
          ))}
          <Tooltip
            title="Estimated duration in seconds. Longer scenes cost more to render but provide more detail."
            arrow
          >
            <Chip
              label={`~${scene.duration_estimate}s`}
              size="small"
              variant="outlined"
              sx={{
                ml: 'auto',
                fontWeight: 600,
                fontSize: '0.75rem',
                borderColor: '#d1d5db',
                color: '#374151',
              }}
            />
          </Tooltip>
        </Stack>
      </Box>
      <Box sx={{ display: 'flex', alignItems: 'center', gap: 1 }}>
        <Tooltip
          title={
            sceneEnabled
              ? 'Disable this scene to exclude it from rendering and reduce cost'
              : 'Enable this scene to include it in the final video'
          }
          arrow
        >
          <FormControlLabel
            control={
              <Switch
                checked={sceneEnabled}
                onChange={() => onToggle(scene.scene_number)}
                size="small"
              />
            }
            label="Enable"
            sx={{ mr: 0 }}
          />
        </Tooltip>
        {!isEditing && (
          <Tooltip title="Edit scene narration, visual prompt, or duration" arrow>
            <IconButton
              size="small"
              onClick={() => onEdit(scene)}
              color="primary"
              sx={{
                border: '1px solid #e5e7eb',
                '&:hover': {
                  bgcolor: '#f9fafb',
                },
              }}
            >
              <Edit fontSize="small" />
            </IconButton>
          </Tooltip>
        )}
      </Box>
    </Box>
  );
};

View File

@@ -0,0 +1,2 @@
export { useSceneMedia } from './useSceneMedia';
export { useGenerationState } from './useGenerationState';

View File

@@ -0,0 +1,67 @@
// Hook for managing generation state
import { useState } from 'react';
/** Shape of the audio settings held by `useGenerationState`. */
interface AudioSettings {
  voiceId: string;
  speed: number;
  volume: number;
  pitch: number;
  emotion: string;
  englishNormalization: boolean;
  bitrate: number;
  channel: string;
  format: string;
  enableSyncMode: boolean;
}

/**
 * Bundles the UI state used while generating a scene's image and audio:
 * visibility flags for the two settings modals, the current audio settings,
 * and a progress number plus status string for each generation kind.
 *
 * The two `reset*` helpers restore a kind's progress to 0 and its status to ''.
 */
export const useGenerationState = () => {
  // Settings-modal visibility
  const [showAudioSettingsModal, setShowAudioSettingsModal] = useState(false);
  const [showImageSettingsModal, setShowImageSettingsModal] = useState(false);

  // Initial audio settings, built once per hook instance.
  const [currentAudioSettings, setCurrentAudioSettings] = useState<AudioSettings>(() => ({
    voiceId: 'Casual_Guy',
    speed: 1.15,
    volume: 1.0,
    pitch: 0.0,
    emotion: 'happy',
    englishNormalization: true,
    bitrate: 128000,
    channel: '1',
    format: 'mp3',
    enableSyncMode: true,
  }));

  // Image generation feedback
  const [imageGenerationProgress, setImageGenerationProgress] = useState(0);
  const [imageGenerationStatus, setImageGenerationStatus] = useState<string>('');

  // Audio generation feedback
  const [audioGenerationProgress, setAudioGenerationProgress] = useState(0);
  const [audioGenerationStatus, setAudioGenerationStatus] = useState<string>('');

  function resetImageGeneration(): void {
    setImageGenerationProgress(0);
    setImageGenerationStatus('');
  }

  function resetAudioGeneration(): void {
    setAudioGenerationProgress(0);
    setAudioGenerationStatus('');
  }

  return {
    showAudioSettingsModal,
    setShowAudioSettingsModal,
    showImageSettingsModal,
    setShowImageSettingsModal,
    currentAudioSettings,
    setCurrentAudioSettings,
    imageGenerationProgress,
    setImageGenerationProgress,
    imageGenerationStatus,
    setImageGenerationStatus,
    audioGenerationProgress,
    setAudioGenerationProgress,
    audioGenerationStatus,
    setAudioGenerationStatus,
    resetImageGeneration,
    resetAudioGeneration,
  };
};

View File

@@ -0,0 +1,54 @@
// Hook for managing scene media (images and audio)
import { useState, useEffect } from 'react';
import { fetchMediaBlobUrl } from '../../../../../utils/fetchMediaBlobUrl';
interface UseSceneMediaProps {
  /** Source URL of the scene image; a falsy value clears the image state. */
  imageUrl?: string | null;
  /** Source URL of the scene audio; a falsy value clears the audio state. */
  audioUrl?: string | null;
}

/**
 * Resolves one media source URL through `fetchMediaBlobUrl` and tracks the
 * resulting URL together with a loading flag.
 *
 * Lifecycle guarantees:
 * - The URL produced for a given `sourceUrl` is revoked when `sourceUrl`
 *   changes or the component unmounts. It is held in an effect-local variable,
 *   because state read inside the cleanup closure would be stale.
 * - A fetch that settles after its effect was cleaned up is ignored, and its
 *   URL is revoked immediately, so an older response cannot overwrite a newer
 *   one and nothing is set on an unmounted component.
 * - Only `blob:` URLs are revoked.
 *
 * NOTE(review): assumes `fetchMediaBlobUrl` hands ownership of the returned
 * object URL to the caller (i.e. it is not shared or cached) — confirm.
 */
const useMediaBlobUrl = (sourceUrl?: string | null) => {
  const [blobUrl, setBlobUrl] = useState<string | null>(null);
  const [loading, setLoading] = useState(false);

  useEffect(() => {
    if (!sourceUrl) {
      setBlobUrl(null);
      setLoading(false);
      return;
    }

    let cancelled = false;
    let ownedUrl: string | null = null;

    const revokeIfBlob = (url: string | null) => {
      if (url?.startsWith('blob:')) URL.revokeObjectURL(url);
    };

    // Drop the previous URL from state: the previous effect's cleanup has just revoked it.
    setBlobUrl(null);
    setLoading(true);

    fetchMediaBlobUrl(sourceUrl)
      .then((resolvedUrl) => {
        if (cancelled) {
          // Nobody will ever display this URL; release it right away.
          revokeIfBlob(resolvedUrl);
          return;
        }
        ownedUrl = resolvedUrl;
        setBlobUrl(resolvedUrl);
      })
      .catch((error) => {
        console.error(error);
      })
      .finally(() => {
        if (!cancelled) setLoading(false);
      });

    return () => {
      cancelled = true;
      revokeIfBlob(ownedUrl);
    };
  }, [sourceUrl]);

  return { blobUrl, loading };
};

/**
 * Loads a scene's image and audio into locally usable URLs.
 *
 * @param imageUrl - source URL of the scene image (optional).
 * @param audioUrl - source URL of the scene audio (optional).
 * @returns the resolved URL (or `null`) and a loading flag for each medium.
 */
export const useSceneMedia = ({ imageUrl, audioUrl }: UseSceneMediaProps) => {
  const image = useMediaBlobUrl(imageUrl);
  const audio = useMediaBlobUrl(audioUrl);

  return {
    imageBlobUrl: image.blobUrl,
    imageLoading: image.loading,
    audioBlobUrl: audio.blobUrl,
    audioLoading: audio.loading,
  };
};

View File

@@ -0,0 +1,215 @@
/**
* Scene Generation Step Component
*
* Third step: Generate images and audio for each scene before video rendering.
*/
import React, { useMemo } from 'react';
import {
Paper,
Typography,
Stack,
Button,
Box,
Alert,
} from '@mui/material';
import { ArrowForward, ArrowBack, CheckCircle, Warning } from '@mui/icons-material';
import { motion } from 'framer-motion';
import { Scene, VideoPlan } from '../../../services/youtubeApi';
import { YT_BG, YT_BORDER, YT_TEXT } from '../constants';
import { SceneCard } from './SceneCard';
import { AssetGenerationCostCard } from './AssetGenerationCostCard';
import type { AudioGenerationSettings } from '../../shared/AudioSettingsModal';
import type { YouTubeImageGenerationSettings } from '../shared';
interface SceneGenerationStepProps {
  /** All scenes of the plan; disabled scenes are listed but ignored for readiness. */
  scenes: Scene[];
  /** Accepted for interface compatibility; this component does not read it. */
  videoPlan: VideoPlan | null;
  /** Scene number currently in edit mode, if any. */
  editingSceneId: number | null;
  /** Pending edits, forwarded to every scene card. */
  editedScene: Partial<Scene> | null;
  onEditScene: (scene: Scene) => void;
  onSaveScene: () => void;
  onCancelEdit: () => void;
  onEditChange: (updates: Partial<Scene>) => void;
  onToggleScene: (sceneNumber: number) => void;
  onGenerateImage?: (scene: Scene, settings?: YouTubeImageGenerationSettings) => Promise<void>;
  /** Scene number whose image is being generated, if any. */
  generatingImageSceneId?: number | null;
  onGenerateAudio?: (scene: Scene, settings?: AudioGenerationSettings) => Promise<void>;
  /** Scene number whose audio is being generated, if any. */
  generatingAudioSceneId?: number | null;
  loading: boolean;
  avatarUrl?: string | null;
  videoPlanIdea?: string;
  onBack: () => void;
  onNext: () => void;
}

/**
 * Summarises which enabled scenes already have their assets.
 *
 * A scene is enabled unless `enabled === false`, and ready when it has both an
 * `imageUrl` and an `audioUrl`. `allReady` requires at least one enabled scene.
 */
const computeSceneReadiness = (scenes: Scene[]) => {
  const enabledScenes = scenes.filter((s) => s.enabled !== false);
  const readyCount = enabledScenes.filter((s) => s.imageUrl && s.audioUrl).length;
  const scenesMissingImages = enabledScenes.filter((s) => !s.imageUrl).map((s) => s.scene_number);
  const scenesMissingAudio = enabledScenes.filter((s) => !s.audioUrl).map((s) => s.scene_number);

  return {
    allReady: enabledScenes.length > 0 && readyCount === enabledScenes.length,
    readyCount,
    totalEnabled: enabledScenes.length,
    missingImageCount: scenesMissingImages.length,
    missingAudioCount: scenesMissingAudio.length,
    scenesMissingImages,
    scenesMissingAudio,
  };
};

/**
 * Third wizard step: generate an image and audio narration for every scene
 * before the video is rendered.
 *
 * Shows a cost card, a readiness alert and one `SceneCard` per scene. The
 * "next" button stays disabled until every enabled scene has both assets.
 * The warning alert lists only the asset kinds that are actually missing, and
 * says so explicitly when no scene is enabled.
 */
export const SceneGenerationStep: React.FC<SceneGenerationStepProps> = React.memo(({
  scenes,
  editingSceneId,
  editedScene,
  onEditScene,
  onSaveScene,
  onCancelEdit,
  onEditChange,
  onToggleScene,
  onGenerateImage,
  generatingImageSceneId,
  onGenerateAudio,
  generatingAudioSceneId,
  loading,
  avatarUrl,
  videoPlanIdea,
  onBack,
  onNext,
}) => {
  // Recomputed only when the scenes array changes.
  const sceneReadiness = useMemo(() => computeSceneReadiness(scenes), [scenes]);
  const canProceed = sceneReadiness.allReady;
  const noScenesEnabled = sceneReadiness.totalEnabled === 0;

  return (
    <motion.div
      initial={{ opacity: 0, y: 20 }}
      animate={{ opacity: 1, y: 0 }}
    >
      <Paper
        sx={{
          p: 4,
          backgroundColor: 'white',
          border: `1px solid ${YT_BORDER}`,
        }}
      >
        <Typography variant="h5" sx={{ mb: 3, fontWeight: 600, color: YT_TEXT }}>
          3 Generate Scene Assets
        </Typography>
        <Typography variant="body2" sx={{ mb: 3, color: '#64748b' }}>
          Generate custom images and audio narration for each scene. All scenes must have both an image and audio before you can render the final video.
        </Typography>

        {/* Cost Estimate */}
        <AssetGenerationCostCard scenes={scenes} />

        {/* Readiness Alert */}
        {sceneReadiness.allReady ? (
          <Alert
            severity="success"
            icon={<CheckCircle />}
            sx={{
              mb: 3,
              bgcolor: '#f0fdf4',
              border: '1px solid #86efac',
              '& .MuiAlert-icon': {
                color: '#16a34a',
              },
            }}
          >
            <Typography variant="body2" sx={{ fontWeight: 600, mb: 0.5 }}>
              All scenes are ready!
            </Typography>
            <Typography variant="caption" sx={{ fontSize: '0.75rem' }}>
              {sceneReadiness.readyCount} of {sceneReadiness.totalEnabled} enabled scenes have both images and audio. You can proceed to render your video.
            </Typography>
          </Alert>
        ) : (
          <Alert
            severity="warning"
            icon={<Warning />}
            sx={{
              mb: 3,
              bgcolor: '#fffbeb',
              border: '1px solid #fde68a',
              '& .MuiAlert-icon': {
                color: '#d97706',
              },
            }}
          >
            <Typography variant="body2" sx={{ fontWeight: 600, mb: 1 }}>
              {noScenesEnabled ? 'No scenes are enabled' : 'Some scenes need assets generated'}
            </Typography>
            {noScenesEnabled && (
              <Typography variant="caption" sx={{ display: 'block', mb: 0.5 }}>
                Enable at least one scene to continue.
              </Typography>
            )}
            {sceneReadiness.missingImageCount > 0 && (
              <Typography variant="caption" sx={{ display: 'block', mb: 0.5 }}>
                {sceneReadiness.missingImageCount} scene(s) need images: {sceneReadiness.scenesMissingImages.join(', ')}
              </Typography>
            )}
            {sceneReadiness.missingAudioCount > 0 && (
              <Typography variant="caption" sx={{ display: 'block', mb: 0.5 }}>
                {sceneReadiness.missingAudioCount} scene(s) need audio: {sceneReadiness.scenesMissingAudio.join(', ')}
              </Typography>
            )}
            {!noScenesEnabled && (
              <Typography variant="caption" sx={{ display: 'block', mt: 1, fontStyle: 'italic' }}>
                Click "Generate Image" and "Generate Audio" buttons on each scene card below.
              </Typography>
            )}
          </Alert>
        )}

        {/* Scene Cards */}
        {scenes.length > 0 && (
          <Box sx={{ mb: 3 }}>
            <Stack spacing={2}>
              {scenes.map((scene) => (
                <SceneCard
                  key={scene.scene_number}
                  scene={scene}
                  isEditing={editingSceneId === scene.scene_number}
                  editedScene={editedScene}
                  onToggle={onToggleScene}
                  onEdit={onEditScene}
                  onSave={onSaveScene}
                  onCancel={onCancelEdit}
                  onEditChange={onEditChange}
                  loading={loading}
                  onGenerateImage={onGenerateImage}
                  generatingImage={generatingImageSceneId === scene.scene_number}
                  onGenerateAudio={onGenerateAudio}
                  generatingAudio={generatingAudioSceneId === scene.scene_number}
                  avatarUrl={avatarUrl}
                  videoPlanIdea={videoPlanIdea}
                />
              ))}
            </Stack>
          </Box>
        )}

        {/* Action Buttons */}
        <Box sx={{ display: 'flex', gap: 2, mt: 4 }}>
          <Button
            variant="outlined"
            startIcon={<ArrowBack />}
            onClick={onBack}
          >
            Back to Scenes
          </Button>
          <Button
            variant="contained"
            color="error"
            size="large"
            endIcon={<ArrowForward />}
            onClick={onNext}
            disabled={!canProceed}
            sx={{ px: 4 }}
          >
            {canProceed
              ? 'Proceed to Video Rendering'
              : `Generate Assets (${sceneReadiness.readyCount}/${sceneReadiness.totalEnabled} ready)`}
          </Button>
        </Box>
      </Paper>
    </motion.div>
  );
});
SceneGenerationStep.displayName = 'SceneGenerationStep';

View File

@@ -0,0 +1,249 @@
/**
* Scene Preview Modal
*
* Shows a preview of scene image and audio with playback controls.
*/
import React, { useState, useEffect } from 'react';
import {
Dialog,
DialogTitle,
DialogContent,
IconButton,
Box,
Typography,
Stack,
CircularProgress,
} from '@mui/material';
import {
Close as CloseIcon,
PlayArrow,
Pause,
VolumeUp,
} from '@mui/icons-material';
import { fetchMediaBlobUrl } from '../../../utils/fetchMediaBlobUrl';
interface ScenePreviewModalProps {
  /** Whether the dialog is shown; media is only fetched while open. */
  open: boolean;
  /** Called after playback has been paused when the dialog is dismissed. */
  onClose: () => void;
  sceneTitle: string;
  sceneNumber: number;
  imageUrl?: string | null;
  audioUrl?: string | null;
}

/**
 * Resolves `sourceUrl` through `fetchMediaBlobUrl` while `enabled` is true.
 *
 * The resolved URL is kept in an effect-local variable so the cleanup revokes
 * the URL it actually created (state read inside the cleanup closure would be
 * stale). A fetch that settles after cleanup is ignored and its URL revoked at
 * once. Only `blob:` URLs are revoked.
 *
 * NOTE(review): assumes `fetchMediaBlobUrl` hands ownership of the returned
 * object URL to the caller — confirm.
 */
const usePreviewBlobUrl = (sourceUrl: string | null | undefined, enabled: boolean) => {
  const [blobUrl, setBlobUrl] = useState<string | null>(null);
  const [loading, setLoading] = useState(false);

  useEffect(() => {
    if (!sourceUrl || !enabled) {
      setBlobUrl(null);
      setLoading(false);
      return;
    }

    let cancelled = false;
    let ownedUrl: string | null = null;

    const revokeIfBlob = (url: string | null) => {
      if (url?.startsWith('blob:')) URL.revokeObjectURL(url);
    };

    // Any previous URL was revoked by the previous cleanup; do not keep showing it.
    setBlobUrl(null);
    setLoading(true);

    fetchMediaBlobUrl(sourceUrl)
      .then((resolvedUrl) => {
        if (cancelled) {
          revokeIfBlob(resolvedUrl);
          return;
        }
        ownedUrl = resolvedUrl;
        setBlobUrl(resolvedUrl);
      })
      .catch((error) => {
        console.error(error);
      })
      .finally(() => {
        if (!cancelled) setLoading(false);
      });

    return () => {
      cancelled = true;
      revokeIfBlob(ownedUrl);
    };
  }, [sourceUrl, enabled]);

  return { blobUrl, loading };
};

/**
 * Dialog that previews a scene's image and lets the user play its narration.
 *
 * Playback state is driven by the audio element's own `play` / `pause` /
 * `ended` events, so the button always reflects what the element is doing,
 * even when `play()` is rejected or the audio source changes.
 */
export const ScenePreviewModal: React.FC<ScenePreviewModalProps> = ({
  open,
  onClose,
  sceneTitle,
  sceneNumber,
  imageUrl,
  audioUrl,
}) => {
  const { blobUrl: imageBlobUrl, loading: imageLoading } = usePreviewBlobUrl(imageUrl, open);
  const { blobUrl: audioBlobUrl, loading: audioLoading } = usePreviewBlobUrl(audioUrl, open);
  const [isPlaying, setIsPlaying] = useState(false);
  const [audioElement, setAudioElement] = useState<HTMLAudioElement | null>(null);

  // Create an audio element for the current blob and mirror its playback state.
  useEffect(() => {
    // A new or cleared source always starts out stopped.
    setIsPlaying(false);

    if (!audioBlobUrl) {
      setAudioElement(null);
      return;
    }

    const audio = new Audio(audioBlobUrl);
    const markPlaying = () => setIsPlaying(true);
    const markStopped = () => setIsPlaying(false);

    audio.addEventListener('play', markPlaying);
    audio.addEventListener('pause', markStopped);
    audio.addEventListener('ended', markStopped);
    setAudioElement(audio);

    return () => {
      // Detach the listeners first so the pause below cannot trigger a state update.
      audio.removeEventListener('play', markPlaying);
      audio.removeEventListener('pause', markStopped);
      audio.removeEventListener('ended', markStopped);
      audio.pause();
    };
  }, [audioBlobUrl]);

  const togglePlayPause = () => {
    if (!audioElement) return;

    if (audioElement.paused) {
      // play() returns a promise that rejects when playback cannot start.
      audioElement.play().catch((error) => {
        console.error('[ScenePreviewModal] Audio playback failed:', error);
        setIsPlaying(false);
      });
    } else {
      audioElement.pause();
    }
  };

  const handleClose = () => {
    if (audioElement) {
      audioElement.pause();
      setIsPlaying(false);
    }
    onClose();
  };

  return (
    <Dialog
      open={open}
      onClose={handleClose}
      maxWidth="md"
      fullWidth
      PaperProps={{
        sx: {
          borderRadius: 3,
          bgcolor: '#f8fafc',
        },
      }}
    >
      <DialogTitle>
        <Stack direction="row" justifyContent="space-between" alignItems="center">
          <Box>
            <Typography variant="h6" sx={{ fontWeight: 700, color: '#1e293b' }}>
              Scene {sceneNumber} Preview
            </Typography>
            <Typography variant="body2" sx={{ color: '#64748b', mt: 0.5 }}>
              {sceneTitle}
            </Typography>
          </Box>
          <IconButton onClick={handleClose} size="small">
            <CloseIcon />
          </IconButton>
        </Stack>
      </DialogTitle>
      <DialogContent>
        <Stack spacing={3}>
          {/* Image Preview */}
          {imageUrl && (
            <Box>
              <Typography variant="subtitle2" sx={{ mb: 1.5, fontWeight: 600, color: '#475569' }}>
                🖼 Scene Image
              </Typography>
              {imageLoading ? (
                <Box sx={{ display: 'flex', justifyContent: 'center', p: 4 }}>
                  <CircularProgress />
                </Box>
              ) : imageBlobUrl ? (
                <Box
                  component="img"
                  src={imageBlobUrl}
                  alt={sceneTitle}
                  sx={{
                    width: '100%',
                    height: 'auto',
                    borderRadius: 2,
                    boxShadow: '0 4px 6px -1px rgba(0, 0, 0, 0.1)',
                  }}
                />
              ) : (
                <Typography variant="body2" color="text.secondary">
                  Failed to load image
                </Typography>
              )}
            </Box>
          )}

          {/* Audio Preview */}
          {audioUrl && (
            <Box>
              <Typography variant="subtitle2" sx={{ mb: 1.5, fontWeight: 600, color: '#475569' }}>
                🎤 Scene Audio
              </Typography>
              {audioLoading ? (
                <Box sx={{ display: 'flex', justifyContent: 'center', p: 2 }}>
                  <CircularProgress size={24} />
                </Box>
              ) : audioBlobUrl ? (
                <Box
                  sx={{
                    p: 3,
                    bgcolor: 'white',
                    borderRadius: 2,
                    border: '2px solid #e2e8f0',
                    display: 'flex',
                    alignItems: 'center',
                    gap: 2,
                  }}
                >
                  <IconButton
                    onClick={togglePlayPause}
                    disabled={!audioElement}
                    sx={{
                      bgcolor: '#667eea',
                      color: 'white',
                      '&:hover': {
                        bgcolor: '#5568d3',
                      },
                      '&:disabled': {
                        bgcolor: '#cbd5e1',
                      },
                    }}
                  >
                    {isPlaying ? <Pause /> : <PlayArrow />}
                  </IconButton>
                  <Box sx={{ flex: 1 }}>
                    <Typography variant="body2" sx={{ fontWeight: 600, color: '#1e293b' }}>
                      {isPlaying ? 'Playing...' : 'Click to play audio'}
                    </Typography>
                    <Typography variant="caption" sx={{ color: '#64748b' }}>
                      Scene narration audio
                    </Typography>
                  </Box>
                  <VolumeUp sx={{ color: '#94a3b8' }} />
                </Box>
              ) : (
                <Typography variant="body2" color="text.secondary">
                  Failed to load audio
                </Typography>
              )}
            </Box>
          )}

          {!imageUrl && !audioUrl && (
            <Typography variant="body2" color="text.secondary" sx={{ textAlign: 'center', p: 3 }}>
              No assets available for preview
            </Typography>
          )}
        </Stack>
      </DialogContent>
    </Dialog>
  );
};

View File

@@ -0,0 +1,179 @@
import React, { useMemo } from 'react';
import { Box, Paper, Stack, Typography, Button, LinearProgress, Alert, Chip } from '@mui/material';
import { PlayArrow, VideoLibrary, CheckCircle, ErrorOutline } from '@mui/icons-material';
import { Scene, VideoPlan } from '../../../services/youtubeApi';
import { useVideoRenderQueue, SceneVideoJob } from '../hooks/useVideoRenderQueue';
/** Props for `VideoRenderQueue`. */
interface VideoRenderQueueProps {
/** Scenes listed in the queue; only scenes with `enabled !== false` count towards the final combine. */
scenes: Scene[];
/** Current plan; its `video_summary` is passed along when combining scene videos. */
videoPlan: VideoPlan | null;
/** Render resolution forwarded to the render-queue hook. */
resolution: '480p' | '720p' | '1080p';
/** Forwarded to the render-queue hook as `onSceneVideoReady`. */
onSceneVideoReady: (sceneNumber: number, videoUrl: string) => void;
/** Forwarded to the render-queue hook as `onCombineReady`. */
onFinalVideoReady?: (videoUrl: string) => void;
}
/**
 * Chip colour name for a scene job: `completed` is `success`, `failed` is
 * `error`, `running` is `info`; a missing job or any other status is `default`.
 */
const statusColor = (job?: SceneVideoJob) => {
  switch (job?.status) {
    case 'completed':
      return 'success';
    case 'failed':
      return 'error';
    case 'running':
      return 'info';
    default:
      return 'default';
  }
};
/**
 * Per-scene video generation panel.
 *
 * Lists every scene with its asset readiness, job status, a generate /
 * regenerate button, an optional preview link and a progress bar while the job
 * runs. Once every enabled scene has a video URL, a "Combine Scenes" action is
 * offered to build the final video.
 */
export const VideoRenderQueue: React.FC<VideoRenderQueueProps> = (props) => {
  const { scenes, videoPlan, resolution, onSceneVideoReady, onFinalVideoReady } = props;

  const queue = useVideoRenderQueue({
    scenes,
    videoPlan,
    resolution,
    onSceneVideoReady,
    onCombineReady: onFinalVideoReady,
  });
  const { jobs, runSceneVideo, combineVideos, combineStatus, combineProgress } = queue;

  // True only when there is at least one enabled scene and each has a video URL.
  const allVideosReady = useMemo(() => {
    const enabledScenes = scenes.filter((s) => s.enabled !== false);
    return enabledScenes.length > 0 && enabledScenes.every((s) => jobs[s.scene_number]?.videoUrl);
  }, [jobs, scenes]);

  const isCombining = combineStatus === 'running';

  // Button caption follows the job state.
  const sceneButtonLabel = (job?: SceneVideoJob) => {
    if (job?.status === 'running') return 'Generating...';
    if (job?.status === 'completed') return 'Regenerate Video';
    return 'Generate Video';
  };

  // Errors are intentionally swallowed here, exactly as in the inline handlers this replaces.
  const handleGenerateScene = (scene: Scene) => {
    runSceneVideo(scene, { generateAudio: false }).catch(() => {});
  };

  const handleCombine = () => {
    const readyVideoUrls = scenes
      .filter((s) => s.enabled !== false)
      .map((s) => jobs[s.scene_number]?.videoUrl)
      .filter(Boolean) as string[];
    combineVideos(readyVideoUrls, videoPlan?.video_summary).catch(() => {});
  };

  const renderSceneRow = (scene: Scene) => {
    const job = jobs[scene.scene_number];

    return (
      <Paper key={scene.scene_number} variant="outlined" sx={{ p: 2 }}>
        <Stack direction="row" justifyContent="space-between" alignItems="center" spacing={2} flexWrap="wrap">
          <Box>
            <Typography variant="subtitle1" sx={{ fontWeight: 600 }}>
              Scene {scene.scene_number}: {scene.title}
            </Typography>
            <Typography variant="caption" color="text.secondary">
              {scene.imageUrl ? '✅ Image ready' : '⚠️ Image missing'} · {scene.audioUrl ? '✅ Audio ready' : '⚠️ Audio missing'}
            </Typography>
            {job?.error && (
              <Alert severity="error" sx={{ mt: 1 }}>
                {job.error}
              </Alert>
            )}
          </Box>
          <Stack direction="row" spacing={1} alignItems="center" flexWrap="wrap">
            <Chip
              label={job?.status ?? 'idle'}
              color={statusColor(job) as any}
              size="small"
              variant="outlined"
            />
            <Button
              variant="contained"
              size="small"
              startIcon={<PlayArrow />}
              disabled={job?.status === 'running'}
              onClick={() => handleGenerateScene(scene)}
            >
              {sceneButtonLabel(job)}
            </Button>
            {job?.videoUrl && (
              <Button
                variant="outlined"
                size="small"
                href={job.videoUrl}
                target="_blank"
                rel="noreferrer"
              >
                Preview
              </Button>
            )}
          </Stack>
        </Stack>
        {job?.status === 'running' && (
          <Box sx={{ mt: 1.5 }}>
            <LinearProgress variant="determinate" value={job.progress || 0} sx={{ height: 6, borderRadius: 2 }} />
            <Typography variant="caption" color="text.secondary">
              {Math.round(job.progress || 0)}%
            </Typography>
          </Box>
        )}
      </Paper>
    );
  };

  return (
    <Paper sx={{ p: 3, mt: 2 }}>
      <Typography variant="h6" sx={{ fontWeight: 700, mb: 2 }}>
        Scene-wise Video Generation
      </Typography>
      <Typography variant="body2" color="text.secondary" sx={{ mb: 3 }}>
        Generate videos per scene to save costs and retry only failing scenes. Once all scene videos are ready, combine them into a final video.
      </Typography>

      <Stack spacing={2}>{scenes.map(renderSceneRow)}</Stack>

      <Box sx={{ mt: 3, p: 2, border: '1px solid #e5e7eb', borderRadius: 2 }}>
        <Typography variant="subtitle1" sx={{ fontWeight: 600, mb: 1 }}>
          Final Video
        </Typography>
        {!allVideosReady && (
          <Alert severity="info" icon={<VideoLibrary />}>
            Generate videos for all enabled scenes to combine them into a single final video.
          </Alert>
        )}
        {allVideosReady && (
          <Stack spacing={1}>
            <Typography variant="body2" color="text.secondary">
              All scene videos are ready. Combine into a final video.
            </Typography>
            {isCombining && (
              <Box>
                <LinearProgress
                  variant="determinate"
                  value={combineProgress || 0}
                  sx={{ height: 6, borderRadius: 2, mb: 0.5 }}
                />
                <Typography variant="caption" color="text.secondary">
                  {Math.round(combineProgress || 0)}%
                </Typography>
              </Box>
            )}
            <Stack direction="row" spacing={1} alignItems="center">
              <Button
                variant="contained"
                color="secondary"
                startIcon={<VideoLibrary />}
                disabled={isCombining}
                onClick={handleCombine}
              >
                {isCombining ? 'Combining...' : 'Combine Scenes'}
              </Button>
              {combineStatus === 'completed' && <Chip icon={<CheckCircle />} color="success" label="Final video ready" />}
              {combineStatus === 'failed' && (
                <Chip icon={<ErrorOutline />} color="error" label="Combine failed, retry" />
              )}
            </Stack>
          </Stack>
        )}
      </Box>
    </Paper>
  );
};

View File

@@ -7,7 +7,7 @@ export const YT_BG = '#f9f9f9';
export const YT_BORDER = '#e5e5e5';
export const YT_TEXT = '#0f0f0f';
export const STEPS = ['Plan Your Video', 'Review Scenes', 'Render Video'] as const;
export const STEPS = ['Plan Your Video', 'Review Scenes', 'Generate Assets', 'Render Video'] as const;
export const RESOLUTIONS = ['480p', '720p', '1080p'] as const;
export type Resolution = typeof RESOLUTIONS[number];

View File

@@ -11,24 +11,40 @@ interface UseCostEstimateParams {
scenes: Scene[];
resolution: Resolution;
renderTaskId: string | null;
imageModel?: 'ideogram-v3-turbo' | 'qwen-image';
}
export const useCostEstimate = ({ activeStep, scenes, resolution, renderTaskId }: UseCostEstimateParams) => {
export const useCostEstimate = ({ activeStep, scenes, resolution, renderTaskId, imageModel = 'ideogram-v3-turbo' }: UseCostEstimateParams) => {
const [costEstimate, setCostEstimate] = useState<CostEstimate | null>(null);
const [loadingCostEstimate, setLoadingCostEstimate] = useState(false);
useEffect(() => {
if (activeStep === 2 && scenes.length > 0 && !renderTaskId) {
// Fetch cost estimate on both "Generate Assets" (step 2) and "Render Video" (step 3) steps
if ((activeStep === 2 || activeStep === 3) && scenes.length > 0 && !renderTaskId) {
const fetchCostEstimate = async () => {
setLoadingCostEstimate(true);
try {
const enabledScenes = scenes.filter(s => s.enabled !== false);
// Only fetch if all enabled scenes have images and audio
const allScenesReady = enabledScenes.every(s => s.imageUrl && s.audioUrl);
if (!allScenesReady && activeStep === 3) {
// On render step, require all scenes to be ready
setCostEstimate(null);
setLoadingCostEstimate(false);
return;
}
const response = await youtubeApi.estimateCost({
scenes: enabledScenes,
resolution: resolution,
imageModel: imageModel,
});
if (response.success && response.estimate) {
setCostEstimate(response.estimate);
} else {
setCostEstimate(null);
}
} catch (err: any) {
console.error('Error estimating cost:', err);
@@ -39,8 +55,12 @@ export const useCostEstimate = ({ activeStep, scenes, resolution, renderTaskId }
};
fetchCostEstimate();
} else {
// Reset cost estimate when not on relevant steps
setCostEstimate(null);
setLoadingCostEstimate(false);
}
}, [activeStep, scenes, resolution, renderTaskId]);
}, [activeStep, scenes, resolution, renderTaskId, imageModel]);
return { costEstimate, loadingCostEstimate };
};

View File

@@ -0,0 +1,55 @@
// Hook for managing generation state
import { useState } from 'react';
import { AudioGenerationSettings } from '../../shared/AudioSettingsModal';
/**
 * Holds the UI state used while generating scene images and audio: the
 * visibility of both settings modals, the current audio generation settings,
 * and a progress value plus status text for each generation kind.
 *
 * `resetImageGeneration` / `resetAudioGeneration` set the matching progress
 * back to 0 and the matching status back to ''.
 */
export const useGenerationState = () => {
  const [showAudioSettingsModal, setShowAudioSettingsModal] = useState(false);
  const [showImageSettingsModal, setShowImageSettingsModal] = useState(false);

  // Starting audio settings, created once per hook instance.
  const [currentAudioSettings, setCurrentAudioSettings] = useState<AudioGenerationSettings>(() => {
    const initialSettings: AudioGenerationSettings = {
      voiceId: 'Casual_Guy',
      speed: 1.15,
      volume: 1.0,
      pitch: 0.0,
      emotion: 'happy',
      englishNormalization: true,
      bitrate: 128000,
      channel: '1',
      format: 'mp3',
      enableSyncMode: true,
    };
    return initialSettings;
  });

  const [imageGenerationProgress, setImageGenerationProgress] = useState(0);
  const [imageGenerationStatus, setImageGenerationStatus] = useState<string>('');
  const [audioGenerationProgress, setAudioGenerationProgress] = useState(0);
  const [audioGenerationStatus, setAudioGenerationStatus] = useState<string>('');

  // Clears the image progress indicator and message.
  function resetImageGeneration(): void {
    setImageGenerationProgress(0);
    setImageGenerationStatus('');
  }

  // Clears the audio progress indicator and message.
  function resetAudioGeneration(): void {
    setAudioGenerationProgress(0);
    setAudioGenerationStatus('');
  }

  return {
    showAudioSettingsModal,
    setShowAudioSettingsModal,
    showImageSettingsModal,
    setShowImageSettingsModal,
    currentAudioSettings,
    setCurrentAudioSettings,
    imageGenerationProgress,
    setImageGenerationProgress,
    imageGenerationStatus,
    setImageGenerationStatus,
    audioGenerationProgress,
    setAudioGenerationProgress,
    audioGenerationStatus,
    setAudioGenerationStatus,
    resetImageGeneration,
    resetAudioGeneration,
  };
};

View File

@@ -0,0 +1,188 @@
/**
* Custom hook for robust image generation polling
*
* Handles:
* - Proper cleanup on unmount
* - Retry logic with exponential backoff
* - Timeout handling
* - Error classification and handling
* - Race condition prevention
*/
import { useRef, useCallback, useEffect } from 'react';
/** Options accepted by `startPolling` for one image-generation task. */
interface PollingOptions {
/** Task identifier; passed to `getStatus` and used as the key for the active poll. */
taskId: string;
/** Scene number, used to prefix error messages. */
sceneNumber: number;
/** Called with `status.result.image_url` once the task reports `completed` with a result. */
onComplete: (imageUrl: string) => void;
/** Called with a scene-prefixed message when the task fails, is not found, or times out. */
onError: (error: string) => void;
/** Called with the reported progress and message while the task is `processing`. */
onProgress?: (progress: number, message: string) => void;
/** Polling interval in milliseconds; defaults to 3000. Also the base for the retry backoff. */
pollInterval?: number;
/** Maximum total polling time in milliseconds; defaults to 5 minutes. */
maxPollTime?: number;
/** Maximum retries after a network or server error; defaults to 3. */
maxRetries?: number;
/** Fetches the current status of the task. */
getStatus: (taskId: string) => Promise<any>;
}
/**
 * Polls image-generation tasks until they complete, fail, or time out.
 *
 * @returns `startPolling`, which begins polling one task and returns a function
 * that stops it, and `stopPolling`, which stops the poller registered for a
 * task id. Every active poller is stopped when the owning component unmounts.
 */
export const useImageGenerationPolling = () => {
  // taskId -> stop function of the poller currently registered for that task.
  const activePollingRef = useRef<Map<string, () => void>>(new Map());

  const startPolling = useCallback((options: PollingOptions) => {
    const {
      taskId,
      sceneNumber,
      onComplete,
      onError,
      onProgress,
      pollInterval = 3000,
      maxPollTime = 5 * 60 * 1000, // 5 minutes
      maxRetries = 3,
      getStatus,
    } = options;

    // If already polling this task, stop it first
    const existingCleanup = activePollingRef.current.get(taskId);
    if (existingCleanup) {
      existingCleanup();
    }

    let intervalTimer: ReturnType<typeof setInterval> | null = null;
    let deadlineTimer: ReturnType<typeof setTimeout> | null = null;
    let retryTimer: ReturnType<typeof setTimeout> | null = null;
    let retryCount = 0;
    let requestInFlight = false;
    let isActive = true;
    const startTime = Date.now();

    // Render the configured limit for the timeout message instead of assuming
    // the default; whole minutes read as "5 minutes", anything else as seconds.
    const describeDuration = (ms: number): string => {
      const totalSeconds = Math.round(ms / 1000);
      if (totalSeconds >= 60 && totalSeconds % 60 === 0) {
        const minutes = totalSeconds / 60;
        return `${minutes} minute${minutes === 1 ? '' : 's'}`;
      }
      return `${totalSeconds} second${totalSeconds === 1 ? '' : 's'}`;
    };
    const timeoutMessage =
      `Scene ${sceneNumber}: Image generation timed out after ${describeDuration(maxPollTime)}. Please try again.`;

    const cleanup = () => {
      isActive = false;
      if (intervalTimer) {
        clearInterval(intervalTimer);
        intervalTimer = null;
      }
      if (deadlineTimer) {
        clearTimeout(deadlineTimer);
        deadlineTimer = null;
      }
      if (retryTimer) {
        clearTimeout(retryTimer);
        retryTimer = null;
      }
      // Only unregister our own entry: a poller restarted for the same taskId
      // owns the map slot now and must stay reachable through stopPolling.
      if (activePollingRef.current.get(taskId) === cleanup) {
        activePollingRef.current.delete(taskId);
      }
    };

    const pollForStatus = async (): Promise<void> => {
      // Skip the tick when stopped or while the previous request is still
      // pending, so slow responses cannot pile up or double-count retries.
      if (!isActive || requestInFlight) return;

      // Check if we've exceeded max poll time
      if (Date.now() - startTime > maxPollTime) {
        cleanup();
        onError(timeoutMessage);
        return;
      }

      requestInFlight = true;
      try {
        const status = await getStatus(taskId);
        retryCount = 0; // Reset retry count on success
        if (!isActive) return;

        if (status.status === 'completed' && status.result) {
          cleanup();
          onComplete(status.result.image_url);
        } else if (status.status === 'failed') {
          cleanup();
          const errorMsg = status.error || status.message || 'Image generation failed';
          onError(`Scene ${sceneNumber}: ${errorMsg}`);
        } else if (status.status === 'processing') {
          if (onProgress) {
            onProgress(status.progress || 0, status.message || 'Processing...');
          }
          // Continue polling
        }
      } catch (pollError: unknown) {
        if (!isActive) return;

        // Classify error type
        const err = (pollError ?? {}) as {
          code?: string;
          message?: string;
          response?: { status?: number };
        };
        const message = typeof err.message === 'string' ? err.message : '';
        const httpStatus = err.response?.status;
        const isNetworkError =
          err.code === 'ECONNABORTED' || message.includes('timeout') || message.includes('Network');
        const isNotFoundError =
          httpStatus === 404 || message.includes('404') || message.includes('not found');
        const isServerError = (httpStatus ?? 0) >= 500;

        if (isNotFoundError) {
          // Task not found - stop polling immediately
          cleanup();
          onError(`Scene ${sceneNumber}: Image generation task was lost. Please try again.`);
          return;
        }

        // For network/server errors, retry with exponential backoff
        if ((isNetworkError || isServerError) && retryCount < maxRetries) {
          retryCount += 1;
          const backoffDelay = Math.min(
            pollInterval * Math.pow(2, retryCount),
            30000 // Max 30s
          );
          console.warn(
            `[ImagePolling] Retrying poll for task ${taskId} ` +
              `(${retryCount}/${maxRetries}) after ${backoffDelay}ms`
          );

          // Pause regular polling and resume after the backoff delay
          if (intervalTimer) {
            clearInterval(intervalTimer);
            intervalTimer = null;
          }
          if (retryTimer) {
            clearTimeout(retryTimer);
          }
          // Tracked so cleanup() can cancel a pending retry.
          retryTimer = setTimeout(() => {
            retryTimer = null;
            if (isActive && !intervalTimer) {
              void pollForStatus(); // Retry immediately
              intervalTimer = setInterval(pollForStatus, pollInterval);
            }
          }, backoffDelay);
        } else if (retryCount >= maxRetries) {
          // Max retries exceeded
          cleanup();
          onError(
            `Scene ${sceneNumber}: Failed to check image generation status after ${maxRetries} retries. ` +
              `Please refresh and try again.`
          );
        }
        // For other errors, continue polling (might be transient)
      } finally {
        requestInFlight = false;
      }
    };

    // Register before the first poll so a poll that finishes synchronously
    // (e.g. getStatus throwing) finds and removes its own entry.
    activePollingRef.current.set(taskId, cleanup);

    // Start polling immediately, then every pollInterval
    void pollForStatus();
    if (isActive) {
      intervalTimer = setInterval(pollForStatus, pollInterval);

      // Set a timeout to stop polling after max time
      deadlineTimer = setTimeout(() => {
        if (isActive) {
          cleanup();
          onError(timeoutMessage);
        }
      }, maxPollTime);
    }

    return cleanup;
  }, []);

  // Cleanup all polling on unmount
  useEffect(() => {
    const pollers = activePollingRef.current;
    return () => {
      pollers.forEach((cleanup) => cleanup());
      pollers.clear();
    };
  }, []);

  const stopPolling = useCallback((taskId: string) => {
    const cleanup = activePollingRef.current.get(taskId);
    if (cleanup) {
      cleanup();
    }
  }, []);

  return { startPolling, stopPolling };
};

View File

@@ -0,0 +1,80 @@
// Hook for managing scene media (images and audio)
import { useState, useEffect } from 'react';
import { fetchMediaBlobUrl } from '../../../utils/fetchMediaBlobUrl';
/** Input for `useSceneMedia`: the source URLs of a scene's media. */
interface UseSceneMediaProps {
/** Source URL of the scene image; when falsy the image blob URL is cleared. */
imageUrl?: string | null;
/** Source URL of the scene audio; when falsy the audio blob URL is cleared. */
audioUrl?: string | null;
}
/** Releases `url` if it is an object URL (`blob:` scheme); other values are ignored. */
const revokeIfBlobUrl = (url: string | null | undefined): void => {
  if (url && url.startsWith('blob:')) {
    URL.revokeObjectURL(url);
  }
};

/**
 * Loads a scene's image and audio through `fetchMediaBlobUrl` and exposes the
 * resulting URLs together with loading flags.
 *
 * Each effect owns the blob URL it created: the URL is revoked when the source
 * URL changes or the component unmounts, and a fetch that resolves after that
 * point is discarded (and its blob revoked) instead of overwriting newer state.
 *
 * @param imageUrl Source URL of the scene image; falsy clears the image.
 * @param audioUrl Source URL of the scene audio; falsy clears the audio.
 * @returns `imageBlobUrl` / `audioBlobUrl` (null until loaded) and the
 * `imageLoading` / `audioLoading` flags. If the image blob cannot be created,
 * `imageBlobUrl` falls back to the original `imageUrl`.
 */
export const useSceneMedia = ({ imageUrl, audioUrl }: UseSceneMediaProps) => {
  const [imageBlobUrl, setImageBlobUrl] = useState<string | null>(null);
  const [imageLoading, setImageLoading] = useState(false);
  const [audioBlobUrl, setAudioBlobUrl] = useState<string | null>(null);
  const [audioLoading, setAudioLoading] = useState(false);

  useEffect(() => {
    console.log('[useSceneMedia] Image URL changed:', imageUrl);
    if (!imageUrl) {
      console.log('[useSceneMedia] No imageUrl, clearing blob');
      setImageBlobUrl(null);
      // A cancelled fetch no longer resets the flag, so do it here.
      setImageLoading(false);
      return undefined;
    }

    let cancelled = false;
    let createdBlobUrl: string | null = null;

    const fetchImage = async (): Promise<void> => {
      setImageLoading(true);
      console.log('[useSceneMedia] Starting to fetch image blob for:', imageUrl);
      try {
        const blobUrl = await fetchMediaBlobUrl(imageUrl);
        if (cancelled) {
          // The effect was torn down while fetching: nobody will use this blob.
          revokeIfBlobUrl(blobUrl);
          return;
        }
        if (blobUrl) {
          console.log('[useSceneMedia] Image blob loaded:', blobUrl);
          createdBlobUrl = blobUrl;
          setImageBlobUrl(blobUrl);
          return;
        }
        // Fallback: use direct URL if blob could not be created (e.g., 404/401 handled upstream)
        console.warn('[useSceneMedia] Blob URL unavailable, falling back to direct imageUrl');
        setImageBlobUrl(imageUrl);
      } catch (error) {
        if (cancelled) return;
        console.error('[useSceneMedia] Failed to load image:', error);
        // Fallback to direct URL so UI still shows something while we investigate auth/serving
        setImageBlobUrl(imageUrl);
      } finally {
        if (!cancelled) {
          setImageLoading(false);
        }
      }
    };

    void fetchImage();

    return () => {
      cancelled = true;
      revokeIfBlobUrl(createdBlobUrl);
    };
  }, [imageUrl]);

  useEffect(() => {
    if (!audioUrl) {
      setAudioBlobUrl(null);
      setAudioLoading(false);
      return undefined;
    }

    let cancelled = false;
    // Tracked locally: reading `audioBlobUrl` state in the cleanup would see the
    // value from the render that started this effect, not the blob it created.
    let createdBlobUrl: string | null = null;

    const fetchAudio = async (): Promise<void> => {
      setAudioLoading(true);
      try {
        const blobUrl = await fetchMediaBlobUrl(audioUrl);
        if (cancelled) {
          revokeIfBlobUrl(blobUrl);
          return;
        }
        createdBlobUrl = blobUrl;
        setAudioBlobUrl(blobUrl);
      } catch (error) {
        if (cancelled) return;
        console.error('[useSceneMedia] Failed to load audio:', error);
        // Do not keep exposing the previous (already revoked) audio blob.
        setAudioBlobUrl(null);
      } finally {
        if (!cancelled) {
          setAudioLoading(false);
        }
      }
    };

    void fetchAudio();

    return () => {
      cancelled = true;
      revokeIfBlobUrl(createdBlobUrl);
    };
  }, [audioUrl]);

  return {
    imageBlobUrl,
    imageLoading,
    audioBlobUrl,
    audioLoading,
  };
};

View File

@@ -0,0 +1,232 @@
import { useCallback, useEffect, useRef, useState } from 'react';
import { youtubeApi, Scene, VideoPlan, TaskStatus } from '../../../services/youtubeApi';
/** Lifecycle state shared by per-scene render jobs and the combine task in `useVideoRenderQueue`. */
export type VideoJobStatus = 'idle' | 'running' | 'completed' | 'failed';
/** Render state tracked for a single scene by `useVideoRenderQueue`. */
export interface SceneVideoJob {
/** Scene this job belongs to; also the key of the job in the `jobs` record. */
scene_number: number;
/** Current lifecycle state of the render. */
status: VideoJobStatus;
/** Set to 5 when a render is started, the reported task progress while polling, 100 on completion and 0 on failure. */
progress: number;
/** Task id returned when the render was started; absent until then. */
taskId?: string;
/** URL of the rendered scene video, once known. */
videoUrl?: string;
/** Failure message when `status` is `failed`. */
error?: string;
}
/** Options for `useVideoRenderQueue`. */
interface UseVideoRenderQueueOptions {
/** Current scenes; a job entry is created for each, pre-completed when the scene already has a `videoUrl`. */
scenes: Scene[];
/** Plan sent with each scene render request; `runSceneVideo` throws when it is null. */
videoPlan: VideoPlan | null;
/** Resolution sent with scene render and combine requests. */
resolution: '480p' | '720p' | '1080p';
/** Called when a scene render completes and a video URL was found in the task result. */
onSceneVideoReady?: (sceneNumber: number, videoUrl: string) => void;
/** Called when the combine task completes and a video URL was found in the task result. */
onCombineReady?: (videoUrl: string) => void;
}
/**
 * Tracks per-scene video render jobs and the final combine task, polling the
 * render-status endpoint every 3 seconds until each task completes or fails.
 *
 * @returns
 * - `jobs`: render state keyed by scene number.
 * - `runSceneVideo(scene, opts?)`: starts a scene render. Throws when the video
 *   plan, the scene image, or the scene audio (unless `opts.generateAudio`) is
 *   missing, or when the render cannot be started; in the last case the job is
 *   marked `failed` before the error propagates.
 * - `combineVideos(videoUrls, title?)`: starts the combine task. Throws when
 *   fewer than two URLs are given or the task cannot be started.
 * - `combineTaskId`, `combineProgress`, `combineStatus`: combine task state.
 */
export const useVideoRenderQueue = ({
  scenes,
  videoPlan,
  resolution,
  onSceneVideoReady,
  onCombineReady,
}: UseVideoRenderQueueOptions) => {
  const [jobs, setJobs] = useState<Record<number, SceneVideoJob>>({});
  const [combineTaskId, setCombineTaskId] = useState<string | null>(null);
  const [combineProgress, setCombineProgress] = useState<number>(0);
  const [combineStatus, setCombineStatus] = useState<VideoJobStatus>('idle');
  const pollingRef = useRef<Map<string, ReturnType<typeof setInterval>>>(new Map());

  // Latest callbacks, read at completion time so a long-running poll never
  // invokes a callback captured from an old render.
  const callbacksRef = useRef({ onSceneVideoReady, onCombineReady });
  useEffect(() => {
    callbacksRef.current = { onSceneVideoReady, onCombineReady };
  });

  // Initialize jobs for current scenes
  useEffect(() => {
    setJobs((prev) => {
      const next = { ...prev };
      scenes.forEach((scene) => {
        const sn = scene.scene_number;
        if (!next[sn]) {
          next[sn] = {
            scene_number: sn,
            status: scene.videoUrl ? 'completed' : 'idle',
            progress: scene.videoUrl ? 100 : 0,
            videoUrl: scene.videoUrl,
          };
        } else if (scene.videoUrl && next[sn].videoUrl !== scene.videoUrl) {
          next[sn] = { ...next[sn], videoUrl: scene.videoUrl, status: 'completed', progress: 100 };
        }
      });
      return next;
    });
  }, [scenes]);

  const stopPolling = useCallback((taskId: string) => {
    const timer = pollingRef.current.get(taskId);
    if (timer) {
      clearInterval(timer);
      pollingRef.current.delete(taskId);
    }
  }, []);

  // Marks a scene job as failed, keeping whatever else is known about it.
  const markSceneFailed = useCallback((sceneNumber: number, error: string | undefined) => {
    setJobs((prev) => ({
      ...prev,
      [sceneNumber]: {
        ...(prev[sceneNumber] || { scene_number: sceneNumber }),
        status: 'failed',
        progress: 0,
        error,
      },
    }));
  }, []);

  const pollTask = useCallback(
    (taskId: string, sceneNumber?: number, isCombine?: boolean) => {
      // Never leave two intervals running for the same task.
      stopPolling(taskId);

      let requestInFlight = false;
      const timer = setInterval(async () => {
        // Skip the tick while the previous status request is still pending.
        if (requestInFlight) return;
        requestInFlight = true;
        try {
          const status: TaskStatus = await youtubeApi.getRenderStatus(taskId);
          const progress = status.progress ?? 0;
          if (isCombine) {
            setCombineProgress(progress);
          } else if (sceneNumber !== undefined) {
            setJobs((prev) => ({
              ...prev,
              [sceneNumber]: {
                ...(prev[sceneNumber] || { scene_number: sceneNumber, status: 'running', progress }),
                status: status.status === 'failed' ? 'failed' : status.status === 'completed' ? 'completed' : 'running',
                progress,
              },
            }));
          }

          if (status.status === 'completed') {
            stopPolling(taskId);
            const result = status.result || {};
            if (isCombine) {
              const finalUrl = result.final_video_url || result.video_url;
              if (finalUrl) {
                callbacksRef.current.onCombineReady?.(finalUrl);
              }
              setCombineStatus('completed');
            } else if (sceneNumber !== undefined) {
              // `|| undefined` keeps a failed `&&` chain from storing `false` as the URL.
              const videoUrl =
                result.final_video_url ||
                result.video_url ||
                (Array.isArray(result.scene_results) && result.scene_results[0]?.video_url) ||
                undefined;
              if (videoUrl) {
                callbacksRef.current.onSceneVideoReady?.(sceneNumber, videoUrl);
              }
              setJobs((prev) => ({
                ...prev,
                [sceneNumber]: {
                  ...(prev[sceneNumber] || { scene_number: sceneNumber }),
                  status: 'completed',
                  progress: 100,
                  videoUrl,
                },
              }));
            }
          } else if (status.status === 'failed') {
            stopPolling(taskId);
            const errorMsg = status.error || status.message || 'Video render failed';
            if (isCombine) {
              setCombineStatus('failed');
            } else if (sceneNumber !== undefined) {
              markSceneFailed(sceneNumber, errorMsg);
            }
          }
        } catch (err) {
          // A failed status request ends polling and fails the job.
          stopPolling(taskId);
          if (sceneNumber !== undefined) {
            markSceneFailed(sceneNumber, err instanceof Error ? err.message : 'Video render failed');
          } else {
            setCombineStatus('failed');
          }
        } finally {
          requestInFlight = false;
        }
      }, 3000);
      pollingRef.current.set(taskId, timer);
    },
    [markSceneFailed, stopPolling]
  );

  const runSceneVideo = useCallback(
    async (scene: Scene, opts?: { generateAudio?: boolean }) => {
      if (!videoPlan) {
        throw new Error('Video plan is missing');
      }
      if (!scene.imageUrl) throw new Error('Scene image is required before video generation.');
      if (!scene.audioUrl && !opts?.generateAudio) throw new Error('Scene audio is required before video generation.');

      const sn = scene.scene_number;
      setJobs((prev) => ({
        ...prev,
        [sn]: { scene_number: sn, status: 'running', progress: 5 },
      }));

      // If the request itself throws, the job must not stay "running" forever.
      const startRender = async () => {
        try {
          return await youtubeApi.generateSceneVideo({
            scene,
            video_plan: videoPlan,
            resolution,
            generate_audio_enabled: Boolean(opts?.generateAudio),
          });
        } catch (err) {
          markSceneFailed(sn, err instanceof Error ? err.message : 'Failed to start scene video render');
          throw err;
        }
      };

      const resp = await startRender();
      if (resp.success && resp.task_id) {
        setJobs((prev) => ({
          ...prev,
          [sn]: { ...(prev[sn] || { scene_number: sn }), status: 'running', taskId: resp.task_id, progress: 5 },
        }));
        pollTask(resp.task_id, sn, false);
      } else {
        markSceneFailed(sn, resp.message);
        throw new Error(resp.message || 'Failed to start scene video render');
      }
    },
    [videoPlan, resolution, pollTask, markSceneFailed]
  );

  const combineVideos = useCallback(
    async (videoUrls: string[], title?: string) => {
      if (!videoUrls || videoUrls.length < 2) {
        throw new Error('At least two scene videos are required to combine.');
      }
      setCombineStatus('running');
      setCombineProgress(5);

      // If the request itself throws, the combine state must not stay "running".
      const startCombine = async () => {
        try {
          return await youtubeApi.combineVideos({
            scene_video_urls: videoUrls,
            resolution,
            title,
          });
        } catch (err) {
          setCombineStatus('failed');
          throw err;
        }
      };

      const resp = await startCombine();
      if (resp.success && resp.task_id) {
        setCombineTaskId(resp.task_id);
        setCombineProgress(10);
        pollTask(resp.task_id, undefined, true);
      } else {
        setCombineStatus('failed');
        throw new Error(resp.message || 'Failed to start combine task');
      }
    },
    [pollTask, resolution]
  );

  // Cleanup polling on unmount
  useEffect(() => {
    const timers = pollingRef.current;
    return () => {
      timers.forEach((timer) => clearInterval(timer));
      timers.clear();
    };
  }, []);

  return {
    jobs,
    runSceneVideo,
    combineVideos,
    combineTaskId,
    combineProgress,
    combineStatus,
  };
};

View File

@@ -0,0 +1,268 @@
import { useCallback, useEffect, useRef, useState } from 'react';
import {
youtubeApi,
Scene,
SceneVideoRenderRequest,
TaskStatus,
VideoPlan,
} from '../../../services/youtubeApi';
/** Lifecycle state of a single scene render in `useYouTubeRenderQueue`. */
type SceneStatus = 'idle' | 'running' | 'completed' | 'failed';
/** Render state tracked per scene by `useYouTubeRenderQueue`. */
interface SceneVideoState {
/** Current lifecycle state; treated as `idle` until first updated. */
status: SceneStatus;
/** Set to 5 when a render is started, the reported task progress while polling and 100 on completion. */
progress: number;
/** Task id returned when the render was started. */
taskId?: string;
/** Failure message; cleared when a render is (re)started or completes. */
error?: string;
/** URL of the rendered scene video, when the completed task result contained one. */
videoUrl?: string;
}
/** Parameters of `useYouTubeRenderQueue`. */
interface UseYouTubeRenderQueueParams {
/** Current scenes; used to build the updated scene list on completion and to pick videos for combining. */
scenes: Scene[];
/** Plan sent with render and combine requests; `runSceneVideo` reports an error when it is null. */
videoPlan: VideoPlan | null;
/** Resolution sent with render and combine requests. */
resolution: '480p' | '720p' | '1080p';
/** Called with the full scene list after a completed render supplied a video URL for one scene. */
onScenesUpdate: (updated: Scene[]) => void;
/** Called with a message whenever starting, polling or combining fails. */
onError?: (msg: string) => void;
/** NOTE(review): accepted but never invoked by the hook in this file. */
onInfo?: (msg: string) => void;
/** Called once the final video has been combined successfully. */
onSuccess?: (msg: string) => void;
}
/** Value returned by `useYouTubeRenderQueue`. */
interface UseYouTubeRenderQueueResult {
/** Render state keyed by scene number. */
sceneStatuses: Record<number, SceneVideoState>;
/** URL of the combined video once the combine task completed with a URL; otherwise null. */
finalVideoUrl: string | null;
/** True while a combine task is being started or polled. */
combining: boolean;
/** Progress of the combine task as last reported (5 on start, 100 on completion). */
combiningProgress: number;
/** Latest status or error message of the combine task. */
combiningMessage: string;
/** Starts rendering one scene; does nothing if that scene is already running. Failures are reported through `onError`, not thrown. */
runSceneVideo: (scene: Scene) => Promise<void>;
/** Combines the videos of all enabled scenes that have a `videoUrl`; needs at least two. Failures are reported through `onError`. */
combineVideos: () => Promise<void>;
}
/** Delay in milliseconds between render-status polls (scene polling and the combine loop). */
const POLL_MS = 3000;
/**
 * Manages per-scene video renders and the final combine step for the YouTube
 * creator flow, polling the render-status endpoint every `POLL_MS`.
 *
 * Scene completions are merged into the *latest* `scenes` array (read through a
 * ref), so several scenes rendering at once do not overwrite each other's
 * `videoUrl`. All polling, including the combine loop, stops on unmount.
 *
 * Failures are reported through `onError` rather than thrown.
 *
 * @returns Scene statuses, combine state, and the `runSceneVideo` /
 * `combineVideos` actions.
 */
export function useYouTubeRenderQueue({
  scenes,
  videoPlan,
  resolution,
  onScenesUpdate,
  onError,
  onSuccess,
}: UseYouTubeRenderQueueParams): UseYouTubeRenderQueueResult {
  const [sceneStatuses, setSceneStatuses] = useState<Record<number, SceneVideoState>>({});
  const [finalVideoUrl, setFinalVideoUrl] = useState<string | null>(null);
  const [combining, setCombining] = useState(false);
  const [combiningProgress, setCombiningProgress] = useState(0);
  const [combiningMessage, setCombiningMessage] = useState('Combining videos...');
  const pollingRefs = useRef<Map<string, ReturnType<typeof setInterval>>>(new Map());
  const isMountedRef = useRef(true);

  // Latest props, read when an async poll resolves. An interval started several
  // renders ago would otherwise keep using the `scenes` array and callbacks it
  // closed over and clobber updates made in the meantime.
  const latestRef = useRef({ scenes, onScenesUpdate, onError, onSuccess });
  useEffect(() => {
    latestRef.current = { scenes, onScenesUpdate, onError, onSuccess };
  });

  const updateSceneStatus = useCallback((sceneNumber: number, updates: Partial<SceneVideoState>) => {
    setSceneStatuses((prev) => ({
      ...prev,
      [sceneNumber]: {
        ...prev[sceneNumber],
        status: prev[sceneNumber]?.status || 'idle',
        progress: prev[sceneNumber]?.progress || 0,
        ...updates,
      },
    }));
  }, []);

  const clearPolling = useCallback((taskId: string) => {
    const interval = pollingRefs.current.get(taskId);
    if (interval !== undefined) {
      clearInterval(interval);
      pollingRefs.current.delete(taskId);
    }
  }, []);

  useEffect(() => {
    isMountedRef.current = true;
    const timers = pollingRefs.current;
    return () => {
      // Also ends the combine loop, which checks this flag between polls.
      isMountedRef.current = false;
      timers.forEach((interval) => clearInterval(interval));
      timers.clear();
    };
  }, []);

  const pollTask = useCallback(
    (taskId: string, sceneNumber: number) => {
      // Never leave two intervals running for the same task.
      clearPolling(taskId);

      const interval = setInterval(async () => {
        try {
          const status: TaskStatus = await youtubeApi.getRenderStatus(taskId);
          const progress = status.progress ?? 0;

          if (status.status === 'completed') {
            clearPolling(taskId);
            const videoUrl =
              status.result?.video_url ||
              status.result?.final_video_url ||
              status.result?.scene_results?.[0]?.video_url ||
              null;
            updateSceneStatus(sceneNumber, {
              status: 'completed',
              progress: 100,
              videoUrl: videoUrl || undefined,
              taskId,
              error: undefined,
            });
            if (videoUrl) {
              const { scenes: currentScenes, onScenesUpdate: notifyScenes } = latestRef.current;
              notifyScenes(
                currentScenes.map((s) => (s.scene_number === sceneNumber ? { ...s, videoUrl } : s))
              );
            }
          } else if (status.status === 'failed') {
            clearPolling(taskId);
            const errorMessage =
              status.error ||
              status.message ||
              status.result?.error ||
              'Video generation failed';
            updateSceneStatus(sceneNumber, {
              status: 'failed',
              progress,
              error: errorMessage,
              taskId,
            });
            latestRef.current.onError?.(errorMessage);
          } else {
            updateSceneStatus(sceneNumber, {
              status: 'running',
              progress,
              taskId,
            });
          }
        } catch (err: any) {
          const msg = err?.message || 'Failed to poll render status';
          clearPolling(taskId);
          updateSceneStatus(sceneNumber, {
            status: 'failed',
            progress: 0,
            error: msg,
            taskId,
          });
          latestRef.current.onError?.(msg);
        }
      }, POLL_MS);
      pollingRefs.current.set(taskId, interval);
    },
    [clearPolling, updateSceneStatus]
  );

  const runSceneVideo = useCallback(
    async (scene: Scene) => {
      if (!videoPlan) {
        latestRef.current.onError?.('Video plan is missing');
        return;
      }
      const sn = scene.scene_number;
      const existing = sceneStatuses[sn];
      if (existing?.status === 'running') return;

      updateSceneStatus(sn, { status: 'running', progress: 5, error: undefined });
      const payload: SceneVideoRenderRequest = {
        scene,
        video_plan: videoPlan,
        resolution,
        generate_audio_enabled: false,
        voice_id: 'Wise_Woman',
      };

      try {
        const resp = await youtubeApi.generateSceneVideo(payload);
        if (resp.success && resp.task_id) {
          updateSceneStatus(sn, { status: 'running', progress: 5, taskId: resp.task_id });
          pollTask(resp.task_id, sn);
        } else {
          const msg = resp.message || 'Failed to start scene render';
          updateSceneStatus(sn, { status: 'failed', progress: 0, error: msg });
          latestRef.current.onError?.(msg);
        }
      } catch (err: any) {
        const msg = err?.message || 'Failed to start scene render';
        updateSceneStatus(sn, { status: 'failed', progress: 0, error: msg });
        latestRef.current.onError?.(msg);
      }
    },
    [pollTask, resolution, sceneStatuses, updateSceneStatus, videoPlan]
  );

  const combineVideos = useCallback(async () => {
    const readyVideos = scenes
      .filter((s) => s.enabled !== false && s.videoUrl)
      .map((s) => s.videoUrl as string);
    if (readyVideos.length < 2) {
      latestRef.current.onError?.('Need at least two scene videos to combine.');
      return;
    }

    setCombining(true);
    setCombiningProgress(5);
    setCombiningMessage('Starting combination...');

    try {
      const resp = await youtubeApi.combineVideos({
        scene_video_urls: readyVideos,
        video_plan: videoPlan || undefined,
        resolution,
      });
      if (!isMountedRef.current) return;
      if (!resp.success || !resp.task_id) {
        const msg = resp.message || 'Failed to start video combine';
        setCombining(false);
        setCombiningProgress(0);
        setCombiningMessage(msg);
        latestRef.current.onError?.(msg);
        return;
      }

      const taskId = resp.task_id;
      // Poll until the task settles; bail out silently once unmounted so the
      // loop neither runs forever nor touches state of a dead component.
      while (isMountedRef.current) {
        await new Promise((r) => setTimeout(r, POLL_MS));
        if (!isMountedRef.current) return;
        const status = await youtubeApi.getRenderStatus(taskId);
        if (!isMountedRef.current) return;

        const progress = status.progress ?? 0;
        setCombiningProgress(progress);
        setCombiningMessage(status.message || 'Combining...');

        if (status.status === 'completed') {
          const url = status.result?.video_url || status.result?.final_video_url;
          setFinalVideoUrl(url || null);
          setCombining(false);
          setCombiningProgress(100);
          setCombiningMessage('Combined successfully');
          latestRef.current.onSuccess?.('Final video combined successfully');
          return;
        }
        if (status.status === 'failed') {
          const msg = status.error || status.message || 'Combine failed';
          setCombining(false);
          setCombiningMessage(msg);
          latestRef.current.onError?.(msg);
          return;
        }
      }
    } catch (err: any) {
      if (!isMountedRef.current) return;
      const msg = err?.message || 'Combine failed';
      setCombining(false);
      setCombiningMessage(msg);
      latestRef.current.onError?.(msg);
    }
  }, [resolution, scenes, videoPlan]);

  return {
    sceneStatuses,
    finalVideoUrl,
    combining,
    combiningProgress,
    combiningMessage,
    runSceneVideo,
    combineVideos,
  };
}

View File

@@ -0,0 +1,687 @@
import React, { useState, useEffect } from "react";
import {
Dialog,
DialogTitle,
DialogContent,
DialogActions,
Stack,
Box,
Typography,
TextField,
Select,
MenuItem,
FormControl,
InputLabel,
Divider,
alpha,
Tooltip,
IconButton,
Paper,
} from "@mui/material";
import {
Info as InfoIcon,
HelpOutline as HelpOutlineIcon,
Close as CloseIcon,
Palette as PaletteIcon,
} from "@mui/icons-material";
/** Identifiers of the built-in YouTube image presets, in display order. */
type PresetKey = "engagingHost" | "cinematicScene" | "professionalPresenter" | "casualCreator";

/** One preset card: display texts plus the generation parameters it applies. */
interface PresetDefinition {
  title: string;
  subtitle: string;
  prompt: string;
  style: "Auto" | "Fiction" | "Realistic";
  renderingSpeed: "Default" | "Turbo" | "Quality";
  aspectRatio: "1:1" | "16:9" | "9:16" | "4:3" | "3:4";
}

/** Generation parameters that every built-in preset currently shares. */
const SHARED_PRESET_PARAMS = {
  style: "Realistic",
  renderingSpeed: "Quality",
  aspectRatio: "16:9",
} as const;

/** Built-in presets keyed by `PresetKey`; key order is the order they are listed in. */
const PRESETS: Record<PresetKey, PresetDefinition> = {
  engagingHost: {
    title: "Engaging Host",
    subtitle: "Dynamic presenter in engaging video environment",
    prompt:
      "Professional video host in modern studio, dynamic lighting, engaging facial expression, high energy atmosphere, camera-ready appearance, confident posture, vibrant background elements",
    ...SHARED_PRESET_PARAMS,
  },
  cinematicScene: {
    title: "Cinematic Scene",
    subtitle: "Dramatic, movie-like atmosphere with cinematic lighting",
    prompt:
      "Cinematic video scene, dramatic lighting, professional cinematography, engaging narrative atmosphere, high production value, cinematic depth of field, compelling visual storytelling",
    ...SHARED_PRESET_PARAMS,
  },
  professionalPresenter: {
    title: "Professional Presenter",
    subtitle: "Corporate-style presentation with clean, polished look",
    prompt:
      "Professional corporate presenter, clean business attire, polished appearance, neutral background, professional lighting, trustworthy demeanor, business presentation setting",
    ...SHARED_PRESET_PARAMS,
  },
  casualCreator: {
    title: "Casual Creator",
    subtitle: "Relaxed, approachable creator for vlogs and tutorials",
    prompt:
      "Casual content creator, friendly and approachable, comfortable setting, natural lighting, relaxed posture, authentic personality, everyday environment, genuine smile",
    ...SHARED_PRESET_PARAMS,
  },
};
/** Settings collected by `YouTubeImageGenerationModal` and passed to its `onGenerate` callback. */
export interface YouTubeImageGenerationSettings {
/** Visual prompt text as edited in the modal. */
prompt: string;
/** Selected visual style. */
style: "Auto" | "Fiction" | "Realistic";
/** Selected generation speed tier. */
renderingSpeed: "Default" | "Turbo" | "Quality";
/** Selected aspect ratio of the generated image. */
aspectRatio: "1:1" | "16:9" | "9:16" | "4:3" | "3:4";
/** Selected image model identifier. */
model: "ideogram-v3-turbo" | "qwen-image";
}
/** Props of `YouTubeImageGenerationModal`. */
interface YouTubeImageGenerationModalProps {
/** Whether the dialog is shown. */
open: boolean;
/** Called when the dialog requests to close (backdrop/escape or the close button). */
onClose: () => void;
/** Called with the current settings when generation is requested. */
onGenerate: (settings: YouTubeImageGenerationSettings) => void;
/** Prompt the editor starts from; also the base that presets are appended to. */
initialPrompt: string;
/** Initial style; the component defaults it to "Realistic". */
initialStyle?: "Auto" | "Fiction" | "Realistic";
/** Initial speed tier; the component defaults it to "Quality". */
initialRenderingSpeed?: "Default" | "Turbo" | "Quality";
/** Initial aspect ratio; the component defaults it to "16:9". */
initialAspectRatio?: "1:1" | "16:9" | "9:16" | "4:3" | "3:4";
/** Initial model; the component defaults it to "ideogram-v3-turbo". */
initialModel?: "ideogram-v3-turbo" | "qwen-image";
/** Generation-in-progress flag; the component defaults it to false. */
isGenerating?: boolean;
/** Optional scene title shown in the dialog header. */
sceneTitle?: string;
}
export const YouTubeImageGenerationModal: React.FC<YouTubeImageGenerationModalProps> = ({
open,
onClose,
onGenerate,
initialPrompt,
initialStyle = "Realistic",
initialRenderingSpeed = "Quality",
initialAspectRatio = "16:9",
initialModel = "ideogram-v3-turbo",
isGenerating = false,
sceneTitle,
}) => {
const [prompt, setPrompt] = useState(initialPrompt);
const [style, setStyle] = useState<"Auto" | "Fiction" | "Realistic">(initialStyle);
const [renderingSpeed, setRenderingSpeed] = useState<"Default" | "Turbo" | "Quality">(initialRenderingSpeed);
const [aspectRatio, setAspectRatio] = useState<"1:1" | "16:9" | "9:16" | "4:3" | "3:4">(initialAspectRatio);
const [model, setModel] = useState<"ideogram-v3-turbo" | "qwen-image">("ideogram-v3-turbo");
// Update state when initial values change
useEffect(() => {
setPrompt(initialPrompt);
setStyle(initialStyle);
setRenderingSpeed(initialRenderingSpeed);
setAspectRatio(initialAspectRatio);
setModel(initialModel);
}, [initialPrompt, initialStyle, initialRenderingSpeed, initialAspectRatio, initialModel]);
const handleGenerate = () => {
onGenerate({
prompt,
style,
renderingSpeed,
aspectRatio,
model,
});
};
const applyPreset = (presetKey: PresetKey) => {
const p = PRESETS[presetKey];
// Combine the preset prompt with current scene prompt context
setPrompt((current) => {
// If user already customized, append; otherwise replace with preset
if (!current || current.trim() === "" || current.trim() === initialPrompt.trim()) {
return `${initialPrompt}\n${p.prompt}`.trim();
}
return `${current}\n${p.prompt}`.trim();
});
setStyle(p.style);
setRenderingSpeed(p.renderingSpeed);
setAspectRatio(p.aspectRatio);
};
return (
<Dialog
open={open}
onClose={onClose}
maxWidth="md"
fullWidth
PaperProps={{
sx: {
background: alpha("#1a1a2e", 0.95),
backdropFilter: "blur(20px)",
border: "1px solid rgba(255,255,255,0.1)",
borderRadius: 4,
},
}}
>
<DialogTitle>
<Stack direction="row" justifyContent="space-between" alignItems="center">
<Box>
<Typography variant="h6" sx={{ color: "white", fontWeight: 600 }}>
Generate Scene Image
</Typography>
{sceneTitle && (
<Typography variant="body2" sx={{ color: "rgba(255,255,255,0.6)", mt: 1 }}>
Customize image generation for "{sceneTitle}"
</Typography>
)}
</Box>
<IconButton
onClick={onClose}
size="small"
sx={{ color: "rgba(255,255,255,0.7)" }}
>
<CloseIcon />
</IconButton>
</Stack>
<Typography variant="body2" sx={{ color: "rgba(255,255,255,0.6)", mt: 1 }}>
Customize image generation parameters for the perfect YouTube scene visual
</Typography>
</DialogTitle>
<DialogContent>
<Stack spacing={3} sx={{ mt: 1 }}>
{/* YouTube-optimized Presets */}
<Box>
<Stack direction="row" spacing={1} alignItems="center" sx={{ mb: 1 }}>
<PaletteIcon sx={{ color: "white", fontSize: "1.2rem" }} />
<Typography variant="subtitle1" sx={{ color: "white", fontWeight: 600 }}>
YouTube-ready presets
</Typography>
<Tooltip
title="Quickly apply a YouTube-optimized look. Each preset adjusts lighting, composition, and style while keeping your avatar consistent."
arrow
>
<IconButton size="small" sx={{ color: "rgba(255,255,255,0.5)" }}>
<HelpOutlineIcon fontSize="small" />
</IconButton>
</Tooltip>
</Stack>
<Stack direction={{ xs: "column", sm: "row" }} spacing={1.5}>
{(
Object.entries(PRESETS) as Array<[PresetKey, (typeof PRESETS)[PresetKey]]>
).map(([key, p]) => (
<Paper
key={key}
onClick={() => applyPreset(key)}
sx={{
p: 1.5,
flex: 1,
cursor: "pointer",
backgroundColor: alpha("#ffffff", 0.04),
border: "1px solid rgba(255,255,255,0.1)",
borderRadius: 2,
transition: "all 0.2s ease",
"&:hover": {
borderColor: "rgba(102,126,234,0.7)",
boxShadow: "0 8px 24px rgba(0,0,0,0.25)",
backgroundColor: alpha("#667eea", 0.08),
},
}}
>
<Typography variant="subtitle2" sx={{ color: "white", fontWeight: 700 }}>
{p.title}
</Typography>
<Typography variant="body2" sx={{ color: "rgba(255,255,255,0.7)", lineHeight: 1.5, mb: 0.75 }}>
{p.subtitle}
</Typography>
<Stack direction="row" spacing={1} sx={{ color: "rgba(255,255,255,0.6)", fontSize: "0.8rem" }}>
<Typography variant="caption">Style: {p.style}</Typography>
<Typography variant="caption">Speed: {p.renderingSpeed}</Typography>
<Typography variant="caption">AR: {p.aspectRatio}</Typography>
</Stack>
</Paper>
))}
</Stack>
</Box>
{/* Prompt Section */}
<Box>
<Stack direction="row" spacing={1} alignItems="center" sx={{ mb: 1 }}>
<Typography variant="subtitle1" sx={{ color: "white", fontWeight: 600 }}>
Visual Prompt
</Typography>
<Tooltip
title="Describe what you want to see in the generated image. Include scene context, visual elements, mood, and style preferences. The AI will use this along with your base avatar to create a consistent character in the YouTube scene."
arrow
>
<IconButton size="small" sx={{ color: "rgba(255,255,255,0.5)" }}>
<HelpOutlineIcon fontSize="small" />
</IconButton>
</Tooltip>
</Stack>
<TextField
fullWidth
multiline
rows={4}
value={prompt}
onChange={(e) => setPrompt(e.target.value)}
placeholder="Describe the scene, visual elements, mood, and style..."
sx={{
"& .MuiOutlinedInput-root": {
backgroundColor: alpha("#ffffff", 0.05),
color: "white",
"& fieldset": {
borderColor: "rgba(255,255,255,0.2)",
},
"&:hover fieldset": {
borderColor: "rgba(255,255,255,0.3)",
},
"&.Mui-focused fieldset": {
borderColor: "#667eea",
},
},
"& .MuiInputBase-input": {
color: "white",
},
}}
/>
<Typography variant="caption" sx={{ color: "rgba(255,255,255,0.5)", mt: 0.5, display: "block" }}>
This prompt will be combined with scene context to generate your YouTube-ready image. Be specific about visual elements, lighting, and atmosphere.
</Typography>
</Box>
<Divider sx={{ borderColor: "rgba(255,255,255,0.1)" }} />
{/* Style Selection */}
<Box>
<Stack direction="row" spacing={1} alignItems="center" sx={{ mb: 1.5 }}>
<Typography variant="subtitle1" sx={{ color: "white", fontWeight: 600 }}>
Visual Style
</Typography>
<Tooltip
title="Determines the artistic style of the character generation. Auto lets the AI choose, Fiction creates more stylized/artistic characters, and Realistic produces photorealistic results optimized for video content."
arrow
>
<IconButton size="small" sx={{ color: "rgba(255,255,255,0.5)" }}>
<HelpOutlineIcon fontSize="small" />
</IconButton>
</Tooltip>
</Stack>
<FormControl fullWidth>
<Select
value={style}
onChange={(e) => setStyle(e.target.value as "Auto" | "Fiction" | "Realistic")}
sx={{
backgroundColor: alpha("#ffffff", 0.05),
color: "white",
"& .MuiOutlinedInput-notchedOutline": {
borderColor: "rgba(255,255,255,0.2)",
},
"&:hover .MuiOutlinedInput-notchedOutline": {
borderColor: "rgba(255,255,255,0.3)",
},
"&.Mui-focused .MuiOutlinedInput-notchedOutline": {
borderColor: "#667eea",
},
"& .MuiSvgIcon-root": {
color: "rgba(255,255,255,0.7)",
},
}}
>
<MenuItem value="Auto">
<Stack>
<Typography sx={{ color: "white" }}>Auto</Typography>
<Typography variant="caption" sx={{ color: "rgba(255,255,255,0.6)" }}>
AI automatically selects the best style
</Typography>
</Stack>
</MenuItem>
<MenuItem value="Fiction">
<Stack>
<Typography sx={{ color: "white" }}>Fiction</Typography>
<Typography variant="caption" sx={{ color: "rgba(255,255,255,0.6)" }}>
Stylized, artistic character appearance
</Typography>
</Stack>
</MenuItem>
<MenuItem value="Realistic">
<Stack>
<Typography sx={{ color: "white" }}>Realistic</Typography>
<Typography variant="caption" sx={{ color: "rgba(255,255,255,0.6)" }}>
Photorealistic, professional video appearance
</Typography>
</Stack>
</MenuItem>
</Select>
</FormControl>
<Paper
sx={{
mt: 1.5,
p: 1.5,
backgroundColor: alpha("#667eea", 0.1),
border: "1px solid rgba(102,126,234,0.3)",
borderRadius: 2,
}}
>
<Stack direction="row" spacing={1}>
<InfoIcon sx={{ color: "#667eea", fontSize: "1.2rem", mt: 0.1 }} />
<Box>
<Typography variant="body2" sx={{ color: "rgba(255,255,255,0.9)", fontWeight: 500, mb: 0.5 }}>
Style Impact for YouTube:
</Typography>
<Typography variant="body2" sx={{ color: "rgba(255,255,255,0.7)", lineHeight: 1.6 }}>
<strong>Auto:</strong> Best for most YouTube content, balances professionalism and engagement<br />
<strong>Fiction:</strong> Great for creative content, gaming, or stylized presentations<br />
<strong>Realistic:</strong> Ideal for educational, corporate, or professional YouTube channels
</Typography>
</Box>
</Stack>
</Paper>
</Box>
{/* Rendering Speed */}
<Box>
<Stack direction="row" spacing={1} alignItems="center" sx={{ mb: 1.5 }}>
<Typography variant="subtitle1" sx={{ color: "white", fontWeight: 600 }}>
Generation Speed
</Typography>
<Tooltip
title="Controls the balance between generation speed, cost, and quality. Turbo is fastest and cheapest but lower quality. Quality is slowest and most expensive but produces the best results for professional YouTube content."
arrow
>
<IconButton size="small" sx={{ color: "rgba(255,255,255,0.5)" }}>
<HelpOutlineIcon fontSize="small" />
</IconButton>
</Tooltip>
</Stack>
<FormControl fullWidth>
<Select
value={renderingSpeed}
onChange={(e) => setRenderingSpeed(e.target.value as "Default" | "Turbo" | "Quality")}
sx={{
backgroundColor: alpha("#ffffff", 0.05),
color: "white",
"& .MuiOutlinedInput-notchedOutline": {
borderColor: "rgba(255,255,255,0.2)",
},
"&:hover .MuiOutlinedInput-notchedOutline": {
borderColor: "rgba(255,255,255,0.3)",
},
"&.Mui-focused .MuiOutlinedInput-notchedOutline": {
borderColor: "#667eea",
},
"& .MuiSvgIcon-root": {
color: "rgba(255,255,255,0.7)",
},
}}
>
<MenuItem value="Turbo">
<Stack>
<Typography sx={{ color: "white" }}>Turbo </Typography>
<Typography variant="caption" sx={{ color: "rgba(255,255,255,0.6)" }}>
Fastest (~10-20s) Cheapest Good for quick iterations
</Typography>
</Stack>
</MenuItem>
<MenuItem value="Default">
<Stack>
<Typography sx={{ color: "white" }}>Default </Typography>
<Typography variant="caption" sx={{ color: "rgba(255,255,255,0.6)" }}>
Balanced (~30-60s) Moderate cost Great for most YouTube content
</Typography>
</Stack>
</MenuItem>
<MenuItem value="Quality">
<Stack>
<Typography sx={{ color: "white" }}>Quality </Typography>
<Typography variant="caption" sx={{ color: "rgba(255,255,255,0.6)" }}>
Slowest (~60-120s) Highest quality Perfect for professional videos
</Typography>
</Stack>
</MenuItem>
</Select>
</FormControl>
<Paper
sx={{
mt: 1.5,
p: 1.5,
backgroundColor: alpha("#10b981", 0.1),
border: "1px solid rgba(16,185,129,0.3)",
borderRadius: 2,
}}
>
<Stack direction="row" spacing={1}>
<InfoIcon sx={{ color: "#10b981", fontSize: "1.2rem", mt: 0.1 }} />
<Box>
<Typography variant="body2" sx={{ color: "rgba(255,255,255,0.9)", fontWeight: 500, mb: 0.5 }}>
Speed vs Quality for YouTube:
</Typography>
<Typography variant="body2" sx={{ color: "rgba(255,255,255,0.7)", lineHeight: 1.6 }}>
<strong>Turbo:</strong> Use for testing and quick iterations (~$0.02/image)<br />
<strong>Default:</strong> Best balance for regular YouTube production (~$0.04/image)<br />
<strong>Quality:</strong> Use for high-stakes, professional content (~$0.08/image)
</Typography>
</Box>
</Stack>
</Paper>
</Box>
{/* AI Model Selection */}
<Box>
<Stack direction="row" spacing={1} alignItems="center" sx={{ mb: 1.5 }}>
<Typography variant="subtitle1" sx={{ color: "white", fontWeight: 600 }}>
AI Model
</Typography>
<Tooltip
title="Choose the AI model for image generation. Different models offer different quality levels and costs. Ideogram V3 Turbo provides superior text rendering and photorealism."
arrow
>
<IconButton size="small" sx={{ color: "rgba(255,255,255,0.5)" }}>
<HelpOutlineIcon fontSize="small" />
</IconButton>
</Tooltip>
</Stack>
<FormControl fullWidth>
<Select
value={model}
onChange={(e) => setModel(e.target.value as "ideogram-v3-turbo" | "qwen-image")}
sx={{
backgroundColor: alpha("#ffffff", 0.05),
color: "white",
"& .MuiOutlinedInput-notchedOutline": {
borderColor: "rgba(255,255,255,0.2)",
},
"&:hover .MuiOutlinedInput-notchedOutline": {
borderColor: "rgba(255,255,255,0.3)",
},
"&.Mui-focused .MuiOutlinedInput-notchedOutline": {
borderColor: "#667eea",
},
"& .MuiSvgIcon-root": {
color: "rgba(255,255,255,0.7)",
},
}}
>
<MenuItem value="ideogram-v3-turbo">
<Stack>
<Typography sx={{ color: "white" }}>Ideogram V3 Turbo </Typography>
<Typography variant="caption" sx={{ color: "rgba(255,255,255,0.6)" }}>
Photorealistic Superior text rendering $0.10/image
</Typography>
</Stack>
</MenuItem>
<MenuItem value="qwen-image">
<Stack>
<Typography sx={{ color: "white" }}>Qwen Image </Typography>
<Typography variant="caption" sx={{ color: "rgba(255,255,255,0.6)" }}>
Fast generation High quality $0.05/image
</Typography>
</Stack>
</MenuItem>
</Select>
</FormControl>
<Paper
sx={{
mt: 1.5,
p: 1.5,
backgroundColor: alpha("#10b981", 0.1),
border: "1px solid rgba(16,185,129,0.3)",
borderRadius: 2,
}}
>
<Stack direction="row" spacing={1}>
<InfoIcon sx={{ color: "#10b981", fontSize: "1.2rem", mt: 0.1 }} />
<Box>
<Typography variant="body2" sx={{ color: "rgba(255,255,255,0.9)", fontWeight: 500, mb: 0.5 }}>
Model Recommendations:
</Typography>
<Typography variant="body2" sx={{ color: "rgba(255,255,255,0.7)", lineHeight: 1.6 }}>
<strong>Ideogram V3 Turbo:</strong> Best for professional YouTube content with text, logos, or detailed scenes<br />
<strong>Qwen Image:</strong> Great for fast iterations and general content creation
</Typography>
</Box>
</Stack>
</Paper>
</Box>
{/* Aspect Ratio */}
<Box>
<Stack direction="row" spacing={1} alignItems="center" sx={{ mb: 1.5 }}>
<Typography variant="subtitle1" sx={{ color: "white", fontWeight: 600 }}>
Aspect Ratio
</Typography>
<Tooltip
title="The width-to-height ratio of the generated image. Choose based on your YouTube format: 16:9 for standard videos, 9:16 for Shorts/mobile, 1:1 for thumbnails, or other formats as needed."
arrow
>
<IconButton size="small" sx={{ color: "rgba(255,255,255,0.5)" }}>
<HelpOutlineIcon fontSize="small" />
</IconButton>
</Tooltip>
</Stack>
<FormControl fullWidth>
<Select
value={aspectRatio}
onChange={(e) => setAspectRatio(e.target.value as "1:1" | "16:9" | "9:16" | "4:3" | "3:4")}
sx={{
backgroundColor: alpha("#ffffff", 0.05),
color: "white",
"& .MuiOutlinedInput-notchedOutline": {
borderColor: "rgba(255,255,255,0.2)",
},
"&:hover .MuiOutlinedInput-notchedOutline": {
borderColor: "rgba(255,255,255,0.3)",
},
"&.Mui-focused .MuiOutlinedInput-notchedOutline": {
borderColor: "#667eea",
},
"& .MuiSvgIcon-root": {
color: "rgba(255,255,255,0.7)",
},
}}
>
<MenuItem value="16:9">
<Stack>
<Typography sx={{ color: "white" }}>16:9 (Widescreen)</Typography>
<Typography variant="caption" sx={{ color: "rgba(255,255,255,0.6)" }}>
Standard YouTube videos, best for main content
</Typography>
</Stack>
</MenuItem>
<MenuItem value="9:16">
<Stack>
<Typography sx={{ color: "white" }}>9:16 (Vertical)</Typography>
<Typography variant="caption" sx={{ color: "rgba(255,255,255,0.6)" }}>
YouTube Shorts, TikTok, Instagram Stories
</Typography>
</Stack>
</MenuItem>
<MenuItem value="1:1">
<Stack>
<Typography sx={{ color: "white" }}>1:1 (Square)</Typography>
<Typography variant="caption" sx={{ color: "rgba(255,255,255,0.6)" }}>
Thumbnails, Instagram posts, profile images
</Typography>
</Stack>
</MenuItem>
<MenuItem value="4:3">
<Stack>
<Typography sx={{ color: "white" }}>4:3 (Traditional)</Typography>
<Typography variant="caption" sx={{ color: "rgba(255,255,255,0.6)" }}>
Classic format, presentations, older content
</Typography>
</Stack>
</MenuItem>
<MenuItem value="3:4">
<Stack>
<Typography sx={{ color: "white" }}>3:4 (Portrait)</Typography>
<Typography variant="caption" sx={{ color: "rgba(255,255,255,0.6)" }}>
LinkedIn, some social media formats
</Typography>
</Stack>
</MenuItem>
</Select>
</FormControl>
<Paper
sx={{
mt: 1.5,
p: 1.5,
backgroundColor: alpha("#f59e0b", 0.1),
border: "1px solid rgba(245,158,11,0.3)",
borderRadius: 2,
}}
>
<Stack direction="row" spacing={1}>
<InfoIcon sx={{ color: "#f59e0b", fontSize: "1.2rem", mt: 0.1 }} />
<Box>
<Typography variant="body2" sx={{ color: "rgba(255,255,255,0.9)", fontWeight: 500, mb: 0.5 }}>
YouTube Format Recommendations:
</Typography>
<Typography variant="body2" sx={{ color: "rgba(255,255,255,0.7)", lineHeight: 1.6 }}>
<strong>16:9:</strong> Standard videos (recommended for most content)<br />
<strong>9:16:</strong> YouTube Shorts and mobile-optimized content<br />
<strong>1:1:</strong> Thumbnails and square-format promotional content
</Typography>
</Box>
</Stack>
</Paper>
</Box>
</Stack>
</DialogContent>
<DialogActions sx={{ p: 3, pt: 2 }}>
<IconButton
onClick={onClose}
disabled={isGenerating}
sx={{ color: "rgba(255,255,255,0.7)", mr: 1 }}
>
<CloseIcon />
</IconButton>
<Box sx={{ flex: 1 }} />
<IconButton
onClick={handleGenerate}
disabled={isGenerating || !prompt.trim()}
sx={{
backgroundColor: isGenerating ? "rgba(255,255,255,0.1)" : "#667eea",
color: "white",
"&:hover": {
backgroundColor: isGenerating ? "rgba(255,255,255,0.1)" : "#5a6fd8",
},
"&:disabled": {
backgroundColor: "rgba(255,255,255,0.1)",
color: "rgba(255,255,255,0.3)",
},
px: 3,
py: 1,
borderRadius: 2,
}}
>
<Typography variant="button" sx={{ fontWeight: 600 }}>
{isGenerating ? "Generating..." : "Generate Image"}
</Typography>
</IconButton>
</DialogActions>
</Dialog>
);
};

View File

@@ -0,0 +1,2 @@
export { YouTubeImageGenerationModal } from './YouTubeImageGenerationModal';
export type { YouTubeImageGenerationSettings } from './YouTubeImageGenerationModal';

View File

@@ -195,3 +195,4 @@ export function buildImageGenerationOperation(
};
}

View File

@@ -0,0 +1,648 @@
import React, { useEffect, useState } from "react";
import {
Dialog,
DialogTitle,
DialogContent,
DialogActions,
Stack,
Box,
Typography,
Slider,
Select,
MenuItem,
FormControl,
InputLabel,
FormControlLabel,
Checkbox,
Tooltip,
IconButton,
alpha,
TextField,
} from "@mui/material";
import { HelpOutline as HelpOutlineIcon, Close as CloseIcon, VolumeUp } from "@mui/icons-material";
import { Button } from "@mui/material";
/**
 * Voice/encoding options collected by the audio settings modal and handed to
 * the caller through `onApplySettings`.
 *
 * The ranges mentioned below are the ones the modal's own controls allow;
 * the type itself does not enforce them.
 */
export type AudioGenerationSettings = {
  // --- Voice character ---
  /** Identifier of the selected voice (the modal offers the ids in VOICE_OPTIONS). */
  voiceId: string;
  /** Emotional tone; the modal offers the values in EMOTION_OPTIONS. */
  emotion: string;
  /** Speaking speed multiplier; the slider spans 0.5 to 2.0 in 0.05 steps. */
  speed: number;
  /** Loudness; the slider spans 0.1 to 10.0 in 0.1 steps. */
  volume: number;
  /** Pitch shift; the slider spans -12 to +12 in 0.5 steps. */
  pitch: number;

  // --- Text handling ---
  /** Whether English text normalization is requested. */
  englishNormalization: boolean;
  /** Optional language hint; the modal shows "auto" when this is unset or empty. */
  languageBoost?: string;

  // --- Output encoding ---
  /** Output container/codec. */
  format: "mp3" | "wav" | "pcm" | "flac";
  /** Optional sample rate in Hz; the modal shows 24000 when this is unset or 0. */
  sampleRate?: number;
  /** Bitrate in bits per second (the modal displays it divided by 1000 as kbps). */
  bitrate: number;
  /** Channel count as a string: "1" is mono, "2" is stereo. */
  channel: "1" | "2";

  // --- Delivery ---
  /** Whether sync mode is enabled (the modal describes it as waiting for generation to complete). */
  enableSyncMode: boolean;
};
/** Props accepted by the audio settings dialog. */
interface AudioSettingsModalProps {
  // --- Visibility ---
  /** Whether the dialog is shown. */
  open: boolean;
  /** Called when the dialog asks to close (dialog close request, title "X" button, or Cancel). */
  onClose: () => void;

  // --- Settings flow ---
  /** Seed values for the form; the form is reset to these whenever this reference changes. */
  initialSettings: AudioGenerationSettings;
  /** Called with the current form values when the apply button is pressed. */
  onApplySettings: (settings: AudioGenerationSettings) => void;

  // --- Presentation ---
  /** Optional scene name shown in the dialog subtitle. */
  sceneTitle?: string;
  /** When true, the footer buttons are disabled and the apply button reads "Generating...". */
  isGenerating?: boolean;
  /** When true, the title and description use "regenerate" wording. */
  isRegenerating?: boolean;
}
/**
 * Selectable voices (from minimax/speech-02-hd). Each entry carries the id
 * stored in the settings, a display name, and a short personality blurb that
 * is rendered under the name in the voice dropdown.
 */
const VOICE_OPTIONS = [
  {
    id: "Wise_Woman",
    name: "Wise Woman",
    personality: "Authoritative, trustworthy female voice - perfect for educational content and expert narration",
  },
  {
    id: "Friendly_Person",
    name: "Friendly Person",
    personality: "Warm, approachable voice - great for welcoming introductions and customer-facing content",
  },
  {
    id: "Inspirational_girl",
    name: "Inspirational Girl",
    personality: "Motivational, uplifting female voice - ideal for inspirational and motivational content",
  },
  {
    id: "Deep_Voice_Man",
    name: "Deep Voice Man",
    personality: "Powerful, commanding male voice - excellent for serious topics and authoritative delivery",
  },
  {
    id: "Calm_Woman",
    name: "Calm Woman",
    personality: "Soothing, composed female voice - perfect for meditation, relaxation, or sensitive topics",
  },
  {
    id: "Casual_Guy",
    name: "Casual Guy",
    personality: "Relaxed, conversational male voice - great for vlogs, tutorials, and informal content",
  },
  {
    id: "Lively_Girl",
    name: "Lively Girl",
    personality: "Energetic, enthusiastic female voice - ideal for exciting announcements and upbeat content",
  },
  {
    id: "Patient_Man",
    name: "Patient Man",
    personality: "Gentle, understanding male voice - perfect for explanations and patient guidance",
  },
  {
    id: "Young_Knight",
    name: "Young Knight",
    personality: "Brave, confident male voice - great for adventure, gaming, and heroic narratives",
  },
  {
    id: "Determined_Man",
    name: "Determined Man",
    personality: "Strong, resolute male voice - excellent for motivational speeches and determined delivery",
  },
  {
    id: "Lovely_Girl",
    name: "Lovely Girl",
    personality: "Sweet, charming female voice - ideal for storytelling and gentle narratives",
  },
  {
    id: "Decent_Boy",
    name: "Decent Boy",
    personality: "Honest, sincere male voice - perfect for testimonials and personal stories",
  },
  {
    id: "Imposing_Manner",
    name: "Imposing Manner",
    personality: "Formal, dignified male voice - great for corporate content and official announcements",
  },
  {
    id: "Elegant_Man",
    name: "Elegant Man",
    personality: "Refined, sophisticated male voice - ideal for luxury, premium content",
  },
  {
    id: "Abbess",
    name: "Abbess",
    personality: "Spiritual, serene female voice - perfect for meditation, philosophy, or contemplative content",
  },
  {
    id: "Sweet_Girl_2",
    name: "Sweet Girl 2",
    personality: "Gentle, melodic female voice - excellent for children's content and soft storytelling",
  },
  {
    id: "Exuberant_Girl",
    name: "Exuberant Girl",
    personality: "Joyful, expressive female voice - ideal for celebrations and happy announcements",
  },
];
/** Emotion values offered in the "Emotional Delivery" dropdown (shown capitalized in the UI). */
const EMOTION_OPTIONS = [
  "happy",
  "sad",
  "angry",
  "fearful",
  "disgusted",
  "surprised",
  "neutral",
];

/** Sample rates offered in the "Sample rate" dropdown, in Hz. */
const SAMPLE_RATE_OPTIONS = [
  8000,
  16000,
  22050,
  24000,
  32000,
  44100,
];

/** Bitrates offered in the "Bitrate" dropdown, in bits per second (rendered as kbps). */
const BITRATE_OPTIONS = [
  32000,
  64000,
  128000,
  256000,
];
/**
 * Values offered in the "Language boost" dropdown, in display order.
 * "auto" comes first and is the value the modal shows when no boost is set.
 * Note that "Chinese,Yue" is a single option containing a comma.
 */
const LANGUAGE_BOOST_OPTIONS = [
  "auto",
  "English", "Chinese", "Chinese,Yue", "Arabic",
  "Russian", "Spanish", "French", "Portuguese",
  "German", "Turkish", "Dutch", "Ukrainian",
  "Vietnamese", "Indonesian", "Japanese", "Italian",
  "Korean", "Thai", "Polish", "Romanian",
  "Greek", "Czech", "Finnish", "Hindi",
];
// Shared styling for the outlined <Select> controls (voice, emotion) so they
// stay readable on the dialog's gradient background.
const SELECT_SX = {
  backgroundColor: alpha("#ffffff", 0.1),
  color: "white",
  "& .MuiOutlinedInput-notchedOutline": { borderColor: "rgba(255,255,255,0.3)" },
  "&:hover .MuiOutlinedInput-notchedOutline": { borderColor: "rgba(255,255,255,0.4)" },
  "&.Mui-focused .MuiOutlinedInput-notchedOutline": { borderColor: "#ffffff" },
  "& .MuiSvgIcon-root": { color: "rgba(255,255,255,0.7)" },
};

// Shared styling for the select-mode <TextField> controls (language boost,
// sample rate, bitrate, channel, format).
const TEXT_FIELD_SX = {
  "& .MuiOutlinedInput-root": {
    backgroundColor: alpha("#ffffff", 0.1),
    color: "white",
    "& fieldset": { borderColor: "rgba(255,255,255,0.3)" },
    "&:hover fieldset": { borderColor: "rgba(255,255,255,0.4)" },
    "&.Mui-focused fieldset": { borderColor: "#ffffff" },
    "& .MuiSvgIcon-root": { color: "rgba(255,255,255,0.7)" },
  },
};

// Label colour for the select-mode <TextField> controls.
const FIELD_LABEL_PROPS = { sx: { color: "rgba(255,255,255,0.7)" } };

/**
 * Dialog that lets the user tune voice, delivery and encoding options before
 * (re)generating audio narration.
 *
 * The form keeps a local copy of `initialSettings`; nothing is reported to the
 * caller until the apply button is pressed, at which point `onApplySettings`
 * receives the current form values. The dialog never closes itself - closing
 * is always delegated to `onClose`.
 *
 * @param open             Whether the dialog is shown.
 * @param onClose          Invoked on dialog close requests, the title "X" button and Cancel.
 * @param onApplySettings  Invoked with the current form values when the apply button is pressed.
 * @param initialSettings  Seed values; the form resets to them whenever this reference changes.
 * @param isGenerating     Disables the footer buttons and switches the apply label to "Generating...".
 * @param sceneTitle       Optional scene name shown under the title.
 * @param isRegenerating   Switches title and description to "regenerate" wording.
 */
export const AudioSettingsModal: React.FC<AudioSettingsModalProps> = ({
  open,
  onClose,
  onApplySettings,
  initialSettings,
  isGenerating = false,
  sceneTitle,
  isRegenerating = false,
}) => {
  const [settings, setSettings] = useState<AudioGenerationSettings>(initialSettings);

  // Re-seed the form whenever the caller supplies a different settings object.
  // Any unsaved local edits are discarded at that point, so callers should
  // pass a stable reference while the dialog is open.
  useEffect(() => {
    setSettings(initialSettings);
  }, [initialSettings]);

  // Merge a partial change into the form state. The functional form is used so
  // that consecutive changes build on the latest state rather than on the
  // `settings` value captured by an older render.
  const patchSettings = (patch: Partial<AudioGenerationSettings>) => {
    setSettings((prev) => ({ ...prev, ...patch }));
  };

  const handleApply = () => {
    onApplySettings(settings);
  };

  return (
    <Dialog
      open={open}
      onClose={onClose}
      maxWidth="md"
      fullWidth
      PaperProps={{
        sx: {
          background: "linear-gradient(135deg, #667eea 0%, #764ba2 100%)",
          color: "white",
        },
      }}
    >
      <DialogTitle>
        <Stack direction="row" justifyContent="space-between" alignItems="center">
          <Box>
            <Typography variant="h6" sx={{ fontWeight: 600, mb: 0.5 }}>
              {isRegenerating ? "Regenerate Audio" : "Generate Audio"} - Voice Settings
            </Typography>
            {sceneTitle && (
              <Typography variant="body2" sx={{ opacity: 0.8 }}>
                Configure voice settings for "{sceneTitle}"
              </Typography>
            )}
          </Box>
          <IconButton onClick={onClose} size="small" sx={{ color: "rgba(255,255,255,0.7)" }}>
            <CloseIcon />
          </IconButton>
        </Stack>
        <Typography variant="body2" sx={{ opacity: 0.7, mt: 1 }}>
          {isRegenerating
            ? "Customize voice settings to regenerate your audio narration with different characteristics."
            : "Choose voice settings to generate high-quality audio narration for your scene."}
        </Typography>
      </DialogTitle>
      <DialogContent>
        <Stack spacing={3} sx={{ mt: 1 }}>
          {/* Voice Selection */}
          <Box>
            <Stack direction="row" spacing={1} alignItems="center" sx={{ mb: 1 }}>
              <Typography variant="subtitle1" sx={{ fontWeight: 600 }}>
                Voice Selection
              </Typography>
              <Tooltip
                title={
                  <Box>
                    <Typography variant="body2" sx={{ fontWeight: 600, mb: 0.5 }}>
                      Voice Selection Guide
                    </Typography>
                    <Typography variant="caption" sx={{ display: "block", mb: 0.5 }}>
                      Choose a voice that matches your content's personality and target audience.
                    </Typography>
                    <Typography variant="caption" sx={{ display: "block", mb: 0.5 }}>
                      • <strong>YouTube/Vlogging</strong>: Casual Guy (default), Friendly Person - conversational and engaging
                    </Typography>
                    <Typography variant="caption" sx={{ display: "block", mb: 0.5 }}>
                      • <strong>Educational/Tutorials</strong>: Wise Woman, Deep Voice Man - authoritative and trustworthy
                    </Typography>
                    <Typography variant="caption" sx={{ display: "block", mb: 0.5 }}>
                      • <strong>Motivational</strong>: Inspirational Girl, Determined Man - energetic and inspiring
                    </Typography>
                    <Typography variant="caption" sx={{ display: "block", mb: 0.5 }}>
                      • <strong>Relaxing/Storytelling</strong>: Calm Woman, Lovely Girl - soothing and gentle
                    </Typography>
                    <Typography variant="caption" sx={{ display: "block" }}>
                      <strong>Default:</strong> Casual Guy - optimized for engaging YouTube narration.
                    </Typography>
                  </Box>
                }
                arrow
                placement="right"
              >
                <IconButton size="small" sx={{ color: "rgba(255,255,255,0.5)" }}>
                  <HelpOutlineIcon fontSize="small" />
                </IconButton>
              </Tooltip>
            </Stack>
            <FormControl fullWidth>
              <Select
                value={settings.voiceId}
                onChange={(e) => patchSettings({ voiceId: e.target.value })}
                sx={SELECT_SX}
              >
                {VOICE_OPTIONS.map((voice) => (
                  <MenuItem key={voice.id} value={voice.id}>
                    <Box>
                      <Typography variant="body2" sx={{ fontWeight: 600, color: "white" }}>
                        {voice.name}
                      </Typography>
                      <Typography variant="caption" sx={{ color: "rgba(255,255,255,0.7)", display: "block", fontSize: "0.7rem" }}>
                        {voice.personality}
                      </Typography>
                    </Box>
                  </MenuItem>
                ))}
              </Select>
            </FormControl>
          </Box>
          {/* Speed / Volume / Pitch */}
          <Stack direction={{ xs: "column", sm: "row" }} spacing={2}>
            <Box sx={{ flex: 1 }}>
              <Stack direction="row" spacing={1} alignItems="center" sx={{ mb: 0.5 }}>
                <Typography variant="subtitle2" sx={{ fontWeight: 600 }}>
                  Speaking Speed ({settings.speed.toFixed(2)})
                </Typography>
                <Tooltip
                  title={
                    <Box>
                      <Typography variant="body2" sx={{ fontWeight: 600, mb: 0.5 }}>
                        Natural Speaking Pace
                      </Typography>
                      <Typography variant="caption" sx={{ display: "block", mb: 0.5 }}>
                        • <strong>0.8-1.0</strong>: Slow, deliberate (educational, complex topics)
                      </Typography>
                      <Typography variant="caption" sx={{ display: "block", mb: 0.5 }}>
                        • <strong>1.1-1.2</strong>: Natural, engaging (recommended for YouTube)
                      </Typography>
                      <Typography variant="caption" sx={{ display: "block", mb: 0.5 }}>
                        • <strong>1.3-1.5</strong>: Fast, energetic (exciting, promotional content)
                      </Typography>
                      <Typography variant="caption" sx={{ display: "block", mt: 0.5 }}>
                        <strong>Default:</strong> 1.15 - Optimized for engaging YouTube narration.
                      </Typography>
                    </Box>
                  }
                  arrow
                  placement="right"
                >
                  <HelpOutlineIcon fontSize="small" sx={{ color: "rgba(255,255,255,0.5)" }} />
                </Tooltip>
              </Stack>
              <Slider
                value={settings.speed}
                min={0.5}
                max={2.0}
                step={0.05}
                onChange={(_, v) => patchSettings({ speed: v as number })}
                sx={{ color: "#4ade80" }}
              />
              <Typography variant="caption" sx={{ opacity: 0.7 }}>
                0.5 = Slower (narrative) • 1.0 = Normal • 2.0 = Faster (energetic)
              </Typography>
            </Box>
            <Box sx={{ flex: 1 }}>
              <Stack direction="row" spacing={1} alignItems="center" sx={{ mb: 0.5 }}>
                <Typography variant="subtitle2" sx={{ fontWeight: 600 }}>
                  Volume Level ({settings.volume.toFixed(1)})
                </Typography>
                <Tooltip
                  title={
                    <Box>
                      <Typography variant="body2" sx={{ fontWeight: 600, mb: 0.5 }}>
                        Audio Loudness
                      </Typography>
                      <Typography variant="caption" sx={{ display: "block", mb: 0.5 }}>
                        • <strong>0.1-0.5</strong>: Very soft, intimate whisper
                      </Typography>
                      <Typography variant="caption" sx={{ display: "block", mb: 0.5 }}>
                        • <strong>0.8-1.2</strong>: Normal speaking volume
                      </Typography>
                      <Typography variant="caption" sx={{ display: "block", mb: 0.5 }}>
                        • <strong>1.5-10.0</strong>: Loud, commanding presence
                      </Typography>
                      <Typography variant="caption" sx={{ display: "block", mt: 0.5 }}>
                        <strong>Note:</strong> Very high volumes may cause distortion.
                      </Typography>
                    </Box>
                  }
                  arrow
                  placement="right"
                >
                  <HelpOutlineIcon fontSize="small" sx={{ color: "rgba(255,255,255,0.5)" }} />
                </Tooltip>
              </Stack>
              <Slider
                value={settings.volume}
                min={0.1}
                max={10.0}
                step={0.1}
                onChange={(_, v) => patchSettings({ volume: v as number })}
                sx={{ color: "#fbbf24" }}
              />
              <Typography variant="caption" sx={{ opacity: 0.7 }}>
                0.1 = Very soft • 1.0 = Normal • 10.0 = Very loud
              </Typography>
            </Box>
            <Box sx={{ flex: 1 }}>
              <Stack direction="row" spacing={1} alignItems="center" sx={{ mb: 0.5 }}>
                <Typography variant="subtitle2" sx={{ fontWeight: 600 }}>
                  Voice Pitch ({settings.pitch})
                </Typography>
                <Tooltip
                  title={
                    <Box>
                      <Typography variant="body2" sx={{ fontWeight: 600, mb: 0.5 }}>
                        Voice Tone & Character
                      </Typography>
                      <Typography variant="caption" sx={{ display: "block", mb: 0.5 }}>
                        • <strong>-12 to -6</strong>: Deep, authoritative (male voices, serious content)
                      </Typography>
                      <Typography variant="caption" sx={{ display: "block", mb: 0.5 }}>
                        • <strong>-2 to +2</strong>: Natural, conversational range
                      </Typography>
                      <Typography variant="caption" sx={{ display: "block", mb: 0.5 }}>
                        • <strong>+3 to +12</strong>: Bright, energetic (female voices, upbeat content)
                      </Typography>
                      <Typography variant="caption" sx={{ display: "block", mt: 0.5 }}>
                        <strong>Tip:</strong> Small adjustments (±2) sound most natural.
                      </Typography>
                    </Box>
                  }
                  arrow
                  placement="right"
                >
                  <HelpOutlineIcon fontSize="small" sx={{ color: "rgba(255,255,255,0.5)" }} />
                </Tooltip>
              </Stack>
              <Slider
                value={settings.pitch}
                min={-12}
                max={12}
                step={0.5}
                onChange={(_, v) => patchSettings({ pitch: v as number })}
                sx={{ color: "#f87171" }}
              />
              <Typography variant="caption" sx={{ opacity: 0.7 }}>
                -12 = Very deep • 0 = Normal • +12 = Very high
              </Typography>
            </Box>
          </Stack>
          {/* Emotion */}
          <Box>
            <Stack direction="row" spacing={1} alignItems="center" sx={{ mb: 1 }}>
              <Typography variant="subtitle1" sx={{ fontWeight: 600 }}>
                Emotional Delivery
              </Typography>
              <Tooltip
                title={
                  <Box>
                    <Typography variant="body2" sx={{ fontWeight: 600, mb: 0.5 }}>
                      Voice Emotional Expression
                    </Typography>
                    <Typography variant="caption" sx={{ display: "block", mb: 0.5 }}>
                      Choose the emotional tone that matches your content:
                    </Typography>
                    <Typography variant="caption" sx={{ display: "block", mb: 0.5 }}>
                      • <strong>Happy</strong>: Warm, enthusiastic delivery
                    </Typography>
                    <Typography variant="caption" sx={{ display: "block", mb: 0.5 }}>
                      • <strong>Neutral</strong>: Professional, straightforward tone
                    </Typography>
                    <Typography variant="caption" sx={{ display: "block", mb: 0.5 }}>
                      • <strong>Sad</strong>: Somber, reflective delivery
                    </Typography>
                    <Typography variant="caption" sx={{ display: "block", mb: 0.5 }}>
                      • <strong>Angry</strong>: Forceful, urgent tone (use sparingly)
                    </Typography>
                    <Typography variant="caption" sx={{ display: "block", mt: 0.5 }}>
                      <strong>Recommendation:</strong> Happy/Neutral for most educational content.
                    </Typography>
                  </Box>
                }
                arrow
                placement="right"
              >
                <IconButton size="small" sx={{ color: "rgba(255,255,255,0.5)" }}>
                  <HelpOutlineIcon fontSize="small" />
                </IconButton>
              </Tooltip>
            </Stack>
            <FormControl fullWidth>
              <Select
                value={settings.emotion}
                onChange={(e) => patchSettings({ emotion: e.target.value })}
                sx={SELECT_SX}
              >
                {EMOTION_OPTIONS.map((emotion) => (
                  <MenuItem key={emotion} value={emotion}>
                    {emotion.charAt(0).toUpperCase() + emotion.slice(1)}
                  </MenuItem>
                ))}
              </Select>
            </FormControl>
            <Typography variant="caption" sx={{ opacity: 0.7, mt: 0.5, display: "block" }}>
              Select the emotional tone. "Happy" provides natural, engaging delivery for most YouTube content.
            </Typography>
          </Box>
          {/* Language & Normalization */}
          <Stack direction={{ xs: "column", sm: "row" }} spacing={2}>
            <Box sx={{ flex: 1 }}>
              <FormControlLabel
                control={
                  <Checkbox
                    checked={settings.englishNormalization}
                    onChange={(e) => patchSettings({ englishNormalization: e.target.checked })}
                    sx={{ color: "rgba(255,255,255,0.7)" }}
                  />
                }
                label={
                  <Typography variant="body2" sx={{ color: "white" }}>
                    English text normalization
                  </Typography>
                }
              />
              <Typography variant="caption" sx={{ opacity: 0.7 }}>
                Improves pronunciation of numbers (42 "forty-two"), dates, currencies, and technical terms. Recommended for most English content.
              </Typography>
            </Box>
            <Box sx={{ flex: 1 }}>
              {/* `||` (not `??`) on purpose: an empty string is not a valid option either. */}
              <TextField
                select
                fullWidth
                label="Language boost"
                value={settings.languageBoost || "auto"}
                onChange={(e) => patchSettings({ languageBoost: e.target.value })}
                InputLabelProps={FIELD_LABEL_PROPS}
                sx={TEXT_FIELD_SX}
              >
                {LANGUAGE_BOOST_OPTIONS.map((option) => (
                  <MenuItem key={option} value={option}>
                    {option}
                  </MenuItem>
                ))}
              </TextField>
              <Typography variant="caption" sx={{ opacity: 0.7, mt: 0.5, display: "block" }}>
                Improves pronunciation accuracy for content in specific languages or regional dialects. Use "auto" for automatic detection.
              </Typography>
            </Box>
          </Stack>
          {/* Quality Settings */}
          <Stack direction={{ xs: "column", sm: "row" }} spacing={2}>
            <Box sx={{ flex: 1 }}>
              {/* `||` (not `??`) on purpose: 0 is not a valid sample rate option either. */}
              <TextField
                select
                fullWidth
                label="Sample rate"
                value={settings.sampleRate || 24000}
                onChange={(e) => patchSettings({ sampleRate: Number(e.target.value) })}
                InputLabelProps={FIELD_LABEL_PROPS}
                sx={TEXT_FIELD_SX}
              >
                {SAMPLE_RATE_OPTIONS.map((rate) => (
                  <MenuItem key={rate} value={rate}>
                    {rate} Hz
                  </MenuItem>
                ))}
              </TextField>
              <Typography variant="caption" sx={{ opacity: 0.7, mt: 0.5, display: "block" }}>
                Sample rate affects audio clarity. 24kHz is optimal for voice content - higher values increase file size without noticeable improvement.
              </Typography>
            </Box>
            <Box sx={{ flex: 1 }}>
              <TextField
                select
                fullWidth
                label="Bitrate"
                value={settings.bitrate}
                onChange={(e) => patchSettings({ bitrate: Number(e.target.value) })}
                InputLabelProps={FIELD_LABEL_PROPS}
                sx={TEXT_FIELD_SX}
              >
                {BITRATE_OPTIONS.map((bitrate) => (
                  <MenuItem key={bitrate} value={bitrate}>
                    {bitrate / 1000} kbps
                  </MenuItem>
                ))}
              </TextField>
              <Typography variant="caption" sx={{ opacity: 0.7, mt: 0.5, display: "block" }}>
                Audio quality vs file size trade-off. 128kbps provides excellent voice quality with reasonable file sizes.
              </Typography>
            </Box>
          </Stack>
          {/* Format & Channel */}
          <Stack direction={{ xs: "column", sm: "row" }} spacing={2}>
            <Box sx={{ flex: 1 }}>
              <TextField
                select
                fullWidth
                label="Channel"
                value={settings.channel}
                onChange={(e) =>
                  patchSettings({ channel: e.target.value as AudioGenerationSettings["channel"] })
                }
                InputLabelProps={FIELD_LABEL_PROPS}
                sx={TEXT_FIELD_SX}
              >
                <MenuItem value="1">Mono (smaller files, standard for voice)</MenuItem>
                <MenuItem value="2">Stereo (wider sound, larger files)</MenuItem>
              </TextField>
            </Box>
            <Box sx={{ flex: 1 }}>
              <TextField
                select
                fullWidth
                label="Format"
                value={settings.format}
                onChange={(e) =>
                  patchSettings({ format: e.target.value as AudioGenerationSettings["format"] })
                }
                InputLabelProps={FIELD_LABEL_PROPS}
                sx={TEXT_FIELD_SX}
              >
                <MenuItem value="mp3">MP3 (compressed, widely supported)</MenuItem>
                <MenuItem value="wav">WAV (uncompressed, highest quality)</MenuItem>
                <MenuItem value="pcm">PCM (raw data, specialized use)</MenuItem>
                <MenuItem value="flac">FLAC (lossless, large files)</MenuItem>
              </TextField>
            </Box>
          </Stack>
          {/* Sync Mode */}
          <Box>
            <FormControlLabel
              control={
                <Checkbox
                  checked={settings.enableSyncMode}
                  onChange={(e) => patchSettings({ enableSyncMode: e.target.checked })}
                  sx={{ color: "rgba(255,255,255,0.7)" }}
                />
              }
              label={
                <Typography variant="body2" sx={{ color: "white" }}>
                  Enable sync mode (recommended)
                </Typography>
              }
            />
            <Typography variant="caption" sx={{ opacity: 0.7 }}>
              When enabled, waits for generation to complete before proceeding. Recommended for reliable audio delivery.
            </Typography>
          </Box>
          {/* Pro Tips */}
          <Box sx={{ mt: 2, p: 2, bgcolor: alpha("#ffffff", 0.05), borderRadius: 1, border: "1px solid rgba(255,255,255,0.1)" }}>
            <Typography variant="subtitle2" sx={{ fontWeight: 600, mb: 1, color: "white" }}>
              💡 Human-Like Audio Tips
            </Typography>
            <Typography variant="caption" sx={{ opacity: 0.8, display: "block", mb: 0.5 }}>
              <strong>Voice Choice</strong>: "Casual_Guy" provides natural, conversational delivery perfect for YouTube
            </Typography>
            <Typography variant="caption" sx={{ opacity: 0.8, display: "block", mb: 0.5 }}>
              <strong>Speed</strong>: 1.15 provides engaging pace - not too slow, not too fast, just right for viewers
            </Typography>
            <Typography variant="caption" sx={{ opacity: 0.8, display: "block", mb: 0.5 }}>
              <strong>Emotion</strong>: "Happy" creates natural, positive delivery that keeps viewers engaged
            </Typography>
            <Typography variant="caption" sx={{ opacity: 0.8, display: "block", mb: 0.5 }}>
              <strong>Quality</strong>: 128kbps MP3 provides professional quality with optimal file sizes
            </Typography>
            <Typography variant="caption" sx={{ opacity: 0.8, display: "block" }}>
              <strong>Enhancement</strong>: English normalization improves pronunciation of numbers, dates, and technical terms
            </Typography>
          </Box>
        </Stack>
      </DialogContent>
      <DialogActions sx={{ p: 3, pt: 2 }}>
        <Button
          onClick={onClose}
          disabled={isGenerating}
          sx={{ color: "rgba(255,255,255,0.7)" }}
        >
          Cancel
        </Button>
        <Button
          onClick={handleApply}
          variant="contained"
          disabled={isGenerating}
          startIcon={isGenerating ? undefined : <VolumeUp />}
          sx={{
            backgroundColor: "#4ade80",
            "&:hover": { backgroundColor: "#22c55e" },
            "&:disabled": { backgroundColor: "rgba(255,255,255,0.2)" },
          }}
        >
          {isGenerating ? "Generating..." : "Apply Settings & Generate"}
        </Button>
      </DialogActions>
    </Dialog>
  );
};

View File

@@ -18,4 +18,8 @@ export * from './utils';
// Asset Library modal (images only)
export { AssetLibraryImageModal } from './AssetLibraryImageModal';
export type { AssetLibraryImageModalProps } from './AssetLibraryImageModal';

// Audio Settings modal (shared across tools)
export { AudioSettingsModal } from './AudioSettingsModal';
export type { AudioGenerationSettings } from './AudioSettingsModal';