AI podcast maker performance optimizations

This commit is contained in:
ajaysi
2025-12-12 21:43:09 +05:30
parent 81590cf4db
commit eba5210577
46 changed files with 6176 additions and 1648 deletions

View File

@@ -1,19 +1,16 @@
import React, { useEffect, useState } from "react";
import { Stack, Box, Typography, TextField, Button, Chip, CircularProgress, alpha } from "@mui/material";
import { VolumeUp as VolumeUpIcon } from "@mui/icons-material";
import { Stack, Box, Typography, TextField, Button, Chip, alpha } from "@mui/material";
import { Line } from "../types";
import { GlassyCard, glassyCardSx } from "../ui";
interface LineEditorProps {
line: Line;
onChange: (l: Line) => void;
onPreview: (text: string) => Promise<{ ok: boolean; message: string; audioUrl?: string }>;
}
export const LineEditor: React.FC<LineEditorProps> = ({ line, onChange, onPreview }) => {
export const LineEditor: React.FC<LineEditorProps> = ({ line, onChange }) => {
const [editing, setEditing] = useState(false);
const [text, setText] = useState(line.text);
const [previewing, setPreviewing] = useState(false);
useEffect(() => setText(line.text), [line.text]);
const handleSave = () => {
@@ -21,33 +18,37 @@ export const LineEditor: React.FC<LineEditorProps> = ({ line, onChange, onPrevie
setEditing(false);
};
/**
 * Request a TTS preview for the text currently in the editor.
 * A returned audio URL is opened in a new tab; otherwise the service message
 * is shown to the user. The `previewing` flag is cleared no matter how the
 * request ends.
 */
const handlePreview = async () => {
  setPreviewing(true);
  try {
    const { audioUrl, message } = await onPreview(text);
    if (!audioUrl) {
      // No playable result: surface whatever the service reported.
      alert(message);
      return;
    }
    window.open(audioUrl, "_blank");
  } finally {
    setPreviewing(false);
  }
};
return (
<GlassyCard
whileHover={{ y: -2 }}
sx={{
...glassyCardSx,
p: 2,
transition: "all 0.2s",
p: 2.5,
transition: "all 0.2s cubic-bezier(0.4, 0, 0.2, 1)",
borderLeft: "3px solid transparent",
"&:hover": {
borderLeftColor: "#667eea",
boxShadow: "0 4px 6px rgba(15, 23, 42, 0.08), 0 8px 24px rgba(15, 23, 42, 0.06)",
},
}}
>
<Stack spacing={2}>
<Stack direction="row" justifyContent="space-between" alignItems="flex-start">
<Box flex={1}>
<Chip label={line.speaker} size="small" sx={{ mb: 1, background: alpha("#667eea", 0.2), color: "#a78bfa" }} />
<Chip
label={line.speaker}
size="small"
sx={{
mb: 1.5,
background: "linear-gradient(135deg, rgba(102, 126, 234, 0.12) 0%, rgba(118, 75, 162, 0.12) 100%)",
color: "#667eea",
fontWeight: 600,
fontSize: "0.75rem",
height: 24,
border: "1px solid rgba(102, 126, 234, 0.2)",
boxShadow: "0 1px 2px rgba(102, 126, 234, 0.05)",
}}
/>
{editing ? (
<TextField
fullWidth
@@ -57,47 +58,97 @@ export const LineEditor: React.FC<LineEditorProps> = ({ line, onChange, onPrevie
onChange={(e) => setText(e.target.value)}
sx={{
"& .MuiOutlinedInput-root": {
color: "white",
"& fieldset": { borderColor: "rgba(255,255,255,0.2)" },
color: "#0f172a",
backgroundColor: "#f8fafc",
borderRadius: 2,
"& fieldset": {
borderColor: "rgba(15, 23, 42, 0.12)",
borderWidth: 1.5,
},
"&:hover fieldset": {
borderColor: "rgba(102, 126, 234, 0.4)",
},
"&.Mui-focused fieldset": {
borderColor: "#667eea",
borderWidth: 2,
},
},
"& .MuiInputBase-input": {
color: "#0f172a",
fontWeight: 400,
fontSize: "0.9375rem",
lineHeight: 1.6,
},
}}
/>
) : (
<Typography variant="body2" sx={{ lineHeight: 1.7, color: "rgba(255,255,255,0.9)" }}>
<Typography
variant="body2"
sx={{
lineHeight: 1.75,
color: "#0f172a",
fontWeight: 400,
fontSize: "0.9375rem",
letterSpacing: "0.01em",
}}
>
{line.text}
</Typography>
)}
{line.usedFactIds && line.usedFactIds.length > 0 && (
<Stack direction="row" spacing={0.5} sx={{ mt: 1 }} flexWrap="wrap" useFlexGap>
<Typography variant="caption" color="text.secondary">
<Stack direction="row" spacing={0.5} sx={{ mt: 1.5 }} flexWrap="wrap" useFlexGap>
<Typography variant="caption" sx={{ color: "#64748b", fontWeight: 500, fontSize: "0.75rem" }}>
Facts:
</Typography>
{line.usedFactIds.map((id) => (
<Chip key={id} label={id} size="small" variant="outlined" sx={{ fontSize: "0.65rem", height: 20 }} />
<Chip
key={id}
label={id}
size="small"
variant="outlined"
sx={{
fontSize: "0.6875rem",
height: 22,
color: "#64748b",
borderColor: "rgba(15, 23, 42, 0.12)",
fontWeight: 500,
}}
/>
))}
</Stack>
)}
</Box>
<Stack spacing={1} sx={{ ml: 2 }}>
<Box sx={{ ml: 2 }}>
<Button
size="small"
variant={editing ? "contained" : "outlined"}
onClick={editing ? handleSave : () => setEditing(true)}
sx={{ minWidth: 80 }}
sx={{
minWidth: 85,
color: editing ? "white" : "#667eea",
borderColor: editing ? "transparent" : "#667eea",
backgroundColor: editing
? "linear-gradient(135deg, #667eea 0%, #764ba2 100%)"
: "transparent",
fontWeight: 600,
fontSize: "0.8125rem",
textTransform: "none",
borderRadius: 2,
borderWidth: editing ? 0 : 1.5,
boxShadow: editing ? "0 2px 4px rgba(102, 126, 234, 0.2)" : "none",
"&:hover": {
borderColor: editing ? "transparent" : "#5568d3",
backgroundColor: editing
? "linear-gradient(135deg, #5568d3 0%, #6b3fa0 100%)"
: alpha("#667eea", 0.08),
boxShadow: editing ? "0 4px 8px rgba(102, 126, 234, 0.3)" : "none",
},
transition: "all 0.2s cubic-bezier(0.4, 0, 0.2, 1)",
}}
>
{editing ? "Save" : "Edit"}
</Button>
<Button
size="small"
variant="outlined"
startIcon={previewing ? <CircularProgress size={14} /> : <VolumeUpIcon />}
onClick={handlePreview}
disabled={previewing || editing}
sx={{ minWidth: 120 }}
>
Preview TTS
</Button>
</Stack>
</Box>
</Stack>
</Stack>
</GlassyCard>

View File

@@ -1,84 +1,377 @@
import React from "react";
import { Stack, Box, Typography, Divider, Chip, alpha } from "@mui/material";
import React, { useState, useEffect } from "react";
import { Stack, Box, Typography, Divider, Chip, alpha, CircularProgress } from "@mui/material";
import {
EditNote as EditNoteIcon,
CheckCircle as CheckCircleIcon,
RadioButtonUnchecked as RadioButtonUncheckedIcon,
VolumeUp as VolumeUpIcon,
PlayArrow as PlayArrowIcon,
Image as ImageIcon,
} from "@mui/icons-material";
import { Scene, Line } from "../types";
import { Scene, Line, Knobs } from "../types";
import { GlassyCard, glassyCardSx, PrimaryButton } from "../ui";
import { LineEditor } from "./LineEditor";
import { podcastApi } from "../../../services/podcastApi";
import { aiApiClient } from "../../../api/client";
interface SceneEditorProps {
scene: Scene;
onUpdateScene: (s: Scene) => void;
onApprove: (id: string) => Promise<void>;
onPreviewLine: (text: string) => Promise<{ ok: boolean; message: string; audioUrl?: string }>;
knobs: Knobs;
approvingSceneId?: string | null;
generatingAudioId?: string | null;
onAudioGenerationStart?: (sceneId: string) => void;
onAudioGenerated?: (sceneId: string, audioUrl: string) => void;
idea?: string; // Podcast idea for image generation context
}
export const SceneEditor: React.FC<SceneEditorProps> = ({
scene,
onUpdateScene,
onApprove,
onPreviewLine,
knobs,
approvingSceneId,
generatingAudioId,
onAudioGenerationStart,
onAudioGenerated,
idea,
}) => {
const [localGenerating, setLocalGenerating] = useState(false);
const [generatingImage, setGeneratingImage] = useState(false);
const [audioBlobUrl, setAudioBlobUrl] = useState<string | null>(null);
// Mirrors the blob URL held in `audioBlobUrl` so it can be revoked outside of
// state updater functions (which must stay side-effect free) and on unmount.
const audioBlobUrlRef = React.useRef<string | null>(null);

// Release the last blob URL when the component unmounts. Without this the
// object URL (and the audio data it pins) would live until the page is closed.
useEffect(() => {
  return () => {
    if (audioBlobUrlRef.current) {
      URL.revokeObjectURL(audioBlobUrlRef.current);
      audioBlobUrlRef.current = null;
    }
  };
}, []);

// Load the scene audio through the authenticated API client and expose it to
// the <audio> element as a blob URL whenever `scene.audioUrl` changes.
useEffect(() => {
  // Swap the tracked blob URL, revoking the one being replaced.
  const replaceBlobUrl = (next: string | null) => {
    const previous = audioBlobUrlRef.current;
    if (previous && previous !== next) {
      URL.revokeObjectURL(previous);
    }
    audioBlobUrlRef.current = next;
    setAudioBlobUrl(next);
  };

  if (!scene.audioUrl) {
    // Audio was removed from the scene: drop and release any loaded blob.
    replaceBlobUrl(null);
    return;
  }

  // Flipped by the cleanup below, which runs on unmount and before the effect
  // re-runs for a different audioUrl; a late response is then ignored.
  let isMounted = true;
  const currentAudioUrl = scene.audioUrl;

  const loadAudioBlob = async () => {
    try {
      // Normalize path
      let audioPath = currentAudioUrl.startsWith('/') ? currentAudioUrl : `/${currentAudioUrl}`;

      // Convert /api/story/audio/ to /api/podcast/audio/ if needed
      if (audioPath.includes('/api/story/audio/')) {
        const filename = audioPath.split('/api/story/audio/').pop() || '';
        audioPath = `/api/podcast/audio/${filename}`;
      }

      // Ensure it's a podcast audio endpoint
      if (!audioPath.includes('/api/podcast/audio/')) {
        const filename = audioPath.split('/').pop() || currentAudioUrl;
        audioPath = `/api/podcast/audio/${filename}`;
      }

      // Remove query parameters if present
      audioPath = audioPath.split('?')[0];

      const response = await aiApiClient.get(audioPath, {
        responseType: 'blob',
      });

      if (!isMounted) {
        // Component unmounted or audioUrl changed while the request was in flight.
        return;
      }

      replaceBlobUrl(URL.createObjectURL(response.data));
    } catch (error) {
      console.error(`Failed to load audio blob for scene ${scene.id}:`, error);
      // The blob URL is left untouched on error; no dedicated error state is set here.
    }
  };

  void loadAudioBlob();

  return () => {
    isMounted = false;
  };
}, [scene.audioUrl, scene.id]);
/**
 * Swap an edited line into the scene (matched by `id`) and pass the resulting
 * scene to the parent through `onUpdateScene`.
 */
const updateLine = (updatedLine: Line) => {
  const nextLines = scene.lines.map((existing) => {
    if (existing.id === updatedLine.id) {
      return updatedLine;
    }
    return existing;
  });
  onUpdateScene({ ...scene, lines: nextLines });
};
const approving = approvingSceneId === scene.id;
const generating = generatingAudioId === scene.id || localGenerating;
const hasAudio = Boolean(scene.audioUrl && audioBlobUrl);
const hasImage = Boolean(scene.imageUrl);
const handleApprove = async () => {
await onApprove(scene.id);
onUpdateScene({ ...scene, approved: true });
/**
 * Approve the scene if it is not approved yet, then render its audio.
 *
 * Steps, in order: mark generation as started (locally and via
 * `onAudioGenerationStart`), approve through `onApprove` when needed, call
 * `podcastApi.renderSceneAudio`, push the resulting `audioUrl` to the parent
 * with `onUpdateScene`, and finally notify `onAudioGenerated`.
 * On failure the error is logged and rethrown; approval is rolled back only
 * when it was granted by this call.
 *
 * NOTE(review): `scene` is the prop value captured when this handler was
 * created and is not re-read after the awaits, so every `{ ...scene, ... }`
 * below is built from that snapshot — confirm that edits made while audio is
 * rendering cannot be overwritten by these updates.
 * NOTE(review): on failure this handler only clears `localGenerating`; no
 * callback tells the parent that generation ended. If the parent clears its
 * generating flag solely in `onAudioGenerated`, the button may stay disabled
 * after an error — confirm against the caller.
 */
const handleApproveAndGenerate = async () => {
// Remember the initial approval state so the catch block knows whether to roll back
const wasAlreadyApproved = scene.approved;
const sceneId = scene.id;
try {
// Set generating state
setLocalGenerating(true);
if (onAudioGenerationStart) {
onAudioGenerationStart(sceneId);
}
// If scene is not approved yet, approve it first
// This will update the parent script state
if (!scene.approved) {
await onApprove(sceneId);
// The parent's approveScene already updated the script state
// We need to wait for React to propagate the updated scene prop
// For now, we'll update it locally too to ensure UI updates immediately
onUpdateScene({ ...scene, approved: true });
}
// Use the current scene (which should now be approved)
// If scene prop hasn't updated yet, use the local update we just made
const currentScene = { ...scene, approved: true };
// Generate audio
// The voice id is fixed here; emotion falls back from the scene to the knobs to "neutral",
// and speed falls back to 1.0 when the knob is unset (or 0, because of `||`)
const result = await podcastApi.renderSceneAudio({
scene: currentScene,
voiceId: "Wise_Woman",
emotion: scene.emotion || knobs.voice_emotion || "neutral",
speed: knobs.voice_speed || 1.0,
});
// Update scene with audio URL and ensure approved state
// This will sync with parent script state
const updatedScene = { ...currentScene, audioUrl: result.audioUrl, approved: true };
onUpdateScene(updatedScene);
if (onAudioGenerated) {
onAudioGenerated(sceneId, result.audioUrl);
}
} catch (error) {
console.error("Failed to approve and generate audio:", error);
// On error, revert approval only if we just approved it in this call
if (!wasAlreadyApproved) {
onUpdateScene({ ...scene, approved: false, audioUrl: undefined });
}
// Rethrown so the failure is not silently swallowed by this handler
throw error;
} finally {
// Runs on success and failure alike
setLocalGenerating(false);
}
};
/**
 * Generate (or regenerate) the image for this scene.
 *
 * Sends the scene id, title, the concatenated line texts and the optional
 * podcast `idea` to `podcastApi.generateSceneImage` at 1024x1024, then stores
 * the returned `image_url` on the scene via `onUpdateScene`.
 * Errors are logged and rethrown; `generatingImage` is always reset.
 */
const handleGenerateImage = async () => {
  try {
    setGeneratingImage(true);

    // Join all line texts so the image request has the scene's spoken content as context.
    const sceneContent = scene.lines.map((line) => line.text).join(" ");

    const result = await podcastApi.generateSceneImage({
      sceneId: scene.id,
      sceneTitle: scene.title,
      sceneContent,
      idea,
      width: 1024,
      height: 1024,
    });

    // Update scene with image URL
    onUpdateScene({ ...scene, imageUrl: result.image_url });
  } catch (error) {
    console.error("Failed to generate image:", error);
    throw error;
  } finally {
    setGeneratingImage(false);
  }
};
return (
<GlassyCard sx={glassyCardSx}>
<Stack spacing={2}>
<Stack spacing={2.5}>
<Stack direction="row" justifyContent="space-between" alignItems="flex-start">
<Box>
<Typography variant="h6" sx={{ display: "flex", alignItems: "center", gap: 1, mb: 0.5 }}>
<EditNoteIcon fontSize="small" />
<Box sx={{ flex: 1 }}>
<Typography
variant="h6"
sx={{
display: "flex",
alignItems: "center",
gap: 1.5,
mb: 1,
color: "#0f172a",
fontWeight: 600,
fontSize: "1.25rem",
letterSpacing: "-0.01em",
}}
>
<EditNoteIcon fontSize="small" sx={{ color: "#667eea", fontSize: "1.5rem" }} />
{scene.title}
</Typography>
<Stack direction="row" spacing={1} alignItems="center">
<Stack direction="row" spacing={1.5} alignItems="center" flexWrap="wrap">
<Chip
icon={scene.approved ? <CheckCircleIcon /> : <RadioButtonUncheckedIcon />}
label={scene.approved ? "Approved" : "Pending Approval"}
size="small"
color={scene.approved ? "success" : "warning"}
sx={{
background: scene.approved ? alpha("#10b981", 0.2) : alpha("#f59e0b", 0.2),
color: scene.approved ? "#6ee7b7" : "#fbbf24",
border: scene.approved ? "1px solid rgba(16,185,129,0.3)" : "1px solid rgba(245,158,11,0.3)",
background: scene.approved
? "linear-gradient(135deg, rgba(16, 185, 129, 0.12) 0%, rgba(5, 150, 105, 0.12) 100%)"
: "linear-gradient(135deg, rgba(245, 158, 11, 0.12) 0%, rgba(217, 119, 6, 0.12) 100%)",
color: scene.approved ? "#059669" : "#d97706",
border: scene.approved
? "1px solid rgba(16, 185, 129, 0.25)"
: "1px solid rgba(245, 158, 11, 0.25)",
fontWeight: 600,
fontSize: "0.75rem",
height: 26,
boxShadow: "0 1px 2px rgba(0, 0, 0, 0.05)",
}}
/>
<Typography variant="caption" color="text.secondary">
<Typography variant="caption" sx={{ color: "#64748b", fontWeight: 500, fontSize: "0.8125rem" }}>
Duration: {scene.duration}s
</Typography>
</Stack>
</Box>
<PrimaryButton
onClick={handleApprove}
disabled={scene.approved || approving}
loading={approving}
startIcon={scene.approved ? <CheckCircleIcon /> : undefined}
tooltip={scene.approved ? "Scene is approved and ready for rendering" : "Approve this scene to enable rendering"}
>
{scene.approved ? "Approved" : approving ? "Approving..." : "Approve Scene"}
</PrimaryButton>
<Stack direction="row" spacing={1.5} flexWrap="wrap" useFlexGap>
<PrimaryButton
onClick={handleApproveAndGenerate}
disabled={approving || generating}
loading={approving || generating}
startIcon={
hasAudio && !generating ? (
<VolumeUpIcon />
) : generating ? (
<CircularProgress size={16} sx={{ color: "white" }} />
) : (
<PlayArrowIcon />
)
}
tooltip={
hasAudio && !generating
? "Regenerate audio for this scene"
: generating
? "Generating audio..."
: scene.approved
? "Generate audio for this scene"
: "Approve scene and generate audio"
}
sx={{
minWidth: 200,
}}
>
{hasAudio && !generating
? "Regenerate Audio"
: generating
? "Generating Audio..."
: scene.approved
? "Generate Audio"
: "Approve & Generate Audio"}
</PrimaryButton>
<PrimaryButton
onClick={handleGenerateImage}
disabled={generatingImage}
loading={generatingImage}
startIcon={
hasImage && !generatingImage ? (
<ImageIcon />
) : generatingImage ? (
<CircularProgress size={16} sx={{ color: "white" }} />
) : (
<ImageIcon />
)
}
tooltip={
hasImage
? "Regenerate image for this scene"
: generatingImage
? "Generating image..."
: "Generate image for video (optional)"
}
sx={{
minWidth: 180,
background: hasImage
? "linear-gradient(135deg, #10b981 0%, #059669 100%)"
: "linear-gradient(135deg, #667eea 0%, #764ba2 100%)",
"&:hover": {
background: hasImage
? "linear-gradient(135deg, #059669 0%, #047857 100%)"
: "linear-gradient(135deg, #764ba2 0%, #667eea 100%)",
},
}}
>
{hasImage && !generatingImage
? "Regenerate Image"
: generatingImage
? "Generating Image..."
: "Generate Image"}
</PrimaryButton>
</Stack>
</Stack>
<Divider sx={{ borderColor: "rgba(255,255,255,0.1)" }} />
<Divider sx={{ borderColor: "rgba(15, 23, 42, 0.08)", borderWidth: 1 }} />
<Stack spacing={2}>
{scene.lines.map((line) => (
<LineEditor key={line.id} line={line} onChange={updateLine} onPreview={(text) => onPreviewLine(text)} />
<LineEditor key={line.id} line={line} onChange={updateLine} />
))}
</Stack>
{scene.audioUrl && (
<>
<Divider sx={{ borderColor: "rgba(15, 23, 42, 0.08)", borderWidth: 1, mt: 1 }} />
<Box
sx={{
p: 2,
background: hasAudio
? "linear-gradient(135deg, rgba(16, 185, 129, 0.08) 0%, rgba(5, 150, 105, 0.08) 100%)"
: "linear-gradient(135deg, rgba(245, 158, 11, 0.08) 0%, rgba(217, 119, 6, 0.08) 100%)",
borderRadius: 2,
border: hasAudio
? "1px solid rgba(16, 185, 129, 0.2)"
: "1px solid rgba(245, 158, 11, 0.2)",
}}
>
<Stack direction="row" alignItems="center" spacing={1.5} sx={{ mb: 1.5 }}>
<VolumeUpIcon sx={{ color: hasAudio ? "#059669" : "#d97706", fontSize: "1.25rem" }} />
<Typography variant="subtitle2" sx={{ color: hasAudio ? "#059669" : "#d97706", fontWeight: 600 }}>
{hasAudio ? "Audio Generated" : "Loading Audio..."}
</Typography>
</Stack>
{hasAudio && audioBlobUrl ? (
<audio controls style={{ width: "100%", borderRadius: 8 }}>
<source src={audioBlobUrl} type="audio/mpeg" />
Your browser does not support the audio element.
</audio>
) : (
<Box sx={{ display: "flex", alignItems: "center", justifyContent: "center", py: 2 }}>
<CircularProgress size={24} sx={{ color: "#d97706" }} />
</Box>
)}
</Box>
</>
)}
</Stack>
</GlassyCard>
);

View File

@@ -1,11 +1,13 @@
import React, { useEffect, useState } from "react";
import { Box, Stack, Typography, Alert, Paper, LinearProgress, CircularProgress, alpha } from "@mui/material";
import { EditNote as EditNoteIcon, CheckCircle as CheckCircleIcon, PlayArrow as PlayArrowIcon, ArrowBack as ArrowBackIcon } from "@mui/icons-material";
import React, { useEffect, useState, useCallback } from "react";
import { Box, Stack, Typography, Alert, Paper, LinearProgress, CircularProgress, alpha, Collapse, IconButton, Divider } from "@mui/material";
import { EditNote as EditNoteIcon, CheckCircle as CheckCircleIcon, PlayArrow as PlayArrowIcon, ArrowBack as ArrowBackIcon, Info as InfoIcon, ExpandMore as ExpandMoreIcon, ExpandLess as ExpandLessIcon, Download as DownloadIcon, Refresh as RefreshIcon } from "@mui/icons-material";
import { Script, Knobs, Scene } from "../types";
import { BlogResearchResponse } from "../../../services/blogWriterApi";
import { podcastApi } from "../../../services/podcastApi";
import { GlassyCard, PrimaryButton, SecondaryButton } from "../ui";
import { SceneEditor } from "./SceneEditor";
import { InlineAudioPlayer } from "../InlineAudioPlayer";
import { aiApiClient } from "../../../api/client";
interface ScriptEditorProps {
projectId: string;
@@ -40,6 +42,15 @@ export const ScriptEditor: React.FC<ScriptEditorProps> = ({
const [loading, setLoading] = useState(false);
const [error, setError] = useState<string | null>(null);
const [approvingSceneId, setApprovingSceneId] = useState<string | null>(null);
const [generatingAudioId, setGeneratingAudioId] = useState<string | null>(null);
const [showScriptFormatInfo, setShowScriptFormatInfo] = useState(true);
const [combiningAudio, setCombiningAudio] = useState(false);
const [combinedAudioResult, setCombinedAudioResult] = useState<{
url: string;
filename: string;
duration: number;
sceneCount: number;
} | null>(null);
// Sync with parent state
useEffect(() => {
@@ -90,26 +101,32 @@ export const ScriptEditor: React.FC<ScriptEditorProps> = ({
}, [projectId, rawResearch, idea, knobs, speakers, durationMinutes]);
const updateScene = (updated: Scene) => {
if (!script) return;
const updatedScript = { ...script, scenes: script.scenes.map((s) => (s.id === updated.id ? updated : s)) };
setScript(updatedScript);
onScriptChange(updatedScript);
// Use functional update to ensure we're working with latest state
setScript((currentScript) => {
if (!currentScript) return currentScript;
const updatedScript = {
...currentScript,
scenes: currentScript.scenes.map((s) => (s.id === updated.id ? { ...s, ...updated } : s))
};
onScriptChange(updatedScript);
return updatedScript;
});
};
const approveScene = async (sceneId: string) => {
try {
setApprovingSceneId(sceneId);
await podcastApi.approveScene({ projectId, sceneId });
const updatedScript = script
? {
...script,
scenes: script.scenes.map((s) => (s.id === sceneId ? { ...s, approved: true } : s)),
}
: null;
if (updatedScript) {
setScript(updatedScript);
// Use functional update to ensure we're working with latest state
setScript((currentScript) => {
if (!currentScript) return currentScript;
const updatedScript = {
...currentScript,
scenes: currentScript.scenes.map((s) => (s.id === sceneId ? { ...s, approved: true } : s)),
};
onScriptChange(updatedScript);
}
return updatedScript;
});
} catch (err) {
const message = err instanceof Error ? err.message : "Failed to approve scene";
setError(message);
@@ -123,47 +140,405 @@ export const ScriptEditor: React.FC<ScriptEditorProps> = ({
const allApproved = script && script.scenes.every((s) => s.approved);
const approvedCount = script ? script.scenes.filter((s) => s.approved).length : 0;
const totalScenes = script ? script.scenes.length : 0;
// Check if all scenes have both audio and images (required for video rendering)
const allScenesHaveAudioAndImages = script && script.scenes.every((s) => s.audioUrl && s.imageUrl);
const scenesWithAudio = script ? script.scenes.filter((s) => s.audioUrl).length : 0;
const allScenesHaveAudio = script && script.scenes.every((s) => s.audioUrl);
/**
 * Combine the audio of every scene that has a server-side audio URL into one
 * file, store the result for preview, and trigger a browser download of it.
 *
 * Does nothing without a script or project id. Failures of the combine call
 * and of the follow-up download are reported separately through `onError`;
 * `combiningAudio` is reset in every case.
 */
const combineAudio = useCallback(async () => {
  if (!script || !projectId) return;

  try {
    setCombiningAudio(true);

    // Collect ids and URLs in parallel arrays; scenes without audio and
    // scenes whose URL is a local blob: URL are left out.
    const sceneIds: string[] = [];
    const sceneAudioUrls: string[] = [];
    for (const scene of script.scenes) {
      const sceneAudioUrl = scene.audioUrl;
      if (!sceneAudioUrl || sceneAudioUrl.startsWith('blob:')) {
        continue;
      }
      sceneIds.push(scene.id);
      sceneAudioUrls.push(sceneAudioUrl);
    }

    if (sceneIds.length === 0) {
      onError("No audio files found to combine.");
      return;
    }

    const combined = await podcastApi.combineAudio({
      projectId,
      sceneIds,
      sceneAudioUrls,
    });
    const combinedUrl = combined.combined_audio_url;
    const combinedFilename = combined.combined_audio_filename;

    // Keep the result so the combined episode can be previewed.
    setCombinedAudioResult({
      url: combinedUrl,
      filename: combinedFilename,
      duration: combined.total_duration,
      sceneCount: combined.scene_count,
    });

    // Fetch the file through the API client as a blob, then download it
    // by clicking a temporary anchor element.
    try {
      let audioPath = combinedUrl.startsWith('/') ? combinedUrl : `/${combinedUrl}`;

      // Anything that is not already a podcast audio path is rewritten to one by filename.
      if (!audioPath.includes('/api/podcast/audio/')) {
        const filename = audioPath.split('/').pop() || combinedFilename;
        audioPath = `/api/podcast/audio/${filename}`;
      }

      // Strip any query string.
      audioPath = audioPath.split('?')[0];

      const response = await aiApiClient.get(audioPath, {
        responseType: 'blob',
      });

      const blobUrl = URL.createObjectURL(response.data);
      const anchor = document.createElement("a");
      anchor.href = blobUrl;
      anchor.download = combinedFilename || `podcast-episode-${projectId.slice(-8)}.mp3`;
      document.body.appendChild(anchor);
      anchor.click();
      document.body.removeChild(anchor);

      // Revoke shortly afterwards rather than immediately after the click.
      setTimeout(() => {
        URL.revokeObjectURL(blobUrl);
      }, 100);
    } catch (downloadError) {
      console.error('Failed to download combined audio:', downloadError);
      onError('Failed to download audio file. You can try downloading again from the preview.');
    }
  } catch (error) {
    const message = error instanceof Error ? error.message : "Failed to combine audio";
    onError(`Failed to combine audio: ${message}`);
  } finally {
    setCombiningAudio(false);
  }
}, [script, projectId, onError]);
return (
<Box sx={{ mt: 3 }}>
<Stack direction="row" spacing={2} alignItems="center" sx={{ mb: 3 }}>
<Box sx={{ mt: 4 }}>
<Stack direction="row" spacing={2} alignItems="center" sx={{ mb: 4 }}>
<SecondaryButton onClick={onBackToResearch} startIcon={<ArrowBackIcon />}>
Back to Research
</SecondaryButton>
<Typography
variant="h4"
sx={{
background: "linear-gradient(135deg, #a78bfa 0%, #60a5fa 100%)",
WebkitBackgroundClip: "text",
WebkitTextFillColor: "transparent",
fontWeight: 800,
display: "flex",
alignItems: "center",
gap: 1,
}}
>
<EditNoteIcon />
Script Editor
</Typography>
<Box sx={{ flex: 1 }}>
<Typography
variant="h4"
sx={{
background: "linear-gradient(135deg, #667eea 0%, #764ba2 100%)",
WebkitBackgroundClip: "text",
WebkitTextFillColor: "transparent",
fontWeight: 700,
letterSpacing: "-0.02em",
display: "flex",
alignItems: "center",
gap: 1.5,
fontSize: { xs: "1.75rem", md: "2rem" },
}}
>
<EditNoteIcon sx={{ fontSize: "2rem" }} />
Script Editor
</Typography>
<Typography variant="body2" sx={{ color: "#64748b", mt: 0.5, ml: 5.5 }}>
Review and refine your podcast script before rendering
</Typography>
</Box>
</Stack>
{loading && (
<Alert severity="info" icon={<CircularProgress size={20} />} sx={{ mb: 3 }}>
<Typography variant="body2">Generating script with AI... This may take a moment.</Typography>
<Alert
severity="info"
icon={<CircularProgress size={20} />}
sx={{
mb: 3,
background: "linear-gradient(135deg, rgba(99, 102, 241, 0.08) 0%, rgba(139, 92, 246, 0.08) 100%)",
border: "1px solid rgba(99, 102, 241, 0.2)",
borderRadius: 2,
boxShadow: "0 1px 2px rgba(99, 102, 241, 0.05)",
"& .MuiAlert-icon": {
color: "#6366f1",
},
}}
>
<Typography variant="body2" sx={{ color: "#0f172a", fontWeight: 500 }}>
Generating script with AI... This may take a moment.
</Typography>
</Alert>
)}
{error && (
<Alert severity="error" sx={{ mb: 3 }}>
{error}
<Alert
severity="error"
sx={{
mb: 3,
background: "linear-gradient(135deg, rgba(239, 68, 68, 0.08) 0%, rgba(220, 38, 38, 0.08) 100%)",
border: "1px solid rgba(239, 68, 68, 0.2)",
borderRadius: 2,
boxShadow: "0 1px 2px rgba(239, 68, 68, 0.05)",
"& .MuiAlert-icon": {
color: "#ef4444",
},
}}
>
<Typography variant="body2" sx={{ color: "#0f172a", fontWeight: 500 }}>
{error}
</Typography>
</Alert>
)}
{script && (
<Stack spacing={3}>
<Alert severity="info" sx={{ background: alpha("#3b82f6", 0.1), border: "1px solid rgba(59,130,246,0.3)" }}>
<Typography variant="body2">
<strong>Approval Required:</strong> Each scene must be approved before rendering. Review and edit lines as needed, then approve each scene.
{/* Script Format Explanation Panel */}
<Paper
sx={{
p: 3,
background: "linear-gradient(135deg, rgba(99, 102, 241, 0.05) 0%, rgba(139, 92, 246, 0.05) 100%)",
border: "1px solid rgba(99, 102, 241, 0.15)",
borderRadius: 2,
boxShadow: "0 2px 8px rgba(99, 102, 241, 0.08)",
}}
>
<Stack direction="row" alignItems="center" justifyContent="space-between" sx={{ mb: showScriptFormatInfo ? 2 : 0 }}>
<Stack direction="row" alignItems="center" spacing={1.5}>
<Box
sx={{
width: 40,
height: 40,
borderRadius: "50%",
background: "linear-gradient(135deg, #667eea 0%, #764ba2 100%)",
display: "flex",
alignItems: "center",
justifyContent: "center",
boxShadow: "0 2px 8px rgba(102, 126, 234, 0.3)",
}}
>
<InfoIcon sx={{ color: "#ffffff", fontSize: "1.5rem" }} />
</Box>
<Box>
<Typography variant="h6" sx={{ color: "#0f172a", fontWeight: 600, fontSize: "1.1rem" }}>
Why This Script Format?
</Typography>
<Typography variant="body2" sx={{ color: "#64748b", mt: 0.25 }}>
Understanding how your script creates natural, human-like audio
</Typography>
</Box>
</Stack>
<IconButton
onClick={() => setShowScriptFormatInfo(!showScriptFormatInfo)}
sx={{
color: "#6366f1",
"&:hover": {
background: "rgba(99, 102, 241, 0.1)",
},
}}
>
{showScriptFormatInfo ? <ExpandLessIcon /> : <ExpandMoreIcon />}
</IconButton>
</Stack>
<Collapse in={showScriptFormatInfo}>
<Stack spacing={2.5}>
<Box>
<Typography variant="body2" sx={{ color: "#0f172a", lineHeight: 1.8, mb: 2 }}>
Our AI script generator creates scripts specifically optimized for <strong style={{ fontWeight: 600 }}>high-quality text-to-speech</strong>.
The format you see here is designed to produce audio that sounds natural and human-like, not robotic.
</Typography>
</Box>
<Stack spacing={2}>
<Box sx={{ display: "flex", gap: 2 }}>
<Box
sx={{
minWidth: 32,
height: 32,
borderRadius: "8px",
background: "linear-gradient(135deg, rgba(99, 102, 241, 0.1) 0%, rgba(139, 92, 246, 0.1) 100%)",
display: "flex",
alignItems: "center",
justifyContent: "center",
flexShrink: 0,
}}
>
<Typography variant="body2" sx={{ color: "#6366f1", fontWeight: 700 }}>
1
</Typography>
</Box>
<Box>
<Typography variant="subtitle2" sx={{ color: "#0f172a", fontWeight: 600, mb: 0.5 }}>
Natural Pauses & Rhythm
</Typography>
<Typography variant="body2" sx={{ color: "#475569", lineHeight: 1.7 }}>
The script includes strategic pauses between lines and when speakers change. This creates natural breathing patterns
and conversation flow, just like real human speech. Without these pauses, the audio would sound rushed and robotic.
</Typography>
</Box>
</Box>
<Box sx={{ display: "flex", gap: 2 }}>
<Box
sx={{
minWidth: 32,
height: 32,
borderRadius: "8px",
background: "linear-gradient(135deg, rgba(99, 102, 241, 0.1) 0%, rgba(139, 92, 246, 0.1) 100%)",
display: "flex",
alignItems: "center",
justifyContent: "center",
flexShrink: 0,
}}
>
<Typography variant="body2" sx={{ color: "#6366f1", fontWeight: 700 }}>
2
</Typography>
</Box>
<Box>
<Typography variant="subtitle2" sx={{ color: "#0f172a", fontWeight: 600, mb: 0.5 }}>
Emphasis Markers
</Typography>
<Typography variant="body2" sx={{ color: "#475569", lineHeight: 1.7 }}>
Lines marked with emphasis help highlight important points, statistics, or key insights. The AI voice will naturally
stress these parts, making your podcast more engaging and easier to follow—just like a real host would emphasize important information.
</Typography>
</Box>
</Box>
<Box sx={{ display: "flex", gap: 2 }}>
<Box
sx={{
minWidth: 32,
height: 32,
borderRadius: "8px",
background: "linear-gradient(135deg, rgba(99, 102, 241, 0.1) 0%, rgba(139, 92, 246, 0.1) 100%)",
display: "flex",
alignItems: "center",
justifyContent: "center",
flexShrink: 0,
}}
>
<Typography variant="body2" sx={{ color: "#6366f1", fontWeight: 700 }}>
3
</Typography>
</Box>
<Box>
<Typography variant="subtitle2" sx={{ color: "#0f172a", fontWeight: 600, mb: 0.5 }}>
Short, Conversational Sentences
</Typography>
<Typography variant="body2" sx={{ color: "#475569", lineHeight: 1.7 }}>
The script uses shorter sentences (15-20 words) written in a conversational style. This matches how people actually
speak, making the audio sound more natural. Long, complex sentences would sound awkward when spoken aloud.
</Typography>
</Box>
</Box>
<Box sx={{ display: "flex", gap: 2 }}>
<Box
sx={{
minWidth: 32,
height: 32,
borderRadius: "8px",
background: "linear-gradient(135deg, rgba(99, 102, 241, 0.1) 0%, rgba(139, 92, 246, 0.1) 100%)",
display: "flex",
alignItems: "center",
justifyContent: "center",
flexShrink: 0,
}}
>
<Typography variant="body2" sx={{ color: "#6366f1", fontWeight: 700 }}>
4
</Typography>
</Box>
<Box>
<Typography variant="subtitle2" sx={{ color: "#0f172a", fontWeight: 600, mb: 0.5 }}>
Scene-Specific Emotions
</Typography>
<Typography variant="body2" sx={{ color: "#475569", lineHeight: 1.7 }}>
Each scene has an emotional tone (excited, serious, curious, etc.) that guides the AI voice's delivery. This creates
variety and keeps listeners engaged, just like a real podcast host would vary their tone based on the topic.
</Typography>
</Box>
</Box>
<Box sx={{ display: "flex", gap: 2 }}>
<Box
sx={{
minWidth: 32,
height: 32,
borderRadius: "8px",
background: "linear-gradient(135deg, rgba(99, 102, 241, 0.1) 0%, rgba(139, 92, 246, 0.1) 100%)",
display: "flex",
alignItems: "center",
justifyContent: "center",
flexShrink: 0,
}}
>
<Typography variant="body2" sx={{ color: "#6366f1", fontWeight: 700 }}>
5
</Typography>
</Box>
<Box>
<Typography variant="subtitle2" sx={{ color: "#0f172a", fontWeight: 600, mb: 0.5 }}>
Optimized for Podcast Narration
</Typography>
<Typography variant="body2" sx={{ color: "#475569", lineHeight: 1.7 }}>
The script is optimized with slightly slower pacing and natural pronunciation settings specifically for podcast narration.
This ensures clarity and makes the content easy to understand, even when listeners are multitasking.
</Typography>
</Box>
</Box>
</Stack>
<Alert
severity="info"
sx={{
mt: 1,
background: "rgba(99, 102, 241, 0.06)",
border: "1px solid rgba(99, 102, 241, 0.15)",
"& .MuiAlert-icon": {
color: "#6366f1",
},
}}
>
<Typography variant="body2" sx={{ color: "#0f172a", lineHeight: 1.7 }}>
<strong style={{ fontWeight: 600 }}>Tip:</strong> You can edit any line or scene to match your preferences.
The format will be preserved when rendering, ensuring your audio still sounds natural and professional.
</Typography>
</Alert>
</Stack>
</Collapse>
</Paper>
<Alert
severity="info"
sx={{
background: "linear-gradient(135deg, rgba(99, 102, 241, 0.08) 0%, rgba(139, 92, 246, 0.08) 100%)",
border: "1px solid rgba(99, 102, 241, 0.2)",
borderRadius: 2,
boxShadow: "0 1px 2px rgba(99, 102, 241, 0.05)",
"& .MuiAlert-icon": {
color: "#6366f1",
},
}}
>
<Typography variant="body2" sx={{ color: "#0f172a", fontWeight: 500, lineHeight: 1.6 }}>
<strong style={{ fontWeight: 600 }}>Approval Required:</strong> Each scene must be approved before rendering. Review and edit lines as needed, then approve each scene.
</Typography>
</Alert>
@@ -179,8 +554,27 @@ export const ScriptEditor: React.FC<ScriptEditorProps> = ({
scene={scene}
onUpdateScene={updateScene}
onApprove={approveScene}
onPreviewLine={(text) => podcastApi.previewLine(text)}
knobs={knobs}
approvingSceneId={approvingSceneId}
generatingAudioId={generatingAudioId}
onAudioGenerationStart={(sceneId) => {
// Remember which scene is currently generating audio; this value is handed
// back to the scene component through the `generatingAudioId` prop.
setGeneratingAudioId(sceneId);
}}
onAudioGenerated={async (sceneId, audioUrl) => {
// Audio for `sceneId` is finished: clear the in-progress marker.
setGeneratingAudioId(null);
// Use a functional update so the change is applied to the latest script state
// rather than to a value captured when this callback was created.
// The matching scene receives the new audioUrl and is marked as approved.
setScript((currentScript) => {
// No script loaded: return the current value unchanged.
if (!currentScript) return currentScript;
// Replace only the matching scene; all other scenes keep their identity.
const updatedScenes = currentScript.scenes.map((s) =>
s.id === sceneId ? { ...s, audioUrl, approved: true } : s
);
const updatedScript = { ...currentScript, scenes: updatedScenes };
// NOTE(review): onScriptChange is invoked from inside the state updater.
// React expects updater functions to be pure and may call them twice in
// development Strict Mode, so the parent could be notified twice — confirm
// this is acceptable or move the notification out of the updater.
onScriptChange(updatedScript);
return updatedScript;
});
}}
idea={idea}
/>
</GlassyCard>
))}
@@ -188,39 +582,187 @@ export const ScriptEditor: React.FC<ScriptEditorProps> = ({
<Paper
sx={{
p: 3,
background: alpha("#1e293b", 0.6),
border: allApproved ? "2px solid rgba(16,185,129,0.4)" : "1px solid rgba(255,255,255,0.1)",
p: 3.5,
background: allApproved
? "linear-gradient(135deg, rgba(16, 185, 129, 0.05) 0%, rgba(5, 150, 105, 0.05) 100%)"
: "#ffffff",
border: allApproved
? "2px solid rgba(16, 185, 129, 0.25)"
: "1px solid rgba(15, 23, 42, 0.08)",
borderRadius: 3,
boxShadow: allApproved
? "0 4px 6px rgba(16, 185, 129, 0.08), 0 8px 24px rgba(16, 185, 129, 0.06)"
: "0 1px 3px rgba(15, 23, 42, 0.06), 0 4px 12px rgba(15, 23, 42, 0.04)",
transition: "all 0.3s cubic-bezier(0.4, 0, 0.2, 1)",
}}
>
<Stack direction="row" justifyContent="space-between" alignItems="center">
<Box>
<Typography variant="subtitle1" sx={{ mb: 0.5, display: "flex", alignItems: "center", gap: 1 }}>
<CheckCircleIcon fontSize="small" color={allApproved ? "success" : "disabled"} />
<Typography variant="subtitle1" sx={{ mb: 1, display: "flex", alignItems: "center", gap: 1.5, color: "#0f172a", fontWeight: 600, fontSize: "1.1rem" }}>
<CheckCircleIcon fontSize="small" sx={{ color: allApproved ? "#10b981" : "#94a3b8", fontSize: "1.25rem" }} />
Approval Status
</Typography>
<Typography variant="body2" color="text.secondary">
<Typography variant="body2" sx={{ color: "#64748b", fontWeight: 400, lineHeight: 1.6 }}>
{approvedCount} of {totalScenes} scenes approved
{!allApproved && " Approve all scenes to enable rendering"}
{allScenesHaveAudioAndImages && " All scenes ready for video rendering"}
{!allScenesHaveAudioAndImages && allApproved && " • Generate images for all scenes to enable video rendering"}
{!allApproved && " — Approve all scenes first"}
</Typography>
{!allApproved && (
{!allScenesHaveAudioAndImages && (
<LinearProgress
variant="determinate"
value={(approvedCount / totalScenes) * 100}
value={
allScenesHaveAudioAndImages
? 100
: script
? (script.scenes.filter((s) => s.audioUrl && s.imageUrl).length / totalScenes) * 100
: 0
}
sx={{ mt: 1, height: 6, borderRadius: 3 }}
/>
)}
</Box>
<PrimaryButton
onClick={() => script && onProceedToRendering(script)}
disabled={!allApproved}
disabled={!allScenesHaveAudioAndImages}
startIcon={<PlayArrowIcon />}
tooltip={!allApproved ? "Approve all scenes to proceed to rendering" : "Start rendering all approved scenes"}
tooltip={
!allScenesHaveAudioAndImages
? "Generate audio and images for all scenes to proceed to video rendering"
: "Proceed to video rendering (all scenes have audio and images)"
}
>
Proceed to Rendering
</PrimaryButton>
</Stack>
</Paper>
{/* Download Audio-Only Podcast Section */}
{allScenesHaveAudio && (
<Paper
sx={{
p: 3,
background: "linear-gradient(135deg, rgba(102, 126, 234, 0.05) 0%, rgba(118, 75, 162, 0.05) 100%)",
border: "1px solid rgba(102, 126, 234, 0.15)",
borderRadius: 2,
}}
>
<Stack spacing={3}>
<Typography variant="h6" sx={{ color: "#0f172a", fontWeight: 600 }}>
Download Audio-Only Podcast
</Typography>
{!combinedAudioResult ? (
<>
<PrimaryButton
onClick={combineAudio}
disabled={combiningAudio}
loading={combiningAudio}
startIcon={<DownloadIcon />}
tooltip="Combine all scene audio files into a single podcast episode"
sx={{
minWidth: 280,
fontSize: "1rem",
py: 1.5,
background: "linear-gradient(135deg, #667eea 0%, #764ba2 100%)",
"&:hover": {
background: "linear-gradient(135deg, #764ba2 0%, #667eea 100%)",
},
}}
>
{combiningAudio ? "Combining Audio..." : "Download Audio-Only Podcast"}
</PrimaryButton>
<Typography variant="caption" sx={{ color: "#64748b", fontStyle: "italic" }}>
This will combine all {scenesWithAudio} scene audio files into one complete podcast episode.
</Typography>
</>
) : (
<Stack spacing={2}>
{/* Success Alert */}
<Alert
severity="success"
sx={{
background: alpha("#10b981", 0.1),
border: "1px solid rgba(16,185,129,0.3)",
"& .MuiAlert-icon": { color: "#10b981" },
}}
>
<Typography variant="body2" sx={{ color: "#059669", fontWeight: 500 }}>
Combined audio generated successfully! ({combinedAudioResult.sceneCount} scenes,{" "}
{Math.round(combinedAudioResult.duration)}s)
</Typography>
</Alert>
{/* Combined Audio Preview */}
<InlineAudioPlayer audioUrl={combinedAudioResult.url} title="Complete Podcast Episode" />
{/* Action Buttons */}
<Stack direction="row" spacing={2}>
<SecondaryButton
onClick={async () => {
  // Re-downloads the already combined episode: resolves the audio URL to a
  // podcast audio endpoint path, fetches it as a blob through aiApiClient and
  // triggers a browser download via a temporary <a> element.
  // Any failure is logged and reported to the user through onError.
  try {
    // Strip query parameters FIRST. Doing this after the filename extraction
    // (as before) picked the wrong filename whenever the query string itself
    // contained a "/" (e.g. "ep.mp3?next=/a/b" resolved to "b").
    const urlWithoutQuery = combinedAudioResult.url.split('?')[0];

    // Normalize to a root-relative path
    let audioPath = urlWithoutQuery.startsWith('/')
      ? urlWithoutQuery
      : `/${urlWithoutQuery}`;

    // Ensure it's a podcast audio endpoint; otherwise rebuild the path from the
    // last path segment, falling back to the stored filename when it is empty.
    if (!audioPath.includes('/api/podcast/audio/')) {
      const filename = audioPath.split('/').pop() || combinedAudioResult.filename;
      audioPath = `/api/podcast/audio/${filename}`;
    }

    // Fetch as blob using the API client (a plain <a href> could not attach
    // whatever credentials the client adds to the request)
    const response = await aiApiClient.get(audioPath, {
      responseType: 'blob',
    });

    // Create blob URL and download
    const blob = response.data;
    const blobUrl = URL.createObjectURL(blob);
    const link = document.createElement("a");
    link.href = blobUrl;
    link.download = combinedAudioResult.filename || `podcast-episode-${projectId.slice(-8)}.mp3`;
    // The link is attached to the document only for the duration of the click
    document.body.appendChild(link);
    link.click();
    document.body.removeChild(link);

    // Clean up blob URL after a delay so the download has a chance to start
    setTimeout(() => {
      URL.revokeObjectURL(blobUrl);
    }, 100);
  } catch (error) {
    console.error('Failed to download audio:', error);
    onError('Failed to download audio file. Please try again.');
  }
}}
startIcon={<DownloadIcon />}
tooltip="Download the combined audio file again"
>
Download Again
</SecondaryButton>
<SecondaryButton
onClick={() => {
// Drop the previous result first so the section falls back to its
// "not yet combined" branch while the new audio is being produced.
setCombinedAudioResult(null);
// NOTE(review): the return value of combineAudio is ignored here; if it
// returns a promise, confirm it handles its own errors.
combineAudio();
}}
disabled={combiningAudio}
loading={combiningAudio}
startIcon={<RefreshIcon />}
tooltip="Regenerate combined audio (useful if scenes were updated)"
>
Regenerate
</SecondaryButton>
</Stack>
</Stack>
)}
</Stack>
</Paper>
)}
</Stack>
)}
</Box>