feat: voice clone audio generation + podcast workspace architecture
- Voice clone integration: When user selects voice clone in Write phase, backend uses their uploaded voice sample + scene script text to generate audio via qwen3/minimax/cosyvoice voice clone APIs - Multi-tenant workspace storage: All podcast assets (audio, video, images, charts) now use workspace-specific directories per user - Chart preview improvements: Card-based B-Roll charts UI with thumbnails, takeaway text, and action buttons; public endpoint for image serving - Voice clone caching: In-memory LRU cache for voice samples (avoids re-downloading per scene); frontend caches voice clone metadata - Thread pool for voice clone: Audio generation uses ThreadPoolExecutor to avoid blocking the FastAPI event loop - Auto-detect voice clone IDs (vc_*, MY_VOICE_CLONE) to route correctly - DB fallback for voice sample URL: Fetches from ContentAsset if not passed - Fixed API URL resolution for chart previews - Fixed GlassyCard DOM warnings for motion props - Fixed ScriptGenerationProgressView syntax error - Fixed usePodcastWorkflow scriptData reference
This commit is contained in:
@@ -16,7 +16,7 @@ import { GlassyCard, glassyCardSx, PrimaryButton } from "../ui";
|
||||
import { LineEditor } from "./LineEditor";
|
||||
import { ImageRegenerateModal, ImageGenerationSettings } from "./ImageRegenerateModal";
|
||||
import { AudioRegenerateModal, AudioGenerationSettings } from "./AudioRegenerateModal";
|
||||
import { podcastApi } from "../../../services/podcastApi";
|
||||
import { podcastApi, getCachedVoiceCloneInfo } from "../../../services/podcastApi";
|
||||
import { aiApiClient } from "../../../api/client";
|
||||
import { getCachedMedia, setCachedMedia } from "../../../utils/mediaCache";
|
||||
|
||||
@@ -68,6 +68,9 @@ export const SceneEditor: React.FC<SceneEditorProps> = ({
|
||||
const [audioSettings, setAudioSettings] = useState<AudioGenerationSettings>({
|
||||
voiceId: knobs.voice_id || "Wise_Woman",
|
||||
customVoiceId: knobs.custom_voice_id || undefined,
|
||||
useVoiceClone: knobs.is_voice_clone || false,
|
||||
voiceSampleUrl: knobs.voice_sample_url || undefined,
|
||||
voiceCloneEngine: knobs.voice_clone_engine || undefined,
|
||||
speed: knobs.voice_speed ?? 1.0,
|
||||
volume: 1.0,
|
||||
pitch: 0.0,
|
||||
@@ -308,10 +311,14 @@ export const SceneEditor: React.FC<SceneEditorProps> = ({
|
||||
|
||||
// Generate audio
|
||||
const effectiveSettings = settings || audioSettings;
|
||||
const cachedClone = getCachedVoiceCloneInfo();
|
||||
const result = await podcastApi.renderSceneAudio({
|
||||
scene: currentScene,
|
||||
voiceId: effectiveSettings.voiceId || knobs.voice_id || "Wise_Woman",
|
||||
customVoiceId: effectiveSettings.customVoiceId || knobs.custom_voice_id,
|
||||
customVoiceId: effectiveSettings.customVoiceId || knobs.custom_voice_id || cachedClone?.customVoiceId,
|
||||
useVoiceClone: effectiveSettings.useVoiceClone || knobs.is_voice_clone || cachedClone?.isVoiceClone || false,
|
||||
voiceSampleUrl: effectiveSettings.voiceSampleUrl || knobs.voice_sample_url || cachedClone?.voiceSampleUrl || undefined,
|
||||
voiceCloneEngine: effectiveSettings.voiceCloneEngine || knobs.voice_clone_engine || cachedClone?.engine || undefined,
|
||||
emotion: effectiveSettings.emotion || scene.emotion || knobs.voice_emotion || "neutral",
|
||||
speed: effectiveSettings.speed ?? knobs.voice_speed ?? 1.0,
|
||||
volume: effectiveSettings.volume ?? 1.0,
|
||||
|
||||
Reference in New Issue
Block a user