feat: voice clone audio generation + podcast workspace architecture

- Voice clone integration: When the user selects voice clone in the Write phase, the backend uses their uploaded voice sample + scene script text to generate audio via qwen3/minimax/cosyvoice voice clone APIs
- Multi-tenant workspace storage: All podcast assets (audio, video, images, charts) now use workspace-specific directories per user
- Chart preview improvements: Card-based B-Roll charts UI with thumbnails, takeaway text, and action buttons; public endpoint for image serving
- Voice clone caching: In-memory LRU cache for voice samples (avoids re-downloading per scene); frontend caches voice clone metadata
- Thread pool for voice clone: Audio generation uses ThreadPoolExecutor to avoid blocking the FastAPI event loop
- Auto-detect voice clone IDs (vc_*, MY_VOICE_CLONE) to route correctly
- DB fallback for voice sample URL: Fetches from ContentAsset if not passed
- Fixed API URL resolution for chart previews
- Fixed GlassyCard DOM warnings for motion props
- Fixed ScriptGenerationProgressView syntax error
- Fixed usePodcastWorkflow scriptData reference
2026-04-21 19:38:50 +05:30
parent 7637babd7d
commit 91b2f996fd
33 changed files with 1642 additions and 457 deletions
--- a/frontend/src/services/billingService.ts
+++ b/frontend/src/services/billingService.ts
@@ -93,9 +93,14 @@ billingAPI.interceptors.response.use(
  async (error) => {
    const originalRequest = error.config;
    
-    // Handle network errors
+    // Handle network errors - but NOT timeouts (backend might just be slow)
    if (!error.response) {
-      noteBackendUnavailable(error?.message || 'billing_network_error');
+      const errorMsg = error?.message || '';
+      const isTimeout = errorMsg.includes('timeout') || errorMsg.includes('ETIMEDOUT');
+      
+      if (!isTimeout) {
+        noteBackendUnavailable(errorMsg || 'billing_network_error');
+      }
      console.error('Billing API Network Error:', error.message);
      return Promise.reject(error);
    }
--- a/frontend/src/services/podcastApi.ts
+++ b/frontend/src/services/podcastApi.ts
@@ -1,3 +1,4 @@
+import { noteBackendRecovered } from "../api/client";
 import { ResearchProvider, ResearchConfig } from "./blogWriterApi";
 import {
  storyWriterApi,
@@ -28,12 +29,42 @@ const DEFAULT_KNOBS: Knobs = {
  voice_speed: 1,
  voice_id: "Wise_Woman",
  custom_voice_id: undefined,
+  is_voice_clone: undefined,
+  voice_sample_url: undefined,
+  voice_clone_engine: undefined,
  resolution: "720p",
  scene_length_target: 45,
  sample_rate: 24000,
  bitrate: "standard",
 };

+// In-memory cache for voice clone info to avoid re-fetching per scene
+let _voiceCloneCache: {
+  customVoiceId?: string;
+  voiceSampleUrl?: string;
+  engine?: string;
+  isVoiceClone?: boolean;
+  timestamp: number;
+} | null = null;
+const VOICE_CLONE_CACHE_TTL = 30 * 60 * 1000; // 30 minutes
+
+export function getCachedVoiceCloneInfo() {
+  if (_voiceCloneCache && Date.now() - _voiceCloneCache.timestamp < VOICE_CLONE_CACHE_TTL) {
+    return _voiceCloneCache;
+  }
+  _voiceCloneCache = null;
+  return null;
+}
+
+export function setCachedVoiceCloneInfo(info: {
+  customVoiceId?: string;
+  voiceSampleUrl?: string;
+  engine?: string;
+  isVoiceClone?: boolean;
+}) {
+  _voiceCloneCache = { ...info, timestamp: Date.now() };
+}
+
 // const sleep = (ms: number) => new Promise((resolve) => setTimeout(resolve, ms));

 const createId = (prefix: string) => {
@@ -244,9 +275,9 @@ const mapExaResearchResponse = (response: any): Research => {
 };

 const ensurePreflight = async (operation: PreflightOperation) => {
-  console.log('[podcastApi] Running preflight for:', operation);
+  console.log('[podcastApi] Running preflight for:', operation.operation_type);
  const result = await checkPreflight(operation);
-  console.log('[podcastApi] Preflight result:', result);
+  console.log('[podcastApi] Preflight result: can_proceed=', result.can_proceed);
  if (!result.can_proceed) {
    const message = result.operations[0]?.message || "Pre-flight validation failed";
    throw new Error(message);
@@ -379,7 +410,9 @@ export const podcastApi = {
        bible: params.bible,
        analysis: params.analysis,
      }, { timeout: 300000 }); // 5 minute timeout for research
-      console.log('[podcastApi] Exa research response received:', response.status, response.data);
+      const sourceCount = response.data?.sources?.length || 0;
+      const insightCount = response.data?.key_insights?.length || 0;
+      console.log(`[podcastApi] Exa research response: status=${response.status}, sources=${sourceCount}, insights=${insightCount}`);
    } catch (error: any) {
      console.error('[podcastApi] Exa research error:', error?.response?.status, error?.response?.data, error?.message);
      throw error;
@@ -497,6 +530,9 @@ export const podcastApi = {
    scene: Scene;
    voiceId?: string;
    customVoiceId?: string;
+    useVoiceClone?: boolean;
+    voiceSampleUrl?: string;
+    voiceCloneEngine?: string;
    emotion?: string; // Fallback if scene doesn't have emotion
    speed?: number;
    volume?: number;
@@ -600,7 +636,7 @@ export const podcastApi = {
      channel: params.channel || null,
      format: params.format || null,
      language_boost: params.languageBoost || null,
-    });
+    }, { timeout: 300000 }); // 5 minute timeout for voice clone / TTS

    return {
      audioUrl: response.data.audio_url,
@@ -623,12 +659,14 @@ export const podcastApi = {
  },

  // Project persistence endpoints
-  async saveProject(projectId: string, state: any): Promise<void> {
+  async saveProject(projectId: string, state: any): Promise<boolean> {
    try {
      await aiApiClient.put(`/api/podcast/projects/${projectId}`, state);
+      return true;
    } catch (error) {
      console.error("Failed to save project to database:", error);
-      // Don't throw - localStorage fallback is acceptable
+      noteBackendRecovered();
+      return false;
    }
  },

@@ -952,6 +990,9 @@ export const podcastApi = {
    scenes: { id: string; title: string; lines: { text: string }[] }[];
    voiceId: string;
    customVoiceId?: string;
+    useVoiceClone?: boolean;
+    voiceSampleUrl?: string;
+    voiceCloneEngine?: string;
    speed: number;
    emotion: string;
    englishNormalization?: boolean;