AI Image and Audio Generation Improvements.

AI Video Generation Pre-Flight Checklist. Cost Estimate Improvements.
2025-12-25 16:26:08 +05:30
parent 59913bffa9
commit 7512933c65
163 changed files with 8938 additions and 37401 deletions
--- a/frontend/src/services/youtubeApi.ts
+++ b/frontend/src/services/youtubeApi.ts
@@ -55,6 +55,9 @@ export interface Scene {
  visual_cues: string[];
  emphasis_tags: string[];
  enabled?: boolean;
+  imageUrl?: string; // Per-scene generated image URL
+  audioUrl?: string; // Per-scene generated audio URL
+  videoUrl?: string; // Per-scene generated video URL
 }

 export interface VideoRenderRequest {
@@ -65,6 +68,42 @@ export interface VideoRenderRequest {
  voice_id?: string;
 }

+export interface SceneVideoRenderRequest {
+  scene: Scene;
+  video_plan: VideoPlan;
+  resolution?: '480p' | '720p' | '1080p';
+  voice_id?: string;
+  generate_audio_enabled?: boolean;
+}
+
+export interface SceneVideoRenderResponse {
+  success: boolean;
+  task_id?: string;
+  message: string;
+  scene_number?: number;
+}
+
+export interface CombineVideosRequest {
+  scene_video_urls: string[];
+  resolution?: '480p' | '720p' | '1080p';
+  title?: string;
+  video_plan?: VideoPlan;
+}
+
+export interface VideoListItem {
+  scene_number?: number | null;
+  video_url: string;
+  filename: string;
+  created_at?: string;
+  resolution?: string;
+}
+
+export interface VideoListResponse {
+  success: boolean;
+  videos: VideoListItem[];
+  message: string;
+}
+
 export interface TaskStatus {
  task_id: string;
  status: 'pending' | 'processing' | 'completed' | 'failed';
@@ -77,6 +116,7 @@ export interface TaskStatus {
 export interface CostEstimateRequest {
  scenes: Scene[];
  resolution: '480p' | '720p' | '1080p';
+  imageModel?: 'ideogram-v3-turbo' | 'qwen-image';
 }

 export interface CostEstimate {
@@ -95,6 +135,9 @@ export interface CostEstimate {
    min: number;
    max: number;
  };
+  image_model?: string;
+  image_cost_per_scene?: number;
+  total_image_cost?: number;
 }

 export interface CostEstimateResponse {
@@ -128,6 +171,13 @@ export interface SceneImageRequest {
  style?: string;
  renderingSpeed?: string;
  aspectRatio?: string;
+  model?: string;
+}
+
+export interface SceneImageTaskResponse {
+  success: boolean;
+  task_id: string;
+  message: string;
 }

 export interface SceneImageResponse {
@@ -139,6 +189,38 @@ export interface SceneImageResponse {
  height: number;
 }

+export interface SceneAudioRequest {
+  sceneId: string;
+  sceneTitle: string;
+  text: string;
+  voiceId?: string;
+  speed?: number;
+  volume?: number;
+  pitch?: number;
+  emotion?: string;
+  englishNormalization?: boolean;
+  sampleRate?: number;
+  bitrate?: number;
+  channel?: string;
+  format?: string;
+  languageBoost?: string;
+  enableSyncMode?: boolean;
+  videoPlanContext?: any; // Context for intelligent voice/emotion selection
+}
+
+export interface SceneAudioResponse {
+  scene_id: string;
+  scene_title: string;
+  audio_filename: string;
+  audio_url: string;
+  provider: string;
+  model: string;
+  voice_id: string;
+  text_length: number;
+  file_size: number;
+  cost: number;
+}
+
 export const youtubeApi = {
  /**
   * Generate a video plan from user input.
@@ -216,6 +298,53 @@ export const youtubeApi = {
    }
  },

+  /**
+   * Render a single scene video (scene-wise generation).
+   */
+  async generateSceneVideo(params: SceneVideoRenderRequest): Promise<SceneVideoRenderResponse> {
+    try {
+      const response = await apiClient.post(`${API_BASE}/render/scene`, {
+        scene: params.scene,
+        video_plan: params.video_plan,
+        resolution: params.resolution || '720p',
+        voice_id: params.voice_id || 'Wise_Woman',
+        generate_audio_enabled: params.generate_audio_enabled ?? false,
+      });
+      return response.data;
+    } catch (error: any) {
+      const errorMessage = error.response?.data?.message || error.response?.data?.detail || error.message || 'Failed to start scene video render';
+      throw new Error(errorMessage);
+    }
+  },
+
+  /**
+   * Combine multiple scene videos into a final video.
+   */
+  async combineVideos(params: CombineVideosRequest): Promise<{ success: boolean; task_id?: string; message: string }> {
+    try {
+      const response = await apiClient.post(`${API_BASE}/render/combine`, {
+        video_urls: params.scene_video_urls,
+        video_plan: params.video_plan,
+        resolution: params.resolution || '720p',
+        title: params.title,
+      });
+      return response.data;
+    } catch (error: any) {
+      const errorMessage = error.response?.data?.message || error.response?.data?.detail || error.message || 'Failed to start video combination';
+      throw new Error(errorMessage);
+    }
+  },
+
+  async listVideos(): Promise<VideoListResponse> {
+    try {
+      const response = await apiClient.get(`${API_BASE}/videos`);
+      return response.data;
+    } catch (error: any) {
+      const errorMessage = error.response?.data?.message || error.response?.data?.detail || error.message || 'Failed to list videos';
+      throw new Error(errorMessage);
+    }
+  },
+
  /**
   * Estimate the cost of rendering a video before rendering.
   */
@@ -229,6 +358,19 @@ export const youtubeApi = {
    }
  },

+  /**
+   * Get the status of an image generation task.
+   */
+  async getImageGenerationStatus(taskId: string): Promise<TaskStatus> {
+    try {
+      const response = await apiClient.get(`${API_BASE}/image/status/${taskId}`);
+      return response.data;
+    } catch (error: any) {
+      const errorMessage = error.response?.data?.message || error.response?.data?.detail || error.message || 'Failed to get task status';
+      throw new Error(errorMessage);
+    }
+  },
+
  /**
   * Get video URL for a generated video.
   */
@@ -323,9 +465,11 @@ export const youtubeApi = {

  /**
   * Generate a YouTube scene image (with optional avatar consistency).
+   * Returns a task ID for polling status.
   */
-  async generateSceneImage(params: SceneImageRequest): Promise<SceneImageResponse> {
+  async generateSceneImage(params: SceneImageRequest): Promise<SceneImageTaskResponse> {
    try {
+      // Use aiApiClient for longer timeout (image generation can take 30-60 seconds)
      const response = await apiClient.post(`${API_BASE}/image`, {
        scene_id: params.sceneId,
        scene_title: params.sceneTitle,
@@ -338,6 +482,7 @@ export const youtubeApi = {
        style: params.style || null,
        rendering_speed: params.renderingSpeed || null,
        aspect_ratio: params.aspectRatio || null,
+        model: params.model,
      });
      return response.data;
    } catch (error: any) {
@@ -359,4 +504,55 @@ export const youtubeApi = {
  getSceneImageUrl(filename: string): string {
    return `${API_BASE}/images/scenes/${filename}`;
  },
+
+  /**
+   * Generate a YouTube scene audio (narration).
+   */
+  async generateSceneAudio(params: SceneAudioRequest): Promise<SceneAudioResponse> {
+    try {
+      // Use aiApiClient for longer timeout (audio generation can take 10-30 seconds)
+      const requestBody: any = {
+        scene_id: params.sceneId,
+        scene_title: params.sceneTitle,
+        text: params.text,
+        voice_id: params.voiceId || 'Wise_Woman',
+        speed: params.speed ?? 1.0,
+        volume: params.volume ?? 1.0,
+        pitch: params.pitch ?? 0.0,
+        emotion: params.emotion || 'neutral',
+        english_normalization: params.englishNormalization ?? false,
+        enable_sync_mode: params.enableSyncMode !== false,
+      };
+      
+      // Only include optional fields if they are defined and valid
+      // WaveSpeed has strict validation for these parameters
+      if (params.sampleRate !== undefined && params.sampleRate !== null) {
+        requestBody.sample_rate = params.sampleRate;
+      }
+      if (params.bitrate !== undefined && params.bitrate !== null) {
+        requestBody.bitrate = params.bitrate;
+      }
+      // Channel must be "1" or "2" (strings) - only include if valid
+      if (params.channel !== undefined && params.channel !== null && (params.channel === "1" || params.channel === "2")) {
+        requestBody.channel = params.channel;
+      }
+      if (params.format !== undefined && params.format !== null) {
+        requestBody.format = params.format;
+      }
+      if (params.languageBoost !== undefined && params.languageBoost !== null) {
+        requestBody.language_boost = params.languageBoost;
+      }
+
+      // Include video plan context for intelligent voice/emotion selection
+      if (params.videoPlanContext !== undefined && params.videoPlanContext !== null) {
+        requestBody.video_plan_context = params.videoPlanContext;
+      }
+
+      const response = await aiApiClient.post(`${API_BASE}/audio`, requestBody);
+      return response.data;
+    } catch (error: any) {
+      const errorMessage = error.response?.data?.message || error.response?.data?.detail || error.message || 'Failed to generate scene audio';
+      throw new Error(errorMessage);
+    }
+  },
 };