AI Image and Audio Generation Improvements.

AI Video Generation Pre-Flight Checklist. Cost Estimate Improvements.
This commit is contained in:
ajaysi
2025-12-25 16:26:08 +05:30
parent 59913bffa9
commit 7512933c65
163 changed files with 8938 additions and 37401 deletions

View File

@@ -55,6 +55,9 @@ export interface Scene {
visual_cues: string[];
emphasis_tags: string[];
enabled?: boolean;
imageUrl?: string; // Per-scene generated image URL
audioUrl?: string; // Per-scene generated audio URL
videoUrl?: string; // Per-scene generated video URL
}
export interface VideoRenderRequest {
@@ -65,6 +68,42 @@ export interface VideoRenderRequest {
voice_id?: string;
}
/**
 * Payload accepted by `youtubeApi.generateSceneVideo` to render one scene.
 */
export interface SceneVideoRenderRequest {
  /** Scene to render; forwarded in the request body unchanged. */
  scene: Scene;

  /** Video plan sent along with the scene; forwarded unchanged. */
  video_plan: VideoPlan;

  /** Output resolution. `generateSceneVideo` sends '720p' when this is missing or empty. */
  resolution?: '480p' | '720p' | '1080p';

  /** Voice identifier. `generateSceneVideo` sends 'Wise_Woman' when this is missing or empty. */
  voice_id?: string;

  /** Audio-generation flag. `generateSceneVideo` sends `false` when this is null or undefined. */
  generate_audio_enabled?: boolean;
}
/**
 * Response body returned by `youtubeApi.generateSceneVideo`.
 */
export interface SceneVideoRenderResponse {
  /** Success flag reported by the backend. */
  success: boolean;

  /** Task identifier, when one is provided (presumably used for status polling; confirm against the backend). */
  task_id?: string;

  /** Message accompanying the response. */
  message: string;

  /** Scene number, when the backend includes it. */
  scene_number?: number;
}
/**
 * Input accepted by `youtubeApi.combineVideos`.
 */
export interface CombineVideosRequest {
  /** URLs of the scene videos to combine; sent to the backend under the key `video_urls`. */
  scene_video_urls: string[];

  /** Output resolution. `combineVideos` sends '720p' when this is missing or empty. */
  resolution?: '480p' | '720p' | '1080p';

  /** Optional title; forwarded as given. */
  title?: string;

  /** Optional video plan; forwarded as given. */
  video_plan?: VideoPlan;
}
/**
 * One entry of the `videos` array in a `VideoListResponse`.
 */
export interface VideoListItem {
  /** Scene number of the video; may be absent or explicitly `null`. */
  scene_number?: number | null;

  /** URL of the video. */
  video_url: string;

  /** File name of the video. */
  filename: string;

  /** Creation timestamp as a string, when present (format not specified here). */
  created_at?: string;

  /** Resolution label, when present. */
  resolution?: string;
}
/**
 * Response body returned by `youtubeApi.listVideos`.
 */
export interface VideoListResponse {
  /** Success flag reported by the backend. */
  success: boolean;

  /** The listed videos. */
  videos: VideoListItem[];

  /** Message accompanying the response. */
  message: string;
}
export interface TaskStatus {
task_id: string;
status: 'pending' | 'processing' | 'completed' | 'failed';
@@ -77,6 +116,7 @@ export interface TaskStatus {
/**
 * Input for a cost estimate request.
 */
export interface CostEstimateRequest {
  /** Scenes to include in the estimate. */
  scenes: Scene[];

  /** Target resolution (required here, unlike in the render request types). */
  resolution: '480p' | '720p' | '1080p';

  /** Optional image model; one of the two listed model names. */
  imageModel?: 'ideogram-v3-turbo' | 'qwen-image';
}
export interface CostEstimate {
@@ -95,6 +135,9 @@ export interface CostEstimate {
min: number;
max: number;
};
image_model?: string;
image_cost_per_scene?: number;
total_image_cost?: number;
}
export interface CostEstimateResponse {
@@ -128,6 +171,13 @@ export interface SceneImageRequest {
style?: string;
renderingSpeed?: string;
aspectRatio?: string;
model?: string;
}
/**
 * Response body of `youtubeApi.generateSceneImage`, which returns a task ID
 * for polling status rather than the finished image.
 */
export interface SceneImageTaskResponse {
  /** Success flag reported by the backend. */
  success: boolean;

  /** Identifier of the image generation task. */
  task_id: string;

  /** Message accompanying the response. */
  message: string;
}
export interface SceneImageResponse {
@@ -139,6 +189,38 @@ export interface SceneImageResponse {
height: number;
}
/**
 * Input accepted by `youtubeApi.generateSceneAudio`.
 *
 * The defaults mentioned below are the ones `generateSceneAudio` applies when
 * it builds the request body.
 */
export interface SceneAudioRequest {
  /** Scene identifier; sent as `scene_id`. */
  sceneId: string;

  /** Scene title; sent as `scene_title`. */
  sceneTitle: string;

  /** Text to generate audio for; sent as `text`. */
  text: string;

  /** Voice identifier; 'Wise_Woman' is sent when missing or empty. */
  voiceId?: string;

  /** Speed; 1.0 is sent when null or undefined. */
  speed?: number;

  /** Volume; 1.0 is sent when null or undefined. */
  volume?: number;

  /** Pitch; 0.0 is sent when null or undefined. */
  pitch?: number;

  /** Emotion; 'neutral' is sent when missing or empty. */
  emotion?: string;

  /** English normalization flag; `false` is sent when null or undefined. */
  englishNormalization?: boolean;

  /** Sample rate; only sent when provided. */
  sampleRate?: number;

  /** Bitrate; only sent when provided. */
  bitrate?: number;

  /** Channel; only sent when it is exactly the string "1" or "2". */
  channel?: string;

  /** Output format; only sent when provided. */
  format?: string;

  /** Language boost; only sent when provided. */
  languageBoost?: string;

  /** Sync mode; sent as `true` unless explicitly set to `false`. */
  enableSyncMode?: boolean;

  /** Context for intelligent voice/emotion selection; only sent when provided. */
  videoPlanContext?: any;
}
/**
 * Response body returned by `youtubeApi.generateSceneAudio`.
 */
export interface SceneAudioResponse {
  /** Scene identifier. */
  scene_id: string;

  /** Scene title. */
  scene_title: string;

  /** File name of the generated audio. */
  audio_filename: string;

  /** URL of the generated audio. */
  audio_url: string;

  /** Name of the provider reported by the backend. */
  provider: string;

  /** Name of the model reported by the backend. */
  model: string;

  /** Voice identifier reported by the backend. */
  voice_id: string;

  /** Length of the text (unit not specified here). */
  text_length: number;

  /** Size of the audio file (unit not specified here). */
  file_size: number;

  /** Cost reported by the backend (currency/unit not specified here). */
  cost: number;
}
export const youtubeApi = {
/**
* Generate a video plan from user input.
@@ -216,6 +298,53 @@ export const youtubeApi = {
}
},
/**
 * Start rendering the video for a single scene (scene-wise generation).
 *
 * POSTs to `${API_BASE}/render/scene`. Before sending, missing values are
 * replaced with defaults: resolution '720p', voice_id 'Wise_Woman',
 * generate_audio_enabled false.
 *
 * @param params - Scene, video plan and optional render settings.
 * @returns The body of the backend response.
 * @throws Error whose message is the backend's `message` or `detail` when
 *   available, otherwise the underlying error message or a generic fallback.
 */
async generateSceneVideo(params: SceneVideoRenderRequest): Promise<SceneVideoRenderResponse> {
  try {
    const { scene, video_plan, resolution, voice_id, generate_audio_enabled } = params;
    const payload = {
      scene,
      video_plan,
      resolution: resolution || '720p',
      voice_id: voice_id || 'Wise_Woman',
      generate_audio_enabled: generate_audio_enabled ?? false,
    };
    const result = await apiClient.post(`${API_BASE}/render/scene`, payload);
    return result.data;
  } catch (err: any) {
    // Prefer the server-supplied explanation over the transport-level message.
    const body = err.response?.data;
    throw new Error(body?.message || body?.detail || err.message || 'Failed to start scene video render');
  }
},
/**
 * Combine multiple scene videos into one final video.
 *
 * POSTs to `${API_BASE}/render/combine`. `scene_video_urls` is sent under the
 * key `video_urls`, and resolution falls back to '720p' when not given.
 *
 * @param params - Scene video URLs plus optional plan, resolution and title.
 * @returns The body of the backend response.
 * @throws Error whose message is the backend's `message` or `detail` when
 *   available, otherwise the underlying error message or a generic fallback.
 */
async combineVideos(params: CombineVideosRequest): Promise<{ success: boolean; task_id?: string; message: string }> {
  try {
    const { scene_video_urls, video_plan, resolution, title } = params;
    const payload = {
      video_urls: scene_video_urls,
      video_plan,
      resolution: resolution || '720p',
      title,
    };
    const result = await apiClient.post(`${API_BASE}/render/combine`, payload);
    return result.data;
  } catch (err: any) {
    // Prefer the server-supplied explanation over the transport-level message.
    const body = err.response?.data;
    throw new Error(body?.message || body?.detail || err.message || 'Failed to start video combination');
  }
},
/**
 * Fetch the list of videos from `${API_BASE}/videos`.
 *
 * @returns The body of the backend response.
 * @throws Error whose message is the backend's `message` or `detail` when
 *   available, otherwise the underlying error message or a generic fallback.
 */
async listVideos(): Promise<VideoListResponse> {
  try {
    const result = await apiClient.get(`${API_BASE}/videos`);
    return result.data;
  } catch (err: any) {
    // Prefer the server-supplied explanation over the transport-level message.
    const body = err.response?.data;
    throw new Error(body?.message || body?.detail || err.message || 'Failed to list videos');
  }
},
/**
* Estimate the cost of rendering a video before rendering.
*/
@@ -229,6 +358,19 @@ export const youtubeApi = {
}
},
/**
 * Look up the status of an image generation task.
 *
 * GETs `${API_BASE}/image/status/<taskId>`; the task ID is placed in the URL
 * path as-is.
 *
 * @param taskId - Identifier of the image generation task.
 * @returns The body of the backend response.
 * @throws Error whose message is the backend's `message` or `detail` when
 *   available, otherwise the underlying error message or a generic fallback.
 */
async getImageGenerationStatus(taskId: string): Promise<TaskStatus> {
  try {
    const statusUrl = `${API_BASE}/image/status/${taskId}`;
    const result = await apiClient.get(statusUrl);
    return result.data;
  } catch (err: any) {
    // Prefer the server-supplied explanation over the transport-level message.
    const body = err.response?.data;
    throw new Error(body?.message || body?.detail || err.message || 'Failed to get task status');
  }
},
/**
* Get video URL for a generated video.
*/
@@ -323,9 +465,11 @@ export const youtubeApi = {
/**
* Generate a YouTube scene image (with optional avatar consistency).
* Returns a task ID for polling status.
*/
async generateSceneImage(params: SceneImageRequest): Promise<SceneImageResponse> {
async generateSceneImage(params: SceneImageRequest): Promise<SceneImageTaskResponse> {
try {
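// NOTE(review): the request below goes through apiClient, not aiApiClient. This call returns a task ID for polling, so the longer aiApiClient timeout (image generation can take 30-60 seconds) is presumably not needed here — confirm.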
const response = await apiClient.post(`${API_BASE}/image`, {
scene_id: params.sceneId,
scene_title: params.sceneTitle,
@@ -338,6 +482,7 @@ export const youtubeApi = {
style: params.style || null,
rendering_speed: params.renderingSpeed || null,
aspect_ratio: params.aspectRatio || null,
model: params.model,
});
return response.data;
} catch (error: any) {
@@ -359,4 +504,55 @@ export const youtubeApi = {
/**
 * Build the URL of a scene image from its file name.
 *
 * @param filename - Image file name; appended to the path as-is.
 * @returns `${API_BASE}/images/scenes/<filename>`.
 */
getSceneImageUrl(filename: string): string {
  const imagePath = `/images/scenes/${filename}`;
  return `${API_BASE}${imagePath}`;
},
/**
 * Generate narration audio for a YouTube scene.
 *
 * POSTs to `${API_BASE}/audio` through `aiApiClient`. The request body always
 * carries the scene id/title, the text and the voice settings (with defaults
 * for missing values); the remaining tuning fields are added only when the
 * caller supplied them.
 *
 * @param params - Scene text plus optional voice and encoding settings.
 * @returns The body of the backend response.
 * @throws Error whose message is the backend's `message` or `detail` when
 *   available, otherwise the underlying error message or a generic fallback.
 */
async generateSceneAudio(params: SceneAudioRequest): Promise<SceneAudioResponse> {
  try {
    // Fields that are always sent, with defaults filled in where needed.
    const payload: Record<string, unknown> = {
      scene_id: params.sceneId,
      scene_title: params.sceneTitle,
      text: params.text,
      voice_id: params.voiceId || 'Wise_Woman',
      speed: params.speed ?? 1.0,
      volume: params.volume ?? 1.0,
      pitch: params.pitch ?? 0.0,
      emotion: params.emotion || 'neutral',
      english_normalization: params.englishNormalization ?? false,
      // Sync mode stays on unless the caller explicitly passes false.
      enable_sync_mode: params.enableSyncMode !== false,
    };

    // Channel must be the string "1" or "2"; any other value is left out.
    const channel = params.channel === '1' || params.channel === '2' ? params.channel : undefined;

    // Optional fields are only included when defined, because WaveSpeed has
    // strict validation for these parameters. The order here is the order in
    // which keys are added to the request body.
    const optionalFields: Array<[string, unknown]> = [
      ['sample_rate', params.sampleRate],
      ['bitrate', params.bitrate],
      ['channel', channel],
      ['format', params.format],
      ['language_boost', params.languageBoost],
      // Context for intelligent voice/emotion selection.
      ['video_plan_context', params.videoPlanContext],
    ];
    for (const [key, value] of optionalFields) {
      if (value !== undefined && value !== null) {
        payload[key] = value;
      }
    }

    // aiApiClient is used for its longer timeout (audio generation can take 10-30 seconds).
    const result = await aiApiClient.post(`${API_BASE}/audio`, payload);
    return result.data;
  } catch (err: any) {
    // Prefer the server-supplied explanation over the transport-level message.
    const body = err.response?.data;
    throw new Error(body?.message || body?.detail || err.message || 'Failed to generate scene audio');
  }
},
};