AI Image and Audio Generation Improvements.
AI Video Generation Pre-Flight Checklist. Cost Estimate Improvements.
This commit is contained in:
@@ -55,6 +55,9 @@ export interface Scene {
|
||||
visual_cues: string[];
|
||||
emphasis_tags: string[];
|
||||
enabled?: boolean;
|
||||
imageUrl?: string; // Per-scene generated image URL
|
||||
audioUrl?: string; // Per-scene generated audio URL
|
||||
videoUrl?: string; // Per-scene generated video URL
|
||||
}
|
||||
|
||||
export interface VideoRenderRequest {
|
||||
@@ -65,6 +68,42 @@ export interface VideoRenderRequest {
|
||||
voice_id?: string;
|
||||
}
|
||||
|
||||
export interface SceneVideoRenderRequest {
|
||||
scene: Scene;
|
||||
video_plan: VideoPlan;
|
||||
resolution?: '480p' | '720p' | '1080p';
|
||||
voice_id?: string;
|
||||
generate_audio_enabled?: boolean;
|
||||
}
|
||||
|
||||
export interface SceneVideoRenderResponse {
|
||||
success: boolean;
|
||||
task_id?: string;
|
||||
message: string;
|
||||
scene_number?: number;
|
||||
}
|
||||
|
||||
export interface CombineVideosRequest {
|
||||
scene_video_urls: string[];
|
||||
resolution?: '480p' | '720p' | '1080p';
|
||||
title?: string;
|
||||
video_plan?: VideoPlan;
|
||||
}
|
||||
|
||||
export interface VideoListItem {
|
||||
scene_number?: number | null;
|
||||
video_url: string;
|
||||
filename: string;
|
||||
created_at?: string;
|
||||
resolution?: string;
|
||||
}
|
||||
|
||||
export interface VideoListResponse {
|
||||
success: boolean;
|
||||
videos: VideoListItem[];
|
||||
message: string;
|
||||
}
|
||||
|
||||
export interface TaskStatus {
|
||||
task_id: string;
|
||||
status: 'pending' | 'processing' | 'completed' | 'failed';
|
||||
@@ -77,6 +116,7 @@ export interface TaskStatus {
|
||||
export interface CostEstimateRequest {
|
||||
scenes: Scene[];
|
||||
resolution: '480p' | '720p' | '1080p';
|
||||
imageModel?: 'ideogram-v3-turbo' | 'qwen-image';
|
||||
}
|
||||
|
||||
export interface CostEstimate {
|
||||
@@ -95,6 +135,9 @@ export interface CostEstimate {
|
||||
min: number;
|
||||
max: number;
|
||||
};
|
||||
image_model?: string;
|
||||
image_cost_per_scene?: number;
|
||||
total_image_cost?: number;
|
||||
}
|
||||
|
||||
export interface CostEstimateResponse {
|
||||
@@ -128,6 +171,13 @@ export interface SceneImageRequest {
|
||||
style?: string;
|
||||
renderingSpeed?: string;
|
||||
aspectRatio?: string;
|
||||
model?: string;
|
||||
}
|
||||
|
||||
export interface SceneImageTaskResponse {
|
||||
success: boolean;
|
||||
task_id: string;
|
||||
message: string;
|
||||
}
|
||||
|
||||
export interface SceneImageResponse {
|
||||
@@ -139,6 +189,38 @@ export interface SceneImageResponse {
|
||||
height: number;
|
||||
}
|
||||
|
||||
export interface SceneAudioRequest {
|
||||
sceneId: string;
|
||||
sceneTitle: string;
|
||||
text: string;
|
||||
voiceId?: string;
|
||||
speed?: number;
|
||||
volume?: number;
|
||||
pitch?: number;
|
||||
emotion?: string;
|
||||
englishNormalization?: boolean;
|
||||
sampleRate?: number;
|
||||
bitrate?: number;
|
||||
channel?: string;
|
||||
format?: string;
|
||||
languageBoost?: string;
|
||||
enableSyncMode?: boolean;
|
||||
videoPlanContext?: any; // Context for intelligent voice/emotion selection
|
||||
}
|
||||
|
||||
export interface SceneAudioResponse {
|
||||
scene_id: string;
|
||||
scene_title: string;
|
||||
audio_filename: string;
|
||||
audio_url: string;
|
||||
provider: string;
|
||||
model: string;
|
||||
voice_id: string;
|
||||
text_length: number;
|
||||
file_size: number;
|
||||
cost: number;
|
||||
}
|
||||
|
||||
export const youtubeApi = {
|
||||
/**
|
||||
* Generate a video plan from user input.
|
||||
@@ -216,6 +298,53 @@ export const youtubeApi = {
|
||||
}
|
||||
},
|
||||
|
||||
/**
|
||||
* Render a single scene video (scene-wise generation).
|
||||
*/
|
||||
async generateSceneVideo(params: SceneVideoRenderRequest): Promise<SceneVideoRenderResponse> {
|
||||
try {
|
||||
const response = await apiClient.post(`${API_BASE}/render/scene`, {
|
||||
scene: params.scene,
|
||||
video_plan: params.video_plan,
|
||||
resolution: params.resolution || '720p',
|
||||
voice_id: params.voice_id || 'Wise_Woman',
|
||||
generate_audio_enabled: params.generate_audio_enabled ?? false,
|
||||
});
|
||||
return response.data;
|
||||
} catch (error: any) {
|
||||
const errorMessage = error.response?.data?.message || error.response?.data?.detail || error.message || 'Failed to start scene video render';
|
||||
throw new Error(errorMessage);
|
||||
}
|
||||
},
|
||||
|
||||
/**
|
||||
* Combine multiple scene videos into a final video.
|
||||
*/
|
||||
async combineVideos(params: CombineVideosRequest): Promise<{ success: boolean; task_id?: string; message: string }> {
|
||||
try {
|
||||
const response = await apiClient.post(`${API_BASE}/render/combine`, {
|
||||
video_urls: params.scene_video_urls,
|
||||
video_plan: params.video_plan,
|
||||
resolution: params.resolution || '720p',
|
||||
title: params.title,
|
||||
});
|
||||
return response.data;
|
||||
} catch (error: any) {
|
||||
const errorMessage = error.response?.data?.message || error.response?.data?.detail || error.message || 'Failed to start video combination';
|
||||
throw new Error(errorMessage);
|
||||
}
|
||||
},
|
||||
|
||||
async listVideos(): Promise<VideoListResponse> {
|
||||
try {
|
||||
const response = await apiClient.get(`${API_BASE}/videos`);
|
||||
return response.data;
|
||||
} catch (error: any) {
|
||||
const errorMessage = error.response?.data?.message || error.response?.data?.detail || error.message || 'Failed to list videos';
|
||||
throw new Error(errorMessage);
|
||||
}
|
||||
},
|
||||
|
||||
/**
|
||||
* Estimate the cost of rendering a video before rendering.
|
||||
*/
|
||||
@@ -229,6 +358,19 @@ export const youtubeApi = {
|
||||
}
|
||||
},
|
||||
|
||||
/**
|
||||
* Get the status of an image generation task.
|
||||
*/
|
||||
async getImageGenerationStatus(taskId: string): Promise<TaskStatus> {
|
||||
try {
|
||||
const response = await apiClient.get(`${API_BASE}/image/status/${taskId}`);
|
||||
return response.data;
|
||||
} catch (error: any) {
|
||||
const errorMessage = error.response?.data?.message || error.response?.data?.detail || error.message || 'Failed to get task status';
|
||||
throw new Error(errorMessage);
|
||||
}
|
||||
},
|
||||
|
||||
/**
|
||||
* Get video URL for a generated video.
|
||||
*/
|
||||
@@ -323,9 +465,11 @@ export const youtubeApi = {
|
||||
|
||||
/**
|
||||
* Generate a YouTube scene image (with optional avatar consistency).
|
||||
* Returns a task ID for polling status.
|
||||
*/
|
||||
async generateSceneImage(params: SceneImageRequest): Promise<SceneImageResponse> {
|
||||
async generateSceneImage(params: SceneImageRequest): Promise<SceneImageTaskResponse> {
|
||||
try {
|
||||
// Use aiApiClient for longer timeout (image generation can take 30-60 seconds)
|
||||
const response = await apiClient.post(`${API_BASE}/image`, {
|
||||
scene_id: params.sceneId,
|
||||
scene_title: params.sceneTitle,
|
||||
@@ -338,6 +482,7 @@ export const youtubeApi = {
|
||||
style: params.style || null,
|
||||
rendering_speed: params.renderingSpeed || null,
|
||||
aspect_ratio: params.aspectRatio || null,
|
||||
model: params.model,
|
||||
});
|
||||
return response.data;
|
||||
} catch (error: any) {
|
||||
@@ -359,4 +504,55 @@ export const youtubeApi = {
|
||||
getSceneImageUrl(filename: string): string {
|
||||
return `${API_BASE}/images/scenes/${filename}`;
|
||||
},
|
||||
|
||||
/**
|
||||
* Generate a YouTube scene audio (narration).
|
||||
*/
|
||||
async generateSceneAudio(params: SceneAudioRequest): Promise<SceneAudioResponse> {
|
||||
try {
|
||||
// Use aiApiClient for longer timeout (audio generation can take 10-30 seconds)
|
||||
const requestBody: any = {
|
||||
scene_id: params.sceneId,
|
||||
scene_title: params.sceneTitle,
|
||||
text: params.text,
|
||||
voice_id: params.voiceId || 'Wise_Woman',
|
||||
speed: params.speed ?? 1.0,
|
||||
volume: params.volume ?? 1.0,
|
||||
pitch: params.pitch ?? 0.0,
|
||||
emotion: params.emotion || 'neutral',
|
||||
english_normalization: params.englishNormalization ?? false,
|
||||
enable_sync_mode: params.enableSyncMode !== false,
|
||||
};
|
||||
|
||||
// Only include optional fields if they are defined and valid
|
||||
// WaveSpeed has strict validation for these parameters
|
||||
if (params.sampleRate !== undefined && params.sampleRate !== null) {
|
||||
requestBody.sample_rate = params.sampleRate;
|
||||
}
|
||||
if (params.bitrate !== undefined && params.bitrate !== null) {
|
||||
requestBody.bitrate = params.bitrate;
|
||||
}
|
||||
// Channel must be "1" or "2" (strings) - only include if valid
|
||||
if (params.channel !== undefined && params.channel !== null && (params.channel === "1" || params.channel === "2")) {
|
||||
requestBody.channel = params.channel;
|
||||
}
|
||||
if (params.format !== undefined && params.format !== null) {
|
||||
requestBody.format = params.format;
|
||||
}
|
||||
if (params.languageBoost !== undefined && params.languageBoost !== null) {
|
||||
requestBody.language_boost = params.languageBoost;
|
||||
}
|
||||
|
||||
// Include video plan context for intelligent voice/emotion selection
|
||||
if (params.videoPlanContext !== undefined && params.videoPlanContext !== null) {
|
||||
requestBody.video_plan_context = params.videoPlanContext;
|
||||
}
|
||||
|
||||
const response = await aiApiClient.post(`${API_BASE}/audio`, requestBody);
|
||||
return response.data;
|
||||
} catch (error: any) {
|
||||
const errorMessage = error.response?.data?.message || error.response?.data?.detail || error.message || 'Failed to generate scene audio';
|
||||
throw new Error(errorMessage);
|
||||
}
|
||||
},
|
||||
};
|
||||
|
||||
Reference in New Issue
Block a user