AI story writer enhancements, text to video and voice generation, subscription management, and more.
This commit is contained in:
@@ -288,4 +288,90 @@ class StoryAudioGenerationService:
|
||||
|
||||
logger.info(f"[StoryAudioGeneration] Generated {len(audio_results)} audio files out of {total_scenes} scenes")
|
||||
return audio_results
|
||||
|
||||
def generate_ai_audio(
    self,
    scene_number: int,
    scene_title: str,
    text: str,
    user_id: str,
    voice_id: str = "Wise_Woman",
    speed: float = 1.0,
    volume: float = 1.0,
    pitch: float = 0.0,
    emotion: str = "happy",
) -> Dict[str, Any]:
    """
    Synthesize narration for one scene and store it under the output directory.

    The stripped text is handed to ``generate_audio`` (imported lazily from
    ``services.llm_providers.main_audio_generation``); the returned bytes are
    written to ``self.output_dir`` and a metadata dict is returned.

    Parameters:
        scene_number (int): Scene number, used for logging and the file name.
        scene_title (str): Scene title, used for logging and the file name.
        text (str): Narration text; surrounding whitespace is stripped before synthesis.
        user_id (str): Forwarded to ``generate_audio`` (documented upstream as the
            Clerk user ID used for subscription checking).
        voice_id (str): Voice identifier (default: "Wise_Woman").
        speed (float): Speech speed (documented range 0.5-2.0; not validated here).
        volume (float): Speech volume (documented range 0.1-10.0; not validated here).
        pitch (float): Speech pitch (documented range -12 to 12; not validated here).
        emotion (str): Emotion for speech (default: "happy").

    Returns:
        Dict[str, Any]: Scene info, file path/name/URL, provider details reported
        by the audio service, and a cost figure derived from the text length.

    Raises:
        ValueError: If ``text`` is empty or whitespace only.
        HTTPException: Re-raised untouched (e.g., 429 subscription limit).
        RuntimeError: Wraps any other failure during generation or saving.
    """
    spoken_text = text.strip() if text else ""
    if not spoken_text:
        raise ValueError(f"Scene {scene_number} ({scene_title}) requires non-empty text")

    try:
        logger.info(f"[StoryAudioGeneration] Generating AI audio for scene {scene_number}: {scene_title}")
        logger.debug(f"[StoryAudioGeneration] Text length: {len(text)} characters, voice: {voice_id}")

        # Function-scope import, kept local as in the original implementation.
        from services.llm_providers.main_audio_generation import generate_audio

        synthesis_kwargs = dict(
            text=spoken_text,
            voice_id=voice_id,
            speed=speed,
            volume=volume,
            pitch=pitch,
            emotion=emotion,
            user_id=user_id,
        )
        result = generate_audio(**synthesis_kwargs)

        # Persist the synthesized bytes next to the other story assets.
        audio_filename = self._generate_audio_filename(scene_number, scene_title)
        audio_path = self.output_dir / audio_filename
        with open(audio_path, "wb") as audio_handle:
            audio_handle.write(result.audio_bytes)

        logger.info(f"[StoryAudioGeneration] Saved AI audio to: {audio_path} ({result.file_size} bytes)")

        # Cost is reported in the response only: 0.05 per 1000 characters.
        rate_per_thousand = 0.05
        estimated_cost = (result.text_length / 1000.0) * rate_per_thousand

        metadata: Dict[str, Any] = {
            "scene_number": scene_number,
            "scene_title": scene_title,
            "audio_path": str(audio_path),
            "audio_filename": audio_filename,
            "audio_url": f"/api/story/audio/{audio_filename}",
            "provider": result.provider,
            "model": result.model,
            "voice_id": result.voice_id,
            "text_length": result.text_length,
            "file_size": result.file_size,
            "cost": estimated_cost,
        }
        return metadata

    except Exception as e:
        if isinstance(e, HTTPException):
            # Let HTTP errors (e.g., 429 subscription limit) reach the caller unchanged.
            raise
        logger.error(f"[StoryAudioGeneration] Error generating AI audio for scene {scene_number}: {e}")
        raise RuntimeError(f"Failed to generate AI audio for scene {scene_number}: {str(e)}") from e
|
||||
|
||||
|
||||
@@ -193,4 +193,82 @@ class StoryImageGenerationService:
|
||||
|
||||
logger.info(f"[StoryImageGeneration] Generated {len(image_results)} images out of {total_scenes} scenes")
|
||||
return image_results
|
||||
|
||||
def regenerate_scene_image(
    self,
    scene_number: int,
    scene_title: str,
    prompt: str,
    user_id: str,
    provider: Optional[str] = None,
    width: int = 1024,
    height: int = 1024,
    model: Optional[str] = None
) -> Dict[str, Any]:
    """
    Produce a fresh image for one scene from a caller-supplied prompt.

    The prompt is used as given (only stripped); no AI prompt generation step
    runs. The image bytes returned by ``generate_image`` are written under
    ``self.output_dir`` and a metadata dict is returned.

    Parameters:
        scene_number (int): Scene number, used for logging and the file name.
        scene_title (str): Scene title, used for logging and the file name.
        prompt (str): Direct prompt for image generation.
        user_id (str): Forwarded to ``generate_image`` (documented upstream as the
            Clerk user ID used for subscription checking).
        provider (str, optional): Image generation provider (documented upstream
            as gemini, huggingface, stability).
        width (int): Requested image width (default: 1024).
        height (int): Requested image height (default: 1024).
        model (str, optional): Model to use for image generation.

    Returns:
        Dict[str, Any]: Scene info, file path/name/URL, plus the width, height,
        provider, model and seed reported by the generation result.

    Raises:
        ValueError: If ``prompt`` is empty or whitespace only.
        HTTPException: Re-raised untouched (e.g., 429 subscription limit).
        RuntimeError: Wraps any other failure during generation or saving.
    """
    direct_prompt = prompt.strip() if prompt else ""
    if not direct_prompt:
        raise ValueError(f"Scene {scene_number} ({scene_title}) requires a non-empty prompt")

    try:
        logger.info(f"[StoryImageGeneration] Regenerating image for scene {scene_number}: {scene_title}")
        logger.debug(f"[StoryImageGeneration] Using direct prompt: {prompt[:100]}...")

        # Options are passed through as-is, including None for unset provider/model.
        image_options = dict(provider=provider, width=width, height=height, model=model)
        result: ImageGenerationResult = generate_image(
            prompt=direct_prompt,
            options=image_options,
            user_id=user_id
        )

        # Persist the generated bytes next to the other story assets.
        image_filename = self._generate_image_filename(scene_number, scene_title)
        image_path = self.output_dir / image_filename
        with open(image_path, "wb") as image_handle:
            image_handle.write(result.image_bytes)

        logger.info(f"[StoryImageGeneration] Saved regenerated image to: {image_path}")

        metadata: Dict[str, Any] = {
            "scene_number": scene_number,
            "scene_title": scene_title,
            "image_path": str(image_path),
            "image_filename": image_filename,
            "image_url": f"/api/story/images/{image_filename}",
            "width": result.width,
            "height": result.height,
            "provider": result.provider,
            "model": result.model,
            "seed": result.seed,
        }
        return metadata

    except Exception as e:
        if isinstance(e, HTTPException):
            # Let HTTP errors (e.g., 429 subscription limit) reach the caller unchanged.
            raise
        logger.error(f"[StoryImageGeneration] Error regenerating image for scene {scene_number}: {e}")
        raise RuntimeError(f"Failed to regenerate image for scene {scene_number}: {str(e)}") from e
|
||||
|
||||
|
||||
@@ -220,35 +220,41 @@ class StoryVideoGenerationService:
|
||||
def generate_story_video(
|
||||
self,
|
||||
scenes: List[Dict[str, Any]],
|
||||
image_paths: List[str],
|
||||
image_paths: List[Optional[str]],
|
||||
audio_paths: List[str],
|
||||
user_id: str,
|
||||
story_title: str = "Story",
|
||||
fps: int = 24,
|
||||
transition_duration: float = 0.5,
|
||||
progress_callback: Optional[callable] = None
|
||||
progress_callback: Optional[callable] = None,
|
||||
video_paths: Optional[List[Optional[str]]] = None
|
||||
) -> Dict[str, Any]:
|
||||
"""
|
||||
Generate a complete story video from multiple scenes.
|
||||
|
||||
Parameters:
|
||||
scenes (List[Dict[str, Any]]): List of scene data.
|
||||
image_paths (List[str]): List of image file paths for each scene.
|
||||
image_paths (List[Optional[str]]): List of image file paths (None if scene has animated video).
|
||||
audio_paths (List[str]): List of audio file paths for each scene.
|
||||
user_id (str): Clerk user ID for subscription checking.
|
||||
story_title (str): Title of the story (default: "Story").
|
||||
fps (int): Frames per second for video (default: 24).
|
||||
transition_duration (float): Duration of transitions between scenes in seconds (default: 0.5).
|
||||
progress_callback (callable, optional): Callback function for progress updates.
|
||||
video_paths (Optional[List[Optional[str]]]): List of animated video file paths (None if scene has static image).
|
||||
|
||||
Returns:
|
||||
Dict[str, Any]: Video metadata including file path, URL, and story info.
|
||||
"""
|
||||
if not scenes or not image_paths or not audio_paths:
|
||||
raise ValueError("Scenes, image paths, and audio paths are required")
|
||||
if not scenes or not audio_paths:
|
||||
raise ValueError("Scenes and audio paths are required")
|
||||
|
||||
if len(scenes) != len(image_paths) or len(scenes) != len(audio_paths):
|
||||
raise ValueError("Number of scenes, image paths, and audio paths must match")
|
||||
if len(scenes) != len(audio_paths):
|
||||
raise ValueError("Number of scenes and audio paths must match")
|
||||
|
||||
video_paths = video_paths or [None] * len(scenes)
|
||||
if len(video_paths) != len(scenes):
|
||||
video_paths = video_paths + [None] * (len(scenes) - len(video_paths))
|
||||
|
||||
try:
|
||||
logger.info(f"[StoryVideoGeneration] Generating story video for {len(scenes)} scenes")
|
||||
@@ -293,36 +299,59 @@ class StoryVideoGenerationService:
|
||||
scene_clips = []
|
||||
total_duration = 0.0
|
||||
|
||||
for idx, (scene, image_path, audio_path) in enumerate(zip(scenes, image_paths, audio_paths)):
|
||||
# Import VideoFileClip for animated videos
|
||||
try:
|
||||
from moviepy import VideoFileClip
|
||||
except ImportError:
|
||||
VideoFileClip = None
|
||||
|
||||
for idx, (scene, image_path, audio_path, video_path) in enumerate(zip(scenes, image_paths, audio_paths, video_paths)):
|
||||
try:
|
||||
scene_number = scene.get("scene_number", idx + 1)
|
||||
scene_title = scene.get("title", "Untitled")
|
||||
|
||||
logger.info(f"[StoryVideoGeneration] Processing scene {scene_number}/{len(scenes)}: {scene_title}")
|
||||
|
||||
# Load image and audio
|
||||
image_file = Path(image_path)
|
||||
audio_file = Path(audio_path)
|
||||
|
||||
if not image_file.exists():
|
||||
logger.warning(f"[StoryVideoGeneration] Image not found: {image_path}, skipping scene {scene_number}")
|
||||
continue
|
||||
if not audio_file.exists():
|
||||
logger.warning(f"[StoryVideoGeneration] Audio not found: {audio_path}, skipping scene {scene_number}")
|
||||
continue
|
||||
|
||||
# Load audio to get duration
|
||||
# Load audio
|
||||
audio_clip = AudioFileClip(str(audio_file))
|
||||
audio_duration = audio_clip.duration
|
||||
|
||||
# Create image clip (MoviePy v2: use with_* API)
|
||||
image_clip = ImageClip(str(image_file)).with_duration(audio_duration)
|
||||
image_clip = image_clip.with_fps(fps)
|
||||
# Prefer animated video if available
|
||||
if video_path and Path(video_path).exists():
|
||||
logger.info(f"[StoryVideoGeneration] Using animated video for scene {scene_number}: {video_path}")
|
||||
# Load animated video
|
||||
if VideoFileClip is None:
|
||||
raise RuntimeError("VideoFileClip not available - MoviePy may not be fully installed")
|
||||
video_clip = VideoFileClip(str(video_path))
|
||||
# Replace audio with the preferred audio (AI or free)
|
||||
video_clip = video_clip.with_audio(audio_clip)
|
||||
# Match duration to audio if needed
|
||||
if video_clip.duration > audio_duration:
|
||||
video_clip = video_clip.subclip(0, audio_duration)
|
||||
elif video_clip.duration < audio_duration:
|
||||
# Loop the video if it's shorter than audio
|
||||
loops_needed = int(audio_duration / video_clip.duration) + 1
|
||||
video_clip = concatenate_videoclips([video_clip] * loops_needed).subclip(0, audio_duration)
|
||||
video_clip = video_clip.with_audio(audio_clip)
|
||||
elif image_path and Path(image_path).exists():
|
||||
# Fall back to static image
|
||||
logger.info(f"[StoryVideoGeneration] Using static image for scene {scene_number}: {image_path}")
|
||||
image_file = Path(image_path)
|
||||
# Create image clip (MoviePy v2: use with_* API)
|
||||
image_clip = ImageClip(str(image_file)).with_duration(audio_duration)
|
||||
image_clip = image_clip.with_fps(fps)
|
||||
# Set audio to image clip
|
||||
video_clip = image_clip.with_audio(audio_clip)
|
||||
else:
|
||||
logger.warning(f"[StoryVideoGeneration] No video or image found for scene {scene_number}, skipping")
|
||||
continue
|
||||
|
||||
# Set audio to image clip
|
||||
video_clip = image_clip.with_audio(audio_clip)
|
||||
scene_clips.append(video_clip)
|
||||
|
||||
total_duration += audio_duration
|
||||
|
||||
# Call progress callback if provided
|
||||
|
||||
Reference in New Issue
Block a user