AI story writer enhancements, text to video and voice generation, subscription management, and more.

2025-11-19 09:55:32 +05:30
parent bf7493c366
commit e96525347b
64 changed files with 10367 additions and 400 deletions
--- a/backend/services/story_writer/audio_generation_service.py
+++ b/backend/services/story_writer/audio_generation_service.py
@@ -288,4 +288,90 @@ class StoryAudioGenerationService:
        
        logger.info(f"[StoryAudioGeneration] Generated {len(audio_results)} audio files out of {total_scenes} scenes")
        return audio_results
+    
+    def generate_ai_audio(
+        self,
+        scene_number: int,
+        scene_title: str,
+        text: str,
+        user_id: str,
+        voice_id: str = "Wise_Woman",
+        speed: float = 1.0,
+        volume: float = 1.0,
+        pitch: float = 0.0,
+        emotion: str = "happy",
+    ) -> Dict[str, Any]:
+        """
+        Synthesize narration for one scene and store it under ``self.output_dir``.
+
+        The stripped text is handed to ``generate_audio`` (main_audio_generation) and the
+        returned bytes are written to the file named by ``_generate_audio_filename``.
+
+        Parameters:
+            scene_number (int): Scene number; used for the file name and in log/error messages.
+            scene_title (str): Scene title; used for the file name and in log/error messages.
+            text (str): Text to convert to speech; surrounding whitespace is stripped first.
+            user_id (str): Clerk user ID for subscription checking; forwarded to ``generate_audio``.
+            voice_id, speed, volume, pitch, emotion: Voice settings forwarded unchanged.
+
+        Returns:
+            Dict[str, Any]: Scene info, saved file path/name/URL, provider details and estimated cost.
+
+        Raises:
+            ValueError: If ``text`` is empty or contains only whitespace.
+            HTTPException: Re-raised unchanged (e.g., 429 subscription limit).
+            RuntimeError: Wraps any other error raised while generating or saving the audio.
+        """
+        cleaned_text = text.strip() if text else ""
+        if not cleaned_text:
+            raise ValueError(f"Scene {scene_number} ({scene_title}) requires non-empty text")
+
+        try:
+            logger.info(f"[StoryAudioGeneration] Generating AI audio for scene {scene_number}: {scene_title}")
+            logger.debug(f"[StoryAudioGeneration] Text length: {len(text)} characters, voice: {voice_id}")
+
+            # Imported lazily; being inside the try, an import failure is logged and wrapped below
+            from services.llm_providers.main_audio_generation import generate_audio
+
+            synthesis_request = {
+                "text": cleaned_text,
+                "voice_id": voice_id,
+                "speed": speed,
+                "volume": volume,
+                "pitch": pitch,
+                "emotion": emotion,
+                "user_id": user_id,
+            }
+            result = generate_audio(**synthesis_request)
+
+            # Persist the synthesized bytes in the service's output directory
+            audio_filename = self._generate_audio_filename(scene_number, scene_title)
+            audio_path = self.output_dir / audio_filename
+            with open(audio_path, "wb") as audio_file:
+                audio_file.write(result.audio_bytes)
+            logger.info(f"[StoryAudioGeneration] Saved AI audio to: {audio_path} ({result.file_size} bytes)")
+
+            # Estimated cost for the response: flat rate per 1000 characters of text
+            rate_per_1000_chars = 0.05
+            estimated_cost = (result.text_length / 1000.0) * rate_per_1000_chars
+            audio_metadata = {
+                "scene_number": scene_number,
+                "scene_title": scene_title,
+                "audio_path": str(audio_path),
+                "audio_filename": audio_filename,
+                "audio_url": f"/api/story/audio/{audio_filename}",
+                "provider": result.provider,
+                "model": result.model,
+                "voice_id": result.voice_id,
+                "text_length": result.text_length,
+                "file_size": result.file_size,
+                "cost": estimated_cost,
+            }
+            return audio_metadata
+        except HTTPException:
+            # HTTP errors (e.g., 429 subscription limit) must reach the caller unchanged
+            raise
+        except Exception as e:
+            logger.error(f"[StoryAudioGeneration] Error generating AI audio for scene {scene_number}: {e}")
+            raise RuntimeError(f"Failed to generate AI audio for scene {scene_number}: {str(e)}") from e

--- a/backend/services/story_writer/image_generation_service.py
+++ b/backend/services/story_writer/image_generation_service.py
@@ -193,4 +193,82 @@ class StoryImageGenerationService:
        
        logger.info(f"[StoryImageGeneration] Generated {len(image_results)} images out of {total_scenes} scenes")
        return image_results
+    
+    def regenerate_scene_image(
+        self,
+        scene_number: int,
+        scene_title: str,
+        prompt: str,
+        user_id: str,
+        provider: Optional[str] = None,
+        width: int = 1024,
+        height: int = 1024,
+        model: Optional[str] = None
+    ) -> Dict[str, Any]:
+        """
+        Render a fresh image for one scene from a caller-supplied prompt and save it.
+
+        The prompt is passed to ``generate_image`` as given (after stripping surrounding
+        whitespace); no AI prompt generation step is involved.
+
+        Parameters:
+            scene_number (int): Scene number; used for the file name and in log/error messages.
+            scene_title (str): Scene title; used for the file name and in log/error messages.
+            prompt (str): Direct prompt to use for image generation.
+            user_id (str): Clerk user ID for subscription checking; forwarded to ``generate_image``.
+            provider (str, optional): Image generation provider (gemini, huggingface, stability).
+            width (int): Requested image width (default: 1024).
+            height (int): Requested image height (default: 1024).
+            model (str, optional): Model to use for image generation.
+
+        Returns:
+            Dict[str, Any]: Scene info, saved file path/name/URL and the generation result details.
+
+        Raises:
+            ValueError: If ``prompt`` is empty or contains only whitespace.
+            HTTPException: Re-raised unchanged (e.g., 429 subscription limit).
+            RuntimeError: Wraps any other error raised while generating or saving the image.
+        """
+        cleaned_prompt = prompt.strip() if prompt else ""
+        if not cleaned_prompt:
+            raise ValueError(f"Scene {scene_number} ({scene_title}) requires a non-empty prompt")
+
+        try:
+            logger.info(f"[StoryImageGeneration] Regenerating image for scene {scene_number}: {scene_title}")
+            logger.debug(f"[StoryImageGeneration] Using direct prompt: {prompt[:100]}...")
+
+            # Hand the prompt straight to main_image_generation together with the caller's options
+            result: ImageGenerationResult = generate_image(
+                prompt=cleaned_prompt,
+                options={"provider": provider, "width": width, "height": height, "model": model},
+                user_id=user_id,
+            )
+
+            # Persist the returned bytes in the service's output directory
+            image_filename = self._generate_image_filename(scene_number, scene_title)
+            image_path = self.output_dir / image_filename
+            with open(image_path, "wb") as image_file:
+                image_file.write(result.image_bytes)
+            logger.info(f"[StoryImageGeneration] Saved regenerated image to: {image_path}")
+
+            image_metadata = {
+                "scene_number": scene_number,
+                "scene_title": scene_title,
+                "image_path": str(image_path),
+                "image_filename": image_filename,
+                "image_url": f"/api/story/images/{image_filename}",
+                "width": result.width,
+                "height": result.height,
+                "provider": result.provider,
+                "model": result.model,
+                "seed": result.seed,
+            }
+            return image_metadata
+
+        except HTTPException:
+            # HTTP errors (e.g., 429 subscription limit) must reach the caller unchanged
+            raise
+        except Exception as e:
+            logger.error(f"[StoryImageGeneration] Error regenerating image for scene {scene_number}: {e}")
+            raise RuntimeError(f"Failed to regenerate image for scene {scene_number}: {str(e)}") from e

--- a/backend/services/story_writer/video_generation_service.py
+++ b/backend/services/story_writer/video_generation_service.py
@@ -220,35 +220,41 @@ class StoryVideoGenerationService:
    def generate_story_video(
        self,
        scenes: List[Dict[str, Any]],
-        image_paths: List[str],
+        image_paths: List[Optional[str]],
        audio_paths: List[str],
        user_id: str,
        story_title: str = "Story",
        fps: int = 24,
        transition_duration: float = 0.5,
-        progress_callback: Optional[callable] = None
+        progress_callback: Optional[callable] = None,
+        video_paths: Optional[List[Optional[str]]] = None
    ) -> Dict[str, Any]:
        """
        Generate a complete story video from multiple scenes.
        
        Parameters:
            scenes (List[Dict[str, Any]]): List of scene data.
-            image_paths (List[str]): List of image file paths for each scene.
+            image_paths (List[Optional[str]]): List of image file paths (None if scene has animated video).
            audio_paths (List[str]): List of audio file paths for each scene.
            user_id (str): Clerk user ID for subscription checking.
            story_title (str): Title of the story (default: "Story").
            fps (int): Frames per second for video (default: 24).
            transition_duration (float): Duration of transitions between scenes in seconds (default: 0.5).
            progress_callback (callable, optional): Callback function for progress updates.
+            video_paths (Optional[List[Optional[str]]]): List of animated video file paths (None if scene has static image).
        
        Returns:
            Dict[str, Any]: Video metadata including file path, URL, and story info.
        """
-        if not scenes or not image_paths or not audio_paths:
-            raise ValueError("Scenes, image paths, and audio paths are required")
+        if not scenes or not audio_paths:
+            raise ValueError("Scenes and audio paths are required")
        
-        if len(scenes) != len(image_paths) or len(scenes) != len(audio_paths):
-            raise ValueError("Number of scenes, image paths, and audio paths must match")
+        if len(scenes) != len(audio_paths):
+            raise ValueError("Number of scenes and audio paths must match")
+        
+        video_paths = video_paths or [None] * len(scenes)
+        if len(video_paths) != len(scenes):
+            video_paths = video_paths + [None] * (len(scenes) - len(video_paths))
        
        try:
            logger.info(f"[StoryVideoGeneration] Generating story video for {len(scenes)} scenes")
@@ -293,36 +299,59 @@ class StoryVideoGenerationService:
            scene_clips = []
            total_duration = 0.0
            
-            for idx, (scene, image_path, audio_path) in enumerate(zip(scenes, image_paths, audio_paths)):
+            # Import VideoFileClip for animated videos
+            try:
+                from moviepy import VideoFileClip
+            except ImportError:
+                VideoFileClip = None
+            
+            for idx, (scene, image_path, audio_path, video_path) in enumerate(zip(scenes, image_paths, audio_paths, video_paths)):
                try:
                    scene_number = scene.get("scene_number", idx + 1)
                    scene_title = scene.get("title", "Untitled")
                    
                    logger.info(f"[StoryVideoGeneration] Processing scene {scene_number}/{len(scenes)}: {scene_title}")
                    
-                    # Load image and audio
-                    image_file = Path(image_path)
                    audio_file = Path(audio_path)
-                    
-                    if not image_file.exists():
-                        logger.warning(f"[StoryVideoGeneration] Image not found: {image_path}, skipping scene {scene_number}")
-                        continue
                    if not audio_file.exists():
                        logger.warning(f"[StoryVideoGeneration] Audio not found: {audio_path}, skipping scene {scene_number}")
                        continue
                    
-                    # Load audio to get duration
+                    # Load audio
                    audio_clip = AudioFileClip(str(audio_file))
                    audio_duration = audio_clip.duration
                    
-                    # Create image clip (MoviePy v2: use with_* API)
-                    image_clip = ImageClip(str(image_file)).with_duration(audio_duration)
-                    image_clip = image_clip.with_fps(fps)
+                    # Prefer animated video if available
+                    if video_path and Path(video_path).exists():
+                        logger.info(f"[StoryVideoGeneration] Using animated video for scene {scene_number}: {video_path}")
+                        # Load animated video
+                        if VideoFileClip is None:
+                            raise RuntimeError("VideoFileClip not available - MoviePy may not be fully installed")
+                        video_clip = VideoFileClip(str(video_path))
+                        # Replace audio with the preferred audio (AI or free)
+                        video_clip = video_clip.with_audio(audio_clip)
+                        # Match duration to audio if needed
+                        if video_clip.duration > audio_duration:
+                            video_clip = video_clip.subclip(0, audio_duration)
+                        elif video_clip.duration < audio_duration:
+                            # Loop the video if it's shorter than audio
+                            loops_needed = int(audio_duration / video_clip.duration) + 1
+                            video_clip = concatenate_videoclips([video_clip] * loops_needed).subclip(0, audio_duration)
+                            video_clip = video_clip.with_audio(audio_clip)
+                    elif image_path and Path(image_path).exists():
+                        # Fall back to static image
+                        logger.info(f"[StoryVideoGeneration] Using static image for scene {scene_number}: {image_path}")
+                        image_file = Path(image_path)
+                        # Create image clip (MoviePy v2: use with_* API)
+                        image_clip = ImageClip(str(image_file)).with_duration(audio_duration)
+                        image_clip = image_clip.with_fps(fps)
+                        # Set audio to image clip
+                        video_clip = image_clip.with_audio(audio_clip)
+                    else:
+                        logger.warning(f"[StoryVideoGeneration] No video or image found for scene {scene_number}, skipping")
+                        continue
                    
-                    # Set audio to image clip
-                    video_clip = image_clip.with_audio(audio_clip)
                    scene_clips.append(video_clip)
-                    
                    total_duration += audio_duration
                    
                    # Call progress callback if provided