AI story writer enhancements, text to video and voice generation, subscription management, and more.

This commit is contained in:
ajaysi
2025-11-19 09:55:32 +05:30
parent bf7493c366
commit e96525347b
64 changed files with 10367 additions and 400 deletions

View File

@@ -288,4 +288,90 @@ class StoryAudioGenerationService:
logger.info(f"[StoryAudioGeneration] Generated {len(audio_results)} audio files out of {total_scenes} scenes")
return audio_results
def generate_ai_audio(
    self,
    scene_number: int,
    scene_title: str,
    text: str,
    user_id: str,
    voice_id: str = "Wise_Woman",
    speed: float = 1.0,
    volume: float = 1.0,
    pitch: float = 0.0,
    emotion: str = "happy",
) -> Dict[str, Any]:
    """
    Synthesize AI narration for one scene and write it to the output directory.

    The stripped text is handed to ``generate_audio`` (imported lazily from
    ``services.llm_providers.main_audio_generation``); the returned bytes are
    written to ``self.output_dir`` under a name produced by
    ``self._generate_audio_filename``.

    Parameters:
        scene_number (int): Scene number.
        scene_title (str): Scene title.
        text (str): Narration text; must contain non-whitespace characters.
        user_id (str): Clerk user ID for subscription checking.
        voice_id (str): Voice to synthesize with (default: "Wise_Woman").
        speed (float): Speech speed (0.5-2.0, default: 1.0). Not validated here.
        volume (float): Speech volume (0.1-10.0, default: 1.0). Not validated here.
        pitch (float): Speech pitch (-12 to 12, default: 0.0). Not validated here.
        emotion (str): Emotion for speech (default: "happy").

    Returns:
        Dict[str, Any]: Scene info, saved file path/name/URL, provider details
        reported by the generation result, and an estimated cost.

    Raises:
        ValueError: If ``text`` is empty or whitespace-only.
        HTTPException: Propagated unchanged (e.g., 429 subscription limit).
        RuntimeError: Wraps any other failure, chained to the original error.
    """
    # Guard first, outside the try, so bad input is never wrapped in RuntimeError.
    if not (text and text.strip()):
        raise ValueError(f"Scene {scene_number} ({scene_title}) requires non-empty text")

    try:
        logger.info(f"[StoryAudioGeneration] Generating AI audio for scene {scene_number}: {scene_title}")
        logger.debug(f"[StoryAudioGeneration] Text length: {len(text)} characters, voice: {voice_id}")

        # Imported inside the try so an import failure is reported like any
        # other generation error.
        from services.llm_providers.main_audio_generation import generate_audio

        synthesis_kwargs = dict(
            text=text.strip(),
            voice_id=voice_id,
            speed=speed,
            volume=volume,
            pitch=pitch,
            emotion=emotion,
            user_id=user_id,
        )
        synthesis = generate_audio(**synthesis_kwargs)

        # Persist the raw audio bytes next to the other story assets.
        audio_filename = self._generate_audio_filename(scene_number, scene_title)
        audio_path = self.output_dir / audio_filename
        with open(audio_path, "wb") as sink:
            sink.write(synthesis.audio_bytes)
        logger.info(f"[StoryAudioGeneration] Saved AI audio to: {audio_path} ({synthesis.file_size} bytes)")

        # Estimated cost reported back to the caller: flat rate per 1000 characters.
        rate_per_1000_chars = 0.05
        billed_characters = synthesis.text_length
        cost = (billed_characters / 1000.0) * rate_per_1000_chars

        metadata: Dict[str, Any] = {
            "scene_number": scene_number,
            "scene_title": scene_title,
        }
        metadata.update(
            audio_path=str(audio_path),
            audio_filename=audio_filename,
            audio_url=f"/api/story/audio/{audio_filename}",
        )
        metadata.update(
            provider=synthesis.provider,
            model=synthesis.model,
            voice_id=synthesis.voice_id,
            text_length=synthesis.text_length,
            file_size=synthesis.file_size,
            cost=cost,
        )
        return metadata
    except HTTPException:
        # Re-raise HTTPExceptions (e.g., 429 subscription limit) untouched.
        raise
    except Exception as e:
        logger.error(f"[StoryAudioGeneration] Error generating AI audio for scene {scene_number}: {e}")
        raise RuntimeError(f"Failed to generate AI audio for scene {scene_number}: {str(e)}") from e

View File

@@ -193,4 +193,82 @@ class StoryImageGenerationService:
logger.info(f"[StoryImageGeneration] Generated {len(image_results)} images out of {total_scenes} scenes")
return image_results
def regenerate_scene_image(
    self,
    scene_number: int,
    scene_title: str,
    prompt: str,
    user_id: str,
    provider: Optional[str] = None,
    width: int = 1024,
    height: int = 1024,
    model: Optional[str] = None
) -> Dict[str, Any]:
    """
    Re-create the image for one scene from a caller-supplied prompt.

    The prompt is used as given (only stripped of surrounding whitespace); no
    AI prompt generation step runs. The resulting bytes are written to
    ``self.output_dir`` under a name produced by
    ``self._generate_image_filename``.

    Parameters:
        scene_number (int): Scene number.
        scene_title (str): Scene title.
        prompt (str): Direct prompt to use; must contain non-whitespace characters.
        user_id (str): Clerk user ID for subscription checking.
        provider (str, optional): Image generation provider (gemini, huggingface, stability).
        width (int): Image width (default: 1024).
        height (int): Image height (default: 1024).
        model (str, optional): Model to use for image generation.

    Returns:
        Dict[str, Any]: Scene info, saved file path/name/URL, and the width,
        height, provider, model and seed reported by the generation result.

    Raises:
        ValueError: If ``prompt`` is empty or whitespace-only.
        HTTPException: Propagated unchanged (e.g., 429 subscription limit).
        RuntimeError: Wraps any other failure, chained to the original error.
    """
    # Guard first, outside the try, so bad input is never wrapped in RuntimeError.
    if not (prompt and prompt.strip()):
        raise ValueError(f"Scene {scene_number} ({scene_title}) requires a non-empty prompt")

    try:
        logger.info(f"[StoryImageGeneration] Regenerating image for scene {scene_number}: {scene_title}")
        # Only the first 100 characters of the prompt are logged.
        logger.debug(f"[StoryImageGeneration] Using direct prompt: {prompt[:100]}...")

        # provider/model are forwarded even when None.
        generated: ImageGenerationResult = generate_image(
            prompt=prompt.strip(),
            options=dict(provider=provider, width=width, height=height, model=model),
            user_id=user_id,
        )

        # Persist the raw image bytes next to the other story assets.
        image_filename = self._generate_image_filename(scene_number, scene_title)
        image_path = self.output_dir / image_filename
        with open(image_path, "wb") as sink:
            sink.write(generated.image_bytes)
        logger.info(f"[StoryImageGeneration] Saved regenerated image to: {image_path}")

        metadata: Dict[str, Any] = {
            "scene_number": scene_number,
            "scene_title": scene_title,
        }
        metadata.update(
            image_path=str(image_path),
            image_filename=image_filename,
            image_url=f"/api/story/images/{image_filename}",
        )
        # Dimensions come from the generation result, not the requested values.
        metadata.update(
            width=generated.width,
            height=generated.height,
            provider=generated.provider,
            model=generated.model,
            seed=generated.seed,
        )
        return metadata
    except HTTPException:
        # Re-raise HTTPExceptions (e.g., 429 subscription limit) untouched.
        raise
    except Exception as e:
        logger.error(f"[StoryImageGeneration] Error regenerating image for scene {scene_number}: {e}")
        raise RuntimeError(f"Failed to regenerate image for scene {scene_number}: {str(e)}") from e

View File

@@ -220,35 +220,41 @@ class StoryVideoGenerationService:
def generate_story_video(
self,
scenes: List[Dict[str, Any]],
image_paths: List[str],
image_paths: List[Optional[str]],
audio_paths: List[str],
user_id: str,
story_title: str = "Story",
fps: int = 24,
transition_duration: float = 0.5,
progress_callback: Optional[callable] = None
progress_callback: Optional[callable] = None,
video_paths: Optional[List[Optional[str]]] = None
) -> Dict[str, Any]:
"""
Generate a complete story video from multiple scenes.
Parameters:
scenes (List[Dict[str, Any]]): List of scene data.
image_paths (List[str]): List of image file paths for each scene.
image_paths (List[Optional[str]]): List of image file paths (None if scene has animated video).
audio_paths (List[str]): List of audio file paths for each scene.
user_id (str): Clerk user ID for subscription checking.
story_title (str): Title of the story (default: "Story").
fps (int): Frames per second for video (default: 24).
transition_duration (float): Duration of transitions between scenes in seconds (default: 0.5).
progress_callback (callable, optional): Callback function for progress updates.
video_paths (Optional[List[Optional[str]]]): List of animated video file paths (None if scene has static image).
Returns:
Dict[str, Any]: Video metadata including file path, URL, and story info.
"""
if not scenes or not image_paths or not audio_paths:
raise ValueError("Scenes, image paths, and audio paths are required")
if not scenes or not audio_paths:
raise ValueError("Scenes and audio paths are required")
if len(scenes) != len(image_paths) or len(scenes) != len(audio_paths):
raise ValueError("Number of scenes, image paths, and audio paths must match")
if len(scenes) != len(audio_paths):
raise ValueError("Number of scenes and audio paths must match")
video_paths = video_paths or [None] * len(scenes)
if len(video_paths) != len(scenes):
video_paths = video_paths + [None] * (len(scenes) - len(video_paths))
try:
logger.info(f"[StoryVideoGeneration] Generating story video for {len(scenes)} scenes")
@@ -293,36 +299,59 @@ class StoryVideoGenerationService:
scene_clips = []
total_duration = 0.0
for idx, (scene, image_path, audio_path) in enumerate(zip(scenes, image_paths, audio_paths)):
# Import VideoFileClip for animated videos
try:
from moviepy import VideoFileClip
except ImportError:
VideoFileClip = None
for idx, (scene, image_path, audio_path, video_path) in enumerate(zip(scenes, image_paths, audio_paths, video_paths)):
try:
scene_number = scene.get("scene_number", idx + 1)
scene_title = scene.get("title", "Untitled")
logger.info(f"[StoryVideoGeneration] Processing scene {scene_number}/{len(scenes)}: {scene_title}")
# Load image and audio
image_file = Path(image_path)
audio_file = Path(audio_path)
if not image_file.exists():
logger.warning(f"[StoryVideoGeneration] Image not found: {image_path}, skipping scene {scene_number}")
continue
if not audio_file.exists():
logger.warning(f"[StoryVideoGeneration] Audio not found: {audio_path}, skipping scene {scene_number}")
continue
# Load audio to get duration
# Load audio
audio_clip = AudioFileClip(str(audio_file))
audio_duration = audio_clip.duration
# Create image clip (MoviePy v2: use with_* API)
image_clip = ImageClip(str(image_file)).with_duration(audio_duration)
image_clip = image_clip.with_fps(fps)
# Prefer animated video if available
if video_path and Path(video_path).exists():
logger.info(f"[StoryVideoGeneration] Using animated video for scene {scene_number}: {video_path}")
# Load animated video
if VideoFileClip is None:
raise RuntimeError("VideoFileClip not available - MoviePy may not be fully installed")
video_clip = VideoFileClip(str(video_path))
# Replace audio with the preferred audio (AI or free)
video_clip = video_clip.with_audio(audio_clip)
# Match duration to audio if needed
if video_clip.duration > audio_duration:
video_clip = video_clip.subclip(0, audio_duration)
elif video_clip.duration < audio_duration:
# Loop the video if it's shorter than audio
loops_needed = int(audio_duration / video_clip.duration) + 1
video_clip = concatenate_videoclips([video_clip] * loops_needed).subclip(0, audio_duration)
video_clip = video_clip.with_audio(audio_clip)
elif image_path and Path(image_path).exists():
# Fall back to static image
logger.info(f"[StoryVideoGeneration] Using static image for scene {scene_number}: {image_path}")
image_file = Path(image_path)
# Create image clip (MoviePy v2: use with_* API)
image_clip = ImageClip(str(image_file)).with_duration(audio_duration)
image_clip = image_clip.with_fps(fps)
# Set audio to image clip
video_clip = image_clip.with_audio(audio_clip)
else:
logger.warning(f"[StoryVideoGeneration] No video or image found for scene {scene_number}, skipping")
continue
# Set audio to image clip
video_clip = image_clip.with_audio(audio_clip)
scene_clips.append(video_clip)
total_duration += audio_duration
# Call progress callback if provided