youtube shorts video generator

2025-05-16 21:53:56 +05:30
parent b2ce1ceb49
commit 4049d19787
8 changed files with 1152 additions and 80 deletions
--- a/lib/gpt_providers/text_to_image_generation/gen_gemini_images.py
+++ b/lib/gpt_providers/text_to_image_generation/gen_gemini_images.py
@@ -161,7 +161,7 @@ class AIPromptGenerator:
        return ", ".join(prompt_parts)


-def generate_gemini_image(prompt, keywords=None, style=None, focus=None, enhance_prompt=True, max_retries=3, initial_retry_delay=2):
+def generate_gemini_image(prompt, keywords=None, style=None, focus=None, enhance_prompt=True, max_retries=3, initial_retry_delay=2, aspect_ratio="16:9"):
    """
    Generate images using Gemini
    Depending on the prompt and context, Gemini will generate content in different modes (text to image, text to image and text, etc.). 
@@ -184,6 +184,7 @@ def generate_gemini_image(prompt, keywords=None, style=None, focus=None, enhance
        enhance_prompt (bool, optional): Whether to enhance the prompt using AIPromptGenerator. Defaults to True.
        max_retries (int, optional): Maximum number of retry attempts for handling 503 errors. Defaults to 3.
        initial_retry_delay (int, optional): Initial delay in seconds before retrying. Defaults to 2.
+        aspect_ratio (str, optional): The aspect ratio for the generated image. Must be one of "16:9", "9:16", "4:3", "3:4", or "1:1". Defaults to "16:9".

    Returns:
        str: The path to the generated image.
@@ -212,6 +213,10 @@ def generate_gemini_image(prompt, keywords=None, style=None, focus=None, enhance
        prompt = f"{prompt}\n\nEnhanced prompt: {enhanced_prompt}"
        logger.info(f"Final prompt: '{prompt[:100]}...'")
    
+    # Add aspect ratio to the prompt
+    if aspect_ratio:
+        prompt += f"\n\nPlease generate the image with {aspect_ratio} aspect ratio."
+    
    retry_count = 0
    retry_delay = initial_retry_delay
    
@@ -238,7 +243,40 @@ def generate_gemini_image(prompt, keywords=None, style=None, focus=None, enhance
                elif part.inline_data is not None:
                    logger.info("Received image data from Gemini")
                    image = Image.open(BytesIO((part.inline_data.data)))
-                    image.show()
+                    
+                    # Center-crop or pad with white to match the aspect ratio (no rescaling)
+                    if aspect_ratio:
+                        current_width, current_height = image.size
+                        target_width = current_width
+                        target_height = current_height
+                        
+                        # Calculate target dimensions based on aspect ratio
+                        if aspect_ratio == "16:9":
+                            target_height = int(current_width * 9/16)
+                        elif aspect_ratio == "9:16":
+                            target_width = int(current_height * 9/16)
+                        elif aspect_ratio == "4:3":
+                            target_height = int(current_width * 3/4)
+                        elif aspect_ratio == "3:4":
+                            target_width = int(current_height * 3/4)
+                        elif aspect_ratio == "1:1":
+                            target_size = min(current_width, current_height)
+                            target_width = target_size
+                            target_height = target_size
+                        
+                        logger.info(f"Resizing image from {current_width}x{current_height} to {target_width}x{target_height}")
+                        
+                        # Create a new image with the target dimensions
+                        resized_image = Image.new('RGB', (target_width, target_height), (255, 255, 255))
+                        
+                        # Calculate position to paste the original image
+                        paste_x = (target_width - current_width) // 2
+                        paste_y = (target_height - current_height) // 2
+                        
+                        # Paste the original image onto the new canvas
+                        resized_image.paste(image, (paste_x, paste_y))
+                        image = resized_image
+                    
                    if part.text is not None:
                        img_name = f'{part.text}-gemini-native-image.png'
                    else:
--- a/lib/gpt_providers/text_to_image_generation/main_generate_image_from_prompt.py
+++ b/lib/gpt_providers/text_to_image_generation/main_generate_image_from_prompt.py
@@ -27,7 +27,7 @@ from .gen_stabl_diff_img import generate_stable_diffusion_image
 from ..text_generation.main_text_generation import llm_text_gen
 from .gen_gemini_images import generate_gemini_image

-def generate_image(user_prompt, title=None, description=None, tags=None, content=None):
+def generate_image(user_prompt, title=None, description=None, tags=None, content=None, aspect_ratio="16:9"):
    """
    The generation API endpoint creates an image based on a text prompt.

@@ -42,6 +42,7 @@ def generate_image(user_prompt, title=None, description=None, tags=None, content
    -->response_format (str): The format in which the generated images are returned. 
    Must be one of "url" or "b64_json". Defaults to "url".
    --> user (str): A unique identifier representing your end-user, which will help OpenAI to monitor and detect abuse.
+    --> aspect_ratio (str): The aspect ratio for the generated image. Must be one of "16:9", "9:16", "4:3", "3:4", or "1:1". Defaults to "16:9".
    """
    # FIXME: Need to remove default value to match sidebar input.
    image_engine = 'Gemini-AI'
@@ -51,6 +52,11 @@ def generate_image(user_prompt, title=None, description=None, tags=None, content
        try:
            # Use enhanced prompt generator with all available parameters
            img_prompt = generate_enhanced_img_prompt(user_prompt, title, description, tags, content)
+            
+            # Add aspect ratio to the prompt
+            if aspect_ratio:
+                img_prompt += f"\n\nAspect ratio: {aspect_ratio}"
+            
            if 'Dalle3' in image_engine:
                logger.info(f"Calling Dalle3 text-to-image with prompt: {img_prompt}")
                image_stored_at = generate_dalle3_images(img_prompt)
@@ -59,7 +65,7 @@ def generate_image(user_prompt, title=None, description=None, tags=None, content
                image_stored_at = generate_stable_diffusion_image(img_prompt)
            elif 'Gemini-AI' in image_engine:
                logger.info(f"Calling Gemini text-to-image with prompt: \n{img_prompt}")
-                image_stored_at = generate_gemini_image(img_prompt)
+                image_stored_at = generate_gemini_image(img_prompt, aspect_ratio=aspect_ratio)
            return image_stored_at
        except Exception as err:
            logger.error(f"Failed to generate Image: {err}")