ALwrity Version 0.5.0 (Fastapi + React )

2025-08-06 12:48:02 +05:30
parent f28a919caa
commit 32f97fa6b3
476 changed files with 115544 additions and 28747 deletions
--- a/backend/services/llm_providers/text_to_image_generation/gen_dali2_images.py
+++ b/backend/services/llm_providers/text_to_image_generation/gen_dali2_images.py
@@ -0,0 +1,56 @@
+from openai import OpenAI
+from loguru import logger
+import sys
+
+from .save_image import save_generated_image
+
+from tenacity import (
+    retry,
+    stop_after_attempt,
+    wait_random_exponential,
+)  # for exponential backoff
+
+
+@retry(wait=wait_random_exponential(min=1, max=120), stop=stop_after_attempt(6))
+def generate_dalle3_images(img_prompt, image_dir, size="1024x1024", quality="hd", n=1):
+    """
+    Generate an image with the DALL-E 3 model and save it under ``image_dir``.
+
+    Args:
+        img_prompt (str): Text prompt to generate the image.
+        image_dir (str): Directory where the generated image will be saved.
+        size (str, optional): Size of the generated images. Defaults to "1024x1024".
+        quality (str, optional): Quality of the generated images. Defaults to "hd".
+        n (int, optional): Number of images to generate. Defaults to 1.
+
+    Returns:
+        The value returned by ``save_generated_image`` (the saved image path),
+        or None when the image was generated but could not be saved.
+
+    Raises:
+        SystemExit: If the image generation request itself fails.
+    """
+    # Only the ``OpenAI`` class is imported at file level; bind the package name for the except clause.
+    import openai
+
+    try:
+        logger.info("Generating Dall-e-3 image for the blog.")
+        client = OpenAI()
+        img_generation_response = client.images.generate(
+            model="dall-e-3", prompt=img_prompt, size=size, quality=quality, n=n
+        )
+    except openai.OpenAIError as e:
+        # ``status_code`` is not present on every OpenAIError subclass, hence getattr.
+        status = getattr(e, "status_code", None)
+        logger.error(f"Dalle-3 image generation error: HTTP Status {status}, Error: {e}")
+        sys.exit("Exiting due to Dalle-3 image generation error.")
+    except Exception as e:
+        logger.error(f"Failed to generate images with Dalle3: {e}")
+        sys.exit("Exiting due to a general error in image generation.")
+
+    # Saving is best-effort: a failure is logged and None is returned.
+    try:
+        return save_generated_image(img_generation_response, image_dir)
+    except Exception as err:
+        logger.error(f"Failed to Save generated image: {err}")
+        return None
--- a/backend/services/llm_providers/text_to_image_generation/gen_dali3_images.py
+++ b/backend/services/llm_providers/text_to_image_generation/gen_dali3_images.py
@@ -0,0 +1,53 @@
+from openai import OpenAI
+from loguru import logger
+import sys
+
+from tenacity import (
+    retry,
+    stop_after_attempt,
+    wait_random_exponential,
+)  # for exponential backoff
+
+from .save_image import save_generated_image
+
+
+@retry(wait=wait_random_exponential(min=1, max=120), stop=stop_after_attempt(6))
+def generate_dalle3_images(img_prompt, image_dir, size="1024x1024", quality="hd", n=1):
+    """
+    Generate an image with the DALL-E 3 model and save it under ``image_dir``.
+
+    Args:
+        img_prompt (str): Text prompt to generate the image.
+        image_dir (str): Directory where the generated image will be saved.
+        size (str, optional): Size of the generated images. Defaults to "1024x1024".
+        quality (str, optional): Quality of the generated images. Defaults to "hd".
+        n (int, optional): Number of images to generate. Defaults to 1.
+
+    Returns:
+        The value returned by ``save_generated_image`` (the saved image path).
+
+    Raises:
+        SystemExit: If an error occurs in image generation or saving.
+    """
+    # Only the ``OpenAI`` class is imported at file level; bind the package name for the except clause.
+    import openai
+
+    try:
+        logger.info("Generating Dall-e-3 image for the blog.")
+        client = OpenAI()
+        img_generation_response = client.images.generate(
+            model="dall-e-3",
+            prompt=img_prompt,
+            size=size,
+            quality=quality,
+            n=n
+        )
+        return save_generated_image(img_generation_response, image_dir)
+    except openai.OpenAIError as e:
+        # ``status_code`` is not present on every OpenAIError subclass, hence getattr.
+        status = getattr(e, "status_code", None)
+        logger.error(f"Dalle-3 image generation error: HTTP Status {status}, Error: {e}")
+        sys.exit("Exiting due to Dalle-3 image generation error.")
+    except Exception as e:
+        logger.error(f"Failed to generate images with Dalle3: {e}")
+        sys.exit("Exiting due to a general error in image generation.")
--- a/backend/services/llm_providers/text_to_image_generation/gen_gemini_images.py
+++ b/backend/services/llm_providers/text_to_image_generation/gen_gemini_images.py
@@ -0,0 +1,421 @@
+import os
+import sys
+import time
+import datetime
+import streamlit as st
+from PIL import Image
+from io import BytesIO
+from loguru import logger
+from tenacity import retry, stop_after_attempt, wait_random_exponential
+
+# Import APIKeyManager
+from ...api_key_manager import APIKeyManager
+
+try:
+    import google.generativeai as genai
+    from google.generativeai import types
+except ImportError:
+    genai = None
+    logger.warning("Google genai library not available. Install with: pip install google-generativeai")
+
+
+from .save_image import save_generated_image
+
+# Logging setup.
+# This module logs through the loguru ``logger`` imported at the top of the file.
+# The standard-library ``logging`` module is not imported here, so configuring it
+# at this point would raise NameError on import, and rebinding ``logger`` to a
+# stdlib logger would shadow loguru. Bind a component name for filtering instead.
+logger = logger.bind(component='gemini_image_generator')
+
+# With image generation in Gemini, your imagination is the limit. 
+# If what you see doesn't quite match what you had in mind, try adding more details to the prompt. 
+# The more specific you are, the better Gemini can create images that reflect your vision.
+
+# Generate images using Gemini
+# Gemini 2.0 Flash Experimental supports the ability to output text and inline images. 
+# This lets you use Gemini to conversationally edit images or generate outputs with interwoven text (for example, generating a blog post with text and images in a single turn).
+# Note: Make sure to include responseModalities: ["Text", "Image"] in your generation configuration for text and image output with gemini-2.0-flash-exp-image-generation. Image only is not allowed.
+
+
+class AIPromptGenerator:
+    """
+    Generates enhanced AI image prompts based on user keywords,
+    following the guidelines of the Imagen documentation.
+    """
+
+    def __init__(self):
+        self.photography_styles = ["photo", "photograph"]
+        self.art_styles = ["painting", "sketch", "drawing", "illustration", "digital art", "render"]
+        self.art_techniques = ["technical pencil drawing", "charcoal drawing", "color pencil drawing", "pastel painting", "digital art", "art deco (poster)", "impressionist painting", "renaissance painting", "pop art"]
+        self.camera_proximity = ["close-up", "zoomed out", "taken from far away"]
+        self.camera_position = ["aerial", "from below"]
+        self.lighting = ["natural lighting", "dramatic lighting", "warm lighting", "cold lighting", "studio lighting", "golden hour lighting"]
+        self.camera_settings = ["motion blur", "soft focus", "bokeh", "portrait"]
+        self.lens_types = ["35mm lens", "50mm lens", "fisheye lens", "wide angle lens", "macro lens", "telephoto lens"]
+        self.film_types = ["black and white film", "polaroid"]
+        self.materials = ["made of cheese", "made of paper", "made of neon tubes", "metallic", "glass", "wooden", "stone"]
+        self.shapes = ["in the shape of a bird", "angular", "curved", "geometric"]
+        self.quality_modifiers_general = ["high-quality", "beautiful", "stylized", "detailed", "epic", "grand"]
+        self.quality_modifiers_photo = ["4K", "HDR", "studio photo", "professional photo", "photorealistic"]
+        self.quality_modifiers_art = ["by a professional artist", "intricate details", "masterpiece"]
+        self.aspect_ratios = ["1:1 aspect ratio", "4:3 aspect ratio", "3:4 aspect ratio", "16:9 aspect ratio", "9:16 aspect ratio"]
+        self.photorealistic_modifiers = {
+            "portraits": ["prime lens", "zoom lens", "24-35mm", "black and white film", "film noir", "shallow depth of field", "duotone (mention two colors)"],
+            "objects": ["macro lens", "60-105mm", "high detail", "precise focusing", "controlled lighting"],
+            "motion": ["telephoto zoom lens", "100-400mm", "fast shutter speed", "action shot", "movement tracking"],
+            "wide-angle": ["wide-angle lens", "10-24mm", "long exposure", "sharp focus", "smooth water or clouds", "astro photography"]
+        }
+
+    def generate_prompt(self, keywords):
+        """
+        Build a comma-separated image prompt from the keywords plus randomly chosen modifiers.
+
+        Args:
+            keywords (list): A list of keywords describing the desired image.
+
+        Returns:
+            str: An enhanced AI image prompt, or "A beautiful image." when no keywords are given.
+        """
+        import random  # function-scope import: the module does not import ``random``
+        if not keywords:
+            return "A beautiful image."
+
+        prompt_parts = []
+        subject = " ".join(keywords)
+        prompt_parts.append(subject)
+
+        # Add context and background (optional)
+        context_options = ["in a detailed background", "outdoors", "indoors", "in a studio", "with a blurred background"]
+        if random.random() < 0.6:  # Add context with a probability
+            prompt_parts.append(random.choice(context_options))
+
+        # Add style (optional); the style is placed in front of the subject
+        style_options = self.photography_styles + [f"{art} of" for art in self.art_styles]
+        if random.random() < 0.7:
+            prompt_parts.insert(0, random.choice(style_options))
+            if prompt_parts[0].startswith(("painting of", "sketch of", "drawing of")):
+                if random.random() < 0.5:
+                    prompt_parts.append(f"in the style of {random.choice(self.art_techniques)}")
+
+        # Add photography modifiers (if photography style is chosen)
+        if any(style in prompt_parts[0] for style in self.photography_styles):
+            if random.random() < 0.4:
+                prompt_parts.append(random.choice(self.camera_proximity))
+            if random.random() < 0.3:
+                prompt_parts.append(random.choice(self.camera_position))
+            if random.random() < 0.5:
+                prompt_parts.append(random.choice(self.lighting))
+            if random.random() < 0.3:
+                prompt_parts.append(random.choice(self.camera_settings))
+            if random.random() < 0.2:
+                prompt_parts.append(random.choice(self.lens_types))
+            if random.random() < 0.1:
+                prompt_parts.append(random.choice(self.film_types))
+
+        # Add shapes and materials (optional)
+        if random.random() < 0.3:
+            prompt_parts.append(random.choice(self.materials))
+        if random.random() < 0.2:
+            prompt_parts.append(random.choice(self.shapes))
+
+        # Add quality modifiers (optional)
+        if random.random() < 0.6:
+            is_photo = any(style in prompt_parts[0] for style in self.photography_styles)
+            extra = self.quality_modifiers_photo if is_photo else self.quality_modifiers_art
+            # Build a new list: ``+=`` on the instance list would grow it on every call.
+            quality_options = list(dict.fromkeys(self.quality_modifiers_general + extra))
+            prompt_parts.append(random.choice(quality_options))
+
+        # Add aspect ratio (optional)
+        if random.random() < 0.2:
+            prompt_parts.append(random.choice(self.aspect_ratios))
+
+        return ", ".join(prompt_parts)
+
+    def generate_photorealistic_prompt(self, keywords, focus=""):
+        """
+        Build a photorealistic image prompt from the keywords and an optional focus preset.
+
+        Args:
+            keywords (list): A list of keywords describing the desired image.
+            focus (str, optional): The focus of the photorealistic image (e.g., "portraits", "objects", "motion", "wide-angle"). Defaults to "".
+
+        Returns:
+            str: An enhanced photorealistic AI image prompt, or "A photorealistic image." when no keywords are given.
+        """
+        import random  # function-scope import: the module does not import ``random``
+        if not keywords:
+            return "A photorealistic image."
+
+        prompt_parts = ["A photo of", "photorealistic", " ".join(keywords)]
+
+        if focus and focus in self.photorealistic_modifiers:
+            modifiers = self.photorealistic_modifiers[focus]
+            if modifiers:
+                num_modifiers = random.randint(1, min(3, len(modifiers)))
+                selected_modifiers = random.sample(modifiers, num_modifiers)
+                prompt_parts.extend(selected_modifiers)
+
+        # Add general quality modifiers
+        if random.random() < 0.5:
+            prompt_parts.append(random.choice(self.quality_modifiers_photo))
+
+        # Add lighting
+        if random.random() < 0.4:
+            prompt_parts.append(random.choice(self.lighting))
+
+        return ", ".join(prompt_parts)
+
+
+def generate_gemini_image(prompt, keywords=None, style=None, focus=None, enhance_prompt=True, max_retries=3, initial_retry_delay=2, aspect_ratio="16:9"):
+    """
+    Generate an image with Gemini and write it to a PNG file in the working directory.
+
+    Args:
+        prompt (str): The text prompt for image generation.
+        keywords (list, optional): Keywords used to build an enhanced prompt.
+        style (str, optional): Image style; "photorealistic" (with a focus) selects the photo prompt builder.
+        focus (str, optional): Focus area for photorealistic prompts.
+        enhance_prompt (bool, optional): Append an AIPromptGenerator prompt when keywords are given.
+        max_retries (int, optional): Maximum number of retries after a "503 UNAVAILABLE" error.
+        initial_retry_delay (int, optional): Seconds before the first retry; doubled after each retry.
+        aspect_ratio (str, optional): Aspect ratio requested in the prompt and applied to the canvas.
+
+    Returns:
+        str | None: Name of the image file written, or None if no image was saved or an error occurred.
+    """
+    import base64  # function-scope import: the module does not import ``base64``
+
+    logger.info(f"Generating image with prompt: '{prompt[:100]}...'")
+
+    if genai is None:
+        error_msg = "Google genai library is not installed; image generation is unavailable."
+        logger.error(error_msg)
+        st.error(error_msg)
+        return None
+
+    # Use APIKeyManager instead of direct environment variable access
+    api_key_manager = APIKeyManager()
+    api_key = api_key_manager.get_api_key("gemini")
+
+    if not api_key:
+        error_msg = "Gemini API key not found. Please configure it in the onboarding process."
+        logger.error(error_msg)
+        st.error(f"🔑 {error_msg}")
+        return None
+
+    # Enhance the prompt if requested
+    if enhance_prompt and keywords:
+        prompt_generator = AIPromptGenerator()
+        if style == "photorealistic" and focus:
+            logger.info(f"Generating photorealistic prompt with focus: {focus}")
+            enhanced_prompt = prompt_generator.generate_photorealistic_prompt(keywords, focus)
+        else:
+            logger.info("Generating enhanced prompt")
+            enhanced_prompt = prompt_generator.generate_prompt(keywords)
+
+        # Combine the enhanced prompt with the original prompt
+        prompt = f"{prompt}\n\nEnhanced prompt: {enhanced_prompt}"
+        logger.info(f"Final prompt: '{prompt[:100]}...'")
+
+    # Add aspect ratio to the prompt
+    if aspect_ratio:
+        prompt += f"\n\nPlease generate the image with {aspect_ratio} aspect ratio."
+
+    retry_count = 0
+    retry_delay = initial_retry_delay
+
+    while retry_count <= max_retries:
+        try:
+            # NOTE(review): ``genai.Client`` looks like the google-genai SDK API rather than
+            # ``google.generativeai`` as imported by this module — confirm the intended package.
+            client = genai.Client(api_key=api_key)
+
+            logger.info("Sending request to Gemini API")
+            response = client.models.generate_content(
+                model="gemini-2.0-flash-exp-image-generation",
+                contents=prompt,
+                config=types.GenerateContentConfig(
+                    response_modalities=['Text', 'Image']
+                )
+            )
+            logger.info("Received response from Gemini API")
+
+            img_name = None
+            for part in response.candidates[0].content.parts:
+                if part.text is not None:
+                    logger.info(f"Received text response: '{part.text[:100]}...'")
+                    print(part.text)
+                elif part.inline_data is not None:
+                    logger.info("Received image data from Gemini")
+                    image = Image.open(BytesIO(part.inline_data.data))
+
+                    # Pad or crop the image onto a white canvas with the requested aspect ratio
+                    if aspect_ratio:
+                        current_width, current_height = image.size
+                        target_width = current_width
+                        target_height = current_height
+
+                        # Calculate target dimensions based on aspect ratio
+                        if aspect_ratio == "16:9":
+                            target_height = int(current_width * 9/16)
+                        elif aspect_ratio == "9:16":
+                            target_width = int(current_height * 9/16)
+                        elif aspect_ratio == "4:3":
+                            target_height = int(current_width * 3/4)
+                        elif aspect_ratio == "3:4":
+                            target_width = int(current_height * 3/4)
+                        elif aspect_ratio == "1:1":
+                            target_size = min(current_width, current_height)
+                            target_width = target_size
+                            target_height = target_size
+
+                        logger.info(f"Resizing image from {current_width}x{current_height} to {target_width}x{target_height}")
+
+                        # Centre the original on the new canvas; a negative offset crops it
+                        resized_image = Image.new('RGB', (target_width, target_height), (255, 255, 255))
+                        paste_x = (target_width - current_width) // 2
+                        paste_y = (target_height - current_height) // 2
+                        resized_image.paste(image, (paste_x, paste_y))
+                        image = resized_image
+
+                    # This branch only runs when part.text is None, so the name is always timestamp-based
+                    candidate_name = f'gemini-native-image-{datetime.datetime.now().strftime("%Y%m%d-%H%M%S")}.png'
+                    try:
+                        logger.info(f"Saving image to: {candidate_name}")
+                        image.save(candidate_name)
+                        img_name = candidate_name  # only report a file that was actually written
+
+                        # Re-read the saved file and wrap it in the format expected by save_generated_image
+                        with open(candidate_name, "rb") as img_file:
+                            encoded = base64.b64encode(img_file.read()).decode('utf-8')
+                        img_response = {"artifacts": [{"base64": encoded}]}
+
+                        # NOTE(review): other modules call this helper with (response, image_dir) — confirm
+                        save_generated_image(img_response)
+                    except Exception as err:
+                        logger.error(f"Failed to save image: {err}")
+                        st.error(f"Failed to save image: {err}")
+
+            logger.info(f"Image generation completed. Image name: {img_name}")
+            return img_name
+        except Exception as err:
+            error_message = str(err)
+            logger.error(f"Error in generate_gemini_image: {err}")
+
+            # Check if this is a 503 UNAVAILABLE error
+            if "503 UNAVAILABLE" in error_message and retry_count < max_retries:
+                retry_count += 1
+                logger.info(f"Model is overloaded. Retrying in {retry_delay} seconds (attempt {retry_count}/{max_retries})")
+                st.warning(f"The image generation service is currently busy. Retrying in {retry_delay} seconds...")
+                time.sleep(retry_delay)
+                # Exponential backoff
+                retry_delay *= 2
+            else:
+                st.error(f"Error generating image: {err}")
+                return None
+
+    # If we've exhausted all retries
+    st.error("The image generation service is currently unavailable. Please try again later.")
+    return None
+
+
+def edit_image(image_path, prompt, max_retries=3, initial_retry_delay=2):
+    """
+    Edit an existing image with Gemini using a text instruction.
+
+    The image at ``image_path`` and ``prompt`` are sent together to the model. An image
+    part returned by the model is written to ``edited-<basename>`` in the working
+    directory and opened with ``Image.show()``; any text returned by the model is
+    written to the Streamlit page.
+
+    Example prompts:
+        "Edit this image to make it look like a cartoon"
+        [upload an image of a blue car.] "Turn this car into a convertible."
+
+    Args:
+        image_path (str): The path to the image to edit.
+        prompt (str): The prompt to edit the image with.
+        max_retries (int, optional): Maximum number of retry attempts for handling 503 errors. Defaults to 3.
+        initial_retry_delay (int, optional): Initial delay in seconds before retrying. Defaults to 2.
+
+    Returns:
+        str | None: Name of the edited image file written, or None if no image was saved or an error occurred.
+    """
+    import base64  # function-scope import: the module does not import ``base64``
+
+    image = Image.open(image_path)
+
+    retry_count = 0
+    retry_delay = initial_retry_delay
+
+    while retry_count <= max_retries:
+        try:
+            # Prefer the key from APIKeyManager (as generate_gemini_image does); otherwise let
+            # the client fall back to its own default credential lookup.
+            api_key = APIKeyManager().get_api_key("gemini")
+            client = genai.Client(api_key=api_key) if api_key else genai.Client()
+
+            logger.info("Sending request to Gemini API for image editing")
+            response = client.models.generate_content(
+                model="gemini-2.0-flash-exp-image-generation",
+                contents=[prompt, image],
+                config=types.GenerateContentConfig(
+                    response_modalities=['Text', 'Image']
+                )
+            )
+            logger.info("Received response from Gemini API for image editing")
+
+            edited_img_name = None
+            for part in response.candidates[0].content.parts:
+                if part.text is not None:
+                    logger.info(f"Received text response: '{part.text[:100]}...'")
+                    st.write(part.text)
+                elif part.inline_data is not None:
+                    logger.info("Received edited image data from Gemini")
+                    edited_image = Image.open(BytesIO(part.inline_data.data))
+                    # NOTE(review): show() opens a local viewer — confirm this is wanted in a backend service
+                    edited_image.show()
+
+                    # Save the edited image
+                    candidate_name = f'edited-{os.path.basename(image_path)}'
+                    try:
+                        logger.info(f"Saving edited image to: {candidate_name}")
+                        edited_image.save(candidate_name)
+                        edited_img_name = candidate_name  # only report a file that was actually written
+
+                        # Re-read the saved file and wrap it in the format expected by save_generated_image
+                        with open(candidate_name, "rb") as img_file:
+                            encoded = base64.b64encode(img_file.read()).decode('utf-8')
+                        img_response = {"artifacts": [{"base64": encoded}]}
+
+                        # NOTE(review): other modules call this helper with (response, image_dir) — confirm
+                        save_generated_image(img_response)
+                    except Exception as err:
+                        logger.error(f"Failed to save edited image: {err}")
+                        st.error(f"Failed to save edited image: {err}")
+
+            logger.info(f"Image editing completed. Edited image name: {edited_img_name}")
+            return edited_img_name
+        except Exception as err:
+            error_message = str(err)
+            logger.error(f"Error in edit_image: {err}")
+
+            # Check if this is a 503 UNAVAILABLE error
+            if "503 UNAVAILABLE" in error_message and retry_count < max_retries:
+                retry_count += 1
+                logger.info(f"Model is overloaded. Retrying in {retry_delay} seconds (attempt {retry_count}/{max_retries})")
+                st.warning(f"The image editing service is currently busy. Retrying in {retry_delay} seconds...")
+                time.sleep(retry_delay)
+                # Exponential backoff
+                retry_delay *= 2
+            else:
+                st.error(f"Error editing image: {err}")
+                return None
+
+    # If we've exhausted all retries
+    st.error("The image editing service is currently unavailable. Please try again later.")
+    return None
+
+
--- a/backend/services/llm_providers/text_to_image_generation/gen_stabl_diff_img.py
+++ b/backend/services/llm_providers/text_to_image_generation/gen_stabl_diff_img.py
@@ -0,0 +1,69 @@
+# Ensure you sign up for an account to obtain an API key:
+# https://platform.stability.ai/
+# Your API key can be found here after account creation:
+# https://platform.stability.ai/account/keys
+
+import os
+import requests
+import base64
+from PIL import Image
+from io import BytesIO
+import streamlit as st
+from loguru import logger
+
+# Import APIKeyManager
+from ...api_key_manager import APIKeyManager
+
+def save_generated_image(data):
+    """Placeholder for saving a generated image; currently does nothing and returns None."""
+    # NOTE(review): unimplemented stub — callers in this file receive None instead of a file path.
+    pass
+
+def generate_stable_diffusion_image(prompt, timeout=120):
+    """
+    Generate a 1024x1024 image with the Stability AI text-to-image REST API.
+
+    Args:
+        prompt (str): Text prompt describing the desired image.
+        timeout (float, optional): Seconds to wait for the HTTP response. Defaults to 120.
+
+    Returns:
+        The value returned by this module's ``save_generated_image`` (a stub that
+        currently returns None), or None when no Stability API key is configured.
+
+    Raises:
+        Exception: If the API answers with a non-200 status code.
+        requests.RequestException: If the request fails or times out.
+    """
+    engine_id = "stable-diffusion-xl-1024-v1-0"
+    api_host = os.getenv('API_HOST', 'https://api.stability.ai')
+
+    api_key = APIKeyManager().get_api_key("stability")
+    if not api_key:  # an empty key is as unusable as a missing one
+        st.warning("Missing Stability API key. Please configure it in the onboarding process.")
+        return None
+
+    response = requests.post(
+        f"{api_host}/v1/generation/{engine_id}/text-to-image",
+        headers={
+            "Content-Type": "application/json",
+            "Accept": "application/json",
+            "Authorization": f"Bearer {api_key}"
+        },
+        json={
+            "text_prompts": [{"text": prompt}],
+            "cfg_scale": 7,
+            "height": 1024, "width": 1024,
+            "samples": 1, "steps": 30,
+        },
+        timeout=timeout,  # without a timeout a stalled connection would block indefinitely
+    )
+    if response.status_code != 200:
+        raise Exception("Non-200 response: " + str(response.text))
+
+    data = response.json()
+    img_path = save_generated_image(data)
+    for image in data["artifacts"]:
+        # Decode each base64 artifact and open it in the local image viewer
+        Image.open(BytesIO(base64.b64decode(image["base64"]))).show()
+    return img_path
--- a/backend/services/llm_providers/text_to_image_generation/gen_variation_img.py
+++ b/backend/services/llm_providers/text_to_image_generation/gen_variation_img.py
@@ -0,0 +1,51 @@
+from loguru import logger
+import sys
+from PIL import Image
+from openai import OpenAI
+
+def gen_new_from_given_img(img_path, image_dir, num_img=1, img_size="1024x1024", response_format="url"):
+    """
+    Generates variations of a given image using OpenAI's image variation API.
+
+    The source image is flattened onto a white background and re-saved in place as PNG
+    (the file at ``img_path`` is overwritten) before it is uploaded.
+
+    Args:
+        img_path (str): Path to the original image file.
+        image_dir (str): Directory where the generated images will be saved.
+        num_img (int, optional): Number of image variations to generate. Defaults to 1.
+        img_size (str, optional): Size of the generated images. Defaults to "1024x1024".
+        response_format (str, optional): Format in which the generated images are returned. Defaults to "url".
+
+    Returns:
+        str: Path to the saved image variation.
+
+    Raises:
+        SystemExit: If a critical error occurs that prevents successful execution.
+    """
+    try:
+        from .save_image import save_generated_image  # helper is not imported at file level
+        logger.info(f"Starting image variation generation for: {img_path}")
+
+        # Convert and prepare the image
+        png = Image.open(img_path).convert('RGBA')
+        background = Image.new('RGBA', png.size, (255, 255, 255))
+        alpha_composite = Image.alpha_composite(background, png)
+        alpha_composite.save(img_path, 'PNG', quality=80)
+        logger.info("Image prepared for variation generation.")
+
+        client = OpenAI()
+        # Binary mode must not be combined with ``encoding``; open() raises ValueError otherwise.
+        with open(img_path, "rb") as image_file:
+            variation_response = client.images.create_variation(
+                image=image_file, n=num_img, size=img_size, response_format=response_format
+            )
+
+        # Saving the generated image
+        generated_image_path = save_generated_image(variation_response, image_dir)
+        logger.info(f"Image variation generated and saved to: {generated_image_path}")
+        return generated_image_path
+
+    except Exception as e:
+        logger.error(f"Error occurred during image variation generation: {e}")
+        sys.exit(f"Exiting due to critical error: {e}")
--- a/backend/services/llm_providers/text_to_image_generation/main_generate_image_from_prompt.py
+++ b/backend/services/llm_providers/text_to_image_generation/main_generate_image_from_prompt.py
@@ -0,0 +1,163 @@
+#########################################################
+#
+# This module generates images for blogs using APIs from
+# Dall-E and other free resources. Given a prompt, the
+# generated images are stored in a local directory.
+# Required: openai API key.
+#
+#########################################################
+
+# imports
+import os
+import sys
+import datetime
+import streamlit as st
+
+import openai  # OpenAI Python library to make API calls
+from loguru import logger
+# Reconfigure loguru for this module: drop the handlers registered so far and
+# log to stdout with a compact, colorized "level|file:line:function| message" layout.
+logger.remove()
+logger.add(sys.stdout,
+        colorize=True,
+        format="<level>{level}</level>|<green>{file}:{line}:{function}</green>| {message}"
+    )
+
+#from .gen_dali2_images
+from .gen_dali3_images import generate_dalle3_images
+from .gen_stabl_diff_img import generate_stable_diffusion_image
+from ..text_generation.main_text_generation import llm_text_gen
+from .gen_gemini_images import generate_gemini_image
+
+def generate_image(user_prompt, title=None, description=None, tags=None, content=None, aspect_ratio="16:9"):
+    """
+    Create an image from a text prompt using the configured image engine.
+
+    The user prompt (plus any optional context) is first expanded into a
+    detailed image prompt by generate_enhanced_img_prompt, the aspect ratio is
+    appended to that prompt, and the result is handed to the engine-specific
+    generator.
+
+    Args:
+        user_prompt (str): Text description of the desired image. When empty,
+            no image is generated.
+        title (str, optional): Title of the content the image is for.
+        description (str, optional): Description/summary of the content.
+        tags (list, optional): Tags related to the content.
+        content (str, optional): Content body or excerpt.
+        aspect_ratio (str, optional): Aspect ratio appended to the prompt and
+            passed to the Gemini generator. Defaults to "16:9".
+
+    Returns:
+        Whatever the selected engine function returns (presumably the location
+        of the stored image - confirm against the engine modules), or None
+        when no prompt was given or generation failed.
+    """
+    # FIXME: Need to remove default value to match sidebar input.
+    image_engine = 'Gemini-AI'
+
+    # Guard clause: without a prompt there is nothing to generate.
+    if not user_prompt:
+        logger.error("Skipping Image creation, No prompt provided.")
+        return None
+
+    try:
+        # Expand the user prompt with every piece of context we were given.
+        img_prompt = generate_enhanced_img_prompt(user_prompt, title, description, tags, content)
+
+        # The aspect ratio is communicated to the model as part of the prompt text.
+        if aspect_ratio:
+            img_prompt += f"\n\nAspect ratio: {aspect_ratio}"
+
+        # Stays None when the engine name matches none of the branches below.
+        image_stored_at = None
+        if 'Dalle3' in image_engine:
+            logger.info(f"Calling Dalle3 text-to-image with prompt: {img_prompt}")
+            image_stored_at = generate_dalle3_images(img_prompt)
+        elif 'Stability-AI' in image_engine:
+            logger.info(f"Calling Stable diffusion text-to-image with prompt: \n{img_prompt}")
+            image_stored_at = generate_stable_diffusion_image(img_prompt)
+        elif 'Gemini-AI' in image_engine:
+            logger.info(f"Calling Gemini text-to-image with prompt: \n{img_prompt}")
+            image_stored_at = generate_gemini_image(img_prompt, aspect_ratio=aspect_ratio)
+        return image_stored_at
+    except Exception as err:
+        # Best effort: report the failure to the log and the UI, then yield None.
+        logger.error(f"Failed to generate Image: {err}")
+        st.warning(f"Failed to generate Image: {err}")
+
+    return None
+
+
+def generate_img_prompt(user_prompt):
+    """
+    Ask the text LLM to turn free-form user text into a text-to-image prompt.
+
+    Args:
+        user_prompt (str): Text describing what the image should depict.
+
+    Returns:
+        The value returned by llm_text_gen for the instruction template below.
+    """
+    # Instruction template; the user text is embedded between triple single quotes.
+    llm_instruction = f"""
+        As an expert prompt generator for AI text to image models and artist, I will provide you with 'user text' for creating images.
+        Your task is to create a prompt for a highly relevant image from given 'user text'.
+        \n
+        Choose from various art styles, utilize light & shadow effects etc.
+        Make sure to avoid common image generation mistakes.
+        Reply with only one answer, no descrition and in plaintext.
+        Make sure your prompt is detailed and creative descriptions that will inspire unique and interesting images from the AI. 
+        
+        \n\nuser text:  
+        '''{user_prompt}'''"""
+
+    return llm_text_gen(llm_instruction)
+
+
+def generate_enhanced_img_prompt(user_prompt, title=None, description=None, tags=None, content=None):
+    """
+    Build an LLM-written image prompt from the user prompt plus optional context.
+
+    The user prompt, title, description and up to five tags are combined into
+    a context block, a short content excerpt is added, and the text LLM is
+    asked to produce a detailed image-generation prompt from them.
+
+    Args:
+        user_prompt (str): Base prompt from the user.
+        title (str, optional): Blog title or content title.
+        description (str, optional): Blog or content description/summary.
+        tags (list, optional): Tags related to the content. A single string is
+            treated as one tag; non-string items are converted with str().
+        content (str, optional): Actual content or excerpt; only the first
+            300 characters are used.
+
+    Returns:
+        str: The enhanced prompt. If the LLM call raises or returns an empty
+        response, the result of generate_img_prompt(user_prompt) is returned
+        instead.
+    """
+    # Start with the base prompt
+    context_parts = [user_prompt]
+
+    # Add relevant context if available
+    if title:
+        context_parts.append(f"Title: {title}")
+
+    if description:
+        context_parts.append(f"Description: {description}")
+
+    if tags:
+        # A bare string would otherwise be sliced and joined character by character.
+        tag_items = [tags] if isinstance(tags, str) else list(tags)
+        # Limit to 5 tags to avoid too much noise; str() keeps join() from
+        # raising TypeError on non-string tags.
+        tag_text = ", ".join(str(tag) for tag in tag_items[:5])
+        context_parts.append(f"Tags: {tag_text}")
+
+    # Create a combined context
+    combined_context = "\n".join(context_parts)
+
+    # Add some content excerpt if available (limited to avoid token limits)
+    content_excerpt = ""
+    if content:
+        # Just use the first few hundred characters as excerpt
+        content_excerpt = content[:300] + "..." if len(content) > 300 else content
+
+    # Create the prompt for LLM
+    prompt = f"""
+        As an expert prompt engineer for AI image generation models, create a detailed, creative prompt
+        for generating a high-quality, relevant image based on the following context:
+        
+        {combined_context}
+        
+        Additional content excerpt:
+        {content_excerpt}
+        
+        Your task is to:
+        1. Analyze the context and content to understand the main theme and subject
+        2. Create a rich, detailed prompt for image generation (50-75 words)
+        3. Include specific visual details, art style, mood, lighting, composition
+        4. Make sure the prompt is highly relevant to the original context
+        5. Avoid prohibited content or anything that violates image generation guidelines
+        
+        Reply with ONLY the final prompt. No explanations or other text.
+    """
+
+    # Generate the enhanced prompt
+    try:
+        enhanced_prompt = llm_text_gen(prompt)
+        if enhanced_prompt:
+            logger.info(f"Generated enhanced image prompt: {enhanced_prompt[:100]}...")
+            return enhanced_prompt
+        # An empty/None reply is unusable as an image prompt; use the fallback.
+        logger.error("Error generating enhanced prompt: LLM returned an empty response")
+    except Exception as e:
+        logger.error(f"Error generating enhanced prompt: {e}")
+
+    # Fall back to the simple prompt generation if enhanced fails
+    return generate_img_prompt(user_prompt)
--- a/backend/services/llm_providers/text_to_image_generation/save_image.py
+++ b/backend/services/llm_providers/text_to_image_generation/save_image.py
@@ -0,0 +1,39 @@
+import base64
+import datetime
+import os
+import requests
+from PIL import Image
+import logging
+
+def save_generated_image(img_generation_response, image_dir=None):
+    """
+    Save base64-encoded generated images to disk under timestamped names.
+
+    Args:
+        img_generation_response: Mapping with an "artifacts" sequence; each
+            artifact is a mapping whose "base64" entry holds the encoded image.
+        image_dir (str, optional): Directory to write into. Sibling generator
+            modules call this function with an explicit directory, so it is
+            accepted here; when omitted, the IMG_SAVE_DIR environment variable
+            is used, falling back to "generated_images".
+
+    Returns:
+        str | None: Path of the first image written, or None when the response
+        holds no artifacts or an image could not be decoded/written. Additional
+        artifacts are stored next to the first one with an "_<index>" suffix.
+    """
+    logging.basicConfig(level=logging.INFO)
+    logger = logging.getLogger(__name__)
+
+    # Explicit argument wins; otherwise use the environment with a local fallback.
+    image_save_dir = image_dir or os.getenv('IMG_SAVE_DIR', 'generated_images')
+
+    # Create the directory if it doesn't exist
+    if not os.path.exists(image_save_dir):
+        logger.info(f"Creating image save directory: {image_save_dir}")
+        os.makedirs(image_save_dir, exist_ok=True)
+
+    base_name = f"generated_image_{datetime.datetime.now():%Y-%m-%d-%H-%M-%S}"
+    saved_paths = []
+
+    try:
+        for index, artifact in enumerate(img_generation_response["artifacts"]):
+            # Decode before opening the file so bad data never leaves an empty file behind.
+            image_bytes = base64.b64decode(artifact["base64"])
+            # The first image keeps the plain name; later ones get an index
+            # suffix so they no longer overwrite each other.
+            suffix = "" if index == 0 else f"_{index}"
+            image_filepath = os.path.join(image_save_dir, f"{base_name}{suffix}.webp")
+            with open(image_filepath, "wb") as f:
+                f.write(image_bytes)
+            saved_paths.append(image_filepath)
+    except Exception as e:
+        logger.error(f"Error saving image: {e}")
+        return None
+
+    if not saved_paths:
+        # Nothing was written, so there is no valid path to report.
+        logger.error("Error saving image: response contained no artifacts")
+        return None
+
+    logger.info(f"Saved image at path: {saved_paths[0]}")
+
+    return saved_paths[0]