ALwrity Version 0.5.0 (Fastapi + React )

This commit is contained in:
ajaysi
2025-08-06 12:48:02 +05:30
parent f28a919caa
commit 32f97fa6b3
476 changed files with 115544 additions and 28747 deletions

View File

@@ -0,0 +1,56 @@
from openai import OpenAI
from loguru import logger
import sys
from .save_image import save_generated_image
from tenacity import (
retry,
stop_after_attempt,
wait_random_exponential,
) # for exponential backoff
@retry(wait=wait_random_exponential(min=1, max=120), stop=stop_after_attempt(6))
def generate_dalle3_images(img_prompt, image_dir, size="1024x1024", quality="hd", n=1):
"""
Generates images using the DALL-E 3 model based on a given text prompt.
Args:
img_prompt (str): Text prompt to generate the image.
image_dir (str): Directory where the generated image will be saved.
size (str, optional): Size of the generated images. Defaults to "1024x1024".
quality (str, optional): Quality of the generated images. Defaults to "hd".
n (int, optional): Number of images to generate. Defaults to 1.
Returns:
str: Path to the saved image.
Raises:
SystemExit: If an error occurs in image generation or saving.
"""
try:
logger.info("Generating Dall-e-3 image for the blog.")
client = OpenAI()
img_generation_response = client.images.generate(
model="dall-e-3",
prompt=img_prompt,
size=size,
quality=quality,
n=n
)
# Save the generated image locally.
try:
img_path = save_generated_image(img_generation_response, image_dir)
return img_path
except Exception as err:
logger.error(f"Failed to Save generated image: {err}")
except openai.OpenAIError as e:
logger.error(f"Dalle-3 image generation error: HTTP Status {e.http_status}, Error: {e.error}")
sys.exit("Exiting due to Dalle-3 image generation error.")
except Exception as e:
logger.error(f"Failed to generate images with Dalle3: {e}")
sys.exit("Exiting due to a general error in image generation.")

View File

@@ -0,0 +1,53 @@
from openai import OpenAI
from loguru import logger
import sys
from tenacity import (
retry,
stop_after_attempt,
wait_random_exponential,
) # for exponential backoff
from .save_image import save_generated_image
@retry(wait=wait_random_exponential(min=1, max=120), stop=stop_after_attempt(6))
def generate_dalle3_images(img_prompt, image_dir, size="1024x1024", quality="hd", n=1):
"""
Generates images using the DALL-E 3 model based on a given text prompt.
Args:
img_prompt (str): Text prompt to generate the image.
image_dir (str): Directory where the generated image will be saved.
size (str, optional): Size of the generated images. Defaults to "1024x1024".
quality (str, optional): Quality of the generated images. Defaults to "hd".
n (int, optional): Number of images to generate. Defaults to 1.
Returns:
str: Path to the saved image.
Raises:
SystemExit: If an error occurs in image generation or saving.
"""
try:
logger.info("Generating Dall-e-3 image for the blog.")
client = OpenAI()
img_generation_response = client.images.generate(
model="dall-e-3",
prompt=img_prompt,
size=size,
quality=quality,
n=n
)
img_path = save_generated_image(img_generation_response, image_dir)
return img_path
except openai.OpenAIError as e:
logger.error(f"Dalle-3 image generation error: HTTP Status {e.http_status}, Error: {e.error}")
sys.exit("Exiting due to Dalle-3 image generation error.")
except Exception as e:
logger.error(f"Failed to generate images with Dalle3: {e}")
sys.exit("Exiting due to a general error in image generation.")

View File

@@ -0,0 +1,421 @@
import os
import sys
import time
import datetime
import streamlit as st
from PIL import Image
from io import BytesIO
from loguru import logger
from tenacity import retry, stop_after_attempt, wait_random_exponential
# Import APIKeyManager
from ...api_key_manager import APIKeyManager
try:
import google.generativeai as genai
from google.generativeai import types
except ImportError:
genai = None
logger.warning("Google genai library not available. Install with: pip install google-generativeai")
from .save_image import save_generated_image
# Configure logging
logging.basicConfig(
level=logging.INFO,
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger('gemini_image_generator')
# With image generation in Gemini, your imagination is the limit.
# If what you see doesn't quite match what you had in mind, try adding more details to the prompt.
# The more specific you are, the better Gemini can create images that reflect your vision.
# Generate images using Gemini
# Gemini 2.0 Flash Experimental supports the ability to output text and inline images.
# This lets you use Gemini to conversationally edit images or generate outputs with interwoven text (for example, generating a blog post with text and images in a single turn).
# Note: Make sure to include responseModalities: ["Text", "Image"] in your generation configuration for text and image output with gemini-2.0-flash-exp-image-generation. Image only is not allowed.
class AIPromptGenerator:
"""
Generates enhanced AI image prompts based on user keywords,
following the guidelines of the Imagen documentation.
"""
def __init__(self):
self.photography_styles = ["photo", "photograph"]
self.art_styles = ["painting", "sketch", "drawing", "illustration", "digital art", "render"]
self.art_techniques = ["technical pencil drawing", "charcoal drawing", "color pencil drawing", "pastel painting", "digital art", "art deco (poster)", "impressionist painting", "renaissance painting", "pop art"]
self.camera_proximity = ["close-up", "zoomed out", "taken from far away"]
self.camera_position = ["aerial", "from below"]
self.lighting = ["natural lighting", "dramatic lighting", "warm lighting", "cold lighting", "studio lighting", "golden hour lighting"]
self.camera_settings = ["motion blur", "soft focus", "bokeh", "portrait"]
self.lens_types = ["35mm lens", "50mm lens", "fisheye lens", "wide angle lens", "macro lens", "telephoto lens"]
self.film_types = ["black and white film", "polaroid"]
self.materials = ["made of cheese", "made of paper", "made of neon tubes", "metallic", "glass", "wooden", "stone"]
self.shapes = ["in the shape of a bird", "angular", "curved", "geometric"]
self.quality_modifiers_general = ["high-quality", "beautiful", "stylized", "detailed", "epic", "grand"]
self.quality_modifiers_photo = ["4K", "HDR", "studio photo", "professional photo", "photorealistic"]
self.quality_modifiers_art = ["by a professional artist", "intricate details", "masterpiece"]
self.aspect_ratios = ["1:1 aspect ratio", "4:3 aspect ratio", "3:4 aspect ratio", "16:9 aspect ratio", "9:16 aspect ratio"]
self.photorealistic_modifiers = {
"portraits": ["prime lens", "zoom lens", "24-35mm", "black and white film", "film noir", "shallow depth of field", "duotone (mention two colors)"],
"objects": ["macro lens", "60-105mm", "high detail", "precise focusing", "controlled lighting"],
"motion": ["telephoto zoom lens", "100-400mm", "fast shutter speed", "action shot", "movement tracking"],
"wide-angle": ["wide-angle lens", "10-24mm", "long exposure", "sharp focus", "smooth water or clouds", "astro photography"]
}
def generate_prompt(self, keywords):
"""
Generates an enhanced AI image prompt based on user-provided keywords.
Args:
keywords (list): A list of keywords describing the desired image.
Returns:
str: An enhanced AI image prompt.
"""
if not keywords:
return "A beautiful image."
prompt_parts = []
subject = " ".join(keywords)
prompt_parts.append(subject)
# Add context and background (optional)
context_options = ["in a detailed background", "outdoors", "indoors", "in a studio", "with a blurred background"]
if random.random() < 0.6: # Add context with a probability
prompt_parts.append(random.choice(context_options))
# Add style (optional)
style_options = self.photography_styles + [f"{art} of" for art in self.art_styles]
if random.random() < 0.7:
prompt_parts.insert(0, random.choice(style_options))
if prompt_parts[0].startswith("painting of") or prompt_parts[0].startswith("sketch of") or prompt_parts[0].startswith("drawing of"):
if random.random() < 0.5:
prompt_parts.append(f"in the style of {random.choice(self.art_techniques)}")
# Add photography modifiers (if photography style is chosen)
if any(style in prompt_parts[0] for style in self.photography_styles):
if random.random() < 0.4:
prompt_parts.append(random.choice(self.camera_proximity))
if random.random() < 0.3:
prompt_parts.append(random.choice(self.camera_position))
if random.random() < 0.5:
prompt_parts.append(random.choice(self.lighting))
if random.random() < 0.3:
prompt_parts.append(random.choice(self.camera_settings))
if random.random() < 0.2:
prompt_parts.append(random.choice(self.lens_types))
if random.random() < 0.1:
prompt_parts.append(random.choice(self.film_types))
# Add shapes and materials (optional)
if random.random() < 0.3:
prompt_parts.append(random.choice(self.materials))
if random.random() < 0.2:
prompt_parts.append(random.choice(self.shapes))
# Add quality modifiers (optional)
if random.random() < 0.6:
quality_options = self.quality_modifiers_general
if any(style in prompt_parts[0] for style in self.photography_styles):
quality_options += self.quality_modifiers_photo
else:
quality_options += self.quality_modifiers_art
prompt_parts.append(random.choice(list(set(quality_options)))) # Avoid duplicates
# Add aspect ratio (optional)
if random.random() < 0.2:
prompt_parts.append(random.choice(self.aspect_ratios))
return ", ".join(prompt_parts)
def generate_photorealistic_prompt(self, keywords, focus=""):
"""
Generates an enhanced AI image prompt specifically for photorealistic images.
Args:
keywords (list): A list of keywords describing the desired image.
focus (str, optional): The focus of the photorealistic image (e.g., "portraits", "objects", "motion", "wide-angle"). Defaults to "".
Returns:
str: An enhanced photorealistic AI image prompt.
"""
if not keywords:
return "A photorealistic image."
prompt_parts = ["A photo of", "photorealistic"]
prompt_parts.append(" ".join(keywords))
if focus and focus in self.photorealistic_modifiers:
modifiers = self.photorealistic_modifiers[focus]
if modifiers:
num_modifiers = random.randint(1, min(3, len(modifiers)))
selected_modifiers = random.sample(modifiers, num_modifiers)
prompt_parts.extend(selected_modifiers)
# Add general quality modifiers
if random.random() < 0.5:
prompt_parts.append(random.choice(self.quality_modifiers_photo))
# Add lighting
if random.random() < 0.4:
prompt_parts.append(random.choice(self.lighting))
return ", ".join(prompt_parts)
def generate_gemini_image(prompt, keywords=None, style=None, focus=None, enhance_prompt=True, max_retries=3, initial_retry_delay=2, aspect_ratio="16:9"):
"""
Generate an image using Gemini's image generation capabilities.
Args:
prompt (str): The text prompt for image generation
keywords (list, optional): Keywords to enhance the prompt
style (str, optional): Style of the image (photorealistic, artistic, etc.)
focus (str, optional): Focus area for photorealistic images
enhance_prompt (bool, optional): Whether to enhance the prompt with AI
max_retries (int, optional): Maximum number of retry attempts
initial_retry_delay (int, optional): Initial delay between retries
aspect_ratio (str, optional): Aspect ratio for the generated image
Returns:
str: The path to the generated image.
"""
logger.info(f"Generating image with prompt: '{prompt[:100]}...'")
# Use APIKeyManager instead of direct environment variable access
api_key_manager = APIKeyManager()
api_key = api_key_manager.get_api_key("gemini")
if not api_key:
error_msg = "Gemini API key not found. Please configure it in the onboarding process."
logger.error(error_msg)
st.error(f"🔑 {error_msg}")
return None
# Enhance the prompt if requested
if enhance_prompt and keywords:
prompt_generator = AIPromptGenerator()
if style == "photorealistic" and focus:
logger.info(f"Generating photorealistic prompt with focus: {focus}")
enhanced_prompt = prompt_generator.generate_photorealistic_prompt(keywords, focus)
else:
logger.info("Generating enhanced prompt")
enhanced_prompt = prompt_generator.generate_prompt(keywords)
# Combine the enhanced prompt with the original prompt
prompt = f"{prompt}\n\nEnhanced prompt: {enhanced_prompt}"
logger.info(f"Final prompt: '{prompt[:100]}...'")
# Add aspect ratio to the prompt
if aspect_ratio:
prompt += f"\n\nPlease generate the image with {aspect_ratio} aspect ratio."
retry_count = 0
retry_delay = initial_retry_delay
while retry_count <= max_retries:
try:
client = genai.Client(api_key=api_key)
contents = (prompt)
logger.info("Sending request to Gemini API")
response = client.models.generate_content(
model="gemini-2.0-flash-exp-image-generation",
contents=contents,
config=types.GenerateContentConfig(
response_modalities=['Text', 'Image']
)
)
logger.info("Received response from Gemini API")
img_name = None
for part in response.candidates[0].content.parts:
if part.text is not None:
logger.info(f"Received text response: '{part.text[:100]}...'")
print(part.text)
elif part.inline_data is not None:
logger.info("Received image data from Gemini")
image = Image.open(BytesIO((part.inline_data.data)))
# Resize image to match aspect ratio if needed
if aspect_ratio:
current_width, current_height = image.size
target_width = current_width
target_height = current_height
# Calculate target dimensions based on aspect ratio
if aspect_ratio == "16:9":
target_height = int(current_width * 9/16)
elif aspect_ratio == "9:16":
target_width = int(current_height * 9/16)
elif aspect_ratio == "4:3":
target_height = int(current_width * 3/4)
elif aspect_ratio == "3:4":
target_width = int(current_height * 3/4)
elif aspect_ratio == "1:1":
target_size = min(current_width, current_height)
target_width = target_size
target_height = target_size
logger.info(f"Resizing image from {current_width}x{current_height} to {target_width}x{target_height}")
# Create a new image with the target dimensions
resized_image = Image.new('RGB', (target_width, target_height), (255, 255, 255))
# Calculate position to paste the original image
paste_x = (target_width - current_width) // 2
paste_y = (target_height - current_height) // 2
# Paste the original image onto the new canvas
resized_image.paste(image, (paste_x, paste_y))
image = resized_image
if part.text is not None:
img_name = f'{part.text}-gemini-native-image.png'
else:
img_name = f'gemini-native-image-{datetime.datetime.now().strftime("%Y%m%d-%H%M%S")}.png'
try:
logger.info(f"Saving image to: {img_name}")
image.save(img_name)
# Create a dictionary with the expected format for save_generated_image
img_response = {
"artifacts": [
{
"base64": base64.b64encode(open(img_name, "rb").read()).decode('utf-8')
}
]
}
# Call save_generated_image with the correct format
save_generated_image(img_response)
except Exception as err:
logger.error(f"Failed to save image: {err}")
st.error(f"Failed to save image: {err}")
logger.info(f"Image generation completed. Image name: {img_name}")
return img_name
except Exception as err:
error_message = str(err)
logger.error(f"Error in generate_gemini_image: {err}")
# Check if this is a 503 UNAVAILABLE error
if "503 UNAVAILABLE" in error_message and retry_count < max_retries:
retry_count += 1
logger.info(f"Model is overloaded. Retrying in {retry_delay} seconds (attempt {retry_count}/{max_retries})")
st.warning(f"The image generation service is currently busy. Retrying in {retry_delay} seconds...")
time.sleep(retry_delay)
# Exponential backoff
retry_delay *= 2
else:
st.error(f"Error generating image: {err}")
return None
# If we've exhausted all retries
st.error("The image generation service is currently unavailable. Please try again later.")
return None
def edit_image(image_path, prompt, max_retries=3, initial_retry_delay=2):
"""
- Image editing (text and image to image)
Example prompt: "Edit this image to make it look like a cartoon"
Example prompt: [image of a cat] + [image of a pillow] + "Create a cross stitch of my cat on this pillow."
- Multi-turn image editing (chat)
Example prompts: [upload an image of a blue car.] "Turn this car into a convertible." "Now change the color to yellow."
Image editing with Gemini
To perform image editing, add an image as input.
The following example demonstrats uploading base64 encoded images.
For multiple images and larger payloads, check the image input section.
Args:
image_path (str): The path to the image to edit.
prompt (str): The prompt to edit the image with.
max_retries (int, optional): Maximum number of retry attempts for handling 503 errors. Defaults to 3.
initial_retry_delay (int, optional): Initial delay in seconds before retrying. Defaults to 2.
Returns:
str: The path to the edited image.
"""
import PIL.Image
image = PIL.Image.open(image_path)
retry_count = 0
retry_delay = initial_retry_delay
while retry_count <= max_retries:
try:
client = genai.Client()
text_input = (prompt)
logger.info("Sending request to Gemini API for image editing")
response = client.models.generate_content(
model="gemini-2.0-flash-exp-image-generation",
contents=[text_input, image],
config=types.GenerateContentConfig(
response_modalities=['Text', 'Image']
)
)
logger.info("Received response from Gemini API for image editing")
edited_img_name = None
for part in response.candidates[0].content.parts:
if part.text is not None:
logger.info(f"Received text response: '{part.text[:100]}...'")
st.write(part.text)
elif part.inline_data is not None:
logger.info("Received edited image data from Gemini")
edited_image = Image.open(BytesIO(part.inline_data.data))
edited_image.show()
# Save the edited image
edited_img_name = f'edited-{os.path.basename(image_path)}'
try:
logger.info(f"Saving edited image to: {edited_img_name}")
edited_image.save(edited_img_name)
# Create a dictionary with the expected format for save_generated_image
img_response = {
"artifacts": [
{
"base64": base64.b64encode(open(edited_img_name, "rb").read()).decode('utf-8')
}
]
}
# Call save_generated_image with the correct format
save_generated_image(img_response)
except Exception as err:
logger.error(f"Failed to save edited image: {err}")
st.error(f"Failed to save edited image: {err}")
logger.info(f"Image editing completed. Edited image name: {edited_img_name}")
return edited_img_name
except Exception as err:
error_message = str(err)
logger.error(f"Error in edit_image: {err}")
# Check if this is a 503 UNAVAILABLE error
if "503 UNAVAILABLE" in error_message and retry_count < max_retries:
retry_count += 1
logger.info(f"Model is overloaded. Retrying in {retry_delay} seconds (attempt {retry_count}/{max_retries})")
st.warning(f"The image editing service is currently busy. Retrying in {retry_delay} seconds...")
time.sleep(retry_delay)
# Exponential backoff
retry_delay *= 2
else:
st.error(f"Error editing image: {err}")
return None
# If we've exhausted all retries
st.error("The image editing service is currently unavailable. Please try again later.")
return None

View File

@@ -0,0 +1,69 @@
# Ensure you sign up for an account to obtain an API key:
# https://platform.stability.ai/
# Your API key can be found here after account creation:
# https://platform.stability.ai/account/keys
import os
import requests
import base64
from PIL import Image
from io import BytesIO
import streamlit as st
from loguru import logger
# Import APIKeyManager
from ...api_key_manager import APIKeyManager
def save_generated_image(data):
"""Save the generated image to a file."""
# Implementation for saving image
pass
def generate_stable_diffusion_image(prompt):
engine_id = "stable-diffusion-xl-1024-v1-0"
api_host = os.getenv('API_HOST', 'https://api.stability.ai')
# Use APIKeyManager instead of direct environment variable access
api_key_manager = APIKeyManager()
api_key = api_key_manager.get_api_key("stability")
if api_key is None:
st.warning("Missing Stability API key. Please configure it in the onboarding process.")
return None
response = requests.post(
f"{api_host}/v1/generation/{engine_id}/text-to-image",
headers={
"Content-Type": "application/json",
"Accept": "application/json",
"Authorization": f"Bearer {api_key}"
},
json={
"text_prompts": [
{
"text": prompt
}
],
"cfg_scale": 7,
"height": 1024,
"width": 1024,
"samples": 1,
"steps": 30,
},
)
if response.status_code != 200:
raise Exception("Non-200 response: " + str(response.text))
data = response.json()
img_path = save_generated_image(data)
for i, image in enumerate(data["artifacts"]):
# Decode base64 image data
img_data = base64.b64decode(image["base64"])
# Open image using PIL
img = Image.open(BytesIO(img_data))
# Display the image
img.show()
return img_path

View File

@@ -0,0 +1,51 @@
from loguru import logger
import sys
from PIL import Image
from openai import OpenAI
def gen_new_from_given_img(img_path, image_dir, num_img=1, img_size="1024x1024", response_format="url"):
"""
Generates variations of a given image using OpenAI's image variation API.
This function takes an existing image, processes it, and generates a specified number of new images based on it.
These generated images are variations of the original, providing creative flexibility.
Args:
img_path (str): Path to the original image file.
image_dir (str): Directory where the generated images will be saved.
num_img (int, optional): Number of image variations to generate. Defaults to 1.
img_size (str, optional): Size of the generated images. Defaults to "1024x1024".
response_format (str, optional): Format in which the generated images are returned. Defaults to "url".
Returns:
str: Path to the saved image variation.
Raises:
SystemExit: If a critical error occurs that prevents successful execution.
"""
try:
logger.info(f"Starting image variation generation for: {img_path}")
# Convert and prepare the image
png = Image.open(img_path).convert('RGBA')
background = Image.new('RGBA', png.size, (255, 255, 255))
alpha_composite = Image.alpha_composite(background, png)
alpha_composite.save(img_path, 'PNG', quality=80)
logger.info("Image prepared for variation generation.")
client = OpenAI()
variation_response = client.images.create_variation(
image=open(img_path, "rb", encoding="utf-8"),
n=num_img,
size=img_size,
response_format=response_format
)
# Saving the generated image
generated_image_path = save_generated_image(variation_response, image_dir)
logger.info(f"Image variation generated and saved to: {generated_image_path}")
return generated_image_path
except Exception as e:
logger.error(f"Error occurred during image variation generation: {e}")
sys.exit(f"Exiting due to critical error: {e}")

View File

@@ -0,0 +1,163 @@
#########################################################
#
# This module will generate images for the blogs using APIs
# from Dall-E and other free resources. Given a prompt, the
# images will be stored in local directory.
# Required: openai API key.
#
#########################################################
# imports
import os
import sys
import datetime
import streamlit as st
import openai # OpenAI Python library to make API calls
from loguru import logger
logger.remove()
logger.add(sys.stdout,
colorize=True,
format="<level>{level}</level>|<green>{file}:{line}:{function}</green>| {message}"
)
#from .gen_dali2_images
from .gen_dali3_images import generate_dalle3_images
from .gen_stabl_diff_img import generate_stable_diffusion_image
from ..text_generation.main_text_generation import llm_text_gen
from .gen_gemini_images import generate_gemini_image
def generate_image(user_prompt, title=None, description=None, tags=None, content=None, aspect_ratio="16:9"):
"""
The generation API endpoint creates an image based on a text prompt.
Required inputs:
prompt (str): A text description of the desired image(s). The maximum length is 1000 characters.
Optional inputs:
--> image_engine: dalle2, dalle3, stable diffusion are supported.
--> num_images (int): The number of images to generate. Must be between 1 and 10. Defaults to 1.
--> size (str): The size of the generated images. Must be one of "256x256", "512x512", or "1024x1024".
Smaller images are faster. Defaults to "1024x1024".
-->response_format (str): The format in which the generated images are returned.
Must be one of "url" or "b64_json". Defaults to "url".
--> user (str): A unique identifier representing your end-user, which will help OpenAI to monitor and detect abuse.
--> aspect_ratio (str): The aspect ratio for the generated image. Must be one of "16:9", "4:3", or "1:1". Defaults to "16:9".
"""
# FIXME: Need to remove default value to match sidebar input.
image_engine = 'Gemini-AI'
image_stored_at = None
if user_prompt:
try:
# Use enhanced prompt generator with all available parameters
img_prompt = generate_enhanced_img_prompt(user_prompt, title, description, tags, content)
# Add aspect ratio to the prompt
if aspect_ratio:
img_prompt += f"\n\nAspect ratio: {aspect_ratio}"
if 'Dalle3' in image_engine:
logger.info(f"Calling Dalle3 text-to-image with prompt: {img_prompt}")
image_stored_at = generate_dalle3_images(img_prompt)
elif 'Stability-AI' in image_engine:
logger.info(f"Calling Stable diffusion text-to-image with prompt: \n{img_prompt}")
image_stored_at = generate_stable_diffusion_image(img_prompt)
elif 'Gemini-AI' in image_engine:
logger.info(f"Calling Gemini text-to-image with prompt: \n{img_prompt}")
image_stored_at = generate_gemini_image(img_prompt, aspect_ratio=aspect_ratio)
return image_stored_at
except Exception as err:
logger.error(f"Failed to generate Image: {err}")
st.warning(f"Failed to generate Image: {err}")
else:
logger.error("Skipping Image creation, No prompt provided.")
def generate_img_prompt(user_prompt):
"""
Given prompt, this functions generated a prompt for image generation.
"""
prompt = f"""
As an expert prompt generator for AI text to image models and artist, I will provide you with 'user text' for creating images.
Your task is to create a prompt for a highly relevant image from given 'user text'.
\n
Choose from various art styles, utilize light & shadow effects etc.
Make sure to avoid common image generation mistakes.
Reply with only one answer, no descrition and in plaintext.
Make sure your prompt is detailed and creative descriptions that will inspire unique and interesting images from the AI.
\n\nuser text:
'''{user_prompt}'''"""
response = llm_text_gen(prompt)
return response
def generate_enhanced_img_prompt(user_prompt, title=None, description=None, tags=None, content=None):
"""
Given user prompt and additional context (title, description, tags, content),
this function generates an enhanced prompt for better image generation.
Args:
user_prompt (str): Base prompt from the user
title (str, optional): Blog title or content title
description (str, optional): Blog or content description/summary
tags (list, optional): List of tags related to the content
content (str, optional): Actual content or excerpt
Returns:
str: Enhanced prompt for image generation
"""
# Start with the base prompt
context_parts = [user_prompt]
# Add relevant context if available
if title:
context_parts.append(f"Title: {title}")
if description:
context_parts.append(f"Description: {description}")
if tags and len(tags) > 0:
tag_text = ", ".join(tags[:5]) # Limit to 5 tags to avoid too much noise
context_parts.append(f"Tags: {tag_text}")
# Create a combined context
combined_context = "\n".join(context_parts)
# Add some content excerpt if available (limited to avoid token limits)
content_excerpt = ""
if content:
# Just use the first few hundred characters as excerpt
content_excerpt = content[:300] + "..." if len(content) > 300 else content
# Create the prompt for LLM
prompt = f"""
As an expert prompt engineer for AI image generation models, create a detailed, creative prompt
for generating a high-quality, relevant image based on the following context:
{combined_context}
Additional content excerpt:
{content_excerpt}
Your task is to:
1. Analyze the context and content to understand the main theme and subject
2. Create a rich, detailed prompt for image generation (50-75 words)
3. Include specific visual details, art style, mood, lighting, composition
4. Make sure the prompt is highly relevant to the original context
5. Avoid prohibited content or anything that violates image generation guidelines
Reply with ONLY the final prompt. No explanations or other text.
"""
# Generate the enhanced prompt
try:
enhanced_prompt = llm_text_gen(prompt)
logger.info(f"Generated enhanced image prompt: {enhanced_prompt[:100]}...")
return enhanced_prompt
except Exception as e:
logger.error(f"Error generating enhanced prompt: {e}")
# Fall back to the simple prompt generation if enhanced fails
return generate_img_prompt(user_prompt)

View File

@@ -0,0 +1,39 @@
import base64
import datetime
import os
import requests
from PIL import Image
import logging
def save_generated_image(img_generation_response):
"""
Save generated images for blog, ensuring unique names for SEO.
"""
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
# Get image save directory with fallback to a local directory
image_save_dir = os.getenv('IMG_SAVE_DIR', 'generated_images')
# Create the directory if it doesn't exist
if not os.path.exists(image_save_dir):
logger.info(f"Creating image save directory: {image_save_dir}")
os.makedirs(image_save_dir, exist_ok=True)
generated_image_name = f"generated_image_{datetime.datetime.now():%Y-%m-%d-%H-%M-%S}.webp"
generated_image_filepath = os.path.join(image_save_dir, generated_image_name)
try:
for i, image in enumerate(img_generation_response["artifacts"]):
with open(generated_image_filepath, "wb") as f:
f.write(base64.b64decode(image["base64"]))
except requests.exceptions.RequestException as e:
logger.error(f"Failed to get generated image content: {e}")
return None
except Exception as e:
logger.error(f"Error saving image: {e}")
return None
logger.info(f"Saved image at path: {generated_image_filepath}")
return generated_image_filepath