Create images for blogs - Stability AI

2024-04-22 19:02:55 +05:30
parent 357cba36e4
commit aec2d6b432
11 changed files with 108 additions and 71 deletions
--- a/lib/gpt_providers/text_to_image_generation/gen_stabl_diff_img.py
+++ b/lib/gpt_providers/text_to_image_generation/gen_stabl_diff_img.py
@@ -1,41 +1,56 @@
-from PIL import Image
-import requests
-
 # Ensure you sign up for an account to obtain an API key:
 # https://platform.stability.ai/
 # Your API key can be found here after account creation:
 # https://platform.stability.ai/account/keys

+import base64
+import os
+import requests
+from PIL import Image
+from io import BytesIO
+
+from .save_image import save_generated_image
+

 def generate_stable_diffusion_image(prompt):
-    """
-    Generate images using Stable Diffusion API based on a given prompt.
-
-    Args:
-        prompt (str): The prompt to generate the image.
-        image_dir (str): The directory where the image will be saved.
-
-    Raises:
-        Warning: If the adult content classifier is triggered.
-        Exception: For any issues during image generation or saving.
-    """
-    api_key = os.getenv('STABILITY_API_KEY')
-
+    """Generate an image from `prompt` with the Stability AI REST API.
+
+    Returns whatever save_generated_image() returns (saved path, or None);
+    raises Exception if the API key is missing or the response is not 200.
+    """
+    engine_id = "stable-diffusion-xl-1024-v1-0"
+    api_host = os.getenv('API_HOST', 'https://api.stability.ai')
+    api_key = os.getenv("STABILITY_API_KEY")
+    if not api_key:
+        raise Exception("Missing Stability API key.")
+
    response = requests.post(
-        f"https://api.stability.ai/v2beta/stable-image/generate/sd3",
+        f"{api_host}/v1/generation/{engine_id}/text-to-image",
        headers={
-            "authorization": f"Bearer {api_key}",
-            "accept": "image/*"
+            "Content-Type": "application/json",
+            "Accept": "application/json",
+            "Authorization": f"Bearer {api_key}"
        },
-        files={"none": ''},
-        data={
-            "prompt": prompt,
-            "output_format": "webp",
+        json={
+            "text_prompts": [{"text": prompt}],
+            "cfg_scale": 7,
+            "height": 1024,
+            "width": 1024,
+            "samples": 1,
+            "steps": 30,
        },
+        # Without a timeout a stalled connection would block forever.
+        timeout=120,
    )
+    if response.status_code != 200:
+        raise Exception("Non-200 response: " + str(response.text))
+
+    data = response.json()
+    image_path = save_generated_image(data)

-    if response.status_code == 200:
-        with open("./dog-wearing-glasses.jpeg", 'wb') as file:
-            file.write(response.content)
-    else:
-        raise Exception(str(response.json()))
+    # Preview every returned artifact with PIL.
+    for image in data["artifacts"]:
+        img = Image.open(BytesIO(base64.b64decode(image["base64"])))
+        img.show()
+    # Hand the saved path back: generate_image() stores this return value.
+    return image_path
--- a/lib/gpt_providers/text_to_image_generation/main_generate_image_from_prompt.py
+++ b/lib/gpt_providers/text_to_image_generation/main_generate_image_from_prompt.py
@@ -23,9 +23,10 @@ logger.add(sys.stdout,
 #from .gen_dali2_images
 from .gen_dali3_images import generate_dalle3_images
 from .gen_stabl_diff_img import generate_stable_diffusion_image
+from ..text_generation.main_text_generation import llm_text_gen


-def generate_image(user_prompt, image_engine="dalle3"):
+def generate_image(user_prompt, image_engine):
    """
    The generation API endpoint creates an image based on a text prompt.

@@ -41,15 +42,17 @@ def generate_image(user_prompt, image_engine="dalle3"):
    Must be one of "url" or "b64_json". Defaults to "url".
    --> user (str): A unique identifier representing your end-user, which will help OpenAI to monitor and detect abuse.
    """
-    img_prompt = generate_img_prompt(user_prompt) 
-    # call the OpenAI API to generate image from prompt.
-    logger.info(f"Calling image.generate with prompt: {img_prompt}")
-
-    if 'Dalle3' in image_engine:
-        image_stored_at = generate_dalle3_images(img_prompt)
-    elif 'Stable Diffusion' in image_engine:
-        image_stored_at = generate_stable_diffusion_image(img_prompt)
-
+    image_stored_at = None  # stays None if the engine is unknown or generation fails
+    try:
+        img_prompt = generate_img_prompt(user_prompt)
+        if 'Dalle3' in image_engine:
+            logger.info(f"Calling Dalle3 text-to-image with prompt: {img_prompt}")
+            image_stored_at = generate_dalle3_images(img_prompt)
+        elif 'Stability-Stable-Diffusion' in image_engine:
+            logger.info(f"Calling Stable diffusion text-to-image with prompt: \n{img_prompt}")
+            image_stored_at = generate_stable_diffusion_image(img_prompt)
+    except Exception as err:
+        logger.error(f"Failed to generate Image: {err}")
    return image_stored_at


@@ -57,17 +60,16 @@ def generate_img_prompt(user_prompt):
    """
    Given prompt, this functions generated a prompt for image generation.
    """
-    # I want you to act as an artist advisor providing advice on various art styles such tips on utilizing 
-    # light & shadow effects effectively in painting, shading techniques while sculpting etc.
-    # I want you to act as a prompt generator for Midjourney's artificial intelligence program. 
-    # Your job is to provide detailed and creative descriptions that will inspire unique and interesting images from the AI. 
-    # Here is your first prompt: ""
-    logger.info(f"Generate image prompt for : {user_prompt}")
-    prompt = f"""As an educationist and expert infographic artist, your tasked to create prompts that will be used for image generation.
-            Craft prompt for Openai Dall-e image generation program. Clearly describe the given text to represent it as image.
-            Make sure to avoid common image generation mistakes. 
-            Advice for creating prompt for image from the given text(no more than 150 words).
-            Reply with only one answer and no descrition. Generate image prompt for the below text.
-            Text: {user_prompt}"""
-    response = (prompt)
+    prompt = f"""
+        As an expert prompt engineer and artist, I will provide you with 'text' for creating image.
+        I want you to act as a prompt generator for AI text to image models(no more than 150 words).
+        \n
+        Choose from various art styles, utilize light & shadow effects etc.
+        Make sure to avoid common image generation mistakes.
+        Reply with only one answer, no descrition and in plaintext.
+        Make sure your prompt is detailed and creative descriptions that will inspire unique and interesting images from the AI. 
+        
+        \n\ntext:{user_prompt} """
+
+    response = llm_text_gen(prompt)
    return response
--- a/lib/gpt_providers/text_to_image_generation/save_image.py
+++ b/lib/gpt_providers/text_to_image_generation/save_image.py
@@ -1,35 +1,28 @@
+import base64
 import datetime
 import os
 import requests
 from PIL import Image
 import logging

-def save_generated_image(img_generation_response, image_dir):
+def save_generated_image(img_generation_response):
    """
    Save generated images for blog, ensuring unique names for SEO.
    """
    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger(__name__)

-    generated_image_name = f"generated_image_{datetime.datetime.now():%Y-%m-%d-%H-%M-%S}.png"
-    generated_image_filepath = os.path.join(image_dir, generated_image_name)
-    generated_image_url = img_generation_response.data[0].url
+    image_stem = os.path.join(os.getenv('IMG_SAVE_DIR') or os.getcwd(), f"generated_image_{datetime.datetime.now():%Y-%m-%d-%H-%M-%S}")  # CWD when IMG_SAVE_DIR is unset
+    generated_image_filepath = f"{image_stem}.webp"  # path of the first artifact; this is what gets returned

-    logger.info(f"Fetch the image from url: {generated_image_url}")
    try:
-        response = requests.get(generated_image_url, stream=True)
-        response.raise_for_status()
-        with open(generated_image_filepath, "wb", encoding="utf-8") as image_file:
-            image_file.write(response.content)
+        for i, image in enumerate(img_generation_response["artifacts"]):  # later artifacts get a -<i> suffix, not an overwrite
+            with open(generated_image_filepath if i == 0 else f"{image_stem}-{i}.webp", "wb") as f:
+                f.write(base64.b64decode(image["base64"]))
    except requests.exceptions.RequestException as e:
        logger.error(f"Failed to get generated image content: {e}")
        return None

    logger.info(f"Saved image at path: {generated_image_filepath}")

-    if os.environ.get('DISPLAY', ''):  # Check if display is supported
-        img = Image.open(generated_image_filepath)
-        img.show()
-
    return generated_image_filepath
-