Fixed issue with Gemini API

This commit is contained in:
ajaysi
2024-04-22 10:09:07 +05:30
parent 180f28a493
commit 357cba36e4
15 changed files with 188 additions and 186 deletions

View File

@@ -1,79 +0,0 @@
"""
"""
import os
import logging
from pathlib import Path
import google.generativeai as genai
logging.basicConfig(level=logging.INFO, format='%(asctime)s-%(levelname)s-%(module)s-%(lineno)d-%(message)s')
from dotenv import load_dotenv
load_dotenv(Path('../../.env'))
from tenacity import (
retry,
stop_after_attempt,
wait_random_exponential,
) # for exponential backoff
@retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6))
def gemini_get_img_info(prompt, img_path):
    """Ask the Gemini vision model to describe an image.

    The call is wrapped by the ``retry`` decorator above, which re-attempts
    the whole function on failure (up to six attempts with random
    exponential backoff between 1 and 60 seconds).

    Args:
        prompt: Text instruction sent to the model together with the image.
        img_path: Path of the image file. Its bytes are sent with the
            ``image/png`` MIME type.

    Returns:
        The ``text`` attribute of the model response.

    Raises:
        FileNotFoundError: If ``img_path`` does not exist.
        Exception: Errors raised while configuring the client, creating the
            model or generating content are logged and re-raised.
    """
    logging.info("Get image details from Gemini Pro.")
    try:
        genai.configure(api_key=os.getenv("GEMINI_API_KEY"))
    except Exception as e:
        logging.error(f"Could not load gemini API key: {e}")
        raise

    # Set up the model
    generation_config = {
        "temperature": 0.9,
        "top_p": 1,
        "top_k": 1,
        "max_output_tokens": 1096,
    }
    # Every listed harm category is configured with the BLOCK_NONE threshold.
    safety_settings = [
        {"category": category, "threshold": "BLOCK_NONE"}
        for category in (
            "HARM_CATEGORY_HARASSMENT",
            "HARM_CATEGORY_HATE_SPEECH",
            "HARM_CATEGORY_SEXUALLY_EXPLICIT",
            "HARM_CATEGORY_DANGEROUS_CONTENT",
        )
    ]
    try:
        model = genai.GenerativeModel(model_name="gemini-pro-vision",
                                      generation_config=generation_config,
                                      safety_settings=safety_settings)
    except Exception as e:
        logging.error(f"Could not create GenerativeModel: {e}")
        raise

    # Validate that an image is present
    img = Path(img_path)
    if not img.exists():
        raise FileNotFoundError(f"Could not find image: {img}")

    prompt_parts = [
        f"{prompt}",
        {"mime_type": "image/png", "data": img.read_bytes()},
    ]

    # Pre-bind ``response`` so the handler below can tell "the request itself
    # failed" apart from "a response arrived but reading its text failed".
    # Without this, a failing generate_content() call made the handler die
    # with UnboundLocalError and hid the real error.
    response = None
    try:
        response = model.generate_content(prompt_parts)
        return response.text
    except Exception as e:
        feedback = getattr(response, "prompt_feedback", None)
        if feedback is not None:
            logging.error(f"Gemini is blocking this request: {feedback.block_reason}")
            logging.error(f"Gemini Vision, Failed to give image Details: {e}\n{feedback}")
        else:
            logging.error(f"Gemini Vision, Failed to give image Details: {e}")
        raise

View File

@@ -0,0 +1,79 @@
"""
This module provides functionality to analyze images using OpenAI's Vision API.
It encodes an image to a base64 string and sends a request to the OpenAI API
to interpret the contents of the image, returning a textual description.
"""
import requests
import sys
import re
import base64
def analyze_and_extract_details_from_image(image_path, api_key):
    """
    Analyzes an image using OpenAI's Vision API and extracts Alt Text, Description, Title, and Caption.

    The image is base64-encoded, sent to the chat completions endpoint as a
    data URL, and the assistant's reply is searched with regular expressions
    for the four labelled fields.

    Args:
        image_path (str): Path to the image file.
        api_key (str): Your OpenAI API key.

    Returns:
        dict: Keys ``alt_text``, ``description``, ``title`` and ``caption``.
            A value is None when its label is not found in the reply.

    Note:
        On a request failure, or any other error while handling the reply,
        the function calls ``sys.exit`` with an error message instead of
        raising to the caller.
    """
    def encode_image(path):
        """ Encodes an image to a base64 string. """
        # Binary mode must not be combined with an ``encoding`` argument;
        # doing so raises ValueError before any byte is read.
        with open(path, "rb") as image_file:
            return base64.b64encode(image_file.read()).decode('utf-8')

    base64_image = encode_image(image_path)

    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}"
    }

    payload = {
        "model": "gpt-4-vision-preview",
        "messages": [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": "The given image is used in blog content. Analyze the given image and suggest alternative(alt) test, description, title, caption."
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:image/jpeg;base64,{base64_image}"
                        }
                    }
                ]
            }
        ],
        "max_tokens": 300
    }

    try:
        response = requests.post("https://api.openai.com/v1/chat/completions", headers=headers, json=payload)
        response.raise_for_status()

        assistant_message = response.json()['choices'][0]['message']['content']

        # Extracting details using regular expressions
        alt_text_match = re.search(r'Alt Text: "(.*?)"', assistant_message)
        # The description is only matched when it is followed by a blank line.
        description_match = re.search(r'Description: (.*?)\n\n', assistant_message)
        title_match = re.search(r'Title: "(.*?)"', assistant_message)
        caption_match = re.search(r'Caption: "(.*?)"', assistant_message)

        return {
            'alt_text': alt_text_match.group(1) if alt_text_match else None,
            'description': description_match.group(1) if description_match else None,
            'title': title_match.group(1) if title_match else None,
            'caption': caption_match.group(1) if caption_match else None
        }

    except requests.RequestException as e:
        sys.exit(f"Error: Failed to communicate with OpenAI API. Error: {e}")
    except Exception as e:
        sys.exit(f"Error occurred: {e}")

View File

@@ -132,7 +132,7 @@ def ai_essay_generator(essay_title, selected_essay_type, selected_education_leve
load_dotenv(Path('../.env'))
genai.configure(api_key=os.getenv('GEMINI_API_KEY'))
# Initialize the generative model
model = genai.GenerativeModel('gemini-1.0-pro')
model = genai.GenerativeModel('gemini-pro')
# Generate prompts
try:

View File

@@ -19,7 +19,7 @@ from tenacity import (
)
#@retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6))
@retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6))
def gemini_text_response(prompt, temperature, top_p, n, max_tokens):
""" Common functiont to get response from gemini pro Text. """
#FIXME: Include : https://github.com/google-gemini/cookbook/blob/main/quickstarts/rest/System_instructions_REST.ipynb

View File

@@ -0,0 +1,41 @@
from PIL import Image
import requests
# Ensure you sign up for an account to obtain an API key:
# https://platform.stability.ai/
# Your API key can be found here after account creation:
# https://platform.stability.ai/account/keys
def generate_stable_diffusion_image(prompt):
    """
    Generate an image with the Stability AI SD3 endpoint and save it to disk.

    The API key is read from the ``STABILITY_API_KEY`` environment variable.
    The image is written to ``./dog-wearing-glasses.jpeg`` (relative to the
    current working directory), overwriting any existing file.

    Args:
        prompt (str): The prompt to generate the image.

    Returns:
        str: Path of the saved image file.

    Raises:
        Exception: If the API answers with a status other than 200; the
            message is the JSON error body returned by the API.
    """
    # Local import: ``os`` is not among the module-level imports visible for
    # this file, so the original ``os.getenv`` call failed with NameError.
    import os

    api_key = os.getenv('STABILITY_API_KEY')
    image_path = "./dog-wearing-glasses.jpeg"

    response = requests.post(
        "https://api.stability.ai/v2beta/stable-image/generate/sd3",
        headers={
            "authorization": f"Bearer {api_key}",
            "accept": "image/*"
        },
        files={"none": ''},
        data={
            "prompt": prompt,
            # Request JPEG so the returned bytes match the ``.jpeg`` file
            # they are written to (the original asked for webp).
            "output_format": "jpeg",
        },
    )

    if response.status_code != 200:
        raise Exception(str(response.json()))

    with open(image_path, 'wb') as file:
        file.write(response.content)

    # Callers use the return value as the stored image location.
    return image_path

View File

@@ -0,0 +1,73 @@
#########################################################
#
# This module will generate images for the blogs using APIs
# from Dall-E and other free resources. Given a prompt, the
# images will be stored in local directory.
# Required: openai API key.
#
#########################################################
# imports
import sys
import datetime
import openai # OpenAI Python library to make API calls
import os # used to access filepaths
from loguru import logger
logger.remove()
logger.add(sys.stdout,
colorize=True,
format="<level>{level}</level>|<green>{file}:{line}:{function}</green>| {message}"
)
#from .gen_dali2_images
from .gen_dali3_images import generate_dalle3_images
from .gen_stabl_diff_img import generate_stable_diffusion_image
def generate_image(user_prompt, image_engine="dalle3"):
    """
    Create an image from a text prompt with the selected image engine.

    The user prompt is first expanded by ``generate_img_prompt`` and the
    result is handed to the engine-specific generator.

    Args:
        user_prompt (str): A text description of the desired image.
        image_engine (str): Name of the engine to use. The name is matched
            case-insensitively as a substring: anything containing
            "dalle3" selects Dall-E 3, anything containing
            "stable diffusion" selects Stable Diffusion.
            Defaults to "dalle3".

    Returns:
        Whatever the selected generator returns (used by callers as the
        location of the stored image).

    Raises:
        ValueError: If ``image_engine`` names no supported engine.
    """
    img_prompt = generate_img_prompt(user_prompt)
    # call the OpenAI API to generate image from prompt.
    logger.info(f"Calling image.generate with prompt: {img_prompt}")

    # Match case-insensitively: the default value "dalle3" never matched the
    # original case-sensitive 'Dalle3' check, which left the result unbound.
    engine = image_engine.lower()
    if 'dalle3' in engine:
        return generate_dalle3_images(img_prompt)
    if 'stable diffusion' in engine:
        return generate_stable_diffusion_image(img_prompt)

    raise ValueError(f"Unsupported image engine: {image_engine!r}")
def generate_img_prompt(user_prompt):
    """
    Wrap the given text in an instruction template for image generation.

    Args:
        user_prompt (str): Text that the generated image should represent.

    Returns:
        str: The instruction template with ``user_prompt`` appended after
            the ``Text:`` label. No model is called here.
    """
    # Earlier prompt ideas that are kept for reference but not used:
    #   - act as an artist advisor (light & shadow, shading techniques, ...)
    #   - act as a prompt generator for Midjourney, providing detailed and
    #     creative descriptions that inspire unique images.
    logger.info(f"Generate image prompt for : {user_prompt}")

    image_prompt = f"""As an educationist and expert infographic artist, your tasked to create prompts that will be used for image generation.
Craft prompt for Openai Dall-e image generation program. Clearly describe the given text to represent it as image.
Make sure to avoid common image generation mistakes.
Advice for creating prompt for image from the given text(no more than 150 words).
Reply with only one answer and no descrition. Generate image prompt for the below text.
Text: {user_prompt}"""
    return image_prompt

View File

@@ -0,0 +1,35 @@
import datetime
import os
import requests
from PIL import Image
import logging
def save_generated_image(img_generation_response, image_dir):
    """
    Save generated images for blog, ensuring unique names for SEO.

    The image URL is taken from ``img_generation_response.data[0].url``,
    downloaded, and written into ``image_dir`` under a name that carries the
    current timestamp (second resolution).

    Args:
        img_generation_response: Image generation response object exposing
            ``data[0].url``.
        image_dir (str): Directory in which the image file is created.

    Returns:
        str | None: Path of the saved image, or None when downloading the
            image fails.
    """
    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger(__name__)

    generated_image_name = f"generated_image_{datetime.datetime.now():%Y-%m-%d-%H-%M-%S}.png"
    generated_image_filepath = os.path.join(image_dir, generated_image_name)
    generated_image_url = img_generation_response.data[0].url

    logger.info(f"Fetch the image from url: {generated_image_url}")
    try:
        response = requests.get(generated_image_url, stream=True)
        response.raise_for_status()
        # Image bytes are written in binary mode. Binary mode must not be
        # given an ``encoding`` argument: it raises ValueError, which the
        # handler below does not catch.
        with open(generated_image_filepath, "wb") as image_file:
            image_file.write(response.content)
    except requests.exceptions.RequestException as e:
        logger.error(f"Failed to get generated image content: {e}")
        return None

    logger.info(f"Saved image at path: {generated_image_filepath}")

    if os.environ.get('DISPLAY', ''):  # Check if display is supported
        img = Image.open(generated_image_filepath)
        img.show()

    return generated_image_filepath