ALwrity Version 0.5.0 (Fastapi + React )

This commit is contained in:
ajaysi
2025-08-06 12:48:02 +05:30
parent f28a919caa
commit 32f97fa6b3
476 changed files with 115544 additions and 28747 deletions

View File

@@ -0,0 +1,125 @@
"""
Gemini Image Description Module
This module provides functionality to generate text descriptions of images using Google's Gemini API.
"""
import os
import sys
from pathlib import Path
import base64
from typing import Optional, Dict, Any, List, Union
from dotenv import load_dotenv
import google.genai as genai
from google.genai import types
from PIL import Image
from loguru import logger
# Replace whatever sinks loguru currently has with a single stdout sink so
# this module's messages share one colorized "level|file:line:function| msg"
# layout.
logger.remove()
logger.add(
    sys.stdout,
    format="<level>{level}</level>|<green>{file}:{line}:{function}</green>| {message}",
    colorize=True,
)
# Import APIKeyManager
from ...api_key_manager import APIKeyManager
# Optional-dependency guard for the Gemini SDK.
#
# This module talks to Gemini through `genai.Client(...)`, which is provided
# by the `google-genai` package (`google.genai`). The guard therefore has to
# bind that same package: binding the legacy `google.generativeai` module
# here would shadow the earlier `google.genai` import with a library that has
# no `Client` class, and would set `genai` to None whenever only the new SDK
# is installed.
try:
    import google.genai as genai
except ImportError:
    # Leave a falsy sentinel so callers can report a clean error instead of
    # failing with a NameError/AttributeError.
    genai = None
    logger.warning("Google genai library not available. Install with: pip install google-genai")
def describe_image(
    image_path: str,
    prompt: str = "Describe this image in detail:",
    model: str = "gemini-2.0-flash",
) -> Optional[str]:
    """
    Describe an image using Google's Gemini API.

    Every failure mode (missing SDK, missing API key, missing file, unreadable
    image, API error, empty response) is logged once and reported to the
    caller as ``None``; no exception escapes this function.

    Parameters:
        image_path (str): Path to the image file.
        prompt (str): Prompt for describing the image.
        model (str): Gemini model name passed to ``generate_content``.
            Defaults to the model this module has always used.

    Returns:
        Optional[str]: The generated description of the image, or None if an
        error occurs or the model returns no text.
    """
    if not genai:
        logger.error("Google genai library not available")
        return None

    try:
        # Use APIKeyManager instead of direct environment variable access.
        api_key = APIKeyManager().get_api_key("gemini")
        if not api_key:
            # Return directly: raising here would only be swallowed by the
            # catch-all below and logged a second time as "unexpected".
            logger.error("Gemini API key not found. Please configure it in the onboarding process.")
            return None

        if not os.path.isfile(image_path):
            logger.error(f"Image file not found: {image_path}")
            return None

        client = genai.Client(api_key=api_key)

        try:
            image = Image.open(image_path)
        except Exception as e:
            logger.error(f"Failed to open image: {e}")
            return None
        logger.info(f"Successfully opened image: {image_path}")

        # Image.open keeps the underlying file handle open; the context
        # manager guarantees it is released whether or not the request works.
        with image:
            try:
                response = client.models.generate_content(
                    model=model,
                    contents=[prompt, image],
                )
                description = response.text
            except Exception as e:
                logger.error(f"Failed to generate content: {e}")
                return None

        if description is None:
            # Do not report success when the response carried no text.
            logger.warning(f"Gemini returned no text for image: {image_path}")
            return None

        logger.info(f"Successfully generated description for image: {image_path}")
        return description
    except Exception as e:
        # Last-resort boundary: callers rely on None rather than exceptions.
        logger.error(f"An unexpected error occurred: {e}")
        return None
def analyze_image_with_prompt(image_path: str, prompt: str) -> Optional[str]:
    """
    Run a caller-supplied prompt against an image via Google's Gemini API.

    This is a convenience alias for ``describe_image`` that makes the prompt
    mandatory instead of defaulting to a generic description request.

    Parameters:
        image_path (str): Path to the image file.
        prompt (str): Custom prompt for analyzing the image.

    Returns:
        Optional[str]: Whatever ``describe_image`` returns for the same
        arguments: the generated text, or None on failure.
    """
    analysis = describe_image(image_path, prompt)
    return analysis
# Manual smoke test: describe a sample image when the module is run directly.
if __name__ == "__main__":
    # Placeholder path; point it at a real image before running.
    sample_path = "path/to/your/image.jpg"
    result = describe_image(sample_path)
    print(f"Image description: {result}" if result else "Failed to generate image description")

View File

@@ -0,0 +1,79 @@
"""
This module provides functionality to analyze images using OpenAI's Vision API.
It encodes an image to a base64 string and sends a request to the OpenAI API
to interpret the contents of the image, returning a textual description.
"""
import requests
import sys
import re
import base64
def analyze_and_extract_details_from_image(
    image_path: str,
    api_key: str,
    *,
    model: str = "gpt-4-vision-preview",
    timeout: float = 60,
) -> dict:
    """
    Analyzes an image using OpenAI's Vision API and extracts Alt Text, Description, Title, and Caption.

    The image is read from disk, base64-encoded into a ``data:`` URL and sent
    to the chat-completions endpoint. The reply is then scanned with regular
    expressions for the four labelled fields.

    Args:
        image_path (str): Path to the image file.
        api_key (str): Your OpenAI API key.
        model (str): Vision-capable model name to request. Defaults to the
            model this function has always used.
            NOTE(review): confirm this model is still served by the API.
        timeout (float): Seconds to wait for the HTTP request before giving up.

    Returns:
        dict: Keys ``alt_text``, ``description``, ``title`` and ``caption``;
        each value is the extracted string, or None when the reply did not
        contain that field in the expected format.

    Raises:
        OSError: If the image file cannot be read (e.g. FileNotFoundError).
        SystemExit: If the HTTP request fails or the response cannot be parsed.
    """
    # Binary mode must not be given an encoding: open(..., "rb", encoding=...)
    # raises ValueError before a single byte is read.
    with open(image_path, "rb") as image_file:
        base64_image = base64.b64encode(image_file.read()).decode('utf-8')

    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}"
    }
    payload = {
        "model": model,
        "messages": [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": "The given image is used in blog content. Analyze the given image and suggest alternative(alt) test, description, title, caption."
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:image/jpeg;base64,{base64_image}"
                        }
                    }
                ]
            }
        ],
        "max_tokens": 300
    }

    try:
        # Without a timeout, requests waits indefinitely on a stalled server.
        response = requests.post(
            "https://api.openai.com/v1/chat/completions",
            headers=headers,
            json=payload,
            timeout=timeout,
        )
        response.raise_for_status()
        assistant_message = response.json()['choices'][0]['message']['content']

        # Extracting details using regular expressions
        alt_text_match = re.search(r'Alt Text: "(.*?)"', assistant_message)
        description_match = re.search(r'Description: (.*?)\n\n', assistant_message)
        title_match = re.search(r'Title: "(.*?)"', assistant_message)
        caption_match = re.search(r'Caption: "(.*?)"', assistant_message)

        return {
            'alt_text': alt_text_match.group(1) if alt_text_match else None,
            'description': description_match.group(1) if description_match else None,
            'title': title_match.group(1) if title_match else None,
            'caption': caption_match.group(1) if caption_match else None
        }
    # NOTE(review): sys.exit terminates the whole process, which is harsh for
    # a library function; kept so callers see the same failure behaviour.
    except requests.RequestException as e:
        sys.exit(f"Error: Failed to communicate with OpenAI API. Error: {e}")
    except Exception as e:
        sys.exit(f"Error occurred: {e}")