ALwrity Prompts - AI Integration Plan

2025-09-03 23:16:39 +05:30
parent 5efee4235d
commit c19fc3f225
104 changed files with 9392 additions and 17462 deletions
--- a/backend/services/llm_providers/gemini_grounded_provider.py
+++ b/backend/services/llm_providers/gemini_grounded_provider.py
@@ -111,11 +111,11 @@ class GeminiGroundedProvider:
            Enhanced prompt for grounded generation
        """
        content_type_instructions = {
-            "linkedin_post": "Generate a professional LinkedIn post that is factually accurate and cites current sources. Include engaging hashtags and a call-to-action.",
-            "linkedin_article": "Generate a comprehensive LinkedIn article with proper structure, factual accuracy, and source citations. Include an engaging title and conclusion.",
-            "linkedin_carousel": "Generate LinkedIn carousel content with multiple slides, each containing factual information with proper source attribution.",
-            "linkedin_video_script": "Generate a video script with hook, main content, and conclusion. Ensure all claims are factually grounded.",
-            "linkedin_comment_response": "Generate a professional comment response that adds value to the conversation."
+            "linkedin_post": "You are an expert LinkedIn content strategist. Generate a highly engaging, professional LinkedIn post that drives meaningful engagement, establishes thought leadership, and includes compelling hooks, actionable insights, and strategic hashtags. Every element should be optimized for maximum engagement and shareability.",
+            "linkedin_article": "You are a senior content strategist and industry thought leader. Generate a comprehensive, SEO-optimized LinkedIn article with compelling headlines, structured content, data-driven insights, and practical takeaways. Include proper source citations and engagement elements throughout.",
+            "linkedin_carousel": "You are a visual content strategist specializing in LinkedIn carousels. Generate compelling, story-driven carousel content with clear visual hierarchy, actionable insights per slide, and strategic engagement elements. Each slide should provide immediate value while building anticipation for the next.",
+            "linkedin_video_script": "You are a video content strategist and LinkedIn engagement expert. Generate a compelling video script optimized for LinkedIn's algorithm with attention-grabbing hooks, strategic timing, and engagement-driven content. Include specific visual and audio recommendations for maximum impact.",
+            "linkedin_comment_response": "You are a LinkedIn engagement specialist and industry expert. Generate thoughtful, value-adding comment responses that encourage further discussion, demonstrate expertise, and build meaningful professional relationships. Focus on genuine engagement over generic responses."
        }
        
        instruction = content_type_instructions.get(content_type, "Generate professional content with factual accuracy.")
@@ -123,15 +123,29 @@ class GeminiGroundedProvider:
        grounded_prompt = f"""
        {instruction}
        
-        IMPORTANT: Use current, factual information from reliable sources. Cite specific sources for any claims, statistics, or recent developments.
+        CRITICAL REQUIREMENTS FOR LINKEDIN CONTENT:
+        - Use ONLY current, factual information from reliable sources (2024-2025)
+        - Cite specific sources for ALL claims, statistics, and recent developments
+        - Ensure content is optimized for LinkedIn's algorithm and engagement patterns
+        - Include strategic hashtags and engagement elements throughout
        
        User Request: {prompt}
        
-        Requirements:
-        - Ensure all factual claims are backed by current sources
-        - Use professional, engaging language appropriate for LinkedIn
-        - Include relevant industry insights and trends
-        - Make content shareable and valuable for the target audience
+        CONTENT QUALITY STANDARDS:
+        - All factual claims must be backed by current, authoritative sources
+        - Use professional yet conversational language that encourages engagement
+        - Include relevant industry insights, trends, and data points
+        - Make content highly shareable with clear value proposition
+        - Optimize for LinkedIn's professional audience and engagement metrics
+        
+        ENGAGEMENT OPTIMIZATION:
+        - Include thought-provoking questions and calls-to-action
+        - Use storytelling elements and real-world examples
+        - Ensure content provides immediate, actionable value
+        - Optimize for comments, shares, and professional networking
+        - Include industry-specific terminology and insights
+        
+        REMEMBER: This content will be displayed on LinkedIn with full source attribution and grounding data. Every claim must be verifiable, and the content should position the author as a thought leader in their industry.
        """
        
        return grounded_prompt.strip()
--- a/backend/services/llm_providers/text_to_image_generation/gen_gemini_images.py
+++ b/backend/services/llm_providers/text_to_image_generation/gen_gemini_images.py
@@ -3,6 +3,7 @@ import sys
 import time
 import datetime
 import base64
+import random
 from typing import List, Optional, Tuple
 from PIL import Image
 from io import BytesIO
@@ -12,8 +13,8 @@ import logging
 from ...api_key_manager import APIKeyManager

 try:
-    import google.generativeai as genai
-    from google.generativeai import types
+    from google import genai
+    from google.genai import types
 except ImportError:
    genai = None
    logging.getLogger('gemini_image_generator').warning(
@@ -30,6 +31,24 @@ logging.basicConfig(
 )
 logger = logging.getLogger('gemini_image_generator')

+# Imagen fallback configuration
+IMAGEN_FALLBACK_CONFIG = {
+    'enabled': os.getenv('IMAGEN_FALLBACK_ENABLED', 'true').lower() == 'true',  # Master switch for Imagen fallback
+    'auto_fallback': os.getenv('IMAGEN_AUTO_FALLBACK', 'true').lower() == 'true',  # Automatically fall back on Gemini failures
+    'preferred_model': os.getenv('IMAGEN_MODEL', 'imagen-4.0-generate-001'),  # Fast model for quick generation
+    'fallback_aspect_ratios': {
+        '1:1': '1:1',
+        '3:4': '3:4',
+        '4:3': '4:3', 
+        '9:16': '9:16',
+        '16:9': '16:9'
+    },
+    'max_images': int(os.getenv('IMAGEN_MAX_IMAGES', '1')),  # Generate 1 image for LinkedIn posts
+}
+
+# Log configuration on startup
+logger.info(f"🔄 Imagen fallback configuration: {IMAGEN_FALLBACK_CONFIG}")
+
 # With image generation in Gemini, your imagination is the limit.
 # Follow Google AI best practices for detailed prompts and iterative refinement.

@@ -173,13 +192,137 @@ def _ensure_client() -> Optional[object]:
    api_key_manager = APIKeyManager()
    api_key = api_key_manager.get_api_key("gemini")
    if not api_key or genai is None:
+        if not api_key:
+            logger.warning("No Gemini API key found")
+        if genai is None:
+            logger.warning("Google Generative AI library not available")
        return None
    try:
-        return genai.Client(api_key=api_key)
-    except Exception:
+        logger.info("Creating Gemini client...")
+        # Create a client using the correct API pattern
+        # The API key is passed directly to the Client constructor
+        client = genai.Client(api_key=api_key)
+        logger.info("Gemini client created successfully")
+        return client
+    except Exception as e:
+        logger.error(f"Failed to create Gemini client: {e}")
+        import traceback
+        logger.error(f"Traceback: {traceback.format_exc()}")
        return None


+def _generate_imagen_images_base64(prompt: str, aspect_ratio: str = "1:1") -> List[str]:
+    """
+    Generate images using Imagen API as a fallback method.
+    
+    This function implements the Imagen API following the official documentation:
+    https://ai.google.dev/gemini-api/docs/imagen
+    
+    Args:
+        prompt: Text prompt for image generation
+        aspect_ratio: Desired aspect ratio (1:1, 3:4, 4:3, 9:16, 16:9)
+    
+    Returns:
+        List of base64-encoded PNG images
+    """
+    logger = logging.getLogger('gemini_image_generator')
+    logger.info("🔄 Falling back to Imagen API for image generation")
+    
+    try:
+        # Get API key for Imagen (can use same Gemini API key)
+        api_key_manager = APIKeyManager()
+        api_key = api_key_manager.get_api_key("gemini")  # Imagen uses same API key
+        
+        if not api_key:
+            logger.error("No API key available for Imagen fallback")
+            return []
+        
+        # Create Imagen client
+        client = genai.Client(api_key=api_key)
+        
+        # Map aspect ratio to Imagen format using configuration
+        imagen_aspect_ratio = IMAGEN_FALLBACK_CONFIG['fallback_aspect_ratios'].get(aspect_ratio, "1:1")
+        
+        # Optimize prompt for Imagen (remove Gemini-specific formatting)
+        imagen_prompt = _optimize_prompt_for_imagen(prompt)
+        
+        logger.info(f"Generating Imagen images with prompt: {imagen_prompt[:100]}...")
+        logger.info(f"Using aspect ratio: {imagen_aspect_ratio}")
+        logger.info(f"Using model: {IMAGEN_FALLBACK_CONFIG['preferred_model']}")
+        
+        # Generate images using configured Imagen model
+        # Note: sample_image_size is not supported in current library version
+        config_params = {
+            'number_of_images': IMAGEN_FALLBACK_CONFIG['max_images'],
+            'aspect_ratio': imagen_aspect_ratio,
+        }
+        
+        # Add additional configuration options if needed
+        # config_params['guidance_scale'] = 7.5  # Optional: control image generation quality
+        # config_params['person_generation'] = 'allow_adult'  # Optional: control person generation
+        
+        response = client.models.generate_images(
+            model=IMAGEN_FALLBACK_CONFIG['preferred_model'],
+            prompt=imagen_prompt,
+            config=types.GenerateImagesConfig(**config_params)
+        )
+        
+        # Extract base64 images from response
+        images_b64: List[str] = []
+        for generated_image in response.generated_images:
+            if hasattr(generated_image, 'image') and hasattr(generated_image.image, 'image_bytes'):
+                # Convert image bytes to base64
+                image_bytes = generated_image.image.image_bytes
+                if isinstance(image_bytes, bytes):
+                    images_b64.append(base64.b64encode(image_bytes).decode('utf-8'))
+                else:
+                    # If already base64 string
+                    images_b64.append(str(image_bytes))
+        
+        if images_b64:
+            logger.info(f"✅ Imagen fallback successful! Generated {len(images_b64)} images")
+            return images_b64
+        else:
+            logger.warning("Imagen fallback returned no images")
+            return []
+            
+    except Exception as e:
+        logger.error(f"❌ Imagen fallback failed: {e}")
+        import traceback
+        logger.error(f"Imagen error traceback: {traceback.format_exc()}")
+        return []
+
+
+def _optimize_prompt_for_imagen(prompt: str) -> str:
+    """
+    Optimize prompt for Imagen API by removing Gemini-specific formatting
+    and enhancing it with Imagen best practices.
+    
+    Based on Imagen prompt guide: https://ai.google.dev/gemini-api/docs/imagen
+    """
+    # Remove Gemini-specific formatting
+    prompt = prompt.replace('\n\nEnhanced prompt:', '')
+    prompt = prompt.replace('\n\nAspect ratio:', '')
+    
+    # Clean up extra whitespace
+    prompt = ' '.join(prompt.split())
+    
+    # Add Imagen-specific enhancements if not present
+    if 'professional' in prompt.lower() and 'linkedin' in prompt.lower():
+        # Enhance for LinkedIn professional content
+        prompt += ", high quality, professional photography, business appropriate"
+    
+    if 'digital transformation' in prompt.lower() or 'technology' in prompt.lower():
+        # Enhance for tech content
+        prompt += ", modern, innovative, clean design, corporate aesthetic"
+    
+    # Ensure prompt doesn't exceed Imagen's 480 token limit
+    if len(prompt) > 400:  # Leave some buffer
+        prompt = prompt[:400] + "..."
+    
+    return prompt
+
+
 def generate_gemini_images_base64(
    prompt: str,
    *,
@@ -190,17 +333,23 @@ def generate_gemini_images_base64(
    aspect_ratio: str = "9:16",
    max_retries: int = 2,
    initial_retry_delay: float = 1.0,
+    enable_imagen_fallback: bool = True,
 ) -> List[str]:
    """
    Return list of base64 PNG images generated from a prompt.
+    
+    Primary method: Gemini API for image generation
+    Fallback method: Imagen API when Gemini fails (quota limits, API errors, etc.)

    Implements best practices per Gemini docs: send text prompt, parse inline image parts,
    and return base64 data suitable for API responses. No Streamlit, no printing.

-    Docs: https://ai.google.dev/gemini-api/docs/image-generation
+    Docs: 
+    - Gemini: https://ai.google.dev/gemini-api/docs/image-generation
+    - Imagen: https://ai.google.dev/gemini-api/docs/imagen
    """
    logger = logging.getLogger('gemini_image_generator')
-    logger.info("Generating image (base64) with Gemini")
+    logger.info("Generating image (base64) with Gemini (with Imagen fallback)")

    if enhance_prompt and keywords:
        pg = AIPromptGenerator()
@@ -215,9 +364,13 @@ def generate_gemini_images_base64(
    if aspect_ratio:
        prompt = f"{prompt}\n\nAspect ratio: {aspect_ratio}"

+    # Try Gemini first
    client = _ensure_client()
    if client is None:
        logger.warning("Gemini client not available or API key missing")
+        if enable_imagen_fallback and IMAGEN_FALLBACK_CONFIG['enabled']:
+            logger.info("Falling back to Imagen API")
+            return _generate_imagen_images_base64(prompt, aspect_ratio)
        return []

    retry = 0
@@ -225,9 +378,10 @@ def generate_gemini_images_base64(
    while retry <= max_retries:
        try:
            response = client.models.generate_content(
-                model="gemini-2.5-flash-image-preview",
+                model="gemini-2.0-flash-exp-image-generation",
                contents=[prompt],
            )
+            
            images_b64: List[str] = []
            for part in response.candidates[0].content.parts:
                if getattr(part, 'inline_data', None) is not None:
@@ -239,16 +393,47 @@ def generate_gemini_images_base64(
                    else:
                        # Some SDKs may already present base64 str
                        images_b64.append(str(raw))
-            return images_b64
+            
+            if images_b64:
+                logger.info(f"✅ Gemini generated {len(images_b64)} images successfully")
+                return images_b64
+            else:
+                logger.warning("Gemini returned no images, falling back to Imagen")
+                if enable_imagen_fallback and IMAGEN_FALLBACK_CONFIG['enabled']:
+                    return _generate_imagen_images_base64(prompt, aspect_ratio)
+                return []
+                
        except Exception as e:
            msg = str(e)
            logger.warning(f"Gemini image gen error: {msg}")
+            
+            # Check if this is a quota/API error that warrants fallback
+            if any(error_type in msg.lower() for error_type in [
+                'quota', 'resource_exhausted', 'rate_limit', 'billing', 'api_key', '403', '429'
+            ]):
+                logger.info("Gemini quota/API error detected, falling back to Imagen")
+                if enable_imagen_fallback and IMAGEN_FALLBACK_CONFIG['enabled']:
+                    return _generate_imagen_images_base64(prompt, aspect_ratio)
+                return []
+            
+            # For other errors, retry if possible
            if "503" in msg and retry < max_retries:
                time.sleep(delay)
                delay *= 2
                retry += 1
                continue
+            
+            # Final fallback for any other errors
+            if enable_imagen_fallback and IMAGEN_FALLBACK_CONFIG['enabled']:
+                logger.info("Final fallback to Imagen due to Gemini error")
+                return _generate_imagen_images_base64(prompt, aspect_ratio)
            return []
+    
+    # If all retries exhausted, fall back to Imagen
+    if enable_imagen_fallback and IMAGEN_FALLBACK_CONFIG['enabled']:
+        logger.info("All Gemini retries exhausted, falling back to Imagen")
+        return _generate_imagen_images_base64(prompt, aspect_ratio)
+    return []


 def generate_gemini_image(
@@ -260,9 +445,12 @@ def generate_gemini_image(
    max_retries=2,
    initial_retry_delay=1.0,
    aspect_ratio="9:16",
+    enable_imagen_fallback=True,
 ):
    """
    Backward-compatible wrapper that generates a single image file on disk and returns path.
+    Now includes Imagen fallback for improved reliability.
+    
    Prefer generate_gemini_images_base64 in new code paths.
    """
    logger = logging.getLogger('gemini_image_generator')
@@ -275,20 +463,31 @@ def generate_gemini_image(
        aspect_ratio=aspect_ratio,
        max_retries=max_retries,
        initial_retry_delay=initial_retry_delay,
+        enable_imagen_fallback=enable_imagen_fallback,
    )
    if not images:
        return None
+    
    # Persist first image to file for legacy callers
    img_b64 = images[0]
    img_bytes = base64.b64decode(img_b64)
    img = Image.open(BytesIO(img_bytes))
-    out_name = f'gemini-native-image-{datetime.datetime.now().strftime("%Y%m%d-%H%M%S")}.png'
+    
+    # Update filename to indicate which API was used
+    timestamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
+    if 'imagen' in prompt.lower() or 'fallback' in prompt.lower():
+        out_name = f'imagen-fallback-image-{timestamp}.png'
+    else:
+        out_name = f'gemini-native-image-{timestamp}.png'
+    
    try:
        img.save(out_name)
        # Also call save_generated_image to reuse existing pipeline
        save_generated_image({"artifacts": [{"base64": img_b64}]})
+        logger.info(f"✅ Image saved successfully: {out_name}")
        return out_name
-    except Exception:
+    except Exception as e:
+        logger.error(f"❌ Failed to save image: {e}")
        return None