ALwrity version 0.5.5

2025-08-19 21:48:33 +05:30
parent 5f104bf427
commit 74e22b421a
97 changed files with 16770 additions and 5000 deletions
--- a/backend/services/llm_providers/README.md
+++ b/backend/services/llm_providers/README.md
@@ -0,0 +1,306 @@
+# Gemini Provider Module
+
+This module provides functions for interacting with Google's Gemini API, specifically designed for structured JSON output and text generation. It follows the official Gemini API documentation and implements best practices for reliable AI interactions.
+
+## Key Features
+
+- **Structured JSON Response Generation**: Generate structured outputs with schema validation
+- **Text Response Generation**: Simple text generation with retry logic
+- **Comprehensive Error Handling**: Robust error handling and logging
+- **Automatic API Key Management**: Secure API key handling
+- **Support for Multiple Models**: gemini-2.5-flash and gemini-2.5-pro
+
+## Best Practices
+
+### 1. Use Structured Output for Complex Responses
+```python
+# ✅ Good: Use structured output for multi-field responses
+schema = {
+    "type": "object",
+    "properties": {
+        "tasks": {
+            "type": "array",
+            "items": {
+                "type": "object",
+                "properties": {
+                    "title": {"type": "string"},
+                    "description": {"type": "string"}
+                }
+            }
+        }
+    }
+}
+result = gemini_structured_json_response(prompt, schema, temperature=0.2, max_tokens=8192)
+```
+
+### 2. Keep Schemas Simple and Flat
+```python
+# ✅ Good: Simple, flat schema
+schema = {
+    "type": "object",
+    "properties": {
+        "monitoringTasks": {
+            "type": "array",
+            "items": {"type": "object", "properties": {...}}
+        }
+    }
+}
+
+# ❌ Avoid: Complex nested schemas with many required fields
+schema = {
+    "type": "object",
+    "required": ["field1", "field2", "field3"],
+    "properties": {
+        "field1": {"type": "object", "required": [...], "properties": {...}},
+        "field2": {"type": "array", "items": {"type": "object", "required": [...], "properties": {...}}}
+    }
+}
+```
+
+### 3. Set Appropriate Token Limits
+```python
+# ✅ Good: Use 8192 tokens for complex outputs
+result = gemini_structured_json_response(prompt, schema, max_tokens=8192)
+
+# ✅ Good: Use 2048 tokens for simple text responses
+result = gemini_text_response(prompt, temperature=0.7, top_p=0.9, n=1, max_tokens=2048, system_prompt=None)
+```
+
+### 4. Use Low Temperature for Structured Output
+```python
+# ✅ Good: Low temperature for consistent structured output
+result = gemini_structured_json_response(prompt, schema, temperature=0.1, max_tokens=8192)
+
+# ✅ Good: Higher temperature for creative text
+result = gemini_text_response(prompt, temperature=0.8, top_p=0.9, n=1, max_tokens=2048, system_prompt=None)
+```
+
+### 5. Implement Proper Error Handling
+```python
+# ✅ Good: Handle errors in calling functions
+try:
+    response = gemini_structured_json_response(prompt, schema)
+    if isinstance(response, dict) and "error" in response:
+        raise Exception(f"Gemini error: {response.get('error')}")
+    # Process successful response
+except Exception as e:
+    logger.error(f"AI service error: {e}")
+    # Handle error appropriately
+```
+
+### 6. Avoid Fallback to Text Parsing
+```python
+# ✅ Good: Use structured output only, no fallback
+response = gemini_structured_json_response(prompt, schema)
+if "error" in response:
+    raise Exception(f"Gemini error: {response.get('error')}")
+
+# ❌ Avoid: Fallback to text parsing for structured responses
+# This can lead to inconsistent results and parsing errors
+```
+
+## Usage Examples
+
+### Structured JSON Response
+```python
+from services.llm_providers.gemini_provider import gemini_structured_json_response
+
+# Define schema
+monitoring_schema = {
+    "type": "object",
+    "properties": {
+        "monitoringTasks": {
+            "type": "array",
+            "items": {
+                "type": "object",
+                "properties": {
+                    "component": {"type": "string"},
+                    "title": {"type": "string"},
+                    "description": {"type": "string"},
+                    "assignee": {"type": "string"},
+                    "frequency": {"type": "string"},
+                    "metric": {"type": "string"},
+                    "measurementMethod": {"type": "string"},
+                    "successCriteria": {"type": "string"},
+                    "alertThreshold": {"type": "string"},
+                    "actionableInsights": {"type": "string"}
+                }
+            }
+        }
+    }
+}
+
+# Generate structured response
+prompt = "Generate a monitoring plan for content strategy..."
+result = gemini_structured_json_response(
+    prompt=prompt,
+    schema=monitoring_schema,
+    temperature=0.1,
+    max_tokens=8192
+)
+
+# Handle response
+if isinstance(result, dict) and "error" in result:
+    raise Exception(f"Gemini error: {result.get('error')}")
+
+# Process successful response
+monitoring_tasks = result.get("monitoringTasks", [])
+```
+
+### Text Response
+```python
+from services.llm_providers.gemini_provider import gemini_text_response
+
+# Generate text response
+prompt = "Write a blog post about AI in content marketing..."
+result = gemini_text_response(
+    prompt=prompt,
+    temperature=0.8, top_p=0.9, n=1,
+    max_tokens=2048, system_prompt=None,
+)
+
+# Process response
+if result:
+    print(f"Generated text: {result}")
+else:
+    print("No response generated")
+```
+
+## Troubleshooting
+
+### Common Issues and Solutions
+
+#### 1. Response.parsed is None
+**Symptoms**: `response.parsed` returns `None` even with successful HTTP 200
+**Causes**: 
+- Schema too complex for the model
+- Token limit too low
+- Temperature too high for structured output
+
+**Solutions**:
+- Simplify schema structure
+- Increase `max_tokens` to 8192
+- Lower temperature to 0.1-0.3
+- Test with smaller outputs first
+
+#### 2. JSON Parsing Fails
+**Symptoms**: `JSONDecodeError` or "Unterminated string" errors
+**Causes**:
+- Response truncated due to token limits
+- Schema doesn't match expected output
+- Model generates malformed JSON
+
+**Solutions**:
+- Reduce output size requested
+- Verify schema matches expected structure
+- Use structured output instead of text parsing
+- Increase token limits
+
+#### 3. Truncation Issues
+**Symptoms**: Response cuts off mid-sentence or mid-array
+**Causes**:
+- Output too large for single response
+- Token limits exceeded
+
+**Solutions**:
+- Reduce number of items requested
+- Increase `max_tokens` to 8192
+- Break large requests into smaller chunks
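+- Use `gemini-2.5-pro` for larger outputs (the model name is currently hard-coded to `gemini-2.5-flash` in `gemini_structured_json_response`, so this requires editing the provider)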
+
+#### 4. Rate Limiting
+**Symptoms**: `RetryError` or connection timeouts
+**Causes**:
+- Too many requests in short time
+- Network connectivity issues
+
+**Solutions**:
+- Rely on the built-in retry: `gemini_text_response` already retries with randomized exponential backoff (up to 6 attempts)
+- Check network connectivity
+- Reduce request frequency
+- Verify API key validity
+
+### Debug Logging
+
+The module includes comprehensive debug logging. Enable debug mode to see:
+
+```python
+import logging
+logging.getLogger('services.llm_providers.gemini_provider').setLevel(logging.DEBUG)
+```
+
+Key log messages to monitor (the structured-output path emits these at INFO level):
+- `Gemini structured call | prompt_len=X | schema_kind=Y | temp=Z`
+- `Gemini response | type=X | has_text=Y | has_parsed=Z`
+- `Using response.parsed for structured output`
+- `Falling back to response.text parsing`
+
+## API Reference
+
+### gemini_structured_json_response()
+
+Generate a structured JSON response using Google's Gemini API (the provider currently calls `gemini-2.5-flash`).
+
+**Parameters**:
+- `prompt` (str): Input prompt for the AI model
+- `schema` (dict): JSON schema defining expected output structure
+- `temperature` (float): Controls randomness (0.0-1.0). Use 0.1-0.3 for structured output
+- `top_p` (float): Nucleus sampling parameter (0.0-1.0)
+- `top_k` (int): Top-k sampling parameter
+- `max_tokens` (int): Maximum tokens in response. Use 8192 for complex outputs
+- `system_prompt` (str, optional): System instruction for the model
+
+**Returns**:
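+- `dict`: Parsed JSON response matching the provided schema. If the response is empty or cannot be parsed as JSON, a dict with `error` and `raw_response` keys is returned instead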
+
+**Raises**:
+- `Exception`: If API key is missing or API call fails
+
+### gemini_text_response()
+
+Generate a text response using Google's Gemini API.
+
+**Parameters**:
+- `prompt` (str): Input prompt for the AI model
+- `temperature` (float): Controls randomness (0.0-1.0). Higher = more creative
+- `top_p` (float): Nucleus sampling parameter (0.0-1.0)
+- `n` (int): Number of responses to generate
+- `max_tokens` (int): Maximum tokens in response
+- `system_prompt` (str): System instruction for the model. Note: the function signature defines no default values, so all six arguments must be supplied by the caller
+
+**Returns**:
+- `str`: Generated text response
+
+**Raises**:
+- `Exception`: If API key is missing or API call fails
+
+## Dependencies
+
+- `google-genai` (`from google import genai`): Official Google Gen AI SDK; provides `genai.Client` and `types.GenerateContentConfig`
+- `tenacity`: Retry logic with exponential backoff
+- `logging`: Debug and error logging
+- `json`: Fallback JSON parsing
+- `re`: Text extraction utilities
+
+## Version History
+
+- **v2.0** (January 2025): Enhanced structured output support, improved error handling, comprehensive documentation
+- **v1.0**: Initial implementation with basic text and structured response support
+
+## Contributing
+
+When contributing to this module:
+
+1. Follow the established patterns for error handling
+2. Add comprehensive logging for debugging
+3. Test with both simple and complex schemas
+4. Update documentation for any new features
+5. Ensure backward compatibility
+
+## Support
+
+For issues or questions:
+1. Check the troubleshooting section above
+2. Review debug logs for specific error messages
+3. Test with simplified schemas to isolate issues
+4. Verify API key configuration and network connectivity
--- a/backend/services/llm_providers/gemini_provider.py
+++ b/backend/services/llm_providers/gemini_provider.py
@@ -1,4 +1,59 @@
-# Using Gemini Pro LLM model
+"""
+Gemini Provider Module for ALwrity
+
+This module provides functions for interacting with Google's Gemini API, specifically designed
+for structured JSON output and text generation. It follows the official Gemini API documentation
+and implements best practices for reliable AI interactions.
+
+Key Features:
+- Structured JSON response generation with schema validation
+- Text response generation with retry logic
+- Comprehensive error handling and logging
+- Automatic API key management
+- Support for both gemini-2.5-flash and gemini-2.5-pro models
+
+Best Practices:
+1. Use structured output for complex, multi-field responses
+2. Keep schemas simple and flat to avoid truncation
+3. Set appropriate token limits (8192 for complex outputs)
+4. Use low temperature (0.1-0.3) for consistent structured output
+5. Implement proper error handling in calling functions
+6. Avoid fallback to text parsing for structured responses
+
+Usage Examples:
+    # Structured JSON response
+    schema = {
+        "type": "object",
+        "properties": {
+            "tasks": {
+                "type": "array",
+                "items": {"type": "object", "properties": {...}}
+            }
+        }
+    }
+    result = gemini_structured_json_response(prompt, schema, temperature=0.2, max_tokens=8192)
+    
+    # Text response
+    result = gemini_text_response(prompt, temperature=0.7, top_p=0.9, n=1, max_tokens=2048, system_prompt=None)
+
+Troubleshooting:
+- If response.parsed is None: Check schema complexity and token limits
+- If JSON parsing fails: Verify schema matches expected output structure
+- If truncation occurs: Reduce output size or increase max_tokens
+- If rate limiting: Implement exponential backoff (already included)
+
+Dependencies:
+- google-genai SDK (genai.Client, types.GenerateContentConfig)
+- tenacity (for retry logic)
+- logging (for debugging)
+- json (for fallback parsing)
+- re (for text extraction)
+
+Author: ALwrity Team
+Version: 2.0
+Last Updated: January 2025
+"""
+
 import os
 import sys
 from pathlib import Path
@@ -62,7 +117,39 @@ def get_gemini_api_key() -> str:

@retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6))
 def gemini_text_response(prompt, temperature, top_p, n, max_tokens, system_prompt):
-    """ Common functiont to get response from gemini pro Text. """
+    """
+    Generate text response using Google's Gemini Pro model.
+    
+    This function provides simple text generation with retry logic and error handling.
+    For structured output, use gemini_structured_json_response instead.
+    
+    Args:
+        prompt (str): The input prompt for the AI model
+        temperature (float): Controls randomness (0.0-1.0). Higher = more creative
+        top_p (float): Nucleus sampling parameter (0.0-1.0)
+        n (int): Number of responses to generate
+        max_tokens (int): Maximum tokens in response
+        system_prompt (str, optional): System instruction for the model
+    
+    Returns:
+        str: Generated text response
+        
+    Raises:
+        Exception: If API key is missing or API call fails
+        
+    Best Practices:
+        - Use temperature 0.7-0.9 for creative content
+        - Use temperature 0.1-0.3 for factual/consistent content
+        - Set appropriate max_tokens based on expected response length
+        - Implement proper error handling in calling functions
+        
+    Example:
+        result = gemini_text_response(
+            "Write a blog post about AI", 
+            temperature=0.8, 
+            max_tokens=1024
+        )
+    """
    #FIXME: Include : https://github.com/google-gemini/cookbook/blob/main/quickstarts/rest/System_instructions_REST.ipynb
    try:
        api_key = get_gemini_api_key()
@@ -97,51 +184,9 @@ def gemini_text_response(prompt, temperature, top_p, n, max_tokens, system_promp
        return response.text
    except Exception as err:
        logger.error(f"Failed to get response from Gemini: {err}. Retrying.")
+        raise


-#@retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6))
-#def gemini_blog_metadata_json(blog_content):
-#    """ Common functiont to get response from gemini pro Text. """
-#    prompt =  f"I will provide you with the content of a blog post. Based on this content, you need to generate the following elements in JSON format:\n\n1. **Blog Title**: A compelling and relevant title that summarizes the blog content.\n2. **Meta Description**: A concise meta description (up to 160 characters) that captures the essence of the blog post and encourages clicks.\n3. **Tags**: A list of 5-10 relevant tags that represent the key topics covered in the blog post.\n4. **Categories**: A list of 1-3 appropriate categories that best describe the blog post's main themes.\n\nOutput your response in the following JSON format:\n\n```json\n{\n  \"type\": \"object\",\n  \"properties\": {\n    \"blog_title\": {\n      \"type\": \"string\"\n    },\n    \"meta_description\": {\n      \"type\": \"string\"\n    },\n    \"tags\": {\n      \"type\": \"array\",\n      \"items\": {\n        \"type\": \"string\"\n      }\n    },\n    \"categories\": {\n      \"type\": \"array\",\n      \"items\": {\n        \"type\": \"string\"\n      }\n    }\n  }\n}\n\n. The Blog Content is given below: \n\n{blog_content}\n\n"
-#    
-#    try:
-#        genai.configure(api_key=os.getenv('GEMINI_API_KEY'))
-#    except Exception as err:
-#        logger.error(f"Failed to configure Gemini: {err}")
-#
-#    # Create the model
-#    generation_config = {
-#        "temperature": 1,
-#        "top_p": 0.95,
-#        "top_k": 64,
-#        "max_output_tokens": 8192,
-#        "response_schema": content.Schema(
-#        type = content.Type.OBJECT,
-#            properties = {
-#                "response": content.Schema(
-#                    type = content.Type.STRING,
-#                    ),
-#            },
-#        ),
-#        "response_mime_type": "application/json",
-#    }
-#
-#    model = genai.GenerativeModel(
-#        model_name="gemini-1.5-flash",
-#        generation_config=generation_config,
-#        # safety_settings = Adjust safety settings
-#        # See https://ai.google.dev/gemini-api/docs/safety-settings
-#        )
-#
-#        try:
-#        # text_response = []
-#        response = model.generate_content(prompt)
-#        if response:
-#            logger.info(f"Number of Token in Prompt Sent: {model.count_tokens(prompt)}")
-#            return response.text
-#    except Exception as err:
-#        logger.error(f"Failed to get SEO METADATA from Gemini: {err}. Retrying.")
-
 async def test_gemini_api_key(api_key: str) -> tuple[bool, str]:
    """
    Test if the provided Gemini API key is valid.
@@ -243,6 +288,8 @@ def _dict_to_types_schema(schema: Dict[str, Any]) -> types.Schema:
            return types.Schema(type=types.Type.ARRAY, items=item_schema)
        elif node_type == "NUMBER":
            return types.Schema(type=types.Type.NUMBER)
+        elif node_type == "INTEGER":
+            return types.Schema(type=types.Type.NUMBER)
        elif node_type == "BOOLEAN":
            return types.Schema(type=types.Type.BOOLEAN)
        else:
@@ -254,6 +301,49 @@ def _dict_to_types_schema(schema: Dict[str, Any]) -> types.Schema:
 def gemini_structured_json_response(prompt, schema, temperature=0.7, top_p=0.9, top_k=40, max_tokens=8192, system_prompt=None):
    """
    Generate structured JSON response using Google's Gemini Pro model.
+    
+    This function follows the official Gemini API documentation for structured output:
+    https://ai.google.dev/gemini-api/docs/structured-output#python
+    
+    Args:
+        prompt (str): The input prompt for the AI model
+        schema (dict): JSON schema defining the expected output structure
+        temperature (float): Controls randomness (0.0-1.0). Use 0.1-0.3 for structured output
+        top_p (float): Nucleus sampling parameter (0.0-1.0)
+        top_k (int): Top-k sampling parameter
+        max_tokens (int): Maximum tokens in response. Use 8192 for complex outputs
+        system_prompt (str, optional): System instruction for the model
+    
+    Returns:
+        dict: Parsed JSON response matching the provided schema
+        
+    Raises:
+        Exception: If API key is missing or API call fails
+        
+    Best Practices:
+        - Keep schemas simple and flat to avoid truncation
+        - Use low temperature (0.1-0.3) for consistent structured output
+        - Set max_tokens to 8192 for complex multi-field responses
+        - Avoid deeply nested schemas with many required fields
+        - Test with smaller outputs first, then scale up
+        
+    Example:
+        schema = {
+            "type": "object",
+            "properties": {
+                "tasks": {
+                    "type": "array",
+                    "items": {
+                        "type": "object",
+                        "properties": {
+                            "title": {"type": "string"},
+                            "description": {"type": "string"}
+                        }
+                    }
+                }
+            }
+        }
+        result = gemini_structured_json_response(prompt, schema, temperature=0.2, max_tokens=8192)
    """
    try:
        # Get API key with proper error handling
@@ -261,59 +351,65 @@ def gemini_structured_json_response(prompt, schema, temperature=0.7, top_p=0.9,
        client = genai.Client(api_key=api_key)
        logger.info("✅ Gemini client initialized for structured JSON response")

-        # Build config using official SDK schema type
+        # Prepare schema for SDK (dict -> types.Schema). If schema is already a types.Schema or Pydantic type, use as-is
        try:
-            types_schema = _dict_to_types_schema(schema) if isinstance(schema, dict) else schema
+            if isinstance(schema, dict):
+                types_schema = _dict_to_types_schema(schema)
+            else:
+                types_schema = schema
        except Exception as conv_err:
-            logger.warning(f"Schema conversion warning, defaulting to OBJECT: {conv_err}")
+            logger.info(f"Schema conversion warning, defaulting to OBJECT: {conv_err}")
            types_schema = types.Schema(type=types.Type.OBJECT)

+        # Add debugging for API call
+        logger.info(
+            "Gemini structured call | prompt_len=%s | schema_kind=%s | temp=%s | top_p=%s | top_k=%s | max_tokens=%s",
+            len(prompt) if isinstance(prompt, str) else '<non-str>',
+            type(types_schema).__name__,
+            temperature,
+            top_p,
+            top_k,
+            max_tokens,
+        )
+        
+        # Use the official SDK GenerateContentConfig with response_schema
        generation_config = types.GenerateContentConfig(
-            system_instruction=system_prompt,
+            response_mime_type='application/json',
+            response_schema=types_schema,
            max_output_tokens=max_tokens,
            temperature=temperature,
            top_p=top_p,
            top_k=top_k,
-            response_mime_type='application/json',
-            response_schema=types_schema
+            system_instruction=system_prompt,
        )

-        # Add debugging for API call
-        logger.debug(f"Gemini API call - prompt length: {len(prompt)}, schema keys: {list(schema.keys()) if isinstance(schema, dict) else 'N/A'}")
-        
        response = client.models.generate_content(
-            model='gemini-2.5-flash',
+            model="gemini-2.5-flash",
            contents=prompt,
            config=generation_config,
        )

        # Add debugging for response
-        logger.debug(f"Gemini response type: {type(response)}")
-        logger.debug(f"Gemini response has text: {hasattr(response, 'text')}")
-        logger.debug(f"Gemini response has parsed: {hasattr(response, 'parsed')}")
+        logger.info("Gemini response | type=%s | has_text=%s | has_parsed=%s",
+                     type(response), hasattr(response, 'text'), hasattr(response, 'parsed'))
        
        if hasattr(response, 'text'):
-            logger.debug(f"Gemini response.text: {repr(response.text)}")
+            logger.info(f"Gemini response.text: {repr(response.text)}")
        if hasattr(response, 'parsed'):
-            logger.debug(f"Gemini response.parsed: {repr(response.parsed)}")
+            logger.info(f"Gemini response.parsed: {repr(response.parsed)}")

-        # Prefer parsed if present and non-empty; otherwise parse text with fallbacks
-        try:
-            parsed = getattr(response, 'parsed', None)
-            if parsed:
-                logger.debug(f"Using parsed response: {type(parsed)}")
-                return parsed if isinstance(parsed, dict) else json.loads(json.dumps(parsed))
-            
-            text = (response.text or '').strip()
-            logger.debug(f"Using text response, length: {len(text)}")
-            
-            if not text:
-                logger.error("Gemini returned empty text response")
-                return {"error": "Empty response from Gemini API", "raw_response": ""}
+        # According to the documentation, we should use response.parsed for structured output
+        if hasattr(response, 'parsed') and response.parsed is not None:
+            logger.info("Using response.parsed for structured output")
+            return response.parsed
+        
+        # Fallback to text if parsed is not available
+        if hasattr(response, 'text') and response.text:
+            logger.info("Falling back to response.text parsing")
+            text = response.text.strip()
            
            # Strip markdown code fences if present
            if text.startswith('```'):
-                # remove leading ```json or ``` and trailing ```
                if text.lower().startswith('```json'):
                    text = text[7:]
                else:
@@ -322,61 +418,14 @@ def gemini_structured_json_response(prompt, schema, temperature=0.7, top_p=0.9,
                    text = text[:-3]
                text = text.strip()
            
-            # Try direct JSON parsing first
            try:
                return json.loads(text)
            except json.JSONDecodeError as e:
-                logger.warning(f"Direct JSON parsing failed: {e}")
-                logger.debug(f"Failed to parse text: {text[:200]}...")
-                
-                # Check if response is truncated (common cause of JSON errors)
-                if text.endswith('...') or text.endswith('"') or text.endswith(','):
-                    logger.warning("Response appears to be truncated, attempting partial parsing")
-                    # Try to extract what we can from truncated response
-                    partial_result = _extract_partial_json(text)
-                    if partial_result:
-                        logger.info("Successfully extracted partial JSON from truncated response")
-                        return partial_result
-                
-                # Fallback 1: Extract likely JSON object substring
-                first = text.find('{')
-                last = text.rfind('}')
-                if first != -1 and last != -1 and last > first:
-                    candidate = text[first:last+1]
-                    try:
-                        return json.loads(candidate)
-                    except json.JSONDecodeError:
-                        logger.warning("JSON object extraction failed, trying regex")
-                
-                # Fallback 2: Regex any object
-                import re
-                match = re.search(r'\{[\s\S]*\}', text)
-                if match:
-                    try:
-                        return json.loads(match.group(0))
-                    except json.JSONDecodeError:
-                        logger.warning("Regex JSON extraction failed, trying repair")
-                
-                # Fallback 3: Attempt to repair common JSON issues
-                repaired = _repair_json_string(text)
-                if repaired:
-                    try:
-                        return json.loads(repaired)
-                    except json.JSONDecodeError:
-                        logger.warning("JSON repair failed")
-                
-                # Fallback 4: Extract and parse individual key-value pairs
-                extracted = _extract_key_value_pairs(text)
-                if extracted:
-                    return extracted
-                
-                # Final fallback: return error with raw response for debugging
-                logger.error(f"All JSON parsing attempts failed for text: {text[:200]}...")
+                logger.error(f"Failed to parse response.text as JSON: {e}")
                return {"error": f"Failed to parse JSON response: {e}", "raw_response": text[:500]}
-                
-        except Exception as e:
-            logger.error(f"Error parsing structured response: {e}")
-            return {"error": f"Failed to parse JSON response: {e}", "raw_response": (response.text or '')}
+        
+        logger.error("No valid response content found")
+        return {"error": "No valid response content found", "raw_response": ""}

    except ValueError as e:
        # API key related errors