ALwrity AI Blog Writer - Added Google Grounding UI Implementation

2025-09-18 18:45:53 +05:30
parent 9f13daf443
commit 4d153b292d
72 changed files with 11944 additions and 1526 deletions
--- a/backend/services/llm_providers/gemini_provider.py
+++ b/backend/services/llm_providers/gemini_provider.py
@@ -440,7 +440,8 @@ def gemini_structured_json_response(prompt, schema, temperature=0.7, top_p=0.9,
        return {"error": str(e)}


-def _repair_json_string(text: str) -> Optional[str]:
+# Disabled JSON repair to avoid false positives: kept for reference, renamed with a `_removed_` prefix rather than deleted
+def _removed_repair_json_string(text: str) -> Optional[str]:
    """
    Attempt to repair common JSON issues in AI responses.
    """
@@ -489,13 +490,21 @@ def _repair_json_string(text: str) -> Optional[str]:
        fixed_lines.append(line)
    repaired = '\n'.join(fixed_lines)
    
-    # 3. Fix unescaped quotes in string values
-    # This is complex - we'll use a simple approach
+    # 3. Fix unterminated strings (common issue with AI responses)
    try:
-        # Try to balance quotes by adding missing ones
+        # Handle unterminated strings by finding the last incomplete string and closing it
        lines = repaired.split('\n')
        fixed_lines = []
-        for line in lines:
+        for i, line in enumerate(lines):
+            stripped = line.strip()
+            # Candidate for an unterminated string: the line ends with a double quote and is not the last line
+            if stripped.endswith('"') and i < len(lines) - 1:
+                next_line = lines[i + 1].strip()
+                # If the next line doesn't start with a quote, ']' or '}', we might have an unterminated string
+                if not next_line.startswith('"') and not next_line.startswith(']') and not next_line.startswith('}'):
+                    # Check if this looks like an unterminated string value
+                    if ':' in line and not line.strip().endswith('",'):
+                        line = line + '",'
            # Count quotes in the line
            quote_count = line.count('"')
            if quote_count % 2 == 1:  # Odd number of quotes
@@ -518,7 +527,8 @@ def _repair_json_string(text: str) -> Optional[str]:
    return repaired


-def _extract_partial_json(text: str) -> Optional[Dict[str, Any]]:
+# Disabled partial JSON extraction to avoid false positives: kept for reference, renamed with a `_removed_` prefix rather than deleted
+def _removed_extract_partial_json(text: str) -> Optional[Dict[str, Any]]:
    """
    Extract partial JSON from truncated responses.
    Attempts to salvage as much data as possible from incomplete JSON.
@@ -572,26 +582,77 @@ def _extract_partial_json(text: str) -> Optional[Dict[str, Any]]:
            # Try to extract individual fields as a last resort
            fields = {}
            
-            # Extract key-value pairs using regex
-            kv_pattern = r'"([^"]+)"\s*:\s*"([^"]*)"'
-            matches = re.findall(kv_pattern, json_text)
-            for key, value in matches:
-                fields[key] = value
+            # Extract key-value pairs using regex (more comprehensive patterns)
+            kv_patterns = [
+                r'"([^"]+)"\s*:\s*"([^"]*)"',  # "key": "value"
+                r'"([^"]+)"\s*:\s*(\d+)',      # "key": 123
+                r'"([^"]+)"\s*:\s*(true|false)', # "key": true/false
+                r'"([^"]+)"\s*:\s*null',       # "key": null
+            ]
            
-            # Extract array fields
+            for pattern in kv_patterns:
+                matches = re.findall(pattern, json_text)
+                for key, value in matches:
+                    if value == 'true':
+                        fields[key] = True
+                    elif value == 'false':
+                        fields[key] = False
+                    elif value == 'null':
+                        fields[key] = None
+                    elif value.isdigit():
+                        fields[key] = int(value)
+                    else:
+                        fields[key] = value
+            
+            # Extract array fields (more robust)
            array_pattern = r'"([^"]+)"\s*:\s*\[([^\]]*)\]'
            array_matches = re.findall(array_pattern, json_text)
            for key, array_content in array_matches:
-                # Parse array items
+                # Parse array items more comprehensively
                items = []
-                item_pattern = r'"([^"]*)"'
-                item_matches = re.findall(item_pattern, array_content)
-                items.extend(item_matches)
-                fields[key] = items
+                # Look for quoted strings, numbers, booleans, null
+                item_patterns = [
+                    r'"([^"]*)"',  # quoted strings
+                    r'(\d+)',      # numbers
+                    r'(true|false)', # booleans
+                    r'(null)',     # null
+                ]
+                for pattern in item_patterns:
+                    item_matches = re.findall(pattern, array_content)
+                    for match in item_matches:
+                        if match == 'true':
+                            items.append(True)
+                        elif match == 'false':
+                            items.append(False)
+                        elif match == 'null':
+                            items.append(None)
+                        elif match.isdigit():
+                            items.append(int(match))
+                        else:
+                            items.append(match)
+                if items:
+                    fields[key] = items
+            
+            # Extract nested object fields (basic: flat objects only, string values only)
+            object_pattern = r'"([^"]+)"\s*:\s*\{([^}]*)\}'
+            object_matches = re.findall(object_pattern, json_text)
+            for key, object_content in object_matches:
+                # Simple nested object extraction
+                nested_fields = {}
+                nested_kv_matches = re.findall(r'"([^"]+)"\s*:\s*"([^"]*)"', object_content)
+                for nested_key, nested_value in nested_kv_matches:
+                    nested_fields[nested_key] = nested_value
+                if nested_fields:
+                    fields[key] = nested_fields
            
            if fields:
-                logger.info(f"Extracted {len(fields)} fields from truncated JSON")
-                return fields
+                logger.info(f"Extracted {len(fields)} fields from truncated JSON: {list(fields.keys())}")
+                # Only return when an 'outline' list was recovered; every other extracted field is discarded
+                if 'outline' in fields and isinstance(fields['outline'], list):
+                    return {'outline': fields['outline']}
+                else:
+                    logger.error("No valid 'outline' field found in partial JSON")
+                    return None
            
            return None
            
@@ -600,7 +661,8 @@ def _extract_partial_json(text: str) -> Optional[Dict[str, Any]]:
        return None


-def _extract_key_value_pairs(text: str) -> Optional[Dict[str, Any]]:
+# Disabled key-value extraction to avoid false positives: kept for reference, renamed with a `_removed_` prefix rather than deleted
+def _removed_extract_key_value_pairs(text: str) -> Optional[Dict[str, Any]]:
    """
    Extract key-value pairs from malformed JSON text as a last resort.
    """