Repair huggingface provider and restore explicit retry/fallback behavior

2026-03-12 16:29:50 +05:30
parent 679c0e8c89
commit 968900858c
1 changed files with 149 additions and 495 deletions
--- a/backend/services/llm_providers/huggingface_provider.py
+++ b/backend/services/llm_providers/huggingface_provider.py
@@ -1,106 +1,50 @@
 """
-Hugging Face Provider Module for ALwrity
+Hugging Face Provider Module for ALwrity.
-This module provides functions for interacting with Hugging Face's Inference Providers API
+Provides text and structured JSON generation through Hugging Face Router
-using the Responses API (beta) which provides a unified interface for model interactions.
+(OpenAI-compatible API), with retry and explicit fallback controls.
 Key Features:
 - Text response generation with retry logic
 - Structured JSON response generation with schema validation
 - Comprehensive error handling and logging
 - Automatic API key management
 - Support for various Hugging Face models via Inference Providers
 Best Practices:
 1. Use structured output for complex, multi-field responses
 2. Keep schemas simple and flat to avoid truncation
 3. Set appropriate token limits (8192 for complex outputs)
 4. Use low temperature (0.1-0.3) for consistent structured output
 5. Implement proper error handling in calling functions
 6. Use the Responses API for better compatibility
 Usage Examples:
    # Text response
    result = huggingface_text_response(prompt, temperature=0.7, max_tokens=2048)
    # Structured JSON response
    schema = {
        "type": "object",
        "properties": {
            "tasks": {
                "type": "array",
                "items": {"type": "object", "properties": {...}}
            }
        }
    }
    result = huggingface_structured_json_response(prompt, schema, temperature=0.2, max_tokens=8192)
 Dependencies:
 - openai (for Hugging Face Responses API)
 - tenacity (for retry logic)
 - logging (for debugging)
 - json (for fallback parsing)
 Author: ALwrity Team
 Version: 1.0
 Last Updated: January 2025
 """
 import os
 import json
 import os
 import re
 from functools import lru_cache
-from typing import Optional, Dict, Any
+from typing import Any, Dict, List, Optional
 from tenacity import retry, retry_if_exception, stop_after_attempt, wait_random_exponential
 from loguru import logger
 from utils.logger_utils import get_service_logger
 from .routing_policy import PREMIUM_DEFAULT_MODEL, SIF_LOW_COST_MODEL_DEFAULTS
 # Use service-specific logger to avoid conflicts
 logger = get_service_logger("huggingface_provider")
 <<<<<<< HEAD
 from tenacity import (
    retry,
    retry_if_exception,
    stop_after_attempt,
    wait_random_exponential,
 )
 =======
 >>>>>>> pr-416
 try:
-    from openai import OpenAI
+    from openai import NotFoundError, OpenAI
-    OPENAI_AVAILABLE = True
+
-except ImportError:
+    OPENAI_AVAILABLE = True
-    OPENAI_AVAILABLE = False
+except ImportError:  # pragma: no cover - environment-dependent
-    logger.warn("OpenAI library not available. Install with: pip install openai")
+    OPENAI_AVAILABLE = False
    OpenAI = None
    NotFoundError = Exception
    logger.warning("OpenAI library not available. Install with: pip install openai")
 <<<<<<< HEAD
 HF_FALLBACK_MODELS = [
 <<<<<<< HEAD
    "openai/gpt-oss-120b:cerebras",
    "moonshotai/Kimi-K2-Instruct-0905:cerebras",
    "meta-llama/Llama-3.1-8B-Instruct:cerebras",
    "mistralai/Mistral-7B-Instruct-v0.3:cerebras",
 =======
    PREMIUM_DEFAULT_MODEL,
    "moonshotai/Kimi-K2-Instruct-0905:groq",
    "meta-llama/Llama-3.1-8B-Instruct:groq",
    SIF_LOW_COST_MODEL_DEFAULTS[0],
 >>>>>>> pr-417
 ]
 def _candidate_model_variants(model: str):
-    """Yield model ids to try for a single logical model preference."""
+    """Yield model IDs to try for a single logical model preference."""
    if not model:
        return
-    # Try configured model first (supports provider suffixes like ":cerebras")
+    # Try configured model first (supports provider suffixes like ':groq').
    yield model
-    # Fallback to base repo id when provider suffix is not recognized by the router
+    # Fallback to base repo id when provider suffix isn't recognized.
    if ":" in model:
        base_model = model.split(":", 1)[0]
        if base_model:
@@ -108,12 +52,16 @@ def _candidate_model_variants(model: str):
 def _fallback_model_sequence(model: str, fallback_models: Optional[List[str]] = None):
-    # IMPORTANT: Do not apply implicit global fallback chains.
+    """Yield unique model candidates preserving caller-defined order.
-    # Callers must explicitly provide fallback_models when they want multi-model retries.
+
    IMPORTANT: no implicit global fallback chain is applied here; callers must
    explicitly pass fallback_models if they want multi-model retries.
    """
    if fallback_models:
        sequence = [model] + fallback_models
    else:
        sequence = [model]
    seen = set()
    for preferred_model in sequence:
        for candidate in _candidate_model_variants(preferred_model):
@@ -123,11 +71,9 @@ def _fallback_model_sequence(model: str, fallback_models: Optional[List[str]] =
 def _is_non_retryable_hf_error(exc: Exception) -> bool:
    """Skip retries for deterministic HF failures (e.g., unknown model ids, billing)."""
    msg = str(exc).lower()
    status = getattr(exc, "status_code", None)
-    
+
    # Non-retryable errors
    if isinstance(exc, NotFoundError) or "not found" in msg or "404" in msg:
        return True
    if status == 402 or "402" in msg or "depleted" in msg or "credits" in msg:
@@ -136,7 +82,6 @@ def _is_non_retryable_hf_error(exc: Exception) -> bool:
        return True
    if status == 403 or "forbidden" in msg or "403" in msg:
        return True
    return False
@@ -145,7 +90,6 @@ def _should_retry_hf_error(exc: Exception) -> bool:
 def _classify_hf_error(exc: Exception) -> str:
    """Classify HF failures for actionable logs."""
    msg = str(exc).lower()
    if any(token in msg for token in ["insufficient", "balance", "quota", "billing", "payment", "402"]):
        return "billing_or_quota"
@@ -156,445 +100,175 @@ def _classify_hf_error(exc: Exception) -> str:
    return "unknown"
-def _hf_error_details(exc: Exception) -> str:
+def _error_details(exc: Exception) -> Dict[str, str]:
    """Return compact, actionable exception details for logs."""
    status = getattr(exc, "status_code", None)
    err_type = type(exc).__name__
    message = str(exc)
    raw_body = getattr(exc, "body", None)
    details = f"type={err_type}"
    if status is not None:
        details += f", status={status}"
    if message:
        details += f", message={message}"
    if raw_body:
        details += f", body={raw_body}"
    details += f", repr={repr(exc)}"
    return details
 def get_huggingface_api_key() -> str:
 =======
 def _classify_hf_error(error: Exception) -> str:
    message = str(error or "").lower()
    if any(x in message for x in ["insufficient", "quota", "billing", "payment", "credits", "balance"]):
        return "billing_or_quota"
    if any(x in message for x in ["unauthorized", "forbidden", "permission", "invalid api key", "authentication"]):
        return "auth_or_permission"
    if ("not found" in message) or ("404" in message):
        return "model_not_found"
    return "other"
 def _error_details(error: Exception) -> Dict[str, str]:
    return {
-        "type": type(error).__name__,
+        "type": type(exc).__name__,
-        "message": str(error),
+        "message": str(exc),
-        "repr": repr(error),
+        "repr": repr(exc),
    }
 def get_huggingface_api_key(explicit_api_key: Optional[str] = None) -> str:
->>>>>>> pr-416
+    """Get Hugging Face API key with basic validation."""
-    """Get Hugging Face API key with proper error handling."""
+    api_key = explicit_api_key or os.getenv("HF_TOKEN")
    api_key = explicit_api_key or os.getenv('HF_TOKEN')
    if not api_key:
        error_msg = "HF_TOKEN environment variable is not set. Please set it in your .env file."
        logger.error(error_msg)
        raise ValueError(error_msg)
-    
+
-    # Validate API key format (basic check)
+    if not api_key.startswith("hf_"):
    if not api_key.startswith('hf_'):
        error_msg = "HF_TOKEN appears to be invalid. It should start with 'hf_'."
        logger.error(error_msg)
        raise ValueError(error_msg)
-    
+
    return api_key
-<<<<<<< HEAD
+
@retry(
    retry=retry_if_exception(_should_retry_hf_error),
    wait=wait_random_exponential(min=1, max=60),
    stop=stop_after_attempt(6),
 )
 =======
@lru_cache(maxsize=16)
 def _get_hf_client(api_key: str):
    return OpenAI(base_url="https://router.huggingface.co/v1", api_key=api_key)
->>>>>>> pr-416
+@retry(
    retry=retry_if_exception(_should_retry_hf_error),
    wait=wait_random_exponential(min=1, max=60),
    stop=stop_after_attempt(6),
 )
 def huggingface_text_response(
    prompt: str,
 <<<<<<< HEAD
    model: str = "openai/gpt-oss-120b:cerebras",
    fallback_models: Optional[List[str]] = None,
 =======
    model: str = PREMIUM_DEFAULT_MODEL,
->>>>>>> pr-417
+    fallback_models: Optional[List[str]] = None,
    temperature: float = 0.7,
    max_tokens: int = 2048,
    top_p: float = 0.9,
    system_prompt: Optional[str] = None,
    api_key: Optional[str] = None,
 ) -> str:
-    """
+    """Generate text with explicit fallback model sequence."""
    Generate text response using Hugging Face Inference Providers API.
    This function uses the Hugging Face Responses API which provides a unified interface
    for model interactions with built-in retry logic and error handling.
    Args:
        prompt (str): The input prompt for the AI model
        model (str): Hugging Face model identifier (default: "openai/gpt-oss-120b:groq")
        temperature (float): Controls randomness (0.0-1.0)
        max_tokens (int): Maximum tokens in response
        top_p (float): Nucleus sampling parameter (0.0-1.0)
        system_prompt (str, optional): System instruction for the model
    Returns:
        str: Generated text response
    Raises:
        Exception: If API key is missing or API call fails
    Best Practices:
        - Use appropriate temperature for your use case (0.7 for creative, 0.1-0.3 for factual)
        - Set max_tokens based on expected response length
        - Use system_prompt to guide model behavior
        - Handle errors gracefully in calling functions
    Example:
        result = huggingface_text_response(
            prompt="Write a blog post about AI",
 <<<<<<< HEAD
            model="openai/gpt-oss-120b:cerebras",
 =======
            model=PREMIUM_DEFAULT_MODEL,
 >>>>>>> pr-417
            temperature=0.7,
            max_tokens=2048,
            system_prompt="You are a professional content writer."
        )
    """
    try:
        if not OPENAI_AVAILABLE:
            raise ImportError("OpenAI library not available. Install with: pip install openai")
        # Get API key with proper error handling
        api_key = get_huggingface_api_key(api_key)
        logger.info(f"🔑 Hugging Face API key loaded: {bool(api_key)} (length: {len(api_key) if api_key else 0})")
        if not api_key:
            raise Exception("HF_TOKEN not found in environment variables")
        # Initialize Hugging Face client
 <<<<<<< HEAD
        client = OpenAI(
            base_url="https://router.huggingface.co/v1",
            api_key=api_key,
        )
 =======
        client = _get_hf_client(api_key)
 >>>>>>> pr-416
        logger.info("✅ Hugging Face client initialized for text response")
-        # Prepare input for the API
+        hf_api_key = get_huggingface_api_key(api_key)
        client = _get_hf_client(hf_api_key)
        messages = []
        # Add system prompt if provided
        if system_prompt:
-            messages.append({
+            messages.append({"role": "system", "content": system_prompt})
-                "role": "system",
+        messages.append({"role": "user", "content": prompt})
                "content": system_prompt
            })
        # Add user prompt
        messages.append({
            "role": "user", 
            "content": prompt
        })
-        # Add debugging for API call
+        response = None
-        logger.info(
+        last_error = None
-            "Hugging Face text call | model={} | prompt_len={} | temp={} | top_p={} | max_tokens={}",
+        for candidate_model in _fallback_model_sequence(model, fallback_models):
            model,
            len(prompt) if isinstance(prompt, str) else '<non-str>',
            temperature,
            top_p,
            max_tokens,
        )
        logger.info("🚀 Making Hugging Face API call (chat completion)...")
 <<<<<<< HEAD
        # Add rate limiting to prevent expensive API calls
        import time
        time.sleep(1)  # 1 second delay between API calls
        # Call exactly the requested model; no retries, no fallbacks, no variants
 =======
 >>>>>>> pr-416
        response = client.chat.completions.create(
            model=model,
            messages=messages,
            temperature=temperature,
            top_p=top_p,
            max_tokens=max_tokens
        )
        # Extract text from response
        generated_text = response.choices[0].message.content
        # Clean up the response
        if generated_text:
            # Remove any markdown formatting if present
            generated_text = re.sub(r'```[a-zA-Z]*\n?', '', generated_text)
            generated_text = re.sub(r'```\n?', '', generated_text)
            generated_text = generated_text.strip()
        logger.info("✅ Hugging Face text response generated successfully (length: {})", len(generated_text))
        return generated_text
    except Exception as e:
        error_class = _classify_hf_error(e)
 <<<<<<< HEAD
        error_details = _hf_error_details(e)
        logger.error(f"❌ Hugging Face text generation failed: {error_details}")
        # Extra diagnostics: try to capture raw response if available
        if hasattr(e, 'response') and e.response is not None:
            logger.error(f"🔍 HF Error Diagnostics:")
            logger.error(f"  - Status: {e.response.status_code}")
            logger.error(f"  - Headers: {dict(e.response.headers)}")
            try:
-                body_json = e.response.json()
+                response = client.chat.completions.create(
-                logger.error(f"  - Body JSON: {json.dumps(body_json, indent=2)}")
+                    model=candidate_model,
-            except Exception:
+                    messages=messages,
-                logger.error(f"  - Body Raw: {e.response.text[:1000]}")
+                    temperature=temperature,
-        else:
+                    top_p=top_p,
-            logger.error(f"🔍 No HTTP response attached to exception object.")
+                    max_tokens=max_tokens,
-        
+                )
-=======
+                if candidate_model != model:
-        details = _error_details(e)
+                    logger.warning("HF text fallback model used: {}", candidate_model)
-        logger.error("❌ Hugging Face text generation failed | error_class={} | type={} | message={} | repr={}", error_class, details["type"], details["message"], details["repr"])
+                break
->>>>>>> pr-416
+            except NotFoundError as nf_err:
-        raise Exception(f"Hugging Face text generation failed: {str(e)}")
+                last_error = nf_err
                logger.warning("HF text model not found: {}", candidate_model)
                continue
            except Exception as call_err:
                last_error = call_err
                logger.warning("HF text call failed for model {}: {}", candidate_model, _error_details(call_err))
                continue
        if response is None:
            raise last_error or RuntimeError("All fallback models failed")
        generated_text = response.choices[0].message.content or ""
        generated_text = re.sub(r"```[a-zA-Z]*\n?", "", generated_text)
        generated_text = re.sub(r"```\n?", "", generated_text).strip()
        return generated_text
    except Exception as exc:
        details = _error_details(exc)
        logger.error(
            "❌ Hugging Face text generation failed | error_class={} | type={} | message={} | repr={}",
            _classify_hf_error(exc),
            details["type"],
            details["message"],
            details["repr"],
        )
        raise Exception(f"Hugging Face text generation failed: {exc}") from exc
@retry(
    retry=retry_if_exception(_should_retry_hf_error),
    wait=wait_random_exponential(min=1, max=60),
    stop=stop_after_attempt(6),
 )
 def huggingface_structured_json_response(
    prompt: str,
    schema: Dict[str, Any],
 <<<<<<< HEAD
    model: str = "openai/gpt-oss-120b:cerebras",
    fallback_models: Optional[List[str]] = None,
 =======
    model: str = PREMIUM_DEFAULT_MODEL,
->>>>>>> pr-417
+    fallback_models: Optional[List[str]] = None,
    temperature: float = 0.7,
    max_tokens: int = 8192,
    system_prompt: Optional[str] = None,
    api_key: Optional[str] = None,
 ) -> Dict[str, Any]:
-    """
+    """Generate structured JSON with explicit fallback model sequence."""
    Generate structured JSON response using Hugging Face Inference Providers API.
    This function uses the Hugging Face Responses API with structured output support
    to generate JSON responses that match a provided schema.
    Args:
        prompt (str): The input prompt for the AI model
        schema (dict): JSON schema defining the expected output structure
        model (str): Hugging Face model identifier (default: "openai/gpt-oss-120b:groq")
        temperature (float): Controls randomness (0.0-1.0). Use 0.1-0.3 for structured output
        max_tokens (int): Maximum tokens in response. Use 8192 for complex outputs
        system_prompt (str, optional): System instruction for the model
    Returns:
        dict: Parsed JSON response matching the provided schema
    Raises:
        Exception: If API key is missing or API call fails
    Best Practices:
        - Keep schemas simple and flat to avoid truncation
        - Use low temperature (0.1-0.3) for consistent structured output
        - Set max_tokens to 8192 for complex multi-field responses
        - Avoid deeply nested schemas with many required fields
        - Test with smaller outputs first, then scale up
    Example:
        schema = {
            "type": "object",
            "properties": {
                "tasks": {
                    "type": "array",
                    "items": {
                        "type": "object",
                        "properties": {
                            "title": {"type": "string"},
                            "description": {"type": "string"}
                        }
                    }
                }
            }
        }
        result = huggingface_structured_json_response(prompt, schema, temperature=0.2, max_tokens=8192)
    """
    try:
        if not OPENAI_AVAILABLE:
            raise ImportError("OpenAI library not available. Install with: pip install openai")
        # Get API key with proper error handling
        api_key = get_huggingface_api_key(api_key)
        logger.info(f"🔑 Hugging Face API key loaded: {bool(api_key)} (length: {len(api_key) if api_key else 0})")
        if not api_key:
            raise Exception("HF_TOKEN not found in environment variables")
        # Initialize OpenAI client with Hugging Face base URL
        # Use standard Inference API endpoint
 <<<<<<< HEAD
        client = OpenAI(
            base_url="https://router.huggingface.co/v1",
            api_key=api_key,
        )
 =======
        client = _get_hf_client(api_key)
 >>>>>>> pr-416
        logger.info("✅ Hugging Face client initialized for structured JSON response")
-        # Prepare input for the API
+        hf_api_key = get_huggingface_api_key(api_key)
        client = _get_hf_client(hf_api_key)
        messages = []
        # Add system prompt if provided
        if system_prompt:
-            messages.append({
+            messages.append({"role": "system", "content": system_prompt})
-                "role": "system",
+        messages.append({"role": "user", "content": prompt})
                "content": system_prompt
            })
        # Add user prompt with JSON instruction
        # For HF models, explicit JSON instruction in prompt is often better than response_format
        json_instruction = "Please respond with valid JSON that matches the provided schema."
        messages.append({
            "role": "user", 
            "content": f"{prompt}\n\n{json_instruction}"
        })
-        # Add debugging for API call
+        response = None
-        logger.info(
+        last_error = None
-            "Hugging Face structured call | model={} | prompt_len={} | schema_kind={} | temp={} | max_tokens={}",
+
-            model,
+        for candidate_model in _fallback_model_sequence(model, fallback_models):
-            len(prompt) if isinstance(prompt, str) else '<non-str>',
+            try:
-            type(schema).__name__,
+                response = client.chat.completions.create(
-            temperature,
+                    model=candidate_model,
-            max_tokens,
+                    messages=messages,
-        )
+                    temperature=temperature,
-        
+                    max_tokens=max_tokens,
-        logger.info("🚀 Making Hugging Face structured API call...")
+                    response_format={"type": "json_object"},
-        
+                )
-        # Make the API call using standard Chat Completions
+                if candidate_model != model:
-        logger.info("🚀 Making Hugging Face API call (chat completion)...")
+                    logger.warning("HF structured fallback model used: {}", candidate_model)
-        
+                break
-        # Add JSON schema to prompt for guidance
+            except Exception as err:
-        json_schema_str = json.dumps(schema, indent=2)
+                last_error = err
-        messages[-1]["content"] += f"\n\nJSON Schema:\n{json_schema_str}"
+                if isinstance(err, NotFoundError):
-        
+                    logger.warning("HF structured model not found: {}", candidate_model)
        try:
 <<<<<<< HEAD
            response = None
            last_error = None
            for candidate_model in _fallback_model_sequence(model, fallback_models):
                try:
                    response = client.chat.completions.create(
                        model=candidate_model,
                        messages=messages,
                        temperature=temperature,
                        max_tokens=max_tokens,
                        response_format={"type": "json_object"} # Try to enforce JSON mode if supported
                    )
                    if candidate_model != model:
                        logger.warning("HF structured generation switched to fallback model: {}", candidate_model)
                    break
                except NotFoundError as nf_err:
                    last_error = nf_err
                    logger.warning("HF structured model not found: {}. Trying fallback model.", candidate_model)
                    continue
-            if response is None:
+                msg = str(err).lower()
-                raise last_error or Exception("Hugging Face structured generation failed: all fallback models failed")
+                if "422" in msg or "not supported" in msg:
            response_text = response.choices[0].message.content
            # Clean up response text if needed
            response_text = response_text.strip()
            if response_text.startswith("```json"):
                response_text = response_text[7:]
            if response_text.endswith("```"):
                response_text = response_text[:-3]
            response_text = response_text.strip()
            try:
                parsed_json = json.loads(response_text)
                logger.info("✅ Hugging Face structured JSON response parsed successfully")
                return parsed_json
            except json.JSONDecodeError as json_err:
                logger.error(f"❌ JSON parsing failed: {json_err}")
                logger.error(f"Raw response: {response_text}")
                # Try to extract JSON from the response using regex
                json_match = re.search(r'\{.*\}', response_text, re.DOTALL)
                if json_match:
                    try:
                        extracted_json = json.loads(json_match.group())
                        logger.info("✅ JSON extracted using regex fallback")
                        return extracted_json
                    except json.JSONDecodeError:
                        pass
                return {"error": "Failed to parse JSON response", "raw_response": response_text}
        except Exception as e:
            logger.error(f"❌ Hugging Face API call failed: {e}")
            # If 422 Unprocessable Entity (often due to response_format not supported), retry without it
            if "422" in str(e) or "not supported" in str(e).lower() or isinstance(e, NotFoundError):
                logger.info("Retrying without response_format...")
                response = None
                last_error = None
                for candidate_model in _fallback_model_sequence(model, fallback_models):
                    try:
                        response = client.chat.completions.create(
                            model=candidate_model,
                            messages=messages,
                            temperature=temperature,
-                            max_tokens=max_tokens
+                            max_tokens=max_tokens,
                        )
                        if candidate_model != model:
-                            logger.warning("HF structured no-response_format fallback model: {}", candidate_model)
+                            logger.warning("HF structured fallback(no response_format) model: {}", candidate_model)
                        break
-                    except NotFoundError as nf_err:
+                    except Exception as second_err:
-                        last_error = nf_err
+                        last_error = second_err
                        logger.warning("HF structured model not found (no response_format path): {}", candidate_model)
                        continue
 =======
            response = client.chat.completions.create(
                model=model,
                messages=messages,
                temperature=temperature,
                max_tokens=max_tokens,
                response_format={"type": "json_object"}
            )
        except Exception as e:
            details = _error_details(e)
            logger.error("❌ Hugging Face API call failed | error_class={} | type={} | message={} | repr={}", _classify_hf_error(e), details["type"], details["message"], details["repr"])
            raise
 >>>>>>> pr-416
-        response_text = response.choices[0].message.content
+        if response is None:
            raise last_error or RuntimeError("All fallback models failed")
-        # Clean up response text if needed
+        response_text = (response.choices[0].message.content or "").strip()
        response_text = response_text.strip()
        if response_text.startswith("```json"):
            response_text = response_text[7:]
        if response_text.endswith("```"):
@@ -602,57 +276,37 @@ def huggingface_structured_json_response(
        response_text = response_text.strip()
        try:
-            parsed_json = json.loads(response_text)
+            return json.loads(response_text)
-            logger.info("✅ Hugging Face structured JSON response parsed successfully")
+        except json.JSONDecodeError:
-            return parsed_json
+            json_match = re.search(r"\{.*\}", response_text, re.DOTALL)
        except json.JSONDecodeError as json_err:
            logger.error(f"❌ JSON parsing failed: {json_err}")
            logger.error(f"Raw response: {response_text}")
            json_match = re.search(r'\{.*\}', response_text, re.DOTALL)
            if json_match:
-                try:
+                return json.loads(json_match.group())
                    extracted_json = json.loads(json_match.group())
                    logger.info("✅ JSON extracted using regex fallback")
                    return extracted_json
                except json.JSONDecodeError:
                    pass
            return {"error": "Failed to parse JSON response", "raw_response": response_text}
-        
+
-    except Exception as e:
+    except Exception as exc:
-        error_msg = str(e) if str(e) else repr(e)
+        details = _error_details(exc)
-        error_type = type(e).__name__
+        logger.error(
-        details = _error_details(e)
+            "❌ Hugging Face structured JSON generation failed | error_class={} | type={} | message={} | repr={}",
-        logger.error("❌ Hugging Face structured JSON generation failed | error_class={} | type={} | message={} | repr={}", _classify_hf_error(e), error_type, details["message"], details["repr"])
+            _classify_hf_error(exc),
-        logger.error(f"❌ Full exception details: {repr(e)}")
+            details["type"],
-        import traceback
+            details["message"],
-        logger.error(f"❌ Traceback: {traceback.format_exc()}")
+            details["repr"],
-        raise Exception(f"Hugging Face structured JSON generation failed: {error_type}: {error_msg}")
+        )
        raise Exception(f"Hugging Face structured JSON generation failed: {exc}") from exc
 def get_available_models() -> list:
-    """
+    """Get list of available Hugging Face models for text generation."""
    Get list of available Hugging Face models for text generation.
    Returns:
        list: List of available model identifiers
    """
    return [
        PREMIUM_DEFAULT_MODEL,
        "moonshotai/Kimi-K2-Instruct-0905:groq",
        "Qwen/Qwen2.5-VL-7B-Instruct",
        "meta-llama/Llama-3.1-8B-Instruct:groq",
        "microsoft/Phi-3-medium-4k-instruct:groq",
-        SIF_LOW_COST_MODEL_DEFAULTS[0]
+        SIF_LOW_COST_MODEL_DEFAULTS[0],
    ]
 def validate_model(model: str) -> bool:
-    """
+    """Validate if a model identifier is supported."""
-    Validate if a model identifier is supported.
+    return model in get_available_models()
    Args:
        model (str): Model identifier to validate
    Returns:
        bool: True if model is supported, False otherwise
    """
    available_models = get_available_models()
    return model in available_models