Merge_PR_437_repair_huggingface_provider_and_restore_explicit_retry_fallback

This commit is contained in:
ajaysi
2026-03-12 16:36:37 +05:30

View File

@@ -1,62 +1,25 @@
""" """
Hugging Face Provider Module for ALwrity Hugging Face Provider Module for ALwrity.
This module provides functions for interacting with Hugging Face's Inference Providers API Provides text and structured JSON generation through Hugging Face Router
using the Responses API (beta) which provides a unified interface for model interactions. (OpenAI-compatible API), with retry and explicit fallback controls.
Key Features:
- Text response generation with retry logic
- Structured JSON response generation with schema validation
- Comprehensive error handling and logging
- Automatic API key management
- Support for various Hugging Face models via Inference Providers
Best Practices:
1. Use structured output for complex, multi-field responses
2. Keep schemas simple and flat to avoid truncation
3. Set appropriate token limits (8192 for complex outputs)
4. Use low temperature (0.1-0.3) for consistent structured output
5. Implement proper error handling in calling functions
6. Use the Responses API for better compatibility
Usage Examples:
# Text response
result = huggingface_text_response(prompt, temperature=0.7, max_tokens=2048)
# Structured JSON response
schema = {
"type": "object",
"properties": {
"tasks": {
"type": "array",
"items": {"type": "object", "properties": {...}}
}
}
}
result = huggingface_structured_json_response(prompt, schema, temperature=0.2, max_tokens=8192)
Dependencies:
- openai (for Hugging Face Responses API)
- tenacity (for retry logic)
- logging (for debugging)
- json (for fallback parsing)
Author: ALwrity Team
Version: 1.0
Last Updated: January 2025
""" """
<<<<<<< HEAD
<<<<<<< HEAD <<<<<<< HEAD
import os import os
======= =======
import hashlib import hashlib
>>>>>>> pr-419 >>>>>>> pr-419
=======
>>>>>>> pr-437
import json import json
import os import os
import re import re
<<<<<<< HEAD <<<<<<< HEAD
<<<<<<< HEAD <<<<<<< HEAD
from functools import lru_cache from functools import lru_cache
<<<<<<< HEAD
from typing import Optional, Dict, Any from typing import Optional, Dict, Any
======= =======
from typing import Optional, Dict, Any, List, Iterable from typing import Optional, Dict, Any, List, Iterable
@@ -66,49 +29,38 @@ import time
from threading import Lock from threading import Lock
from typing import Optional, Dict, Any from typing import Optional, Dict, Any
>>>>>>> pr-419 >>>>>>> pr-419
=======
from typing import Any, Dict, List, Optional
from tenacity import retry, retry_if_exception, stop_after_attempt, wait_random_exponential
>>>>>>> pr-437
from loguru import logger
from utils.logger_utils import get_service_logger from utils.logger_utils import get_service_logger
from .routing_policy import PREMIUM_DEFAULT_MODEL, SIF_LOW_COST_MODEL_DEFAULTS from .routing_policy import PREMIUM_DEFAULT_MODEL, SIF_LOW_COST_MODEL_DEFAULTS
# Use service-specific logger to avoid conflicts
logger = get_service_logger("huggingface_provider") logger = get_service_logger("huggingface_provider")
<<<<<<< HEAD
from tenacity import (
retry,
retry_if_exception,
stop_after_attempt,
wait_random_exponential,
)
=======
>>>>>>> pr-416
try: try:
from openai import OpenAI from openai import NotFoundError, OpenAI
OPENAI_AVAILABLE = True
except ImportError: OPENAI_AVAILABLE = True
OPENAI_AVAILABLE = False except ImportError: # pragma: no cover - environment-dependent
logger.warn("OpenAI library not available. Install with: pip install openai") OPENAI_AVAILABLE = False
OpenAI = None
NotFoundError = Exception
logger.warning("OpenAI library not available. Install with: pip install openai")
<<<<<<< HEAD
HF_FALLBACK_MODELS = [ HF_FALLBACK_MODELS = [
<<<<<<< HEAD
"openai/gpt-oss-120b:cerebras",
"moonshotai/Kimi-K2-Instruct-0905:cerebras",
"meta-llama/Llama-3.1-8B-Instruct:cerebras",
"mistralai/Mistral-7B-Instruct-v0.3:cerebras",
=======
PREMIUM_DEFAULT_MODEL, PREMIUM_DEFAULT_MODEL,
"moonshotai/Kimi-K2-Instruct-0905:groq", "moonshotai/Kimi-K2-Instruct-0905:groq",
"meta-llama/Llama-3.1-8B-Instruct:groq", "meta-llama/Llama-3.1-8B-Instruct:groq",
SIF_LOW_COST_MODEL_DEFAULTS[0], SIF_LOW_COST_MODEL_DEFAULTS[0],
>>>>>>> pr-417
] ]
_HF_CLIENT_CACHE: Dict[str, Any] = {} _HF_CLIENT_CACHE: Dict[str, Any] = {}
_HF_CLIENT_CACHE_LOCK = Lock() _HF_CLIENT_CACHE_LOCK = Lock()
<<<<<<< HEAD
def _masked_key_id(api_key: str) -> str: def _masked_key_id(api_key: str) -> str:
return hashlib.sha256(api_key.encode("utf-8")).hexdigest()[:12] return hashlib.sha256(api_key.encode("utf-8")).hexdigest()[:12]
@@ -134,14 +86,23 @@ def get_huggingface_client(api_key: str):
def _candidate_model_variants(model: str, allow_model_variant_fallback: bool = True): def _candidate_model_variants(model: str, allow_model_variant_fallback: bool = True):
"""Yield model ids to try for a single logical model preference.""" """Yield model ids to try for a single logical model preference."""
=======
def _candidate_model_variants(model: str):
"""Yield model IDs to try for a single logical model preference."""
>>>>>>> pr-437
if not model: if not model:
return return
# Try configured model first (supports provider suffixes like ":cerebras") # Try configured model first (supports provider suffixes like ':groq').
yield model yield model
<<<<<<< HEAD
# Fallback to base repo id when provider suffix is not recognized by the router # Fallback to base repo id when provider suffix is not recognized by the router
if allow_model_variant_fallback and ":" in model: if allow_model_variant_fallback and ":" in model:
=======
# Fallback to base repo id when provider suffix isn't recognized.
if ":" in model:
>>>>>>> pr-437
base_model = model.split(":", 1)[0] base_model = model.split(":", 1)[0]
if base_model: if base_model:
yield base_model yield base_model
@@ -149,12 +110,16 @@ def _candidate_model_variants(model: str, allow_model_variant_fallback: bool = T
<<<<<<< HEAD <<<<<<< HEAD
def _fallback_model_sequence(model: str, fallback_models: Optional[List[str]] = None): def _fallback_model_sequence(model: str, fallback_models: Optional[List[str]] = None):
# IMPORTANT: Do not apply implicit global fallback chains. """Yield unique model candidates preserving caller-defined order.
# Callers must explicitly provide fallback_models when they want multi-model retries.
IMPORTANT: no implicit global fallback chain is applied here; callers must
explicitly pass fallback_models if they want multi-model retries.
"""
if fallback_models: if fallback_models:
sequence = [model] + fallback_models sequence = [model] + fallback_models
else: else:
sequence = [model] sequence = [model]
<<<<<<< HEAD
======= =======
def _fallback_model_sequence( def _fallback_model_sequence(
model: str, model: str,
@@ -170,6 +135,9 @@ def _fallback_model_sequence(
sequence = [model] + list(fallback_models) sequence = [model] + list(fallback_models)
>>>>>>> pr-418 >>>>>>> pr-418
=======
>>>>>>> pr-437
seen = set() seen = set()
for preferred_model in sequence: for preferred_model in sequence:
for candidate in _candidate_model_variants( for candidate in _candidate_model_variants(
@@ -182,11 +150,9 @@ def _fallback_model_sequence(
def _is_non_retryable_hf_error(exc: Exception) -> bool: def _is_non_retryable_hf_error(exc: Exception) -> bool:
"""Skip retries for deterministic HF failures (e.g., unknown model ids, billing)."""
msg = str(exc).lower() msg = str(exc).lower()
status = getattr(exc, "status_code", None) status = getattr(exc, "status_code", None)
# Non-retryable errors
if isinstance(exc, NotFoundError) or "not found" in msg or "404" in msg: if isinstance(exc, NotFoundError) or "not found" in msg or "404" in msg:
return True return True
if status == 402 or "402" in msg or "depleted" in msg or "credits" in msg: if status == 402 or "402" in msg or "depleted" in msg or "credits" in msg:
@@ -195,7 +161,6 @@ def _is_non_retryable_hf_error(exc: Exception) -> bool:
return True return True
if status == 403 or "forbidden" in msg or "403" in msg: if status == 403 or "forbidden" in msg or "403" in msg:
return True return True
return False return False
@@ -204,7 +169,6 @@ def _should_retry_hf_error(exc: Exception) -> bool:
def _classify_hf_error(exc: Exception) -> str: def _classify_hf_error(exc: Exception) -> str:
"""Classify HF failures for actionable logs."""
msg = str(exc).lower() msg = str(exc).lower()
if any(token in msg for token in ["insufficient", "balance", "quota", "billing", "payment", "402"]): if any(token in msg for token in ["insufficient", "balance", "quota", "billing", "payment", "402"]):
return "billing_or_quota" return "billing_or_quota"
@@ -215,62 +179,30 @@ def _classify_hf_error(exc: Exception) -> str:
return "unknown" return "unknown"
def _hf_error_details(exc: Exception) -> str: def _error_details(exc: Exception) -> Dict[str, str]:
"""Return compact, actionable exception details for logs."""
status = getattr(exc, "status_code", None)
err_type = type(exc).__name__
message = str(exc)
raw_body = getattr(exc, "body", None)
details = f"type={err_type}"
if status is not None:
details += f", status={status}"
if message:
details += f", message={message}"
if raw_body:
details += f", body={raw_body}"
details += f", repr={repr(exc)}"
return details
def get_huggingface_api_key() -> str:
=======
def _classify_hf_error(error: Exception) -> str:
message = str(error or "").lower()
if any(x in message for x in ["insufficient", "quota", "billing", "payment", "credits", "balance"]):
return "billing_or_quota"
if any(x in message for x in ["unauthorized", "forbidden", "permission", "invalid api key", "authentication"]):
return "auth_or_permission"
if ("not found" in message) or ("404" in message):
return "model_not_found"
return "other"
def _error_details(error: Exception) -> Dict[str, str]:
return { return {
"type": type(error).__name__, "type": type(exc).__name__,
"message": str(error), "message": str(exc),
"repr": repr(error), "repr": repr(exc),
} }
def get_huggingface_api_key(explicit_api_key: Optional[str] = None) -> str: def get_huggingface_api_key(explicit_api_key: Optional[str] = None) -> str:
>>>>>>> pr-416 """Get Hugging Face API key with basic validation."""
"""Get Hugging Face API key with proper error handling.""" api_key = explicit_api_key or os.getenv("HF_TOKEN")
api_key = explicit_api_key or os.getenv('HF_TOKEN')
if not api_key: if not api_key:
error_msg = "HF_TOKEN environment variable is not set. Please set it in your .env file." error_msg = "HF_TOKEN environment variable is not set. Please set it in your .env file."
logger.error(error_msg) logger.error(error_msg)
raise ValueError(error_msg) raise ValueError(error_msg)
# Validate API key format (basic check) if not api_key.startswith("hf_"):
if not api_key.startswith('hf_'):
error_msg = "HF_TOKEN appears to be invalid. It should start with 'hf_'." error_msg = "HF_TOKEN appears to be invalid. It should start with 'hf_'."
logger.error(error_msg) logger.error(error_msg)
raise ValueError(error_msg) raise ValueError(error_msg)
return api_key return api_key
<<<<<<< HEAD
<<<<<<< HEAD <<<<<<< HEAD
<<<<<<< HEAD <<<<<<< HEAD
@retry( @retry(
@@ -279,11 +211,15 @@ def get_huggingface_api_key(explicit_api_key: Optional[str] = None) -> str:
stop=stop_after_attempt(6), stop=stop_after_attempt(6),
) )
======= =======
=======
>>>>>>> pr-437
@lru_cache(maxsize=16) @lru_cache(maxsize=16)
def _get_hf_client(api_key: str): def _get_hf_client(api_key: str):
return OpenAI(base_url="https://router.huggingface.co/v1", api_key=api_key) return OpenAI(base_url="https://router.huggingface.co/v1", api_key=api_key)
<<<<<<< HEAD
>>>>>>> pr-416 >>>>>>> pr-416
======= =======
@retry( @retry(
@@ -292,14 +228,17 @@ def _get_hf_client(api_key: str):
reraise=True, reraise=True,
) )
>>>>>>> pr-419 >>>>>>> pr-419
=======
@retry(
retry=retry_if_exception(_should_retry_hf_error),
wait=wait_random_exponential(min=1, max=60),
stop=stop_after_attempt(6),
)
>>>>>>> pr-437
def huggingface_text_response( def huggingface_text_response(
prompt: str, prompt: str,
<<<<<<< HEAD
model: str = "openai/gpt-oss-120b:cerebras",
fallback_models: Optional[List[str]] = None,
=======
model: str = PREMIUM_DEFAULT_MODEL, model: str = PREMIUM_DEFAULT_MODEL,
>>>>>>> pr-417 fallback_models: Optional[List[str]] = None,
temperature: float = 0.7, temperature: float = 0.7,
max_tokens: int = 2048, max_tokens: int = 2048,
top_p: float = 0.9, top_p: float = 0.9,
@@ -311,48 +250,11 @@ def huggingface_text_response(
allow_model_variant_fallback: bool = True, allow_model_variant_fallback: bool = True,
>>>>>>> pr-418 >>>>>>> pr-418
) -> str: ) -> str:
""" """Generate text with explicit fallback model sequence."""
Generate text response using Hugging Face Inference Providers API.
This function uses the Hugging Face Responses API which provides a unified interface
for model interactions with built-in retry logic and error handling.
Args:
prompt (str): The input prompt for the AI model
model (str): Hugging Face model identifier (default: "openai/gpt-oss-120b:groq")
temperature (float): Controls randomness (0.0-1.0)
max_tokens (int): Maximum tokens in response
top_p (float): Nucleus sampling parameter (0.0-1.0)
system_prompt (str, optional): System instruction for the model
Returns:
str: Generated text response
Raises:
Exception: If API key is missing or API call fails
Best Practices:
- Use appropriate temperature for your use case (0.7 for creative, 0.1-0.3 for factual)
- Set max_tokens based on expected response length
- Use system_prompt to guide model behavior
- Handle errors gracefully in calling functions
Example:
result = huggingface_text_response(
prompt="Write a blog post about AI",
<<<<<<< HEAD
model="openai/gpt-oss-120b:cerebras",
=======
model=PREMIUM_DEFAULT_MODEL,
>>>>>>> pr-417
temperature=0.7,
max_tokens=2048,
system_prompt="You are a professional content writer."
)
"""
try: try:
if not OPENAI_AVAILABLE: if not OPENAI_AVAILABLE:
raise ImportError("OpenAI library not available. Install with: pip install openai") raise ImportError("OpenAI library not available. Install with: pip install openai")
<<<<<<< HEAD
# Get API key with proper error handling # Get API key with proper error handling
api_key = get_huggingface_api_key(api_key) api_key = get_huggingface_api_key(api_key)
@@ -376,23 +278,18 @@ def huggingface_text_response(
client = get_huggingface_client(api_key) client = get_huggingface_client(api_key)
>>>>>>> pr-419 >>>>>>> pr-419
logger.info("✅ Hugging Face client initialized for text response") logger.info("✅ Hugging Face client initialized for text response")
=======
>>>>>>> pr-437
hf_api_key = get_huggingface_api_key(api_key)
client = _get_hf_client(hf_api_key)
# Prepare input for the API
messages = [] messages = []
# Add system prompt if provided
if system_prompt: if system_prompt:
messages.append({ messages.append({"role": "system", "content": system_prompt})
"role": "system", messages.append({"role": "user", "content": prompt})
"content": system_prompt
})
# Add user prompt
messages.append({
"role": "user",
"content": prompt
})
<<<<<<< HEAD
# Add debugging for API call # Add debugging for API call
logger.info( logger.info(
"Hugging Face text call | model={} | prompt_len={} | temp={} | top_p={} | max_tokens={}", "Hugging Face text call | model={} | prompt_len={} | temp={} | top_p={} | max_tokens={}",
@@ -496,20 +393,32 @@ def huggingface_text_response(
logger.error(f"🔍 HF Error Diagnostics:") logger.error(f"🔍 HF Error Diagnostics:")
logger.error(f" - Status: {e.response.status_code}") logger.error(f" - Status: {e.response.status_code}")
logger.error(f" - Headers: {dict(e.response.headers)}") logger.error(f" - Headers: {dict(e.response.headers)}")
try:
body_json = e.response.json()
logger.error(f" - Body JSON: {json.dumps(body_json, indent=2)}")
except Exception:
logger.error(f" - Body Raw: {e.response.text[:1000]}")
else:
logger.error(f"🔍 No HTTP response attached to exception object.")
======= =======
details = _error_details(e) response = None
logger.error("❌ Hugging Face text generation failed | error_class={} | type={} | message={} | repr={}", error_class, details["type"], details["message"], details["repr"]) last_error = None
>>>>>>> pr-416 for candidate_model in _fallback_model_sequence(model, fallback_models):
raise Exception(f"Hugging Face text generation failed: {str(e)}") >>>>>>> pr-437
try:
response = client.chat.completions.create(
model=candidate_model,
messages=messages,
temperature=temperature,
top_p=top_p,
max_tokens=max_tokens,
)
if candidate_model != model:
logger.warning("HF text fallback model used: {}", candidate_model)
break
except NotFoundError as nf_err:
last_error = nf_err
logger.warning("HF text model not found: {}", candidate_model)
continue
except Exception as call_err:
last_error = call_err
logger.warning("HF text call failed for model {}: {}", candidate_model, _error_details(call_err))
continue
<<<<<<< HEAD
<<<<<<< HEAD <<<<<<< HEAD
======= =======
@retry( @retry(
@@ -518,15 +427,38 @@ def huggingface_text_response(
reraise=True, reraise=True,
) )
>>>>>>> pr-419 >>>>>>> pr-419
=======
if response is None:
raise last_error or RuntimeError("All fallback models failed")
generated_text = response.choices[0].message.content or ""
generated_text = re.sub(r"```[a-zA-Z]*\n?", "", generated_text)
generated_text = re.sub(r"```\n?", "", generated_text).strip()
return generated_text
except Exception as exc:
details = _error_details(exc)
logger.error(
"❌ Hugging Face text generation failed | error_class={} | type={} | message={} | repr={}",
_classify_hf_error(exc),
details["type"],
details["message"],
details["repr"],
)
raise Exception(f"Hugging Face text generation failed: {exc}") from exc
@retry(
retry=retry_if_exception(_should_retry_hf_error),
wait=wait_random_exponential(min=1, max=60),
stop=stop_after_attempt(6),
)
>>>>>>> pr-437
def huggingface_structured_json_response( def huggingface_structured_json_response(
prompt: str, prompt: str,
schema: Dict[str, Any], schema: Dict[str, Any],
<<<<<<< HEAD
model: str = "openai/gpt-oss-120b:cerebras",
fallback_models: Optional[List[str]] = None,
=======
model: str = PREMIUM_DEFAULT_MODEL, model: str = PREMIUM_DEFAULT_MODEL,
>>>>>>> pr-417 fallback_models: Optional[List[str]] = None,
temperature: float = 0.7, temperature: float = 0.7,
max_tokens: int = 8192, max_tokens: int = 8192,
system_prompt: Optional[str] = None, system_prompt: Optional[str] = None,
@@ -537,54 +469,11 @@ def huggingface_structured_json_response(
allow_model_variant_fallback: bool = True, allow_model_variant_fallback: bool = True,
>>>>>>> pr-418 >>>>>>> pr-418
) -> Dict[str, Any]: ) -> Dict[str, Any]:
""" """Generate structured JSON with explicit fallback model sequence."""
Generate structured JSON response using Hugging Face Inference Providers API.
This function uses the Hugging Face Responses API with structured output support
to generate JSON responses that match a provided schema.
Args:
prompt (str): The input prompt for the AI model
schema (dict): JSON schema defining the expected output structure
model (str): Hugging Face model identifier (default: "openai/gpt-oss-120b:groq")
temperature (float): Controls randomness (0.0-1.0). Use 0.1-0.3 for structured output
max_tokens (int): Maximum tokens in response. Use 8192 for complex outputs
system_prompt (str, optional): System instruction for the model
Returns:
dict: Parsed JSON response matching the provided schema
Raises:
Exception: If API key is missing or API call fails
Best Practices:
- Keep schemas simple and flat to avoid truncation
- Use low temperature (0.1-0.3) for consistent structured output
- Set max_tokens to 8192 for complex multi-field responses
- Avoid deeply nested schemas with many required fields
- Test with smaller outputs first, then scale up
Example:
schema = {
"type": "object",
"properties": {
"tasks": {
"type": "array",
"items": {
"type": "object",
"properties": {
"title": {"type": "string"},
"description": {"type": "string"}
}
}
}
}
}
result = huggingface_structured_json_response(prompt, schema, temperature=0.2, max_tokens=8192)
"""
try: try:
if not OPENAI_AVAILABLE: if not OPENAI_AVAILABLE:
raise ImportError("OpenAI library not available. Install with: pip install openai") raise ImportError("OpenAI library not available. Install with: pip install openai")
<<<<<<< HEAD
# Get API key with proper error handling # Get API key with proper error handling
api_key = get_huggingface_api_key(api_key) api_key = get_huggingface_api_key(api_key)
@@ -609,25 +498,18 @@ def huggingface_structured_json_response(
client = get_huggingface_client(api_key) client = get_huggingface_client(api_key)
>>>>>>> pr-419 >>>>>>> pr-419
logger.info("✅ Hugging Face client initialized for structured JSON response") logger.info("✅ Hugging Face client initialized for structured JSON response")
=======
>>>>>>> pr-437
hf_api_key = get_huggingface_api_key(api_key)
client = _get_hf_client(hf_api_key)
# Prepare input for the API
messages = [] messages = []
# Add system prompt if provided
if system_prompt: if system_prompt:
messages.append({ messages.append({"role": "system", "content": system_prompt})
"role": "system", messages.append({"role": "user", "content": prompt})
"content": system_prompt
})
# Add user prompt with JSON instruction
# For HF models, explicit JSON instruction in prompt is often better than response_format
json_instruction = "Please respond with valid JSON that matches the provided schema."
messages.append({
"role": "user",
"content": f"{prompt}\n\n{json_instruction}"
})
<<<<<<< HEAD
# Add debugging for API call # Add debugging for API call
logger.info( logger.info(
"Hugging Face structured call | model={} | prompt_len={} | schema_kind={} | temp={} | max_tokens={}", "Hugging Face structured call | model={} | prompt_len={} | schema_kind={} | temp={} | max_tokens={}",
@@ -753,12 +635,37 @@ def huggingface_structured_json_response(
fallback_attempt += 1 fallback_attempt += 1
started_at = time.perf_counter() started_at = time.perf_counter()
>>>>>>> pr-419 >>>>>>> pr-419
=======
response = None
last_error = None
for candidate_model in _fallback_model_sequence(model, fallback_models):
try:
response = client.chat.completions.create(
model=candidate_model,
messages=messages,
temperature=temperature,
max_tokens=max_tokens,
response_format={"type": "json_object"},
)
if candidate_model != model:
logger.warning("HF structured fallback model used: {}", candidate_model)
break
except Exception as err:
last_error = err
if isinstance(err, NotFoundError):
logger.warning("HF structured model not found: {}", candidate_model)
continue
msg = str(err).lower()
if "422" in msg or "not supported" in msg:
>>>>>>> pr-437
try: try:
response = client.chat.completions.create( response = client.chat.completions.create(
model=candidate_model, model=candidate_model,
messages=messages, messages=messages,
temperature=temperature, temperature=temperature,
max_tokens=max_tokens max_tokens=max_tokens,
) )
elapsed_ms = (time.perf_counter() - started_at) * 1000 elapsed_ms = (time.perf_counter() - started_at) * 1000
logger.debug( logger.debug(
@@ -768,8 +675,9 @@ def huggingface_structured_json_response(
elapsed_ms, elapsed_ms,
) )
if candidate_model != model: if candidate_model != model:
logger.warning("HF structured no-response_format fallback model: {}", candidate_model) logger.warning("HF structured fallback(no response_format) model: {}", candidate_model)
break break
<<<<<<< HEAD
except NotFoundError as nf_err: except NotFoundError as nf_err:
last_error = nf_err last_error = nf_err
elapsed_ms = (time.perf_counter() - started_at) * 1000 elapsed_ms = (time.perf_counter() - started_at) * 1000
@@ -780,25 +688,16 @@ def huggingface_structured_json_response(
elapsed_ms, elapsed_ms,
) )
logger.warning("HF structured model not found (no response_format path): {}", candidate_model) logger.warning("HF structured model not found (no response_format path): {}", candidate_model)
continue
======= =======
response = client.chat.completions.create( except Exception as second_err:
model=model, last_error = second_err
messages=messages, >>>>>>> pr-437
temperature=temperature, continue
max_tokens=max_tokens,
response_format={"type": "json_object"}
)
except Exception as e:
details = _error_details(e)
logger.error("❌ Hugging Face API call failed | error_class={} | type={} | message={} | repr={}", _classify_hf_error(e), details["type"], details["message"], details["repr"])
raise
>>>>>>> pr-416
response_text = response.choices[0].message.content if response is None:
raise last_error or RuntimeError("All fallback models failed")
# Clean up response text if needed response_text = (response.choices[0].message.content or "").strip()
response_text = response_text.strip()
if response_text.startswith("```json"): if response_text.startswith("```json"):
response_text = response_text[7:] response_text = response_text[7:]
if response_text.endswith("```"): if response_text.endswith("```"):
@@ -806,57 +705,37 @@ def huggingface_structured_json_response(
response_text = response_text.strip() response_text = response_text.strip()
try: try:
parsed_json = json.loads(response_text) return json.loads(response_text)
logger.info("✅ Hugging Face structured JSON response parsed successfully") except json.JSONDecodeError:
return parsed_json json_match = re.search(r"\{.*\}", response_text, re.DOTALL)
except json.JSONDecodeError as json_err:
logger.error(f"❌ JSON parsing failed: {json_err}")
logger.error(f"Raw response: {response_text}")
json_match = re.search(r'\{.*\}', response_text, re.DOTALL)
if json_match: if json_match:
try: return json.loads(json_match.group())
extracted_json = json.loads(json_match.group())
logger.info("✅ JSON extracted using regex fallback")
return extracted_json
except json.JSONDecodeError:
pass
return {"error": "Failed to parse JSON response", "raw_response": response_text} return {"error": "Failed to parse JSON response", "raw_response": response_text}
except Exception as e: except Exception as exc:
error_msg = str(e) if str(e) else repr(e) details = _error_details(exc)
error_type = type(e).__name__ logger.error(
details = _error_details(e) "❌ Hugging Face structured JSON generation failed | error_class={} | type={} | message={} | repr={}",
logger.error("❌ Hugging Face structured JSON generation failed | error_class={} | type={} | message={} | repr={}", _classify_hf_error(e), error_type, details["message"], details["repr"]) _classify_hf_error(exc),
logger.error(f"❌ Full exception details: {repr(e)}") details["type"],
import traceback details["message"],
logger.error(f"❌ Traceback: {traceback.format_exc()}") details["repr"],
raise Exception(f"Hugging Face structured JSON generation failed: {error_type}: {error_msg}") )
raise Exception(f"Hugging Face structured JSON generation failed: {exc}") from exc
def get_available_models() -> list: def get_available_models() -> list:
""" """Get list of available Hugging Face models for text generation."""
Get list of available Hugging Face models for text generation.
Returns:
list: List of available model identifiers
"""
return [ return [
PREMIUM_DEFAULT_MODEL, PREMIUM_DEFAULT_MODEL,
"moonshotai/Kimi-K2-Instruct-0905:groq", "moonshotai/Kimi-K2-Instruct-0905:groq",
"Qwen/Qwen2.5-VL-7B-Instruct", "Qwen/Qwen2.5-VL-7B-Instruct",
"meta-llama/Llama-3.1-8B-Instruct:groq", "meta-llama/Llama-3.1-8B-Instruct:groq",
"microsoft/Phi-3-medium-4k-instruct:groq", "microsoft/Phi-3-medium-4k-instruct:groq",
SIF_LOW_COST_MODEL_DEFAULTS[0] SIF_LOW_COST_MODEL_DEFAULTS[0],
] ]
def validate_model(model: str) -> bool: def validate_model(model: str) -> bool:
""" """Validate if a model identifier is supported."""
Validate if a model identifier is supported. return model in get_available_models()
Args:
model (str): Model identifier to validate
Returns:
bool: True if model is supported, False otherwise
"""
available_models = get_available_models()
return model in available_models