Merge_PR_416_fix_textgen_ai_models_mapping

This commit is contained in:
ajaysi
2026-03-12 16:05:47 +05:30
5 changed files with 509 additions and 81 deletions

View File

@@ -47,26 +47,10 @@ Last Updated: January 2025
"""
import os
import sys
from pathlib import Path
import json
import re
from typing import Optional, Dict, Any, List
from dotenv import load_dotenv
# Fix the environment loading path - load from backend directory
current_dir = Path(__file__).parent.parent # services directory
backend_dir = current_dir.parent # backend directory
env_path = backend_dir / '.env'
if env_path.exists():
load_dotenv(env_path)
print(f"Loaded .env from: {env_path}")
else:
# Fallback to current directory
load_dotenv()
print(f"No .env found at {env_path}, using current directory")
from functools import lru_cache
from typing import Optional, Dict, Any
from loguru import logger
from utils.logger_utils import get_service_logger
@@ -74,22 +58,24 @@ from utils.logger_utils import get_service_logger
# Use service-specific logger to avoid conflicts
logger = get_service_logger("huggingface_provider")
<<<<<<< HEAD
from tenacity import (
retry,
retry_if_exception,
stop_after_attempt,
wait_random_exponential,
)
=======
>>>>>>> pr-416
try:
from openai import OpenAI
from openai import NotFoundError
OPENAI_AVAILABLE = True
except ImportError:
OPENAI_AVAILABLE = False
NotFoundError = Exception
logger.warn("OpenAI library not available. Install with: pip install openai")
<<<<<<< HEAD
HF_FALLBACK_MODELS = [
"openai/gpt-oss-120b:cerebras",
"moonshotai/Kimi-K2-Instruct-0905:cerebras",
@@ -179,8 +165,32 @@ def _hf_error_details(exc: Exception) -> str:
return details
def get_huggingface_api_key() -> str:
=======
def _classify_hf_error(error: Exception) -> str:
message = str(error or "").lower()
if any(x in message for x in ["insufficient", "quota", "billing", "payment", "credits", "balance"]):
return "billing_or_quota"
if any(x in message for x in ["unauthorized", "forbidden", "permission", "invalid api key", "authentication"]):
return "auth_or_permission"
if ("not found" in message) or ("404" in message):
return "model_not_found"
return "other"
def _error_details(error: Exception) -> Dict[str, str]:
return {
"type": type(error).__name__,
"message": str(error),
"repr": repr(error),
}
def get_huggingface_api_key(explicit_api_key: Optional[str] = None) -> str:
>>>>>>> pr-416
"""Get Hugging Face API key with proper error handling."""
api_key = os.getenv('HF_TOKEN')
api_key = explicit_api_key or os.getenv('HF_TOKEN')
if not api_key:
error_msg = "HF_TOKEN environment variable is not set. Please set it in your .env file."
logger.error(error_msg)
@@ -194,11 +204,19 @@ def get_huggingface_api_key() -> str:
return api_key
<<<<<<< HEAD
@retry(
retry=retry_if_exception(_should_retry_hf_error),
wait=wait_random_exponential(min=1, max=60),
stop=stop_after_attempt(6),
)
=======
@lru_cache(maxsize=16)
def _get_hf_client(api_key: str):
return OpenAI(base_url="https://router.huggingface.co/v1", api_key=api_key)
>>>>>>> pr-416
def huggingface_text_response(
prompt: str,
model: str = "openai/gpt-oss-120b:cerebras",
@@ -206,7 +224,8 @@ def huggingface_text_response(
temperature: float = 0.7,
max_tokens: int = 2048,
top_p: float = 0.9,
system_prompt: Optional[str] = None
system_prompt: Optional[str] = None,
api_key: Optional[str] = None,
) -> str:
"""
Generate text response using Hugging Face Inference Providers API.
@@ -248,17 +267,21 @@ def huggingface_text_response(
raise ImportError("OpenAI library not available. Install with: pip install openai")
# Get API key with proper error handling
api_key = get_huggingface_api_key()
api_key = get_huggingface_api_key(api_key)
logger.info(f"🔑 Hugging Face API key loaded: {bool(api_key)} (length: {len(api_key) if api_key else 0})")
if not api_key:
raise Exception("HF_TOKEN not found in environment variables")
# Initialize Hugging Face client
<<<<<<< HEAD
client = OpenAI(
base_url="https://router.huggingface.co/v1",
api_key=api_key,
)
=======
client = _get_hf_client(api_key)
>>>>>>> pr-416
logger.info("✅ Hugging Face client initialized for text response")
# Prepare input for the API
@@ -289,11 +312,14 @@ def huggingface_text_response(
logger.info("🚀 Making Hugging Face API call (chat completion)...")
<<<<<<< HEAD
# Add rate limiting to prevent expensive API calls
import time
time.sleep(1) # 1 second delay between API calls
# Call exactly the requested model; no retries, no fallbacks, no variants
=======
>>>>>>> pr-416
response = client.chat.completions.create(
model=model,
messages=messages,
@@ -312,11 +338,12 @@ def huggingface_text_response(
generated_text = re.sub(r'```\n?', '', generated_text)
generated_text = generated_text.strip()
logger.info(f"✅ Hugging Face text response generated successfully (length: {len(generated_text)})")
logger.info("✅ Hugging Face text response generated successfully (length: {})", len(generated_text))
return generated_text
except Exception as e:
error_class = _classify_hf_error(e)
<<<<<<< HEAD
error_details = _hf_error_details(e)
logger.error(f"❌ Hugging Face text generation failed: {error_details}")
@@ -333,9 +360,12 @@ def huggingface_text_response(
else:
logger.error(f"🔍 No HTTP response attached to exception object.")
=======
details = _error_details(e)
logger.error("❌ Hugging Face text generation failed | error_class={} | type={} | message={} | repr={}", error_class, details["type"], details["message"], details["repr"])
>>>>>>> pr-416
raise Exception(f"Hugging Face text generation failed: {str(e)}")
@retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6))
def huggingface_structured_json_response(
prompt: str,
schema: Dict[str, Any],
@@ -343,7 +373,8 @@ def huggingface_structured_json_response(
fallback_models: Optional[List[str]] = None,
temperature: float = 0.7,
max_tokens: int = 8192,
system_prompt: Optional[str] = None
system_prompt: Optional[str] = None,
api_key: Optional[str] = None,
) -> Dict[str, Any]:
"""
Generate structured JSON response using Hugging Face Inference Providers API.
@@ -395,7 +426,7 @@ def huggingface_structured_json_response(
raise ImportError("OpenAI library not available. Install with: pip install openai")
# Get API key with proper error handling
api_key = get_huggingface_api_key()
api_key = get_huggingface_api_key(api_key)
logger.info(f"🔑 Hugging Face API key loaded: {bool(api_key)} (length: {len(api_key) if api_key else 0})")
if not api_key:
@@ -403,10 +434,14 @@ def huggingface_structured_json_response(
# Initialize OpenAI client with Hugging Face base URL
# Use standard Inference API endpoint
<<<<<<< HEAD
client = OpenAI(
base_url="https://router.huggingface.co/v1",
api_key=api_key,
)
=======
client = _get_hf_client(api_key)
>>>>>>> pr-416
logger.info("✅ Hugging Face client initialized for structured JSON response")
# Prepare input for the API
@@ -446,11 +481,8 @@ def huggingface_structured_json_response(
json_schema_str = json.dumps(schema, indent=2)
messages[-1]["content"] += f"\n\nJSON Schema:\n{json_schema_str}"
# Add rate limiting to prevent expensive API calls
import time
time.sleep(1) # 1 second delay between API calls
try:
<<<<<<< HEAD
response = None
last_error = None
for candidate_model in _fallback_model_sequence(model, fallback_models):
@@ -525,25 +557,52 @@ def huggingface_structured_json_response(
last_error = nf_err
logger.warning("HF structured model not found (no response_format path): {}", candidate_model)
continue
=======
response = client.chat.completions.create(
model=model,
messages=messages,
temperature=temperature,
max_tokens=max_tokens,
response_format={"type": "json_object"}
)
except Exception as e:
details = _error_details(e)
logger.error("❌ Hugging Face API call failed | error_class={} | type={} | message={} | repr={}", _classify_hf_error(e), details["type"], details["message"], details["repr"])
raise
>>>>>>> pr-416
if response is None:
raise last_error or e
response_text = response.choices[0].message.content
# ... (same parsing logic would apply, simplified here for brevity)
response_text = response.choices[0].message.content
# Clean up response text if needed
response_text = response_text.strip()
if response_text.startswith("```json"):
response_text = response_text[7:]
if response_text.endswith("```"):
response_text = response_text[:-3]
response_text = response_text.strip()
try:
parsed_json = json.loads(response_text)
logger.info("✅ Hugging Face structured JSON response parsed successfully")
return parsed_json
except json.JSONDecodeError as json_err:
logger.error(f"❌ JSON parsing failed: {json_err}")
logger.error(f"Raw response: {response_text}")
json_match = re.search(r'\{.*\}', response_text, re.DOTALL)
if json_match:
try:
return json.loads(response_text)
except:
# Regex fallback
json_match = re.search(r'\{.*\}', response_text, re.DOTALL)
if json_match:
return json.loads(json_match.group())
return {"error": "Failed to parse JSON response", "raw_response": response_text}
raise e
extracted_json = json.loads(json_match.group())
logger.info("✅ JSON extracted using regex fallback")
return extracted_json
except json.JSONDecodeError:
pass
return {"error": "Failed to parse JSON response", "raw_response": response_text}
except Exception as e:
error_msg = str(e) if str(e) else repr(e)
error_type = type(e).__name__
logger.error(f"❌ Hugging Face structured JSON generation failed: {error_type}: {error_msg}")
details = _error_details(e)
logger.error("❌ Hugging Face structured JSON generation failed | error_class={} | type={} | message={} | repr={}", _classify_hf_error(e), error_type, details["message"], details["repr"])
logger.error(f"❌ Full exception details: {repr(e)}")
import traceback
logger.error(f"❌ Traceback: {traceback.format_exc()}")