"Add_structured_routing_logs_to_text_generation_modular"

This commit is contained in:
ajaysi
2026-03-12 17:12:15 +05:30
parent e85c7d442e
commit 482a600e14
3 changed files with 127 additions and 2 deletions

View File

@@ -55,7 +55,7 @@ from functools import lru_cache
from typing import Optional, Dict, Any, List
from loguru import logger
from utils.logger_utils import get_service_logger
from utils.logger_utils import get_service_logger, emit_routing_event
from .routing_policy import PREMIUM_DEFAULT_MODEL, SIF_LOW_COST_MODEL_DEFAULTS
# Use service-specific logger to avoid conflicts
@@ -192,6 +192,7 @@ def huggingface_text_response(
top_p: float = 0.9,
system_prompt: Optional[str] = None,
api_key: Optional[str] = None,
tenant_user_id: Optional[str] = None,
) -> str:
"""
Generate text response using Hugging Face Inference Providers API.
@@ -264,6 +265,18 @@ def huggingface_text_response(
response = None
last_error = None
for candidate_model in _fallback_model_sequence(model, fallback_models):
# Emit routing event for each model attempt
route_intent = "primary" if candidate_model == model else "fallback"
emit_routing_event(
logger,
flow_type="huggingface_text",
route_intent=route_intent,
provider_selected="huggingface",
model_selected=candidate_model,
tenant_user_id=tenant_user_id,
extra={"original_model": model, "api_call": True}
)
try:
response = client.chat.completions.create(
model=candidate_model,
@@ -324,6 +337,7 @@ def huggingface_structured_json_response(
max_tokens: int = 8192,
system_prompt: Optional[str] = None,
api_key: Optional[str] = None,
tenant_user_id: Optional[str] = None,
) -> Dict[str, Any]:
"""
Generate structured JSON response using Hugging Face Inference Providers API.
@@ -403,6 +417,18 @@ def huggingface_structured_json_response(
last_error = None
for candidate_model in _fallback_model_sequence(model, fallback_models):
# Emit routing event for each model attempt
route_intent = "primary" if candidate_model == model else "fallback"
emit_routing_event(
logger,
flow_type="huggingface_structured",
route_intent=route_intent,
provider_selected="huggingface",
model_selected=candidate_model,
tenant_user_id=tenant_user_id,
extra={"original_model": model, "api_call": True, "response_format": "json_object"}
)
try:
response = client.chat.completions.create(
model=candidate_model,

View File

@@ -19,6 +19,7 @@ from ..routing_policy import (
SIF_LOW_COST_MODEL_DEFAULTS,
resolve_text_provider_alias,
)
from ...utils.logger_utils import emit_routing_event
PREMIUM_HF_MINIMAL_FALLBACK_MODELS = [
@@ -58,6 +59,10 @@ def llm_text_gen(
resolved_flow_type = flow_type or ("sif_agent" if preferred_hf_models else "premium_tool")
flow_tag = f"flow_type={resolved_flow_type}"
subscription_preflight_completed = False
# Initialize routing state for structured logging
fallback_count = 0
fallback_models_tried = []
logger.info(f"[llm_text_gen][{flow_tag}] Starting text generation")
logger.debug(f"[llm_text_gen] Prompt length: {len(prompt)} characters")
@@ -138,6 +143,20 @@ def llm_text_gen(
os.environ["HF_TOKEN"] = resolved_key
logger.debug(f"[llm_text_gen] Using provider: {gpt_provider}, model: {model}")
# Emit routing event for primary selection
emit_routing_event(
logger,
flow_type=resolved_flow_type,
route_intent="primary",
provider_selected=gpt_provider,
model_selected=model,
preferred_provider=preferred_provider,
fallback_count=fallback_count,
fallback_models_tried=fallback_models_tried,
tenant_user_id=user_id,
extra={"available_providers": available_providers}
)
# Map provider name to APIProvider enum (define at function scope for usage tracking)
from models.subscription_models import APIProvider
@@ -303,6 +322,7 @@ def llm_text_gen(
max_tokens=max_tokens,
system_prompt=system_instructions,
allow_model_variant_fallback=hf_allow_model_variant_fallback,
tenant_user_id=user_id
)
else:
response_text = huggingface_text_response(
@@ -312,7 +332,8 @@ def llm_text_gen(
temperature=temperature,
max_tokens=max_tokens,
top_p=top_p,
system_prompt=system_instructions
system_prompt=system_instructions,
tenant_user_id=user_id
)
else:
logger.error(f"[llm_text_gen] Unknown provider: {gpt_provider}")
@@ -360,16 +381,34 @@ def llm_text_gen(
try:
logger.info(f"[llm_text_gen][{flow_tag}] Trying SINGLE fallback provider: {fallback_provider}")
actual_provider_used = fallback_provider
fallback_count += 1
route_intent = "fallback"
# Update provider enum for fallback
if fallback_provider == "google":
provider_enum = APIProvider.GEMINI
actual_provider_name = "gemini"
fallback_model = "gemini-2.0-flash-lite"
fallback_models_tried.append(fallback_model)
elif fallback_provider == "huggingface":
provider_enum = APIProvider.MISTRAL
actual_provider_name = "huggingface"
fallback_model = preferred_hf_models[0] if preferred_hf_models else PREMIUM_DEFAULT_MODEL
fallback_models_tried.append(fallback_model)
# Emit routing event for fallback attempt
emit_routing_event(
logger,
flow_type=resolved_flow_type,
route_intent=route_intent,
provider_selected=fallback_provider,
model_selected=fallback_model,
preferred_provider=preferred_provider,
fallback_count=fallback_count,
fallback_models_tried=fallback_models_tried,
tenant_user_id=user_id,
extra={"available_providers": available_providers}
)
if fallback_provider == "google":
if json_struct:
@@ -402,6 +441,7 @@ def llm_text_gen(
system_prompt=system_instructions,
fallback_models=PREMIUM_HF_MINIMAL_FALLBACK_MODELS,
allow_model_variant_fallback=True,
tenant_user_id=user_id
)
else:
response_text = huggingface_text_response(
@@ -413,6 +453,7 @@ def llm_text_gen(
system_prompt=system_instructions,
fallback_models=PREMIUM_HF_MINIMAL_FALLBACK_MODELS,
allow_model_variant_fallback=True,
tenant_user_id=user_id
)
# TRACK USAGE after successful fallback call