Refine HF fallback policy controls and SIF low-cost routing

This commit is contained in:
ي
2026-03-12 15:03:47 +05:30
parent b410ece4ca
commit bf191374a5
3 changed files with 85 additions and 20 deletions

View File

@@ -34,7 +34,11 @@ class SharedLLMWrapper:
try:
# We ignore kwargs like 'max_tokens' as llm_text_gen handles defaults,
# but we could map them if needed.
return llm_text_gen(prompt, user_id=self.user_id)
return llm_text_gen(
prompt,
user_id=self.user_id,
preferred_hf_models=REMOTE_LOW_COST_HF_MODELS,
)
except Exception as e:
logger.error(f"SharedLLMWrapper failed to generate text: {e}")
return f"[ERROR: Shared LLM generation failed for user {self.user_id}]"
@@ -44,6 +48,13 @@ class SharedLLMWrapper:
# NOTE(review): presumably a cache for locally loaded LLMs; its readers and
# writers are outside this view -- confirm before relying on its key/value shape.
_local_llm_cache = {}
# Hugging Face model IDs that SharedLLMWrapper hands to llm_text_gen via the
# `preferred_hf_models` keyword argument.
# NOTE(review): the order looks like a preference order (first entry tried
# first) -- confirm against how llm_text_gen consumes the list.
# The first two entries also appear at the head of LOCAL_LLM_FALLBACKS.
REMOTE_LOW_COST_HF_MODELS = [
"Qwen/Qwen2.5-1.5B-Instruct",
"Qwen/Qwen2.5-0.5B-Instruct",
"TinyLlama/TinyLlama-1.1B-Chat-v1.0",
]
LOCAL_LLM_FALLBACKS = [
"Qwen/Qwen2.5-1.5B-Instruct",
"Qwen/Qwen2.5-0.5B-Instruct",