Refine HF fallback policy controls and SIF low-cost routing
This commit is contained in:
@@ -34,7 +34,11 @@ class SharedLLMWrapper:
|
||||
try:
|
||||
# We ignore kwargs like 'max_tokens' as llm_text_gen handles defaults,
|
||||
# but we could map them if needed.
|
||||
return llm_text_gen(prompt, user_id=self.user_id)
|
||||
return llm_text_gen(
|
||||
prompt,
|
||||
user_id=self.user_id,
|
||||
preferred_hf_models=REMOTE_LOW_COST_HF_MODELS,
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(f"SharedLLMWrapper failed to generate text: {e}")
|
||||
return f"[ERROR: Shared LLM generation failed for user {self.user_id}]"
|
||||
@@ -44,6 +48,13 @@ class SharedLLMWrapper:
|
||||
|
||||
_local_llm_cache = {}
|
||||
|
||||
|
||||
REMOTE_LOW_COST_HF_MODELS = [
|
||||
"Qwen/Qwen2.5-1.5B-Instruct",
|
||||
"Qwen/Qwen2.5-0.5B-Instruct",
|
||||
"TinyLlama/TinyLlama-1.1B-Chat-v1.0",
|
||||
]
|
||||
|
||||
LOCAL_LLM_FALLBACKS = [
|
||||
"Qwen/Qwen2.5-1.5B-Instruct",
|
||||
"Qwen/Qwen2.5-0.5B-Instruct",
|
||||
|
||||
Reference in New Issue
Block a user