Make SIF fail fast and add low-cost remote LLM fallback

This commit is contained in:
ي
2026-03-09 15:38:03 +05:30
committed by ajaysi
parent 651bd2b5f0
commit 4230385e70
7 changed files with 224 additions and 66 deletions

View File

@@ -44,6 +44,12 @@ class SharedLLMWrapper:
_local_llm_cache = {}
LOCAL_LLM_FALLBACKS = [
"Qwen/Qwen2.5-1.5B-Instruct",
"Qwen/Qwen2.5-0.5B-Instruct",
"TinyLlama/TinyLlama-1.1B-Chat-v1.0",
]
class LocalLLMWrapper:
"""
Lazily loads a local LLM via txtai and caches it globally.
@@ -72,22 +78,56 @@ class LocalLLMWrapper:
if task_to_use == "text-generation":
task_to_use = "language-generation"
logger.info(f"Loading local LLM (singleton): {self.model_path} (task={task_to_use})")
try:
_local_llm_cache[cache_key] = LLM(path=self.model_path, task=task_to_use)
except Exception as e:
candidate_models = []
for candidate in [self.model_path, *LOCAL_LLM_FALLBACKS]:
if candidate not in candidate_models:
candidate_models.append(candidate)
last_error = None
for candidate_model in candidate_models:
candidate_key = f"{candidate_model}:{self.task}"
if candidate_key in _local_llm_cache:
if candidate_model != self.model_path:
logger.warning(f"Using cached fallback local LLM model: {candidate_model}")
return _local_llm_cache[candidate_key]
logger.info(f"Loading local LLM (singleton): {candidate_model} (task={task_to_use})")
try:
import transformers
from transformers.pipelines import SUPPORTED_TASKS
logger.error(
f"LocalLLMWrapper init failed (model={self.model_path}, requested_task={task_to_use}, "
f"transformers={getattr(transformers, '__version__', 'unknown')}, "
f"supported_tasks={sorted(list(SUPPORTED_TASKS.keys()))[:50]})"
_local_llm_cache[candidate_key] = LLM(path=candidate_model, task=task_to_use)
if candidate_model != self.model_path:
logger.warning(
f"Loaded fallback local LLM model '{candidate_model}' after failure on '{self.model_path}'"
)
return _local_llm_cache[candidate_key]
except Exception as e:
last_error = e
message = str(e).lower()
is_memory_issue = (
"paging file is too small" in message
or "os error 1455" in message
or "out of memory" in message
or "not enough memory" in message
)
except Exception:
pass
logger.error(f"Failed to initialize LocalLLMWrapper: {e}")
raise e
if is_memory_issue:
logger.warning(
f"Local LLM memory load failure for '{candidate_model}', trying smaller fallback. Error: {e}"
)
continue
logger.warning(f"Local LLM load failed for '{candidate_model}', trying next fallback. Error: {e}")
continue
try:
import transformers
from transformers.pipelines import SUPPORTED_TASKS
logger.error(
f"LocalLLMWrapper init failed (model={self.model_path}, requested_task={task_to_use}, "
f"transformers={getattr(transformers, '__version__', 'unknown')}, "
f"supported_tasks={sorted(list(SUPPORTED_TASKS.keys()))[:50]})"
)
except Exception:
pass
logger.error(f"Failed to initialize LocalLLMWrapper after fallback attempts: {last_error}")
raise last_error
return _local_llm_cache[cache_key]
@@ -98,7 +138,7 @@ class LocalLLMWrapper:
return self.llm(prompt, **kwargs)
class SIFBaseAgent(BaseALwrityAgent):
def __init__(self, intelligence_service: TxtaiIntelligenceService, user_id: str, agent_type: str = "sif_agent", model_name: str = "Qwen/Qwen2.5-3B-Instruct", llm: Any = None):
def __init__(self, intelligence_service: TxtaiIntelligenceService, user_id: str, agent_type: str = "sif_agent", model_name: str = "Qwen/Qwen2.5-1.5B-Instruct", llm: Any = None):
# Hybrid LLM Strategy:
# 1. Shared LLM for external/high-quality generation (available to all agents)
self.shared_llm = SharedLLMWrapper(user_id)