Make SIF fail fast and add low-cost remote LLM fallback

2026-03-09 15:38:03 +05:30
parent 651bd2b5f0
commit 4230385e70
7 changed files with 224 additions and 66 deletions
--- a/backend/services/llm_providers/main_text_generation.py
+++ b/backend/services/llm_providers/main_text_generation.py
@@ -6,7 +6,7 @@ migrated from the legacy lib/gpt_providers/text_generation/main_text_generation.

 import os
 import json
-from typing import Optional, Dict, Any
+from typing import Optional, Dict, Any, List
 from datetime import datetime
 from loguru import logger
 from fastapi import HTTPException
@@ -16,7 +16,13 @@ from .gemini_provider import gemini_text_response, gemini_structured_json_respon
 from .huggingface_provider import huggingface_text_response, huggingface_structured_json_response


-def llm_text_gen(prompt: str, system_prompt: Optional[str] = None, json_struct: Optional[Dict[str, Any]] = None, user_id: str = None) -> str:
+def llm_text_gen(
+    prompt: str,
+    system_prompt: Optional[str] = None,
+    json_struct: Optional[Dict[str, Any]] = None,
+    user_id: str = None,
+    preferred_hf_models: Optional[List[str]] = None,
+) -> str:
    """
    Generate text using Language Model (LLM) based on the provided prompt.
    
@@ -54,7 +60,7 @@ def llm_text_gen(prompt: str, system_prompt: Optional[str] = None, json_struct:
            model = "gemini-2.0-flash-001"
        elif env_provider in ['hf_response_api', 'huggingface', 'hf']:
            gpt_provider = "huggingface"
-            model = "mistralai/Mistral-7B-Instruct-v0.3"
+            model = "mistralai/Mistral-7B-Instruct-v0.3:groq"
        
        # Default blog characteristics
        blog_tone = "Professional"
@@ -80,7 +86,7 @@ def llm_text_gen(prompt: str, system_prompt: Optional[str] = None, json_struct:
                model = "gemini-2.0-flash-001"
            elif "huggingface" in available_providers:
                gpt_provider = "huggingface"
-                model = "mistralai/Mistral-7B-Instruct-v0.3"
+                model = "mistralai/Mistral-7B-Instruct-v0.3:groq"
            else:
                logger.error("[llm_text_gen] No API keys found for supported providers.")
                raise RuntimeError("No LLM API keys configured. Configure GEMINI_API_KEY or HF_TOKEN to enable AI responses.")
@@ -93,9 +99,13 @@ def llm_text_gen(prompt: str, system_prompt: Optional[str] = None, json_struct:
                    model = "gemini-2.0-flash-001"
                elif "huggingface" in available_providers:
                    gpt_provider = "huggingface"
-                    model = "mistralai/Mistral-7B-Instruct-v0.3"
+                    model = "mistralai/Mistral-7B-Instruct-v0.3:groq"
                else:
                    raise RuntimeError("No supported providers available.")
+
+        if gpt_provider == "huggingface" and preferred_hf_models:
+            model = preferred_hf_models[0]
+            logger.info(f"[llm_text_gen] Using preferred low-cost HF model: {model}")
            
        logger.debug(f"[llm_text_gen] Using provider: {gpt_provider}, model: {model}")

@@ -303,7 +313,7 @@ def llm_text_gen(prompt: str, system_prompt: Optional[str] = None, json_struct:
                    elif fallback_provider == "huggingface":
                        provider_enum = APIProvider.MISTRAL
                        actual_provider_name = "huggingface"
-                        fallback_model = "mistralai/Mistral-7B-Instruct-v0.3"
+                        fallback_model = "mistralai/Mistral-7B-Instruct-v0.3:groq"
                    
                    if fallback_provider == "google":
                        if json_struct:
@@ -330,7 +340,7 @@ def llm_text_gen(prompt: str, system_prompt: Optional[str] = None, json_struct:
                            response_text = huggingface_structured_json_response(
                                prompt=prompt,
                                schema=json_struct,
-                                model="mistralai/Mistral-7B-Instruct-v0.3",
+                                model="mistralai/Mistral-7B-Instruct-v0.3:groq",
                                temperature=temperature,
                                max_tokens=max_tokens,
                                system_prompt=system_instructions
@@ -338,7 +348,7 @@ def llm_text_gen(prompt: str, system_prompt: Optional[str] = None, json_struct:
                        else:
                            response_text = huggingface_text_response(
                                prompt=prompt,
-                                model="mistralai/Mistral-7B-Instruct-v0.3",
+                                model="mistralai/Mistral-7B-Instruct-v0.3:groq",
                                temperature=temperature,
                                max_tokens=max_tokens,
                                top_p=top_p,
@@ -394,4 +404,4 @@ def get_api_key(gpt_provider: str) -> Optional[str]:
        return api_key_manager.get_api_key(mapped_provider)
    except Exception as e:
        logger.error(f"[get_api_key] Error getting API key for {gpt_provider}: {str(e)}")
-        return None 
+        return None