Hugging Face Integration. Remove OpenAI and Anthropic and DeepSeek. Add Hugging Face.

2025-10-29 20:15:04 +05:30
parent 4431cd9848
commit 3219e6bbe4
15 changed files with 883 additions and 482 deletions
--- a/backend/alwrity_utils/rate_limiter.py
+++ b/backend/alwrity_utils/rate_limiter.py
@@ -14,7 +14,7 @@ from loguru import logger
 class RateLimiter:
    """Manages rate limiting for ALwrity backend."""
-    def __init__(self, window_seconds: int = 60, max_requests: int = 200):
+    def __init__(self, window_seconds: int = 60, max_requests: int = 1000):  # Increased for development
        self.window_seconds = window_seconds
        self.max_requests = max_requests
        self.request_counts: Dict[str, List[float]] = defaultdict(list)
@@ -28,6 +28,12 @@ class RateLimiter:
            "/ai-analytics",
            "/gap-analysis",
            "/calendar-events",
            # Research endpoints - exempt from rate limiting
            "/api/research",
            "/api/blog-writer",
            "/api/blog-writer/research",
            "/api/blog-writer/research/",
            "/api/blog/research/status",
            "/calendar-generation/progress",
            "/health",
            "/health/database",
--- a/backend/services/blog_writer/research/competitor_analyzer.py
+++ b/backend/services/blog_writer/research/competitor_analyzer.py
@@ -39,7 +39,7 @@ class CompetitorAnalyzer:
        }}
        """
-        from services.llm_providers.gemini_provider import gemini_structured_json_response
+        from services.llm_providers.main_text_generation import llm_text_gen
        competitor_schema = {
            "type": "object",
@@ -55,11 +55,9 @@ class CompetitorAnalyzer:
            "required": ["top_competitors", "content_gaps", "opportunities", "competitive_advantages", "market_positioning", "industry_leaders", "analysis_notes"]
        }
-        competitor_analysis = gemini_structured_json_response(
+        competitor_analysis = llm_text_gen(
            prompt=competitor_prompt,
-            schema=competitor_schema,
+            json_struct=competitor_schema,
            temperature=0.3,
            max_tokens=4000
        )
        if isinstance(competitor_analysis, dict) and 'error' not in competitor_analysis:
--- a/backend/services/blog_writer/research/content_angle_generator.py
+++ b/backend/services/blog_writer/research/content_angle_generator.py
@@ -48,7 +48,7 @@ class ContentAngleGenerator:
        }}
        """
-        from services.llm_providers.gemini_provider import gemini_structured_json_response
+        from services.llm_providers.main_text_generation import llm_text_gen
        angles_schema = {
            "type": "object",
@@ -63,11 +63,9 @@ class ContentAngleGenerator:
            "required": ["content_angles"]
        }
-        angles_result = gemini_structured_json_response(
+        angles_result = llm_text_gen(
            prompt=angles_prompt,
-            schema=angles_schema,
+            json_struct=angles_schema,
            temperature=0.7,
            max_tokens=4000
        )
        if isinstance(angles_result, dict) and 'content_angles' in angles_result:
--- a/backend/services/blog_writer/research/keyword_analyzer.py
+++ b/backend/services/blog_writer/research/keyword_analyzer.py
@@ -44,7 +44,7 @@ class KeywordAnalyzer:
        }}
        """
-        from services.llm_providers.gemini_provider import gemini_structured_json_response
+        from services.llm_providers.main_text_generation import llm_text_gen
        keyword_schema = {
            "type": "object",
@@ -62,11 +62,9 @@ class KeywordAnalyzer:
            "required": ["primary", "secondary", "long_tail", "search_intent", "difficulty", "content_gaps", "semantic_keywords", "trending_terms", "analysis_insights"]
        }
-        keyword_analysis = gemini_structured_json_response(
+        keyword_analysis = llm_text_gen(
            prompt=keyword_prompt,
-            schema=keyword_schema,
+            json_struct=keyword_schema,
            temperature=0.3,
            max_tokens=4000
        )
        if isinstance(keyword_analysis, dict) and 'error' not in keyword_analysis:
--- a/backend/services/llm_providers/README.md
+++ b/backend/services/llm_providers/README.md
@@ -1,6 +1,43 @@
-# Gemini Provider Module
+# LLM Providers Module
-This module provides functions for interacting with Google's Gemini API, specifically designed for structured JSON output and text generation. It follows the official Gemini API documentation and implements best practices for reliable AI interactions.
+This module provides functions for interacting with multiple LLM providers, specifically Google's Gemini API and Hugging Face Inference Providers. It follows official API documentation and implements best practices for reliable AI interactions.
 ## Supported Providers
 - **Google Gemini**: High-quality text generation with structured JSON output
 - **Hugging Face**: Multiple models via Inference Providers with unified interface
 ## Quick Start
 ```python
 from services.llm_providers.main_text_generation import llm_text_gen
 # Generate text (auto-detects available provider)
 response = llm_text_gen("Write a blog post about AI trends")
 print(response)
 ```
 ## Configuration
 Set your preferred provider using the `GPT_PROVIDER` environment variable:
 ```bash
 # Use Google Gemini (default)
 export GPT_PROVIDER=gemini
 # Use Hugging Face
 export GPT_PROVIDER=hf_response_api
 ```
 Configure API keys:
 ```bash
 # For Google Gemini
 export GEMINI_API_KEY=your_gemini_api_key_here
 # For Hugging Face
 export HF_TOKEN=your_huggingface_token_here
 ```
 ## Key Features
--- a/backend/services/llm_providers/README_HUGGINGFACE_INTEGRATION.md
+++ b/backend/services/llm_providers/README_HUGGINGFACE_INTEGRATION.md
@@ -0,0 +1,237 @@
 # Hugging Face Integration for AI Blog Writer
 ## Overview
 The AI Blog Writer now supports both Google Gemini and Hugging Face as LLM providers, with a clean environment variable-based configuration system. This integration uses the [Hugging Face Responses API](https://huggingface.co/docs/inference-providers/guides/responses-api) which provides a unified interface for model interactions.
 ## Supported Providers
 ### 1. Google Gemini (Default)
 - **Provider ID**: `google`
 - **Environment Variable**: `GEMINI_API_KEY`
 - **Models**: `gemini-2.0-flash-001`
 - **Features**: Text generation, structured JSON output
 ### 2. Hugging Face
 - **Provider ID**: `huggingface`
 - **Environment Variable**: `HF_TOKEN`
 - **Models**: Multiple models via Inference Providers
 - **Features**: Text generation, structured JSON output, multi-model support
 ## Configuration
 ### Environment Variables
 Set the `GPT_PROVIDER` environment variable to choose your preferred provider:
 ```bash
 # Use Google Gemini (default)
 export GPT_PROVIDER=gemini
 # or
 export GPT_PROVIDER=google
 # Use Hugging Face
 export GPT_PROVIDER=hf_response_api
 # or
 export GPT_PROVIDER=huggingface
 # or
 export GPT_PROVIDER=hf
 ```
 ### API Keys
 Configure the appropriate API key for your chosen provider:
 ```bash
 # For Google Gemini
 export GEMINI_API_KEY=your_gemini_api_key_here
 # For Hugging Face
 export HF_TOKEN=your_huggingface_token_here
 ```
 ## Usage
 ### Basic Text Generation
 ```python
 from services.llm_providers.main_text_generation import llm_text_gen
 # Generate text (uses configured provider)
 response = llm_text_gen("Write a blog post about AI trends")
 print(response)
 ```
 ### Structured JSON Generation
 ```python
 from services.llm_providers.main_text_generation import llm_text_gen
 # Define JSON schema
 schema = {
    "type": "object",
    "properties": {
        "title": {"type": "string"},
        "sections": {
            "type": "array",
            "items": {
                "type": "object",
                "properties": {
                    "heading": {"type": "string"},
                    "content": {"type": "string"}
                }
            }
        }
    }
 }
 # Generate structured response
 response = llm_text_gen(
    "Create a blog outline about machine learning",
    json_struct=schema
 )
 print(response)
 ```
 ### Direct Provider Usage
 ```python
 # Google Gemini
 from services.llm_providers.gemini_provider import gemini_text_response
 response = gemini_text_response(
    prompt="Write about AI",
    temperature=0.7,
    max_tokens=1000
 )
 # Hugging Face
 from services.llm_providers.huggingface_provider import huggingface_text_response
 response = huggingface_text_response(
    prompt="Write about AI",
    model="openai/gpt-oss-120b:groq",
    temperature=0.7,
    max_tokens=1000
 )
 ```
 ## Available Hugging Face Models
 The Hugging Face provider supports multiple models via Inference Providers:
 - `openai/gpt-oss-120b:groq` (default)
 - `moonshotai/Kimi-K2-Instruct-0905:groq`
 - `Qwen/Qwen2.5-VL-7B-Instruct`
 - `meta-llama/Llama-3.1-8B-Instruct:groq`
 - `microsoft/Phi-3-medium-4k-instruct:groq`
 - `mistralai/Mistral-7B-Instruct-v0.3:groq`
 ## Provider Selection Logic
 1. **Environment Variable**: If `GPT_PROVIDER` is set, use the specified provider
 2. **Auto-detection**: If no environment variable, check available API keys:
   - Prefer Google Gemini if `GEMINI_API_KEY` is available
   - Fall back to Hugging Face if `HF_TOKEN` is available
 3. **Fallback**: If the specified provider fails, automatically try the other provider
 ## Error Handling
 The system includes comprehensive error handling:
 - **Missing API Keys**: Clear error messages with setup instructions
 - **Provider Failures**: Automatic fallback to the other provider
 - **Invalid Models**: Validation with helpful error messages
 - **Network Issues**: Retry logic with exponential backoff
 ## Migration from Previous Version
 ### Removed Providers
 The following providers have been removed to simplify the system:
 - OpenAI
 - Anthropic
 - DeepSeek
 ### Updated Imports
 ```python
 # Old imports (no longer work)
 from services.llm_providers.openai_provider import openai_chatgpt
 from services.llm_providers.anthropic_provider import anthropic_text_response
 from services.llm_providers.deepseek_provider import deepseek_text_response
 # New imports
 from services.llm_providers.gemini_provider import gemini_text_response, gemini_structured_json_response
 from services.llm_providers.huggingface_provider import huggingface_text_response, huggingface_structured_json_response
 ```
 ## Testing
 Run the integration tests to verify everything works:
 ```bash
 cd backend
 python -c "
 import sys
 sys.path.insert(0, '.')
 from services.llm_providers.main_text_generation import check_gpt_provider
 print('Google provider supported:', check_gpt_provider('google'))
 print('Hugging Face provider supported:', check_gpt_provider('huggingface'))
 print('OpenAI provider supported:', check_gpt_provider('openai'))
 "
 ```
 ## Performance Considerations
 ### Google Gemini
 - Fast response times
 - High-quality outputs
 - Good for structured content
 ### Hugging Face
 - Multiple model options
 - Cost-effective for high-volume usage
 - Good for experimentation with different models
 ## Troubleshooting
 ### Common Issues
 1. **"No LLM API keys configured"**
   - Ensure either `GEMINI_API_KEY` or `HF_TOKEN` is set
   - Check that the API key is valid
 2. **"Unknown LLM provider"**
   - Use one of the supported `GPT_PROVIDER` values: `gemini` or `google` for Google Gemini; `hf_response_api`, `huggingface` or `hf` for Hugging Face
   - Check the `GPT_PROVIDER` environment variable
 3. **"HF_TOKEN appears to be invalid"**
   - Ensure your Hugging Face token starts with `hf_`
   - Get a new token from [Hugging Face Settings](https://huggingface.co/settings/tokens)
 4. **"OpenAI library not available"**
   - Install the OpenAI library: `pip install openai`
   - This is required for Hugging Face Responses API
 ### Debug Mode
 Enable debug logging to see provider selection:
 ```python
 import logging
 logging.basicConfig(level=logging.DEBUG)
 ```
 ## Future Enhancements
 - Support for additional Hugging Face models
 - Model-specific parameter optimization
 - Advanced caching strategies
 - Performance monitoring and metrics
 - A/B testing between providers
 ## Support
 For issues or questions:
 1. Check the troubleshooting section above
 2. Review the [Hugging Face Responses API documentation](https://huggingface.co/docs/inference-providers/guides/responses-api)
 3. Check the Google Gemini API documentation for Gemini-specific issues
--- a/backend/services/llm_providers/init.py
+++ b/backend/services/llm_providers/init.py
@@ -5,17 +5,14 @@ migrated from the legacy lib/gpt_providers functionality.
 """
 from services.llm_providers.main_text_generation import llm_text_gen
 # NOTE(review): this commit is titled "Remove OpenAI ..." and the migration README lists
 # openai_provider imports as no longer working. Confirm openai_provider still exists;
 # if it was deleted, this import (and the matching __all__ entries) breaks the package import.
 from services.llm_providers.openai_provider import openai_chatgpt, test_openai_api_key
 from services.llm_providers.gemini_provider import gemini_text_response, gemini_structured_json_response
-from services.llm_providers.anthropic_provider import anthropic_text_response
+from services.llm_providers.huggingface_provider import huggingface_text_response, huggingface_structured_json_response
-from services.llm_providers.deepseek_provider import deepseek_text_response
+
 __all__ = [
    "llm_text_gen",
    "openai_chatgpt",
    "test_openai_api_key",
    "gemini_text_response", 
    "gemini_structured_json_response",
-    "anthropic_text_response",
+    "huggingface_text_response",
-    "deepseek_text_response"
+    "huggingface_structured_json_response"
 ] 
--- a/backend/services/llm_providers/anthropic_provider.py
+++ b/backend/services/llm_providers/anthropic_provider.py
@@ -1,98 +0,0 @@
 """Anthropic Provider Service for ALwrity Backend.
 This service handles Anthropic API integrations,
 migrated from the legacy lib/gpt_providers/text_generation/anthropic_text_gen.py
 """
 import os
 import json
 import time
 from typing import Dict, Any, Tuple
 from loguru import logger
 from tenacity import (
    retry,
    stop_after_attempt,
    wait_random_exponential,
 )
 # Import APIKeyManager
 from ..onboarding.api_key_manager import APIKeyManager
 try:
    import anthropic
 except ImportError:
    anthropic = None
    logger.warning("Anthropic library not available. Install with: pip install anthropic")
 async def test_anthropic_api_key(api_key: str) -> Tuple[bool, str]:
    """
    Test if the provided Anthropic API key is valid.
    The check issues one real, minimal (max_tokens=10) Messages request with the key.
    Although declared ``async``, the client call below is not awaited, so it runs
    synchronously inside the coroutine.
    Args:
        api_key (str): The Anthropic API key to test
    Returns:
        tuple[bool, str]: A tuple containing (is_valid, message). Failures are reported
        through the tuple; no exception from the request is propagated to the caller.
    """
    # The module-level import guard sets `anthropic` to None when the package is missing.
    if not anthropic:
        return False, "Anthropic library not available"
    try:
        # Create Anthropic client with the provided key
        client = anthropic.Anthropic(api_key=api_key)
        # Try to generate a simple response as a test
        # (the response itself is not inspected; only success/failure matters)
        response = client.messages.create(
            model="claude-3-5-sonnet-20241022",
            max_tokens=10,
            messages=[{"role": "user", "content": "Hello"}]
        )
        # If we get here, the key is valid
        return True, "Anthropic API key is valid"
    except anthropic.AuthenticationError:
        return False, "Invalid Anthropic API key"
    except anthropic.RateLimitError:
        # Rate limiting says nothing about key validity, but is still reported as a failure.
        return False, "Rate limit exceeded. Please try again later."
    except Exception as e:
        # Catch-all so that any other failure is returned as a message instead of raised.
        return False, f"Error testing Anthropic API key: {str(e)}"
@retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6))
 def anthropic_text_response(prompt: str, model: str = "claude-3-5-sonnet-20241022", 
                           temperature: float = 0.7, max_tokens: int = 4000, 
                           system_prompt: str = None) -> str:
    """Get response from Anthropic Claude."""
    if not anthropic:
        logger.error("Anthropic library not available")
        return "Anthropic library not available. Please install anthropic package."
    try:
        # Use APIKeyManager instead of direct environment variable access
        api_key_manager = APIKeyManager()
        api_key = api_key_manager.get_api_key("anthropic")
        if not api_key:
            raise ValueError("Anthropic API key not found. Please configure it in the onboarding process.")
        client = anthropic.Anthropic(api_key=api_key)
        # Prepare messages
        messages = []
        if system_prompt:
            messages.append({"role": "system", "content": system_prompt})
        messages.append({"role": "user", "content": prompt})
        response = client.messages.create(
            model=model,
            max_tokens=max_tokens,
            temperature=temperature,
            messages=messages
        )
        logger.info(f"[anthropic_text_response] Generated response with {len(response.content[0].text)} characters")
        return response.content[0].text
    except Exception as err:
        logger.error(f"Failed to get response from Anthropic: {err}. Retrying.")
        raise 
--- a/backend/services/llm_providers/deepseek_provider.py
+++ b/backend/services/llm_providers/deepseek_provider.py
@@ -1,105 +0,0 @@
 """DeepSeek Provider Service for ALwrity Backend.
 This service handles DeepSeek API integrations,
 migrated from the legacy lib/gpt_providers/text_generation/deepseek_text_gen.py
 """
 import os
 import json
 import time
 from typing import Dict, Any, Tuple
 from loguru import logger
 from tenacity import (
    retry,
    stop_after_attempt,
    wait_random_exponential,
 )
 # Import APIKeyManager
 from ..onboarding.api_key_manager import APIKeyManager
 try:
    import openai
 except ImportError:
    openai = None
    logger.warning("OpenAI library not available. Install with: pip install openai")
 async def test_deepseek_api_key(api_key: str) -> Tuple[bool, str]:
    """
    Test if the provided DeepSeek API key is valid.
    The check issues one real, minimal (max_tokens=10) chat completion through the
    OpenAI client pointed at the DeepSeek base URL. Although declared ``async``, the
    client call below is not awaited, so it runs synchronously inside the coroutine.
    Args:
        api_key (str): The DeepSeek API key to test
    Returns:
        tuple[bool, str]: A tuple containing (is_valid, message). Failures are reported
        through the tuple; no exception from the request is propagated to the caller.
    """
    # The module-level import guard sets `openai` to None when the package is missing.
    if not openai:
        return False, "OpenAI library not available"
    try:
        # Create DeepSeek client with the provided key
        client = openai.OpenAI(
            api_key=api_key,
            base_url="https://api.deepseek.com/v1"
        )
        # Try to generate a simple response as a test
        # (the response itself is not inspected; only success/failure matters)
        response = client.chat.completions.create(
            model="deepseek-chat",
            messages=[{"role": "user", "content": "Hello"}],
            max_tokens=10,
            temperature=0.1
        )
        # If we get here, the key is valid
        return True, "DeepSeek API key is valid"
    except openai.AuthenticationError:
        return False, "Invalid DeepSeek API key"
    except openai.RateLimitError:
        # Rate limiting says nothing about key validity, but is still reported as a failure.
        return False, "Rate limit exceeded. Please try again later."
    except Exception as e:
        # Catch-all so that any other failure is returned as a message instead of raised.
        return False, f"Error testing DeepSeek API key: {str(e)}"
@retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6))
 def deepseek_text_response(prompt: str, model: str = "deepseek-chat", 
                          temperature: float = 0.7, max_tokens: int = 4000, 
                          system_prompt: str = None) -> str:
    """Get response from DeepSeek.
    Sends a chat completion request through the OpenAI client configured with the
    DeepSeek base URL. The whole function is wrapped by the retry decorator
    (up to 6 attempts, random exponential wait between 1 and 60 seconds).
    Args:
        prompt: User message sent to the model.
        model: DeepSeek model identifier.
        temperature: Sampling temperature forwarded to the API.
        max_tokens: Maximum number of tokens to generate.
        system_prompt: Optional system instruction, sent as the first message.
    Returns:
        The content of the first choice's message. If the ``openai`` package is not
        installed, an explanatory message string is returned instead of raising.
    Raises:
        ValueError: If no DeepSeek API key is configured.
        Exception: Any error is logged and re-raised, which triggers the retry decorator.
    """
    if not openai:
        logger.error("OpenAI library not available")
        # NOTE(review): callers cannot distinguish this message from generated text.
        return "OpenAI library not available. Please install openai package."
    try:
        # Use APIKeyManager instead of direct environment variable access
        api_key_manager = APIKeyManager()
        api_key = api_key_manager.get_api_key("deepseek")
        if not api_key:
            # Raised inside the try, so it is also logged and re-raised below (and retried).
            raise ValueError("DeepSeek API key not found. Please configure it in the onboarding process.")
        client = openai.OpenAI(
            api_key=api_key,
            base_url="https://api.deepseek.com/v1"
        )
        # Prepare messages
        messages = []
        if system_prompt:
            messages.append({"role": "system", "content": system_prompt})
        messages.append({"role": "user", "content": prompt})
        response = client.chat.completions.create(
            model=model,
            messages=messages,
            max_tokens=max_tokens,
            temperature=temperature
        )
        logger.info(f"[deepseek_text_response] Generated response with {len(response.choices[0].message.content)} characters")
        return response.choices[0].message.content
    except Exception as err:
        logger.error(f"Failed to get response from DeepSeek: {err}. Retrying.")
        raise 
--- a/backend/services/llm_providers/gemini_provider.py
+++ b/backend/services/llm_providers/gemini_provider.py
@@ -402,18 +402,33 @@ def gemini_structured_json_response(prompt, schema, temperature=0.7, top_p=0.9,
        try:
            # Convert sync call to async for retry logic
            import asyncio
            loop = asyncio.new_event_loop()
            asyncio.set_event_loop(loop)
-            response = loop.run_until_complete(
+            # Check if there's already an event loop running
-                retry_with_backoff(
+            try:
-                    make_api_call,
+                loop = asyncio.get_running_loop()
-                    config=CONTENT_RETRY_CONFIG,
+                # If we're already in an async context, we need to run this differently
-                    operation_name="gemini_structured_json",
+                logger.warning("⚠️ Already in async context, using direct sync call")
-                    context={"schema_type": type(types_schema).__name__, "max_tokens": max_tokens}
+                # For now, let's use a simpler approach without retry logic
                response = client.models.generate_content(
                    model="gemini-2.5-flash",
                    contents=prompt,
                    config=generation_config,
                )
-            )
+                logger.info("✅ Gemini API call completed successfully (sync mode)")
-            logger.info("✅ Gemini API call completed successfully")
+            except RuntimeError:
                # No event loop running, we can create one
                loop = asyncio.new_event_loop()
                asyncio.set_event_loop(loop)
                response = loop.run_until_complete(
                    retry_with_backoff(
                        make_api_call,
                        config=CONTENT_RETRY_CONFIG,
                        operation_name="gemini_structured_json",
                        context={"schema_type": type(types_schema).__name__, "max_tokens": max_tokens}
                    )
                )
                logger.info("✅ Gemini API call completed successfully")
        except Exception as api_error:
            logger.error(f"❌ Gemini API call failed: {api_error}")
            logger.error(f"❌ API Error type: {type(api_error).__name__}")
--- a/backend/services/llm_providers/huggingface_provider.py
+++ b/backend/services/llm_providers/huggingface_provider.py
@@ -0,0 +1,441 @@
 """
 Hugging Face Provider Module for ALwrity
 This module provides functions for interacting with Hugging Face's Inference Providers API
 using the Responses API (beta) which provides a unified interface for model interactions.
 Key Features:
 - Text response generation with retry logic
 - Structured JSON response generation with schema validation
 - Comprehensive error handling and logging
 - Automatic API key management
 - Support for various Hugging Face models via Inference Providers
 Best Practices:
 1. Use structured output for complex, multi-field responses
 2. Keep schemas simple and flat to avoid truncation
 3. Set appropriate token limits (8192 for complex outputs)
 4. Use low temperature (0.1-0.3) for consistent structured output
 5. Implement proper error handling in calling functions
 6. Use the Responses API for better compatibility
 Usage Examples:
    # Text response
    result = huggingface_text_response(prompt, temperature=0.7, max_tokens=2048)
    # Structured JSON response
    schema = {
        "type": "object",
        "properties": {
            "tasks": {
                "type": "array",
                "items": {"type": "object", "properties": {...}}
            }
        }
    }
    result = huggingface_structured_json_response(prompt, schema, temperature=0.2, max_tokens=8192)
 Dependencies:
 - openai (for Hugging Face Responses API)
 - tenacity (for retry logic)
 - loguru (for logging)
 - json (for fallback parsing)
 Author: ALwrity Team
 Version: 1.0
 Last Updated: October 2025
 """
 import os
 import sys
 from pathlib import Path
 import json
 import re
 from typing import Optional, Dict, Any
 from dotenv import load_dotenv
 # Load environment variables at import time, preferring <backend>/.env.
 # The path is derived from this file's location, not from the working directory.
 current_dir = Path(__file__).parent.parent  # services directory
 backend_dir = current_dir.parent  # backend directory
 env_path = backend_dir / '.env'
 if env_path.exists():
    load_dotenv(env_path)
    # NOTE(review): print() runs on every import of this module, before the logger is set up.
    print(f"Loaded .env from: {env_path}")
 else:
    # Fallback: call load_dotenv() with no path and let python-dotenv use its default lookup
    load_dotenv()
    print(f"No .env found at {env_path}, using current directory")
 from loguru import logger
 from utils.logger_utils import get_service_logger
 # Use service-specific logger to avoid conflicts
 logger = get_service_logger("huggingface_provider")
 from tenacity import (
    retry,
    stop_after_attempt,
    wait_random_exponential,
 )
 # Optional dependency: the openai client is used to talk to the Hugging Face router.
 # OPENAI_AVAILABLE records whether the import succeeded so callers can fail clearly.
 try:
    from openai import OpenAI
    OPENAI_AVAILABLE = True
 except ImportError:
    OPENAI_AVAILABLE = False
    # `warning` is the supported method name; `warn` is not available on loguru loggers.
    logger.warning("OpenAI library not available. Install with: pip install openai")
 def get_huggingface_api_key() -> str:
    """Get Hugging Face API key with proper error handling.
    Reads the ``HF_TOKEN`` environment variable and applies a basic prefix check.
    Returns:
        str: The token value exactly as read from the environment.
    Raises:
        ValueError: If ``HF_TOKEN`` is unset or empty, or does not start with ``hf_``.
            The same message is logged at error level before raising.
    """
    api_key = os.getenv('HF_TOKEN')
    if not api_key:
        error_msg = "HF_TOKEN environment variable is not set. Please set it in your .env file."
        logger.error(error_msg)
        raise ValueError(error_msg)
    # Validate API key format (basic check)
    # Only the prefix is checked here; the token is not verified against the API.
    if not api_key.startswith('hf_'):
        error_msg = "HF_TOKEN appears to be invalid. It should start with 'hf_'."
        logger.error(error_msg)
        raise ValueError(error_msg)
    return api_key
@retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6))
 def huggingface_text_response(
    prompt: str,
    model: str = "openai/gpt-oss-120b:groq",
    temperature: float = 0.7,
    max_tokens: int = 2048,
    top_p: float = 0.9,
    system_prompt: Optional[str] = None
 ) -> str:
    """
    Generate text response using Hugging Face Inference Providers API.
    This function uses the Hugging Face Responses API which provides a unified interface
    for model interactions with built-in retry logic and error handling.
    Args:
        prompt (str): The input prompt for the AI model
        model (str): Hugging Face model identifier (default: "openai/gpt-oss-120b:groq")
        temperature (float): Controls randomness (0.0-1.0)
        max_tokens (int): Maximum tokens in response
        top_p (float): Nucleus sampling parameter (0.0-1.0)
        system_prompt (str, optional): System instruction for the model
    Returns:
        str: Generated text response
    Raises:
        Exception: If API key is missing or API call fails
    Best Practices:
        - Use appropriate temperature for your use case (0.7 for creative, 0.1-0.3 for factual)
        - Set max_tokens based on expected response length
        - Use system_prompt to guide model behavior
        - Handle errors gracefully in calling functions
    Example:
        result = huggingface_text_response(
            prompt="Write a blog post about AI",
            model="openai/gpt-oss-120b:groq",
            temperature=0.7,
            max_tokens=2048,
            system_prompt="You are a professional content writer."
        )
    """
    try:
        if not OPENAI_AVAILABLE:
            raise ImportError("OpenAI library not available. Install with: pip install openai")
        # Get API key with proper error handling
        api_key = get_huggingface_api_key()
        logger.info(f"🔑 Hugging Face API key loaded: {bool(api_key)} (length: {len(api_key) if api_key else 0})")
        if not api_key:
            raise Exception("HF_TOKEN not found in environment variables")
        # Initialize Hugging Face client using Responses API
        client = OpenAI(
            base_url="https://router.huggingface.co/v1",
            api_key=api_key,
        )
        logger.info("✅ Hugging Face client initialized for text response")
        # Prepare input for the API
        input_content = []
        # Add system prompt if provided
        if system_prompt:
            input_content.append({
                "role": "system",
                "content": system_prompt
            })
        # Add user prompt
        input_content.append({
            "role": "user", 
            "content": prompt
        })
        # Add debugging for API call
        logger.info(
            "Hugging Face text call | model=%s | prompt_len=%s | temp=%s | top_p=%s | max_tokens=%s",
            model,
            len(prompt) if isinstance(prompt, str) else '<non-str>',
            temperature,
            top_p,
            max_tokens,
        )
        logger.info("🚀 Making Hugging Face API call...")
        # Add rate limiting to prevent expensive API calls
        import time
        time.sleep(1)  # 1 second delay between API calls
        # Make the API call using Responses API
        response = client.responses.parse(
            model=model,
            input=input_content,
            temperature=temperature,
            top_p=top_p,
        )
        # Extract text from response
        if hasattr(response, 'output_text') and response.output_text:
            generated_text = response.output_text
        elif hasattr(response, 'output') and response.output:
            # Handle case where output is a list
            if isinstance(response.output, list) and len(response.output) > 0:
                generated_text = response.output[0].get('content', '')
            else:
                generated_text = str(response.output)
        else:
            generated_text = str(response)
        # Clean up the response
        if generated_text:
            # Remove any markdown formatting if present
            generated_text = re.sub(r'```[a-zA-Z]*\n?', '', generated_text)
            generated_text = re.sub(r'```\n?', '', generated_text)
            generated_text = generated_text.strip()
        logger.info(f"✅ Hugging Face text response generated successfully (length: {len(generated_text)})")
        return generated_text
    except Exception as e:
        logger.error(f"❌ Hugging Face text generation failed: {str(e)}")
        raise Exception(f"Hugging Face text generation failed: {str(e)}")
def _parse_structured_json_text(response_text: str, schema: Dict[str, Any]) -> Dict[str, Any]:
    """
    Decode the model's raw text output into JSON.

    Markdown code fences are stripped first. If the cleaned text is not valid
    JSON, the widest ``{...}`` span found in it is tried next. When both
    attempts fail, an error dict is returned instead of raising.

    Args:
        response_text (str): Raw text returned by the model
        schema (dict): Schema the caller expected; echoed back in the error dict

    Returns:
        dict: The decoded JSON value, or a dict with "error", "raw_response"
            and "schema_expected" keys when nothing could be decoded
    """
    # Models frequently wrap JSON in ```json ... ``` fences; drop them.
    cleaned = re.sub(r'```json\n?', '', response_text)
    cleaned = re.sub(r'```\n?', '', cleaned)
    cleaned = cleaned.strip()
    try:
        parsed_json = json.loads(cleaned)
        logger.info("✅ Hugging Face structured JSON response parsed from text")
        return parsed_json
    except json.JSONDecodeError as json_err:
        logger.error(f"❌ JSON parsing failed: {json_err}")
        logger.error(f"Raw response: {cleaned}")

    # Second chance: the model may have surrounded the JSON object with prose.
    json_match = re.search(r'\{.*\}', cleaned, re.DOTALL)
    if json_match:
        try:
            extracted_json = json.loads(json_match.group())
            logger.info("✅ JSON extracted using regex fallback")
            return extracted_json
        except json.JSONDecodeError:
            # Fall through to the structured error response below.
            pass

    # Best effort by design: callers receive an error payload, not an exception.
    logger.error("❌ All JSON parsing attempts failed")
    return {
        "error": "Failed to parse JSON response",
        "raw_response": cleaned,
        "schema_expected": schema
    }


# NOTE(review): the retry wraps the whole function, so non-transient failures
# (missing library, missing HF_TOKEN) are also retried up to 6 times — confirm
# whether that is intended.
@retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6))
def huggingface_structured_json_response(
    prompt: str,
    schema: Dict[str, Any],
    model: str = "openai/gpt-oss-120b:groq",
    temperature: float = 0.7,
    max_tokens: int = 8192,
    system_prompt: Optional[str] = None
) -> Dict[str, Any]:
    """
    Generate a JSON response using the Hugging Face Inference Providers API.

    The schema is embedded in the user prompt as a JSON instruction, the model
    is called through the Responses API in plain text mode, and the returned
    text is parsed into JSON locally. The response is not validated against
    the schema.

    Args:
        prompt (str): The input prompt for the AI model
        schema (dict): JSON schema describing the expected output structure
        model (str): Hugging Face model identifier (default: "openai/gpt-oss-120b:groq")
        temperature (float): Controls randomness (0.0-1.0). Use 0.1-0.3 for structured output
        max_tokens (int): Maximum tokens in the response, sent as ``max_output_tokens``
        system_prompt (str, optional): System instruction for the model

    Returns:
        dict: Parsed JSON response. If the model output cannot be parsed, or the
            response carries no usable data, a dict with "error",
            "raw_response" and "schema_expected" keys is returned instead.

    Raises:
        Exception: If the OpenAI library or the API key is missing, or the API
            call fails. The original error is chained as ``__cause__``.

    Best Practices:
        - Keep schemas simple and flat to avoid truncation
        - Use low temperature (0.1-0.3) for consistent structured output
        - Set max_tokens to 8192 for complex multi-field responses
        - Avoid deeply nested schemas with many required fields
        - Test with smaller outputs first, then scale up

    Example:
        schema = {
            "type": "object",
            "properties": {
                "tasks": {
                    "type": "array",
                    "items": {
                        "type": "object",
                        "properties": {
                            "title": {"type": "string"},
                            "description": {"type": "string"}
                        }
                    }
                }
            }
        }
        result = huggingface_structured_json_response(prompt, schema, temperature=0.2, max_tokens=8192)
    """
    try:
        if not OPENAI_AVAILABLE:
            raise ImportError("OpenAI library not available. Install with: pip install openai")

        # Get API key with proper error handling
        api_key = get_huggingface_api_key()
        logger.info(f"🔑 Hugging Face API key loaded: {bool(api_key)} (length: {len(api_key) if api_key else 0})")
        if not api_key:
            raise Exception("HF_TOKEN not found in environment variables")

        # The Hugging Face router exposes an OpenAI-compatible endpoint.
        client = OpenAI(
            base_url="https://router.huggingface.co/v1",
            api_key=api_key,
        )
        logger.info("✅ Hugging Face client initialized for structured JSON response")

        # Build the user message once: prompt, generic JSON instruction, then
        # the schema itself so the model can see the exact structure.
        user_content = (
            f"{prompt}\n\n"
            "Please respond with valid JSON that matches the provided schema."
            "\n\nPlease respond with valid JSON that matches this exact structure:\n"
            + json.dumps(schema, indent=2)
        )

        input_content = []
        if system_prompt:
            input_content.append({
                "role": "system",
                "content": system_prompt
            })
        input_content.append({
            "role": "user",
            "content": user_content
        })

        # f-string rather than %-style args: loguru formats with str.format, so
        # "%s" placeholders would be logged literally without their values.
        prompt_len = len(prompt) if isinstance(prompt, str) else '<non-str>'
        logger.info(
            f"Hugging Face structured call | model={model} | prompt_len={prompt_len} | "
            f"schema_kind={type(schema).__name__} | temp={temperature} | max_tokens={max_tokens}"
        )
        logger.info("🚀 Making Hugging Face structured API call...")
        # Text mode with manual JSON parsing avoids API structured-format issues.
        logger.info("🚀 Making Hugging Face API call (text mode with JSON parsing)...")

        # Add rate limiting to prevent expensive API calls
        import time
        time.sleep(1)  # 1 second delay between API calls

        response = client.responses.parse(
            model=model,
            input=input_content,
            temperature=temperature,
            # Honour the documented max_tokens argument (previously ignored).
            max_output_tokens=max_tokens,
        )

        # Prefer already-parsed output when the SDK provides it.
        if hasattr(response, 'output_parsed') and response.output_parsed:
            logger.info("✅ Hugging Face structured JSON response parsed successfully")
            parsed = response.output_parsed
            # Convert Pydantic model to dict if needed (v2 API first, then v1)
            if hasattr(parsed, 'model_dump'):
                return parsed.model_dump()
            if hasattr(parsed, 'dict'):
                return parsed.dict()
            return parsed

        # Otherwise fall back to parsing the text output ourselves.
        if hasattr(response, 'output_text') and response.output_text:
            return _parse_structured_json_text(response.output_text, schema)

        logger.error("❌ No valid response data found")
        return {
            "error": "No valid response data found",
            "raw_response": str(response),
            "schema_expected": schema
        }
    except Exception as e:
        error_msg = str(e) if str(e) else repr(e)
        error_type = type(e).__name__
        logger.error(f"❌ Hugging Face structured JSON generation failed: {error_type}: {error_msg}")
        logger.error(f"❌ Full exception details: {repr(e)}")
        import traceback
        logger.error(f"❌ Traceback: {traceback.format_exc()}")
        # Chain the cause so the original stack is preserved for callers.
        raise Exception(f"Hugging Face structured JSON generation failed: {error_type}: {error_msg}") from e
 def get_available_models() -> list:
     """
     Return the Hugging Face model identifiers offered for text generation.

     Returns:
         list: A fresh list of model identifier strings on every call
     """
     # Kept as an immutable tuple; callers get their own list copy.
     supported = (
         "openai/gpt-oss-120b:groq",
         "moonshotai/Kimi-K2-Instruct-0905:groq",
         "Qwen/Qwen2.5-VL-7B-Instruct",
         "meta-llama/Llama-3.1-8B-Instruct:groq",
         "microsoft/Phi-3-medium-4k-instruct:groq",
         "mistralai/Mistral-7B-Instruct-v0.3:groq",
     )
     return list(supported)
 def validate_model(model: str) -> bool:
     """
     Report whether a model identifier is in the supported model list.

     Args:
         model (str): Model identifier to check

     Returns:
         bool: True when the identifier appears in get_available_models(),
             False otherwise
     """
     if model in get_available_models():
         return True
     return False
--- a/backend/services/llm_providers/main_text_generation.py
+++ b/backend/services/llm_providers/main_text_generation.py
@@ -10,10 +10,9 @@ from typing import Optional, Dict, Any
 from loguru import logger
 from ..onboarding.api_key_manager import APIKeyManager
 from .openai_provider import openai_chatgpt
 from .gemini_provider import gemini_text_response, gemini_structured_json_response
-from .anthropic_provider import anthropic_text_response
+from .huggingface_provider import huggingface_text_response, huggingface_structured_json_response
-from .deepseek_provider import deepseek_text_response
+
 def llm_text_gen(prompt: str, system_prompt: Optional[str] = None, json_struct: Optional[Dict[str, Any]] = None) -> str:
    """
@@ -31,13 +30,6 @@ def llm_text_gen(prompt: str, system_prompt: Optional[str] = None, json_struct:
        logger.info("[llm_text_gen] Starting text generation")
        logger.debug(f"[llm_text_gen] Prompt length: {len(prompt)} characters")
        # Initialize API key manager and reload keys from .env file
        api_key_manager = APIKeyManager()
        api_key_manager.load_api_keys()  # Force reload from .env file
        # Debug: Log loaded API keys
        logger.debug(f"[llm_text_gen] Loaded API keys: {api_key_manager.get_all_keys()}")
        # Set default values for LLM parameters
        gpt_provider = "google"  # Default to Google Gemini
        model = "gemini-2.0-flash-001"
@@ -49,6 +41,15 @@ def llm_text_gen(prompt: str, system_prompt: Optional[str] = None, json_struct:
        frequency_penalty = 0.0
        presence_penalty = 0.0
        # Check for GPT_PROVIDER environment variable
        env_provider = os.getenv('GPT_PROVIDER', '').lower()
        if env_provider in ['gemini', 'google']:
            gpt_provider = "google"
            model = "gemini-2.0-flash-001"
        elif env_provider in ['hf_response_api', 'huggingface', 'hf']:
            gpt_provider = "huggingface"
            model = "openai/gpt-oss-120b:groq"
        # Default blog characteristics
        blog_tone = "Professional"
        blog_demographic = "Professional"
@@ -57,41 +58,40 @@ def llm_text_gen(prompt: str, system_prompt: Optional[str] = None, json_struct:
        blog_output_format = "markdown"
        blog_length = 2000
-        # Try to get provider from environment or config
+        # Check which providers have API keys available using APIKeyManager
-        try:
+        api_key_manager = APIKeyManager()
-            # Check which providers have API keys available
+        available_providers = []
-            available_providers = []
+        if api_key_manager.get_api_key("gemini"):
-            if api_key_manager.get_api_key("openai"):
+            available_providers.append("google")
-                available_providers.append("openai")
+        if api_key_manager.get_api_key("hf_token"):
-            if api_key_manager.get_api_key("gemini"):
+            available_providers.append("huggingface")
-                available_providers.append("google")
+        
-            if api_key_manager.get_api_key("anthropic"):
+        # If no environment variable set, auto-detect based on available keys
-                available_providers.append("anthropic")
+        if not env_provider:
-            if api_key_manager.get_api_key("deepseek"):
+            # Prefer Google Gemini if available, otherwise use Hugging Face
                available_providers.append("deepseek")
            # Prefer Google Gemini if available, otherwise use first available
            if "google" in available_providers:
                gpt_provider = "google"
                model = "gemini-2.0-flash-001"
-            elif available_providers:
+            elif "huggingface" in available_providers:
-                gpt_provider = available_providers[0]
+                gpt_provider = "huggingface"
-                if gpt_provider == "openai":
+                model = "openai/gpt-oss-120b:groq"
                    model = "gpt-4o"
                elif gpt_provider == "anthropic":
                    model = "claude-3-5-sonnet-20241022"
                elif gpt_provider == "deepseek":
                    model = "deepseek-chat"
            else:
-                logger.error("[llm_text_gen] No API keys found. Structured mock responses are disabled.")
+                logger.error("[llm_text_gen] No API keys found for supported providers.")
-                raise RuntimeError("No LLM API keys configured. Configure provider API keys to enable AI responses.")
+                raise RuntimeError("No LLM API keys configured. Configure GEMINI_API_KEY or HF_TOKEN to enable AI responses.")
-                
+        else:
-            logger.debug(f"[llm_text_gen] Using provider: {gpt_provider}, model: {model}")
+            # Environment variable was set, validate it's supported
            if gpt_provider not in available_providers:
                logger.warning(f"[llm_text_gen] Provider {gpt_provider} not available, falling back to available providers")
                if "google" in available_providers:
                    gpt_provider = "google"
                    model = "gemini-2.0-flash-001"
                elif "huggingface" in available_providers:
                    gpt_provider = "huggingface"
                    model = "openai/gpt-oss-120b:groq"
                else:
                    raise RuntimeError("No supported providers available.")
-        except Exception as err:
+        logger.debug(f"[llm_text_gen] Using provider: {gpt_provider}, model: {model}")
            logger.warning(f"[llm_text_gen] Error determining provider, using defaults: {err}")
            gpt_provider = "google"
            model = "gemini-2.0-flash-001"
        # Construct the system prompt if not provided
        if system_prompt is None:
@@ -118,18 +118,7 @@ def llm_text_gen(prompt: str, system_prompt: Optional[str] = None, json_struct:
        # Generate response based on provider
        try:
-            if gpt_provider == "openai":
+            if gpt_provider == "google":
                return openai_chatgpt(
                    prompt=prompt,
                    model=model,
                    temperature=temperature,
                    max_tokens=max_tokens,
                    top_p=top_p,
                    n=n,
                    fp=fp,
                    system_prompt=system_instructions
                )
            elif gpt_provider == "google":
                if json_struct:
                    return gemini_structured_json_response(
                        prompt=prompt,
@@ -149,66 +138,83 @@ def llm_text_gen(prompt: str, system_prompt: Optional[str] = None, json_struct:
                        max_tokens=max_tokens,
                        system_prompt=system_instructions
                    )
-            elif gpt_provider == "anthropic":
+            elif gpt_provider == "huggingface":
-                return anthropic_text_response(
+                if json_struct:
-                    prompt=prompt,
+                    return huggingface_structured_json_response(
-                    model=model,
+                        prompt=prompt,
-                    temperature=temperature,
+                        schema=json_struct,
-                    max_tokens=max_tokens,
+                        model=model,
-                    system_prompt=system_instructions
+                        temperature=temperature,
-                )
+                        max_tokens=max_tokens,
-            elif gpt_provider == "deepseek":
+                        system_prompt=system_instructions
-                return deepseek_text_response(
+                    )
-                    prompt=prompt,
+                else:
-                    model=model,
+                    return huggingface_text_response(
-                    temperature=temperature,
+                        prompt=prompt,
-                    max_tokens=max_tokens,
+                        model=model,
-                    system_prompt=system_instructions
+                        temperature=temperature,
-                )
+                        max_tokens=max_tokens,
                        top_p=top_p,
                        system_prompt=system_instructions
                    )
            else:
                logger.error(f"[llm_text_gen] Unknown provider: {gpt_provider}")
-                raise RuntimeError("Unknown LLM provider.")
+                raise RuntimeError("Unknown LLM provider. Supported providers: google, huggingface")
        except Exception as provider_error:
            logger.error(f"[llm_text_gen] Provider {gpt_provider} failed: {str(provider_error)}")
-            # Try to fallback to another provider
+            
-            fallback_providers = ["openai", "anthropic", "deepseek"]
+            # CIRCUIT BREAKER: Only try ONE fallback to prevent expensive API calls
-            for fallback_provider in fallback_providers:
+            fallback_providers = ["google", "huggingface"]
-                if fallback_provider in available_providers and fallback_provider != gpt_provider:
+            fallback_providers = [p for p in fallback_providers if p in available_providers and p != gpt_provider]
-                    try:
+            
-                        logger.info(f"[llm_text_gen] Trying fallback provider: {fallback_provider}")
+            if fallback_providers:
-                        if fallback_provider == "openai":
+                fallback_provider = fallback_providers[0]  # Only try the first available
-                            return openai_chatgpt(
+                try:
                    logger.info(f"[llm_text_gen] Trying SINGLE fallback provider: {fallback_provider}")
                    if fallback_provider == "google":
                        if json_struct:
                            return gemini_structured_json_response(
                                prompt=prompt,
-                                model="gpt-4o",
+                                schema=json_struct,
                                temperature=temperature,
                                top_p=top_p,
                                top_k=n,
                                max_tokens=max_tokens,
                                system_prompt=system_instructions
                            )
                        else:
                            return gemini_text_response(
                                prompt=prompt,
                                temperature=temperature,
                                top_p=top_p,
                                n=n,
                                max_tokens=max_tokens,
                                system_prompt=system_instructions
                            )
                    elif fallback_provider == "huggingface":
                        if json_struct:
                            return huggingface_structured_json_response(
                                prompt=prompt,
                                schema=json_struct,
                                model="openai/gpt-oss-120b:groq",
                                temperature=temperature,
                                max_tokens=max_tokens,
                                system_prompt=system_instructions
                            )
                        else:
                            return huggingface_text_response(
                                prompt=prompt,
                                model="openai/gpt-oss-120b:groq",
                                temperature=temperature,
                                max_tokens=max_tokens,
                                top_p=top_p,
                                n=n,
                                fp=fp,
                                system_prompt=system_instructions
                            )
-                        elif fallback_provider == "anthropic":
+                except Exception as fallback_error:
-                            return anthropic_text_response(
+                    logger.error(f"[llm_text_gen] Fallback provider {fallback_provider} also failed: {str(fallback_error)}")
                                prompt=prompt,
                                model="claude-3-5-sonnet-20241022",
                                temperature=temperature,
                                max_tokens=max_tokens,
                                system_prompt=system_instructions
                            )
                        elif fallback_provider == "deepseek":
                            return deepseek_text_response(
                                prompt=prompt,
                                model="deepseek-chat",
                                temperature=temperature,
                                max_tokens=max_tokens,
                                system_prompt=system_instructions
                            )
                    except Exception as fallback_error:
                        logger.error(f"[llm_text_gen] Fallback provider {fallback_provider} also failed: {str(fallback_error)}")
                        continue
-            # If all providers fail, raise an error (no mock)
+            # CIRCUIT BREAKER: Stop immediately to prevent expensive API calls
-            logger.error("[llm_text_gen] All providers failed. Structured mock responses are disabled.")
+            logger.error("[llm_text_gen] CIRCUIT BREAKER: Stopping to prevent expensive API calls.")
            raise RuntimeError("All LLM providers failed to generate a response.")
    except Exception as e:
@@ -217,7 +223,7 @@ def llm_text_gen(prompt: str, system_prompt: Optional[str] = None, json_struct:
 def check_gpt_provider(gpt_provider: str) -> bool:
    """Check if the specified GPT provider is supported."""
-    supported_providers = ["openai", "google", "anthropic", "deepseek"]
+    supported_providers = ["google", "huggingface"]
    return gpt_provider in supported_providers
 def get_api_key(gpt_provider: str) -> Optional[str]:
@@ -225,10 +231,8 @@ def get_api_key(gpt_provider: str) -> Optional[str]:
    try:
        api_key_manager = APIKeyManager()
        provider_mapping = {
            "openai": "openai",
            "google": "gemini",
-            "anthropic": "anthropic",
+            "huggingface": "hf_token"
            "deepseek": "deepseek"
        }
        mapped_provider = provider_mapping.get(gpt_provider, gpt_provider)
--- a/backend/services/llm_providers/openai_provider.py
+++ b/backend/services/llm_providers/openai_provider.py
@@ -1,133 +0,0 @@
 """OpenAI Provider Service for ALwrity Backend.
 This service handles OpenAI API integrations,
 migrated from the legacy lib/gpt_providers/text_generation/openai_text_gen.py
 """
 import os
 import time
 import openai
 import asyncio
 from typing import Tuple
 from loguru import logger
 from tenacity import (
    retry,
    stop_after_attempt,
    wait_random_exponential,
 )
 # Import APIKeyManager
 from ..onboarding.api_key_manager import APIKeyManager
 async def test_openai_api_key(api_key: str) -> Tuple[bool, str]:
     """
     Test if the provided OpenAI API key is valid.

     The check lists the available models, which requires a working key.
     The synchronous client call runs in the default executor so the event
     loop is not blocked while the request is in flight.

     Args:
         api_key (str): The OpenAI API key to test

     Returns:
         tuple[bool, str]: A tuple containing (is_valid, message)
     """
     try:
         # Create OpenAI client with the provided key
         client = openai.OpenAI(api_key=api_key)
         # Listing models is a simple authenticated call; the result is unused.
         loop = asyncio.get_running_loop()
         await loop.run_in_executor(None, client.models.list)
         # If we get here, the key is valid
         return True, "OpenAI API key is valid"
     except openai.AuthenticationError:
         return False, "Invalid OpenAI API key"
     except openai.RateLimitError:
         return False, "Rate limit exceeded. Please try again later."
     except Exception as e:
         return False, f"Error testing OpenAI API key: {str(e)}"
@retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6))
def openai_chatgpt(prompt: str, model: str = "gpt-4o", temperature: float = 0.7,
                   max_tokens: int = 4000, top_p: float = 0.9, n: int = 1,
                   fp: int = 16, system_prompt: str = None) -> str:
    """
    Wrapper function for OpenAI's ChatGPT completion.

    The completion is streamed; each text delta is echoed to stdout and the
    concatenated text is returned.

    Args:
        prompt (str): The input text to generate completion for.
        model (str, optional): Model to be used for the completion. Defaults to "gpt-4o".
        temperature (float, optional): Controls randomness. Lower values make responses more deterministic. Defaults to 0.7.
        max_tokens (int, optional): Maximum number of tokens to generate. Defaults to 4000.
        top_p (float, optional): Controls diversity. Defaults to 0.9.
        n (int, optional): Number of completions to generate. Defaults to 1.
        fp (int, optional): Frequency penalty. Defaults to 16. Values outside
            the API's accepted range of -2.0 to 2.0 are not sent, so the API
            default applies instead of the request being rejected.
        system_prompt (str, optional): System prompt for the conversation. Defaults to None.

    Returns:
        str: The generated text completion.

    Raises:
        SystemExit: If an API error, connection error, rate limit error or any
            other error occurs (including a missing API key).
    """
    # Wait for 5 seconds to comply with rate limits
    time.sleep(5)
    try:
        # Use APIKeyManager instead of direct environment variable access
        api_key = APIKeyManager().get_api_key("openai")
        if not api_key:
            raise ValueError("OpenAI API key not found. Please configure it in the onboarding process.")
        client = openai.OpenAI(api_key=api_key)

        # Prepare messages
        messages = []
        if system_prompt:
            messages.append({"role": "system", "content": system_prompt})
        messages.append({"role": "user", "content": prompt})

        request_kwargs = {
            "model": model,
            "messages": messages,
            "max_tokens": max_tokens,
            "n": n,
            "top_p": top_p,
            "stream": True,
            "temperature": temperature,
        }
        # The API only accepts frequency_penalty between -2.0 and 2.0; the
        # default fp=16 would otherwise make every default call fail.
        if -2.0 <= fp <= 2.0:
            request_kwargs["frequency_penalty"] = fp
        else:
            logger.warning(
                f"[openai_chatgpt] Ignoring frequency penalty {fp}: outside the supported range -2.0 to 2.0"
            )
        response = client.chat.completions.create(**request_kwargs)

        # Iterate through the stream of events, keeping only real text deltas
        collected_messages = []
        for chunk in response:
            if not chunk.choices:
                continue
            chunk_message = chunk.choices[0].delta.content
            if chunk_message is None:
                # Role-only and terminal chunks carry no text.
                continue
            collected_messages.append(chunk_message)
            print(chunk_message, end="", flush=True)

        full_reply_content = ''.join(collected_messages)
        logger.info(f"[openai_chatgpt] Generated response with {len(full_reply_content)} characters")
        return full_reply_content
    # NOTE(review): SystemExit is not an Exception subclass, so callers using
    # `except Exception` will not catch it and the @retry decorator presumably
    # never retries. Kept for interface compatibility; consider a regular
    # exception type.
    # Subclasses of openai.APIError must be handled before APIError itself,
    # otherwise their handlers are unreachable.
    except openai.RateLimitError as e:
        logger.error(f"OpenAI Rate Limit Error: {e}")
        raise SystemExit from e
    except openai.APIConnectionError as e:
        logger.error(f"OpenAI API Connection Error: {e}")
        raise SystemExit from e
    except openai.APIError as e:
        logger.error(f"OpenAI API Error: {e}")
        raise SystemExit from e
    except Exception as e:
        logger.error(f"Unexpected error in OpenAI API call: {e}")
        raise SystemExit from e
--- a/backend/services/onboarding/api_key_manager.py
+++ b/backend/services/onboarding/api_key_manager.py
@@ -388,10 +388,8 @@ class APIKeyManager:
    def _load_from_env(self):
        """Load API keys from environment variables."""
        providers = [
            'OPENAI_API_KEY',
            'ANTHROPIC_API_KEY', 
            'GEMINI_API_KEY',
-            'MISTRAL_API_KEY',
+            'HF_TOKEN',
            'TAVILY_API_KEY',
            'SERPER_API_KEY',
            'METAPHOR_API_KEY',
--- a/frontend/src/hooks/usePolling.ts
+++ b/frontend/src/hooks/usePolling.ts
@@ -24,7 +24,7 @@ export function usePolling(
  options: UsePollingOptions = {}
 ): UsePollingReturn {
  const {
-    interval = 2000, // 2 seconds default
+    interval = 5000, // 5 seconds default - increased to reduce load
    onProgress,
    onComplete,
    onError
@@ -99,13 +99,17 @@ export function usePolling(
        }
        if (status.status === 'completed') {
          console.log('✅ Task completed - stopping polling immediately');
          setResult(status.result);
          onComplete?.(status.result);
          stopPolling();
          return; // Exit early to prevent further processing
        } else if (status.status === 'failed') {
          console.log('❌ Task failed - stopping polling immediately');
          setError(status.error || 'Task failed');
          onError?.(status.error || 'Task failed');
          stopPolling();
          return; // Exit early to prevent further processing
        }
        attemptsRef.current++;
@@ -113,12 +117,16 @@ export function usePolling(
        const errorMessage = err instanceof Error ? err.message : 'Unknown error occurred';
        console.error('Polling error:', errorMessage);
-        // Only stop polling for actual task failures (404, task not found)
+        // Stop polling for task failures and rate limiting
        // For network errors, timeouts, etc., continue polling
        if (errorMessage.includes('404') || errorMessage.includes('Task not found')) {
          setError('Task not found - it may have expired or been cleaned up');
          onError?.('Task not found - it may have expired or been cleaned up');
          stopPolling();
        } else if (errorMessage.includes('429') || errorMessage.includes('Too Many Requests')) {
          console.warn('Rate limited - stopping polling to prevent further issues');
          setError('Rate limited - please try again later');
          onError?.('Rate limited - please try again later');
          stopPolling();
        }
        // For other errors (timeouts, network issues), continue polling
        // The backend will eventually complete or fail, and we'll catch it