From 3219e6bbe4d7bd49a4012174af81f1cc7195ad0f Mon Sep 17 00:00:00 2001
From: ajaysi <ajay.calsoft@gmail.com>
Date: Wed, 29 Oct 2025 20:15:04 +0530
Subject: [PATCH] Hugging Face Integration. Remove OpenAI and Anthropic and
 DeepSeek. Add Hugging Face.

---
 backend/alwrity_utils/rate_limiter.py         |   8 +-
 .../research/competitor_analyzer.py           |   8 +-
 .../research/content_angle_generator.py       |   8 +-
 .../blog_writer/research/keyword_analyzer.py  |   8 +-
 backend/services/llm_providers/README.md      |  41 +-
 .../README_HUGGINGFACE_INTEGRATION.md         | 237 ++++++++++
 backend/services/llm_providers/__init__.py    |  11 +-
 .../llm_providers/anthropic_provider.py       |  98 ----
 .../llm_providers/deepseek_provider.py        | 105 -----
 .../services/llm_providers/gemini_provider.py |  35 +-
 .../llm_providers/huggingface_provider.py     | 441 ++++++++++++++++++
 .../llm_providers/main_text_generation.py     | 214 ++++-----
 .../services/llm_providers/openai_provider.py | 133 ------
 .../services/onboarding/api_key_manager.py    |   4 +-
 frontend/src/hooks/usePolling.ts              |  14 +-
 15 files changed, 883 insertions(+), 482 deletions(-)
 create mode 100644 backend/services/llm_providers/README_HUGGINGFACE_INTEGRATION.md
 delete mode 100644 backend/services/llm_providers/anthropic_provider.py
 delete mode 100644 backend/services/llm_providers/deepseek_provider.py
 create mode 100644 backend/services/llm_providers/huggingface_provider.py
 delete mode 100644 backend/services/llm_providers/openai_provider.py

diff --git a/backend/alwrity_utils/rate_limiter.py b/backend/alwrity_utils/rate_limiter.py
index 4fcc92ee..8db3bab8 100644
--- a/backend/alwrity_utils/rate_limiter.py
+++ b/backend/alwrity_utils/rate_limiter.py
@@ -14,7 +14,7 @@ from loguru import logger
 class RateLimiter:
     """Manages rate limiting for ALwrity backend."""
     
-    def __init__(self, window_seconds: int = 60, max_requests: int = 200):
+    def __init__(self, window_seconds: int = 60, max_requests: int = 1000):  # Increased for development
         self.window_seconds = window_seconds
         self.max_requests = max_requests
         self.request_counts: Dict[str, List[float]] = defaultdict(list)
@@ -28,6 +28,12 @@ class RateLimiter:
             "/ai-analytics",
             "/gap-analysis",
             "/calendar-events",
+            # Research endpoints - exempt from rate limiting
+            "/api/research",
+            "/api/blog-writer",
+            "/api/blog-writer/research",
+            "/api/blog-writer/research/",
+            "/api/blog/research/status",
             "/calendar-generation/progress",
             "/health",
             "/health/database",
diff --git a/backend/services/blog_writer/research/competitor_analyzer.py b/backend/services/blog_writer/research/competitor_analyzer.py
index 20e58101..0e085b0c 100644
--- a/backend/services/blog_writer/research/competitor_analyzer.py
+++ b/backend/services/blog_writer/research/competitor_analyzer.py
@@ -39,7 +39,7 @@ class CompetitorAnalyzer:
         }}
         """
         
-        from services.llm_providers.gemini_provider import gemini_structured_json_response
+        from services.llm_providers.main_text_generation import llm_text_gen
         
         competitor_schema = {
             "type": "object",
@@ -55,11 +55,9 @@ class CompetitorAnalyzer:
             "required": ["top_competitors", "content_gaps", "opportunities", "competitive_advantages", "market_positioning", "industry_leaders", "analysis_notes"]
         }
         
-        competitor_analysis = gemini_structured_json_response(
+        competitor_analysis = llm_text_gen(
             prompt=competitor_prompt,
-            schema=competitor_schema,
-            temperature=0.3,
-            max_tokens=4000
+            json_struct=competitor_schema
         )
         
         if isinstance(competitor_analysis, dict) and 'error' not in competitor_analysis:
diff --git a/backend/services/blog_writer/research/content_angle_generator.py b/backend/services/blog_writer/research/content_angle_generator.py
index e4f283d1..69f32555 100644
--- a/backend/services/blog_writer/research/content_angle_generator.py
+++ b/backend/services/blog_writer/research/content_angle_generator.py
@@ -48,7 +48,7 @@ class ContentAngleGenerator:
         }}
         """
         
-        from services.llm_providers.gemini_provider import gemini_structured_json_response
+        from services.llm_providers.main_text_generation import llm_text_gen
         
         angles_schema = {
             "type": "object",
@@ -63,11 +63,9 @@ class ContentAngleGenerator:
             "required": ["content_angles"]
         }
         
-        angles_result = gemini_structured_json_response(
+        angles_result = llm_text_gen(
             prompt=angles_prompt,
-            schema=angles_schema,
-            temperature=0.7,
-            max_tokens=4000
+            json_struct=angles_schema
         )
         
         if isinstance(angles_result, dict) and 'content_angles' in angles_result:
diff --git a/backend/services/blog_writer/research/keyword_analyzer.py b/backend/services/blog_writer/research/keyword_analyzer.py
index 9e42e6dc..598d6fcc 100644
--- a/backend/services/blog_writer/research/keyword_analyzer.py
+++ b/backend/services/blog_writer/research/keyword_analyzer.py
@@ -44,7 +44,7 @@ class KeywordAnalyzer:
         }}
         """
         
-        from services.llm_providers.gemini_provider import gemini_structured_json_response
+        from services.llm_providers.main_text_generation import llm_text_gen
         
         keyword_schema = {
             "type": "object",
@@ -62,11 +62,9 @@ class KeywordAnalyzer:
             "required": ["primary", "secondary", "long_tail", "search_intent", "difficulty", "content_gaps", "semantic_keywords", "trending_terms", "analysis_insights"]
         }
         
-        keyword_analysis = gemini_structured_json_response(
+        keyword_analysis = llm_text_gen(
             prompt=keyword_prompt,
-            schema=keyword_schema,
-            temperature=0.3,
-            max_tokens=4000
+            json_struct=keyword_schema
         )
         
         if isinstance(keyword_analysis, dict) and 'error' not in keyword_analysis:
diff --git a/backend/services/llm_providers/README.md b/backend/services/llm_providers/README.md
index b7b58271..b85687d2 100644
--- a/backend/services/llm_providers/README.md
+++ b/backend/services/llm_providers/README.md
@@ -1,6 +1,43 @@
-# Gemini Provider Module
+# LLM Providers Module
 
-This module provides functions for interacting with Google's Gemini API, specifically designed for structured JSON output and text generation. It follows the official Gemini API documentation and implements best practices for reliable AI interactions.
+This module provides functions for interacting with multiple LLM providers, specifically Google's Gemini API and Hugging Face Inference Providers. It follows official API documentation and implements best practices for reliable AI interactions.
+
+## Supported Providers
+
+- **Google Gemini**: High-quality text generation with structured JSON output
+- **Hugging Face**: Multiple models via Inference Providers with unified interface
+
+## Quick Start
+
+```python
+from services.llm_providers.main_text_generation import llm_text_gen
+
+# Generate text (auto-detects available provider)
+response = llm_text_gen("Write a blog post about AI trends")
+print(response)
+```
+
+## Configuration
+
+Set your preferred provider using the `GPT_PROVIDER` environment variable:
+
+```bash
+# Use Google Gemini (default)
+export GPT_PROVIDER=gemini
+
+# Use Hugging Face
+export GPT_PROVIDER=hf_response_api
+```
+
+Configure API keys:
+
+```bash
+# For Google Gemini
+export GEMINI_API_KEY=your_gemini_api_key_here
+
+# For Hugging Face
+export HF_TOKEN=your_huggingface_token_here
+```
 
 ## Key Features
 
diff --git a/backend/services/llm_providers/README_HUGGINGFACE_INTEGRATION.md b/backend/services/llm_providers/README_HUGGINGFACE_INTEGRATION.md
new file mode 100644
index 00000000..71f7f91b
--- /dev/null
+++ b/backend/services/llm_providers/README_HUGGINGFACE_INTEGRATION.md
@@ -0,0 +1,237 @@
+# Hugging Face Integration for AI Blog Writer
+
+## Overview
+
+The AI Blog Writer now supports both Google Gemini and Hugging Face as LLM providers, selected through environment variables. The Hugging Face integration uses the [Hugging Face Responses API](https://huggingface.co/docs/inference-providers/guides/responses-api), which provides a unified interface for model interactions.
+
+## Supported Providers
+
+### 1. Google Gemini (Default)
+- **Provider ID**: `google`
+- **Environment Variable**: `GEMINI_API_KEY`
+- **Models**: `gemini-2.0-flash-001`
+- **Features**: Text generation, structured JSON output
+
+### 2. Hugging Face
+- **Provider ID**: `huggingface`
+- **Environment Variable**: `HF_TOKEN`
+- **Models**: Multiple models via Inference Providers
+- **Features**: Text generation, structured JSON output, multi-model support
+
+## Configuration
+
+### Environment Variables
+
+Set the `GPT_PROVIDER` environment variable to choose your preferred provider:
+
+```bash
+# Use Google Gemini (default)
+export GPT_PROVIDER=gemini
+# or
+export GPT_PROVIDER=google
+
+# Use Hugging Face
+export GPT_PROVIDER=hf_response_api
+# or
+export GPT_PROVIDER=huggingface
+# or
+export GPT_PROVIDER=hf
+```
+
+### API Keys
+
+Configure the appropriate API key for your chosen provider:
+
+```bash
+# For Google Gemini
+export GEMINI_API_KEY=your_gemini_api_key_here
+
+# For Hugging Face
+export HF_TOKEN=your_huggingface_token_here
+```
+
+## Usage
+
+### Basic Text Generation
+
+```python
+from services.llm_providers.main_text_generation import llm_text_gen
+
+# Generate text (uses configured provider)
+response = llm_text_gen("Write a blog post about AI trends")
+print(response)
+```
+
+### Structured JSON Generation
+
+```python
+from services.llm_providers.main_text_generation import llm_text_gen
+
+# Define JSON schema
+schema = {
+    "type": "object",
+    "properties": {
+        "title": {"type": "string"},
+        "sections": {
+            "type": "array",
+            "items": {
+                "type": "object",
+                "properties": {
+                    "heading": {"type": "string"},
+                    "content": {"type": "string"}
+                }
+            }
+        }
+    }
+}
+
+# Generate structured response
+response = llm_text_gen(
+    "Create a blog outline about machine learning",
+    json_struct=schema
+)
+print(response)
+```
+
+### Direct Provider Usage
+
+```python
+# Google Gemini
+from services.llm_providers.gemini_provider import gemini_text_response
+
+response = gemini_text_response(
+    prompt="Write about AI",
+    temperature=0.7,
+    max_tokens=1000
+)
+
+# Hugging Face
+from services.llm_providers.huggingface_provider import huggingface_text_response
+
+response = huggingface_text_response(
+    prompt="Write about AI",
+    model="openai/gpt-oss-120b:groq",
+    temperature=0.7,
+    max_tokens=1000
+)
+```
+
+## Available Hugging Face Models
+
+The Hugging Face provider supports multiple models via Inference Providers:
+
+- `openai/gpt-oss-120b:groq` (default)
+- `moonshotai/Kimi-K2-Instruct-0905:groq`
+- `Qwen/Qwen2.5-VL-7B-Instruct`
+- `meta-llama/Llama-3.1-8B-Instruct:groq`
+- `microsoft/Phi-3-medium-4k-instruct:groq`
+- `mistralai/Mistral-7B-Instruct-v0.3:groq`
+
+## Provider Selection Logic
+
+1. **Environment Variable**: If `GPT_PROVIDER` is set, use the specified provider
+2. **Auto-detection**: If `GPT_PROVIDER` is not set, check which API keys are available:
+   - Prefer Google Gemini if `GEMINI_API_KEY` is available
+   - Fall back to Hugging Face if `HF_TOKEN` is available
+3. **Fallback**: If the specified provider fails, automatically try the other provider
+
+## Error Handling
+
+The system includes comprehensive error handling:
+
+- **Missing API Keys**: Clear error messages with setup instructions
+- **Provider Failures**: Automatic fallback to the other provider
+- **Invalid Models**: Validation with helpful error messages
+- **Network Issues**: Retry logic with exponential backoff
+
+## Migration from Previous Version
+
+### Removed Providers
+The following providers have been removed to simplify the system:
+- OpenAI
+- Anthropic
+- DeepSeek
+
+### Updated Imports
+```python
+# Old imports (no longer work)
+from services.llm_providers.openai_provider import openai_chatgpt
+from services.llm_providers.anthropic_provider import anthropic_text_response
+from services.llm_providers.deepseek_provider import deepseek_text_response
+
+# New imports
+from services.llm_providers.gemini_provider import gemini_text_response, gemini_structured_json_response
+from services.llm_providers.huggingface_provider import huggingface_text_response, huggingface_structured_json_response
+```
+
+## Testing
+
+Run this quick check to confirm which providers are recognized (the removed OpenAI provider is included to show that it is no longer supported):
+
+```bash
+cd backend
+python -c "
+import sys
+sys.path.insert(0, '.')
+from services.llm_providers.main_text_generation import check_gpt_provider
+print('Google provider supported:', check_gpt_provider('google'))
+print('Hugging Face provider supported:', check_gpt_provider('huggingface'))
+print('OpenAI provider supported:', check_gpt_provider('openai'))
+"
+```
+
+## Performance Considerations
+
+### Google Gemini
+- Fast response times
+- High-quality outputs
+- Good for structured content
+
+### Hugging Face
+- Multiple model options
+- Cost-effective for high-volume usage
+- Good for experimentation with different models
+
+## Troubleshooting
+
+### Common Issues
+
+1. **"No LLM API keys configured"**
+   - Ensure either `GEMINI_API_KEY` or `HF_TOKEN` is set
+   - Check that the API key is valid
+
+2. **"Unknown LLM provider"**
+   - Use one of the provider values listed under Configuration: `gemini`/`google` for Gemini, or `hf_response_api`/`huggingface`/`hf` for Hugging Face
+   - Check the `GPT_PROVIDER` environment variable
+
+3. **"HF_TOKEN appears to be invalid"**
+   - Ensure your Hugging Face token starts with `hf_`
+   - Get a new token from [Hugging Face Settings](https://huggingface.co/settings/tokens)
+
+4. **"OpenAI library not available"**
+   - Install the OpenAI library: `pip install openai`
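+   - The `openai` package is still required even though the OpenAI provider was removed: the Hugging Face provider uses it as the client for the Hugging Face Responses API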
+
+### Debug Mode
+
+Enable debug logging to see provider selection:
+
+```python
+import logging
+logging.basicConfig(level=logging.DEBUG)
+```
+
+## Future Enhancements
+
+- Support for additional Hugging Face models
+- Model-specific parameter optimization
+- Advanced caching strategies
+- Performance monitoring and metrics
+- A/B testing between providers
+
+## Support
+
+For issues or questions:
+1. Check the troubleshooting section above
+2. Review the [Hugging Face Responses API documentation](https://huggingface.co/docs/inference-providers/guides/responses-api)
+3. Check the Google Gemini API documentation for Gemini-specific issues
diff --git a/backend/services/llm_providers/__init__.py b/backend/services/llm_providers/__init__.py
index 3a9bbebd..2bd18b9b 100644
--- a/backend/services/llm_providers/__init__.py
+++ b/backend/services/llm_providers/__init__.py
@@ -5,17 +5,14 @@ migrated from the legacy lib/gpt_providers functionality.
 """
 
 from services.llm_providers.main_text_generation import llm_text_gen
-from services.llm_providers.openai_provider import openai_chatgpt, test_openai_api_key
 from services.llm_providers.gemini_provider import gemini_text_response, gemini_structured_json_response
-from services.llm_providers.anthropic_provider import anthropic_text_response
-from services.llm_providers.deepseek_provider import deepseek_text_response
+from services.llm_providers.huggingface_provider import huggingface_text_response, huggingface_structured_json_response
+
 
 __all__ = [
     "llm_text_gen",
-    "openai_chatgpt",
-    "test_openai_api_key",
     "gemini_text_response", 
     "gemini_structured_json_response",
-    "anthropic_text_response",
-    "deepseek_text_response"
+    "huggingface_text_response",
+    "huggingface_structured_json_response"
 ] 
\ No newline at end of file
diff --git a/backend/services/llm_providers/anthropic_provider.py b/backend/services/llm_providers/anthropic_provider.py
deleted file mode 100644
index adffd35e..00000000
--- a/backend/services/llm_providers/anthropic_provider.py
+++ /dev/null
@@ -1,98 +0,0 @@
-"""Anthropic Provider Service for ALwrity Backend.
-
-This service handles Anthropic API integrations,
-migrated from the legacy lib/gpt_providers/text_generation/anthropic_text_gen.py
-"""
-
-import os
-import json
-import time
-from typing import Dict, Any, Tuple
-from loguru import logger
-from tenacity import (
-    retry,
-    stop_after_attempt,
-    wait_random_exponential,
-)
-
-# Import APIKeyManager
-from ..onboarding.api_key_manager import APIKeyManager
-
-try:
-    import anthropic
-except ImportError:
-    anthropic = None
-    logger.warning("Anthropic library not available. Install with: pip install anthropic")
-
-async def test_anthropic_api_key(api_key: str) -> Tuple[bool, str]:
-    """
-    Test if the provided Anthropic API key is valid.
-    
-    Args:
-        api_key (str): The Anthropic API key to test
-        
-    Returns:
-        tuple[bool, str]: A tuple containing (is_valid, message)
-    """
-    if not anthropic:
-        return False, "Anthropic library not available"
-    
-    try:
-        # Create Anthropic client with the provided key
-        client = anthropic.Anthropic(api_key=api_key)
-        
-        # Try to generate a simple response as a test
-        response = client.messages.create(
-            model="claude-3-5-sonnet-20241022",
-            max_tokens=10,
-            messages=[{"role": "user", "content": "Hello"}]
-        )
-        
-        # If we get here, the key is valid
-        return True, "Anthropic API key is valid"
-        
-    except anthropic.AuthenticationError:
-        return False, "Invalid Anthropic API key"
-    except anthropic.RateLimitError:
-        return False, "Rate limit exceeded. Please try again later."
-    except Exception as e:
-        return False, f"Error testing Anthropic API key: {str(e)}"
-
-@retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6))
-def anthropic_text_response(prompt: str, model: str = "claude-3-5-sonnet-20241022", 
-                           temperature: float = 0.7, max_tokens: int = 4000, 
-                           system_prompt: str = None) -> str:
-    """Get response from Anthropic Claude."""
-    if not anthropic:
-        logger.error("Anthropic library not available")
-        return "Anthropic library not available. Please install anthropic package."
-    
-    try:
-        # Use APIKeyManager instead of direct environment variable access
-        api_key_manager = APIKeyManager()
-        api_key = api_key_manager.get_api_key("anthropic")
-        
-        if not api_key:
-            raise ValueError("Anthropic API key not found. Please configure it in the onboarding process.")
-        
-        client = anthropic.Anthropic(api_key=api_key)
-        
-        # Prepare messages
-        messages = []
-        if system_prompt:
-            messages.append({"role": "system", "content": system_prompt})
-        messages.append({"role": "user", "content": prompt})
-        
-        response = client.messages.create(
-            model=model,
-            max_tokens=max_tokens,
-            temperature=temperature,
-            messages=messages
-        )
-        
-        logger.info(f"[anthropic_text_response] Generated response with {len(response.content[0].text)} characters")
-        return response.content[0].text
-        
-    except Exception as err:
-        logger.error(f"Failed to get response from Anthropic: {err}. Retrying.")
-        raise 
\ No newline at end of file
diff --git a/backend/services/llm_providers/deepseek_provider.py b/backend/services/llm_providers/deepseek_provider.py
deleted file mode 100644
index 488b57bf..00000000
--- a/backend/services/llm_providers/deepseek_provider.py
+++ /dev/null
@@ -1,105 +0,0 @@
-"""DeepSeek Provider Service for ALwrity Backend.
-
-This service handles DeepSeek API integrations,
-migrated from the legacy lib/gpt_providers/text_generation/deepseek_text_gen.py
-"""
-
-import os
-import json
-import time
-from typing import Dict, Any, Tuple
-from loguru import logger
-from tenacity import (
-    retry,
-    stop_after_attempt,
-    wait_random_exponential,
-)
-
-# Import APIKeyManager
-from ..onboarding.api_key_manager import APIKeyManager
-
-try:
-    import openai
-except ImportError:
-    openai = None
-    logger.warning("OpenAI library not available. Install with: pip install openai")
-
-async def test_deepseek_api_key(api_key: str) -> Tuple[bool, str]:
-    """
-    Test if the provided DeepSeek API key is valid.
-    
-    Args:
-        api_key (str): The DeepSeek API key to test
-        
-    Returns:
-        tuple[bool, str]: A tuple containing (is_valid, message)
-    """
-    if not openai:
-        return False, "OpenAI library not available"
-    
-    try:
-        # Create DeepSeek client with the provided key
-        client = openai.OpenAI(
-            api_key=api_key,
-            base_url="https://api.deepseek.com/v1"
-        )
-        
-        # Try to generate a simple response as a test
-        response = client.chat.completions.create(
-            model="deepseek-chat",
-            messages=[{"role": "user", "content": "Hello"}],
-            max_tokens=10,
-            temperature=0.1
-        )
-        
-        # If we get here, the key is valid
-        return True, "DeepSeek API key is valid"
-        
-    except openai.AuthenticationError:
-        return False, "Invalid DeepSeek API key"
-    except openai.RateLimitError:
-        return False, "Rate limit exceeded. Please try again later."
-    except Exception as e:
-        return False, f"Error testing DeepSeek API key: {str(e)}"
-
-@retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6))
-def deepseek_text_response(prompt: str, model: str = "deepseek-chat", 
-                          temperature: float = 0.7, max_tokens: int = 4000, 
-                          system_prompt: str = None) -> str:
-    """Get response from DeepSeek."""
-    if not openai:
-        logger.error("OpenAI library not available")
-        return "OpenAI library not available. Please install openai package."
-    
-    try:
-        # Use APIKeyManager instead of direct environment variable access
-        api_key_manager = APIKeyManager()
-        api_key = api_key_manager.get_api_key("deepseek")
-        
-        if not api_key:
-            raise ValueError("DeepSeek API key not found. Please configure it in the onboarding process.")
-        
-        client = openai.OpenAI(
-            api_key=api_key,
-            base_url="https://api.deepseek.com/v1"
-        )
-        
-        # Prepare messages
-        messages = []
-        if system_prompt:
-            messages.append({"role": "system", "content": system_prompt})
-        messages.append({"role": "user", "content": prompt})
-        
-        response = client.chat.completions.create(
-            model=model,
-            messages=messages,
-            max_tokens=max_tokens,
-            temperature=temperature
-        )
-        
-        logger.info(f"[deepseek_text_response] Generated response with {len(response.choices[0].message.content)} characters")
-        return response.choices[0].message.content
-        
-    except Exception as err:
-        logger.error(f"Failed to get response from DeepSeek: {err}. Retrying.")
-        raise 
\ No newline at end of file
diff --git a/backend/services/llm_providers/gemini_provider.py b/backend/services/llm_providers/gemini_provider.py
index d6da5432..5f28e652 100644
--- a/backend/services/llm_providers/gemini_provider.py
+++ b/backend/services/llm_providers/gemini_provider.py
@@ -402,18 +402,33 @@ def gemini_structured_json_response(prompt, schema, temperature=0.7, top_p=0.9,
         try:
             # Convert sync call to async for retry logic
             import asyncio
-            loop = asyncio.new_event_loop()
-            asyncio.set_event_loop(loop)
             
-            response = loop.run_until_complete(
-                retry_with_backoff(
-                    make_api_call,
-                    config=CONTENT_RETRY_CONFIG,
-                    operation_name="gemini_structured_json",
-                    context={"schema_type": type(types_schema).__name__, "max_tokens": max_tokens}
+            # Check if there's already an event loop running
+            try:
+                loop = asyncio.get_running_loop()
+                # If we're already in an async context, we need to run this differently
+                logger.warning("⚠️ Already in async context, using direct sync call")
+                # For now, let's use a simpler approach without retry logic
+                response = client.models.generate_content(
+                    model="gemini-2.5-flash",
+                    contents=prompt,
+                    config=generation_config,
                 )
-            )
-            logger.info("✅ Gemini API call completed successfully")
+                logger.info("✅ Gemini API call completed successfully (sync mode)")
+            except RuntimeError:
+                # No event loop running, we can create one
+                loop = asyncio.new_event_loop()
+                asyncio.set_event_loop(loop)
+                
+                response = loop.run_until_complete(
+                    retry_with_backoff(
+                        make_api_call,
+                        config=CONTENT_RETRY_CONFIG,
+                        operation_name="gemini_structured_json",
+                        context={"schema_type": type(types_schema).__name__, "max_tokens": max_tokens}
+                    )
+                )
+                logger.info("✅ Gemini API call completed successfully")
         except Exception as api_error:
             logger.error(f"❌ Gemini API call failed: {api_error}")
             logger.error(f"❌ API Error type: {type(api_error).__name__}")
diff --git a/backend/services/llm_providers/huggingface_provider.py b/backend/services/llm_providers/huggingface_provider.py
new file mode 100644
index 00000000..707efe69
--- /dev/null
+++ b/backend/services/llm_providers/huggingface_provider.py
@@ -0,0 +1,441 @@
+"""
+Hugging Face Provider Module for ALwrity
+
+This module provides functions for interacting with Hugging Face's Inference Providers API
+using the Responses API (beta) which provides a unified interface for model interactions.
+
+Key Features:
+- Text response generation with retry logic
+- Structured JSON response generation guided by a JSON schema (output is parsed, not validated)
+- Comprehensive error handling and logging
+- Automatic API key management
+- Support for various Hugging Face models via Inference Providers
+
+Best Practices:
+1. Use structured output for complex, multi-field responses
+2. Keep schemas simple and flat to avoid truncation
+3. Set appropriate token limits (8192 for complex outputs)
+4. Use low temperature (0.1-0.3) for consistent structured output
+5. Implement proper error handling in calling functions
+6. Use the Responses API for better compatibility
+
+Usage Examples:
+    # Text response
+    result = huggingface_text_response(prompt, temperature=0.7, max_tokens=2048)
+    
+    # Structured JSON response
+    schema = {
+        "type": "object",
+        "properties": {
+            "tasks": {
+                "type": "array",
+                "items": {"type": "object", "properties": {...}}
+            }
+        }
+    }
+    result = huggingface_structured_json_response(prompt, schema, temperature=0.2, max_tokens=8192)
+
+Dependencies:
+- openai (for Hugging Face Responses API)
+- tenacity (for retry logic)
+- loguru and python-dotenv (for logging and .env loading)
+- json (for fallback parsing)
+
+Author: ALwrity Team
+Version: 1.0
+Last Updated: October 2025
+"""
+
+import os
+import sys
+from pathlib import Path
+import json
+import re
+from typing import Optional, Dict, Any
+
+from dotenv import load_dotenv
+
+# Load environment variables at import time, preferring <backend>/.env resolved relative to this file
+current_dir = Path(__file__).parent.parent  # the services directory (two levels up from this file)
+backend_dir = current_dir.parent  # backend directory
+env_path = backend_dir / '.env'
+
+if env_path.exists():
+    load_dotenv(env_path)
+    print(f"Loaded .env from: {env_path}")
+else:
+    # No backend/.env found: let load_dotenv() look for a .env on its own
+    load_dotenv()
+    print(f"No .env found at {env_path}, using current directory")
+
+from loguru import logger
+from utils.logger_utils import get_service_logger
+
+# Use service-specific logger to avoid conflicts
+logger = get_service_logger("huggingface_provider")
+
+from tenacity import (
+    retry,
+    stop_after_attempt,
+    wait_random_exponential,
+)
+
+try:  # openai is optional at import time; the provider functions check OPENAI_AVAILABLE
+    from openai import OpenAI
+    OPENAI_AVAILABLE = True
+except ImportError:
+    OPENAI_AVAILABLE = False
+    logger.warning("OpenAI library not available. Install with: pip install openai")
+
+def get_huggingface_api_key() -> str:
+    """Return the ``HF_TOKEN`` environment value, raising ``ValueError`` if absent or malformed."""
+    token = os.environ.get('HF_TOKEN')
+
+    # Decide which complaint applies, if any; an empty value counts as unset.
+    if not token:
+        problem = "HF_TOKEN environment variable is not set. Please set it in your .env file."
+    elif not token.startswith('hf_'):
+        problem = "HF_TOKEN appears to be invalid. It should start with 'hf_'."
+    else:
+        return token
+
+    # Both failure modes are logged before being surfaced to the caller.
+    logger.error(problem)
+    raise ValueError(problem)
+
+@retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6))
+def huggingface_text_response(
+    prompt: str,
+    model: str = "openai/gpt-oss-120b:groq",
+    temperature: float = 0.7,
+    max_tokens: int = 2048,
+    top_p: float = 0.9,
+    system_prompt: Optional[str] = None
+) -> str:
+    """
+    Generate text response using Hugging Face Inference Providers API.
+
+    The request is sent with the OpenAI SDK pointed at the Hugging Face router
+    (Responses API). Every failure inside this function, configuration errors
+    included, is re-raised as ``Exception`` and is therefore retried by the
+    ``@retry`` decorator (up to 6 attempts with randomized exponential backoff).
+
+    Args:
+        prompt (str): The input prompt for the AI model
+        model (str): Hugging Face model identifier (default: "openai/gpt-oss-120b:groq")
+        temperature (float): Controls randomness (0.0-1.0)
+        max_tokens (int): Maximum tokens in response (sent as max_output_tokens)
+        top_p (float): Nucleus sampling parameter (0.0-1.0)
+        system_prompt (str, optional): System instruction for the model
+
+    Returns:
+        str: Generated text with Markdown code-fence markers stripped
+
+    Raises:
+        Exception: If the OpenAI SDK or HF_TOKEN is unavailable, the API call
+            fails, or the model returns no text. The original error is chained
+            as __cause__.
+
+    Note:
+        Code-fence markers are removed even when they are a legitimate part of
+        the generated Markdown; only the fence markers go, the code itself stays.
+
+    Best Practices:
+        - Use appropriate temperature for your use case (0.7 for creative, 0.1-0.3 for factual)
+        - Set max_tokens based on expected response length
+        - Use system_prompt to guide model behavior
+        - Handle errors gracefully in calling functions
+
+    Example:
+        result = huggingface_text_response(
+            prompt="Write a blog post about AI",
+            model="openai/gpt-oss-120b:groq",
+            temperature=0.7,
+            max_tokens=2048,
+            system_prompt="You are a professional content writer."
+        )
+    """
+    try:
+        if not OPENAI_AVAILABLE:
+            raise ImportError("OpenAI library not available. Install with: pip install openai")
+
+        # get_huggingface_api_key() raises when the token is missing or malformed,
+        # so no second presence check is needed here.
+        api_key = get_huggingface_api_key()
+        logger.info(f"🔑 Hugging Face API key loaded: {bool(api_key)} (length: {len(api_key)})")
+
+        # Initialize Hugging Face client using Responses API
+        client = OpenAI(
+            base_url="https://router.huggingface.co/v1",
+            api_key=api_key,
+        )
+        logger.info("✅ Hugging Face client initialized for text response")
+
+        # Prepare input for the API
+        input_content = []
+
+        # Add system prompt if provided
+        if system_prompt:
+            input_content.append({
+                "role": "system",
+                "content": system_prompt
+            })
+
+        # Add user prompt
+        input_content.append({
+            "role": "user",
+            "content": prompt
+        })
+
+        # Log with an f-string like the rest of this module; %-style arguments are
+        # not interpolated by every logger backend.
+        prompt_len = len(prompt) if isinstance(prompt, str) else '<non-str>'
+        logger.info(
+            f"Hugging Face text call | model={model} | prompt_len={prompt_len} | "
+            f"temp={temperature} | top_p={top_p} | max_tokens={max_tokens}"
+        )
+
+        logger.info("🚀 Making Hugging Face API call...")
+
+        # Add rate limiting to prevent expensive API calls
+        import time
+        time.sleep(1)  # 1 second delay between API calls
+
+        # Make the API call using Responses API
+        response = client.responses.parse(
+            model=model,
+            input=input_content,
+            temperature=temperature,
+            top_p=top_p,
+            max_output_tokens=max_tokens,  # enforce the caller's token limit
+        )
+
+        # output_text is the SDK's concatenation of all text parts in the reply.
+        generated_text = getattr(response, 'output_text', None) or ''
+
+        # Strip Markdown code-fence markers (bare or with a language tag); the
+        # single pattern also covers the bare form, so one pass is enough.
+        generated_text = re.sub(r'```[a-zA-Z]*\n?', '', generated_text).strip()
+
+        if not generated_text:
+            # Fail loudly (and let @retry try again) rather than returning
+            # str(response), a debug dump, as if it were generated content.
+            # Output items are SDK objects, so dict-style .get() is not usable.
+            raise ValueError("Hugging Face response contained no text output")
+
+        logger.info(f"✅ Hugging Face text response generated successfully (length: {len(generated_text)})")
+        return generated_text
+
+    except Exception as e:
+        logger.error(f"❌ Hugging Face text generation failed: {str(e)}")
+        # Chain the original exception so its type and traceback stay visible.
+        raise Exception(f"Hugging Face text generation failed: {str(e)}") from e
+
+@retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6))
+def huggingface_structured_json_response(
+    prompt: str,
+    schema: Dict[str, Any],
+    model: str = "openai/gpt-oss-120b:groq",
+    temperature: float = 0.7,
+    max_tokens: int = 8192,
+    system_prompt: Optional[str] = None
+) -> Dict[str, Any]:
+    """
+    Generate structured JSON response using Hugging Face Inference Providers API.
+
+    The schema is not sent as a native structured-output format. It is embedded
+    in the user message as instructions, the model answers in text mode, and
+    the text is parsed as JSON here. The result is not validated against the
+    schema.
+
+    Args:
+        prompt (str): The input prompt for the AI model
+        schema (dict): JSON schema defining the expected output structure
+        model (str): Hugging Face model identifier (default: "openai/gpt-oss-120b:groq")
+        temperature (float): Controls randomness (0.0-1.0). Use 0.1-0.3 for structured output
+        max_tokens (int): Maximum tokens in response (sent as max_output_tokens). Use 8192 for complex outputs
+        system_prompt (str, optional): System instruction for the model
+
+    Returns:
+        dict: Parsed JSON response. If the reply cannot be parsed, or carries no
+            text at all, a dict with "error", "raw_response" and
+            "schema_expected" keys is returned instead of raising.
+
+    Raises:
+        Exception: If the OpenAI SDK or HF_TOKEN is unavailable or the API call
+            fails. The original error is chained as __cause__.
+
+    Best Practices:
+        - Keep schemas simple and flat to avoid truncation
+        - Use low temperature (0.1-0.3) for consistent structured output
+        - Set max_tokens to 8192 for complex multi-field responses
+        - Avoid deeply nested schemas with many required fields
+        - Test with smaller outputs first, then scale up
+
+    Example:
+        schema = {
+            "type": "object",
+            "properties": {
+                "tasks": {
+                    "type": "array",
+                    "items": {
+                        "type": "object",
+                        "properties": {
+                            "title": {"type": "string"},
+                            "description": {"type": "string"}
+                        }
+                    }
+                }
+            }
+        }
+        result = huggingface_structured_json_response(prompt, schema, temperature=0.2, max_tokens=8192)
+    """
+    try:
+        if not OPENAI_AVAILABLE:
+            raise ImportError("OpenAI library not available. Install with: pip install openai")
+
+        # get_huggingface_api_key() raises when the token is missing or malformed,
+        # so no second presence check is needed here.
+        api_key = get_huggingface_api_key()
+        logger.info(f"🔑 Hugging Face API key loaded: {bool(api_key)} (length: {len(api_key)})")
+
+        # Initialize Hugging Face client using Responses API
+        client = OpenAI(
+            base_url="https://router.huggingface.co/v1",
+            api_key=api_key,
+        )
+        logger.info("✅ Hugging Face client initialized for structured JSON response")
+
+        # Prepare input for the API
+        input_content = []
+
+        # Add system prompt if provided
+        if system_prompt:
+            input_content.append({
+                "role": "system",
+                "content": system_prompt
+            })
+
+        # Add the user prompt followed by a single JSON instruction that embeds
+        # the schema. (Previously a generic instruction and a schema-bearing one
+        # were both appended, telling the model the same thing twice.)
+        json_instruction = (
+            "\n\nPlease respond with valid JSON that matches this exact structure:\n"
+            + json.dumps(schema, indent=2)
+        )
+        input_content.append({
+            "role": "user",
+            "content": f"{prompt}{json_instruction}"
+        })
+
+        # Log with an f-string like the rest of this module; %-style arguments are
+        # not interpolated by every logger backend.
+        prompt_len = len(prompt) if isinstance(prompt, str) else '<non-str>'
+        logger.info(
+            f"Hugging Face structured call | model={model} | prompt_len={prompt_len} | "
+            f"schema_kind={type(schema).__name__} | temp={temperature} | max_tokens={max_tokens}"
+        )
+
+        # Use simple text generation and parse JSON manually to avoid API format issues
+        logger.info("🚀 Making Hugging Face API call (text mode with JSON parsing)...")
+
+        # Add rate limiting to prevent expensive API calls
+        import time
+        time.sleep(1)  # 1 second delay between API calls
+
+        response = client.responses.parse(
+            model=model,
+            input=input_content,
+            temperature=temperature,
+            max_output_tokens=max_tokens,  # enforce the caller's token limit
+        )
+
+        # Extract structured data from response
+        if hasattr(response, 'output_parsed') and response.output_parsed:
+            # The new API returns parsed data directly (Pydantic model case)
+            logger.info("✅ Hugging Face structured JSON response parsed successfully")
+            # Convert Pydantic model to dict if needed
+            if hasattr(response.output_parsed, 'model_dump'):
+                return response.output_parsed.model_dump()
+            elif hasattr(response.output_parsed, 'dict'):
+                return response.output_parsed.dict()
+            else:
+                return response.output_parsed
+        elif hasattr(response, 'output_text') and response.output_text:
+            # Fallback to text parsing if output_parsed is not available
+            response_text = response.output_text
+            # Remove Markdown code fences wrapped around the JSON payload
+            response_text = re.sub(r'```json\n?', '', response_text)
+            response_text = re.sub(r'```\n?', '', response_text)
+            response_text = response_text.strip()
+
+            try:
+                parsed_json = json.loads(response_text)
+                logger.info("✅ Hugging Face structured JSON response parsed from text")
+                return parsed_json
+            except json.JSONDecodeError as json_err:
+                logger.error(f"❌ JSON parsing failed: {json_err}")
+                logger.error(f"Raw response: {response_text}")
+
+                # Try the widest {...} span, in case prose surrounds the JSON object
+                json_match = re.search(r'\{.*\}', response_text, re.DOTALL)
+                if json_match:
+                    try:
+                        extracted_json = json.loads(json_match.group())
+                        logger.info("✅ JSON extracted using regex fallback")
+                        return extracted_json
+                    except json.JSONDecodeError:
+                        pass  # deliberately ignored: fall through to the error payload
+
+                # If all else fails, return a structured error response
+                logger.error("❌ All JSON parsing attempts failed")
+                return {
+                    "error": "Failed to parse JSON response",
+                    "raw_response": response_text,
+                    "schema_expected": schema
+                }
+        else:
+            logger.error("❌ No valid response data found")
+            return {
+                "error": "No valid response data found",
+                "raw_response": str(response),
+                "schema_expected": schema
+            }
+
+    except Exception as e:
+        error_msg = str(e) if str(e) else repr(e)
+        error_type = type(e).__name__
+        logger.error(f"❌ Hugging Face structured JSON generation failed: {error_type}: {error_msg}")
+        logger.error(f"❌ Full exception details: {repr(e)}")
+        import traceback
+        logger.error(f"❌ Traceback: {traceback.format_exc()}")
+        raise Exception(f"Hugging Face structured JSON generation failed: {error_type}: {error_msg}") from e
+
+def get_available_models() -> list:
+    """Return the Hugging Face model identifiers offered for text generation.
+
+    A fresh list is created on each call, so callers may mutate the result.
+    """
+    supported_models = (
+        "openai/gpt-oss-120b:groq",
+        "moonshotai/Kimi-K2-Instruct-0905:groq",
+        "Qwen/Qwen2.5-VL-7B-Instruct",
+        "meta-llama/Llama-3.1-8B-Instruct:groq",
+        "microsoft/Phi-3-medium-4k-instruct:groq",
+        "mistralai/Mistral-7B-Instruct-v0.3:groq",
+    )
+    # Convert the immutable catalogue to the list type callers expect.
+    return list(supported_models)
+
+def validate_model(model: str) -> bool:
+    """
+    Report whether ``model`` is one of the identifiers this provider lists.
+
+    Args:
+        model (str): Model identifier to check (exact, case-sensitive match)
+
+    Returns:
+        bool: True when the identifier is in get_available_models(), else False
+    """
+    # Membership is tested directly against the freshly built catalogue.
+    return model in get_available_models()
diff --git a/backend/services/llm_providers/main_text_generation.py b/backend/services/llm_providers/main_text_generation.py
index cc573630..0fe47867 100644
--- a/backend/services/llm_providers/main_text_generation.py
+++ b/backend/services/llm_providers/main_text_generation.py
@@ -10,10 +10,9 @@ from typing import Optional, Dict, Any
 from loguru import logger
 from ..onboarding.api_key_manager import APIKeyManager
 
-from .openai_provider import openai_chatgpt
 from .gemini_provider import gemini_text_response, gemini_structured_json_response
-from .anthropic_provider import anthropic_text_response
-from .deepseek_provider import deepseek_text_response
+from .huggingface_provider import huggingface_text_response, huggingface_structured_json_response
+
 
 def llm_text_gen(prompt: str, system_prompt: Optional[str] = None, json_struct: Optional[Dict[str, Any]] = None) -> str:
     """
@@ -31,13 +30,6 @@ def llm_text_gen(prompt: str, system_prompt: Optional[str] = None, json_struct:
         logger.info("[llm_text_gen] Starting text generation")
         logger.debug(f"[llm_text_gen] Prompt length: {len(prompt)} characters")
         
-        # Initialize API key manager and reload keys from .env file
-        api_key_manager = APIKeyManager()
-        api_key_manager.load_api_keys()  # Force reload from .env file
-        
-        # Debug: Log loaded API keys
-        logger.debug(f"[llm_text_gen] Loaded API keys: {api_key_manager.get_all_keys()}")
-        
         # Set default values for LLM parameters
         gpt_provider = "google"  # Default to Google Gemini
         model = "gemini-2.0-flash-001"
@@ -49,6 +41,15 @@ def llm_text_gen(prompt: str, system_prompt: Optional[str] = None, json_struct:
         frequency_penalty = 0.0
         presence_penalty = 0.0
         
+        # Check for GPT_PROVIDER environment variable
+        env_provider = os.getenv('GPT_PROVIDER', '').lower()
+        if env_provider in ['gemini', 'google']:
+            gpt_provider = "google"
+            model = "gemini-2.0-flash-001"
+        elif env_provider in ['hf_response_api', 'huggingface', 'hf']:
+            gpt_provider = "huggingface"
+            model = "openai/gpt-oss-120b:groq"
+        
         # Default blog characteristics
         blog_tone = "Professional"
         blog_demographic = "Professional"
@@ -57,41 +58,40 @@ def llm_text_gen(prompt: str, system_prompt: Optional[str] = None, json_struct:
         blog_output_format = "markdown"
         blog_length = 2000
         
-        # Try to get provider from environment or config
-        try:
-            # Check which providers have API keys available
-            available_providers = []
-            if api_key_manager.get_api_key("openai"):
-                available_providers.append("openai")
-            if api_key_manager.get_api_key("gemini"):
-                available_providers.append("google")
-            if api_key_manager.get_api_key("anthropic"):
-                available_providers.append("anthropic")
-            if api_key_manager.get_api_key("deepseek"):
-                available_providers.append("deepseek")
-            
-            # Prefer Google Gemini if available, otherwise use first available
+        # Check which providers have API keys available using APIKeyManager
+        api_key_manager = APIKeyManager()
+        available_providers = []
+        if api_key_manager.get_api_key("gemini"):
+            available_providers.append("google")
+        if api_key_manager.get_api_key("hf_token"):
+            available_providers.append("huggingface")
+        
+        # If no environment variable set, auto-detect based on available keys
+        if not env_provider:
+            # Prefer Google Gemini if available, otherwise use Hugging Face
             if "google" in available_providers:
                 gpt_provider = "google"
                 model = "gemini-2.0-flash-001"
-            elif available_providers:
-                gpt_provider = available_providers[0]
-                if gpt_provider == "openai":
-                    model = "gpt-4o"
-                elif gpt_provider == "anthropic":
-                    model = "claude-3-5-sonnet-20241022"
-                elif gpt_provider == "deepseek":
-                    model = "deepseek-chat"
+            elif "huggingface" in available_providers:
+                gpt_provider = "huggingface"
+                model = "openai/gpt-oss-120b:groq"
             else:
-                logger.error("[llm_text_gen] No API keys found. Structured mock responses are disabled.")
-                raise RuntimeError("No LLM API keys configured. Configure provider API keys to enable AI responses.")
-                
-            logger.debug(f"[llm_text_gen] Using provider: {gpt_provider}, model: {model}")
+                logger.error("[llm_text_gen] No API keys found for supported providers.")
+                raise RuntimeError("No LLM API keys configured. Configure GEMINI_API_KEY or HF_TOKEN to enable AI responses.")
+        else:
+            # Environment variable was set, validate it's supported
+            if gpt_provider not in available_providers:
+                logger.warning(f"[llm_text_gen] Provider {gpt_provider} not available, falling back to available providers")
+                if "google" in available_providers:
+                    gpt_provider = "google"
+                    model = "gemini-2.0-flash-001"
+                elif "huggingface" in available_providers:
+                    gpt_provider = "huggingface"
+                    model = "openai/gpt-oss-120b:groq"
+                else:
+                    raise RuntimeError("No supported providers available.")
             
-        except Exception as err:
-            logger.warning(f"[llm_text_gen] Error determining provider, using defaults: {err}")
-            gpt_provider = "google"
-            model = "gemini-2.0-flash-001"
+        logger.debug(f"[llm_text_gen] Using provider: {gpt_provider}, model: {model}")
 
         # Construct the system prompt if not provided
         if system_prompt is None:
@@ -118,18 +118,7 @@ def llm_text_gen(prompt: str, system_prompt: Optional[str] = None, json_struct:
 
         # Generate response based on provider
         try:
-            if gpt_provider == "openai":
-                return openai_chatgpt(
-                    prompt=prompt,
-                    model=model,
-                    temperature=temperature,
-                    max_tokens=max_tokens,
-                    top_p=top_p,
-                    n=n,
-                    fp=fp,
-                    system_prompt=system_instructions
-                )
-            elif gpt_provider == "google":
+            if gpt_provider == "google":
                 if json_struct:
                     return gemini_structured_json_response(
                         prompt=prompt,
@@ -149,66 +138,83 @@ def llm_text_gen(prompt: str, system_prompt: Optional[str] = None, json_struct:
                         max_tokens=max_tokens,
                         system_prompt=system_instructions
                     )
-            elif gpt_provider == "anthropic":
-                return anthropic_text_response(
-                    prompt=prompt,
-                    model=model,
-                    temperature=temperature,
-                    max_tokens=max_tokens,
-                    system_prompt=system_instructions
-                )
-            elif gpt_provider == "deepseek":
-                return deepseek_text_response(
-                    prompt=prompt,
-                    model=model,
-                    temperature=temperature,
-                    max_tokens=max_tokens,
-                    system_prompt=system_instructions
-                )
+            elif gpt_provider == "huggingface":
+                if json_struct:
+                    return huggingface_structured_json_response(
+                        prompt=prompt,
+                        schema=json_struct,
+                        model=model,
+                        temperature=temperature,
+                        max_tokens=max_tokens,
+                        system_prompt=system_instructions
+                    )
+                else:
+                    return huggingface_text_response(
+                        prompt=prompt,
+                        model=model,
+                        temperature=temperature,
+                        max_tokens=max_tokens,
+                        top_p=top_p,
+                        system_prompt=system_instructions
+                    )
             else:
                 logger.error(f"[llm_text_gen] Unknown provider: {gpt_provider}")
-                raise RuntimeError("Unknown LLM provider.")
+                raise RuntimeError("Unknown LLM provider. Supported providers: google, huggingface")
         except Exception as provider_error:
             logger.error(f"[llm_text_gen] Provider {gpt_provider} failed: {str(provider_error)}")
-            # Try to fallback to another provider
-            fallback_providers = ["openai", "anthropic", "deepseek"]
-            for fallback_provider in fallback_providers:
-                if fallback_provider in available_providers and fallback_provider != gpt_provider:
-                    try:
-                        logger.info(f"[llm_text_gen] Trying fallback provider: {fallback_provider}")
-                        if fallback_provider == "openai":
-                            return openai_chatgpt(
+            
+            # CIRCUIT BREAKER: Only try ONE fallback to prevent expensive API calls
+            fallback_providers = ["google", "huggingface"]
+            fallback_providers = [p for p in fallback_providers if p in available_providers and p != gpt_provider]
+            
+            if fallback_providers:
+                fallback_provider = fallback_providers[0]  # Only try the first available
+                try:
+                    logger.info(f"[llm_text_gen] Trying SINGLE fallback provider: {fallback_provider}")
+                    if fallback_provider == "google":
+                        if json_struct:
+                            return gemini_structured_json_response(
                                 prompt=prompt,
-                                model="gpt-4o",
+                                schema=json_struct,
+                                temperature=temperature,
+                                top_p=top_p,
+                                top_k=n,
+                                max_tokens=max_tokens,
+                                system_prompt=system_instructions
+                            )
+                        else:
+                            return gemini_text_response(
+                                prompt=prompt,
+                                temperature=temperature,
+                                top_p=top_p,
+                                n=n,
+                                max_tokens=max_tokens,
+                                system_prompt=system_instructions
+                            )
+                    elif fallback_provider == "huggingface":
+                        if json_struct:
+                            return huggingface_structured_json_response(
+                                prompt=prompt,
+                                schema=json_struct,
+                                model="openai/gpt-oss-120b:groq",
+                                temperature=temperature,
+                                max_tokens=max_tokens,
+                                system_prompt=system_instructions
+                            )
+                        else:
+                            return huggingface_text_response(
+                                prompt=prompt,
+                                model="openai/gpt-oss-120b:groq",
                                 temperature=temperature,
                                 max_tokens=max_tokens,
                                 top_p=top_p,
-                                n=n,
-                                fp=fp,
                                 system_prompt=system_instructions
                             )
-                        elif fallback_provider == "anthropic":
-                            return anthropic_text_response(
-                                prompt=prompt,
-                                model="claude-3-5-sonnet-20241022",
-                                temperature=temperature,
-                                max_tokens=max_tokens,
-                                system_prompt=system_instructions
-                            )
-                        elif fallback_provider == "deepseek":
-                            return deepseek_text_response(
-                                prompt=prompt,
-                                model="deepseek-chat",
-                                temperature=temperature,
-                                max_tokens=max_tokens,
-                                system_prompt=system_instructions
-                            )
-                    except Exception as fallback_error:
-                        logger.error(f"[llm_text_gen] Fallback provider {fallback_provider} also failed: {str(fallback_error)}")
-                        continue
+                except Exception as fallback_error:
+                    logger.error(f"[llm_text_gen] Fallback provider {fallback_provider} also failed: {str(fallback_error)}")
             
-            # If all providers fail, raise an error (no mock)
-            logger.error("[llm_text_gen] All providers failed. Structured mock responses are disabled.")
+            # CIRCUIT BREAKER: Stop immediately to prevent expensive API calls
+            logger.error("[llm_text_gen] CIRCUIT BREAKER: Stopping to prevent expensive API calls.")
             raise RuntimeError("All LLM providers failed to generate a response.")
 
     except Exception as e:
@@ -217,7 +223,7 @@ def llm_text_gen(prompt: str, system_prompt: Optional[str] = None, json_struct:
 
 def check_gpt_provider(gpt_provider: str) -> bool:
     """Check if the specified GPT provider is supported."""
-    supported_providers = ["openai", "google", "anthropic", "deepseek"]
+    supported_providers = ["google", "huggingface"]
     return gpt_provider in supported_providers
 
 def get_api_key(gpt_provider: str) -> Optional[str]:
@@ -225,10 +231,8 @@ def get_api_key(gpt_provider: str) -> Optional[str]:
     try:
         api_key_manager = APIKeyManager()
         provider_mapping = {
-            "openai": "openai",
             "google": "gemini",
-            "anthropic": "anthropic",
-            "deepseek": "deepseek"
+            "huggingface": "hf_token"
         }
         
         mapped_provider = provider_mapping.get(gpt_provider, gpt_provider)
diff --git a/backend/services/llm_providers/openai_provider.py b/backend/services/llm_providers/openai_provider.py
deleted file mode 100644
index 975fa4c1..00000000
--- a/backend/services/llm_providers/openai_provider.py
+++ /dev/null
@@ -1,133 +0,0 @@
-"""OpenAI Provider Service for ALwrity Backend.
-
-This service handles OpenAI API integrations,
-migrated from the legacy lib/gpt_providers/text_generation/openai_text_gen.py
-"""
-
-import os
-import time
-import openai
-import asyncio
-from typing import Tuple
-from loguru import logger
-from tenacity import (
-    retry,
-    stop_after_attempt,
-    wait_random_exponential,
-)
-
-# Import APIKeyManager
-from ..onboarding.api_key_manager import APIKeyManager
-
-async def test_openai_api_key(api_key: str) -> Tuple[bool, str]:
-    """
-    Test if the provided OpenAI API key is valid.
-    
-    Args:
-        api_key (str): The OpenAI API key to test
-        
-    Returns:
-        tuple[bool, str]: A tuple containing (is_valid, message)
-    """
-    try:
-        # Create OpenAI client with the provided key
-        client = openai.OpenAI(api_key=api_key)
-        
-        # Try to list models as a simple API test
-        models = client.models.list()
-        
-        # If we get here, the key is valid
-        return True, "OpenAI API key is valid"
-        
-    except openai.AuthenticationError:
-        return False, "Invalid OpenAI API key"
-    except openai.RateLimitError:
-        return False, "Rate limit exceeded. Please try again later."
-    except Exception as e:
-        return False, f"Error testing OpenAI API key: {str(e)}"
-
-@retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6))
-def openai_chatgpt(prompt: str, model: str = "gpt-4o", temperature: float = 0.7, 
-                   max_tokens: int = 4000, top_p: float = 0.9, n: int = 1, 
-                   fp: int = 16, system_prompt: str = None) -> str:
-    """
-    Wrapper function for OpenAI's ChatGPT completion.
-
-    Args:
-        prompt (str): The input text to generate completion for.
-        model (str, optional): Model to be used for the completion. Defaults to "gpt-4o".
-        temperature (float, optional): Controls randomness. Lower values make responses more deterministic. Defaults to 0.7.
-        max_tokens (int, optional): Maximum number of tokens to generate. Defaults to 4000.
-        top_p (float, optional): Controls diversity. Defaults to 0.9.
-        n (int, optional): Number of completions to generate. Defaults to 1.
-        fp (int, optional): Frequency penalty. Defaults to 16.
-        system_prompt (str, optional): System prompt for the conversation. Defaults to None.
-
-    Returns:
-        str: The generated text completion.
-
-    Raises:
-        SystemExit: If an API error, connection error, or rate limit error occurs.
-    """
-    # Wait for 5 seconds to comply with rate limits
-    for _ in range(5):
-        time.sleep(1)
-
-    try:
-        # Create variables to collect the stream of chunks
-        collected_chunks = []
-        collected_messages = []
-        full_reply_content = None
-        
-        # Use APIKeyManager instead of direct environment variable access
-        api_key_manager = APIKeyManager()
-        api_key = api_key_manager.get_api_key("openai")
-        
-        if not api_key:
-            raise ValueError("OpenAI API key not found. Please configure it in the onboarding process.")
-        
-        client = openai.OpenAI(api_key=api_key)
-        
-        # Prepare messages
-        messages = []
-        if system_prompt:
-            messages.append({"role": "system", "content": system_prompt})
-        messages.append({"role": "user", "content": prompt})
-        
-        response = client.chat.completions.create(
-            model=model,
-            messages=messages,
-            max_tokens=max_tokens,
-            n=n,
-            top_p=top_p,
-            stream=True,
-            frequency_penalty=fp,
-            temperature=temperature
-        )
-        
-        # Iterate through the stream of events
-        for chunk in response:
-            collected_chunks.append(chunk)  # save the event response
-            chunk_message = chunk.choices[0].delta.content  # extract the message
-            collected_messages.append(chunk_message)  # save the message
-            print(chunk.choices[0].delta.content, end="", flush=True)
-        
-        # Clean None in collected_messages
-        collected_messages = [m for m in collected_messages if m is not None]
-        full_reply_content = ''.join([m for m in collected_messages])
-        
-        logger.info(f"[openai_chatgpt] Generated response with {len(full_reply_content)} characters")
-        return full_reply_content
-
-    except openai.APIError as e:
-        logger.error(f"OpenAI API Error: {e}")
-        raise SystemExit from e
-    except openai.RateLimitError as e:
-        logger.error(f"OpenAI Rate Limit Error: {e}")
-        raise SystemExit from e
-    except openai.APIConnectionError as e:
-        logger.error(f"OpenAI API Connection Error: {e}")
-        raise SystemExit from e
-    except Exception as e:
-        logger.error(f"Unexpected error in OpenAI API call: {e}")
-        raise SystemExit from e 
\ No newline at end of file
diff --git a/backend/services/onboarding/api_key_manager.py b/backend/services/onboarding/api_key_manager.py
index 2c7837bf..72e37f80 100644
--- a/backend/services/onboarding/api_key_manager.py
+++ b/backend/services/onboarding/api_key_manager.py
@@ -388,10 +388,8 @@ class APIKeyManager:
     def _load_from_env(self):
         """Load API keys from environment variables."""
         providers = [
-            'OPENAI_API_KEY',
-            'ANTHROPIC_API_KEY', 
             'GEMINI_API_KEY',
-            'MISTRAL_API_KEY',
+            'HF_TOKEN',
             'TAVILY_API_KEY',
             'SERPER_API_KEY',
             'METAPHOR_API_KEY',
diff --git a/frontend/src/hooks/usePolling.ts b/frontend/src/hooks/usePolling.ts
index 15bf6126..524713f8 100644
--- a/frontend/src/hooks/usePolling.ts
+++ b/frontend/src/hooks/usePolling.ts
@@ -24,7 +24,7 @@ export function usePolling(
   options: UsePollingOptions = {}
 ): UsePollingReturn {
   const {
-    interval = 2000, // 2 seconds default
+    interval = 5000, // 5 seconds default - increased to reduce load
     onProgress,
     onComplete,
     onError
@@ -99,13 +99,17 @@ export function usePolling(
         }
 
         if (status.status === 'completed') {
+          console.log('✅ Task completed - stopping polling immediately');
           setResult(status.result);
           onComplete?.(status.result);
           stopPolling();
+          return; // Exit early to prevent further processing
         } else if (status.status === 'failed') {
+          console.log('❌ Task failed - stopping polling immediately');
           setError(status.error || 'Task failed');
           onError?.(status.error || 'Task failed');
           stopPolling();
+          return; // Exit early to prevent further processing
         }
 
         attemptsRef.current++;
@@ -113,12 +117,16 @@ export function usePolling(
         const errorMessage = err instanceof Error ? err.message : 'Unknown error occurred';
         console.error('Polling error:', errorMessage);
         
-        // Only stop polling for actual task failures (404, task not found)
-        // For network errors, timeouts, etc., continue polling
+        // Stop polling when the task no longer exists (404) or the server is rate limiting us (429)
         if (errorMessage.includes('404') || errorMessage.includes('Task not found')) {
           setError('Task not found - it may have expired or been cleaned up');
           onError?.('Task not found - it may have expired or been cleaned up');
           stopPolling();
+        } else if (errorMessage.includes('429') || errorMessage.includes('Too Many Requests')) {
+          console.warn('Rate limited - stopping polling to prevent further issues');
+          setError('Rate limited - please try again later');
+          onError?.('Rate limited - please try again later');
+          stopPolling();
         }
         // For other errors (timeouts, network issues), continue polling
         // The backend will eventually complete or fail, and we'll catch it