Hugging Face Integration. Remove OpenAI and Anthropic and DeepSeek. Add Hugging Face.

This commit is contained in:
ajaysi
2025-10-29 20:15:04 +05:30
parent 4431cd9848
commit 3219e6bbe4
15 changed files with 883 additions and 482 deletions

View File

@@ -14,7 +14,7 @@ from loguru import logger
class RateLimiter: class RateLimiter:
"""Manages rate limiting for ALwrity backend.""" """Manages rate limiting for ALwrity backend."""
def __init__(self, window_seconds: int = 60, max_requests: int = 200): def __init__(self, window_seconds: int = 60, max_requests: int = 1000): # Increased for development
self.window_seconds = window_seconds self.window_seconds = window_seconds
self.max_requests = max_requests self.max_requests = max_requests
self.request_counts: Dict[str, List[float]] = defaultdict(list) self.request_counts: Dict[str, List[float]] = defaultdict(list)
@@ -28,6 +28,12 @@ class RateLimiter:
"/ai-analytics", "/ai-analytics",
"/gap-analysis", "/gap-analysis",
"/calendar-events", "/calendar-events",
# Research endpoints - exempt from rate limiting
"/api/research",
"/api/blog-writer",
"/api/blog-writer/research",
"/api/blog-writer/research/",
"/api/blog/research/status",
"/calendar-generation/progress", "/calendar-generation/progress",
"/health", "/health",
"/health/database", "/health/database",

View File

@@ -39,7 +39,7 @@ class CompetitorAnalyzer:
}} }}
""" """
from services.llm_providers.gemini_provider import gemini_structured_json_response from services.llm_providers.main_text_generation import llm_text_gen
competitor_schema = { competitor_schema = {
"type": "object", "type": "object",
@@ -55,11 +55,9 @@ class CompetitorAnalyzer:
"required": ["top_competitors", "content_gaps", "opportunities", "competitive_advantages", "market_positioning", "industry_leaders", "analysis_notes"] "required": ["top_competitors", "content_gaps", "opportunities", "competitive_advantages", "market_positioning", "industry_leaders", "analysis_notes"]
} }
competitor_analysis = gemini_structured_json_response( competitor_analysis = llm_text_gen(
prompt=competitor_prompt, prompt=competitor_prompt,
schema=competitor_schema, json_struct=competitor_schema
temperature=0.3,
max_tokens=4000
) )
if isinstance(competitor_analysis, dict) and 'error' not in competitor_analysis: if isinstance(competitor_analysis, dict) and 'error' not in competitor_analysis:

View File

@@ -48,7 +48,7 @@ class ContentAngleGenerator:
}} }}
""" """
from services.llm_providers.gemini_provider import gemini_structured_json_response from services.llm_providers.main_text_generation import llm_text_gen
angles_schema = { angles_schema = {
"type": "object", "type": "object",
@@ -63,11 +63,9 @@ class ContentAngleGenerator:
"required": ["content_angles"] "required": ["content_angles"]
} }
angles_result = gemini_structured_json_response( angles_result = llm_text_gen(
prompt=angles_prompt, prompt=angles_prompt,
schema=angles_schema, json_struct=angles_schema
temperature=0.7,
max_tokens=4000
) )
if isinstance(angles_result, dict) and 'content_angles' in angles_result: if isinstance(angles_result, dict) and 'content_angles' in angles_result:

View File

@@ -44,7 +44,7 @@ class KeywordAnalyzer:
}} }}
""" """
from services.llm_providers.gemini_provider import gemini_structured_json_response from services.llm_providers.main_text_generation import llm_text_gen
keyword_schema = { keyword_schema = {
"type": "object", "type": "object",
@@ -62,11 +62,9 @@ class KeywordAnalyzer:
"required": ["primary", "secondary", "long_tail", "search_intent", "difficulty", "content_gaps", "semantic_keywords", "trending_terms", "analysis_insights"] "required": ["primary", "secondary", "long_tail", "search_intent", "difficulty", "content_gaps", "semantic_keywords", "trending_terms", "analysis_insights"]
} }
keyword_analysis = gemini_structured_json_response( keyword_analysis = llm_text_gen(
prompt=keyword_prompt, prompt=keyword_prompt,
schema=keyword_schema, json_struct=keyword_schema
temperature=0.3,
max_tokens=4000
) )
if isinstance(keyword_analysis, dict) and 'error' not in keyword_analysis: if isinstance(keyword_analysis, dict) and 'error' not in keyword_analysis:

View File

@@ -1,6 +1,43 @@
# Gemini Provider Module # LLM Providers Module
This module provides functions for interacting with Google's Gemini API, specifically designed for structured JSON output and text generation. It follows the official Gemini API documentation and implements best practices for reliable AI interactions. This module provides functions for interacting with multiple LLM providers, specifically Google's Gemini API and Hugging Face Inference Providers. It follows official API documentation and implements best practices for reliable AI interactions.
## Supported Providers
- **Google Gemini**: High-quality text generation with structured JSON output
- **Hugging Face**: Multiple models via Inference Providers with unified interface
## Quick Start
```python
from services.llm_providers.main_text_generation import llm_text_gen
# Generate text (auto-detects available provider)
response = llm_text_gen("Write a blog post about AI trends")
print(response)
```
## Configuration
Set your preferred provider using the `GPT_PROVIDER` environment variable:
```bash
# Use Google Gemini (default)
export GPT_PROVIDER=gemini
# Use Hugging Face
export GPT_PROVIDER=hf_response_api
```
Configure API keys:
```bash
# For Google Gemini
export GEMINI_API_KEY=your_gemini_api_key_here
# For Hugging Face
export HF_TOKEN=your_huggingface_token_here
```
## Key Features ## Key Features

View File

@@ -0,0 +1,237 @@
# Hugging Face Integration for AI Blog Writer
## Overview
The AI Blog Writer now supports both Google Gemini and Hugging Face as LLM providers, with a clean environment variable-based configuration system. This integration uses the [Hugging Face Responses API](https://huggingface.co/docs/inference-providers/guides/responses-api) which provides a unified interface for model interactions.
## Supported Providers
### 1. Google Gemini (Default)
- **Provider ID**: `google`
- **Environment Variable**: `GEMINI_API_KEY`
- **Models**: `gemini-2.0-flash-001`
- **Features**: Text generation, structured JSON output
### 2. Hugging Face
- **Provider ID**: `huggingface`
- **Environment Variable**: `HF_TOKEN`
- **Models**: Multiple models via Inference Providers
- **Features**: Text generation, structured JSON output, multi-model support
## Configuration
### Environment Variables
Set the `GPT_PROVIDER` environment variable to choose your preferred provider:
```bash
# Use Google Gemini (default)
export GPT_PROVIDER=gemini
# or
export GPT_PROVIDER=google
# Use Hugging Face
export GPT_PROVIDER=hf_response_api
# or
export GPT_PROVIDER=huggingface
# or
export GPT_PROVIDER=hf
```
### API Keys
Configure the appropriate API key for your chosen provider:
```bash
# For Google Gemini
export GEMINI_API_KEY=your_gemini_api_key_here
# For Hugging Face
export HF_TOKEN=your_huggingface_token_here
```
## Usage
### Basic Text Generation
```python
from services.llm_providers.main_text_generation import llm_text_gen
# Generate text (uses configured provider)
response = llm_text_gen("Write a blog post about AI trends")
print(response)
```
### Structured JSON Generation
```python
from services.llm_providers.main_text_generation import llm_text_gen
# Define JSON schema
schema = {
"type": "object",
"properties": {
"title": {"type": "string"},
"sections": {
"type": "array",
"items": {
"type": "object",
"properties": {
"heading": {"type": "string"},
"content": {"type": "string"}
}
}
}
}
}
# Generate structured response
response = llm_text_gen(
"Create a blog outline about machine learning",
json_struct=schema
)
print(response)
```
### Direct Provider Usage
```python
# Google Gemini
from services.llm_providers.gemini_provider import gemini_text_response
response = gemini_text_response(
prompt="Write about AI",
temperature=0.7,
max_tokens=1000
)
# Hugging Face
from services.llm_providers.huggingface_provider import huggingface_text_response
response = huggingface_text_response(
prompt="Write about AI",
model="openai/gpt-oss-120b:groq",
temperature=0.7,
max_tokens=1000
)
```
## Available Hugging Face Models
The Hugging Face provider supports multiple models via Inference Providers:
- `openai/gpt-oss-120b:groq` (default)
- `moonshotai/Kimi-K2-Instruct-0905:groq`
- `Qwen/Qwen2.5-VL-7B-Instruct`
- `meta-llama/Llama-3.1-8B-Instruct:groq`
- `microsoft/Phi-3-medium-4k-instruct:groq`
- `mistralai/Mistral-7B-Instruct-v0.3:groq`
## Provider Selection Logic
1. **Environment Variable**: If `GPT_PROVIDER` is set, use the specified provider
2. **Auto-detection**: If no environment variable, check available API keys:
- Prefer Google Gemini if `GEMINI_API_KEY` is available
- Fall back to Hugging Face if `HF_TOKEN` is available
3. **Fallback**: If the specified provider fails, automatically try the other provider
## Error Handling
The system includes comprehensive error handling:
- **Missing API Keys**: Clear error messages with setup instructions
- **Provider Failures**: Automatic fallback to the other provider
- **Invalid Models**: Validation with helpful error messages
- **Network Issues**: Retry logic with exponential backoff
## Migration from Previous Version
### Removed Providers
The following providers have been removed to simplify the system:
- OpenAI
- Anthropic
- DeepSeek
### Updated Imports
```python
# Old imports (no longer work)
from services.llm_providers.openai_provider import openai_chatgpt
from services.llm_providers.anthropic_provider import anthropic_text_response
from services.llm_providers.deepseek_provider import deepseek_text_response
# New imports
from services.llm_providers.gemini_provider import gemini_text_response, gemini_structured_json_response
from services.llm_providers.huggingface_provider import huggingface_text_response, huggingface_structured_json_response
```
## Testing
Run this quick check to see which providers are reported as supported:
```bash
cd backend
python -c "
import sys
sys.path.insert(0, '.')
from services.llm_providers.main_text_generation import check_gpt_provider
print('Google provider supported:', check_gpt_provider('google'))
print('Hugging Face provider supported:', check_gpt_provider('huggingface'))
print('OpenAI provider supported:', check_gpt_provider('openai'))
"
```
## Performance Considerations
### Google Gemini
- Fast response times
- High-quality outputs
- Good for structured content
### Hugging Face
- Multiple model options
- Cost-effective for high-volume usage
- Good for experimentation with different models
## Troubleshooting
### Common Issues
1. **"No LLM API keys configured"**
- Ensure either `GEMINI_API_KEY` or `HF_TOKEN` is set
- Check that the API key is valid
2. **"Unknown LLM provider"**
- Use one of the supported `GPT_PROVIDER` values: `gemini` or `google` for Google Gemini; `hf_response_api`, `huggingface`, or `hf` for Hugging Face
- Check the `GPT_PROVIDER` environment variable
3. **"HF_TOKEN appears to be invalid"**
- Ensure your Hugging Face token starts with `hf_`
- Get a new token from [Hugging Face Settings](https://huggingface.co/settings/tokens)
4. **"OpenAI library not available"**
- Install the OpenAI library: `pip install openai`
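- The Hugging Face provider uses the OpenAI Python client to call the Hugging Face Responses API, so this package is required even though the OpenAI provider itself has been removed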
### Debug Mode
Enable debug logging to see provider selection:
```python
import logging
logging.basicConfig(level=logging.DEBUG)
```
## Future Enhancements
- Support for additional Hugging Face models
- Model-specific parameter optimization
- Advanced caching strategies
- Performance monitoring and metrics
- A/B testing between providers
## Support
For issues or questions:
1. Check the troubleshooting section above
2. Review the [Hugging Face Responses API documentation](https://huggingface.co/docs/inference-providers/guides/responses-api)
3. Check the Google Gemini API documentation for Gemini-specific issues

View File

@@ -5,17 +5,14 @@ migrated from the legacy lib/gpt_providers functionality.
""" """
from services.llm_providers.main_text_generation import llm_text_gen from services.llm_providers.main_text_generation import llm_text_gen
from services.llm_providers.openai_provider import openai_chatgpt, test_openai_api_key
from services.llm_providers.gemini_provider import gemini_text_response, gemini_structured_json_response from services.llm_providers.gemini_provider import gemini_text_response, gemini_structured_json_response
from services.llm_providers.anthropic_provider import anthropic_text_response from services.llm_providers.huggingface_provider import huggingface_text_response, huggingface_structured_json_response
from services.llm_providers.deepseek_provider import deepseek_text_response
__all__ = [ __all__ = [
"llm_text_gen", "llm_text_gen",
"openai_chatgpt",
"test_openai_api_key",
"gemini_text_response", "gemini_text_response",
"gemini_structured_json_response", "gemini_structured_json_response",
"anthropic_text_response", "huggingface_text_response",
"deepseek_text_response" "huggingface_structured_json_response"
] ]

View File

@@ -1,98 +0,0 @@
"""Anthropic Provider Service for ALwrity Backend.
This service handles Anthropic API integrations,
migrated from the legacy lib/gpt_providers/text_generation/anthropic_text_gen.py
"""
import os
import json
import time
from typing import Dict, Any, Tuple
from loguru import logger
from tenacity import (
retry,
stop_after_attempt,
wait_random_exponential,
)
# Import APIKeyManager
from ..onboarding.api_key_manager import APIKeyManager
try:
import anthropic
except ImportError:
anthropic = None
logger.warning("Anthropic library not available. Install with: pip install anthropic")
async def test_anthropic_api_key(api_key: str) -> Tuple[bool, str]:
    """
    Check whether an Anthropic API key is accepted by the service.

    Issues one small "Hello" message request with the supplied key and maps
    the outcome to a (flag, message) pair instead of raising.

    Args:
        api_key (str): The Anthropic API key to test

    Returns:
        tuple[bool, str]: A tuple containing (is_valid, message)
    """
    if not anthropic:
        return False, "Anthropic library not available"

    probe_messages = [{"role": "user", "content": "Hello"}]
    try:
        # The response body is irrelevant; only success or failure of the
        # call matters for validating the key.
        anthropic.Anthropic(api_key=api_key).messages.create(
            model="claude-3-5-sonnet-20241022",
            max_tokens=10,
            messages=probe_messages,
        )
    except anthropic.AuthenticationError:
        return False, "Invalid Anthropic API key"
    except anthropic.RateLimitError:
        return False, "Rate limit exceeded. Please try again later."
    except Exception as exc:
        return False, f"Error testing Anthropic API key: {str(exc)}"
    else:
        # No exception means the service accepted the key.
        return True, "Anthropic API key is valid"
@retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6))
def anthropic_text_response(prompt: str, model: str = "claude-3-5-sonnet-20241022",
                            temperature: float = 0.7, max_tokens: int = 4000,
                            system_prompt: str = None) -> str:
    """Get response from Anthropic Claude.

    Args:
        prompt (str): User prompt sent as the single user message.
        model (str): Anthropic model identifier.
        temperature (float): Sampling temperature forwarded to the API.
        max_tokens (int): Maximum number of tokens to generate.
        system_prompt (str, optional): System instruction for the model.

    Returns:
        str: Text of the first content block of the response, or a fixed
        explanatory string when the anthropic package is not installed.

    Raises:
        ValueError: If no Anthropic API key is configured.
        Exception: Any error raised by the API call is logged and re-raised
            so the retry decorator can attempt the call again.
    """
    if not anthropic:
        logger.error("Anthropic library not available")
        return "Anthropic library not available. Please install anthropic package."

    try:
        # Use APIKeyManager instead of direct environment variable access
        api_key_manager = APIKeyManager()
        api_key = api_key_manager.get_api_key("anthropic")

        if not api_key:
            raise ValueError("Anthropic API key not found. Please configure it in the onboarding process.")

        client = anthropic.Anthropic(api_key=api_key)

        request_kwargs = {
            "model": model,
            "max_tokens": max_tokens,
            "temperature": temperature,
            "messages": [{"role": "user", "content": prompt}],
        }
        # The Messages API takes the system prompt as a top-level `system`
        # parameter; a "system" role inside `messages` is not a valid turn.
        if system_prompt:
            request_kwargs["system"] = system_prompt

        response = client.messages.create(**request_kwargs)

        generated_text = response.content[0].text
        logger.info(f"[anthropic_text_response] Generated response with {len(generated_text)} characters")
        return generated_text

    except Exception as err:
        logger.error(f"Failed to get response from Anthropic: {err}. Retrying.")
        raise

View File

@@ -1,105 +0,0 @@
"""DeepSeek Provider Service for ALwrity Backend.
This service handles DeepSeek API integrations,
migrated from the legacy lib/gpt_providers/text_generation/deepseek_text_gen.py
"""
import os
import json
import time
from typing import Dict, Any, Tuple
from loguru import logger
from tenacity import (
retry,
stop_after_attempt,
wait_random_exponential,
)
# Import APIKeyManager
from ..onboarding.api_key_manager import APIKeyManager
try:
import openai
except ImportError:
openai = None
logger.warning("OpenAI library not available. Install with: pip install openai")
async def test_deepseek_api_key(api_key: str) -> Tuple[bool, str]:
    """
    Check whether a DeepSeek API key is accepted by the service.

    Issues one small chat completion against the DeepSeek endpoint through
    the OpenAI client and maps the outcome to a (flag, message) pair
    instead of raising.

    Args:
        api_key (str): The DeepSeek API key to test

    Returns:
        tuple[bool, str]: A tuple containing (is_valid, message)
    """
    if not openai:
        return False, "OpenAI library not available"

    try:
        # DeepSeek is reached through the OpenAI client with a custom base URL.
        deepseek_client = openai.OpenAI(
            api_key=api_key,
            base_url="https://api.deepseek.com/v1",
        )
        # The response body is irrelevant; only success or failure of the
        # call matters for validating the key.
        deepseek_client.chat.completions.create(
            model="deepseek-chat",
            messages=[{"role": "user", "content": "Hello"}],
            max_tokens=10,
            temperature=0.1,
        )
    except openai.AuthenticationError:
        return False, "Invalid DeepSeek API key"
    except openai.RateLimitError:
        return False, "Rate limit exceeded. Please try again later."
    except Exception as exc:
        return False, f"Error testing DeepSeek API key: {str(exc)}"
    else:
        # No exception means the service accepted the key.
        return True, "DeepSeek API key is valid"
@retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6))
def deepseek_text_response(prompt: str, model: str = "deepseek-chat",
                           temperature: float = 0.7, max_tokens: int = 4000,
                           system_prompt: str = None) -> str:
    """Get response from DeepSeek.

    Builds an optional system turn plus the user turn, sends them as a chat
    completion to the DeepSeek endpoint, and returns the content of the
    first choice. When the openai package is missing, a fixed explanatory
    string is returned instead. Errors are logged and re-raised so the
    retry decorator can attempt the call again.
    """
    if not openai:
        logger.error("OpenAI library not available")
        return "OpenAI library not available. Please install openai package."

    try:
        # Use APIKeyManager instead of direct environment variable access
        api_key = APIKeyManager().get_api_key("deepseek")
        if not api_key:
            raise ValueError("DeepSeek API key not found. Please configure it in the onboarding process.")

        deepseek_client = openai.OpenAI(
            api_key=api_key,
            base_url="https://api.deepseek.com/v1",
        )

        # The system turn, when present, must precede the user turn.
        system_turn = [{"role": "system", "content": system_prompt}] if system_prompt else []
        messages = system_turn + [{"role": "user", "content": prompt}]

        response = deepseek_client.chat.completions.create(
            model=model,
            messages=messages,
            max_tokens=max_tokens,
            temperature=temperature,
        )

        logger.info(f"[deepseek_text_response] Generated response with {len(response.choices[0].message.content)} characters")
        return response.choices[0].message.content

    except Exception as err:
        logger.error(f"Failed to get response from DeepSeek: {err}. Retrying.")
        raise

View File

@@ -402,18 +402,33 @@ def gemini_structured_json_response(prompt, schema, temperature=0.7, top_p=0.9,
try: try:
# Convert sync call to async for retry logic # Convert sync call to async for retry logic
import asyncio import asyncio
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)
response = loop.run_until_complete( # Check if there's already an event loop running
retry_with_backoff( try:
make_api_call, loop = asyncio.get_running_loop()
config=CONTENT_RETRY_CONFIG, # If we're already in an async context, we need to run this differently
operation_name="gemini_structured_json", logger.warning("⚠️ Already in async context, using direct sync call")
context={"schema_type": type(types_schema).__name__, "max_tokens": max_tokens} # For now, let's use a simpler approach without retry logic
response = client.models.generate_content(
model="gemini-2.5-flash",
contents=prompt,
config=generation_config,
) )
) logger.info("✅ Gemini API call completed successfully (sync mode)")
logger.info("✅ Gemini API call completed successfully") except RuntimeError:
# No event loop running, we can create one
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)
response = loop.run_until_complete(
retry_with_backoff(
make_api_call,
config=CONTENT_RETRY_CONFIG,
operation_name="gemini_structured_json",
context={"schema_type": type(types_schema).__name__, "max_tokens": max_tokens}
)
)
logger.info("✅ Gemini API call completed successfully")
except Exception as api_error: except Exception as api_error:
logger.error(f"❌ Gemini API call failed: {api_error}") logger.error(f"❌ Gemini API call failed: {api_error}")
logger.error(f"❌ API Error type: {type(api_error).__name__}") logger.error(f"❌ API Error type: {type(api_error).__name__}")

View File

@@ -0,0 +1,441 @@
"""
Hugging Face Provider Module for ALwrity
This module provides functions for interacting with Hugging Face's Inference Providers API
using the Responses API (beta) which provides a unified interface for model interactions.
Key Features:
- Text response generation with retry logic
- Structured JSON response generation with schema validation
- Comprehensive error handling and logging
- Automatic API key management
- Support for various Hugging Face models via Inference Providers
Best Practices:
1. Use structured output for complex, multi-field responses
2. Keep schemas simple and flat to avoid truncation
3. Set appropriate token limits (8192 for complex outputs)
4. Use low temperature (0.1-0.3) for consistent structured output
5. Implement proper error handling in calling functions
6. Use the Responses API for better compatibility
Usage Examples:
# Text response
result = huggingface_text_response(prompt, temperature=0.7, max_tokens=2048)
# Structured JSON response
schema = {
"type": "object",
"properties": {
"tasks": {
"type": "array",
"items": {"type": "object", "properties": {...}}
}
}
}
result = huggingface_structured_json_response(prompt, schema, temperature=0.2, max_tokens=8192)
Dependencies:
- openai (for Hugging Face Responses API)
- tenacity (for retry logic)
- loguru (for logging)
- json (for fallback parsing)
Author: ALwrity Team
Version: 1.0
Last Updated: October 2025
"""
import os
import sys
from pathlib import Path
import json
import re
from typing import Optional, Dict, Any
from dotenv import load_dotenv
# Locate the backend directory relative to this file so the .env lookup does
# not depend on the process working directory.
current_dir = Path(__file__).parent.parent  # services directory
backend_dir = current_dir.parent  # backend directory
env_path = backend_dir / '.env'
if not env_path.exists():
    # No backend-level .env: fall back to load_dotenv()'s default lookup.
    load_dotenv()
    print(f"No .env found at {env_path}, using current directory")
else:
    load_dotenv(env_path)
    print(f"Loaded .env from: {env_path}")
from loguru import logger
from utils.logger_utils import get_service_logger
# Use service-specific logger to avoid conflicts
logger = get_service_logger("huggingface_provider")
from tenacity import (
retry,
stop_after_attempt,
wait_random_exponential,
)
# The OpenAI client is an optional dependency at import time. Availability is
# recorded in OPENAI_AVAILABLE so the provider functions can fail with a clear
# message instead of a NameError on `OpenAI`.
try:
    from openai import OpenAI
    OPENAI_AVAILABLE = True
except ImportError:
    OPENAI_AVAILABLE = False
    # Use `warning`: `warn` is not a loguru logger method and is only a
    # deprecated alias in the standard logging module, so calling it here
    # could turn a soft "library missing" notice into an import-time crash.
    logger.warning("OpenAI library not available. Install with: pip install openai")
def get_huggingface_api_key() -> str:
    """Return the Hugging Face token read from the HF_TOKEN environment variable.

    Returns:
        str: The token value.

    Raises:
        ValueError: If HF_TOKEN is unset or empty, or does not start with 'hf_'.
    """
    token = os.getenv('HF_TOKEN')

    problem = None
    if not token:
        problem = "HF_TOKEN environment variable is not set. Please set it in your .env file."
    elif not token.startswith('hf_'):
        # Basic format check only; the token is not verified against the service.
        problem = "HF_TOKEN appears to be invalid. It should start with 'hf_'."

    if problem is not None:
        logger.error(problem)
        raise ValueError(problem)

    return token
@retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6))
def huggingface_text_response(
    prompt: str,
    model: str = "openai/gpt-oss-120b:groq",
    temperature: float = 0.7,
    max_tokens: int = 2048,
    top_p: float = 0.9,
    system_prompt: Optional[str] = None
) -> str:
    """
    Generate text response using Hugging Face Inference Providers API.

    The request is sent through the OpenAI client pointed at the Hugging Face
    router (Responses API). The whole function is wrapped in a retry
    decorator, so any exception raised here triggers another attempt.

    Args:
        prompt (str): The input prompt for the AI model
        model (str): Hugging Face model identifier (default: "openai/gpt-oss-120b:groq")
        temperature (float): Controls randomness (0.0-1.0)
        max_tokens (int): Maximum tokens in response (sent as `max_output_tokens`)
        top_p (float): Nucleus sampling parameter (0.0-1.0)
        system_prompt (str, optional): System instruction for the model

    Returns:
        str: Generated text response with Markdown code fences removed

    Raises:
        Exception: If the OpenAI client is unavailable, the API key is missing
            or malformed, or the API call fails. The original error is chained
            as the cause.

    Example:
        result = huggingface_text_response(
            prompt="Write a blog post about AI",
            model="openai/gpt-oss-120b:groq",
            temperature=0.7,
            max_tokens=2048,
            system_prompt="You are a professional content writer."
        )
    """
    try:
        if not OPENAI_AVAILABLE:
            raise ImportError("OpenAI library not available. Install with: pip install openai")

        # get_huggingface_api_key() raises ValueError for a missing or
        # malformed token, so no further emptiness check is needed here.
        api_key = get_huggingface_api_key()
        logger.info(f"🔑 Hugging Face API key loaded: {bool(api_key)} (length: {len(api_key) if api_key else 0})")

        # Initialize Hugging Face client using Responses API
        client = OpenAI(
            base_url="https://router.huggingface.co/v1",
            api_key=api_key,
        )
        logger.info("✅ Hugging Face client initialized for text response")

        # Conversation input: optional system turn followed by the user turn.
        input_content = []
        if system_prompt:
            input_content.append({
                "role": "system",
                "content": system_prompt
            })
        input_content.append({
            "role": "user",
            "content": prompt
        })

        # f-string so the values are rendered regardless of whether the
        # logger interpolates %-style or brace-style arguments.
        prompt_len = len(prompt) if isinstance(prompt, str) else '<non-str>'
        logger.info(
            f"Hugging Face text call | model={model} | prompt_len={prompt_len} | "
            f"temp={temperature} | top_p={top_p} | max_tokens={max_tokens}"
        )

        logger.info("🚀 Making Hugging Face API call...")

        # Add rate limiting to prevent expensive API calls
        import time
        time.sleep(1)  # 1 second delay between API calls

        # Forward the token budget; previously max_tokens was accepted and
        # logged but never sent with the request.
        response = client.responses.parse(
            model=model,
            input=input_content,
            temperature=temperature,
            top_p=top_p,
            max_output_tokens=max_tokens,
        )

        # Extract text from response
        if hasattr(response, 'output_text') and response.output_text:
            generated_text = response.output_text
        elif hasattr(response, 'output') and response.output:
            if isinstance(response.output, list) and len(response.output) > 0:
                # Output items may be plain dicts or SDK objects; support both
                # instead of assuming a dict with .get().
                first_item = response.output[0]
                if isinstance(first_item, dict):
                    generated_text = first_item.get('content', '')
                else:
                    generated_text = getattr(first_item, 'content', '')
            else:
                generated_text = str(response.output)
        else:
            generated_text = str(response)

        # The cleanup below needs a string; non-text content is stringified.
        if generated_text and not isinstance(generated_text, str):
            generated_text = str(generated_text)

        # Clean up the response
        if generated_text:
            # Remove any markdown code-fence markers if present
            generated_text = re.sub(r'```[a-zA-Z]*\n?', '', generated_text)
            generated_text = re.sub(r'```\n?', '', generated_text)
            generated_text = generated_text.strip()

        logger.info(f"✅ Hugging Face text response generated successfully (length: {len(generated_text)})")
        return generated_text

    except Exception as e:
        logger.error(f"❌ Hugging Face text generation failed: {str(e)}")
        # Chain the original error so its type and traceback are preserved.
        raise Exception(f"Hugging Face text generation failed: {str(e)}") from e
@retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6))
def huggingface_structured_json_response(
    prompt: str,
    schema: Dict[str, Any],
    model: str = "openai/gpt-oss-120b:groq",
    temperature: float = 0.7,
    max_tokens: int = 8192,
    system_prompt: Optional[str] = None
) -> Dict[str, Any]:
    """
    Generate structured JSON response using Hugging Face Inference Providers API.

    The schema is embedded in the prompt as a JSON instruction, the model is
    called in text mode, and the reply is parsed as JSON. If direct parsing
    fails, the outermost `{...}` span is extracted and parsed; if that also
    fails, an error dictionary is returned rather than raising.

    Args:
        prompt (str): The input prompt for the AI model
        schema (dict): JSON schema defining the expected output structure
        model (str): Hugging Face model identifier (default: "openai/gpt-oss-120b:groq")
        temperature (float): Controls randomness (0.0-1.0). Use 0.1-0.3 for structured output
        max_tokens (int): Maximum tokens in response (sent as `max_output_tokens`)
        system_prompt (str, optional): System instruction for the model

    Returns:
        dict: Parsed JSON response, or a dict with the keys "error",
        "raw_response" and "schema_expected" when no JSON could be parsed

    Raises:
        Exception: If the OpenAI client is unavailable, the API key is missing
            or malformed, or the API call fails. The original error is chained
            as the cause.

    Example:
        schema = {
            "type": "object",
            "properties": {
                "tasks": {
                    "type": "array",
                    "items": {
                        "type": "object",
                        "properties": {
                            "title": {"type": "string"},
                            "description": {"type": "string"}
                        }
                    }
                }
            }
        }
        result = huggingface_structured_json_response(prompt, schema, temperature=0.2, max_tokens=8192)
    """
    try:
        if not OPENAI_AVAILABLE:
            raise ImportError("OpenAI library not available. Install with: pip install openai")

        # get_huggingface_api_key() raises ValueError for a missing or
        # malformed token, so no further emptiness check is needed here.
        api_key = get_huggingface_api_key()
        logger.info(f"🔑 Hugging Face API key loaded: {bool(api_key)} (length: {len(api_key) if api_key else 0})")

        # Initialize Hugging Face client using Responses API
        client = OpenAI(
            base_url="https://router.huggingface.co/v1",
            api_key=api_key,
        )
        logger.info("✅ Hugging Face client initialized for structured JSON response")

        # Conversation input: optional system turn followed by the user turn.
        input_content = []
        if system_prompt:
            input_content.append({
                "role": "system",
                "content": system_prompt
            })

        # One JSON instruction that carries the schema. Previously a generic
        # instruction and this schema-bearing one were both appended.
        json_instruction = (
            "\n\nPlease respond with valid JSON that matches this exact structure:\n"
            + json.dumps(schema, indent=2)
        )
        input_content.append({
            "role": "user",
            "content": f"{prompt}{json_instruction}"
        })

        # f-string so the values are rendered regardless of whether the
        # logger interpolates %-style or brace-style arguments.
        prompt_len = len(prompt) if isinstance(prompt, str) else '<non-str>'
        logger.info(
            f"Hugging Face structured call | model={model} | prompt_len={prompt_len} | "
            f"schema_kind={type(schema).__name__} | temp={temperature} | max_tokens={max_tokens}"
        )

        logger.info("🚀 Making Hugging Face structured API call...")

        # Use simple text generation and parse JSON manually to avoid API format issues
        logger.info("🚀 Making Hugging Face API call (text mode with JSON parsing)...")

        # Add rate limiting to prevent expensive API calls
        import time
        time.sleep(1)  # 1 second delay between API calls

        # Forward the token budget; previously max_tokens was accepted and
        # logged but never sent with the request.
        response = client.responses.parse(
            model=model,
            input=input_content,
            temperature=temperature,
            max_output_tokens=max_tokens,
        )

        # Extract structured data from response
        if hasattr(response, 'output_parsed') and response.output_parsed:
            logger.info("✅ Hugging Face structured JSON response parsed successfully")
            parsed = response.output_parsed
            # Convert Pydantic model to dict if needed
            if hasattr(parsed, 'model_dump'):
                return parsed.model_dump()
            if hasattr(parsed, 'dict'):
                return parsed.dict()
            return parsed

        if hasattr(response, 'output_text') and response.output_text:
            # Fallback to text parsing if output_parsed is not available
            response_text = response.output_text

            # Strip Markdown code fences around the JSON payload
            response_text = re.sub(r'```json\n?', '', response_text)
            response_text = re.sub(r'```\n?', '', response_text)
            response_text = response_text.strip()

            try:
                parsed_json = json.loads(response_text)
                logger.info("✅ Hugging Face structured JSON response parsed from text")
                return parsed_json
            except json.JSONDecodeError as json_err:
                logger.error(f"❌ JSON parsing failed: {json_err}")
                logger.error(f"Raw response: {response_text}")

                # Try the outermost {...} span in case the model wrapped the
                # JSON in extra prose.
                json_match = re.search(r'\{.*\}', response_text, re.DOTALL)
                if json_match:
                    try:
                        extracted_json = json.loads(json_match.group())
                        logger.info("✅ JSON extracted using regex fallback")
                        return extracted_json
                    except json.JSONDecodeError:
                        # Fall through to the structured error response below.
                        pass

                # If all else fails, return a structured error response
                logger.error("❌ All JSON parsing attempts failed")
                return {
                    "error": "Failed to parse JSON response",
                    "raw_response": response_text,
                    "schema_expected": schema
                }

        logger.error("❌ No valid response data found")
        return {
            "error": "No valid response data found",
            "raw_response": str(response),
            "schema_expected": schema
        }

    except Exception as e:
        error_msg = str(e) if str(e) else repr(e)
        error_type = type(e).__name__
        logger.error(f"❌ Hugging Face structured JSON generation failed: {error_type}: {error_msg}")
        logger.error(f"❌ Full exception details: {repr(e)}")
        import traceback
        logger.error(f"❌ Traceback: {traceback.format_exc()}")
        # Chain the original error so its type and traceback are preserved.
        raise Exception(f"Hugging Face structured JSON generation failed: {error_type}: {error_msg}") from e
def get_available_models() -> list:
    """
    Return the Hugging Face model identifiers offered for text generation.

    Returns:
        list: A newly built list of model identifier strings, in a fixed order.
    """
    supported_models = (
        "openai/gpt-oss-120b:groq",
        "moonshotai/Kimi-K2-Instruct-0905:groq",
        "Qwen/Qwen2.5-VL-7B-Instruct",
        "meta-llama/Llama-3.1-8B-Instruct:groq",
        "microsoft/Phi-3-medium-4k-instruct:groq",
        "mistralai/Mistral-7B-Instruct-v0.3:groq",
    )
    # Hand back a fresh list on every call so a caller mutating the result
    # cannot affect later callers.
    return list(supported_models)
def validate_model(model: str) -> bool:
    """
    Report whether a model identifier is one of the supported models.

    Args:
        model (str): Model identifier to check.

    Returns:
        bool: True when ``model`` appears in the list returned by
        ``get_available_models()``, False otherwise.
    """
    # Exact, case-sensitive membership test against the supported list.
    return model in get_available_models()

View File

@@ -10,10 +10,9 @@ from typing import Optional, Dict, Any
from loguru import logger from loguru import logger
from ..onboarding.api_key_manager import APIKeyManager from ..onboarding.api_key_manager import APIKeyManager
from .openai_provider import openai_chatgpt
from .gemini_provider import gemini_text_response, gemini_structured_json_response from .gemini_provider import gemini_text_response, gemini_structured_json_response
from .anthropic_provider import anthropic_text_response from .huggingface_provider import huggingface_text_response, huggingface_structured_json_response
from .deepseek_provider import deepseek_text_response
def llm_text_gen(prompt: str, system_prompt: Optional[str] = None, json_struct: Optional[Dict[str, Any]] = None) -> str: def llm_text_gen(prompt: str, system_prompt: Optional[str] = None, json_struct: Optional[Dict[str, Any]] = None) -> str:
""" """
@@ -31,13 +30,6 @@ def llm_text_gen(prompt: str, system_prompt: Optional[str] = None, json_struct:
logger.info("[llm_text_gen] Starting text generation") logger.info("[llm_text_gen] Starting text generation")
logger.debug(f"[llm_text_gen] Prompt length: {len(prompt)} characters") logger.debug(f"[llm_text_gen] Prompt length: {len(prompt)} characters")
# Initialize API key manager and reload keys from .env file
api_key_manager = APIKeyManager()
api_key_manager.load_api_keys() # Force reload from .env file
# Debug: Log loaded API keys
logger.debug(f"[llm_text_gen] Loaded API keys: {api_key_manager.get_all_keys()}")
# Set default values for LLM parameters # Set default values for LLM parameters
gpt_provider = "google" # Default to Google Gemini gpt_provider = "google" # Default to Google Gemini
model = "gemini-2.0-flash-001" model = "gemini-2.0-flash-001"
@@ -49,6 +41,15 @@ def llm_text_gen(prompt: str, system_prompt: Optional[str] = None, json_struct:
frequency_penalty = 0.0 frequency_penalty = 0.0
presence_penalty = 0.0 presence_penalty = 0.0
# Check for GPT_PROVIDER environment variable
env_provider = os.getenv('GPT_PROVIDER', '').lower()
if env_provider in ['gemini', 'google']:
gpt_provider = "google"
model = "gemini-2.0-flash-001"
elif env_provider in ['hf_response_api', 'huggingface', 'hf']:
gpt_provider = "huggingface"
model = "openai/gpt-oss-120b:groq"
# Default blog characteristics # Default blog characteristics
blog_tone = "Professional" blog_tone = "Professional"
blog_demographic = "Professional" blog_demographic = "Professional"
@@ -57,41 +58,40 @@ def llm_text_gen(prompt: str, system_prompt: Optional[str] = None, json_struct:
blog_output_format = "markdown" blog_output_format = "markdown"
blog_length = 2000 blog_length = 2000
# Try to get provider from environment or config # Check which providers have API keys available using APIKeyManager
try: api_key_manager = APIKeyManager()
# Check which providers have API keys available available_providers = []
available_providers = [] if api_key_manager.get_api_key("gemini"):
if api_key_manager.get_api_key("openai"): available_providers.append("google")
available_providers.append("openai") if api_key_manager.get_api_key("hf_token"):
if api_key_manager.get_api_key("gemini"): available_providers.append("huggingface")
available_providers.append("google")
if api_key_manager.get_api_key("anthropic"): # If no environment variable set, auto-detect based on available keys
available_providers.append("anthropic") if not env_provider:
if api_key_manager.get_api_key("deepseek"): # Prefer Google Gemini if available, otherwise use Hugging Face
available_providers.append("deepseek")
# Prefer Google Gemini if available, otherwise use first available
if "google" in available_providers: if "google" in available_providers:
gpt_provider = "google" gpt_provider = "google"
model = "gemini-2.0-flash-001" model = "gemini-2.0-flash-001"
elif available_providers: elif "huggingface" in available_providers:
gpt_provider = available_providers[0] gpt_provider = "huggingface"
if gpt_provider == "openai": model = "openai/gpt-oss-120b:groq"
model = "gpt-4o"
elif gpt_provider == "anthropic":
model = "claude-3-5-sonnet-20241022"
elif gpt_provider == "deepseek":
model = "deepseek-chat"
else: else:
logger.error("[llm_text_gen] No API keys found. Structured mock responses are disabled.") logger.error("[llm_text_gen] No API keys found for supported providers.")
raise RuntimeError("No LLM API keys configured. Configure provider API keys to enable AI responses.") raise RuntimeError("No LLM API keys configured. Configure GEMINI_API_KEY or HF_TOKEN to enable AI responses.")
else:
logger.debug(f"[llm_text_gen] Using provider: {gpt_provider}, model: {model}") # Environment variable was set, validate it's supported
if gpt_provider not in available_providers:
logger.warning(f"[llm_text_gen] Provider {gpt_provider} not available, falling back to available providers")
if "google" in available_providers:
gpt_provider = "google"
model = "gemini-2.0-flash-001"
elif "huggingface" in available_providers:
gpt_provider = "huggingface"
model = "openai/gpt-oss-120b:groq"
else:
raise RuntimeError("No supported providers available.")
except Exception as err: logger.debug(f"[llm_text_gen] Using provider: {gpt_provider}, model: {model}")
logger.warning(f"[llm_text_gen] Error determining provider, using defaults: {err}")
gpt_provider = "google"
model = "gemini-2.0-flash-001"
# Construct the system prompt if not provided # Construct the system prompt if not provided
if system_prompt is None: if system_prompt is None:
@@ -118,18 +118,7 @@ def llm_text_gen(prompt: str, system_prompt: Optional[str] = None, json_struct:
# Generate response based on provider # Generate response based on provider
try: try:
if gpt_provider == "openai": if gpt_provider == "google":
return openai_chatgpt(
prompt=prompt,
model=model,
temperature=temperature,
max_tokens=max_tokens,
top_p=top_p,
n=n,
fp=fp,
system_prompt=system_instructions
)
elif gpt_provider == "google":
if json_struct: if json_struct:
return gemini_structured_json_response( return gemini_structured_json_response(
prompt=prompt, prompt=prompt,
@@ -149,66 +138,83 @@ def llm_text_gen(prompt: str, system_prompt: Optional[str] = None, json_struct:
max_tokens=max_tokens, max_tokens=max_tokens,
system_prompt=system_instructions system_prompt=system_instructions
) )
elif gpt_provider == "anthropic": elif gpt_provider == "huggingface":
return anthropic_text_response( if json_struct:
prompt=prompt, return huggingface_structured_json_response(
model=model, prompt=prompt,
temperature=temperature, schema=json_struct,
max_tokens=max_tokens, model=model,
system_prompt=system_instructions temperature=temperature,
) max_tokens=max_tokens,
elif gpt_provider == "deepseek": system_prompt=system_instructions
return deepseek_text_response( )
prompt=prompt, else:
model=model, return huggingface_text_response(
temperature=temperature, prompt=prompt,
max_tokens=max_tokens, model=model,
system_prompt=system_instructions temperature=temperature,
) max_tokens=max_tokens,
top_p=top_p,
system_prompt=system_instructions
)
else: else:
logger.error(f"[llm_text_gen] Unknown provider: {gpt_provider}") logger.error(f"[llm_text_gen] Unknown provider: {gpt_provider}")
raise RuntimeError("Unknown LLM provider.") raise RuntimeError("Unknown LLM provider. Supported providers: google, huggingface")
except Exception as provider_error: except Exception as provider_error:
logger.error(f"[llm_text_gen] Provider {gpt_provider} failed: {str(provider_error)}") logger.error(f"[llm_text_gen] Provider {gpt_provider} failed: {str(provider_error)}")
# Try to fallback to another provider
fallback_providers = ["openai", "anthropic", "deepseek"] # CIRCUIT BREAKER: Only try ONE fallback to prevent expensive API calls
for fallback_provider in fallback_providers: fallback_providers = ["google", "huggingface"]
if fallback_provider in available_providers and fallback_provider != gpt_provider: fallback_providers = [p for p in fallback_providers if p in available_providers and p != gpt_provider]
try:
logger.info(f"[llm_text_gen] Trying fallback provider: {fallback_provider}") if fallback_providers:
if fallback_provider == "openai": fallback_provider = fallback_providers[0] # Only try the first available
return openai_chatgpt( try:
logger.info(f"[llm_text_gen] Trying SINGLE fallback provider: {fallback_provider}")
if fallback_provider == "google":
if json_struct:
return gemini_structured_json_response(
prompt=prompt, prompt=prompt,
model="gpt-4o", schema=json_struct,
temperature=temperature,
top_p=top_p,
top_k=n,
max_tokens=max_tokens,
system_prompt=system_instructions
)
else:
return gemini_text_response(
prompt=prompt,
temperature=temperature,
top_p=top_p,
n=n,
max_tokens=max_tokens,
system_prompt=system_instructions
)
elif fallback_provider == "huggingface":
if json_struct:
return huggingface_structured_json_response(
prompt=prompt,
schema=json_struct,
model="openai/gpt-oss-120b:groq",
temperature=temperature,
max_tokens=max_tokens,
system_prompt=system_instructions
)
else:
return huggingface_text_response(
prompt=prompt,
model="openai/gpt-oss-120b:groq",
temperature=temperature, temperature=temperature,
max_tokens=max_tokens, max_tokens=max_tokens,
top_p=top_p, top_p=top_p,
n=n,
fp=fp,
system_prompt=system_instructions system_prompt=system_instructions
) )
elif fallback_provider == "anthropic": except Exception as fallback_error:
return anthropic_text_response( logger.error(f"[llm_text_gen] Fallback provider {fallback_provider} also failed: {str(fallback_error)}")
prompt=prompt,
model="claude-3-5-sonnet-20241022",
temperature=temperature,
max_tokens=max_tokens,
system_prompt=system_instructions
)
elif fallback_provider == "deepseek":
return deepseek_text_response(
prompt=prompt,
model="deepseek-chat",
temperature=temperature,
max_tokens=max_tokens,
system_prompt=system_instructions
)
except Exception as fallback_error:
logger.error(f"[llm_text_gen] Fallback provider {fallback_provider} also failed: {str(fallback_error)}")
continue
# If all providers fail, raise an error (no mock) # CIRCUIT BREAKER: Stop immediately to prevent expensive API calls
logger.error("[llm_text_gen] All providers failed. Structured mock responses are disabled.") logger.error("[llm_text_gen] CIRCUIT BREAKER: Stopping to prevent expensive API calls.")
raise RuntimeError("All LLM providers failed to generate a response.") raise RuntimeError("All LLM providers failed to generate a response.")
except Exception as e: except Exception as e:
@@ -217,7 +223,7 @@ def llm_text_gen(prompt: str, system_prompt: Optional[str] = None, json_struct:
def check_gpt_provider(gpt_provider: str) -> bool: def check_gpt_provider(gpt_provider: str) -> bool:
"""Check if the specified GPT provider is supported.""" """Check if the specified GPT provider is supported."""
supported_providers = ["openai", "google", "anthropic", "deepseek"] supported_providers = ["google", "huggingface"]
return gpt_provider in supported_providers return gpt_provider in supported_providers
def get_api_key(gpt_provider: str) -> Optional[str]: def get_api_key(gpt_provider: str) -> Optional[str]:
@@ -225,10 +231,8 @@ def get_api_key(gpt_provider: str) -> Optional[str]:
try: try:
api_key_manager = APIKeyManager() api_key_manager = APIKeyManager()
provider_mapping = { provider_mapping = {
"openai": "openai",
"google": "gemini", "google": "gemini",
"anthropic": "anthropic", "huggingface": "hf_token"
"deepseek": "deepseek"
} }
mapped_provider = provider_mapping.get(gpt_provider, gpt_provider) mapped_provider = provider_mapping.get(gpt_provider, gpt_provider)

View File

@@ -1,133 +0,0 @@
"""OpenAI Provider Service for ALwrity Backend.
This service handles OpenAI API integrations,
migrated from the legacy lib/gpt_providers/text_generation/openai_text_gen.py
"""
import os
import time
import openai
import asyncio
from typing import Tuple
from loguru import logger
from tenacity import (
retry,
stop_after_attempt,
wait_random_exponential,
)
# Import APIKeyManager
from ..onboarding.api_key_manager import APIKeyManager
async def test_openai_api_key(api_key: str) -> Tuple[bool, str]:
    """
    Test if the provided OpenAI API key is valid.

    Builds an OpenAI client with the key and issues a model-listing request
    as a probe. Every failure is converted into a ``(False, message)``
    result, so this function does not raise.

    Args:
        api_key (str): The OpenAI API key to test

    Returns:
        tuple[bool, str]: A tuple containing (is_valid, message)
    """
    try:
        # Create OpenAI client with the provided key
        client = openai.OpenAI(api_key=api_key)
        # Listing models is only a probe; the returned data is not needed.
        # NOTE(review): this is the synchronous client called without await,
        # so the request presumably blocks the event loop while it is in
        # flight - confirm whether that is acceptable for callers.
        client.models.list()
    except openai.AuthenticationError:
        return False, "Invalid OpenAI API key"
    except openai.RateLimitError:
        return False, "Rate limit exceeded. Please try again later."
    except Exception as e:
        return False, f"Error testing OpenAI API key: {str(e)}"
    # Reaching this point means the probe request succeeded.
    return True, "OpenAI API key is valid"
@retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6))
def openai_chatgpt(prompt: str, model: str = "gpt-4o", temperature: float = 0.7,
                   max_tokens: int = 4000, top_p: float = 0.9, n: int = 1,
                   fp: int = 16, system_prompt: str = None) -> str:
    """
    Wrapper function for OpenAI's ChatGPT completion.

    Streams a chat completion, echoes each text fragment to stdout as it
    arrives, and returns the concatenated text of the first choice.

    Args:
        prompt (str): The input text to generate completion for.
        model (str, optional): Model to be used for the completion. Defaults to "gpt-4o".
        temperature (float, optional): Controls randomness. Lower values make responses more deterministic. Defaults to 0.7.
        max_tokens (int, optional): Maximum number of tokens to generate. Defaults to 4000.
        top_p (float, optional): Controls diversity. Defaults to 0.9.
        n (int, optional): Number of completions to generate. Defaults to 1.
            Only the first choice of each streamed chunk is read.
        fp (int, optional): Frequency penalty. Defaults to 16.
        system_prompt (str, optional): System prompt for the conversation. Defaults to None.

    Returns:
        str: The generated text completion.

    Raises:
        SystemExit: If an API error, connection error, rate limit error, or
            any other exception (including a missing API key) occurs.
    """
    # NOTE(review): the OpenAI API reference documents frequency_penalty as a
    # number between -2.0 and 2.0; the default fp=16 looks out of range -
    # confirm before relying on the default.
    # NOTE(review): SystemExit is not an Exception subclass, so the @retry
    # decorator's default predicate presumably never retries these failures,
    # and callers using `except Exception` will not catch them - confirm
    # whether that is intended.

    # Wait for 5 seconds to comply with rate limits
    time.sleep(5)

    try:
        # Use APIKeyManager instead of direct environment variable access
        api_key_manager = APIKeyManager()
        api_key = api_key_manager.get_api_key("openai")
        if not api_key:
            raise ValueError("OpenAI API key not found. Please configure it in the onboarding process.")

        client = openai.OpenAI(api_key=api_key)

        # Prepare messages
        messages = []
        if system_prompt:
            messages.append({"role": "system", "content": system_prompt})
        messages.append({"role": "user", "content": prompt})

        response = client.chat.completions.create(
            model=model,
            messages=messages,
            max_tokens=max_tokens,
            n=n,
            top_p=top_p,
            stream=True,
            frequency_penalty=fp,
            temperature=temperature
        )

        # Iterate through the stream of events, keeping only real text
        collected_messages = []
        for chunk in response:
            chunk_message = chunk.choices[0].delta.content  # extract the message
            if chunk_message is None:
                # Chunks without text would otherwise be echoed as the
                # literal string "None".
                continue
            collected_messages.append(chunk_message)
            print(chunk_message, end="", flush=True)

        full_reply_content = ''.join(collected_messages)
        logger.info(f"[openai_chatgpt] Generated response with {len(full_reply_content)} characters")
        return full_reply_content

    # The specific error types must be listed before openai.APIError: they
    # are subclasses of it, so the general handler would otherwise shadow them.
    except openai.RateLimitError as e:
        logger.error(f"OpenAI Rate Limit Error: {e}")
        raise SystemExit from e
    except openai.APIConnectionError as e:
        logger.error(f"OpenAI API Connection Error: {e}")
        raise SystemExit from e
    except openai.APIError as e:
        logger.error(f"OpenAI API Error: {e}")
        raise SystemExit from e
    except Exception as e:
        logger.error(f"Unexpected error in OpenAI API call: {e}")
        raise SystemExit from e

View File

@@ -388,10 +388,8 @@ class APIKeyManager:
def _load_from_env(self): def _load_from_env(self):
"""Load API keys from environment variables.""" """Load API keys from environment variables."""
providers = [ providers = [
'OPENAI_API_KEY',
'ANTHROPIC_API_KEY',
'GEMINI_API_KEY', 'GEMINI_API_KEY',
'MISTRAL_API_KEY', 'HF_TOKEN',
'TAVILY_API_KEY', 'TAVILY_API_KEY',
'SERPER_API_KEY', 'SERPER_API_KEY',
'METAPHOR_API_KEY', 'METAPHOR_API_KEY',

View File

@@ -24,7 +24,7 @@ export function usePolling(
options: UsePollingOptions = {} options: UsePollingOptions = {}
): UsePollingReturn { ): UsePollingReturn {
const { const {
interval = 2000, // 2 seconds default interval = 5000, // 5 seconds default - increased to reduce load
onProgress, onProgress,
onComplete, onComplete,
onError onError
@@ -99,13 +99,17 @@ export function usePolling(
} }
if (status.status === 'completed') { if (status.status === 'completed') {
console.log('✅ Task completed - stopping polling immediately');
setResult(status.result); setResult(status.result);
onComplete?.(status.result); onComplete?.(status.result);
stopPolling(); stopPolling();
return; // Exit early to prevent further processing
} else if (status.status === 'failed') { } else if (status.status === 'failed') {
console.log('❌ Task failed - stopping polling immediately');
setError(status.error || 'Task failed'); setError(status.error || 'Task failed');
onError?.(status.error || 'Task failed'); onError?.(status.error || 'Task failed');
stopPolling(); stopPolling();
return; // Exit early to prevent further processing
} }
attemptsRef.current++; attemptsRef.current++;
@@ -113,12 +117,16 @@ export function usePolling(
const errorMessage = err instanceof Error ? err.message : 'Unknown error occurred'; const errorMessage = err instanceof Error ? err.message : 'Unknown error occurred';
console.error('Polling error:', errorMessage); console.error('Polling error:', errorMessage);
// Only stop polling for actual task failures (404, task not found) // Stop polling for task failures and rate limiting
// For network errors, timeouts, etc., continue polling
if (errorMessage.includes('404') || errorMessage.includes('Task not found')) { if (errorMessage.includes('404') || errorMessage.includes('Task not found')) {
setError('Task not found - it may have expired or been cleaned up'); setError('Task not found - it may have expired or been cleaned up');
onError?.('Task not found - it may have expired or been cleaned up'); onError?.('Task not found - it may have expired or been cleaned up');
stopPolling(); stopPolling();
} else if (errorMessage.includes('429') || errorMessage.includes('Too Many Requests')) {
console.warn('Rate limited - stopping polling to prevent further issues');
setError('Rate limited - please try again later');
onError?.('Rate limited - please try again later');
stopPolling();
} }
// For other errors (timeouts, network issues), continue polling // For other errors (timeouts, network issues), continue polling
// The backend will eventually complete or fail, and we'll catch it // The backend will eventually complete or fail, and we'll catch it