chore: Update backend services, intelligence integration, and documentation
This commit is contained in:
@@ -925,6 +925,8 @@ class SIFIntegrationService:
|
|||||||
# Content pillar analysis
|
# Content pillar analysis
|
||||||
if self.intelligence_service.is_initialized():
|
if self.intelligence_service.is_initialized():
|
||||||
clusters = await self.intelligence_service.cluster(min_score=0.6)
|
clusters = await self.intelligence_service.cluster(min_score=0.6)
|
||||||
|
if asyncio.iscoroutine(clusters):
|
||||||
|
clusters = await clusters
|
||||||
insights["content_pillars"] = self._format_clusters_as_pillars(clusters)
|
insights["content_pillars"] = self._format_clusters_as_pillars(clusters)
|
||||||
|
|
||||||
# Semantic gaps analysis
|
# Semantic gaps analysis
|
||||||
|
|||||||
@@ -215,6 +215,7 @@ class TxtaiIntelligenceService:
|
|||||||
logger.error(f"Detected known txtai/faiss IndexIDMap/nprobe incompatibility for user {self.user_id}. Attempting re-init with numpy backend fallback...")
|
logger.error(f"Detected known txtai/faiss IndexIDMap/nprobe incompatibility for user {self.user_id}. Attempting re-init with numpy backend fallback...")
|
||||||
# Switch to numpy backend which doesn't have this issue
|
# Switch to numpy backend which doesn't have this issue
|
||||||
self._backend = "numpy"
|
self._backend = "numpy"
|
||||||
|
self._initialized = False
|
||||||
self._initialize_embeddings()
|
self._initialize_embeddings()
|
||||||
if self.embeddings:
|
if self.embeddings:
|
||||||
results = self.embeddings.search(query, limit=limit)
|
results = self.embeddings.search(query, limit=limit)
|
||||||
@@ -270,7 +271,9 @@ class TxtaiIntelligenceService:
|
|||||||
except AttributeError as ae:
|
except AttributeError as ae:
|
||||||
if "nprobe" in str(ae):
|
if "nprobe" in str(ae):
|
||||||
logger.error(f"Detected IndexIDMap nprobe error in similarity for user {self.user_id}. Falling back to numpy backend...")
|
logger.error(f"Detected IndexIDMap nprobe error in similarity for user {self.user_id}. Falling back to numpy backend...")
|
||||||
|
# Switch to numpy backend which doesn't have this issue
|
||||||
self._backend = "numpy"
|
self._backend = "numpy"
|
||||||
|
self._initialized = False
|
||||||
self._initialize_embeddings()
|
self._initialize_embeddings()
|
||||||
if self.embeddings:
|
if self.embeddings:
|
||||||
similarity = self.embeddings.similarity(text1, text2)
|
similarity = self.embeddings.similarity(text1, text2)
|
||||||
@@ -328,7 +331,7 @@ class TxtaiIntelligenceService:
|
|||||||
# Check if we have graph functionality available
|
# Check if we have graph functionality available
|
||||||
if not hasattr(self.embeddings, 'graph') or not self.embeddings.graph:
|
if not hasattr(self.embeddings, 'graph') or not self.embeddings.graph:
|
||||||
logger.warning(f"Graph clustering not available for user {self.user_id}. Using fallback clustering.")
|
logger.warning(f"Graph clustering not available for user {self.user_id}. Using fallback clustering.")
|
||||||
return self._fallback_clustering(min_score)
|
return await self._fallback_clustering(min_score)
|
||||||
|
|
||||||
# Use graph-based clustering if available
|
# Use graph-based clustering if available
|
||||||
# Perform a search to get graph structure
|
# Perform a search to get graph structure
|
||||||
@@ -338,10 +341,13 @@ class TxtaiIntelligenceService:
|
|||||||
except AttributeError as ae:
|
except AttributeError as ae:
|
||||||
if "nprobe" in str(ae):
|
if "nprobe" in str(ae):
|
||||||
logger.error(f"Detected IndexIDMap nprobe error in cluster for user {self.user_id}. Falling back to numpy backend...")
|
logger.error(f"Detected IndexIDMap nprobe error in cluster for user {self.user_id}. Falling back to numpy backend...")
|
||||||
|
# Force re-initialization with numpy backend to bypass FAISS issue
|
||||||
self._backend = "numpy"
|
self._backend = "numpy"
|
||||||
|
self._initialized = False
|
||||||
self._initialize_embeddings()
|
self._initialize_embeddings()
|
||||||
if self.embeddings:
|
if self.embeddings:
|
||||||
graph_results = self.embeddings.search(sample_query, limit=10, graph=True)
|
# Retry with numpy backend (no graph support, so fallback)
|
||||||
|
return await self._fallback_clustering(min_score)
|
||||||
else:
|
else:
|
||||||
raise ae
|
raise ae
|
||||||
else:
|
else:
|
||||||
@@ -349,7 +355,7 @@ class TxtaiIntelligenceService:
|
|||||||
|
|
||||||
if not graph_results:
|
if not graph_results:
|
||||||
logger.warning(f"No graph results for clustering user {self.user_id}")
|
logger.warning(f"No graph results for clustering user {self.user_id}")
|
||||||
return self._fallback_clustering(min_score)
|
return await self._fallback_clustering(min_score)
|
||||||
|
|
||||||
# Extract clusters from graph results
|
# Extract clusters from graph results
|
||||||
clusters = self._extract_clusters_from_graph(graph_results, min_score)
|
clusters = self._extract_clusters_from_graph(graph_results, min_score)
|
||||||
@@ -377,7 +383,7 @@ class TxtaiIntelligenceService:
|
|||||||
logger.error(f"Clustering failed for user {self.user_id}: {e}")
|
logger.error(f"Clustering failed for user {self.user_id}: {e}")
|
||||||
logger.error(f"Min score: {min_score}")
|
logger.error(f"Min score: {min_score}")
|
||||||
logger.error(f"Full traceback: {traceback.format_exc()}")
|
logger.error(f"Full traceback: {traceback.format_exc()}")
|
||||||
return self._fallback_clustering(min_score)
|
return await self._fallback_clustering(min_score)
|
||||||
|
|
||||||
async def _fallback_clustering(self, min_score: float) -> List[List[int]]:
|
async def _fallback_clustering(self, min_score: float) -> List[List[int]]:
|
||||||
"""Fallback clustering method when graph clustering is not available."""
|
"""Fallback clustering method when graph clustering is not available."""
|
||||||
|
|||||||
104
docs/llm_gateway/Architecture.md
Normal file
104
docs/llm_gateway/Architecture.md
Normal file
@@ -0,0 +1,104 @@
|
|||||||
|
# ALwrity LLM Gateway – Architecture Overview
|
||||||
|
|
||||||
|
ALwrity’s LLM Gateway lives under [llm_providers](../../backend/services/llm_providers) and provides a consistent, production‑oriented interface for text, image, audio, and video generation across multiple model providers. It encapsulates provider differences, applies subscription enforcement, and centralizes observability and reliability patterns.
|
||||||
|
|
||||||
|
## Goals
|
||||||
|
- Unified surface for LLM operations across providers
|
||||||
|
- Strong subscription enforcement and cost awareness
|
||||||
|
- Resilient calls with retries and structured error handling
|
||||||
|
- Extensible provider architecture with clear contracts
|
||||||
|
- Transparent metrics, usage logging, and pricing integration
|
||||||
|
|
||||||
|
## High‑Level Flow
|
||||||
|
1. Entry points route requests to the appropriate capability:
|
||||||
|
- Text generation via [main_text_generation.py](../../backend/services/llm_providers/main_text_generation.py)
|
||||||
|
- Image generation and editing via [image_generation](../../backend/services/llm_providers/image_generation)
|
||||||
|
- Video generation via [video_generation](../../backend/services/llm_providers/video_generation)
|
||||||
|
- Audio/STT via [audio_to_text_generation](../../backend/services/llm_providers/audio_to_text_generation)
|
||||||
|
2. Subscription enforcement integrates before provider calls:
|
||||||
|
- Uses PricingService and UsageTrackingService to validate tokens/operations
|
||||||
|
- Blocks requests that exceed limits with actionable error payloads
|
||||||
|
3. Provider module performs the call with provider‑specific SDKs/APIs
|
||||||
|
4. Results are normalized to ALwrity types and returned upstream
|
||||||
|
|
||||||
|
## Core Components
|
||||||
|
- **Text Generation Entry**: [main_text_generation.py](../../backend/services/llm_providers/main_text_generation.py)
|
||||||
|
- Detects available providers via APIKeyManager
|
||||||
|
- Applies strict subscription checks using PricingService and UsageTrackingService
|
||||||
|
- Routes to Gemini or Hugging Face implementations
|
||||||
|
- **Image Generation Contracts**: [base.py](../../backend/services/llm_providers/image_generation/base.py)
|
||||||
|
- Options and Result dataclasses
|
||||||
|
- Protocols for generation, edit, and face‑swap providers
|
||||||
|
- **Video Generation Contracts**: [base.py](../../backend/services/llm_providers/video_generation/base.py)
|
||||||
|
- Options and Result dataclasses
|
||||||
|
- Async protocol with progress callbacks
|
||||||
|
- **Provider Implementations**:
|
||||||
|
- Gemini text: [gemini_provider.py](../../backend/services/llm_providers/gemini_provider.py)
|
||||||
|
- Hugging Face text: [huggingface_provider.py](../../backend/services/llm_providers/huggingface_provider.py)
|
||||||
|
- Hugging Face image: [hf_provider.py](../../backend/services/llm_providers/image_generation/hf_provider.py)
|
||||||
|
- WaveSpeed video: [wavespeed_provider.py](../../backend/services/llm_providers/video_generation/wavespeed_provider.py)
|
||||||
|
|
||||||
|
## Provider Abstraction
|
||||||
|
- Image providers conform to:
|
||||||
|
- ImageGenerationProvider.generate(options) -> ImageGenerationResult
|
||||||
|
- ImageEditProvider.edit(options) -> ImageGenerationResult
|
||||||
|
- FaceSwapProvider.swap_face(options) -> ImageGenerationResult
|
||||||
|
- Video providers conform to:
|
||||||
|
- VideoGenerationProvider.generate_video(options, progress_cb) -> VideoGenerationResult
|
||||||
|
|
||||||
|
These contracts ensure consistent options/result types so downstream UI and logging remain stable regardless of provider.
|
||||||
|
|
||||||
|
## Subscription Enforcement
|
||||||
|
- Performed in the text pipeline entry point before any provider call:
|
||||||
|
- See enforcement and usage checks in [main_text_generation.py](../../backend/services/llm_providers/main_text_generation.py#L117-L166)
|
||||||
|
- Preflight operations endpoint also validates multi‑operation cost/limits:
|
||||||
|
- See [preflight.py](../../backend/api/subscription/routes/preflight.py)
|
||||||
|
- Image/video modules typically rely on the calling route to validate limits first, then perform provider calls.
|
||||||
|
|
||||||
|
## Configuration and Secrets
|
||||||
|
- Gemini: GEMINI_API_KEY
|
||||||
|
- Loaded and validated in [gemini_provider.py](../../backend/services/llm_providers/gemini_provider.py#L101-L116)
|
||||||
|
- Hugging Face: HF_TOKEN
|
||||||
|
- Loaded and validated in [huggingface_provider.py](../../backend/services/llm_providers/huggingface_provider.py#L90-L105)
|
||||||
|
- Hugging Face image defaults: HF_IMAGE_MODEL
|
||||||
|
- Used in [image_generation/hf_provider.py](../../backend/services/llm_providers/image_generation/hf_provider.py#L17-L21)
|
||||||
|
- Provider clients must never log secrets; logs are provider‑scoped via get_service_logger.
|
||||||
|
|
||||||
|
## Reliability and Error Handling
|
||||||
|
- Exponential backoff retries using tenacity:
|
||||||
|
- Gemini text: [gemini_text_response](../../backend/services/llm_providers/gemini_provider.py#L117)
|
||||||
|
- Hugging Face text: [huggingface_text_response](../../backend/services/llm_providers/huggingface_provider.py#L106)
|
||||||
|
- Structured exceptions surface HTTP 429 for limit breaches with usage info
|
||||||
|
- Provider modules return normalized results; callers handle downstream persistence and telemetry
|
||||||
|
|
||||||
|
## Pricing and Cost Awareness
|
||||||
|
- Preflight cost estimation computes operation costs per provider/model:
|
||||||
|
- See multi‑operation handling in [preflight.py](../../backend/api/subscription/routes/preflight.py#L100-L144)
|
||||||
|
- Video cost calculation is provider/model aware:
|
||||||
|
- See WaveSpeed services and `calculate_cost` in [video_generation/wavespeed_provider.py](../../backend/services/llm_providers/video_generation/wavespeed_provider.py#L44-L56)
|
||||||
|
|
||||||
|
## Observability
|
||||||
|
- Service‑scoped loggers for each provider/module
|
||||||
|
- Central usage logs recorded via subscription services on the calling routes
|
||||||
|
- Provider metadata normalized in result objects for consistent analytics
|
||||||
|
|
||||||
|
## Extensibility Guidelines
|
||||||
|
- Implement the appropriate Protocol interface in a new provider module
|
||||||
|
- Normalize options and results to the gateway dataclasses
|
||||||
|
- Keep environment/key validation local to the provider module
|
||||||
|
- Add cost mapping in PricingService and preflight for new operations/models
|
||||||
|
- Wire subscription validation in the calling route before invoking provider
|
||||||
|
|
||||||
|
## Request Lifecycle (Text)
|
||||||
|
1. Client submits prompt to text endpoint
|
||||||
|
2. Entry point determines provider (env or APIKeyManager) and validates subscription limits
|
||||||
|
3. Provider‑specific function executes with retries and returns normalized text
|
||||||
|
4. Caller logs usage and returns response to client
|
||||||
|
|
||||||
|
## Request Lifecycle (Media)
|
||||||
|
1. Client submits generation/edit/face‑swap request
|
||||||
|
2. Route validates plan limits (tokens, requests, or per‑operation limits)
|
||||||
|
3. Provider service executes call and produces normalized binary payload and metadata
|
||||||
|
4. Caller logs usage and returns media/links to client
|
||||||
|
|
||||||
|
This architecture isolates provider variability while standardizing contracts, enabling safe expansion to new models and modalities without destabilizing upstream consumers.
|
||||||
96
docs/llm_gateway/Extending_the_Gateway.md
Normal file
96
docs/llm_gateway/Extending_the_Gateway.md
Normal file
@@ -0,0 +1,96 @@
|
|||||||
|
# Extending the LLM Gateway
|
||||||
|
|
||||||
|
This guide provides a checklist and templates for adding new providers or modalities to the ALwrity LLM Gateway.
|
||||||
|
|
||||||
|
## Checklist
|
||||||
|
|
||||||
|
1. **Define the Provider Interface**:
|
||||||
|
- [ ] Create a new module in `backend/services/llm_providers/<modality>/`.
|
||||||
|
- [ ] Define input options dataclass (e.g., `MyNewProviderOptions`).
|
||||||
|
- [ ] Implement the standard Protocol (e.g., `ImageGenerationProvider`).
|
||||||
|
|
||||||
|
2. **Configuration**:
|
||||||
|
- [ ] Add necessary API keys to `.env.example` and `APIKeyManager`.
|
||||||
|
- [ ] Add new provider enum to `backend/models/subscription_models.py` (`APIProvider`).
|
||||||
|
|
||||||
|
3. **Pricing & Usage**:
|
||||||
|
- [ ] Add default pricing in `PricingService` or migration script.
|
||||||
|
- [ ] Ensure `UsageSummary` table has columns for this provider (if it's a major one) or map it to a generic category.
|
||||||
|
|
||||||
|
4. **Integration**:
|
||||||
|
- [ ] Register the provider in the main entry point (e.g., `main_image_generation.py`).
|
||||||
|
- [ ] Update `preflight.py` to handle cost estimation for this provider.
|
||||||
|
|
||||||
|
5. **Frontend**:
|
||||||
|
- [ ] Update `billingService.ts` to handle the new provider key in usage stats (if applicable).
|
||||||
|
- [ ] Add provider icon/color in `billingService.ts`.
|
||||||
|
|
||||||
|
## Skeleton Template (Python)
|
||||||
|
|
||||||
|
Here is a template for a new **Image Generation Provider**:
|
||||||
|
|
||||||
|
```python
|
||||||
|
from __future__ import annotations
|
||||||
|
import os
|
||||||
|
from typing import Optional, Dict, Any
|
||||||
|
from dataclasses import dataclass
|
||||||
|
from .base import ImageGenerationOptions, ImageGenerationResult, ImageGenerationProvider
|
||||||
|
from utils.logger_utils import get_service_logger
|
||||||
|
|
||||||
|
logger = get_service_logger("image_generation.my_new_provider")
|
||||||
|
|
||||||
|
class MyNewProvider(ImageGenerationProvider):
|
||||||
|
"""
|
||||||
|
My New Provider implementation.
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(self, api_key: Optional[str] = None):
|
||||||
|
self.api_key = api_key or os.getenv("MY_PROVIDER_API_KEY")
|
||||||
|
if not self.api_key:
|
||||||
|
raise RuntimeError("MY_PROVIDER_API_KEY is required")
|
||||||
|
# Initialize client here
|
||||||
|
|
||||||
|
def generate(self, options: ImageGenerationOptions) -> ImageGenerationResult:
|
||||||
|
logger.info(f"Generating image with MyNewProvider: {options.prompt[:50]}...")
|
||||||
|
|
||||||
|
try:
|
||||||
|
# 1. Call External API
|
||||||
|
# response = client.generate(...)
|
||||||
|
|
||||||
|
# 2. Process Response (Mock)
|
||||||
|
image_bytes = b"fake_image_data"
|
||||||
|
width = options.width
|
||||||
|
height = options.height
|
||||||
|
|
||||||
|
# 3. Return Standard Result
|
||||||
|
return ImageGenerationResult(
|
||||||
|
image_bytes=image_bytes,
|
||||||
|
width=width,
|
||||||
|
height=height,
|
||||||
|
provider="my_new_provider",
|
||||||
|
model=options.model or "default-model",
|
||||||
|
seed=options.seed,
|
||||||
|
metadata={"raw_response": "..."}
|
||||||
|
)
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Generation failed: {e}")
|
||||||
|
raise
|
||||||
|
```
|
||||||
|
|
||||||
|
## Skeleton Template (Route Integration)
|
||||||
|
|
||||||
|
In `main_image_generation.py`:
|
||||||
|
|
||||||
|
```python
|
||||||
|
from .image_generation.my_new_provider import MyNewProvider
|
||||||
|
|
||||||
|
def generate_image(prompt: str, provider: str, ...):
|
||||||
|
# ... existing code ...
|
||||||
|
|
||||||
|
if provider == "my_new_provider":
|
||||||
|
service = MyNewProvider()
|
||||||
|
result = service.generate(options)
|
||||||
|
|
||||||
|
# ... existing code ...
|
||||||
|
```
|
||||||
59
docs/llm_gateway/Features_and_Status.md
Normal file
59
docs/llm_gateway/Features_and_Status.md
Normal file
@@ -0,0 +1,59 @@
|
|||||||
|
# LLM Gateway – Features & Implementation Status
|
||||||
|
|
||||||
|
This document provides a high-level overview of the LLM Gateway's capabilities and the current production status of each component.
|
||||||
|
|
||||||
|
## Core Features
|
||||||
|
|
||||||
|
- **Unified Interface**: Single API surface for text, image, video, and audio generation, abstracting away provider-specific SDKs.
|
||||||
|
- **Provider Agnostic**: Switch between Gemini, Hugging Face, Stability, WaveSpeed, etc., via configuration or runtime parameters.
|
||||||
|
- **Subscription Enforcement**: Strict pre-flight checks against user plans (Free, Basic, Pro, Enterprise) before any API call.
|
||||||
|
- **Cost Awareness**: Granular tracking of input/output tokens, request counts, and media generation costs per provider/model.
|
||||||
|
- **Resilience**: Built-in retries (exponential backoff) for transient failures (rate limits, timeouts).
|
||||||
|
- **Observability**: Centralized logging (`APIUsageLog`) and usage aggregation (`UsageSummary`) for all modalities.
|
||||||
|
- **Streaming Support**: (Partial) Infrastructure for text streaming exists, but most text generation currently returns blocking (non-streaming) responses.
|
||||||
|
|
||||||
|
## Implementation Status
|
||||||
|
|
||||||
|
### 1. Text Generation
|
||||||
|
| Feature | Provider | Status | Notes |
|
||||||
|
| :--- | :--- | :--- | :--- |
|
||||||
|
| **Chat/Completion** | Google Gemini | ✅ Production | Default provider. Supports `gemini-2.0-flash`. |
|
||||||
|
| **Chat/Completion** | Hugging Face | ✅ Production | via Inference Providers (e.g., `mistralai/Mistral-7B`). |
|
||||||
|
| **Structured JSON** | Gemini | ✅ Production | Uses `response_schema` for reliable parsing. |
|
||||||
|
| **Structured JSON** | Hugging Face | ✅ Production | Uses `response_format={ "type": "json_object" }`. |
|
||||||
|
|
||||||
|
### 2. Image Generation
|
||||||
|
| Feature | Provider | Status | Notes |
|
||||||
|
| :--- | :--- | :--- | :--- |
|
||||||
|
| **Text-to-Image** | Google Gemini | ✅ Production | Imagen 3 models. |
|
||||||
|
| **Text-to-Image** | Hugging Face | ✅ Production | FLUX.1 via fal-ai/Black Forest Labs. |
|
||||||
|
| **Text-to-Image** | Stability AI | ✅ Production | Core/SD3 models. |
|
||||||
|
| **Text-to-Image** | WaveSpeed | ✅ Production | High-speed generation. |
|
||||||
|
| **Image Editing** | WaveSpeed | ✅ Production | Inpainting, background removal, face swap. |
|
||||||
|
|
||||||
|
### 3. Video Generation
|
||||||
|
| Feature | Provider | Status | Notes |
|
||||||
|
| :--- | :--- | :--- | :--- |
|
||||||
|
| **Text-to-Video** | WaveSpeed | ✅ Production | HunyuanVideo-1.5, LTX-2 Pro. |
|
||||||
|
| **Image-to-Video** | WaveSpeed | 🚧 Planned | Roadmap item. |
|
||||||
|
|
||||||
|
### 4. Audio Generation
|
||||||
|
| Feature | Provider | Status | Notes |
|
||||||
|
| :--- | :--- | :--- | :--- |
|
||||||
|
| **Text-to-Speech** | Gemini | ✅ Production | Audio generation capability. |
|
||||||
|
| **Text-to-Speech** | WaveSpeed | ✅ Production | Fast TTS. |
|
||||||
|
| **Speech-to-Text** | Gemini | ✅ Production | Transcription (via `audio_to_text_generation`). |
|
||||||
|
|
||||||
|
### 5. Research & Tools
|
||||||
|
| Feature | Provider | Status | Notes |
|
||||||
|
| :--- | :--- | :--- | :--- |
|
||||||
|
| **Web Search** | Tavily | ✅ Production | Integrated for grounded research. |
|
||||||
|
| **Web Search** | Serper | ✅ Production | Google Search API alternative. |
|
||||||
|
| **Web Search** | Exa | ✅ Production | Neural search. |
|
||||||
|
|
||||||
|
## Roadmap & Next Steps
|
||||||
|
|
||||||
|
- **Streaming Standardization**: Unify streaming interfaces across all text providers for consistent frontend UX.
|
||||||
|
- **Model Fallbacks**: Automatic failover to secondary providers if the primary is down (currently manual/env-based).
|
||||||
|
- **Fine-tuning Support**: Add gateway endpoints for triggering fine-tuning jobs and for using the resulting fine-tuned models.
|
||||||
|
- **Caching Layer**: Redis-based semantic caching for frequent queries to reduce costs.
|
||||||
97
docs/llm_gateway/Modules.md
Normal file
97
docs/llm_gateway/Modules.md
Normal file
@@ -0,0 +1,97 @@
|
|||||||
|
# LLM Gateway – Module Reference
|
||||||
|
|
||||||
|
This document catalogs the modules under [llm_providers](../../backend/services/llm_providers) with their responsibilities, key classes/functions, configuration, and integration points.
|
||||||
|
|
||||||
|
## Text Generation
|
||||||
|
- **Entry point**: [main_text_generation.py](../../backend/services/llm_providers/main_text_generation.py)
|
||||||
|
- llm_text_gen(prompt, system_prompt, json_struct, user_id)
|
||||||
|
- Responsibilities:
|
||||||
|
- Resolve provider (env or APIKeyManager)
|
||||||
|
- Perform strict subscription checks (PricingService, UsageTrackingService)
|
||||||
|
- Call Gemini or Hugging Face implementations
|
||||||
|
- Integration:
|
||||||
|
- models.subscription_models.APIProvider mapping
|
||||||
|
- services.subscription.PricingService, UsageTrackingService
|
||||||
|
|
||||||
|
- **Gemini provider**: [gemini_provider.py](../../backend/services/llm_providers/gemini_provider.py)
|
||||||
|
- get_gemini_api_key() – env validation
|
||||||
|
- gemini_text_response(...) – tenacity‑backed retries, text output
|
||||||
|
- gemini_structured_json_response(...) – structured JSON output
|
||||||
|
- Config: GEMINI_API_KEY
|
||||||
|
- SDK: google.generativeai
|
||||||
|
|
||||||
|
- **Hugging Face provider**: [huggingface_provider.py](../../backend/services/llm_providers/huggingface_provider.py)
|
||||||
|
- get_huggingface_api_key() – env validation
|
||||||
|
- huggingface_text_response(...) – Responses API (OpenAI client), retries
|
||||||
|
- huggingface_structured_json_response(...) – structured JSON output
|
||||||
|
- Config: HF_TOKEN
|
||||||
|
- SDK: openai client pointed at Hugging Face router
|
||||||
|
|
||||||
|
## Image Generation
|
||||||
|
- **Contracts**: [image_generation/base.py](../../backend/services/llm_providers/image_generation/base.py)
|
||||||
|
- ImageGenerationOptions, ImageGenerationResult
|
||||||
|
- ImageEditOptions, FaceSwapOptions (with to_dict helpers)
|
||||||
|
- Protocols: ImageGenerationProvider, ImageEditProvider, FaceSwapProvider
|
||||||
|
|
||||||
|
- **Hugging Face image**: [image_generation/hf_provider.py](../../backend/services/llm_providers/image_generation/hf_provider.py)
|
||||||
|
- Class: HuggingFaceImageProvider(ImageGenerationProvider)
|
||||||
|
- generate(options) -> ImageGenerationResult
|
||||||
|
- Config: HF_TOKEN, HF_IMAGE_MODEL (default FLUX.1‑Krea‑dev)
|
||||||
|
- SDK: huggingface_hub.InferenceClient (provider="fal-ai")
|
||||||
|
|
||||||
|
- **Other image modules**:
|
||||||
|
- [image_generation/gemini_provider.py](../../backend/services/llm_providers/image_generation/gemini_provider.py) – Gemini image generation integration
|
||||||
|
- [image_generation/wavespeed_provider.py](../../backend/services/llm_providers/image_generation/wavespeed_provider.py) – WaveSpeed image editing
|
||||||
|
- [image_generation/wavespeed_face_swap_provider.py](../../backend/services/llm_providers/image_generation/wavespeed_face_swap_provider.py) – Face swap
|
||||||
|
- [image_generation/wavespeed_edit_provider.py](../../backend/services/llm_providers/image_generation/wavespeed_edit_provider.py) – General edits
|
||||||
|
|
||||||
|
## Video Generation
|
||||||
|
- **Contracts**: [video_generation/base.py](../../backend/services/llm_providers/video_generation/base.py)
|
||||||
|
- VideoGenerationOptions, VideoGenerationResult
|
||||||
|
- Protocol: VideoGenerationProvider (async, progress callbacks)
|
||||||
|
|
||||||
|
- **WaveSpeed video**: [video_generation/wavespeed_provider.py](../../backend/services/llm_providers/video_generation/wavespeed_provider.py)
|
||||||
|
- BaseWaveSpeedTextToVideoService:
|
||||||
|
- MODEL_NAME/PATH contract
|
||||||
|
- calculate_cost(resolution, duration)
|
||||||
|
- input validation helpers
|
||||||
|
- Model services (e.g., HunyuanVideoService, LTX‑2 variants)
|
||||||
|
- Client: services.wavespeed.client.WaveSpeedClient
|
||||||
|
|
||||||
|
## Audio / STT
|
||||||
|
- **Modules**:
|
||||||
|
- [audio_to_text_generation/gemini_audio_text.py](../../backend/services/llm_providers/audio_to_text_generation/gemini_audio_text.py)
|
||||||
|
- [audio_to_text_generation/stt_audio_blog.py](../../backend/services/llm_providers/audio_to_text_generation/stt_audio_blog.py)
|
||||||
|
- Responsibilities:
|
||||||
|
- Convert audio to text
|
||||||
|
- Provide structured outputs for downstream blog/content workflows
|
||||||
|
|
||||||
|
## Shared Patterns
|
||||||
|
- **Environment handling**:
|
||||||
|
- Providers validate their own secrets and default models
|
||||||
|
- No secrets logged; provider‑scoped logger via utils.logger_utils.get_service_logger
|
||||||
|
- **Result normalization**:
|
||||||
|
- Binary payloads (image_bytes, video_bytes) and metadata are standardized
|
||||||
|
- Provider name/model surfaced in result for analytics
|
||||||
|
- **Retries and resilience**:
|
||||||
|
- Text providers use tenacity exponential backoff
|
||||||
|
- Media providers implement validation and sensible defaults
|
||||||
|
|
||||||
|
## Integration Points
|
||||||
|
- Subscription enforcement and preflight:
|
||||||
|
- [api/subscription/routes/preflight.py](../../backend/api/subscription/routes/preflight.py)
|
||||||
|
- PricingService/UsageTrackingService are invoked prior to calling providers
|
||||||
|
- Usage logging:
|
||||||
|
- Centralized in subscription services; gateway returns normalized data for logging
|
||||||
|
- Pricing:
|
||||||
|
- Per‑provider and per‑model costs reflected in preflight and service layers
|
||||||
|
|
||||||
|
## Extending the Gateway
|
||||||
|
1. Choose modality (text/image/video/audio)
|
||||||
|
2. Implement the appropriate Protocol and dataclasses
|
||||||
|
3. Validate and load configuration from environment
|
||||||
|
4. Normalize outputs to gateway result types
|
||||||
|
5. Add pricing/preflight entries and update subscription limit checks
|
||||||
|
6. Add route handlers that perform validation then call the new provider
|
||||||
|
|
||||||
|
Following this reference ensures new providers integrate smoothly with ALwrity’s subscription, pricing, and analytics subsystems while keeping UI/API stable across diverse models.
|
||||||
71
docs/subscription/Billing_and_Usage.md
Normal file
71
docs/subscription/Billing_and_Usage.md
Normal file
@@ -0,0 +1,71 @@
|
|||||||
|
# Subscription, Billing & Usage Tracking
|
||||||
|
|
||||||
|
This document details how ALwrity manages subscriptions, processes payments via Stripe, and tracks granular usage for every user interaction.
|
||||||
|
|
||||||
|
## 1. Subscription Model
|
||||||
|
|
||||||
|
ALwrity uses a **tier-based subscription model** enforced at the API gateway level.
|
||||||
|
|
||||||
|
### Tiers
|
||||||
|
- **Free**: Limited access, community support.
|
||||||
|
- **Basic**: Entry-level AI usage, standard support.
|
||||||
|
- **Pro**: High limits, advanced models (Gemini Pro, FLUX), priority support.
|
||||||
|
- **Enterprise**: Custom limits, dedicated infrastructure.
|
||||||
|
|
||||||
|
### Data Model (`UserSubscription`)
|
||||||
|
Stored in the user's SQLite database (`alwrity_user_{id}.db`):
|
||||||
|
- `stripe_customer_id`: Link to Stripe Customer.
|
||||||
|
- `stripe_subscription_id`: Active subscription ID.
|
||||||
|
- `plan_id`: Internal plan reference (linked to `SubscriptionPlan`).
|
||||||
|
- `status`: `active`, `past_due`, `canceled`, etc.
|
||||||
|
- `current_period_start` / `end`: Defines the billing cycle window.
|
||||||
|
|
||||||
|
## 2. Billing Integration (Stripe)
|
||||||
|
|
||||||
|
We use **Stripe** for all payments, webhooks, and portal management.
|
||||||
|
|
||||||
|
### Key Components
|
||||||
|
- **StripeService**: Handles checkout creation, portal sessions, and webhooks.
|
||||||
|
- **Webhooks**: Listens for events like `invoice.payment_succeeded`, `customer.subscription.updated`.
|
||||||
|
- **Idempotency**: All webhooks are tracked in `ProcessedStripeEvent` to prevent duplicate processing.
|
||||||
|
- **Reliability**: Events are processed transactionally; failures are logged and retried by Stripe.
|
||||||
|
- **Configuration**: Plan-to-Price mapping is loaded from environment variables (`STRIPE_PLAN_PRICE_MAPPING_TEST` / `_LIVE`) to ensure sync between code and Stripe Dashboard.
|
||||||
|
|
||||||
|
### Checkout Flow
|
||||||
|
1. Frontend calls `/api/subscription/create-checkout-session`.
|
||||||
|
2. Backend validates user and creates Stripe Session.
|
||||||
|
3. User pays on Stripe.
|
||||||
|
4. Stripe sends `checkout.session.completed` webhook.
|
||||||
|
5. Backend provisions subscription and credits in `UserSubscription`.
|
||||||
|
|
||||||
|
## 3. Usage Tracking
|
||||||
|
|
||||||
|
Every API call to an LLM provider is tracked, costed, and logged.
|
||||||
|
|
||||||
|
### Tracking Flow
|
||||||
|
1. **Pre-flight Check** (`check_usage_limits`):
|
||||||
|
- Before generating content, the system estimates cost/tokens.
|
||||||
|
- If the user would exceed plan limits (e.g., "50 videos/month"), the request is rejected with HTTP 429.
|
||||||
|
2. **Execution**: The provider generates the content.
|
||||||
|
3. **Post-execution Log** (`track_usage`):
|
||||||
|
- Actual tokens/duration are measured.
|
||||||
|
- Cost is calculated based on `APIProviderPricing` table.
|
||||||
|
- Entry added to `APIUsageLog` (granular) and aggregated into `UsageSummary` (monthly totals).
|
||||||
|
|
||||||
|
### Database Tables
|
||||||
|
- **`APIUsageLog`**: Immutable ledger of every call.
|
||||||
|
- Fields: `user_id`, `provider`, `model`, `input_tokens`, `output_tokens`, `cost`, `status_code`.
|
||||||
|
- **`UsageSummary`**: Aggregated stats per billing period.
|
||||||
|
- Fields: `total_calls`, `total_cost`, `gemini_calls`, `video_calls`, etc.
|
||||||
|
- **Unique Constraint**: Enforced on `(user_id, billing_period)` so each user has at most one summary row per billing period.
|
||||||
|
|
||||||
|
### Pricing Engine (`PricingService`)
|
||||||
|
- Costs are not hardcoded. They are fetched from `APIProviderPricing` table.
|
||||||
|
- Supports per-token (text), per-image (media), and per-second (video/audio) pricing models.
|
||||||
|
- Admin can update pricing in DB without redeploying code.
|
||||||
|
|
||||||
|
## 4. Frontend Integration
|
||||||
|
|
||||||
|
- **Usage Dashboard**: Visualizes consumption vs. limits.
|
||||||
|
- **Real-time**: Usage stats are typically updated immediately after generation.
|
||||||
|
- **Limit Rings**: UI components show percentage used (e.g., "80% of monthly video limit").
|
||||||
Reference in New Issue
Block a user