chore: Update backend services, intelligence integration, and documentation
This commit is contained in:
@@ -925,6 +925,8 @@ class SIFIntegrationService:
|
||||
# Content pillar analysis
|
||||
if self.intelligence_service.is_initialized():
|
||||
clusters = await self.intelligence_service.cluster(min_score=0.6)
|
||||
if asyncio.iscoroutine(clusters):
|
||||
clusters = await clusters
|
||||
insights["content_pillars"] = self._format_clusters_as_pillars(clusters)
|
||||
|
||||
# Semantic gaps analysis
|
||||
|
||||
@@ -215,6 +215,7 @@ class TxtaiIntelligenceService:
|
||||
logger.error(f"Detected known txtai/faiss IndexIDMap/nprobe incompatibility for user {self.user_id}. Attempting re-init with numpy backend fallback...")
|
||||
# Switch to numpy backend which doesn't have this issue
|
||||
self._backend = "numpy"
|
||||
self._initialized = False
|
||||
self._initialize_embeddings()
|
||||
if self.embeddings:
|
||||
results = self.embeddings.search(query, limit=limit)
|
||||
@@ -270,7 +271,9 @@ class TxtaiIntelligenceService:
|
||||
except AttributeError as ae:
|
||||
if "nprobe" in str(ae):
|
||||
logger.error(f"Detected IndexIDMap nprobe error in similarity for user {self.user_id}. Falling back to numpy backend...")
|
||||
# Switch to numpy backend which doesn't have this issue
|
||||
self._backend = "numpy"
|
||||
self._initialized = False
|
||||
self._initialize_embeddings()
|
||||
if self.embeddings:
|
||||
similarity = self.embeddings.similarity(text1, text2)
|
||||
@@ -328,7 +331,7 @@ class TxtaiIntelligenceService:
|
||||
# Check if we have graph functionality available
|
||||
if not hasattr(self.embeddings, 'graph') or not self.embeddings.graph:
|
||||
logger.warning(f"Graph clustering not available for user {self.user_id}. Using fallback clustering.")
|
||||
return self._fallback_clustering(min_score)
|
||||
return await self._fallback_clustering(min_score)
|
||||
|
||||
# Use graph-based clustering if available
|
||||
# Perform a search to get graph structure
|
||||
@@ -338,10 +341,13 @@ class TxtaiIntelligenceService:
|
||||
except AttributeError as ae:
|
||||
if "nprobe" in str(ae):
|
||||
logger.error(f"Detected IndexIDMap nprobe error in cluster for user {self.user_id}. Falling back to numpy backend...")
|
||||
# Force re-initialization with numpy backend to bypass FAISS issue
|
||||
self._backend = "numpy"
|
||||
self._initialized = False
|
||||
self._initialize_embeddings()
|
||||
if self.embeddings:
|
||||
graph_results = self.embeddings.search(sample_query, limit=10, graph=True)
|
||||
# Retry with numpy backend (no graph support, so fallback)
|
||||
return await self._fallback_clustering(min_score)
|
||||
else:
|
||||
raise ae
|
||||
else:
|
||||
@@ -349,7 +355,7 @@ class TxtaiIntelligenceService:
|
||||
|
||||
if not graph_results:
|
||||
logger.warning(f"No graph results for clustering user {self.user_id}")
|
||||
return self._fallback_clustering(min_score)
|
||||
return await self._fallback_clustering(min_score)
|
||||
|
||||
# Extract clusters from graph results
|
||||
clusters = self._extract_clusters_from_graph(graph_results, min_score)
|
||||
@@ -377,7 +383,7 @@ class TxtaiIntelligenceService:
|
||||
logger.error(f"Clustering failed for user {self.user_id}: {e}")
|
||||
logger.error(f"Min score: {min_score}")
|
||||
logger.error(f"Full traceback: {traceback.format_exc()}")
|
||||
return self._fallback_clustering(min_score)
|
||||
return await self._fallback_clustering(min_score)
|
||||
|
||||
async def _fallback_clustering(self, min_score: float) -> List[List[int]]:
|
||||
"""Fallback clustering method when graph clustering is not available."""
|
||||
|
||||
104
docs/llm_gateway/Architecture.md
Normal file
104
docs/llm_gateway/Architecture.md
Normal file
@@ -0,0 +1,104 @@
|
||||
# ALwrity LLM Gateway – Architecture Overview
|
||||
|
||||
ALwrity’s LLM Gateway lives under [llm_providers](../../backend/services/llm_providers) and provides a consistent, production‑oriented interface for text, image, audio, and video generation across multiple model providers. It encapsulates provider differences, applies subscription enforcement, and centralizes observability and reliability patterns.
|
||||
|
||||
## Goals
|
||||
- Unified surface for LLM operations across providers
|
||||
- Strong subscription enforcement and cost awareness
|
||||
- Resilient calls with retries and structured error handling
|
||||
- Extensible provider architecture with clear contracts
|
||||
- Transparent metrics, usage logging, and pricing integration
|
||||
|
||||
## High‑Level Flow
|
||||
1. Entry points route requests to the appropriate capability:
|
||||
- Text generation via [main_text_generation.py](../../backend/services/llm_providers/main_text_generation.py)
|
||||
- Image generation and editing via [image_generation](../../backend/services/llm_providers/image_generation)
|
||||
- Video generation via [video_generation](../../backend/services/llm_providers/video_generation)
|
||||
- Audio/STT via [audio_to_text_generation](../../backend/services/llm_providers/audio_to_text_generation)
|
||||
2. Subscription enforcement integrates before provider calls:
|
||||
- Uses PricingService and UsageTrackingService to validate tokens/operations
|
||||
- Blocks requests that exceed limits with actionable error payloads
|
||||
3. Provider module performs the call with provider‑specific SDKs/APIs
|
||||
4. Results are normalized to ALwrity types and returned upstream
|
||||
|
||||
## Core Components
|
||||
- **Text Generation Entry**: [main_text_generation.py](../../backend/services/llm_providers/main_text_generation.py)
|
||||
- Detects available providers via APIKeyManager
|
||||
- Applies strict subscription checks using PricingService and UsageTrackingService
|
||||
- Routes to Gemini or Hugging Face implementations
|
||||
- **Image Generation Contracts**: [base.py](../../backend/services/llm_providers/image_generation/base.py)
|
||||
- Options and Result dataclasses
|
||||
- Protocols for generation, edit, and face‑swap providers
|
||||
- **Video Generation Contracts**: [base.py](../../backend/services/llm_providers/video_generation/base.py)
|
||||
- Options and Result dataclasses
|
||||
- Async protocol with progress callbacks
|
||||
- **Provider Implementations**:
|
||||
- Gemini text: [gemini_provider.py](../../backend/services/llm_providers/gemini_provider.py)
|
||||
- Hugging Face text: [huggingface_provider.py](../../backend/services/llm_providers/huggingface_provider.py)
|
||||
- Hugging Face image: [hf_provider.py](../../backend/services/llm_providers/image_generation/hf_provider.py)
|
||||
- WaveSpeed video: [wavespeed_provider.py](../../backend/services/llm_providers/video_generation/wavespeed_provider.py)
|
||||
|
||||
## Provider Abstraction
|
||||
- Image providers conform to:
|
||||
- ImageGenerationProvider.generate(options) -> ImageGenerationResult
|
||||
- ImageEditProvider.edit(options) -> ImageGenerationResult
|
||||
- FaceSwapProvider.swap_face(options) -> ImageGenerationResult
|
||||
- Video providers conform to:
|
||||
- VideoGenerationProvider.generate_video(options, progress_cb) -> VideoGenerationResult
|
||||
|
||||
These contracts ensure consistent options/result types so downstream UI and logging remain stable regardless of provider.
|
||||
|
||||
## Subscription Enforcement
|
||||
- Performed in the text pipeline entry point before any provider call:
|
||||
- See enforcement and usage checks in [main_text_generation.py](../../backend/services/llm_providers/main_text_generation.py#L117-L166)
|
||||
- Preflight operations endpoint also validates multi‑operation cost/limits:
|
||||
- See [preflight.py](../../backend/api/subscription/routes/preflight.py)
|
||||
- Image/video modules typically rely on the calling route to validate limits first, then perform provider calls.
|
||||
|
||||
## Configuration and Secrets
|
||||
- Gemini: GEMINI_API_KEY
|
||||
- Loaded and validated in [gemini_provider.py](../../backend/services/llm_providers/gemini_provider.py#L101-L116)
|
||||
- Hugging Face: HF_TOKEN
|
||||
- Loaded and validated in [huggingface_provider.py](../../backend/services/llm_providers/huggingface_provider.py#L90-L105)
|
||||
- Hugging Face image defaults: HF_IMAGE_MODEL
|
||||
- Used in [image_generation/hf_provider.py](../../backend/services/llm_providers/image_generation/hf_provider.py#L17-L21)
|
||||
- Provider clients must never log secrets; logs are provider‑scoped via get_service_logger.
|
||||
|
||||
## Reliability and Error Handling
|
||||
- Exponential backoff retries using tenacity:
|
||||
- Gemini text: [gemini_text_response](../../backend/services/llm_providers/gemini_provider.py#L117)
|
||||
- Hugging Face text: [huggingface_text_response](../../backend/services/llm_providers/huggingface_provider.py#L106)
|
||||
- Structured exceptions surface HTTP 429 for limit breaches with usage info
|
||||
- Provider modules return normalized results; callers handle downstream persistence and telemetry
|
||||
|
||||
## Pricing and Cost Awareness
|
||||
- Preflight cost estimation computes operation costs per provider/model:
|
||||
- See multi‑operation handling in [preflight.py](../../backend/api/subscription/routes/preflight.py#L100-L144)
|
||||
- Video cost calculation is provider/model aware:
|
||||
- See WaveSpeed services and `calculate_cost` in [video_generation/wavespeed_provider.py](../../backend/services/llm_providers/video_generation/wavespeed_provider.py#L44-L56)
|
||||
|
||||
## Observability
|
||||
- Service‑scoped loggers for each provider/module
|
||||
- Central usage logs recorded via subscription services on the calling routes
|
||||
- Provider metadata normalized in result objects for consistent analytics
|
||||
|
||||
## Extensibility Guidelines
|
||||
- Implement the appropriate Protocol interface in a new provider module
|
||||
- Normalize options and results to the gateway dataclasses
|
||||
- Keep environment/key validation local to the provider module
|
||||
- Add cost mapping in PricingService and preflight for new operations/models
|
||||
- Wire subscription validation in the calling route before invoking provider
|
||||
|
||||
## Request Lifecycle (Text)
|
||||
1. Client submits prompt to text endpoint
|
||||
2. Entry point determines provider (env or APIKeyManager) and validates subscription limits
|
||||
3. Provider‑specific function executes with retries and returns normalized text
|
||||
4. Caller logs usage and returns response to client
|
||||
|
||||
## Request Lifecycle (Media)
|
||||
1. Client submits generation/edit/face‑swap request
|
||||
2. Route validates plan limits (tokens, requests, or per‑operation limits)
|
||||
3. Provider service executes call and produces normalized binary payload and metadata
|
||||
4. Caller logs usage and returns media/links to client
|
||||
|
||||
This architecture isolates provider variability while standardizing contracts, enabling safe expansion to new models and modalities without destabilizing upstream consumers.
|
||||
96
docs/llm_gateway/Extending_the_Gateway.md
Normal file
96
docs/llm_gateway/Extending_the_Gateway.md
Normal file
@@ -0,0 +1,96 @@
|
||||
# Extending the LLM Gateway
|
||||
|
||||
This guide provides a checklist and templates for adding new providers or modalities to the ALwrity LLM Gateway.
|
||||
|
||||
## Checklist
|
||||
|
||||
1. **Define the Provider Interface**:
|
||||
- [ ] Create a new module in `backend/services/llm_providers/<modality>/`.
|
||||
- [ ] Define input options dataclass (e.g., `MyNewProviderOptions`).
|
||||
- [ ] Implement the standard Protocol (e.g., `ImageGenerationProvider`).
|
||||
|
||||
2. **Configuration**:
|
||||
- [ ] Add necessary API keys to `.env.example` and `APIKeyManager`.
|
||||
- [ ] Add new provider enum to `backend/models/subscription_models.py` (`APIProvider`).
|
||||
|
||||
3. **Pricing & Usage**:
|
||||
- [ ] Add default pricing in `PricingService` or migration script.
|
||||
- [ ] Ensure `UsageSummary` table has columns for this provider (if it's a major one) or map it to a generic category.
|
||||
|
||||
4. **Integration**:
|
||||
- [ ] Register the provider in the main entry point (e.g., `main_image_generation.py`).
|
||||
- [ ] Update `preflight.py` to handle cost estimation for this provider.
|
||||
|
||||
5. **Frontend**:
|
||||
- [ ] Update `billingService.ts` to handle the new provider key in usage stats (if applicable).
|
||||
- [ ] Add provider icon/color in `billingService.ts`.
|
||||
|
||||
## Skeleton Template (Python)
|
||||
|
||||
Here is a template for a new **Image Generation Provider**:
|
||||
|
||||
```python
|
||||
from __future__ import annotations
|
||||
import os
|
||||
from typing import Optional, Dict, Any
|
||||
from dataclasses import dataclass
|
||||
from .base import ImageGenerationOptions, ImageGenerationResult, ImageGenerationProvider
|
||||
from utils.logger_utils import get_service_logger
|
||||
|
||||
logger = get_service_logger("image_generation.my_new_provider")
|
||||
|
||||
class MyNewProvider(ImageGenerationProvider):
|
||||
"""
|
||||
My New Provider implementation.
|
||||
"""
|
||||
|
||||
def __init__(self, api_key: Optional[str] = None):
|
||||
self.api_key = api_key or os.getenv("MY_PROVIDER_API_KEY")
|
||||
if not self.api_key:
|
||||
raise RuntimeError("MY_PROVIDER_API_KEY is required")
|
||||
# Initialize client here
|
||||
|
||||
def generate(self, options: ImageGenerationOptions) -> ImageGenerationResult:
|
||||
logger.info(f"Generating image with MyNewProvider: {options.prompt[:50]}...")
|
||||
|
||||
try:
|
||||
# 1. Call External API
|
||||
# response = client.generate(...)
|
||||
|
||||
# 2. Process Response (Mock)
|
||||
image_bytes = b"fake_image_data"
|
||||
width = options.width
|
||||
height = options.height
|
||||
|
||||
# 3. Return Standard Result
|
||||
return ImageGenerationResult(
|
||||
image_bytes=image_bytes,
|
||||
width=width,
|
||||
height=height,
|
||||
provider="my_new_provider",
|
||||
model=options.model or "default-model",
|
||||
seed=options.seed,
|
||||
metadata={"raw_response": "..."}
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Generation failed: {e}")
|
||||
raise
|
||||
```
|
||||
|
||||
## Skeleton Template (Route Integration)
|
||||
|
||||
In `main_image_generation.py`:
|
||||
|
||||
```python
|
||||
from .image_generation.my_new_provider import MyNewProvider
|
||||
|
||||
def generate_image(prompt: str, provider: str, ...):
|
||||
# ... existing code ...
|
||||
|
||||
if provider == "my_new_provider":
|
||||
service = MyNewProvider()
|
||||
result = service.generate(options)
|
||||
|
||||
# ... existing code ...
|
||||
```
|
||||
59
docs/llm_gateway/Features_and_Status.md
Normal file
59
docs/llm_gateway/Features_and_Status.md
Normal file
@@ -0,0 +1,59 @@
|
||||
# LLM Gateway – Features & Implementation Status
|
||||
|
||||
This document provides a high-level overview of the LLM Gateway's capabilities and the current production status of each component.
|
||||
|
||||
## Core Features
|
||||
|
||||
- **Unified Interface**: Single API surface for text, image, video, and audio generation, abstracting away provider-specific SDKs.
|
||||
- **Provider Agnostic**: Switch between Gemini, Hugging Face, Stability, WaveSpeed, etc., via configuration or runtime parameters.
|
||||
- **Subscription Enforcement**: Strict pre-flight checks against user plans (Free, Basic, Pro, Enterprise) before any API call.
|
||||
- **Cost Awareness**: Granular tracking of input/output tokens, request counts, and media generation costs per provider/model.
|
||||
- **Resilience**: Built-in retries (exponential backoff) for transient failures (rate limits, timeouts).
|
||||
- **Observability**: Centralized logging (`APIUsageLog`) and usage aggregation (`UsageSummary`) for all modalities.
|
||||
- **Streaming Support** (partial): Infrastructure for text streaming exists, but the gateway currently serves mostly blocking (non-streaming) responses.
|
||||
|
||||
## Implementation Status
|
||||
|
||||
### 1. Text Generation
|
||||
| Feature | Provider | Status | Notes |
|
||||
| :--- | :--- | :--- | :--- |
|
||||
| **Chat/Completion** | Google Gemini | ✅ Production | Default provider. Supports `gemini-2.0-flash`. |
|
||||
| **Chat/Completion** | Hugging Face | ✅ Production | via Inference Providers (e.g., `mistralai/Mistral-7B`). |
|
||||
| **Structured JSON** | Gemini | ✅ Production | Uses `response_schema` for reliable parsing. |
|
||||
| **Structured JSON** | Hugging Face | ✅ Production | Uses `response_format={ "type": "json_object" }`. |
|
||||
|
||||
### 2. Image Generation
|
||||
| Feature | Provider | Status | Notes |
|
||||
| :--- | :--- | :--- | :--- |
|
||||
| **Text-to-Image** | Google Gemini | ✅ Production | Imagen 3 models. |
|
||||
| **Text-to-Image** | Hugging Face | ✅ Production | FLUX.1 via fal-ai/Black Forest Labs. |
|
||||
| **Text-to-Image** | Stability AI | ✅ Production | Core/SD3 models. |
|
||||
| **Text-to-Image** | WaveSpeed | ✅ Production | High-speed generation. |
|
||||
| **Image Editing** | WaveSpeed | ✅ Production | Inpainting, background removal, face swap. |
|
||||
|
||||
### 3. Video Generation
|
||||
| Feature | Provider | Status | Notes |
|
||||
| :--- | :--- | :--- | :--- |
|
||||
| **Text-to-Video** | WaveSpeed | ✅ Production | HunyuanVideo-1.5, LTX-2 Pro. |
|
||||
| **Image-to-Video** | WaveSpeed | 🚧 Planned | Roadmap item. |
|
||||
|
||||
### 4. Audio Generation
|
||||
| Feature | Provider | Status | Notes |
|
||||
| :--- | :--- | :--- | :--- |
|
||||
| **Text-to-Speech** | Gemini | ✅ Production | Audio generation capability. |
|
||||
| **Text-to-Speech** | WaveSpeed | ✅ Production | Fast TTS. |
|
||||
| **Speech-to-Text** | Gemini | ✅ Production | Transcription (via `audio_to_text_generation`). |
|
||||
|
||||
### 5. Research & Tools
|
||||
| Feature | Provider | Status | Notes |
|
||||
| :--- | :--- | :--- | :--- |
|
||||
| **Web Search** | Tavily | ✅ Production | Integrated for grounded research. |
|
||||
| **Web Search** | Serper | ✅ Production | Google Search API alternative. |
|
||||
| **Web Search** | Exa | ✅ Production | Neural search. |
|
||||
|
||||
## Roadmap & Next Steps
|
||||
|
||||
- **Streaming Standardization**: Unify streaming interfaces across all text providers for consistent frontend UX.
|
||||
- **Model Fallbacks**: Automatic failover to secondary providers if the primary is down (currently manual/env-based).
|
||||
- **Fine-tuning Support**: Add gateway endpoints for triggering fine-tuning jobs and using the resulting fine-tuned models.
|
||||
- **Caching Layer**: Redis-based semantic caching for frequent queries to reduce costs.
|
||||
97
docs/llm_gateway/Modules.md
Normal file
97
docs/llm_gateway/Modules.md
Normal file
@@ -0,0 +1,97 @@
|
||||
# LLM Gateway – Module Reference
|
||||
|
||||
This document catalogs the modules under [llm_providers](../../backend/services/llm_providers) with their responsibilities, key classes/functions, configuration, and integration points.
|
||||
|
||||
## Text Generation
|
||||
- **Entry point**: [main_text_generation.py](../../backend/services/llm_providers/main_text_generation.py)
|
||||
- llm_text_gen(prompt, system_prompt, json_struct, user_id)
|
||||
- Responsibilities:
|
||||
- Resolve provider (env or APIKeyManager)
|
||||
- Perform strict subscription checks (PricingService, UsageTrackingService)
|
||||
- Call Gemini or Hugging Face implementations
|
||||
- Integration:
|
||||
- models.subscription_models.APIProvider mapping
|
||||
- services.subscription.PricingService, UsageTrackingService
|
||||
|
||||
- **Gemini provider**: [gemini_provider.py](../../backend/services/llm_providers/gemini_provider.py)
|
||||
- get_gemini_api_key() – env validation
|
||||
- gemini_text_response(...) – tenacity‑backed retries, text output
|
||||
- gemini_structured_json_response(...) – structured JSON output
|
||||
- Config: GEMINI_API_KEY
|
||||
- SDK: google.generativeai
|
||||
|
||||
- **Hugging Face provider**: [huggingface_provider.py](../../backend/services/llm_providers/huggingface_provider.py)
|
||||
- get_huggingface_api_key() – env validation
|
||||
- huggingface_text_response(...) – Responses API (OpenAI client), retries
|
||||
- huggingface_structured_json_response(...) – structured JSON output
|
||||
- Config: HF_TOKEN
|
||||
- SDK: openai client pointed at Hugging Face router
|
||||
|
||||
## Image Generation
|
||||
- **Contracts**: [image_generation/base.py](../../backend/services/llm_providers/image_generation/base.py)
|
||||
- ImageGenerationOptions, ImageGenerationResult
|
||||
- ImageEditOptions, FaceSwapOptions (with to_dict helpers)
|
||||
- Protocols: ImageGenerationProvider, ImageEditProvider, FaceSwapProvider
|
||||
|
||||
- **Hugging Face image**: [image_generation/hf_provider.py](../../backend/services/llm_providers/image_generation/hf_provider.py)
|
||||
- Class: HuggingFaceImageProvider(ImageGenerationProvider)
|
||||
- generate(options) -> ImageGenerationResult
|
||||
- Config: HF_TOKEN, HF_IMAGE_MODEL (default FLUX.1‑Krea‑dev)
|
||||
- SDK: huggingface_hub.InferenceClient (provider="fal-ai")
|
||||
|
||||
- **Other image modules**:
|
||||
- [image_generation/gemini_provider.py](../../backend/services/llm_providers/image_generation/gemini_provider.py) – Gemini image generation integration
|
||||
- [image_generation/wavespeed_provider.py](../../backend/services/llm_providers/image_generation/wavespeed_provider.py) – WaveSpeed image editing
|
||||
- [image_generation/wavespeed_face_swap_provider.py](../../backend/services/llm_providers/image_generation/wavespeed_face_swap_provider.py) – Face swap
|
||||
- [image_generation/wavespeed_edit_provider.py](../../backend/services/llm_providers/image_generation/wavespeed_edit_provider.py) – General edits
|
||||
|
||||
## Video Generation
|
||||
- **Contracts**: [video_generation/base.py](../../backend/services/llm_providers/video_generation/base.py)
|
||||
- VideoGenerationOptions, VideoGenerationResult
|
||||
- Protocol: VideoGenerationProvider (async, progress callbacks)
|
||||
|
||||
- **WaveSpeed video**: [video_generation/wavespeed_provider.py](../../backend/services/llm_providers/video_generation/wavespeed_provider.py)
|
||||
- BaseWaveSpeedTextToVideoService:
|
||||
- MODEL_NAME/PATH contract
|
||||
- calculate_cost(resolution, duration)
|
||||
- input validation helpers
|
||||
- Model services (e.g., HunyuanVideoService, LTX‑2 variants)
|
||||
- Client: services.wavespeed.client.WaveSpeedClient
|
||||
|
||||
## Audio / STT
|
||||
- **Modules**:
|
||||
- [audio_to_text_generation/gemini_audio_text.py](../../backend/services/llm_providers/audio_to_text_generation/gemini_audio_text.py)
|
||||
- [audio_to_text_generation/stt_audio_blog.py](../../backend/services/llm_providers/audio_to_text_generation/stt_audio_blog.py)
|
||||
- Responsibilities:
|
||||
- Convert audio to text
|
||||
- Provide structured outputs for downstream blog/content workflows
|
||||
|
||||
## Shared Patterns
|
||||
- **Environment handling**:
|
||||
- Providers validate their own secrets and default models
|
||||
- No secrets logged; provider‑scoped logger via utils.logger_utils.get_service_logger
|
||||
- **Result normalization**:
|
||||
- Binary payloads (image_bytes, video_bytes) and metadata are standardized
|
||||
- Provider name/model surfaced in result for analytics
|
||||
- **Retries and resilience**:
|
||||
- Text providers use tenacity exponential backoff
|
||||
- Media providers implement validation and sensible defaults
|
||||
|
||||
## Integration Points
|
||||
- Subscription enforcement and preflight:
|
||||
- [api/subscription/routes/preflight.py](../../backend/api/subscription/routes/preflight.py)
|
||||
- PricingService/UsageTrackingService are invoked prior to calling providers
|
||||
- Usage logging:
|
||||
- Centralized in subscription services; gateway returns normalized data for logging
|
||||
- Pricing:
|
||||
- Per‑provider and per‑model costs reflected in preflight and service layers
|
||||
|
||||
## Extending the Gateway
|
||||
1. Choose modality (text/image/video/audio)
|
||||
2. Implement the appropriate Protocol and dataclasses
|
||||
3. Validate and load configuration from environment
|
||||
4. Normalize outputs to gateway result types
|
||||
5. Add pricing/preflight entries and update subscription limit checks
|
||||
6. Add route handlers that perform validation then call the new provider
|
||||
|
||||
Following this reference ensures new providers integrate smoothly with ALwrity’s subscription, pricing, and analytics subsystems while keeping UI/API stable across diverse models.
|
||||
71
docs/subscription/Billing_and_Usage.md
Normal file
71
docs/subscription/Billing_and_Usage.md
Normal file
@@ -0,0 +1,71 @@
|
||||
# Subscription, Billing & Usage Tracking
|
||||
|
||||
This document details how ALwrity manages subscriptions, processes payments via Stripe, and tracks granular usage for every user interaction.
|
||||
|
||||
## 1. Subscription Model
|
||||
|
||||
ALwrity uses a **tier-based subscription model** enforced at the API gateway level.
|
||||
|
||||
### Tiers
|
||||
- **Free**: Limited access, community support.
|
||||
- **Basic**: Entry-level AI usage, standard support.
|
||||
- **Pro**: High limits, advanced models (Gemini Pro, FLUX), priority support.
|
||||
- **Enterprise**: Custom limits, dedicated infrastructure.
|
||||
|
||||
### Data Model (`UserSubscription`)
|
||||
Stored in the user's SQLite database (`alwrity_user_{id}.db`):
|
||||
- `stripe_customer_id`: Link to Stripe Customer.
|
||||
- `stripe_subscription_id`: Active subscription ID.
|
||||
- `plan_id`: Internal plan reference (linked to `SubscriptionPlan`).
|
||||
- `status`: `active`, `past_due`, `canceled`, etc.
|
||||
- `current_period_start` / `end`: Defines the billing cycle window.
|
||||
|
||||
## 2. Billing Integration (Stripe)
|
||||
|
||||
We use **Stripe** for all payments, webhooks, and portal management.
|
||||
|
||||
### Key Components
|
||||
- **StripeService**: Handles checkout creation, portal sessions, and webhooks.
|
||||
- **Webhooks**: Listens for events like `invoice.payment_succeeded`, `customer.subscription.updated`.
|
||||
- **Idempotency**: All webhooks are tracked in `ProcessedStripeEvent` to prevent duplicate processing.
|
||||
- **Reliability**: Events are processed transactionally; failures are logged, and Stripe retries delivery of the failed event.
|
||||
- **Configuration**: Plan-to-Price mapping is loaded from environment variables (`STRIPE_PLAN_PRICE_MAPPING_TEST` / `_LIVE`) to ensure sync between code and Stripe Dashboard.
|
||||
|
||||
### Checkout Flow
|
||||
1. Frontend calls `/api/subscription/create-checkout-session`.
|
||||
2. Backend validates user and creates Stripe Session.
|
||||
3. User pays on Stripe.
|
||||
4. Stripe sends `checkout.session.completed` webhook.
|
||||
5. Backend provisions subscription and credits in `UserSubscription`.
|
||||
|
||||
## 3. Usage Tracking
|
||||
|
||||
Every API call to an LLM provider is tracked, costed, and logged.
|
||||
|
||||
### Tracking Flow
|
||||
1. **Pre-flight Check** (`check_usage_limits`):
|
||||
- Before generating content, the system estimates cost/tokens.
|
||||
- If the request would exceed the user's plan limits (e.g., "50 videos/month"), it is rejected with HTTP 429.
|
||||
2. **Execution**: The provider generates the content.
|
||||
3. **Post-execution Log** (`track_usage`):
|
||||
- Actual tokens/duration are measured.
|
||||
- Cost is calculated based on `APIProviderPricing` table.
|
||||
- Entry added to `APIUsageLog` (granular) and aggregated into `UsageSummary` (monthly totals).
|
||||
|
||||
### Database Tables
|
||||
- **`APIUsageLog`**: Immutable ledger of every call.
|
||||
- Fields: `user_id`, `provider`, `model`, `input_tokens`, `output_tokens`, `cost`, `status_code`.
|
||||
- **`UsageSummary`**: Aggregated stats per billing period.
|
||||
- Fields: `total_calls`, `total_cost`, `gemini_calls`, `video_calls`, etc.
|
||||
- **Unique Constraint**: Enforced on `(user_id, billing_period)` so each user has at most one summary row per billing period, preventing duplicate or diverging totals.
|
||||
|
||||
### Pricing Engine (`PricingService`)
|
||||
- Costs are not hardcoded. They are fetched from `APIProviderPricing` table.
|
||||
- Supports per-token (text), per-image (media), and per-second (video/audio) pricing models.
|
||||
- Admin can update pricing in DB without redeploying code.
|
||||
|
||||
## 4. Frontend Integration
|
||||
|
||||
- **Usage Dashboard**: Visualizes consumption vs. limits.
|
||||
- **Real-time**: Usage stats are typically updated immediately after generation.
|
||||
- **Limit Rings**: UI components show percentage used (e.g., "80% of monthly video limit").
|
||||
Reference in New Issue
Block a user