chore: Update backend services, intelligence integration, and documentation

2026-03-05 22:14:25 +05:30
parent 5cccb89df8
commit 110f7318cc
7 changed files with 439 additions and 4 deletions
--- a/backend/services/intelligence/sif_integration.py
+++ b/backend/services/intelligence/sif_integration.py
@@ -925,6 +925,8 @@ class SIFIntegrationService:
            # Content pillar analysis
            if self.intelligence_service.is_initialized():
                clusters = await self.intelligence_service.cluster(min_score=0.6)
+                if asyncio.iscoroutine(clusters):
+                    clusters = await clusters
                insights["content_pillars"] = self._format_clusters_as_pillars(clusters)
                
                # Semantic gaps analysis
--- a/backend/services/intelligence/txtai_service.py
+++ b/backend/services/intelligence/txtai_service.py
@@ -215,6 +215,7 @@ class TxtaiIntelligenceService:
                    logger.error(f"Detected known txtai/faiss IndexIDMap/nprobe incompatibility for user {self.user_id}. Attempting re-init with numpy backend fallback...")
                    # Switch to numpy backend which doesn't have this issue
                    self._backend = "numpy"
+                    self._initialized = False
                    self._initialize_embeddings()
                    if self.embeddings:
                        results = self.embeddings.search(query, limit=limit)
@@ -270,7 +271,9 @@ class TxtaiIntelligenceService:
            except AttributeError as ae:
                if "nprobe" in str(ae):
                    logger.error(f"Detected IndexIDMap nprobe error in similarity for user {self.user_id}. Falling back to numpy backend...")
+                    # Switch to numpy backend which doesn't have this issue
                    self._backend = "numpy"
+                    self._initialized = False
                    self._initialize_embeddings()
                    if self.embeddings:
                        similarity = self.embeddings.similarity(text1, text2)
@@ -328,7 +331,7 @@ class TxtaiIntelligenceService:
            # Check if we have graph functionality available
            if not hasattr(self.embeddings, 'graph') or not self.embeddings.graph:
                logger.warning(f"Graph clustering not available for user {self.user_id}. Using fallback clustering.")
-                return self._fallback_clustering(min_score)
+                return await self._fallback_clustering(min_score)
            
            # Use graph-based clustering if available
            # Perform a search to get graph structure
@@ -338,10 +341,13 @@ class TxtaiIntelligenceService:
            except AttributeError as ae:
                if "nprobe" in str(ae):
                    logger.error(f"Detected IndexIDMap nprobe error in cluster for user {self.user_id}. Falling back to numpy backend...")
+                    # Force re-initialization with numpy backend to bypass FAISS issue
                    self._backend = "numpy"
+                    self._initialized = False
                    self._initialize_embeddings()
                    if self.embeddings:
-                        graph_results = self.embeddings.search(sample_query, limit=10, graph=True)
+                        # Retry with numpy backend (no graph support, so fallback)
+                        return await self._fallback_clustering(min_score)
                    else:
                        raise ae
                else:
@@ -349,7 +355,7 @@ class TxtaiIntelligenceService:
            
            if not graph_results:
                logger.warning(f"No graph results for clustering user {self.user_id}")
-                return self._fallback_clustering(min_score)
+                return await self._fallback_clustering(min_score)
            
            # Extract clusters from graph results
            clusters = self._extract_clusters_from_graph(graph_results, min_score)
@@ -377,7 +383,7 @@ class TxtaiIntelligenceService:
            logger.error(f"Clustering failed for user {self.user_id}: {e}")
            logger.error(f"Min score: {min_score}")
            logger.error(f"Full traceback: {traceback.format_exc()}")
-            return self._fallback_clustering(min_score)
+            return await self._fallback_clustering(min_score)
    
    async def _fallback_clustering(self, min_score: float) -> List[List[int]]:
        """Fallback clustering method when graph clustering is not available."""
--- a/docs/llm_gateway/Architecture.md
+++ b/docs/llm_gateway/Architecture.md
@@ -0,0 +1,104 @@
+# ALwrity LLM Gateway – Architecture Overview
+
+ALwrity’s LLM Gateway lives under [llm_providers](../../backend/services/llm_providers) and provides a consistent, production‑oriented interface for text, image, audio, and video generation across multiple model providers. It encapsulates provider differences, applies subscription enforcement, and centralizes observability and reliability patterns.
+
+## Goals
+- Unified surface for LLM operations across providers
+- Strong subscription enforcement and cost awareness
+- Resilient calls with retries and structured error handling
+- Extensible provider architecture with clear contracts
+- Transparent metrics, usage logging, and pricing integration
+
+## High‑Level Flow
+1. Entry points route requests to the appropriate capability:
+   - Text generation via [main_text_generation.py](../../backend/services/llm_providers/main_text_generation.py)
+   - Image generation and editing via [image_generation](../../backend/services/llm_providers/image_generation)
+   - Video generation via [video_generation](../../backend/services/llm_providers/video_generation)
+   - Audio/STT via [audio_to_text_generation](../../backend/services/llm_providers/audio_to_text_generation)
+2. Subscription enforcement integrates before provider calls:
+   - Uses PricingService and UsageTrackingService to validate tokens/operations
+   - Blocks requests that exceed limits with actionable error payloads
+3. Provider module performs the call with provider‑specific SDKs/APIs
+4. Results are normalized to ALwrity types and returned upstream
+
+## Core Components
+- **Text Generation Entry**: [main_text_generation.py](../../backend/services/llm_providers/main_text_generation.py)
+  - Detects available providers via APIKeyManager
+  - Applies strict subscription checks using PricingService and UsageTrackingService
+  - Routes to Gemini or Hugging Face implementations
+- **Image Generation Contracts**: [base.py](../../backend/services/llm_providers/image_generation/base.py)
+  - Options and Result dataclasses
+  - Protocols for generation, edit, and face‑swap providers
+- **Video Generation Contracts**: [base.py](../../backend/services/llm_providers/video_generation/base.py)
+  - Options and Result dataclasses
+  - Async protocol with progress callbacks
+- **Provider Implementations**:
+  - Gemini text: [gemini_provider.py](../../backend/services/llm_providers/gemini_provider.py)
+  - Hugging Face text: [huggingface_provider.py](../../backend/services/llm_providers/huggingface_provider.py)
+  - Hugging Face image: [hf_provider.py](../../backend/services/llm_providers/image_generation/hf_provider.py)
+  - WaveSpeed video: [wavespeed_provider.py](../../backend/services/llm_providers/video_generation/wavespeed_provider.py)
+
+## Provider Abstraction
+- Image providers conform to:
+  - ImageGenerationProvider.generate(options) -> ImageGenerationResult
+  - ImageEditProvider.edit(options) -> ImageGenerationResult
+  - FaceSwapProvider.swap_face(options) -> ImageGenerationResult
+- Video providers conform to:
+  - VideoGenerationProvider.generate_video(options, progress_cb) -> VideoGenerationResult
+
+These contracts ensure consistent options/result types so downstream UI and logging remain stable regardless of provider.
+
+## Subscription Enforcement
+- Performed in the text pipeline entry point before any provider call:
+  - See enforcement and usage checks in [main_text_generation.py](../../backend/services/llm_providers/main_text_generation.py#L117-L166)
+- Preflight operations endpoint also validates multi‑operation cost/limits:
+  - See [preflight.py](../../backend/api/subscription/routes/preflight.py)
+- Image/video modules typically rely on the calling route to validate limits first, then perform provider calls.
+
+## Configuration and Secrets
+- Gemini: GEMINI_API_KEY
+  - Loaded and validated in [gemini_provider.py](../../backend/services/llm_providers/gemini_provider.py#L101-L116)
+- Hugging Face: HF_TOKEN
+  - Loaded and validated in [huggingface_provider.py](../../backend/services/llm_providers/huggingface_provider.py#L90-L105)
+- Hugging Face image defaults: HF_IMAGE_MODEL
+  - Used in [image_generation/hf_provider.py](../../backend/services/llm_providers/image_generation/hf_provider.py#L17-L21)
+- Provider clients must never log secrets; logs are provider‑scoped via get_service_logger.
+
+## Reliability and Error Handling
+- Exponential backoff retries using tenacity:
+  - Gemini text: [gemini_text_response](../../backend/services/llm_providers/gemini_provider.py#L117)
+  - Hugging Face text: [huggingface_text_response](../../backend/services/llm_providers/huggingface_provider.py#L106)
+- Structured exceptions surface HTTP 429 for limit breaches with usage info
+- Provider modules return normalized results; callers handle downstream persistence and telemetry
+
+## Pricing and Cost Awareness
+- Preflight cost estimation computes operation costs per provider/model:
+  - See multi‑operation handling in [preflight.py](../../backend/api/subscription/routes/preflight.py#L100-L144)
+- Video cost calculation is provider/model aware:
+  - See WaveSpeed services and `calculate_cost` in [video_generation/wavespeed_provider.py](../../backend/services/llm_providers/video_generation/wavespeed_provider.py#L44-L56)
+
+## Observability
+- Service‑scoped loggers for each provider/module
+- Central usage logs recorded via subscription services on the calling routes
+- Provider metadata normalized in result objects for consistent analytics
+
+## Extensibility Guidelines
+- Implement the appropriate Protocol interface in a new provider module
+- Normalize options and results to the gateway dataclasses
+- Keep environment/key validation local to the provider module
+- Add cost mapping in PricingService and preflight for new operations/models
+- Wire subscription validation in the calling route before invoking provider
+
+## Request Lifecycle (Text)
+1. Client submits prompt to text endpoint
+2. Entry point determines provider (env or APIKeyManager) and validates subscription limits
+3. Provider‑specific function executes with retries and returns normalized text
+4. Caller logs usage and returns response to client
+
+## Request Lifecycle (Media)
+1. Client submits generation/edit/face‑swap request
+2. Route validates plan limits (tokens, requests, or per‑operation limits)
+3. Provider service executes call and produces normalized binary payload and metadata
+4. Caller logs usage and returns media/links to client
+
+This architecture isolates provider variability while standardizing contracts, enabling safe expansion to new models and modalities without destabilizing upstream consumers.
--- a/docs/llm_gateway/Extending_the_Gateway.md
+++ b/docs/llm_gateway/Extending_the_Gateway.md
@@ -0,0 +1,96 @@
+# Extending the LLM Gateway
+
+This guide provides a checklist and templates for adding new providers or modalities to the ALwrity LLM Gateway.
+
+## Checklist
+
+1.  **Define the Provider Interface**:
+    - [ ] Create a new module in `backend/services/llm_providers/<modality>/`.
+    - [ ] Define input options dataclass (e.g., `MyNewProviderOptions`).
+    - [ ] Implement the standard Protocol (e.g., `ImageGenerationProvider`).
+
+2.  **Configuration**:
+    - [ ] Add necessary API keys to `.env.example` and `APIKeyManager`.
+    - [ ] Add new provider enum to `backend/models/subscription_models.py` (`APIProvider`).
+
+3.  **Pricing & Usage**:
+    - [ ] Add default pricing in `PricingService` or migration script.
+    - [ ] Ensure `UsageSummary` table has columns for this provider (if it's a major one) or map it to a generic category.
+
+4.  **Integration**:
+    - [ ] Register the provider in the main entry point (e.g., `main_image_generation.py`).
+    - [ ] Update `preflight.py` to handle cost estimation for this provider.
+
+5.  **Frontend**:
+    - [ ] Update `billingService.ts` to handle the new provider key in usage stats (if applicable).
+    - [ ] Add provider icon/color in `billingService.ts`.
+
+## Skeleton Template (Python)
+
+Here is a template for a new **Image Generation Provider**:
+
+```python
+from __future__ import annotations
+import os
+from typing import Optional, Dict, Any
+from dataclasses import dataclass
+from .base import ImageGenerationOptions, ImageGenerationResult, ImageGenerationProvider
+from utils.logger_utils import get_service_logger
+
+logger = get_service_logger("image_generation.my_new_provider")
+
+class MyNewProvider(ImageGenerationProvider):
+    """
+    My New Provider implementation.
+    """
+
+    def __init__(self, api_key: Optional[str] = None):
+        self.api_key = api_key or os.getenv("MY_PROVIDER_API_KEY")
+        if not self.api_key:
+            raise RuntimeError("MY_PROVIDER_API_KEY is required")
+        # Initialize client here
+
+    def generate(self, options: ImageGenerationOptions) -> ImageGenerationResult:
+        logger.info(f"Generating image with MyNewProvider: {options.prompt[:50]}...")
+        
+        try:
+            # 1. Call External API
+            # response = client.generate(...)
+            
+            # 2. Process Response (Mock)
+            image_bytes = b"fake_image_data" 
+            width = options.width
+            height = options.height
+            
+            # 3. Return Standard Result
+            return ImageGenerationResult(
+                image_bytes=image_bytes,
+                width=width,
+                height=height,
+                provider="my_new_provider",
+                model=options.model or "default-model",
+                seed=options.seed,
+                metadata={"raw_response": "..."}
+            )
+            
+        except Exception as e:
+            logger.error(f"Generation failed: {e}")
+            raise
+```
+
+## Skeleton Template (Route Integration)
+
+In `main_image_generation.py`:
+
+```python
+from .image_generation.my_new_provider import MyNewProvider
+
+def generate_image(prompt: str, provider: str, ...):
+    # ... existing code ...
+    
+    if provider == "my_new_provider":
+        service = MyNewProvider()
+        result = service.generate(options)
+        
+    # ... existing code ...
+```
--- a/docs/llm_gateway/Features_and_Status.md
+++ b/docs/llm_gateway/Features_and_Status.md
@@ -0,0 +1,59 @@
+# LLM Gateway – Features & Implementation Status
+
+This document provides a high-level overview of the LLM Gateway's capabilities and the current production status of each component.
+
+## Core Features
+
+- **Unified Interface**: Single API surface for text, image, video, and audio generation, abstracting away provider-specific SDKs.
+- **Provider Agnostic**: Switch between Gemini, Hugging Face, Stability, WaveSpeed, etc., via configuration or runtime parameters.
+- **Subscription Enforcement**: Strict pre-flight checks against user plans (Free, Basic, Pro, Enterprise) before any API call.
+- **Cost Awareness**: Granular tracking of input/output tokens, request counts, and media generation costs per provider/model.
+- **Resilience**: Built-in retries (exponential backoff) for transient failures (rate limits, timeouts).
+- **Observability**: Centralized logging (`APIUsageLog`) and usage aggregation (`UsageSummary`) for all modalities.
+- **Streaming Support (partial)**: Infrastructure for text streaming exists, but most responses are currently returned as blocking (non-streamed) calls.
+
+## Implementation Status
+
+### 1. Text Generation
+| Feature | Provider | Status | Notes |
+| :--- | :--- | :--- | :--- |
+| **Chat/Completion** | Google Gemini | ✅ Production | Default provider. Supports `gemini-2.0-flash`. |
+| **Chat/Completion** | Hugging Face | ✅ Production | via Inference Providers (e.g., `mistralai/Mistral-7B`). |
+| **Structured JSON** | Gemini | ✅ Production | Uses `response_schema` for reliable parsing. |
+| **Structured JSON** | Hugging Face | ✅ Production | Uses `response_format={ "type": "json_object" }`. |
+
+### 2. Image Generation
+| Feature | Provider | Status | Notes |
+| :--- | :--- | :--- | :--- |
+| **Text-to-Image** | Google Gemini | ✅ Production | Imagen 3 models. |
+| **Text-to-Image** | Hugging Face | ✅ Production | FLUX.1 via fal-ai/Black Forest Labs. |
+| **Text-to-Image** | Stability AI | ✅ Production | Core/SD3 models. |
+| **Text-to-Image** | WaveSpeed | ✅ Production | High-speed generation. |
+| **Image Editing** | WaveSpeed | ✅ Production | Inpainting, background removal, face swap. |
+
+### 3. Video Generation
+| Feature | Provider | Status | Notes |
+| :--- | :--- | :--- | :--- |
+| **Text-to-Video** | WaveSpeed | ✅ Production | HunyuanVideo-1.5, LTX-2 Pro. |
+| **Image-to-Video** | WaveSpeed | 🚧 Planned | Roadmap item. |
+
+### 4. Audio Generation
+| Feature | Provider | Status | Notes |
+| :--- | :--- | :--- | :--- |
+| **Text-to-Speech** | Gemini | ✅ Production | Audio generation capability. |
+| **Text-to-Speech** | WaveSpeed | ✅ Production | Fast TTS. |
+| **Speech-to-Text** | Gemini | ✅ Production | Transcription (via `audio_to_text_generation`). |
+
+### 5. Research & Tools
+| Feature | Provider | Status | Notes |
+| :--- | :--- | :--- | :--- |
+| **Web Search** | Tavily | ✅ Production | Integrated for grounded research. |
+| **Web Search** | Serper | ✅ Production | Google Search API alternative. |
+| **Web Search** | Exa | ✅ Production | Neural search. |
+
+## Roadmap & Next Steps
+
+- **Streaming Standardization**: Unify streaming interfaces across all text providers for consistent frontend UX.
+- **Model Fallbacks**: Automatic failover to secondary providers if the primary is down (currently manual/env-based).
+- **Fine-tuning Support**: Add gateway endpoints for triggering fine-tuning jobs and for using the resulting fine-tuned models.
+- **Caching Layer**: Redis-based semantic caching for frequent queries to reduce costs.
--- a/docs/llm_gateway/Modules.md
+++ b/docs/llm_gateway/Modules.md
@@ -0,0 +1,97 @@
+# LLM Gateway – Module Reference
+
+This document catalogs the modules under [llm_providers](../../backend/services/llm_providers) with their responsibilities, key classes/functions, configuration, and integration points.
+
+## Text Generation
+- **Entry point**: [main_text_generation.py](../../backend/services/llm_providers/main_text_generation.py)
+  - llm_text_gen(prompt, system_prompt, json_struct, user_id)
+  - Responsibilities:
+    - Resolve provider (env or APIKeyManager)
+    - Perform strict subscription checks (PricingService, UsageTrackingService)
+    - Call Gemini or Hugging Face implementations
+  - Integration:
+    - models.subscription_models.APIProvider mapping
+    - services.subscription.PricingService, UsageTrackingService
+
+- **Gemini provider**: [gemini_provider.py](../../backend/services/llm_providers/gemini_provider.py)
+  - get_gemini_api_key() – env validation
+  - gemini_text_response(...) – tenacity‑backed retries, text output
+  - gemini_structured_json_response(...) – structured JSON output
+  - Config: GEMINI_API_KEY
+  - SDK: google.generativeai
+
+- **Hugging Face provider**: [huggingface_provider.py](../../backend/services/llm_providers/huggingface_provider.py)
+  - get_huggingface_api_key() – env validation
+  - huggingface_text_response(...) – Responses API (OpenAI client), retries
+  - huggingface_structured_json_response(...) – structured JSON output
+  - Config: HF_TOKEN
+  - SDK: openai client pointed at Hugging Face router
+
+## Image Generation
+- **Contracts**: [image_generation/base.py](../../backend/services/llm_providers/image_generation/base.py)
+  - ImageGenerationOptions, ImageGenerationResult
+  - ImageEditOptions, FaceSwapOptions (with to_dict helpers)
+  - Protocols: ImageGenerationProvider, ImageEditProvider, FaceSwapProvider
+
+- **Hugging Face image**: [image_generation/hf_provider.py](../../backend/services/llm_providers/image_generation/hf_provider.py)
+  - Class: HuggingFaceImageProvider(ImageGenerationProvider)
+  - generate(options) -> ImageGenerationResult
+  - Config: HF_TOKEN, HF_IMAGE_MODEL (default FLUX.1‑Krea‑dev)
+  - SDK: huggingface_hub.InferenceClient (provider="fal-ai")
+
+- **Other image modules**:
+  - [image_generation/gemini_provider.py](../../backend/services/llm_providers/image_generation/gemini_provider.py) – Gemini image generation integration
+  - [image_generation/wavespeed_provider.py](../../backend/services/llm_providers/image_generation/wavespeed_provider.py) – WaveSpeed image editing
+  - [image_generation/wavespeed_face_swap_provider.py](../../backend/services/llm_providers/image_generation/wavespeed_face_swap_provider.py) – Face swap
+  - [image_generation/wavespeed_edit_provider.py](../../backend/services/llm_providers/image_generation/wavespeed_edit_provider.py) – General edits
+
+## Video Generation
+- **Contracts**: [video_generation/base.py](../../backend/services/llm_providers/video_generation/base.py)
+  - VideoGenerationOptions, VideoGenerationResult
+  - Protocol: VideoGenerationProvider (async, progress callbacks)
+
+- **WaveSpeed video**: [video_generation/wavespeed_provider.py](../../backend/services/llm_providers/video_generation/wavespeed_provider.py)
+  - BaseWaveSpeedTextToVideoService:
+    - MODEL_NAME/PATH contract
+    - calculate_cost(resolution, duration)
+    - input validation helpers
+  - Model services (e.g., HunyuanVideoService, LTX‑2 variants)
+  - Client: services.wavespeed.client.WaveSpeedClient
+
+## Audio / STT
+- **Modules**:
+  - [audio_to_text_generation/gemini_audio_text.py](../../backend/services/llm_providers/audio_to_text_generation/gemini_audio_text.py)
+  - [audio_to_text_generation/stt_audio_blog.py](../../backend/services/llm_providers/audio_to_text_generation/stt_audio_blog.py)
+  - Responsibilities:
+    - Convert audio to text
+    - Provide structured outputs for downstream blog/content workflows
+
+## Shared Patterns
+- **Environment handling**:
+  - Providers validate their own secrets and default models
+  - No secrets logged; provider‑scoped logger via utils.logger_utils.get_service_logger
+- **Result normalization**:
+  - Binary payloads (image_bytes, video_bytes) and metadata are standardized
+  - Provider name/model surfaced in result for analytics
+- **Retries and resilience**:
+  - Text providers use tenacity exponential backoff
+  - Media providers implement validation and sensible defaults
+
+## Integration Points
+- Subscription enforcement and preflight:
+  - [api/subscription/routes/preflight.py](../../backend/api/subscription/routes/preflight.py)
+  - PricingService/UsageTrackingService are invoked prior to calling providers
+- Usage logging:
+  - Centralized in subscription services; gateway returns normalized data for logging
+- Pricing:
+  - Per‑provider and per‑model costs reflected in preflight and service layers
+
+## Extending the Gateway
+1. Choose modality (text/image/video/audio)
+2. Implement the appropriate Protocol and dataclasses
+3. Validate and load configuration from environment
+4. Normalize outputs to gateway result types
+5. Add pricing/preflight entries and update subscription limit checks
+6. Add route handlers that perform validation then call the new provider
+
+Following this reference ensures new providers integrate smoothly with ALwrity’s subscription, pricing, and analytics subsystems while keeping UI/API stable across diverse models.
--- a/docs/subscription/Billing_and_Usage.md
+++ b/docs/subscription/Billing_and_Usage.md
@@ -0,0 +1,71 @@
+# Subscription, Billing & Usage Tracking
+
+This document details how ALwrity manages subscriptions, processes payments via Stripe, and tracks granular usage for every user interaction.
+
+## 1. Subscription Model
+
+ALwrity uses a **tier-based subscription model** enforced at the API gateway level.
+
+### Tiers
+- **Free**: Limited access, community support.
+- **Basic**: Entry-level AI usage, standard support.
+- **Pro**: High limits, advanced models (Gemini Pro, FLUX), priority support.
+- **Enterprise**: Custom limits, dedicated infrastructure.
+
+### Data Model (`UserSubscription`)
+Stored in the user's SQLite database (`alwrity_user_{id}.db`):
+- `stripe_customer_id`: Link to Stripe Customer.
+- `stripe_subscription_id`: Active subscription ID.
+- `plan_id`: Internal plan reference (linked to `SubscriptionPlan`).
+- `status`: `active`, `past_due`, `canceled`, etc.
+- `current_period_start` / `end`: Defines the billing cycle window.
+
+## 2. Billing Integration (Stripe)
+
+We use **Stripe** for all payments, webhooks, and portal management.
+
+### Key Components
+- **StripeService**: Handles checkout creation, portal sessions, and webhooks.
+- **Webhooks**: Listens for events like `invoice.payment_succeeded`, `customer.subscription.updated`.
+  - **Idempotency**: All webhooks are tracked in `ProcessedStripeEvent` to prevent duplicate processing.
+  - **Reliability**: Events are processed transactionally; failures are logged and retried by Stripe.
+- **Configuration**: Plan-to-Price mapping is loaded from environment variables (`STRIPE_PLAN_PRICE_MAPPING_TEST` / `_LIVE`) to keep the code in sync with the Stripe Dashboard.
+
+### Checkout Flow
+1. Frontend calls `/api/subscription/create-checkout-session`.
+2. Backend validates user and creates Stripe Session.
+3. User pays on Stripe.
+4. Stripe sends `checkout.session.completed` webhook.
+5. Backend provisions subscription and credits in `UserSubscription`.
+
+## 3. Usage Tracking
+
+Every API call to an LLM provider is tracked, costed, and logged.
+
+### Tracking Flow
+1. **Pre-flight Check** (`check_usage_limits`):
+   - Before generating content, the system estimates cost/tokens.
+   - If the request would exceed the user's plan limits (e.g., "50 videos/month"), it is rejected with HTTP 429.
+2. **Execution**: The provider generates the content.
+3. **Post-execution Log** (`track_usage`):
+   - Actual tokens/duration are measured.
+   - Cost is calculated based on `APIProviderPricing` table.
+   - Entry added to `APIUsageLog` (granular) and aggregated into `UsageSummary` (monthly totals).
+
+### Database Tables
+- **`APIUsageLog`**: Immutable ledger of every call.
+  - Fields: `user_id`, `provider`, `model`, `input_tokens`, `output_tokens`, `cost`, `status_code`.
+- **`UsageSummary`**: Aggregated stats per billing period.
+  - Fields: `total_calls`, `total_cost`, `gemini_calls`, `video_calls`, etc.
+  - **Unique Constraint**: Enforced on `(user_id, billing_period)` to prevent data drift.
+
+### Pricing Engine (`PricingService`)
+- Costs are not hardcoded. They are fetched from `APIProviderPricing` table.
+- Supports per-token (text), per-image (media), and per-second (video/audio) pricing models.
+- Admin can update pricing in DB without redeploying code.
+
+## 4. Frontend Integration
+
+- **Usage Dashboard**: Visualizes consumption vs. limits.
+- **Real-time**: Usage stats are typically updated immediately after generation.
+- **Limit Rings**: UI components show percentage used (e.g., "80% of monthly video limit").