chore: Update backend services, intelligence integration, and documentation

2026-03-05 22:14:25 +05:30
parent 5cccb89df8
commit 110f7318cc
7 changed files with 439 additions and 4 deletions
--- a/backend/services/intelligence/sif_integration.py
+++ b/backend/services/intelligence/sif_integration.py
@@ -925,6 +925,8 @@ class SIFIntegrationService:
            # Content pillar analysis
            if self.intelligence_service.is_initialized():
                clusters = await self.intelligence_service.cluster(min_score=0.6)
                if asyncio.iscoroutine(clusters):
                    clusters = await clusters
                insights["content_pillars"] = self._format_clusters_as_pillars(clusters)
                # Semantic gaps analysis
--- a/backend/services/intelligence/txtai_service.py
+++ b/backend/services/intelligence/txtai_service.py
@@ -215,6 +215,7 @@ class TxtaiIntelligenceService:
                    logger.error(f"Detected known txtai/faiss IndexIDMap/nprobe incompatibility for user {self.user_id}. Attempting re-init with numpy backend fallback...")
                    # Switch to numpy backend which doesn't have this issue
                    self._backend = "numpy"
                    self._initialized = False
                    self._initialize_embeddings()
                    if self.embeddings:
                        results = self.embeddings.search(query, limit=limit)
@@ -270,7 +271,9 @@ class TxtaiIntelligenceService:
            except AttributeError as ae:
                if "nprobe" in str(ae):
                    logger.error(f"Detected IndexIDMap nprobe error in similarity for user {self.user_id}. Falling back to numpy backend...")
                    # Switch to numpy backend which doesn't have this issue
                    self._backend = "numpy"
                    self._initialized = False
                    self._initialize_embeddings()
                    if self.embeddings:
                        similarity = self.embeddings.similarity(text1, text2)
@@ -328,7 +331,7 @@ class TxtaiIntelligenceService:
            # Check if we have graph functionality available
            if not hasattr(self.embeddings, 'graph') or not self.embeddings.graph:
                logger.warning(f"Graph clustering not available for user {self.user_id}. Using fallback clustering.")
-                return self._fallback_clustering(min_score)
+                return await self._fallback_clustering(min_score)
            # Use graph-based clustering if available
            # Perform a search to get graph structure
@@ -338,10 +341,13 @@ class TxtaiIntelligenceService:
            except AttributeError as ae:
                if "nprobe" in str(ae):
                    logger.error(f"Detected IndexIDMap nprobe error in cluster for user {self.user_id}. Falling back to numpy backend...")
                    # Force re-initialization with numpy backend to bypass FAISS issue
                    self._backend = "numpy"
                    self._initialized = False
                    self._initialize_embeddings()
                    if self.embeddings:
-                        graph_results = self.embeddings.search(sample_query, limit=10, graph=True)
+                        # numpy backend has no graph support, so skip the graph retry and use fallback clustering
                        return await self._fallback_clustering(min_score)
                    else:
                        raise ae
                else:
@@ -349,7 +355,7 @@ class TxtaiIntelligenceService:
            if not graph_results:
                logger.warning(f"No graph results for clustering user {self.user_id}")
-                return self._fallback_clustering(min_score)
+                return await self._fallback_clustering(min_score)
            # Extract clusters from graph results
            clusters = self._extract_clusters_from_graph(graph_results, min_score)
@@ -377,7 +383,7 @@ class TxtaiIntelligenceService:
            logger.error(f"Clustering failed for user {self.user_id}: {e}")
            logger.error(f"Min score: {min_score}")
            logger.error(f"Full traceback: {traceback.format_exc()}")
-            return self._fallback_clustering(min_score)
+            return await self._fallback_clustering(min_score)
    async def _fallback_clustering(self, min_score: float) -> List[List[int]]:
        """Fallback clustering method when graph clustering is not available."""
--- a/docs/llm_gateway/Architecture.md
+++ b/docs/llm_gateway/Architecture.md
@@ -0,0 +1,104 @@
 # ALwrity LLM Gateway – Architecture Overview
 ALwrity’s LLM Gateway lives under [llm_providers](../../backend/services/llm_providers) and provides a consistent, production‑oriented interface for text, image, audio, and video generation across multiple model providers. It encapsulates provider differences, applies subscription enforcement, and centralizes observability and reliability patterns.
 ## Goals
 - Unified surface for LLM operations across providers
 - Strong subscription enforcement and cost awareness
 - Resilient calls with retries and structured error handling
 - Extensible provider architecture with clear contracts
 - Transparent metrics, usage logging, and pricing integration
 ## High‑Level Flow
 1. Entry points route requests to the appropriate capability:
   - Text generation via [main_text_generation.py](../../backend/services/llm_providers/main_text_generation.py)
   - Image generation and editing via [image_generation](../../backend/services/llm_providers/image_generation)
   - Video generation via [video_generation](../../backend/services/llm_providers/video_generation)
   - Audio/STT via [audio_to_text_generation](../../backend/services/llm_providers/audio_to_text_generation)
 2. Subscription enforcement integrates before provider calls:
   - Uses PricingService and UsageTrackingService to validate tokens/operations
   - Blocks requests that exceed limits with actionable error payloads
 3. Provider module performs the call with provider‑specific SDKs/APIs
 4. Results are normalized to ALwrity types and returned upstream
 ## Core Components
 - **Text Generation Entry**: [main_text_generation.py](../../backend/services/llm_providers/main_text_generation.py)
  - Detects available providers via APIKeyManager
  - Applies strict subscription checks using PricingService and UsageTrackingService
  - Routes to Gemini or Hugging Face implementations
 - **Image Generation Contracts**: [base.py](../../backend/services/llm_providers/image_generation/base.py)
  - Options and Result dataclasses
  - Protocols for generation, edit, and face‑swap providers
 - **Video Generation Contracts**: [base.py](../../backend/services/llm_providers/video_generation/base.py)
  - Options and Result dataclasses
  - Async protocol with progress callbacks
 - **Provider Implementations**:
  - Gemini text: [gemini_provider.py](../../backend/services/llm_providers/gemini_provider.py)
  - Hugging Face text: [huggingface_provider.py](../../backend/services/llm_providers/huggingface_provider.py)
  - Hugging Face image: [hf_provider.py](../../backend/services/llm_providers/image_generation/hf_provider.py)
  - WaveSpeed video: [wavespeed_provider.py](../../backend/services/llm_providers/video_generation/wavespeed_provider.py)
 ## Provider Abstraction
 - Image providers conform to:
  - ImageGenerationProvider.generate(options) -> ImageGenerationResult
  - ImageEditProvider.edit(options) -> ImageGenerationResult
  - FaceSwapProvider.swap_face(options) -> ImageGenerationResult
 - Video providers conform to:
  - VideoGenerationProvider.generate_video(options, progress_cb) -> VideoGenerationResult
 These contracts ensure consistent options/result types so downstream UI and logging remain stable regardless of provider.
 ## Subscription Enforcement
 - Performed in the text pipeline entry point before any provider call:
  - See enforcement and usage checks in [main_text_generation.py](../../backend/services/llm_providers/main_text_generation.py#L117-L166)
 - Preflight operations endpoint also validates multi‑operation cost/limits:
  - See [preflight.py](../../backend/api/subscription/routes/preflight.py)
 - Image/video modules typically rely on the calling route to validate limits first, then perform provider calls.
 ## Configuration and Secrets
 - Gemini: GEMINI_API_KEY
  - Loaded and validated in [gemini_provider.py](../../backend/services/llm_providers/gemini_provider.py#L101-L116)
 - Hugging Face: HF_TOKEN
  - Loaded and validated in [huggingface_provider.py](../../backend/services/llm_providers/huggingface_provider.py#L90-L105)
 - Hugging Face image defaults: HF_IMAGE_MODEL
  - Used in [image_generation/hf_provider.py](../../backend/services/llm_providers/image_generation/hf_provider.py#L17-L21)
 - Provider clients must never log secrets; logs are provider‑scoped via get_service_logger.
 ## Reliability and Error Handling
 - Exponential backoff retries using tenacity:
  - Gemini text: [gemini_text_response](../../backend/services/llm_providers/gemini_provider.py#L117)
  - Hugging Face text: [huggingface_text_response](../../backend/services/llm_providers/huggingface_provider.py#L106)
 - Structured exceptions surface HTTP 429 for limit breaches with usage info
 - Provider modules return normalized results; callers handle downstream persistence and telemetry
 ## Pricing and Cost Awareness
 - Preflight cost estimation computes operation costs per provider/model:
  - See multi‑operation handling in [preflight.py](../../backend/api/subscription/routes/preflight.py#L100-L144)
 - Video cost calculation is provider/model aware:
  - See WaveSpeed services and `calculate_cost` in [video_generation/wavespeed_provider.py](../../backend/services/llm_providers/video_generation/wavespeed_provider.py#L44-L56)
 ## Observability
 - Service‑scoped loggers for each provider/module
 - Central usage logs recorded via subscription services on the calling routes
 - Provider metadata normalized in result objects for consistent analytics
 ## Extensibility Guidelines
 - Implement the appropriate Protocol interface in a new provider module
 - Normalize options and results to the gateway dataclasses
 - Keep environment/key validation local to the provider module
 - Add cost mapping in PricingService and preflight for new operations/models
 - Wire subscription validation in the calling route before invoking provider
 ## Request Lifecycle (Text)
 1. Client submits prompt to text endpoint
 2. Entry point determines provider (env or APIKeyManager) and validates subscription limits
 3. Provider‑specific function executes with retries and returns normalized text
 4. Caller logs usage and returns response to client
 ## Request Lifecycle (Media)
 1. Client submits generation/edit/face‑swap request
 2. Route validates plan limits (tokens, requests, or per‑operation limits)
 3. Provider service executes call and produces normalized binary payload and metadata
 4. Caller logs usage and returns media/links to client
 This architecture isolates provider variability while standardizing contracts, enabling safe expansion to new models and modalities without destabilizing upstream consumers.
--- a/docs/llm_gateway/Extending_the_Gateway.md
+++ b/docs/llm_gateway/Extending_the_Gateway.md
@@ -0,0 +1,96 @@
 # Extending the LLM Gateway
 This guide provides a checklist and templates for adding new providers or modalities to the ALwrity LLM Gateway.
 ## Checklist
 1.  **Define the Provider Interface**:
    - [ ] Create a new module in `backend/services/llm_providers/<modality>/`.
    - [ ] Define input options dataclass (e.g., `MyNewProviderOptions`).
    - [ ] Implement the standard Protocol (e.g., `ImageGenerationProvider`).
 2.  **Configuration**:
    - [ ] Add necessary API keys to `.env.example` and `APIKeyManager`.
    - [ ] Add new provider enum to `backend/models/subscription_models.py` (`APIProvider`).
 3.  **Pricing & Usage**:
    - [ ] Add default pricing in `PricingService` or migration script.
    - [ ] Ensure `UsageSummary` table has columns for this provider (if it's a major one) or map it to a generic category.
 4.  **Integration**:
    - [ ] Register the provider in the main entry point (e.g., `main_image_generation.py`).
    - [ ] Update `preflight.py` to handle cost estimation for this provider.
 5.  **Frontend**:
    - [ ] Update `billingService.ts` to handle the new provider key in usage stats (if applicable).
    - [ ] Add provider icon/color in `billingService.ts`.
 ## Skeleton Template (Python)
 Here is a template for a new **Image Generation Provider**:
 ```python
 from __future__ import annotations
 import os
 from typing import Optional, Dict, Any
 from dataclasses import dataclass
 from .base import ImageGenerationOptions, ImageGenerationResult, ImageGenerationProvider
 from utils.logger_utils import get_service_logger
 logger = get_service_logger("image_generation.my_new_provider")
 class MyNewProvider(ImageGenerationProvider):
    """
    Skeleton image-generation provider, intended as a copy-and-adapt template.

    The API key is resolved at construction time. ``generate`` currently
    returns placeholder bytes instead of calling a real external API; replace
    the marked steps with the provider's actual client calls.
    """
    def __init__(self, api_key: Optional[str] = None):
        """
        Resolve the API key for this provider.

        Args:
            api_key: Explicit key. When omitted or empty, the
                ``MY_PROVIDER_API_KEY`` environment variable is used instead.

        Raises:
            RuntimeError: If neither the argument nor the environment
                variable yields a non-empty key.
        """
        self.api_key = api_key or os.getenv("MY_PROVIDER_API_KEY")
        if not self.api_key:
            raise RuntimeError("MY_PROVIDER_API_KEY is required")
        # Initialize client here
    def generate(self, options: ImageGenerationOptions) -> ImageGenerationResult:
        """
        Produce an image for ``options`` and wrap it in the gateway result type.

        Args:
            options: Request options; this template reads ``prompt``,
                ``width``, ``height``, ``model`` and ``seed`` from it.

        Returns:
            An ``ImageGenerationResult`` whose width, height and seed are
            copied from ``options``, with ``provider`` set to
            ``"my_new_provider"`` and ``model`` falling back to
            ``"default-model"`` when ``options.model`` is falsy.

        Raises:
            Exception: Any error raised inside the ``try`` block is logged
                and re-raised unchanged.
        """
        # Only the first 50 characters of the prompt are written to the log.
        logger.info(f"Generating image with MyNewProvider: {options.prompt[:50]}...")
        try:
            # 1. Call External API
            # response = client.generate(...)
            # 2. Process Response (Mock)
            # Placeholder payload; a real provider would use the API response here.
            image_bytes = b"fake_image_data" 
            width = options.width
            height = options.height
            # 3. Return Standard Result
            return ImageGenerationResult(
                image_bytes=image_bytes,
                width=width,
                height=height,
                provider="my_new_provider",
                model=options.model or "default-model",
                seed=options.seed,
                metadata={"raw_response": "..."}
            )
        except Exception as e:
            # Log for provider-scoped diagnostics, then let the caller handle it.
            logger.error(f"Generation failed: {e}")
            raise
 ```
 ## Skeleton Template (Route Integration)
 In `main_image_generation.py`:
 ```python
 from .image_generation.my_new_provider import MyNewProvider
 def generate_image(prompt: str, provider: str, ...):
    # ... existing code ...
    if provider == "my_new_provider":
        service = MyNewProvider()
        result = service.generate(options)
    # ... existing code ...
 ```
--- a/docs/llm_gateway/Features_and_Status.md
+++ b/docs/llm_gateway/Features_and_Status.md
@@ -0,0 +1,59 @@
 # LLM Gateway – Features & Implementation Status
 This document provides a high-level overview of the LLM Gateway's capabilities and the current production status of each component.
 ## Core Features
 - **Unified Interface**: Single API surface for text, image, video, and audio generation, abstracting away provider-specific SDKs.
 - **Provider Agnostic**: Switch between Gemini, Hugging Face, Stability, WaveSpeed, etc., via configuration or runtime parameters.
 - **Subscription Enforcement**: Strict pre-flight checks against user plans (Free, Basic, Pro, Enterprise) before any API call.
 - **Cost Awareness**: Granular tracking of input/output tokens, request counts, and media generation costs per provider/model.
 - **Resilience**: Built-in retries (exponential backoff) for transient failures (rate limits, timeouts).
 - **Observability**: Centralized logging (`APIUsageLog`) and usage aggregation (`UsageSummary`) for all modalities.
 - **Streaming Support**: Partial. Infrastructure for text streaming exists, but most calls currently use blocking (non-streaming) responses.
 ## Implementation Status
 ### 1. Text Generation
 | Feature | Provider | Status | Notes |
 | :--- | :--- | :--- | :--- |
 | **Chat/Completion** | Google Gemini | ✅ Production | Default provider. Supports `gemini-2.0-flash`. |
 | **Chat/Completion** | Hugging Face | ✅ Production | via Inference Providers (e.g., `mistralai/Mistral-7B`). |
 | **Structured JSON** | Gemini | ✅ Production | Uses `response_schema` for reliable parsing. |
 | **Structured JSON** | Hugging Face | ✅ Production | Uses `response_format={ "type": "json_object" }`. |
 ### 2. Image Generation
 | Feature | Provider | Status | Notes |
 | :--- | :--- | :--- | :--- |
 | **Text-to-Image** | Google Gemini | ✅ Production | Imagen 3 models. |
 | **Text-to-Image** | Hugging Face | ✅ Production | FLUX.1 via fal-ai/Black Forest Labs. |
 | **Text-to-Image** | Stability AI | ✅ Production | Core/SD3 models. |
 | **Text-to-Image** | WaveSpeed | ✅ Production | High-speed generation. |
 | **Image Editing** | WaveSpeed | ✅ Production | Inpainting, background removal, face swap. |
 ### 3. Video Generation
 | Feature | Provider | Status | Notes |
 | :--- | :--- | :--- | :--- |
 | **Text-to-Video** | WaveSpeed | ✅ Production | HunyuanVideo-1.5, LTX-2 Pro. |
 | **Image-to-Video** | WaveSpeed | 🚧 Planned | Roadmap item. |
 ### 4. Audio Generation
 | Feature | Provider | Status | Notes |
 | :--- | :--- | :--- | :--- |
 | **Text-to-Speech** | Gemini | ✅ Production | Audio generation capability. |
 | **Text-to-Speech** | WaveSpeed | ✅ Production | Fast TTS. |
 | **Speech-to-Text** | Gemini | ✅ Production | Transcription (via `audio_to_text_generation`). |
 ### 5. Research & Tools
 | Feature | Provider | Status | Notes |
 | :--- | :--- | :--- | :--- |
 | **Web Search** | Tavily | ✅ Production | Integrated for grounded research. |
 | **Web Search** | Serper | ✅ Production | Google Search API alternative. |
 | **Web Search** | Exa | ✅ Production | Neural search. |
 ## Roadmap & Next Steps
 - **Streaming Standardization**: Unify streaming interfaces across all text providers for consistent frontend UX.
 - **Model Fallbacks**: Automatic failover to secondary providers if the primary is down (currently manual/env-based).
 - **Fine-tuning Support**: Add gateway endpoints for launching fine-tuning jobs and serving the resulting fine-tuned models.
 - **Caching Layer**: Redis-based semantic caching for frequent queries to reduce costs.
--- a/docs/llm_gateway/Modules.md
+++ b/docs/llm_gateway/Modules.md
@@ -0,0 +1,97 @@
 # LLM Gateway – Module Reference
 This document catalogs the modules under [llm_providers](../../backend/services/llm_providers) with their responsibilities, key classes/functions, configuration, and integration points.
 ## Text Generation
 - **Entry point**: [main_text_generation.py](../../backend/services/llm_providers/main_text_generation.py)
  - llm_text_gen(prompt, system_prompt, json_struct, user_id)
  - Responsibilities:
    - Resolve provider (env or APIKeyManager)
    - Perform strict subscription checks (PricingService, UsageTrackingService)
    - Call Gemini or Hugging Face implementations
  - Integration:
    - models.subscription_models.APIProvider mapping
    - services.subscription.PricingService, UsageTrackingService
 - **Gemini provider**: [gemini_provider.py](../../backend/services/llm_providers/gemini_provider.py)
  - get_gemini_api_key() – env validation
  - gemini_text_response(...) – tenacity‑backed retries, text output
  - gemini_structured_json_response(...) – structured JSON output
  - Config: GEMINI_API_KEY
  - SDK: google.generativeai
 - **Hugging Face provider**: [huggingface_provider.py](../../backend/services/llm_providers/huggingface_provider.py)
  - get_huggingface_api_key() – env validation
  - huggingface_text_response(...) – Responses API (OpenAI client), retries
  - huggingface_structured_json_response(...) – structured JSON output
  - Config: HF_TOKEN
  - SDK: openai client pointed at Hugging Face router
 ## Image Generation
 - **Contracts**: [image_generation/base.py](../../backend/services/llm_providers/image_generation/base.py)
  - ImageGenerationOptions, ImageGenerationResult
  - ImageEditOptions, FaceSwapOptions (with to_dict helpers)
  - Protocols: ImageGenerationProvider, ImageEditProvider, FaceSwapProvider
 - **Hugging Face image**: [image_generation/hf_provider.py](../../backend/services/llm_providers/image_generation/hf_provider.py)
  - Class: HuggingFaceImageProvider(ImageGenerationProvider)
  - generate(options) -> ImageGenerationResult
  - Config: HF_TOKEN, HF_IMAGE_MODEL (default FLUX.1‑Krea‑dev)
  - SDK: huggingface_hub.InferenceClient (provider="fal-ai")
 - **Other image modules**:
  - [image_generation/gemini_provider.py](../../backend/services/llm_providers/image_generation/gemini_provider.py) – Gemini image generation integration
  - [image_generation/wavespeed_provider.py](../../backend/services/llm_providers/image_generation/wavespeed_provider.py) – WaveSpeed image editing
  - [image_generation/wavespeed_face_swap_provider.py](../../backend/services/llm_providers/image_generation/wavespeed_face_swap_provider.py) – Face swap
  - [image_generation/wavespeed_edit_provider.py](../../backend/services/llm_providers/image_generation/wavespeed_edit_provider.py) – General edits
 ## Video Generation
 - **Contracts**: [video_generation/base.py](../../backend/services/llm_providers/video_generation/base.py)
  - VideoGenerationOptions, VideoGenerationResult
  - Protocol: VideoGenerationProvider (async, progress callbacks)
 - **WaveSpeed video**: [video_generation/wavespeed_provider.py](../../backend/services/llm_providers/video_generation/wavespeed_provider.py)
  - BaseWaveSpeedTextToVideoService:
    - MODEL_NAME/PATH contract
    - calculate_cost(resolution, duration)
    - input validation helpers
  - Model services (e.g., HunyuanVideoService, LTX‑2 variants)
  - Client: services.wavespeed.client.WaveSpeedClient
 ## Audio / STT
 - **Modules**:
  - [audio_to_text_generation/gemini_audio_text.py](../../backend/services/llm_providers/audio_to_text_generation/gemini_audio_text.py)
  - [audio_to_text_generation/stt_audio_blog.py](../../backend/services/llm_providers/audio_to_text_generation/stt_audio_blog.py)
  - Responsibilities:
    - Convert audio to text
    - Provide structured outputs for downstream blog/content workflows
 ## Shared Patterns
 - **Environment handling**:
  - Providers validate their own secrets and default models
  - No secrets logged; provider‑scoped logger via utils.logger_utils.get_service_logger
 - **Result normalization**:
  - Binary payloads (image_bytes, video_bytes) and metadata are standardized
  - Provider name/model surfaced in result for analytics
 - **Retries and resilience**:
  - Text providers use tenacity exponential backoff
  - Media providers implement validation and sensible defaults
 ## Integration Points
 - Subscription enforcement and preflight:
  - [api/subscription/routes/preflight.py](../../backend/api/subscription/routes/preflight.py)
  - PricingService/UsageTrackingService are invoked prior to calling providers
 - Usage logging:
  - Centralized in subscription services; gateway returns normalized data for logging
 - Pricing:
  - Per‑provider and per‑model costs reflected in preflight and service layers
 ## Extending the Gateway
 1. Choose modality (text/image/video/audio)
 2. Implement the appropriate Protocol and dataclasses
 3. Validate and load configuration from environment
 4. Normalize outputs to gateway result types
 5. Add pricing/preflight entries and update subscription limit checks
 6. Add route handlers that perform validation then call the new provider
 Following this reference ensures new providers integrate smoothly with ALwrity’s subscription, pricing, and analytics subsystems while keeping UI/API stable across diverse models.
--- a/docs/subscription/Billing_and_Usage.md
+++ b/docs/subscription/Billing_and_Usage.md
@@ -0,0 +1,71 @@
 # Subscription, Billing & Usage Tracking
 This document details how ALwrity manages subscriptions, processes payments via Stripe, and tracks granular usage for every user interaction.
 ## 1. Subscription Model
 ALwrity uses a **tier-based subscription model** enforced at the API gateway level.
 ### Tiers
 - **Free**: Limited access, community support.
 - **Basic**: Entry-level AI usage, standard support.
 - **Pro**: High limits, advanced models (Gemini Pro, FLUX), priority support.
 - **Enterprise**: Custom limits, dedicated infrastructure.
 ### Data Model (`UserSubscription`)
 Stored in the user's SQLite database (`alwrity_user_{id}.db`):
 - `stripe_customer_id`: Link to Stripe Customer.
 - `stripe_subscription_id`: Active subscription ID.
 - `plan_id`: Internal plan reference (linked to `SubscriptionPlan`).
 - `status`: `active`, `past_due`, `canceled`, etc.
 - `current_period_start` / `current_period_end`: Define the billing cycle window.
 ## 2. Billing Integration (Stripe)
 We use **Stripe** for all payments, webhooks, and portal management.
 ### Key Components
 - **StripeService**: Handles checkout creation, portal sessions, and webhooks.
 - **Webhooks**: Listens for events like `invoice.payment_succeeded`, `customer.subscription.updated`.
  - **Idempotency**: All webhooks are tracked in `ProcessedStripeEvent` to prevent duplicate processing.
  - **Reliability**: Events are processed transactionally; failures are logged and retried by Stripe.
 - **Configuration**: Plan-to-Price mapping is loaded from environment variables (`STRIPE_PLAN_PRICE_MAPPING_TEST` / `_LIVE`) to ensure sync between code and Stripe Dashboard.
 ### Checkout Flow
 1. Frontend calls `/api/subscription/create-checkout-session`.
 2. Backend validates user and creates Stripe Session.
 3. User pays on Stripe.
 4. Stripe sends `checkout.session.completed` webhook.
 5. Backend provisions subscription and credits in `UserSubscription`.
 ## 3. Usage Tracking
 Every API call to an LLM provider is tracked, costed, and logged.
 ### Tracking Flow
 1. **Pre-flight Check** (`check_usage_limits`):
   - Before generating content, the system estimates cost/tokens.
   - If the user exceeds plan limits (e.g., "50 videos/month"), the request is rejected with HTTP 429.
 2. **Execution**: The provider generates the content.
 3. **Post-execution Log** (`track_usage`):
   - Actual tokens/duration are measured.
   - Cost is calculated based on `APIProviderPricing` table.
   - Entry added to `APIUsageLog` (granular) and aggregated into `UsageSummary` (monthly totals).
 ### Database Tables
 - **`APIUsageLog`**: Immutable ledger of every call.
  - Fields: `user_id`, `provider`, `model`, `input_tokens`, `output_tokens`, `cost`, `status_code`.
 - **`UsageSummary`**: Aggregated stats per billing period.
  - Fields: `total_calls`, `total_cost`, `gemini_calls`, `video_calls`, etc.
  - **Unique Constraint**: Enforced on `(user_id, billing_period)` to prevent data drift.
 ### Pricing Engine (`PricingService`)
 - Costs are not hardcoded. They are fetched from `APIProviderPricing` table.
 - Supports per-token (text), per-image (media), and per-second (video/audio) pricing models.
 - Admins can update pricing in the database without redeploying code.
 ## 4. Frontend Integration
 - **Usage Dashboard**: Visualizes consumption vs. limits.
 - **Real-time**: Usage stats are typically updated immediately after generation.
 - **Limit Rings**: UI components show percentage used (e.g., "80% of monthly video limit").